{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5016442784683128, "eval_steps": 500, "global_step": 9000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.573825316314587e-05, "grad_norm": 0.5498427152633667, "learning_rate": 3.3333333333333335e-07, "loss": 1.7989, "step": 1 }, { "epoch": 0.00011147650632629174, "grad_norm": 0.6303576827049255, "learning_rate": 6.666666666666667e-07, "loss": 1.996, "step": 2 }, { "epoch": 0.0001672147594894376, "grad_norm": 0.5333236455917358, "learning_rate": 1.0000000000000002e-06, "loss": 1.8613, "step": 3 }, { "epoch": 0.00022295301265258348, "grad_norm": 0.5659189224243164, "learning_rate": 1.3333333333333334e-06, "loss": 1.8904, "step": 4 }, { "epoch": 0.0002786912658157293, "grad_norm": 0.6221416592597961, "learning_rate": 1.6666666666666667e-06, "loss": 2.0151, "step": 5 }, { "epoch": 0.0003344295189788752, "grad_norm": 0.6198977828025818, "learning_rate": 2.0000000000000003e-06, "loss": 1.9774, "step": 6 }, { "epoch": 0.0003901677721420211, "grad_norm": 0.6328762173652649, "learning_rate": 2.3333333333333336e-06, "loss": 1.8994, "step": 7 }, { "epoch": 0.00044590602530516696, "grad_norm": 0.6075513362884521, "learning_rate": 2.666666666666667e-06, "loss": 1.894, "step": 8 }, { "epoch": 0.0005016442784683128, "grad_norm": 0.6397244930267334, "learning_rate": 3e-06, "loss": 2.0865, "step": 9 }, { "epoch": 0.0005573825316314586, "grad_norm": 0.6115519404411316, "learning_rate": 3.3333333333333333e-06, "loss": 1.9688, "step": 10 }, { "epoch": 0.0006131207847946045, "grad_norm": 0.546791672706604, "learning_rate": 3.666666666666667e-06, "loss": 1.8239, "step": 11 }, { "epoch": 0.0006688590379577504, "grad_norm": 0.690762996673584, "learning_rate": 4.000000000000001e-06, "loss": 2.0367, "step": 12 }, { "epoch": 0.0007245972911208963, "grad_norm": 0.7190566062927246, "learning_rate": 4.333333333333334e-06, "loss": 1.9817, "step": 13 }, { "epoch": 0.0007803355442840422, "grad_norm": 0.6093202233314514, "learning_rate": 4.666666666666667e-06, "loss": 2.01, "step": 14 }, { "epoch": 0.000836073797447188, "grad_norm": 0.5230669975280762, "learning_rate": 5e-06, "loss": 1.8419, "step": 15 }, { "epoch": 0.0008918120506103339, "grad_norm": 0.5391668677330017, "learning_rate": 5.333333333333334e-06, "loss": 1.8663, "step": 16 }, { "epoch": 0.0009475503037734797, "grad_norm": 0.6359019875526428, "learning_rate": 5.666666666666667e-06, "loss": 2.2089, "step": 17 }, { "epoch": 0.0010032885569366257, "grad_norm": 0.61967533826828, "learning_rate": 6e-06, "loss": 2.0842, "step": 18 }, { "epoch": 0.0010590268100997716, "grad_norm": 0.491642028093338, "learning_rate": 6.333333333333334e-06, "loss": 1.755, "step": 19 }, { "epoch": 0.0011147650632629172, "grad_norm": 0.7064740657806396, "learning_rate": 6.666666666666667e-06, "loss": 2.2494, "step": 20 }, { "epoch": 0.0011705033164260631, "grad_norm": 0.5671775937080383, "learning_rate": 7.000000000000001e-06, "loss": 2.0236, "step": 21 }, { "epoch": 0.001226241569589209, "grad_norm": 0.5698847770690918, "learning_rate": 7.333333333333334e-06, "loss": 1.8295, "step": 22 }, { "epoch": 0.001281979822752355, "grad_norm": 0.5910470485687256, "learning_rate": 7.666666666666667e-06, "loss": 2.1311, "step": 23 }, { "epoch": 0.0013377180759155008, "grad_norm": 0.567130446434021, "learning_rate": 8.000000000000001e-06, "loss": 1.888, "step": 24 }, { "epoch": 0.0013934563290786467, "grad_norm": 0.5540428757667542, "learning_rate": 8.333333333333334e-06, "loss": 1.6625, "step": 25 }, { "epoch": 0.0014491945822417925, "grad_norm": 0.5729663372039795, "learning_rate": 8.666666666666668e-06, "loss": 2.0062, "step": 26 }, { "epoch": 0.0015049328354049384, "grad_norm": 0.5232088565826416, "learning_rate": 9e-06, "loss": 1.7991, "step": 27 }, { "epoch": 0.0015606710885680843, "grad_norm": 0.5638092160224915, "learning_rate": 9.333333333333334e-06, "loss": 2.0728, "step": 28 }, { "epoch": 0.0016164093417312302, "grad_norm": 0.5504807829856873, "learning_rate": 9.666666666666667e-06, "loss": 1.808, "step": 29 }, { "epoch": 0.001672147594894376, "grad_norm": 0.5935587882995605, "learning_rate": 1e-05, "loss": 1.9738, "step": 30 }, { "epoch": 0.001727885848057522, "grad_norm": 0.6431534886360168, "learning_rate": 1.0333333333333333e-05, "loss": 2.0967, "step": 31 }, { "epoch": 0.0017836241012206678, "grad_norm": 0.5587693452835083, "learning_rate": 1.0666666666666667e-05, "loss": 1.6821, "step": 32 }, { "epoch": 0.0018393623543838135, "grad_norm": 0.5473759174346924, "learning_rate": 1.1000000000000001e-05, "loss": 1.8442, "step": 33 }, { "epoch": 0.0018951006075469594, "grad_norm": 0.6185194849967957, "learning_rate": 1.1333333333333334e-05, "loss": 2.0705, "step": 34 }, { "epoch": 0.0019508388607101053, "grad_norm": 0.5253747701644897, "learning_rate": 1.1666666666666668e-05, "loss": 1.7944, "step": 35 }, { "epoch": 0.0020065771138732514, "grad_norm": 0.5742389559745789, "learning_rate": 1.2e-05, "loss": 2.0, "step": 36 }, { "epoch": 0.0020623153670363973, "grad_norm": 0.6290589570999146, "learning_rate": 1.2333333333333334e-05, "loss": 2.1365, "step": 37 }, { "epoch": 0.002118053620199543, "grad_norm": 0.5194576382637024, "learning_rate": 1.2666666666666668e-05, "loss": 1.8569, "step": 38 }, { "epoch": 0.0021737918733626886, "grad_norm": 0.5665763020515442, "learning_rate": 1.3000000000000001e-05, "loss": 1.9313, "step": 39 }, { "epoch": 0.0022295301265258345, "grad_norm": 0.5268619060516357, "learning_rate": 1.3333333333333333e-05, "loss": 1.8843, "step": 40 }, { "epoch": 0.0022852683796889804, "grad_norm": 0.7840973734855652, "learning_rate": 1.3666666666666666e-05, "loss": 1.929, "step": 41 }, { "epoch": 0.0023410066328521262, "grad_norm": 0.5785960555076599, "learning_rate": 1.4000000000000001e-05, "loss": 2.0276, "step": 42 }, { "epoch": 0.002396744886015272, "grad_norm": 0.5202842354774475, "learning_rate": 1.4333333333333334e-05, "loss": 1.949, "step": 43 }, { "epoch": 0.002452483139178418, "grad_norm": 0.72431480884552, "learning_rate": 1.4666666666666668e-05, "loss": 2.2978, "step": 44 }, { "epoch": 0.002508221392341564, "grad_norm": 0.5558940768241882, "learning_rate": 1.5e-05, "loss": 1.9125, "step": 45 }, { "epoch": 0.00256395964550471, "grad_norm": 0.5687503814697266, "learning_rate": 1.5333333333333334e-05, "loss": 1.8533, "step": 46 }, { "epoch": 0.0026196978986678557, "grad_norm": 0.5703473091125488, "learning_rate": 1.5666666666666667e-05, "loss": 1.9015, "step": 47 }, { "epoch": 0.0026754361518310016, "grad_norm": 0.5496488809585571, "learning_rate": 1.6000000000000003e-05, "loss": 1.682, "step": 48 }, { "epoch": 0.0027311744049941474, "grad_norm": 0.6371431946754456, "learning_rate": 1.6333333333333335e-05, "loss": 2.0425, "step": 49 }, { "epoch": 0.0027869126581572933, "grad_norm": 0.6071433424949646, "learning_rate": 1.6666666666666667e-05, "loss": 1.8745, "step": 50 }, { "epoch": 0.002842650911320439, "grad_norm": 0.5981681942939758, "learning_rate": 1.7000000000000003e-05, "loss": 1.8872, "step": 51 }, { "epoch": 0.002898389164483585, "grad_norm": 0.6591808795928955, "learning_rate": 1.7333333333333336e-05, "loss": 2.0187, "step": 52 }, { "epoch": 0.002954127417646731, "grad_norm": 0.6213610172271729, "learning_rate": 1.7666666666666668e-05, "loss": 2.0231, "step": 53 }, { "epoch": 0.003009865670809877, "grad_norm": 0.6377214789390564, "learning_rate": 1.8e-05, "loss": 1.8641, "step": 54 }, { "epoch": 0.0030656039239730227, "grad_norm": 0.675821840763092, "learning_rate": 1.8333333333333333e-05, "loss": 2.215, "step": 55 }, { "epoch": 0.0031213421771361686, "grad_norm": 0.5989570021629333, "learning_rate": 1.866666666666667e-05, "loss": 1.9232, "step": 56 }, { "epoch": 0.0031770804302993145, "grad_norm": 0.6279881596565247, "learning_rate": 1.9e-05, "loss": 1.8452, "step": 57 }, { "epoch": 0.0032328186834624604, "grad_norm": 0.5670164227485657, "learning_rate": 1.9333333333333333e-05, "loss": 1.5623, "step": 58 }, { "epoch": 0.0032885569366256063, "grad_norm": 0.5822334289550781, "learning_rate": 1.9666666666666666e-05, "loss": 1.7901, "step": 59 }, { "epoch": 0.003344295189788752, "grad_norm": 0.6322411298751831, "learning_rate": 2e-05, "loss": 1.8802, "step": 60 }, { "epoch": 0.003400033442951898, "grad_norm": 0.6066840291023254, "learning_rate": 2.0333333333333334e-05, "loss": 1.8334, "step": 61 }, { "epoch": 0.003455771696115044, "grad_norm": 0.6801030039787292, "learning_rate": 2.0666666666666666e-05, "loss": 2.1029, "step": 62 }, { "epoch": 0.00351150994927819, "grad_norm": 0.6445280909538269, "learning_rate": 2.1e-05, "loss": 2.0333, "step": 63 }, { "epoch": 0.0035672482024413357, "grad_norm": 0.6259938478469849, "learning_rate": 2.1333333333333335e-05, "loss": 1.6012, "step": 64 }, { "epoch": 0.003622986455604481, "grad_norm": 0.6786999702453613, "learning_rate": 2.1666666666666667e-05, "loss": 2.0818, "step": 65 }, { "epoch": 0.003678724708767627, "grad_norm": 0.6728941202163696, "learning_rate": 2.2000000000000003e-05, "loss": 1.9022, "step": 66 }, { "epoch": 0.003734462961930773, "grad_norm": 0.6992253661155701, "learning_rate": 2.2333333333333335e-05, "loss": 1.7435, "step": 67 }, { "epoch": 0.003790201215093919, "grad_norm": 0.6083998084068298, "learning_rate": 2.2666666666666668e-05, "loss": 1.7816, "step": 68 }, { "epoch": 0.0038459394682570647, "grad_norm": 0.6070435643196106, "learning_rate": 2.3000000000000003e-05, "loss": 1.672, "step": 69 }, { "epoch": 0.0039016777214202106, "grad_norm": 0.6032823920249939, "learning_rate": 2.3333333333333336e-05, "loss": 1.771, "step": 70 }, { "epoch": 0.0039574159745833564, "grad_norm": 0.689372181892395, "learning_rate": 2.3666666666666668e-05, "loss": 1.9594, "step": 71 }, { "epoch": 0.004013154227746503, "grad_norm": 0.6333785653114319, "learning_rate": 2.4e-05, "loss": 1.8492, "step": 72 }, { "epoch": 0.004068892480909648, "grad_norm": 0.638140857219696, "learning_rate": 2.4333333333333336e-05, "loss": 1.798, "step": 73 }, { "epoch": 0.0041246307340727945, "grad_norm": 0.6000136137008667, "learning_rate": 2.466666666666667e-05, "loss": 1.6625, "step": 74 }, { "epoch": 0.00418036898723594, "grad_norm": 0.7654765248298645, "learning_rate": 2.5e-05, "loss": 2.1015, "step": 75 }, { "epoch": 0.004236107240399086, "grad_norm": 0.6845409870147705, "learning_rate": 2.5333333333333337e-05, "loss": 1.9176, "step": 76 }, { "epoch": 0.004291845493562232, "grad_norm": 0.6557128429412842, "learning_rate": 2.5666666666666666e-05, "loss": 1.8244, "step": 77 }, { "epoch": 0.004347583746725377, "grad_norm": 0.6574406027793884, "learning_rate": 2.6000000000000002e-05, "loss": 1.851, "step": 78 }, { "epoch": 0.0044033219998885235, "grad_norm": 0.6624826192855835, "learning_rate": 2.633333333333333e-05, "loss": 1.8332, "step": 79 }, { "epoch": 0.004459060253051669, "grad_norm": 0.7041051983833313, "learning_rate": 2.6666666666666667e-05, "loss": 1.8357, "step": 80 }, { "epoch": 0.004514798506214815, "grad_norm": 0.6737162470817566, "learning_rate": 2.7000000000000002e-05, "loss": 1.8162, "step": 81 }, { "epoch": 0.004570536759377961, "grad_norm": 0.6803858280181885, "learning_rate": 2.733333333333333e-05, "loss": 1.9187, "step": 82 }, { "epoch": 0.004626275012541107, "grad_norm": 0.6441910862922668, "learning_rate": 2.7666666666666667e-05, "loss": 1.9235, "step": 83 }, { "epoch": 0.0046820132657042525, "grad_norm": 0.6409979462623596, "learning_rate": 2.8000000000000003e-05, "loss": 1.9148, "step": 84 }, { "epoch": 0.004737751518867399, "grad_norm": 0.722623348236084, "learning_rate": 2.8333333333333335e-05, "loss": 1.9738, "step": 85 }, { "epoch": 0.004793489772030544, "grad_norm": 0.6637834310531616, "learning_rate": 2.8666666666666668e-05, "loss": 1.6872, "step": 86 }, { "epoch": 0.004849228025193691, "grad_norm": 0.7143079042434692, "learning_rate": 2.9e-05, "loss": 1.9944, "step": 87 }, { "epoch": 0.004904966278356836, "grad_norm": 0.7566176652908325, "learning_rate": 2.9333333333333336e-05, "loss": 1.7542, "step": 88 }, { "epoch": 0.004960704531519982, "grad_norm": 0.6472474932670593, "learning_rate": 2.9666666666666672e-05, "loss": 1.9534, "step": 89 }, { "epoch": 0.005016442784683128, "grad_norm": 0.6678224205970764, "learning_rate": 3e-05, "loss": 1.7684, "step": 90 }, { "epoch": 0.005072181037846274, "grad_norm": 0.6665822267532349, "learning_rate": 3.0333333333333337e-05, "loss": 1.9028, "step": 91 }, { "epoch": 0.00512791929100942, "grad_norm": 0.7907567620277405, "learning_rate": 3.066666666666667e-05, "loss": 1.8876, "step": 92 }, { "epoch": 0.005183657544172566, "grad_norm": 0.6738147735595703, "learning_rate": 3.1e-05, "loss": 1.7623, "step": 93 }, { "epoch": 0.005239395797335711, "grad_norm": 0.6898536086082458, "learning_rate": 3.1333333333333334e-05, "loss": 1.7103, "step": 94 }, { "epoch": 0.005295134050498858, "grad_norm": 0.6961106061935425, "learning_rate": 3.1666666666666666e-05, "loss": 1.537, "step": 95 }, { "epoch": 0.005350872303662003, "grad_norm": 0.6331319808959961, "learning_rate": 3.2000000000000005e-05, "loss": 1.6681, "step": 96 }, { "epoch": 0.005406610556825149, "grad_norm": 0.7678634524345398, "learning_rate": 3.233333333333333e-05, "loss": 2.1339, "step": 97 }, { "epoch": 0.005462348809988295, "grad_norm": 0.7012338638305664, "learning_rate": 3.266666666666667e-05, "loss": 1.7591, "step": 98 }, { "epoch": 0.005518087063151441, "grad_norm": 0.7289243340492249, "learning_rate": 3.3e-05, "loss": 1.901, "step": 99 }, { "epoch": 0.005573825316314587, "grad_norm": 0.6416298747062683, "learning_rate": 3.3333333333333335e-05, "loss": 1.5989, "step": 100 }, { "epoch": 0.005629563569477733, "grad_norm": 0.6193853616714478, "learning_rate": 3.366666666666667e-05, "loss": 1.7429, "step": 101 }, { "epoch": 0.005685301822640878, "grad_norm": 0.7283613681793213, "learning_rate": 3.4000000000000007e-05, "loss": 1.9885, "step": 102 }, { "epoch": 0.005741040075804025, "grad_norm": 0.6713369488716125, "learning_rate": 3.433333333333333e-05, "loss": 1.8521, "step": 103 }, { "epoch": 0.00579677832896717, "grad_norm": 0.6700227856636047, "learning_rate": 3.466666666666667e-05, "loss": 1.8404, "step": 104 }, { "epoch": 0.005852516582130316, "grad_norm": 0.6885061860084534, "learning_rate": 3.5e-05, "loss": 1.8081, "step": 105 }, { "epoch": 0.005908254835293462, "grad_norm": 0.6814194917678833, "learning_rate": 3.5333333333333336e-05, "loss": 1.8672, "step": 106 }, { "epoch": 0.005963993088456607, "grad_norm": 0.6492342948913574, "learning_rate": 3.566666666666667e-05, "loss": 1.7029, "step": 107 }, { "epoch": 0.006019731341619754, "grad_norm": 0.5920109748840332, "learning_rate": 3.6e-05, "loss": 1.5455, "step": 108 }, { "epoch": 0.006075469594782899, "grad_norm": 0.6685107946395874, "learning_rate": 3.633333333333333e-05, "loss": 1.9576, "step": 109 }, { "epoch": 0.0061312078479460455, "grad_norm": 0.6917557716369629, "learning_rate": 3.6666666666666666e-05, "loss": 1.9341, "step": 110 }, { "epoch": 0.006186946101109191, "grad_norm": 0.730872631072998, "learning_rate": 3.7e-05, "loss": 1.9828, "step": 111 }, { "epoch": 0.006242684354272337, "grad_norm": 0.7139527797698975, "learning_rate": 3.733333333333334e-05, "loss": 2.0277, "step": 112 }, { "epoch": 0.006298422607435483, "grad_norm": 0.6276320219039917, "learning_rate": 3.766666666666667e-05, "loss": 1.7702, "step": 113 }, { "epoch": 0.006354160860598629, "grad_norm": 0.6891281008720398, "learning_rate": 3.8e-05, "loss": 1.9062, "step": 114 }, { "epoch": 0.0064098991137617745, "grad_norm": 0.7155683636665344, "learning_rate": 3.8333333333333334e-05, "loss": 1.8527, "step": 115 }, { "epoch": 0.006465637366924921, "grad_norm": 0.6917515397071838, "learning_rate": 3.866666666666667e-05, "loss": 1.8439, "step": 116 }, { "epoch": 0.006521375620088066, "grad_norm": 0.7216237783432007, "learning_rate": 3.9000000000000006e-05, "loss": 2.0114, "step": 117 }, { "epoch": 0.0065771138732512125, "grad_norm": 0.6636412739753723, "learning_rate": 3.933333333333333e-05, "loss": 1.6951, "step": 118 }, { "epoch": 0.006632852126414358, "grad_norm": 0.7715172171592712, "learning_rate": 3.966666666666667e-05, "loss": 1.9907, "step": 119 }, { "epoch": 0.006688590379577504, "grad_norm": 0.6481485366821289, "learning_rate": 4e-05, "loss": 1.7934, "step": 120 }, { "epoch": 0.00674432863274065, "grad_norm": 0.6104344725608826, "learning_rate": 4.0333333333333336e-05, "loss": 1.6549, "step": 121 }, { "epoch": 0.006800066885903796, "grad_norm": 0.706912100315094, "learning_rate": 4.066666666666667e-05, "loss": 1.9666, "step": 122 }, { "epoch": 0.0068558051390669415, "grad_norm": 0.7835676670074463, "learning_rate": 4.1e-05, "loss": 2.024, "step": 123 }, { "epoch": 0.006911543392230088, "grad_norm": 0.6462398171424866, "learning_rate": 4.133333333333333e-05, "loss": 1.6993, "step": 124 }, { "epoch": 0.006967281645393233, "grad_norm": 0.7756698727607727, "learning_rate": 4.166666666666667e-05, "loss": 2.0135, "step": 125 }, { "epoch": 0.00702301989855638, "grad_norm": 0.6666940450668335, "learning_rate": 4.2e-05, "loss": 1.9444, "step": 126 }, { "epoch": 0.007078758151719525, "grad_norm": 0.6363375782966614, "learning_rate": 4.233333333333334e-05, "loss": 1.6977, "step": 127 }, { "epoch": 0.007134496404882671, "grad_norm": 0.6881687045097351, "learning_rate": 4.266666666666667e-05, "loss": 1.7938, "step": 128 }, { "epoch": 0.007190234658045817, "grad_norm": 0.7950214147567749, "learning_rate": 4.3e-05, "loss": 2.1036, "step": 129 }, { "epoch": 0.007245972911208962, "grad_norm": 0.6743674874305725, "learning_rate": 4.3333333333333334e-05, "loss": 2.0052, "step": 130 }, { "epoch": 0.007301711164372109, "grad_norm": 0.7302188277244568, "learning_rate": 4.3666666666666666e-05, "loss": 1.7815, "step": 131 }, { "epoch": 0.007357449417535254, "grad_norm": 0.691747784614563, "learning_rate": 4.4000000000000006e-05, "loss": 1.7225, "step": 132 }, { "epoch": 0.0074131876706984, "grad_norm": 0.6021103262901306, "learning_rate": 4.433333333333334e-05, "loss": 1.5821, "step": 133 }, { "epoch": 0.007468925923861546, "grad_norm": 0.7083866000175476, "learning_rate": 4.466666666666667e-05, "loss": 1.7831, "step": 134 }, { "epoch": 0.007524664177024692, "grad_norm": 0.6396238207817078, "learning_rate": 4.5e-05, "loss": 1.7933, "step": 135 }, { "epoch": 0.007580402430187838, "grad_norm": 0.6446027159690857, "learning_rate": 4.5333333333333335e-05, "loss": 1.697, "step": 136 }, { "epoch": 0.007636140683350984, "grad_norm": 0.6570568084716797, "learning_rate": 4.566666666666667e-05, "loss": 1.8226, "step": 137 }, { "epoch": 0.007691878936514129, "grad_norm": 0.7829813361167908, "learning_rate": 4.600000000000001e-05, "loss": 1.9071, "step": 138 }, { "epoch": 0.007747617189677276, "grad_norm": 0.6894962787628174, "learning_rate": 4.633333333333333e-05, "loss": 1.8796, "step": 139 }, { "epoch": 0.007803355442840421, "grad_norm": 0.6631702184677124, "learning_rate": 4.666666666666667e-05, "loss": 1.7765, "step": 140 }, { "epoch": 0.007859093696003567, "grad_norm": 0.7325467467308044, "learning_rate": 4.7e-05, "loss": 1.9653, "step": 141 }, { "epoch": 0.007914831949166713, "grad_norm": 0.7264820337295532, "learning_rate": 4.7333333333333336e-05, "loss": 1.9019, "step": 142 }, { "epoch": 0.00797057020232986, "grad_norm": 0.6573049426078796, "learning_rate": 4.766666666666667e-05, "loss": 1.8028, "step": 143 }, { "epoch": 0.008026308455493006, "grad_norm": 0.6475189328193665, "learning_rate": 4.8e-05, "loss": 1.8229, "step": 144 }, { "epoch": 0.00808204670865615, "grad_norm": 0.6277217864990234, "learning_rate": 4.8333333333333334e-05, "loss": 1.8648, "step": 145 }, { "epoch": 0.008137784961819296, "grad_norm": 0.6631461381912231, "learning_rate": 4.866666666666667e-05, "loss": 1.7499, "step": 146 }, { "epoch": 0.008193523214982443, "grad_norm": 0.8212792873382568, "learning_rate": 4.9e-05, "loss": 1.9345, "step": 147 }, { "epoch": 0.008249261468145589, "grad_norm": 0.6783550977706909, "learning_rate": 4.933333333333334e-05, "loss": 2.0028, "step": 148 }, { "epoch": 0.008304999721308734, "grad_norm": 0.7066723704338074, "learning_rate": 4.966666666666667e-05, "loss": 2.0291, "step": 149 }, { "epoch": 0.00836073797447188, "grad_norm": 0.772089958190918, "learning_rate": 5e-05, "loss": 2.0909, "step": 150 }, { "epoch": 0.008416476227635026, "grad_norm": 0.6396070718765259, "learning_rate": 5.0333333333333335e-05, "loss": 1.75, "step": 151 }, { "epoch": 0.008472214480798173, "grad_norm": 0.6549371480941772, "learning_rate": 5.0666666666666674e-05, "loss": 1.8499, "step": 152 }, { "epoch": 0.008527952733961317, "grad_norm": 0.7041524648666382, "learning_rate": 5.1000000000000006e-05, "loss": 1.9604, "step": 153 }, { "epoch": 0.008583690987124463, "grad_norm": 0.6144838929176331, "learning_rate": 5.133333333333333e-05, "loss": 1.813, "step": 154 }, { "epoch": 0.00863942924028761, "grad_norm": 0.5433954000473022, "learning_rate": 5.166666666666667e-05, "loss": 1.7692, "step": 155 }, { "epoch": 0.008695167493450754, "grad_norm": 0.6341120600700378, "learning_rate": 5.2000000000000004e-05, "loss": 1.8756, "step": 156 }, { "epoch": 0.0087509057466139, "grad_norm": 0.6475428938865662, "learning_rate": 5.2333333333333336e-05, "loss": 2.0465, "step": 157 }, { "epoch": 0.008806643999777047, "grad_norm": 0.6457498669624329, "learning_rate": 5.266666666666666e-05, "loss": 1.9387, "step": 158 }, { "epoch": 0.008862382252940193, "grad_norm": 0.562533974647522, "learning_rate": 5.300000000000001e-05, "loss": 1.7746, "step": 159 }, { "epoch": 0.008918120506103338, "grad_norm": 0.6415228247642517, "learning_rate": 5.333333333333333e-05, "loss": 1.7729, "step": 160 }, { "epoch": 0.008973858759266484, "grad_norm": 0.6404130458831787, "learning_rate": 5.3666666666666666e-05, "loss": 1.7488, "step": 161 }, { "epoch": 0.00902959701242963, "grad_norm": 0.6626627445220947, "learning_rate": 5.4000000000000005e-05, "loss": 1.8962, "step": 162 }, { "epoch": 0.009085335265592777, "grad_norm": 0.6191387176513672, "learning_rate": 5.433333333333334e-05, "loss": 1.8141, "step": 163 }, { "epoch": 0.009141073518755921, "grad_norm": 0.5454838871955872, "learning_rate": 5.466666666666666e-05, "loss": 1.5107, "step": 164 }, { "epoch": 0.009196811771919068, "grad_norm": 0.6767019033432007, "learning_rate": 5.500000000000001e-05, "loss": 2.1324, "step": 165 }, { "epoch": 0.009252550025082214, "grad_norm": 0.6267591714859009, "learning_rate": 5.5333333333333334e-05, "loss": 1.7378, "step": 166 }, { "epoch": 0.00930828827824536, "grad_norm": 0.5743867754936218, "learning_rate": 5.566666666666667e-05, "loss": 1.7654, "step": 167 }, { "epoch": 0.009364026531408505, "grad_norm": 0.5550642013549805, "learning_rate": 5.6000000000000006e-05, "loss": 1.8091, "step": 168 }, { "epoch": 0.009419764784571651, "grad_norm": 0.5943305492401123, "learning_rate": 5.633333333333334e-05, "loss": 1.6823, "step": 169 }, { "epoch": 0.009475503037734798, "grad_norm": 0.6027736663818359, "learning_rate": 5.666666666666667e-05, "loss": 1.7736, "step": 170 }, { "epoch": 0.009531241290897944, "grad_norm": 0.6379444003105164, "learning_rate": 5.6999999999999996e-05, "loss": 2.0331, "step": 171 }, { "epoch": 0.009586979544061089, "grad_norm": 0.6117588877677917, "learning_rate": 5.7333333333333336e-05, "loss": 1.8546, "step": 172 }, { "epoch": 0.009642717797224235, "grad_norm": 0.6109329462051392, "learning_rate": 5.766666666666667e-05, "loss": 2.0427, "step": 173 }, { "epoch": 0.009698456050387381, "grad_norm": 0.5530399084091187, "learning_rate": 5.8e-05, "loss": 1.7323, "step": 174 }, { "epoch": 0.009754194303550527, "grad_norm": 0.7092908024787903, "learning_rate": 5.833333333333334e-05, "loss": 2.2, "step": 175 }, { "epoch": 0.009809932556713672, "grad_norm": 0.5897237658500671, "learning_rate": 5.866666666666667e-05, "loss": 1.5879, "step": 176 }, { "epoch": 0.009865670809876818, "grad_norm": 0.5485551357269287, "learning_rate": 5.9e-05, "loss": 1.6043, "step": 177 }, { "epoch": 0.009921409063039965, "grad_norm": 0.5792586803436279, "learning_rate": 5.9333333333333343e-05, "loss": 1.8772, "step": 178 }, { "epoch": 0.009977147316203111, "grad_norm": 0.6716285943984985, "learning_rate": 5.966666666666667e-05, "loss": 1.7887, "step": 179 }, { "epoch": 0.010032885569366256, "grad_norm": 0.5866957902908325, "learning_rate": 6e-05, "loss": 1.7228, "step": 180 }, { "epoch": 0.010088623822529402, "grad_norm": 0.6197178363800049, "learning_rate": 6.033333333333334e-05, "loss": 1.7767, "step": 181 }, { "epoch": 0.010144362075692548, "grad_norm": 0.6811436414718628, "learning_rate": 6.066666666666667e-05, "loss": 2.002, "step": 182 }, { "epoch": 0.010200100328855693, "grad_norm": 0.6519239544868469, "learning_rate": 6.1e-05, "loss": 1.7755, "step": 183 }, { "epoch": 0.01025583858201884, "grad_norm": 0.5758973360061646, "learning_rate": 6.133333333333334e-05, "loss": 1.7244, "step": 184 }, { "epoch": 0.010311576835181985, "grad_norm": 0.5882923007011414, "learning_rate": 6.166666666666667e-05, "loss": 1.8041, "step": 185 }, { "epoch": 0.010367315088345132, "grad_norm": 0.5509873032569885, "learning_rate": 6.2e-05, "loss": 1.7813, "step": 186 }, { "epoch": 0.010423053341508276, "grad_norm": 0.5870537757873535, "learning_rate": 6.233333333333334e-05, "loss": 1.9419, "step": 187 }, { "epoch": 0.010478791594671423, "grad_norm": 0.5315700173377991, "learning_rate": 6.266666666666667e-05, "loss": 1.6804, "step": 188 }, { "epoch": 0.010534529847834569, "grad_norm": 0.5694735646247864, "learning_rate": 6.3e-05, "loss": 1.8406, "step": 189 }, { "epoch": 0.010590268100997715, "grad_norm": 0.5579227209091187, "learning_rate": 6.333333333333333e-05, "loss": 1.9451, "step": 190 }, { "epoch": 0.01064600635416086, "grad_norm": 0.5777730941772461, "learning_rate": 6.366666666666668e-05, "loss": 1.7783, "step": 191 }, { "epoch": 0.010701744607324006, "grad_norm": 0.5626804828643799, "learning_rate": 6.400000000000001e-05, "loss": 1.8944, "step": 192 }, { "epoch": 0.010757482860487153, "grad_norm": 0.5726325511932373, "learning_rate": 6.433333333333333e-05, "loss": 1.8799, "step": 193 }, { "epoch": 0.010813221113650299, "grad_norm": 0.6156812906265259, "learning_rate": 6.466666666666666e-05, "loss": 1.8651, "step": 194 }, { "epoch": 0.010868959366813443, "grad_norm": 0.545893669128418, "learning_rate": 6.500000000000001e-05, "loss": 1.6938, "step": 195 }, { "epoch": 0.01092469761997659, "grad_norm": 0.5374442934989929, "learning_rate": 6.533333333333334e-05, "loss": 1.756, "step": 196 }, { "epoch": 0.010980435873139736, "grad_norm": 0.5943235754966736, "learning_rate": 6.566666666666666e-05, "loss": 1.8388, "step": 197 }, { "epoch": 0.011036174126302882, "grad_norm": 0.7199476361274719, "learning_rate": 6.6e-05, "loss": 2.0311, "step": 198 }, { "epoch": 0.011091912379466027, "grad_norm": 0.65143883228302, "learning_rate": 6.633333333333334e-05, "loss": 2.0285, "step": 199 }, { "epoch": 0.011147650632629173, "grad_norm": 0.5984755754470825, "learning_rate": 6.666666666666667e-05, "loss": 1.7062, "step": 200 }, { "epoch": 0.01120338888579232, "grad_norm": 0.5733404755592346, "learning_rate": 6.7e-05, "loss": 1.916, "step": 201 }, { "epoch": 0.011259127138955466, "grad_norm": 0.5946204662322998, "learning_rate": 6.733333333333333e-05, "loss": 1.9394, "step": 202 }, { "epoch": 0.01131486539211861, "grad_norm": 0.677741527557373, "learning_rate": 6.766666666666667e-05, "loss": 2.248, "step": 203 }, { "epoch": 0.011370603645281757, "grad_norm": 0.5983121991157532, "learning_rate": 6.800000000000001e-05, "loss": 1.835, "step": 204 }, { "epoch": 0.011426341898444903, "grad_norm": 0.5219351053237915, "learning_rate": 6.833333333333333e-05, "loss": 1.7373, "step": 205 }, { "epoch": 0.01148208015160805, "grad_norm": 0.657131552696228, "learning_rate": 6.866666666666666e-05, "loss": 2.1801, "step": 206 }, { "epoch": 0.011537818404771194, "grad_norm": 0.6068251132965088, "learning_rate": 6.9e-05, "loss": 1.7873, "step": 207 }, { "epoch": 0.01159355665793434, "grad_norm": 0.5744972825050354, "learning_rate": 6.933333333333334e-05, "loss": 1.9491, "step": 208 }, { "epoch": 0.011649294911097487, "grad_norm": 0.5395380854606628, "learning_rate": 6.966666666666668e-05, "loss": 1.7532, "step": 209 }, { "epoch": 0.011705033164260631, "grad_norm": 0.5843316912651062, "learning_rate": 7e-05, "loss": 1.7694, "step": 210 }, { "epoch": 0.011760771417423778, "grad_norm": 0.6699615716934204, "learning_rate": 7.033333333333334e-05, "loss": 2.2063, "step": 211 }, { "epoch": 0.011816509670586924, "grad_norm": 0.5723788738250732, "learning_rate": 7.066666666666667e-05, "loss": 1.8842, "step": 212 }, { "epoch": 0.01187224792375007, "grad_norm": 0.5478008985519409, "learning_rate": 7.1e-05, "loss": 1.7411, "step": 213 }, { "epoch": 0.011927986176913215, "grad_norm": 0.567477285861969, "learning_rate": 7.133333333333334e-05, "loss": 1.8457, "step": 214 }, { "epoch": 0.011983724430076361, "grad_norm": 0.5568417310714722, "learning_rate": 7.166666666666667e-05, "loss": 1.8425, "step": 215 }, { "epoch": 0.012039462683239507, "grad_norm": 0.552416205406189, "learning_rate": 7.2e-05, "loss": 1.9535, "step": 216 }, { "epoch": 0.012095200936402654, "grad_norm": 0.6089819073677063, "learning_rate": 7.233333333333335e-05, "loss": 1.8465, "step": 217 }, { "epoch": 0.012150939189565798, "grad_norm": 0.6218812465667725, "learning_rate": 7.266666666666667e-05, "loss": 2.1711, "step": 218 }, { "epoch": 0.012206677442728945, "grad_norm": 0.5704020261764526, "learning_rate": 7.3e-05, "loss": 1.7793, "step": 219 }, { "epoch": 0.012262415695892091, "grad_norm": 0.5598061084747314, "learning_rate": 7.333333333333333e-05, "loss": 1.9454, "step": 220 }, { "epoch": 0.012318153949055237, "grad_norm": 0.5439260601997375, "learning_rate": 7.366666666666668e-05, "loss": 1.8544, "step": 221 }, { "epoch": 0.012373892202218382, "grad_norm": 0.5953371524810791, "learning_rate": 7.4e-05, "loss": 1.8335, "step": 222 }, { "epoch": 0.012429630455381528, "grad_norm": 0.5699326395988464, "learning_rate": 7.433333333333333e-05, "loss": 1.6647, "step": 223 }, { "epoch": 0.012485368708544674, "grad_norm": 0.5833302140235901, "learning_rate": 7.466666666666667e-05, "loss": 1.9092, "step": 224 }, { "epoch": 0.01254110696170782, "grad_norm": 0.5663686394691467, "learning_rate": 7.500000000000001e-05, "loss": 1.7344, "step": 225 }, { "epoch": 0.012596845214870965, "grad_norm": 0.5459832549095154, "learning_rate": 7.533333333333334e-05, "loss": 1.6805, "step": 226 }, { "epoch": 0.012652583468034112, "grad_norm": 0.6193357110023499, "learning_rate": 7.566666666666667e-05, "loss": 1.6711, "step": 227 }, { "epoch": 0.012708321721197258, "grad_norm": 0.6414167284965515, "learning_rate": 7.6e-05, "loss": 1.9194, "step": 228 }, { "epoch": 0.012764059974360404, "grad_norm": 0.541812539100647, "learning_rate": 7.633333333333334e-05, "loss": 1.9374, "step": 229 }, { "epoch": 0.012819798227523549, "grad_norm": 0.5368767976760864, "learning_rate": 7.666666666666667e-05, "loss": 1.605, "step": 230 }, { "epoch": 0.012875536480686695, "grad_norm": 0.622112512588501, "learning_rate": 7.7e-05, "loss": 1.804, "step": 231 }, { "epoch": 0.012931274733849842, "grad_norm": 0.5820221900939941, "learning_rate": 7.733333333333333e-05, "loss": 1.796, "step": 232 }, { "epoch": 0.012987012987012988, "grad_norm": 0.5530866980552673, "learning_rate": 7.766666666666667e-05, "loss": 1.704, "step": 233 }, { "epoch": 0.013042751240176132, "grad_norm": 0.5967001914978027, "learning_rate": 7.800000000000001e-05, "loss": 2.0598, "step": 234 }, { "epoch": 0.013098489493339279, "grad_norm": 0.5761673450469971, "learning_rate": 7.833333333333333e-05, "loss": 1.9391, "step": 235 }, { "epoch": 0.013154227746502425, "grad_norm": 0.582139253616333, "learning_rate": 7.866666666666666e-05, "loss": 1.851, "step": 236 }, { "epoch": 0.01320996599966557, "grad_norm": 0.6047868132591248, "learning_rate": 7.900000000000001e-05, "loss": 1.9757, "step": 237 }, { "epoch": 0.013265704252828716, "grad_norm": 0.6394466757774353, "learning_rate": 7.933333333333334e-05, "loss": 2.2063, "step": 238 }, { "epoch": 0.013321442505991862, "grad_norm": 0.6129965782165527, "learning_rate": 7.966666666666666e-05, "loss": 1.8813, "step": 239 }, { "epoch": 0.013377180759155009, "grad_norm": 0.5982023477554321, "learning_rate": 8e-05, "loss": 1.928, "step": 240 }, { "epoch": 0.013432919012318153, "grad_norm": 0.515180230140686, "learning_rate": 8.033333333333334e-05, "loss": 1.5582, "step": 241 }, { "epoch": 0.0134886572654813, "grad_norm": 0.669916033744812, "learning_rate": 8.066666666666667e-05, "loss": 2.1044, "step": 242 }, { "epoch": 0.013544395518644446, "grad_norm": 0.5825132131576538, "learning_rate": 8.1e-05, "loss": 1.7521, "step": 243 }, { "epoch": 0.013600133771807592, "grad_norm": 0.6118985414505005, "learning_rate": 8.133333333333334e-05, "loss": 1.9605, "step": 244 }, { "epoch": 0.013655872024970737, "grad_norm": 0.5747547745704651, "learning_rate": 8.166666666666667e-05, "loss": 1.8198, "step": 245 }, { "epoch": 0.013711610278133883, "grad_norm": 0.609553337097168, "learning_rate": 8.2e-05, "loss": 2.0001, "step": 246 }, { "epoch": 0.01376734853129703, "grad_norm": 0.5751491189002991, "learning_rate": 8.233333333333333e-05, "loss": 1.9317, "step": 247 }, { "epoch": 0.013823086784460176, "grad_norm": 0.599029541015625, "learning_rate": 8.266666666666667e-05, "loss": 1.7716, "step": 248 }, { "epoch": 0.01387882503762332, "grad_norm": 0.5347121953964233, "learning_rate": 8.3e-05, "loss": 1.82, "step": 249 }, { "epoch": 0.013934563290786467, "grad_norm": 0.5724605917930603, "learning_rate": 8.333333333333334e-05, "loss": 1.8309, "step": 250 }, { "epoch": 0.013990301543949613, "grad_norm": 0.531136691570282, "learning_rate": 8.366666666666668e-05, "loss": 1.682, "step": 251 }, { "epoch": 0.01404603979711276, "grad_norm": 0.5464481115341187, "learning_rate": 8.4e-05, "loss": 2.001, "step": 252 }, { "epoch": 0.014101778050275904, "grad_norm": 0.5945254564285278, "learning_rate": 8.433333333333334e-05, "loss": 1.7766, "step": 253 }, { "epoch": 0.01415751630343905, "grad_norm": 0.5452976226806641, "learning_rate": 8.466666666666667e-05, "loss": 1.6948, "step": 254 }, { "epoch": 0.014213254556602196, "grad_norm": 0.5722144842147827, "learning_rate": 8.5e-05, "loss": 1.8978, "step": 255 }, { "epoch": 0.014268992809765343, "grad_norm": 0.5629029870033264, "learning_rate": 8.533333333333334e-05, "loss": 1.7381, "step": 256 }, { "epoch": 0.014324731062928487, "grad_norm": 0.584661066532135, "learning_rate": 8.566666666666667e-05, "loss": 1.7016, "step": 257 }, { "epoch": 0.014380469316091634, "grad_norm": 0.544104814529419, "learning_rate": 8.6e-05, "loss": 1.8649, "step": 258 }, { "epoch": 0.01443620756925478, "grad_norm": 0.5734279751777649, "learning_rate": 8.633333333333334e-05, "loss": 1.7844, "step": 259 }, { "epoch": 0.014491945822417925, "grad_norm": 0.5523878335952759, "learning_rate": 8.666666666666667e-05, "loss": 2.0572, "step": 260 }, { "epoch": 0.014547684075581071, "grad_norm": 0.5634390115737915, "learning_rate": 8.7e-05, "loss": 1.8073, "step": 261 }, { "epoch": 0.014603422328744217, "grad_norm": 0.5875604152679443, "learning_rate": 8.733333333333333e-05, "loss": 1.9706, "step": 262 }, { "epoch": 0.014659160581907364, "grad_norm": 0.534288227558136, "learning_rate": 8.766666666666668e-05, "loss": 1.7742, "step": 263 }, { "epoch": 0.014714898835070508, "grad_norm": 0.5286023020744324, "learning_rate": 8.800000000000001e-05, "loss": 1.6763, "step": 264 }, { "epoch": 0.014770637088233654, "grad_norm": 0.5768111944198608, "learning_rate": 8.833333333333333e-05, "loss": 1.5731, "step": 265 }, { "epoch": 0.0148263753413968, "grad_norm": 0.552629292011261, "learning_rate": 8.866666666666668e-05, "loss": 1.9837, "step": 266 }, { "epoch": 0.014882113594559947, "grad_norm": 0.5081507563591003, "learning_rate": 8.900000000000001e-05, "loss": 1.8844, "step": 267 }, { "epoch": 0.014937851847723092, "grad_norm": 0.563845694065094, "learning_rate": 8.933333333333334e-05, "loss": 1.9141, "step": 268 }, { "epoch": 0.014993590100886238, "grad_norm": 0.5855246186256409, "learning_rate": 8.966666666666666e-05, "loss": 2.1101, "step": 269 }, { "epoch": 0.015049328354049384, "grad_norm": 0.5010532736778259, "learning_rate": 9e-05, "loss": 1.8388, "step": 270 }, { "epoch": 0.01510506660721253, "grad_norm": 0.5565475225448608, "learning_rate": 9.033333333333334e-05, "loss": 1.8648, "step": 271 }, { "epoch": 0.015160804860375675, "grad_norm": 0.5293692350387573, "learning_rate": 9.066666666666667e-05, "loss": 1.7059, "step": 272 }, { "epoch": 0.015216543113538821, "grad_norm": 0.5180760025978088, "learning_rate": 9.1e-05, "loss": 1.8659, "step": 273 }, { "epoch": 0.015272281366701968, "grad_norm": 0.5416427254676819, "learning_rate": 9.133333333333334e-05, "loss": 1.6187, "step": 274 }, { "epoch": 0.015328019619865114, "grad_norm": 0.603060781955719, "learning_rate": 9.166666666666667e-05, "loss": 1.8554, "step": 275 }, { "epoch": 0.015383757873028259, "grad_norm": 0.5260182023048401, "learning_rate": 9.200000000000001e-05, "loss": 1.8108, "step": 276 }, { "epoch": 0.015439496126191405, "grad_norm": 0.5307485461235046, "learning_rate": 9.233333333333333e-05, "loss": 1.7369, "step": 277 }, { "epoch": 0.015495234379354551, "grad_norm": 0.5671928524971008, "learning_rate": 9.266666666666666e-05, "loss": 1.7879, "step": 278 }, { "epoch": 0.015550972632517698, "grad_norm": 0.5482888221740723, "learning_rate": 9.300000000000001e-05, "loss": 1.8687, "step": 279 }, { "epoch": 0.015606710885680842, "grad_norm": 0.5492271184921265, "learning_rate": 9.333333333333334e-05, "loss": 2.0486, "step": 280 }, { "epoch": 0.01566244913884399, "grad_norm": 0.5533493757247925, "learning_rate": 9.366666666666668e-05, "loss": 1.8764, "step": 281 }, { "epoch": 0.015718187392007133, "grad_norm": 0.5373388528823853, "learning_rate": 9.4e-05, "loss": 1.8098, "step": 282 }, { "epoch": 0.01577392564517028, "grad_norm": 0.5737355351448059, "learning_rate": 9.433333333333334e-05, "loss": 1.8023, "step": 283 }, { "epoch": 0.015829663898333426, "grad_norm": 0.6059421896934509, "learning_rate": 9.466666666666667e-05, "loss": 1.9003, "step": 284 }, { "epoch": 0.015885402151496572, "grad_norm": 0.545070230960846, "learning_rate": 9.5e-05, "loss": 1.6793, "step": 285 }, { "epoch": 0.01594114040465972, "grad_norm": 0.5391154885292053, "learning_rate": 9.533333333333334e-05, "loss": 1.7691, "step": 286 }, { "epoch": 0.015996878657822865, "grad_norm": 0.5233768820762634, "learning_rate": 9.566666666666667e-05, "loss": 1.8312, "step": 287 }, { "epoch": 0.01605261691098601, "grad_norm": 0.5520955920219421, "learning_rate": 9.6e-05, "loss": 1.9652, "step": 288 }, { "epoch": 0.016108355164149154, "grad_norm": 0.5521306991577148, "learning_rate": 9.633333333333335e-05, "loss": 1.8264, "step": 289 }, { "epoch": 0.0161640934173123, "grad_norm": 0.5325077176094055, "learning_rate": 9.666666666666667e-05, "loss": 1.9074, "step": 290 }, { "epoch": 0.016219831670475447, "grad_norm": 0.5402048230171204, "learning_rate": 9.7e-05, "loss": 1.9993, "step": 291 }, { "epoch": 0.016275569923638593, "grad_norm": 0.5164310336112976, "learning_rate": 9.733333333333335e-05, "loss": 1.6385, "step": 292 }, { "epoch": 0.01633130817680174, "grad_norm": 0.5265329480171204, "learning_rate": 9.766666666666668e-05, "loss": 1.8513, "step": 293 }, { "epoch": 0.016387046429964885, "grad_norm": 0.5051769614219666, "learning_rate": 9.8e-05, "loss": 1.7628, "step": 294 }, { "epoch": 0.016442784683128032, "grad_norm": 0.5061401128768921, "learning_rate": 9.833333333333333e-05, "loss": 1.8406, "step": 295 }, { "epoch": 0.016498522936291178, "grad_norm": 0.6622328162193298, "learning_rate": 9.866666666666668e-05, "loss": 1.9504, "step": 296 }, { "epoch": 0.01655426118945432, "grad_norm": 0.5525157451629639, "learning_rate": 9.900000000000001e-05, "loss": 1.9845, "step": 297 }, { "epoch": 0.016609999442617467, "grad_norm": 0.5412437319755554, "learning_rate": 9.933333333333334e-05, "loss": 1.8234, "step": 298 }, { "epoch": 0.016665737695780614, "grad_norm": 0.53217613697052, "learning_rate": 9.966666666666667e-05, "loss": 1.6132, "step": 299 }, { "epoch": 0.01672147594894376, "grad_norm": 0.6531130075454712, "learning_rate": 0.0001, "loss": 2.0395, "step": 300 }, { "epoch": 0.016777214202106906, "grad_norm": 0.49301308393478394, "learning_rate": 9.999999920714576e-05, "loss": 1.6945, "step": 301 }, { "epoch": 0.016832952455270053, "grad_norm": 0.49394482374191284, "learning_rate": 9.999999682858307e-05, "loss": 1.6877, "step": 302 }, { "epoch": 0.0168886907084332, "grad_norm": 0.504688024520874, "learning_rate": 9.9999992864312e-05, "loss": 1.6779, "step": 303 }, { "epoch": 0.016944428961596345, "grad_norm": 0.5286409258842468, "learning_rate": 9.999998731433267e-05, "loss": 1.64, "step": 304 }, { "epoch": 0.017000167214759488, "grad_norm": 0.4911554157733917, "learning_rate": 9.999998017864527e-05, "loss": 1.66, "step": 305 }, { "epoch": 0.017055905467922634, "grad_norm": 0.4851885735988617, "learning_rate": 9.999997145725001e-05, "loss": 1.8884, "step": 306 }, { "epoch": 0.01711164372108578, "grad_norm": 0.521120011806488, "learning_rate": 9.999996115014719e-05, "loss": 1.6844, "step": 307 }, { "epoch": 0.017167381974248927, "grad_norm": 0.5494885444641113, "learning_rate": 9.99999492573371e-05, "loss": 1.7733, "step": 308 }, { "epoch": 0.017223120227412073, "grad_norm": 0.4475904703140259, "learning_rate": 9.999993577882016e-05, "loss": 1.6295, "step": 309 }, { "epoch": 0.01727885848057522, "grad_norm": 0.4610547721385956, "learning_rate": 9.999992071459676e-05, "loss": 1.6118, "step": 310 }, { "epoch": 0.017334596733738366, "grad_norm": 0.49445369839668274, "learning_rate": 9.999990406466741e-05, "loss": 1.594, "step": 311 }, { "epoch": 0.01739033498690151, "grad_norm": 0.5013507008552551, "learning_rate": 9.999988582903262e-05, "loss": 1.6829, "step": 312 }, { "epoch": 0.017446073240064655, "grad_norm": 0.5492314100265503, "learning_rate": 9.999986600769295e-05, "loss": 1.662, "step": 313 }, { "epoch": 0.0175018114932278, "grad_norm": 0.49456071853637695, "learning_rate": 9.999984460064908e-05, "loss": 1.7087, "step": 314 }, { "epoch": 0.017557549746390948, "grad_norm": 0.587954580783844, "learning_rate": 9.999982160790164e-05, "loss": 1.8628, "step": 315 }, { "epoch": 0.017613287999554094, "grad_norm": 0.6061418652534485, "learning_rate": 9.999979702945138e-05, "loss": 2.143, "step": 316 }, { "epoch": 0.01766902625271724, "grad_norm": 0.52556973695755, "learning_rate": 9.999977086529909e-05, "loss": 1.6862, "step": 317 }, { "epoch": 0.017724764505880387, "grad_norm": 0.5804201364517212, "learning_rate": 9.999974311544556e-05, "loss": 1.8495, "step": 318 }, { "epoch": 0.017780502759043533, "grad_norm": 0.5533789396286011, "learning_rate": 9.999971377989172e-05, "loss": 1.9501, "step": 319 }, { "epoch": 0.017836241012206676, "grad_norm": 0.5596528649330139, "learning_rate": 9.999968285863848e-05, "loss": 1.981, "step": 320 }, { "epoch": 0.017891979265369822, "grad_norm": 0.538735568523407, "learning_rate": 9.99996503516868e-05, "loss": 1.9126, "step": 321 }, { "epoch": 0.01794771751853297, "grad_norm": 0.48604801297187805, "learning_rate": 9.999961625903774e-05, "loss": 1.7568, "step": 322 }, { "epoch": 0.018003455771696115, "grad_norm": 0.5091099143028259, "learning_rate": 9.999958058069237e-05, "loss": 1.9625, "step": 323 }, { "epoch": 0.01805919402485926, "grad_norm": 0.4944256842136383, "learning_rate": 9.999954331665182e-05, "loss": 1.6326, "step": 324 }, { "epoch": 0.018114932278022407, "grad_norm": 0.5379263162612915, "learning_rate": 9.999950446691728e-05, "loss": 1.8484, "step": 325 }, { "epoch": 0.018170670531185554, "grad_norm": 0.5548909306526184, "learning_rate": 9.999946403148997e-05, "loss": 1.8855, "step": 326 }, { "epoch": 0.0182264087843487, "grad_norm": 0.5878908634185791, "learning_rate": 9.999942201037118e-05, "loss": 1.8222, "step": 327 }, { "epoch": 0.018282147037511843, "grad_norm": 0.48953092098236084, "learning_rate": 9.999937840356224e-05, "loss": 1.4395, "step": 328 }, { "epoch": 0.01833788529067499, "grad_norm": 0.503923237323761, "learning_rate": 9.999933321106452e-05, "loss": 1.7122, "step": 329 }, { "epoch": 0.018393623543838136, "grad_norm": 0.5150753855705261, "learning_rate": 9.999928643287948e-05, "loss": 1.8863, "step": 330 }, { "epoch": 0.018449361797001282, "grad_norm": 0.5160688757896423, "learning_rate": 9.999923806900859e-05, "loss": 1.8184, "step": 331 }, { "epoch": 0.018505100050164428, "grad_norm": 0.5423057079315186, "learning_rate": 9.99991881194534e-05, "loss": 1.843, "step": 332 }, { "epoch": 0.018560838303327575, "grad_norm": 0.5026907324790955, "learning_rate": 9.999913658421544e-05, "loss": 1.7728, "step": 333 }, { "epoch": 0.01861657655649072, "grad_norm": 0.5391967296600342, "learning_rate": 9.999908346329642e-05, "loss": 1.9225, "step": 334 }, { "epoch": 0.018672314809653867, "grad_norm": 0.5050860047340393, "learning_rate": 9.999902875669797e-05, "loss": 1.7579, "step": 335 }, { "epoch": 0.01872805306281701, "grad_norm": 0.48109737038612366, "learning_rate": 9.999897246442184e-05, "loss": 1.8859, "step": 336 }, { "epoch": 0.018783791315980156, "grad_norm": 0.5002635717391968, "learning_rate": 9.999891458646983e-05, "loss": 1.6809, "step": 337 }, { "epoch": 0.018839529569143303, "grad_norm": 0.5138371586799622, "learning_rate": 9.999885512284375e-05, "loss": 1.7961, "step": 338 }, { "epoch": 0.01889526782230645, "grad_norm": 0.47246232628822327, "learning_rate": 9.999879407354551e-05, "loss": 1.6943, "step": 339 }, { "epoch": 0.018951006075469595, "grad_norm": 0.47807106375694275, "learning_rate": 9.999873143857704e-05, "loss": 1.7652, "step": 340 }, { "epoch": 0.01900674432863274, "grad_norm": 0.4725436270236969, "learning_rate": 9.99986672179403e-05, "loss": 1.7483, "step": 341 }, { "epoch": 0.019062482581795888, "grad_norm": 0.5131480693817139, "learning_rate": 9.999860141163736e-05, "loss": 1.8883, "step": 342 }, { "epoch": 0.01911822083495903, "grad_norm": 0.6150394678115845, "learning_rate": 9.99985340196703e-05, "loss": 2.1536, "step": 343 }, { "epoch": 0.019173959088122177, "grad_norm": 0.5729528069496155, "learning_rate": 9.999846504204124e-05, "loss": 1.9443, "step": 344 }, { "epoch": 0.019229697341285323, "grad_norm": 0.4936676323413849, "learning_rate": 9.999839447875238e-05, "loss": 1.7273, "step": 345 }, { "epoch": 0.01928543559444847, "grad_norm": 0.5480337738990784, "learning_rate": 9.999832232980597e-05, "loss": 1.8024, "step": 346 }, { "epoch": 0.019341173847611616, "grad_norm": 0.4883441925048828, "learning_rate": 9.999824859520428e-05, "loss": 1.6531, "step": 347 }, { "epoch": 0.019396912100774762, "grad_norm": 0.6438686847686768, "learning_rate": 9.999817327494967e-05, "loss": 2.1477, "step": 348 }, { "epoch": 0.01945265035393791, "grad_norm": 0.540684700012207, "learning_rate": 9.999809636904449e-05, "loss": 2.0333, "step": 349 }, { "epoch": 0.019508388607101055, "grad_norm": 0.5322266221046448, "learning_rate": 9.999801787749121e-05, "loss": 1.7542, "step": 350 }, { "epoch": 0.019564126860264198, "grad_norm": 0.5497377514839172, "learning_rate": 9.999793780029232e-05, "loss": 1.9207, "step": 351 }, { "epoch": 0.019619865113427344, "grad_norm": 0.5375553369522095, "learning_rate": 9.999785613745035e-05, "loss": 1.8293, "step": 352 }, { "epoch": 0.01967560336659049, "grad_norm": 0.5242462754249573, "learning_rate": 9.999777288896787e-05, "loss": 1.8176, "step": 353 }, { "epoch": 0.019731341619753637, "grad_norm": 0.5194500088691711, "learning_rate": 9.999768805484757e-05, "loss": 1.961, "step": 354 }, { "epoch": 0.019787079872916783, "grad_norm": 0.4952162504196167, "learning_rate": 9.999760163509209e-05, "loss": 1.6902, "step": 355 }, { "epoch": 0.01984281812607993, "grad_norm": 0.4688204824924469, "learning_rate": 9.99975136297042e-05, "loss": 1.352, "step": 356 }, { "epoch": 0.019898556379243076, "grad_norm": 0.5171904563903809, "learning_rate": 9.999742403868668e-05, "loss": 1.952, "step": 357 }, { "epoch": 0.019954294632406222, "grad_norm": 0.542300283908844, "learning_rate": 9.999733286204238e-05, "loss": 1.8768, "step": 358 }, { "epoch": 0.020010032885569365, "grad_norm": 0.5278236865997314, "learning_rate": 9.99972400997742e-05, "loss": 1.8014, "step": 359 }, { "epoch": 0.02006577113873251, "grad_norm": 0.587790846824646, "learning_rate": 9.999714575188505e-05, "loss": 1.9884, "step": 360 }, { "epoch": 0.020121509391895658, "grad_norm": 0.5114203095436096, "learning_rate": 9.999704981837794e-05, "loss": 1.9038, "step": 361 }, { "epoch": 0.020177247645058804, "grad_norm": 0.538783609867096, "learning_rate": 9.999695229925591e-05, "loss": 1.9049, "step": 362 }, { "epoch": 0.02023298589822195, "grad_norm": 0.5289005637168884, "learning_rate": 9.999685319452208e-05, "loss": 1.7111, "step": 363 }, { "epoch": 0.020288724151385096, "grad_norm": 0.5257157683372498, "learning_rate": 9.999675250417954e-05, "loss": 1.6416, "step": 364 }, { "epoch": 0.020344462404548243, "grad_norm": 0.480473130941391, "learning_rate": 9.999665022823152e-05, "loss": 1.7197, "step": 365 }, { "epoch": 0.020400200657711386, "grad_norm": 0.5564152598381042, "learning_rate": 9.999654636668125e-05, "loss": 1.8762, "step": 366 }, { "epoch": 0.020455938910874532, "grad_norm": 0.6517108082771301, "learning_rate": 9.999644091953204e-05, "loss": 2.4684, "step": 367 }, { "epoch": 0.02051167716403768, "grad_norm": 0.5357886552810669, "learning_rate": 9.999633388678723e-05, "loss": 1.8079, "step": 368 }, { "epoch": 0.020567415417200825, "grad_norm": 0.498740553855896, "learning_rate": 9.999622526845021e-05, "loss": 1.6885, "step": 369 }, { "epoch": 0.02062315367036397, "grad_norm": 0.49749207496643066, "learning_rate": 9.999611506452439e-05, "loss": 1.8686, "step": 370 }, { "epoch": 0.020678891923527117, "grad_norm": 0.5339593291282654, "learning_rate": 9.999600327501333e-05, "loss": 1.8592, "step": 371 }, { "epoch": 0.020734630176690264, "grad_norm": 0.5533782839775085, "learning_rate": 9.999588989992052e-05, "loss": 1.8752, "step": 372 }, { "epoch": 0.02079036842985341, "grad_norm": 0.459504634141922, "learning_rate": 9.99957749392496e-05, "loss": 1.7596, "step": 373 }, { "epoch": 0.020846106683016553, "grad_norm": 0.4722179174423218, "learning_rate": 9.999565839300419e-05, "loss": 1.7573, "step": 374 }, { "epoch": 0.0209018449361797, "grad_norm": 0.49677354097366333, "learning_rate": 9.999554026118798e-05, "loss": 1.9692, "step": 375 }, { "epoch": 0.020957583189342845, "grad_norm": 0.49444639682769775, "learning_rate": 9.999542054380473e-05, "loss": 1.8881, "step": 376 }, { "epoch": 0.02101332144250599, "grad_norm": 0.4882863461971283, "learning_rate": 9.999529924085824e-05, "loss": 1.8369, "step": 377 }, { "epoch": 0.021069059695669138, "grad_norm": 0.475211501121521, "learning_rate": 9.999517635235237e-05, "loss": 1.3352, "step": 378 }, { "epoch": 0.021124797948832284, "grad_norm": 0.5699715614318848, "learning_rate": 9.999505187829096e-05, "loss": 1.763, "step": 379 }, { "epoch": 0.02118053620199543, "grad_norm": 0.5538257360458374, "learning_rate": 9.9994925818678e-05, "loss": 1.7431, "step": 380 }, { "epoch": 0.021236274455158577, "grad_norm": 0.48163720965385437, "learning_rate": 9.99947981735175e-05, "loss": 1.7356, "step": 381 }, { "epoch": 0.02129201270832172, "grad_norm": 0.5482640266418457, "learning_rate": 9.99946689428135e-05, "loss": 1.861, "step": 382 }, { "epoch": 0.021347750961484866, "grad_norm": 0.5083199739456177, "learning_rate": 9.999453812657007e-05, "loss": 1.9594, "step": 383 }, { "epoch": 0.021403489214648012, "grad_norm": 0.513034999370575, "learning_rate": 9.99944057247914e-05, "loss": 2.0073, "step": 384 }, { "epoch": 0.02145922746781116, "grad_norm": 0.5045239329338074, "learning_rate": 9.999427173748164e-05, "loss": 1.6862, "step": 385 }, { "epoch": 0.021514965720974305, "grad_norm": 0.5097934603691101, "learning_rate": 9.999413616464508e-05, "loss": 1.8631, "step": 386 }, { "epoch": 0.02157070397413745, "grad_norm": 0.522888720035553, "learning_rate": 9.999399900628601e-05, "loss": 1.8636, "step": 387 }, { "epoch": 0.021626442227300598, "grad_norm": 0.49189141392707825, "learning_rate": 9.999386026240878e-05, "loss": 1.7465, "step": 388 }, { "epoch": 0.021682180480463744, "grad_norm": 0.5114362239837646, "learning_rate": 9.999371993301779e-05, "loss": 1.6336, "step": 389 }, { "epoch": 0.021737918733626887, "grad_norm": 0.4647996723651886, "learning_rate": 9.999357801811748e-05, "loss": 1.6755, "step": 390 }, { "epoch": 0.021793656986790033, "grad_norm": 0.5380472540855408, "learning_rate": 9.999343451771234e-05, "loss": 1.9477, "step": 391 }, { "epoch": 0.02184939523995318, "grad_norm": 0.4583854377269745, "learning_rate": 9.999328943180697e-05, "loss": 1.7902, "step": 392 }, { "epoch": 0.021905133493116326, "grad_norm": 0.45304641127586365, "learning_rate": 9.999314276040592e-05, "loss": 1.6744, "step": 393 }, { "epoch": 0.021960871746279472, "grad_norm": 0.49699023365974426, "learning_rate": 9.999299450351387e-05, "loss": 1.8258, "step": 394 }, { "epoch": 0.02201660999944262, "grad_norm": 0.49681130051612854, "learning_rate": 9.999284466113552e-05, "loss": 1.8488, "step": 395 }, { "epoch": 0.022072348252605765, "grad_norm": 0.5959085822105408, "learning_rate": 9.999269323327561e-05, "loss": 2.1775, "step": 396 }, { "epoch": 0.022128086505768908, "grad_norm": 0.5063357949256897, "learning_rate": 9.999254021993895e-05, "loss": 1.6503, "step": 397 }, { "epoch": 0.022183824758932054, "grad_norm": 0.5273301005363464, "learning_rate": 9.999238562113038e-05, "loss": 1.8169, "step": 398 }, { "epoch": 0.0222395630120952, "grad_norm": 0.5033614635467529, "learning_rate": 9.999222943685482e-05, "loss": 1.647, "step": 399 }, { "epoch": 0.022295301265258347, "grad_norm": 0.5118756890296936, "learning_rate": 9.999207166711723e-05, "loss": 1.6712, "step": 400 }, { "epoch": 0.022351039518421493, "grad_norm": 0.5338667035102844, "learning_rate": 9.999191231192258e-05, "loss": 1.8125, "step": 401 }, { "epoch": 0.02240677777158464, "grad_norm": 0.5460575819015503, "learning_rate": 9.999175137127596e-05, "loss": 1.8486, "step": 402 }, { "epoch": 0.022462516024747785, "grad_norm": 0.4892098009586334, "learning_rate": 9.999158884518245e-05, "loss": 1.6692, "step": 403 }, { "epoch": 0.022518254277910932, "grad_norm": 0.4894774258136749, "learning_rate": 9.999142473364722e-05, "loss": 1.5916, "step": 404 }, { "epoch": 0.022573992531074075, "grad_norm": 0.4909743070602417, "learning_rate": 9.999125903667545e-05, "loss": 1.646, "step": 405 }, { "epoch": 0.02262973078423722, "grad_norm": 0.48369649052619934, "learning_rate": 9.999109175427243e-05, "loss": 1.6874, "step": 406 }, { "epoch": 0.022685469037400367, "grad_norm": 0.4719717502593994, "learning_rate": 9.999092288644345e-05, "loss": 1.9116, "step": 407 }, { "epoch": 0.022741207290563514, "grad_norm": 0.4719882309436798, "learning_rate": 9.999075243319386e-05, "loss": 1.4898, "step": 408 }, { "epoch": 0.02279694554372666, "grad_norm": 0.5169988870620728, "learning_rate": 9.999058039452906e-05, "loss": 1.7671, "step": 409 }, { "epoch": 0.022852683796889806, "grad_norm": 0.4469069540500641, "learning_rate": 9.999040677045453e-05, "loss": 1.7068, "step": 410 }, { "epoch": 0.022908422050052953, "grad_norm": 0.508651077747345, "learning_rate": 9.999023156097575e-05, "loss": 1.912, "step": 411 }, { "epoch": 0.0229641603032161, "grad_norm": 0.48365309834480286, "learning_rate": 9.99900547660983e-05, "loss": 1.7907, "step": 412 }, { "epoch": 0.02301989855637924, "grad_norm": 0.5189946889877319, "learning_rate": 9.998987638582775e-05, "loss": 1.8333, "step": 413 }, { "epoch": 0.023075636809542388, "grad_norm": 0.5238891839981079, "learning_rate": 9.99896964201698e-05, "loss": 2.0069, "step": 414 }, { "epoch": 0.023131375062705534, "grad_norm": 0.5390001535415649, "learning_rate": 9.998951486913015e-05, "loss": 1.8571, "step": 415 }, { "epoch": 0.02318711331586868, "grad_norm": 0.5339745283126831, "learning_rate": 9.998933173271453e-05, "loss": 1.6536, "step": 416 }, { "epoch": 0.023242851569031827, "grad_norm": 0.48661404848098755, "learning_rate": 9.998914701092877e-05, "loss": 1.8969, "step": 417 }, { "epoch": 0.023298589822194973, "grad_norm": 0.5701104402542114, "learning_rate": 9.998896070377873e-05, "loss": 1.9305, "step": 418 }, { "epoch": 0.02335432807535812, "grad_norm": 0.5289365649223328, "learning_rate": 9.99887728112703e-05, "loss": 1.9801, "step": 419 }, { "epoch": 0.023410066328521262, "grad_norm": 0.4870493412017822, "learning_rate": 9.998858333340945e-05, "loss": 1.879, "step": 420 }, { "epoch": 0.02346580458168441, "grad_norm": 0.46179860830307007, "learning_rate": 9.998839227020221e-05, "loss": 1.6029, "step": 421 }, { "epoch": 0.023521542834847555, "grad_norm": 0.5245276689529419, "learning_rate": 9.998819962165462e-05, "loss": 1.9165, "step": 422 }, { "epoch": 0.0235772810880107, "grad_norm": 0.4952642321586609, "learning_rate": 9.998800538777278e-05, "loss": 1.6276, "step": 423 }, { "epoch": 0.023633019341173848, "grad_norm": 0.48968929052352905, "learning_rate": 9.998780956856285e-05, "loss": 1.5287, "step": 424 }, { "epoch": 0.023688757594336994, "grad_norm": 0.4968630373477936, "learning_rate": 9.998761216403106e-05, "loss": 1.8008, "step": 425 }, { "epoch": 0.02374449584750014, "grad_norm": 0.5983918309211731, "learning_rate": 9.998741317418366e-05, "loss": 2.0055, "step": 426 }, { "epoch": 0.023800234100663287, "grad_norm": 0.49322110414505005, "learning_rate": 9.998721259902694e-05, "loss": 1.6324, "step": 427 }, { "epoch": 0.02385597235382643, "grad_norm": 0.4888675808906555, "learning_rate": 9.99870104385673e-05, "loss": 1.6075, "step": 428 }, { "epoch": 0.023911710606989576, "grad_norm": 0.4783425033092499, "learning_rate": 9.998680669281116e-05, "loss": 1.6517, "step": 429 }, { "epoch": 0.023967448860152722, "grad_norm": 0.5173685550689697, "learning_rate": 9.998660136176492e-05, "loss": 1.6884, "step": 430 }, { "epoch": 0.02402318711331587, "grad_norm": 0.518741250038147, "learning_rate": 9.998639444543514e-05, "loss": 1.7113, "step": 431 }, { "epoch": 0.024078925366479015, "grad_norm": 0.446850448846817, "learning_rate": 9.998618594382836e-05, "loss": 1.5067, "step": 432 }, { "epoch": 0.02413466361964216, "grad_norm": 0.46661272644996643, "learning_rate": 9.99859758569512e-05, "loss": 1.6967, "step": 433 }, { "epoch": 0.024190401872805307, "grad_norm": 0.5824592709541321, "learning_rate": 9.998576418481033e-05, "loss": 2.0151, "step": 434 }, { "epoch": 0.024246140125968454, "grad_norm": 0.4715226888656616, "learning_rate": 9.998555092741247e-05, "loss": 1.6199, "step": 435 }, { "epoch": 0.024301878379131597, "grad_norm": 0.5396628975868225, "learning_rate": 9.998533608476435e-05, "loss": 1.8874, "step": 436 }, { "epoch": 0.024357616632294743, "grad_norm": 0.4999384582042694, "learning_rate": 9.99851196568728e-05, "loss": 1.8761, "step": 437 }, { "epoch": 0.02441335488545789, "grad_norm": 0.4719383418560028, "learning_rate": 9.998490164374472e-05, "loss": 1.6399, "step": 438 }, { "epoch": 0.024469093138621036, "grad_norm": 0.49223801493644714, "learning_rate": 9.998468204538696e-05, "loss": 1.8343, "step": 439 }, { "epoch": 0.024524831391784182, "grad_norm": 0.5116458535194397, "learning_rate": 9.998446086180653e-05, "loss": 2.0423, "step": 440 }, { "epoch": 0.024580569644947328, "grad_norm": 0.48448118567466736, "learning_rate": 9.998423809301043e-05, "loss": 1.5796, "step": 441 }, { "epoch": 0.024636307898110475, "grad_norm": 0.48682916164398193, "learning_rate": 9.998401373900573e-05, "loss": 1.661, "step": 442 }, { "epoch": 0.024692046151273617, "grad_norm": 0.5474771857261658, "learning_rate": 9.998378779979954e-05, "loss": 1.9646, "step": 443 }, { "epoch": 0.024747784404436764, "grad_norm": 0.48878610134124756, "learning_rate": 9.998356027539901e-05, "loss": 1.7896, "step": 444 }, { "epoch": 0.02480352265759991, "grad_norm": 0.49135512113571167, "learning_rate": 9.99833311658114e-05, "loss": 1.7329, "step": 445 }, { "epoch": 0.024859260910763056, "grad_norm": 0.5220357775688171, "learning_rate": 9.998310047104393e-05, "loss": 2.0303, "step": 446 }, { "epoch": 0.024914999163926203, "grad_norm": 0.4597051739692688, "learning_rate": 9.998286819110394e-05, "loss": 1.6114, "step": 447 }, { "epoch": 0.02497073741708935, "grad_norm": 0.5005029439926147, "learning_rate": 9.99826343259988e-05, "loss": 1.8658, "step": 448 }, { "epoch": 0.025026475670252495, "grad_norm": 0.5835437774658203, "learning_rate": 9.99823988757359e-05, "loss": 1.8958, "step": 449 }, { "epoch": 0.02508221392341564, "grad_norm": 0.4960596263408661, "learning_rate": 9.998216184032274e-05, "loss": 1.7768, "step": 450 }, { "epoch": 0.025137952176578784, "grad_norm": 0.4787440299987793, "learning_rate": 9.99819232197668e-05, "loss": 1.7367, "step": 451 }, { "epoch": 0.02519369042974193, "grad_norm": 0.4575479030609131, "learning_rate": 9.99816830140757e-05, "loss": 1.6027, "step": 452 }, { "epoch": 0.025249428682905077, "grad_norm": 0.5182919502258301, "learning_rate": 9.998144122325702e-05, "loss": 1.8879, "step": 453 }, { "epoch": 0.025305166936068223, "grad_norm": 0.49592286348342896, "learning_rate": 9.998119784731843e-05, "loss": 1.954, "step": 454 }, { "epoch": 0.02536090518923137, "grad_norm": 0.4686327576637268, "learning_rate": 9.998095288626765e-05, "loss": 1.6971, "step": 455 }, { "epoch": 0.025416643442394516, "grad_norm": 0.5634790658950806, "learning_rate": 9.998070634011246e-05, "loss": 1.8801, "step": 456 }, { "epoch": 0.025472381695557662, "grad_norm": 0.49380773305892944, "learning_rate": 9.998045820886068e-05, "loss": 1.8882, "step": 457 }, { "epoch": 0.02552811994872081, "grad_norm": 0.5319178104400635, "learning_rate": 9.998020849252017e-05, "loss": 1.7204, "step": 458 }, { "epoch": 0.02558385820188395, "grad_norm": 0.4578639268875122, "learning_rate": 9.997995719109884e-05, "loss": 1.6934, "step": 459 }, { "epoch": 0.025639596455047098, "grad_norm": 0.4672851264476776, "learning_rate": 9.997970430460468e-05, "loss": 1.5534, "step": 460 }, { "epoch": 0.025695334708210244, "grad_norm": 0.4967419505119324, "learning_rate": 9.99794498330457e-05, "loss": 1.7817, "step": 461 }, { "epoch": 0.02575107296137339, "grad_norm": 0.494781494140625, "learning_rate": 9.997919377642997e-05, "loss": 1.759, "step": 462 }, { "epoch": 0.025806811214536537, "grad_norm": 0.47715312242507935, "learning_rate": 9.997893613476561e-05, "loss": 1.6342, "step": 463 }, { "epoch": 0.025862549467699683, "grad_norm": 0.5014367699623108, "learning_rate": 9.99786769080608e-05, "loss": 1.7754, "step": 464 }, { "epoch": 0.02591828772086283, "grad_norm": 0.503808319568634, "learning_rate": 9.997841609632375e-05, "loss": 1.9323, "step": 465 }, { "epoch": 0.025974025974025976, "grad_norm": 0.4935349225997925, "learning_rate": 9.997815369956273e-05, "loss": 1.945, "step": 466 }, { "epoch": 0.02602976422718912, "grad_norm": 0.45313507318496704, "learning_rate": 9.997788971778608e-05, "loss": 1.5908, "step": 467 }, { "epoch": 0.026085502480352265, "grad_norm": 0.48407676815986633, "learning_rate": 9.997762415100214e-05, "loss": 1.449, "step": 468 }, { "epoch": 0.02614124073351541, "grad_norm": 0.4917304813861847, "learning_rate": 9.997735699921938e-05, "loss": 1.7667, "step": 469 }, { "epoch": 0.026196978986678558, "grad_norm": 0.5684965252876282, "learning_rate": 9.997708826244623e-05, "loss": 2.0801, "step": 470 }, { "epoch": 0.026252717239841704, "grad_norm": 0.5034363865852356, "learning_rate": 9.997681794069123e-05, "loss": 1.9385, "step": 471 }, { "epoch": 0.02630845549300485, "grad_norm": 0.5185155272483826, "learning_rate": 9.997654603396294e-05, "loss": 1.9021, "step": 472 }, { "epoch": 0.026364193746167996, "grad_norm": 0.4756320118904114, "learning_rate": 9.997627254227e-05, "loss": 1.7698, "step": 473 }, { "epoch": 0.02641993199933114, "grad_norm": 0.47013306617736816, "learning_rate": 9.997599746562108e-05, "loss": 1.6786, "step": 474 }, { "epoch": 0.026475670252494286, "grad_norm": 0.4797370731830597, "learning_rate": 9.997572080402488e-05, "loss": 1.8663, "step": 475 }, { "epoch": 0.026531408505657432, "grad_norm": 0.4647987186908722, "learning_rate": 9.997544255749021e-05, "loss": 1.6064, "step": 476 }, { "epoch": 0.02658714675882058, "grad_norm": 0.5362509489059448, "learning_rate": 9.99751627260259e-05, "loss": 2.035, "step": 477 }, { "epoch": 0.026642885011983725, "grad_norm": 0.501615047454834, "learning_rate": 9.997488130964077e-05, "loss": 1.7838, "step": 478 }, { "epoch": 0.02669862326514687, "grad_norm": 0.48956695199012756, "learning_rate": 9.997459830834379e-05, "loss": 1.7242, "step": 479 }, { "epoch": 0.026754361518310017, "grad_norm": 0.518091082572937, "learning_rate": 9.997431372214394e-05, "loss": 1.8634, "step": 480 }, { "epoch": 0.026810099771473164, "grad_norm": 0.5070821642875671, "learning_rate": 9.997402755105022e-05, "loss": 1.678, "step": 481 }, { "epoch": 0.026865838024636306, "grad_norm": 0.49108657240867615, "learning_rate": 9.997373979507169e-05, "loss": 1.6952, "step": 482 }, { "epoch": 0.026921576277799453, "grad_norm": 0.4824698269367218, "learning_rate": 9.997345045421753e-05, "loss": 1.6948, "step": 483 }, { "epoch": 0.0269773145309626, "grad_norm": 0.537356972694397, "learning_rate": 9.997315952849688e-05, "loss": 1.9746, "step": 484 }, { "epoch": 0.027033052784125745, "grad_norm": 0.5354846119880676, "learning_rate": 9.997286701791896e-05, "loss": 1.9413, "step": 485 }, { "epoch": 0.02708879103728889, "grad_norm": 0.49684658646583557, "learning_rate": 9.99725729224931e-05, "loss": 1.7646, "step": 486 }, { "epoch": 0.027144529290452038, "grad_norm": 0.5149616599082947, "learning_rate": 9.997227724222855e-05, "loss": 1.6941, "step": 487 }, { "epoch": 0.027200267543615184, "grad_norm": 0.48285308480262756, "learning_rate": 9.997197997713473e-05, "loss": 1.6994, "step": 488 }, { "epoch": 0.02725600579677833, "grad_norm": 0.47129902243614197, "learning_rate": 9.997168112722107e-05, "loss": 1.8408, "step": 489 }, { "epoch": 0.027311744049941473, "grad_norm": 0.44259312748908997, "learning_rate": 9.997138069249703e-05, "loss": 1.636, "step": 490 }, { "epoch": 0.02736748230310462, "grad_norm": 0.4475281238555908, "learning_rate": 9.997107867297216e-05, "loss": 1.5011, "step": 491 }, { "epoch": 0.027423220556267766, "grad_norm": 0.5637838244438171, "learning_rate": 9.997077506865602e-05, "loss": 2.0265, "step": 492 }, { "epoch": 0.027478958809430912, "grad_norm": 0.5333039164543152, "learning_rate": 9.997046987955824e-05, "loss": 2.0372, "step": 493 }, { "epoch": 0.02753469706259406, "grad_norm": 0.49768728017807007, "learning_rate": 9.997016310568851e-05, "loss": 1.8226, "step": 494 }, { "epoch": 0.027590435315757205, "grad_norm": 0.5524271130561829, "learning_rate": 9.996985474705654e-05, "loss": 1.7598, "step": 495 }, { "epoch": 0.02764617356892035, "grad_norm": 0.5334012508392334, "learning_rate": 9.996954480367214e-05, "loss": 1.9021, "step": 496 }, { "epoch": 0.027701911822083494, "grad_norm": 0.5297475457191467, "learning_rate": 9.996923327554511e-05, "loss": 1.7989, "step": 497 }, { "epoch": 0.02775765007524664, "grad_norm": 0.5096792578697205, "learning_rate": 9.996892016268535e-05, "loss": 1.7904, "step": 498 }, { "epoch": 0.027813388328409787, "grad_norm": 0.47295787930488586, "learning_rate": 9.996860546510278e-05, "loss": 1.5494, "step": 499 }, { "epoch": 0.027869126581572933, "grad_norm": 0.48092177510261536, "learning_rate": 9.996828918280737e-05, "loss": 1.6759, "step": 500 }, { "epoch": 0.02792486483473608, "grad_norm": 0.4752250611782074, "learning_rate": 9.996797131580917e-05, "loss": 1.7032, "step": 501 }, { "epoch": 0.027980603087899226, "grad_norm": 0.49519795179367065, "learning_rate": 9.996765186411827e-05, "loss": 1.7786, "step": 502 }, { "epoch": 0.028036341341062372, "grad_norm": 0.5053145289421082, "learning_rate": 9.996733082774477e-05, "loss": 1.9493, "step": 503 }, { "epoch": 0.02809207959422552, "grad_norm": 0.5514931678771973, "learning_rate": 9.996700820669886e-05, "loss": 2.0257, "step": 504 }, { "epoch": 0.02814781784738866, "grad_norm": 0.5103058218955994, "learning_rate": 9.996668400099077e-05, "loss": 1.8291, "step": 505 }, { "epoch": 0.028203556100551808, "grad_norm": 0.4987359941005707, "learning_rate": 9.99663582106308e-05, "loss": 1.6841, "step": 506 }, { "epoch": 0.028259294353714954, "grad_norm": 0.570788562297821, "learning_rate": 9.996603083562928e-05, "loss": 2.1915, "step": 507 }, { "epoch": 0.0283150326068781, "grad_norm": 0.4610704481601715, "learning_rate": 9.996570187599658e-05, "loss": 1.6893, "step": 508 }, { "epoch": 0.028370770860041247, "grad_norm": 0.4623680114746094, "learning_rate": 9.996537133174313e-05, "loss": 1.5927, "step": 509 }, { "epoch": 0.028426509113204393, "grad_norm": 0.4911310076713562, "learning_rate": 9.996503920287942e-05, "loss": 1.6685, "step": 510 }, { "epoch": 0.02848224736636754, "grad_norm": 0.4995778799057007, "learning_rate": 9.996470548941598e-05, "loss": 1.8294, "step": 511 }, { "epoch": 0.028537985619530686, "grad_norm": 0.518905758857727, "learning_rate": 9.996437019136342e-05, "loss": 1.6819, "step": 512 }, { "epoch": 0.02859372387269383, "grad_norm": 0.5348454117774963, "learning_rate": 9.996403330873233e-05, "loss": 1.8129, "step": 513 }, { "epoch": 0.028649462125856975, "grad_norm": 0.49906015396118164, "learning_rate": 9.996369484153342e-05, "loss": 1.8961, "step": 514 }, { "epoch": 0.02870520037902012, "grad_norm": 0.5471760034561157, "learning_rate": 9.996335478977741e-05, "loss": 1.7716, "step": 515 }, { "epoch": 0.028760938632183267, "grad_norm": 0.4836637079715729, "learning_rate": 9.99630131534751e-05, "loss": 1.7395, "step": 516 }, { "epoch": 0.028816676885346414, "grad_norm": 0.4034901261329651, "learning_rate": 9.996266993263732e-05, "loss": 0.9524, "step": 517 }, { "epoch": 0.02887241513850956, "grad_norm": 0.5080105662345886, "learning_rate": 9.996232512727495e-05, "loss": 1.5957, "step": 518 }, { "epoch": 0.028928153391672706, "grad_norm": 0.4828059673309326, "learning_rate": 9.996197873739892e-05, "loss": 1.8356, "step": 519 }, { "epoch": 0.02898389164483585, "grad_norm": 0.47908416390419006, "learning_rate": 9.996163076302023e-05, "loss": 1.7832, "step": 520 }, { "epoch": 0.029039629897998995, "grad_norm": 0.5064157247543335, "learning_rate": 9.996128120414989e-05, "loss": 1.696, "step": 521 }, { "epoch": 0.029095368151162142, "grad_norm": 0.5058413147926331, "learning_rate": 9.996093006079903e-05, "loss": 1.8185, "step": 522 }, { "epoch": 0.029151106404325288, "grad_norm": 0.5816233158111572, "learning_rate": 9.996057733297876e-05, "loss": 2.0013, "step": 523 }, { "epoch": 0.029206844657488434, "grad_norm": 0.506596028804779, "learning_rate": 9.996022302070025e-05, "loss": 1.7923, "step": 524 }, { "epoch": 0.02926258291065158, "grad_norm": 0.48481589555740356, "learning_rate": 9.995986712397477e-05, "loss": 1.674, "step": 525 }, { "epoch": 0.029318321163814727, "grad_norm": 0.6215664148330688, "learning_rate": 9.995950964281357e-05, "loss": 2.041, "step": 526 }, { "epoch": 0.029374059416977873, "grad_norm": 0.5243876576423645, "learning_rate": 9.995915057722804e-05, "loss": 1.9253, "step": 527 }, { "epoch": 0.029429797670141016, "grad_norm": 0.4525597393512726, "learning_rate": 9.995878992722951e-05, "loss": 1.5032, "step": 528 }, { "epoch": 0.029485535923304163, "grad_norm": 0.5035833716392517, "learning_rate": 9.995842769282946e-05, "loss": 1.8901, "step": 529 }, { "epoch": 0.02954127417646731, "grad_norm": 0.5944721698760986, "learning_rate": 9.995806387403934e-05, "loss": 2.1208, "step": 530 }, { "epoch": 0.029597012429630455, "grad_norm": 0.5121837854385376, "learning_rate": 9.995769847087073e-05, "loss": 1.9563, "step": 531 }, { "epoch": 0.0296527506827936, "grad_norm": 0.5083540678024292, "learning_rate": 9.99573314833352e-05, "loss": 2.0126, "step": 532 }, { "epoch": 0.029708488935956748, "grad_norm": 0.4877237379550934, "learning_rate": 9.995696291144438e-05, "loss": 1.92, "step": 533 }, { "epoch": 0.029764227189119894, "grad_norm": 0.4935770034790039, "learning_rate": 9.995659275520995e-05, "loss": 1.5072, "step": 534 }, { "epoch": 0.02981996544228304, "grad_norm": 0.5800178050994873, "learning_rate": 9.995622101464368e-05, "loss": 2.0751, "step": 535 }, { "epoch": 0.029875703695446183, "grad_norm": 0.5653755068778992, "learning_rate": 9.995584768975734e-05, "loss": 2.0538, "step": 536 }, { "epoch": 0.02993144194860933, "grad_norm": 0.463131844997406, "learning_rate": 9.995547278056279e-05, "loss": 1.6813, "step": 537 }, { "epoch": 0.029987180201772476, "grad_norm": 0.5227254629135132, "learning_rate": 9.995509628707189e-05, "loss": 1.9213, "step": 538 }, { "epoch": 0.030042918454935622, "grad_norm": 0.49530157446861267, "learning_rate": 9.99547182092966e-05, "loss": 1.7977, "step": 539 }, { "epoch": 0.03009865670809877, "grad_norm": 0.5396206974983215, "learning_rate": 9.99543385472489e-05, "loss": 1.9346, "step": 540 }, { "epoch": 0.030154394961261915, "grad_norm": 0.517638087272644, "learning_rate": 9.995395730094083e-05, "loss": 1.7214, "step": 541 }, { "epoch": 0.03021013321442506, "grad_norm": 0.5086343884468079, "learning_rate": 9.99535744703845e-05, "loss": 1.6459, "step": 542 }, { "epoch": 0.030265871467588207, "grad_norm": 0.49579426646232605, "learning_rate": 9.995319005559202e-05, "loss": 1.7781, "step": 543 }, { "epoch": 0.03032160972075135, "grad_norm": 0.500481128692627, "learning_rate": 9.995280405657561e-05, "loss": 1.8662, "step": 544 }, { "epoch": 0.030377347973914497, "grad_norm": 0.47389981150627136, "learning_rate": 9.99524164733475e-05, "loss": 1.7803, "step": 545 }, { "epoch": 0.030433086227077643, "grad_norm": 0.4981273114681244, "learning_rate": 9.995202730591997e-05, "loss": 1.7344, "step": 546 }, { "epoch": 0.03048882448024079, "grad_norm": 0.507570207118988, "learning_rate": 9.995163655430539e-05, "loss": 1.864, "step": 547 }, { "epoch": 0.030544562733403936, "grad_norm": 0.4923110008239746, "learning_rate": 9.995124421851614e-05, "loss": 1.711, "step": 548 }, { "epoch": 0.030600300986567082, "grad_norm": 0.42948779463768005, "learning_rate": 9.995085029856464e-05, "loss": 1.4136, "step": 549 }, { "epoch": 0.030656039239730228, "grad_norm": 0.5023720264434814, "learning_rate": 9.99504547944634e-05, "loss": 1.8524, "step": 550 }, { "epoch": 0.03071177749289337, "grad_norm": 0.4656638205051422, "learning_rate": 9.995005770622499e-05, "loss": 1.5452, "step": 551 }, { "epoch": 0.030767515746056517, "grad_norm": 0.49939560890197754, "learning_rate": 9.994965903386198e-05, "loss": 1.8935, "step": 552 }, { "epoch": 0.030823253999219664, "grad_norm": 0.5469990372657776, "learning_rate": 9.994925877738698e-05, "loss": 1.9558, "step": 553 }, { "epoch": 0.03087899225238281, "grad_norm": 0.46579065918922424, "learning_rate": 9.994885693681274e-05, "loss": 1.6339, "step": 554 }, { "epoch": 0.030934730505545956, "grad_norm": 0.4826100468635559, "learning_rate": 9.994845351215199e-05, "loss": 1.6943, "step": 555 }, { "epoch": 0.030990468758709103, "grad_norm": 0.527716338634491, "learning_rate": 9.994804850341748e-05, "loss": 1.9641, "step": 556 }, { "epoch": 0.03104620701187225, "grad_norm": 0.4857400059700012, "learning_rate": 9.994764191062212e-05, "loss": 1.9041, "step": 557 }, { "epoch": 0.031101945265035395, "grad_norm": 0.483614057302475, "learning_rate": 9.994723373377876e-05, "loss": 1.6671, "step": 558 }, { "epoch": 0.031157683518198538, "grad_norm": 0.46863991022109985, "learning_rate": 9.994682397290036e-05, "loss": 1.6415, "step": 559 }, { "epoch": 0.031213421771361684, "grad_norm": 0.5118616223335266, "learning_rate": 9.99464126279999e-05, "loss": 1.9253, "step": 560 }, { "epoch": 0.03126916002452483, "grad_norm": 0.4958517849445343, "learning_rate": 9.994599969909047e-05, "loss": 1.5449, "step": 561 }, { "epoch": 0.03132489827768798, "grad_norm": 0.513558030128479, "learning_rate": 9.99455851861851e-05, "loss": 1.8665, "step": 562 }, { "epoch": 0.03138063653085112, "grad_norm": 0.49571189284324646, "learning_rate": 9.9945169089297e-05, "loss": 1.8442, "step": 563 }, { "epoch": 0.031436374784014266, "grad_norm": 0.550983190536499, "learning_rate": 9.994475140843933e-05, "loss": 1.8436, "step": 564 }, { "epoch": 0.031492113037177416, "grad_norm": 0.4547099173069, "learning_rate": 9.994433214362532e-05, "loss": 1.7172, "step": 565 }, { "epoch": 0.03154785129034056, "grad_norm": 0.4933796525001526, "learning_rate": 9.994391129486833e-05, "loss": 1.6919, "step": 566 }, { "epoch": 0.03160358954350371, "grad_norm": 0.5890671610832214, "learning_rate": 9.994348886218163e-05, "loss": 2.1026, "step": 567 }, { "epoch": 0.03165932779666685, "grad_norm": 0.5334300398826599, "learning_rate": 9.994306484557868e-05, "loss": 1.9232, "step": 568 }, { "epoch": 0.03171506604983, "grad_norm": 0.4899601340293884, "learning_rate": 9.99426392450729e-05, "loss": 1.6408, "step": 569 }, { "epoch": 0.031770804302993144, "grad_norm": 0.5135582089424133, "learning_rate": 9.994221206067777e-05, "loss": 1.8562, "step": 570 }, { "epoch": 0.03182654255615629, "grad_norm": 0.5050702095031738, "learning_rate": 9.994178329240686e-05, "loss": 1.7045, "step": 571 }, { "epoch": 0.03188228080931944, "grad_norm": 0.4874882102012634, "learning_rate": 9.994135294027378e-05, "loss": 1.8015, "step": 572 }, { "epoch": 0.03193801906248258, "grad_norm": 0.6017099022865295, "learning_rate": 9.994092100429215e-05, "loss": 2.1681, "step": 573 }, { "epoch": 0.03199375731564573, "grad_norm": 0.4922308325767517, "learning_rate": 9.994048748447569e-05, "loss": 1.6771, "step": 574 }, { "epoch": 0.03204949556880887, "grad_norm": 0.5013367533683777, "learning_rate": 9.994005238083815e-05, "loss": 1.7157, "step": 575 }, { "epoch": 0.03210523382197202, "grad_norm": 0.47761455178260803, "learning_rate": 9.99396156933933e-05, "loss": 1.8095, "step": 576 }, { "epoch": 0.032160972075135165, "grad_norm": 0.5500997304916382, "learning_rate": 9.993917742215502e-05, "loss": 2.2013, "step": 577 }, { "epoch": 0.03221671032829831, "grad_norm": 0.5222569108009338, "learning_rate": 9.993873756713719e-05, "loss": 1.9967, "step": 578 }, { "epoch": 0.03227244858146146, "grad_norm": 0.520000696182251, "learning_rate": 9.993829612835378e-05, "loss": 1.6328, "step": 579 }, { "epoch": 0.0323281868346246, "grad_norm": 0.501677930355072, "learning_rate": 9.993785310581875e-05, "loss": 1.9793, "step": 580 }, { "epoch": 0.03238392508778775, "grad_norm": 0.4832457900047302, "learning_rate": 9.993740849954619e-05, "loss": 1.7687, "step": 581 }, { "epoch": 0.03243966334095089, "grad_norm": 0.4854641556739807, "learning_rate": 9.99369623095502e-05, "loss": 1.8983, "step": 582 }, { "epoch": 0.03249540159411404, "grad_norm": 0.48794299364089966, "learning_rate": 9.993651453584491e-05, "loss": 1.6625, "step": 583 }, { "epoch": 0.032551139847277186, "grad_norm": 0.4691779911518097, "learning_rate": 9.993606517844452e-05, "loss": 1.7413, "step": 584 }, { "epoch": 0.032606878100440335, "grad_norm": 0.531639039516449, "learning_rate": 9.993561423736331e-05, "loss": 1.875, "step": 585 }, { "epoch": 0.03266261635360348, "grad_norm": 0.5259484648704529, "learning_rate": 9.993516171261555e-05, "loss": 1.9669, "step": 586 }, { "epoch": 0.03271835460676662, "grad_norm": 0.4976826012134552, "learning_rate": 9.993470760421559e-05, "loss": 1.808, "step": 587 }, { "epoch": 0.03277409285992977, "grad_norm": 0.4722268283367157, "learning_rate": 9.993425191217787e-05, "loss": 1.7654, "step": 588 }, { "epoch": 0.032829831113092914, "grad_norm": 0.4951403737068176, "learning_rate": 9.993379463651679e-05, "loss": 1.8282, "step": 589 }, { "epoch": 0.032885569366256064, "grad_norm": 0.4893924295902252, "learning_rate": 9.99333357772469e-05, "loss": 1.6477, "step": 590 }, { "epoch": 0.032941307619419206, "grad_norm": 0.4877261519432068, "learning_rate": 9.993287533438273e-05, "loss": 1.6518, "step": 591 }, { "epoch": 0.032997045872582356, "grad_norm": 0.48906272649765015, "learning_rate": 9.993241330793888e-05, "loss": 1.6485, "step": 592 }, { "epoch": 0.0330527841257455, "grad_norm": 0.5735100507736206, "learning_rate": 9.993194969792999e-05, "loss": 2.0397, "step": 593 }, { "epoch": 0.03310852237890864, "grad_norm": 0.45156189799308777, "learning_rate": 9.99314845043708e-05, "loss": 1.6368, "step": 594 }, { "epoch": 0.03316426063207179, "grad_norm": 0.4821372628211975, "learning_rate": 9.993101772727602e-05, "loss": 1.6886, "step": 595 }, { "epoch": 0.033219998885234935, "grad_norm": 0.501278817653656, "learning_rate": 9.993054936666048e-05, "loss": 1.7587, "step": 596 }, { "epoch": 0.033275737138398084, "grad_norm": 0.5598791241645813, "learning_rate": 9.993007942253905e-05, "loss": 1.8861, "step": 597 }, { "epoch": 0.03333147539156123, "grad_norm": 0.48821693658828735, "learning_rate": 9.99296078949266e-05, "loss": 1.6563, "step": 598 }, { "epoch": 0.03338721364472438, "grad_norm": 0.4853152632713318, "learning_rate": 9.99291347838381e-05, "loss": 1.5493, "step": 599 }, { "epoch": 0.03344295189788752, "grad_norm": 0.5629671812057495, "learning_rate": 9.992866008928855e-05, "loss": 2.1359, "step": 600 }, { "epoch": 0.03349869015105066, "grad_norm": 0.5176377892494202, "learning_rate": 9.9928183811293e-05, "loss": 2.0139, "step": 601 }, { "epoch": 0.03355442840421381, "grad_norm": 0.46964964270591736, "learning_rate": 9.992770594986658e-05, "loss": 1.6594, "step": 602 }, { "epoch": 0.033610166657376955, "grad_norm": 0.49720609188079834, "learning_rate": 9.992722650502442e-05, "loss": 1.8432, "step": 603 }, { "epoch": 0.033665904910540105, "grad_norm": 0.4787680506706238, "learning_rate": 9.992674547678171e-05, "loss": 1.8071, "step": 604 }, { "epoch": 0.03372164316370325, "grad_norm": 0.4432480037212372, "learning_rate": 9.992626286515373e-05, "loss": 1.6391, "step": 605 }, { "epoch": 0.0337773814168664, "grad_norm": 0.5781794786453247, "learning_rate": 9.992577867015581e-05, "loss": 2.0711, "step": 606 }, { "epoch": 0.03383311967002954, "grad_norm": 0.45807138085365295, "learning_rate": 9.992529289180326e-05, "loss": 1.5886, "step": 607 }, { "epoch": 0.03388885792319269, "grad_norm": 0.5234102606773376, "learning_rate": 9.992480553011151e-05, "loss": 1.9211, "step": 608 }, { "epoch": 0.03394459617635583, "grad_norm": 0.5202253460884094, "learning_rate": 9.9924316585096e-05, "loss": 1.819, "step": 609 }, { "epoch": 0.034000334429518976, "grad_norm": 0.4516846537590027, "learning_rate": 9.992382605677226e-05, "loss": 1.6631, "step": 610 }, { "epoch": 0.034056072682682126, "grad_norm": 0.5501968860626221, "learning_rate": 9.992333394515583e-05, "loss": 2.0759, "step": 611 }, { "epoch": 0.03411181093584527, "grad_norm": 0.4812159836292267, "learning_rate": 9.992284025026231e-05, "loss": 1.6721, "step": 612 }, { "epoch": 0.03416754918900842, "grad_norm": 0.5236145257949829, "learning_rate": 9.992234497210737e-05, "loss": 1.807, "step": 613 }, { "epoch": 0.03422328744217156, "grad_norm": 0.5123412609100342, "learning_rate": 9.992184811070673e-05, "loss": 1.9095, "step": 614 }, { "epoch": 0.03427902569533471, "grad_norm": 0.49797573685646057, "learning_rate": 9.992134966607612e-05, "loss": 1.7303, "step": 615 }, { "epoch": 0.034334763948497854, "grad_norm": 0.48441436886787415, "learning_rate": 9.992084963823136e-05, "loss": 1.6339, "step": 616 }, { "epoch": 0.034390502201661, "grad_norm": 0.5459060668945312, "learning_rate": 9.992034802718832e-05, "loss": 1.8881, "step": 617 }, { "epoch": 0.03444624045482415, "grad_norm": 0.5051499009132385, "learning_rate": 9.991984483296288e-05, "loss": 1.9386, "step": 618 }, { "epoch": 0.03450197870798729, "grad_norm": 0.5421403050422668, "learning_rate": 9.991934005557103e-05, "loss": 2.0836, "step": 619 }, { "epoch": 0.03455771696115044, "grad_norm": 0.4838196933269501, "learning_rate": 9.991883369502874e-05, "loss": 1.6526, "step": 620 }, { "epoch": 0.03461345521431358, "grad_norm": 0.49810105562210083, "learning_rate": 9.991832575135211e-05, "loss": 1.7326, "step": 621 }, { "epoch": 0.03466919346747673, "grad_norm": 0.46195507049560547, "learning_rate": 9.991781622455723e-05, "loss": 1.6398, "step": 622 }, { "epoch": 0.034724931720639875, "grad_norm": 0.46615251898765564, "learning_rate": 9.991730511466026e-05, "loss": 1.7927, "step": 623 }, { "epoch": 0.03478066997380302, "grad_norm": 0.5302008390426636, "learning_rate": 9.991679242167741e-05, "loss": 1.8047, "step": 624 }, { "epoch": 0.03483640822696617, "grad_norm": 0.49787190556526184, "learning_rate": 9.991627814562494e-05, "loss": 1.9146, "step": 625 }, { "epoch": 0.03489214648012931, "grad_norm": 0.5156252384185791, "learning_rate": 9.991576228651915e-05, "loss": 1.9453, "step": 626 }, { "epoch": 0.03494788473329246, "grad_norm": 0.45635107159614563, "learning_rate": 9.991524484437642e-05, "loss": 1.7143, "step": 627 }, { "epoch": 0.0350036229864556, "grad_norm": 0.48797038197517395, "learning_rate": 9.991472581921316e-05, "loss": 1.7371, "step": 628 }, { "epoch": 0.03505936123961875, "grad_norm": 0.549708366394043, "learning_rate": 9.99142052110458e-05, "loss": 1.9569, "step": 629 }, { "epoch": 0.035115099492781895, "grad_norm": 0.4693654179573059, "learning_rate": 9.991368301989088e-05, "loss": 1.4609, "step": 630 }, { "epoch": 0.035170837745945045, "grad_norm": 0.5259846448898315, "learning_rate": 9.991315924576495e-05, "loss": 1.7577, "step": 631 }, { "epoch": 0.03522657599910819, "grad_norm": 0.49805745482444763, "learning_rate": 9.991263388868461e-05, "loss": 1.8534, "step": 632 }, { "epoch": 0.03528231425227133, "grad_norm": 0.4565132260322571, "learning_rate": 9.991210694866654e-05, "loss": 1.6853, "step": 633 }, { "epoch": 0.03533805250543448, "grad_norm": 0.5158933401107788, "learning_rate": 9.991157842572747e-05, "loss": 1.8088, "step": 634 }, { "epoch": 0.035393790758597624, "grad_norm": 0.49667277932167053, "learning_rate": 9.991104831988412e-05, "loss": 1.9148, "step": 635 }, { "epoch": 0.03544952901176077, "grad_norm": 0.48701363801956177, "learning_rate": 9.991051663115331e-05, "loss": 1.7816, "step": 636 }, { "epoch": 0.035505267264923916, "grad_norm": 0.5608890056610107, "learning_rate": 9.990998335955193e-05, "loss": 1.8764, "step": 637 }, { "epoch": 0.035561005518087066, "grad_norm": 0.49871060252189636, "learning_rate": 9.990944850509685e-05, "loss": 1.6103, "step": 638 }, { "epoch": 0.03561674377125021, "grad_norm": 0.46610593795776367, "learning_rate": 9.990891206780506e-05, "loss": 1.7798, "step": 639 }, { "epoch": 0.03567248202441335, "grad_norm": 0.5284513831138611, "learning_rate": 9.990837404769358e-05, "loss": 1.8771, "step": 640 }, { "epoch": 0.0357282202775765, "grad_norm": 0.5929260849952698, "learning_rate": 9.990783444477946e-05, "loss": 2.0712, "step": 641 }, { "epoch": 0.035783958530739644, "grad_norm": 0.5146616697311401, "learning_rate": 9.990729325907981e-05, "loss": 1.7693, "step": 642 }, { "epoch": 0.035839696783902794, "grad_norm": 0.5243765711784363, "learning_rate": 9.99067504906118e-05, "loss": 1.8675, "step": 643 }, { "epoch": 0.03589543503706594, "grad_norm": 0.48738136887550354, "learning_rate": 9.990620613939263e-05, "loss": 1.7557, "step": 644 }, { "epoch": 0.03595117329022909, "grad_norm": 0.5006791353225708, "learning_rate": 9.990566020543959e-05, "loss": 1.7199, "step": 645 }, { "epoch": 0.03600691154339223, "grad_norm": 0.5283340811729431, "learning_rate": 9.990511268876998e-05, "loss": 1.9156, "step": 646 }, { "epoch": 0.03606264979655538, "grad_norm": 0.47615885734558105, "learning_rate": 9.990456358940115e-05, "loss": 1.6183, "step": 647 }, { "epoch": 0.03611838804971852, "grad_norm": 0.48326513171195984, "learning_rate": 9.990401290735053e-05, "loss": 1.8159, "step": 648 }, { "epoch": 0.036174126302881665, "grad_norm": 0.489183247089386, "learning_rate": 9.990346064263558e-05, "loss": 1.9306, "step": 649 }, { "epoch": 0.036229864556044815, "grad_norm": 0.44880211353302, "learning_rate": 9.990290679527382e-05, "loss": 1.4257, "step": 650 }, { "epoch": 0.03628560280920796, "grad_norm": 0.49666327238082886, "learning_rate": 9.990235136528281e-05, "loss": 1.6587, "step": 651 }, { "epoch": 0.03634134106237111, "grad_norm": 0.5396116971969604, "learning_rate": 9.990179435268017e-05, "loss": 1.9138, "step": 652 }, { "epoch": 0.03639707931553425, "grad_norm": 0.512506902217865, "learning_rate": 9.990123575748355e-05, "loss": 2.0153, "step": 653 }, { "epoch": 0.0364528175686974, "grad_norm": 0.48785391449928284, "learning_rate": 9.990067557971068e-05, "loss": 1.9489, "step": 654 }, { "epoch": 0.03650855582186054, "grad_norm": 0.49123311042785645, "learning_rate": 9.990011381937933e-05, "loss": 1.6926, "step": 655 }, { "epoch": 0.036564294075023686, "grad_norm": 0.4744409918785095, "learning_rate": 9.98995504765073e-05, "loss": 1.7961, "step": 656 }, { "epoch": 0.036620032328186836, "grad_norm": 0.5175344944000244, "learning_rate": 9.989898555111245e-05, "loss": 1.8846, "step": 657 }, { "epoch": 0.03667577058134998, "grad_norm": 0.4825249910354614, "learning_rate": 9.989841904321274e-05, "loss": 1.7094, "step": 658 }, { "epoch": 0.03673150883451313, "grad_norm": 0.5392758250236511, "learning_rate": 9.989785095282609e-05, "loss": 1.8777, "step": 659 }, { "epoch": 0.03678724708767627, "grad_norm": 0.5122122764587402, "learning_rate": 9.989728127997052e-05, "loss": 1.8686, "step": 660 }, { "epoch": 0.03684298534083942, "grad_norm": 0.4976766109466553, "learning_rate": 9.989671002466412e-05, "loss": 1.7542, "step": 661 }, { "epoch": 0.036898723594002564, "grad_norm": 0.4618877172470093, "learning_rate": 9.989613718692501e-05, "loss": 1.4741, "step": 662 }, { "epoch": 0.03695446184716571, "grad_norm": 0.4870270788669586, "learning_rate": 9.989556276677133e-05, "loss": 1.6816, "step": 663 }, { "epoch": 0.037010200100328856, "grad_norm": 0.5549145936965942, "learning_rate": 9.989498676422131e-05, "loss": 1.8716, "step": 664 }, { "epoch": 0.037065938353492, "grad_norm": 0.501438319683075, "learning_rate": 9.989440917929321e-05, "loss": 1.7686, "step": 665 }, { "epoch": 0.03712167660665515, "grad_norm": 0.5713873505592346, "learning_rate": 9.989383001200536e-05, "loss": 2.116, "step": 666 }, { "epoch": 0.03717741485981829, "grad_norm": 0.4839586615562439, "learning_rate": 9.989324926237613e-05, "loss": 1.8245, "step": 667 }, { "epoch": 0.03723315311298144, "grad_norm": 0.5154809355735779, "learning_rate": 9.989266693042394e-05, "loss": 1.661, "step": 668 }, { "epoch": 0.037288891366144584, "grad_norm": 0.4965420365333557, "learning_rate": 9.989208301616724e-05, "loss": 1.6531, "step": 669 }, { "epoch": 0.037344629619307734, "grad_norm": 0.4850505292415619, "learning_rate": 9.989149751962455e-05, "loss": 1.8691, "step": 670 }, { "epoch": 0.03740036787247088, "grad_norm": 0.47275611758232117, "learning_rate": 9.989091044081445e-05, "loss": 1.7718, "step": 671 }, { "epoch": 0.03745610612563402, "grad_norm": 0.5606955885887146, "learning_rate": 9.989032177975554e-05, "loss": 2.2129, "step": 672 }, { "epoch": 0.03751184437879717, "grad_norm": 0.49657538533210754, "learning_rate": 9.988973153646654e-05, "loss": 1.9084, "step": 673 }, { "epoch": 0.03756758263196031, "grad_norm": 0.5135958790779114, "learning_rate": 9.988913971096611e-05, "loss": 1.9491, "step": 674 }, { "epoch": 0.03762332088512346, "grad_norm": 0.48900923132896423, "learning_rate": 9.988854630327305e-05, "loss": 1.7176, "step": 675 }, { "epoch": 0.037679059138286605, "grad_norm": 0.463521808385849, "learning_rate": 9.988795131340616e-05, "loss": 1.5625, "step": 676 }, { "epoch": 0.037734797391449755, "grad_norm": 0.48082444071769714, "learning_rate": 9.988735474138433e-05, "loss": 1.7208, "step": 677 }, { "epoch": 0.0377905356446129, "grad_norm": 0.5012754201889038, "learning_rate": 9.988675658722648e-05, "loss": 1.9678, "step": 678 }, { "epoch": 0.03784627389777604, "grad_norm": 0.5888019800186157, "learning_rate": 9.988615685095155e-05, "loss": 2.2239, "step": 679 }, { "epoch": 0.03790201215093919, "grad_norm": 0.47830748558044434, "learning_rate": 9.98855555325786e-05, "loss": 1.6574, "step": 680 }, { "epoch": 0.03795775040410233, "grad_norm": 0.47648170590400696, "learning_rate": 9.988495263212667e-05, "loss": 1.6185, "step": 681 }, { "epoch": 0.03801348865726548, "grad_norm": 0.5321143269538879, "learning_rate": 9.98843481496149e-05, "loss": 2.0788, "step": 682 }, { "epoch": 0.038069226910428626, "grad_norm": 0.4451909363269806, "learning_rate": 9.988374208506243e-05, "loss": 1.7213, "step": 683 }, { "epoch": 0.038124965163591776, "grad_norm": 0.4888899028301239, "learning_rate": 9.988313443848853e-05, "loss": 1.9524, "step": 684 }, { "epoch": 0.03818070341675492, "grad_norm": 0.5075884461402893, "learning_rate": 9.988252520991244e-05, "loss": 1.9489, "step": 685 }, { "epoch": 0.03823644166991806, "grad_norm": 0.5244428515434265, "learning_rate": 9.988191439935348e-05, "loss": 1.8805, "step": 686 }, { "epoch": 0.03829217992308121, "grad_norm": 0.5269452333450317, "learning_rate": 9.988130200683103e-05, "loss": 1.916, "step": 687 }, { "epoch": 0.038347918176244354, "grad_norm": 0.40096086263656616, "learning_rate": 9.98806880323645e-05, "loss": 1.3248, "step": 688 }, { "epoch": 0.038403656429407504, "grad_norm": 0.555325984954834, "learning_rate": 9.988007247597337e-05, "loss": 1.945, "step": 689 }, { "epoch": 0.03845939468257065, "grad_norm": 0.4987097382545471, "learning_rate": 9.987945533767717e-05, "loss": 1.9159, "step": 690 }, { "epoch": 0.0385151329357338, "grad_norm": 0.46860477328300476, "learning_rate": 9.987883661749548e-05, "loss": 1.7105, "step": 691 }, { "epoch": 0.03857087118889694, "grad_norm": 0.4867911636829376, "learning_rate": 9.987821631544789e-05, "loss": 1.6607, "step": 692 }, { "epoch": 0.03862660944206009, "grad_norm": 0.5149185061454773, "learning_rate": 9.987759443155409e-05, "loss": 1.8422, "step": 693 }, { "epoch": 0.03868234769522323, "grad_norm": 0.508399248123169, "learning_rate": 9.98769709658338e-05, "loss": 1.8393, "step": 694 }, { "epoch": 0.038738085948386375, "grad_norm": 0.4841381907463074, "learning_rate": 9.987634591830679e-05, "loss": 1.8819, "step": 695 }, { "epoch": 0.038793824201549525, "grad_norm": 0.4869403541088104, "learning_rate": 9.987571928899288e-05, "loss": 1.7872, "step": 696 }, { "epoch": 0.03884956245471267, "grad_norm": 0.49572715163230896, "learning_rate": 9.987509107791196e-05, "loss": 1.8078, "step": 697 }, { "epoch": 0.03890530070787582, "grad_norm": 0.5188158750534058, "learning_rate": 9.987446128508396e-05, "loss": 1.7838, "step": 698 }, { "epoch": 0.03896103896103896, "grad_norm": 0.4589369595050812, "learning_rate": 9.98738299105288e-05, "loss": 1.7299, "step": 699 }, { "epoch": 0.03901677721420211, "grad_norm": 0.5023289322853088, "learning_rate": 9.987319695426657e-05, "loss": 1.7414, "step": 700 }, { "epoch": 0.03907251546736525, "grad_norm": 0.5241897702217102, "learning_rate": 9.98725624163173e-05, "loss": 1.8223, "step": 701 }, { "epoch": 0.039128253720528396, "grad_norm": 0.4720919728279114, "learning_rate": 9.987192629670112e-05, "loss": 1.791, "step": 702 }, { "epoch": 0.039183991973691545, "grad_norm": 0.5045210719108582, "learning_rate": 9.987128859543824e-05, "loss": 1.7428, "step": 703 }, { "epoch": 0.03923973022685469, "grad_norm": 0.5130773782730103, "learning_rate": 9.987064931254884e-05, "loss": 1.6701, "step": 704 }, { "epoch": 0.03929546848001784, "grad_norm": 0.5155162215232849, "learning_rate": 9.987000844805319e-05, "loss": 1.9592, "step": 705 }, { "epoch": 0.03935120673318098, "grad_norm": 0.46410509943962097, "learning_rate": 9.986936600197165e-05, "loss": 1.786, "step": 706 }, { "epoch": 0.03940694498634413, "grad_norm": 0.5000941157341003, "learning_rate": 9.986872197432459e-05, "loss": 1.7937, "step": 707 }, { "epoch": 0.039462683239507274, "grad_norm": 0.4663851261138916, "learning_rate": 9.986807636513241e-05, "loss": 1.8019, "step": 708 }, { "epoch": 0.039518421492670416, "grad_norm": 0.5445390343666077, "learning_rate": 9.986742917441561e-05, "loss": 1.9214, "step": 709 }, { "epoch": 0.039574159745833566, "grad_norm": 0.49968406558036804, "learning_rate": 9.986678040219469e-05, "loss": 1.7621, "step": 710 }, { "epoch": 0.03962989799899671, "grad_norm": 0.514168381690979, "learning_rate": 9.986613004849024e-05, "loss": 1.7435, "step": 711 }, { "epoch": 0.03968563625215986, "grad_norm": 0.4899461269378662, "learning_rate": 9.986547811332289e-05, "loss": 1.7199, "step": 712 }, { "epoch": 0.039741374505323, "grad_norm": 0.5172072052955627, "learning_rate": 9.986482459671332e-05, "loss": 1.9435, "step": 713 }, { "epoch": 0.03979711275848615, "grad_norm": 0.5198094844818115, "learning_rate": 9.986416949868223e-05, "loss": 1.799, "step": 714 }, { "epoch": 0.039852851011649294, "grad_norm": 0.47976863384246826, "learning_rate": 9.986351281925042e-05, "loss": 1.8455, "step": 715 }, { "epoch": 0.039908589264812444, "grad_norm": 0.4702402949333191, "learning_rate": 9.986285455843872e-05, "loss": 1.5848, "step": 716 }, { "epoch": 0.03996432751797559, "grad_norm": 0.4698415994644165, "learning_rate": 9.986219471626797e-05, "loss": 1.6527, "step": 717 }, { "epoch": 0.04002006577113873, "grad_norm": 0.5518625974655151, "learning_rate": 9.986153329275913e-05, "loss": 1.8773, "step": 718 }, { "epoch": 0.04007580402430188, "grad_norm": 0.5149457454681396, "learning_rate": 9.986087028793316e-05, "loss": 1.8737, "step": 719 }, { "epoch": 0.04013154227746502, "grad_norm": 0.527282178401947, "learning_rate": 9.98602057018111e-05, "loss": 1.9581, "step": 720 }, { "epoch": 0.04018728053062817, "grad_norm": 0.48371025919914246, "learning_rate": 9.985953953441402e-05, "loss": 1.887, "step": 721 }, { "epoch": 0.040243018783791315, "grad_norm": 0.5474866032600403, "learning_rate": 9.985887178576305e-05, "loss": 1.9981, "step": 722 }, { "epoch": 0.040298757036954465, "grad_norm": 0.5417437553405762, "learning_rate": 9.985820245587936e-05, "loss": 2.0195, "step": 723 }, { "epoch": 0.04035449529011761, "grad_norm": 0.458363801240921, "learning_rate": 9.985753154478418e-05, "loss": 1.6134, "step": 724 }, { "epoch": 0.04041023354328075, "grad_norm": 0.49649447202682495, "learning_rate": 9.98568590524988e-05, "loss": 1.7501, "step": 725 }, { "epoch": 0.0404659717964439, "grad_norm": 0.5304057002067566, "learning_rate": 9.985618497904453e-05, "loss": 1.9164, "step": 726 }, { "epoch": 0.04052171004960704, "grad_norm": 0.4757838249206543, "learning_rate": 9.985550932444275e-05, "loss": 1.8159, "step": 727 }, { "epoch": 0.04057744830277019, "grad_norm": 0.48324036598205566, "learning_rate": 9.98548320887149e-05, "loss": 1.6184, "step": 728 }, { "epoch": 0.040633186555933336, "grad_norm": 0.5059638023376465, "learning_rate": 9.985415327188245e-05, "loss": 1.8383, "step": 729 }, { "epoch": 0.040688924809096486, "grad_norm": 0.4717106819152832, "learning_rate": 9.985347287396692e-05, "loss": 1.67, "step": 730 }, { "epoch": 0.04074466306225963, "grad_norm": 0.4953088164329529, "learning_rate": 9.98527908949899e-05, "loss": 1.8185, "step": 731 }, { "epoch": 0.04080040131542277, "grad_norm": 0.49030283093452454, "learning_rate": 9.985210733497301e-05, "loss": 1.7909, "step": 732 }, { "epoch": 0.04085613956858592, "grad_norm": 0.5224010944366455, "learning_rate": 9.985142219393795e-05, "loss": 1.8615, "step": 733 }, { "epoch": 0.040911877821749064, "grad_norm": 0.5008676648139954, "learning_rate": 9.985073547190641e-05, "loss": 1.9337, "step": 734 }, { "epoch": 0.040967616074912214, "grad_norm": 0.4777420163154602, "learning_rate": 9.98500471689002e-05, "loss": 1.8345, "step": 735 }, { "epoch": 0.04102335432807536, "grad_norm": 0.4995800852775574, "learning_rate": 9.984935728494113e-05, "loss": 1.843, "step": 736 }, { "epoch": 0.041079092581238506, "grad_norm": 0.5097813010215759, "learning_rate": 9.984866582005111e-05, "loss": 1.9642, "step": 737 }, { "epoch": 0.04113483083440165, "grad_norm": 0.4956590533256531, "learning_rate": 9.984797277425204e-05, "loss": 1.8874, "step": 738 }, { "epoch": 0.0411905690875648, "grad_norm": 0.5304232239723206, "learning_rate": 9.98472781475659e-05, "loss": 1.9269, "step": 739 }, { "epoch": 0.04124630734072794, "grad_norm": 0.5134212374687195, "learning_rate": 9.984658194001474e-05, "loss": 1.5059, "step": 740 }, { "epoch": 0.041302045593891085, "grad_norm": 0.4551413953304291, "learning_rate": 9.984588415162061e-05, "loss": 1.7386, "step": 741 }, { "epoch": 0.041357783847054234, "grad_norm": 0.5477944612503052, "learning_rate": 9.984518478240568e-05, "loss": 1.9075, "step": 742 }, { "epoch": 0.04141352210021738, "grad_norm": 0.4997386038303375, "learning_rate": 9.98444838323921e-05, "loss": 1.7812, "step": 743 }, { "epoch": 0.04146926035338053, "grad_norm": 0.5239866971969604, "learning_rate": 9.984378130160208e-05, "loss": 1.9155, "step": 744 }, { "epoch": 0.04152499860654367, "grad_norm": 0.46206948161125183, "learning_rate": 9.984307719005795e-05, "loss": 1.6661, "step": 745 }, { "epoch": 0.04158073685970682, "grad_norm": 0.4978305399417877, "learning_rate": 9.984237149778201e-05, "loss": 1.8456, "step": 746 }, { "epoch": 0.04163647511286996, "grad_norm": 0.50936359167099, "learning_rate": 9.984166422479663e-05, "loss": 1.9118, "step": 747 }, { "epoch": 0.041692213366033105, "grad_norm": 0.49744611978530884, "learning_rate": 9.984095537112429e-05, "loss": 1.7721, "step": 748 }, { "epoch": 0.041747951619196255, "grad_norm": 0.536056637763977, "learning_rate": 9.984024493678743e-05, "loss": 1.7968, "step": 749 }, { "epoch": 0.0418036898723594, "grad_norm": 0.5262266993522644, "learning_rate": 9.983953292180857e-05, "loss": 1.858, "step": 750 }, { "epoch": 0.04185942812552255, "grad_norm": 0.5085186958312988, "learning_rate": 9.983881932621033e-05, "loss": 1.751, "step": 751 }, { "epoch": 0.04191516637868569, "grad_norm": 0.4641915261745453, "learning_rate": 9.983810415001531e-05, "loss": 1.5998, "step": 752 }, { "epoch": 0.04197090463184884, "grad_norm": 0.5268242955207825, "learning_rate": 9.983738739324621e-05, "loss": 1.7263, "step": 753 }, { "epoch": 0.04202664288501198, "grad_norm": 0.5283384919166565, "learning_rate": 9.983666905592576e-05, "loss": 1.9334, "step": 754 }, { "epoch": 0.042082381138175126, "grad_norm": 0.5007447600364685, "learning_rate": 9.983594913807672e-05, "loss": 1.6944, "step": 755 }, { "epoch": 0.042138119391338276, "grad_norm": 0.5626598596572876, "learning_rate": 9.983522763972196e-05, "loss": 2.042, "step": 756 }, { "epoch": 0.04219385764450142, "grad_norm": 0.46739470958709717, "learning_rate": 9.983450456088432e-05, "loss": 1.6733, "step": 757 }, { "epoch": 0.04224959589766457, "grad_norm": 0.5124320983886719, "learning_rate": 9.983377990158676e-05, "loss": 1.8463, "step": 758 }, { "epoch": 0.04230533415082771, "grad_norm": 0.4762093722820282, "learning_rate": 9.983305366185223e-05, "loss": 1.7602, "step": 759 }, { "epoch": 0.04236107240399086, "grad_norm": 0.5182420015335083, "learning_rate": 9.983232584170381e-05, "loss": 1.8644, "step": 760 }, { "epoch": 0.042416810657154004, "grad_norm": 0.4640427231788635, "learning_rate": 9.983159644116454e-05, "loss": 1.6919, "step": 761 }, { "epoch": 0.042472548910317154, "grad_norm": 0.4894956946372986, "learning_rate": 9.983086546025759e-05, "loss": 1.9491, "step": 762 }, { "epoch": 0.0425282871634803, "grad_norm": 0.49869638681411743, "learning_rate": 9.98301328990061e-05, "loss": 1.9184, "step": 763 }, { "epoch": 0.04258402541664344, "grad_norm": 0.5161083936691284, "learning_rate": 9.982939875743333e-05, "loss": 1.826, "step": 764 }, { "epoch": 0.04263976366980659, "grad_norm": 0.4913845956325531, "learning_rate": 9.982866303556258e-05, "loss": 1.7675, "step": 765 }, { "epoch": 0.04269550192296973, "grad_norm": 0.49277618527412415, "learning_rate": 9.982792573341713e-05, "loss": 1.8539, "step": 766 }, { "epoch": 0.04275124017613288, "grad_norm": 0.5222828388214111, "learning_rate": 9.982718685102039e-05, "loss": 1.9196, "step": 767 }, { "epoch": 0.042806978429296025, "grad_norm": 0.5137212872505188, "learning_rate": 9.982644638839583e-05, "loss": 1.8719, "step": 768 }, { "epoch": 0.042862716682459175, "grad_norm": 0.646440327167511, "learning_rate": 9.982570434556686e-05, "loss": 1.9678, "step": 769 }, { "epoch": 0.04291845493562232, "grad_norm": 0.4992925524711609, "learning_rate": 9.982496072255708e-05, "loss": 1.7078, "step": 770 }, { "epoch": 0.04297419318878546, "grad_norm": 0.4863613247871399, "learning_rate": 9.982421551939003e-05, "loss": 1.8064, "step": 771 }, { "epoch": 0.04302993144194861, "grad_norm": 0.4646783769130707, "learning_rate": 9.982346873608937e-05, "loss": 1.6427, "step": 772 }, { "epoch": 0.04308566969511175, "grad_norm": 0.5143455266952515, "learning_rate": 9.982272037267877e-05, "loss": 1.7367, "step": 773 }, { "epoch": 0.0431414079482749, "grad_norm": 0.4936600923538208, "learning_rate": 9.982197042918195e-05, "loss": 1.6834, "step": 774 }, { "epoch": 0.043197146201438046, "grad_norm": 0.4923505485057831, "learning_rate": 9.982121890562273e-05, "loss": 1.7545, "step": 775 }, { "epoch": 0.043252884454601195, "grad_norm": 0.5399130582809448, "learning_rate": 9.982046580202493e-05, "loss": 1.784, "step": 776 }, { "epoch": 0.04330862270776434, "grad_norm": 0.49087220430374146, "learning_rate": 9.98197111184124e-05, "loss": 1.8328, "step": 777 }, { "epoch": 0.04336436096092749, "grad_norm": 0.5504277348518372, "learning_rate": 9.981895485480912e-05, "loss": 1.8808, "step": 778 }, { "epoch": 0.04342009921409063, "grad_norm": 0.45953568816185, "learning_rate": 9.981819701123907e-05, "loss": 1.577, "step": 779 }, { "epoch": 0.043475837467253774, "grad_norm": 0.4762939214706421, "learning_rate": 9.981743758772625e-05, "loss": 1.6959, "step": 780 }, { "epoch": 0.043531575720416923, "grad_norm": 0.4667057991027832, "learning_rate": 9.981667658429477e-05, "loss": 1.6635, "step": 781 }, { "epoch": 0.043587313973580066, "grad_norm": 0.5036124587059021, "learning_rate": 9.981591400096877e-05, "loss": 1.854, "step": 782 }, { "epoch": 0.043643052226743216, "grad_norm": 0.48234641551971436, "learning_rate": 9.98151498377724e-05, "loss": 1.781, "step": 783 }, { "epoch": 0.04369879047990636, "grad_norm": 0.4990682005882263, "learning_rate": 9.981438409472994e-05, "loss": 1.6629, "step": 784 }, { "epoch": 0.04375452873306951, "grad_norm": 0.4655357599258423, "learning_rate": 9.981361677186566e-05, "loss": 1.7694, "step": 785 }, { "epoch": 0.04381026698623265, "grad_norm": 0.4690426290035248, "learning_rate": 9.981284786920388e-05, "loss": 1.7242, "step": 786 }, { "epoch": 0.043866005239395794, "grad_norm": 0.46350887417793274, "learning_rate": 9.981207738676899e-05, "loss": 1.6032, "step": 787 }, { "epoch": 0.043921743492558944, "grad_norm": 0.5220307111740112, "learning_rate": 9.981130532458544e-05, "loss": 1.8624, "step": 788 }, { "epoch": 0.04397748174572209, "grad_norm": 0.465497761964798, "learning_rate": 9.98105316826777e-05, "loss": 1.6831, "step": 789 }, { "epoch": 0.04403321999888524, "grad_norm": 0.4893016219139099, "learning_rate": 9.980975646107032e-05, "loss": 1.7933, "step": 790 }, { "epoch": 0.04408895825204838, "grad_norm": 0.4457073509693146, "learning_rate": 9.980897965978787e-05, "loss": 1.6383, "step": 791 }, { "epoch": 0.04414469650521153, "grad_norm": 0.5064904093742371, "learning_rate": 9.980820127885497e-05, "loss": 1.8771, "step": 792 }, { "epoch": 0.04420043475837467, "grad_norm": 0.5663847327232361, "learning_rate": 9.980742131829635e-05, "loss": 2.0977, "step": 793 }, { "epoch": 0.044256173011537815, "grad_norm": 0.558462381362915, "learning_rate": 9.980663977813672e-05, "loss": 1.9813, "step": 794 }, { "epoch": 0.044311911264700965, "grad_norm": 0.5043233633041382, "learning_rate": 9.980585665840087e-05, "loss": 1.7362, "step": 795 }, { "epoch": 0.04436764951786411, "grad_norm": 0.5110850930213928, "learning_rate": 9.980507195911363e-05, "loss": 1.5489, "step": 796 }, { "epoch": 0.04442338777102726, "grad_norm": 0.5611404180526733, "learning_rate": 9.980428568029989e-05, "loss": 1.9545, "step": 797 }, { "epoch": 0.0444791260241904, "grad_norm": 0.44059324264526367, "learning_rate": 9.98034978219846e-05, "loss": 1.6321, "step": 798 }, { "epoch": 0.04453486427735355, "grad_norm": 0.5034955143928528, "learning_rate": 9.980270838419273e-05, "loss": 1.7045, "step": 799 }, { "epoch": 0.04459060253051669, "grad_norm": 0.49383604526519775, "learning_rate": 9.98019173669493e-05, "loss": 1.6414, "step": 800 }, { "epoch": 0.04464634078367984, "grad_norm": 0.5035958290100098, "learning_rate": 9.980112477027942e-05, "loss": 1.8683, "step": 801 }, { "epoch": 0.044702079036842986, "grad_norm": 0.4942208230495453, "learning_rate": 9.980033059420826e-05, "loss": 1.7773, "step": 802 }, { "epoch": 0.04475781729000613, "grad_norm": 0.5211103558540344, "learning_rate": 9.979953483876095e-05, "loss": 2.0631, "step": 803 }, { "epoch": 0.04481355554316928, "grad_norm": 0.5940659046173096, "learning_rate": 9.979873750396273e-05, "loss": 2.0601, "step": 804 }, { "epoch": 0.04486929379633242, "grad_norm": 0.5211898684501648, "learning_rate": 9.979793858983891e-05, "loss": 1.7687, "step": 805 }, { "epoch": 0.04492503204949557, "grad_norm": 0.5175243020057678, "learning_rate": 9.979713809641482e-05, "loss": 1.9662, "step": 806 }, { "epoch": 0.044980770302658714, "grad_norm": 0.5139010548591614, "learning_rate": 9.979633602371586e-05, "loss": 1.7011, "step": 807 }, { "epoch": 0.045036508555821864, "grad_norm": 0.4817015826702118, "learning_rate": 9.979553237176744e-05, "loss": 1.7632, "step": 808 }, { "epoch": 0.045092246808985006, "grad_norm": 0.49766993522644043, "learning_rate": 9.979472714059506e-05, "loss": 1.917, "step": 809 }, { "epoch": 0.04514798506214815, "grad_norm": 0.5208562612533569, "learning_rate": 9.979392033022427e-05, "loss": 1.946, "step": 810 }, { "epoch": 0.0452037233153113, "grad_norm": 0.4790688753128052, "learning_rate": 9.979311194068064e-05, "loss": 1.8072, "step": 811 }, { "epoch": 0.04525946156847444, "grad_norm": 0.46075010299682617, "learning_rate": 9.979230197198981e-05, "loss": 1.6243, "step": 812 }, { "epoch": 0.04531519982163759, "grad_norm": 0.488349974155426, "learning_rate": 9.979149042417749e-05, "loss": 1.7733, "step": 813 }, { "epoch": 0.045370938074800735, "grad_norm": 0.4905661940574646, "learning_rate": 9.979067729726938e-05, "loss": 1.821, "step": 814 }, { "epoch": 0.045426676327963884, "grad_norm": 0.5073617696762085, "learning_rate": 9.978986259129129e-05, "loss": 1.8286, "step": 815 }, { "epoch": 0.04548241458112703, "grad_norm": 0.5074631571769714, "learning_rate": 9.978904630626904e-05, "loss": 1.7967, "step": 816 }, { "epoch": 0.04553815283429017, "grad_norm": 0.5455936193466187, "learning_rate": 9.978822844222855e-05, "loss": 1.9883, "step": 817 }, { "epoch": 0.04559389108745332, "grad_norm": 0.5111860632896423, "learning_rate": 9.978740899919574e-05, "loss": 1.8694, "step": 818 }, { "epoch": 0.04564962934061646, "grad_norm": 0.4975983202457428, "learning_rate": 9.978658797719658e-05, "loss": 1.714, "step": 819 }, { "epoch": 0.04570536759377961, "grad_norm": 0.4770795702934265, "learning_rate": 9.978576537625714e-05, "loss": 1.8288, "step": 820 }, { "epoch": 0.045761105846942755, "grad_norm": 0.5559741854667664, "learning_rate": 9.97849411964035e-05, "loss": 2.093, "step": 821 }, { "epoch": 0.045816844100105905, "grad_norm": 0.4961313307285309, "learning_rate": 9.978411543766177e-05, "loss": 1.6607, "step": 822 }, { "epoch": 0.04587258235326905, "grad_norm": 0.5356935262680054, "learning_rate": 9.978328810005816e-05, "loss": 1.9762, "step": 823 }, { "epoch": 0.0459283206064322, "grad_norm": 0.4933258295059204, "learning_rate": 9.978245918361893e-05, "loss": 1.6018, "step": 824 }, { "epoch": 0.04598405885959534, "grad_norm": 0.5278127193450928, "learning_rate": 9.978162868837034e-05, "loss": 1.8532, "step": 825 }, { "epoch": 0.04603979711275848, "grad_norm": 0.4802572429180145, "learning_rate": 9.978079661433873e-05, "loss": 1.7551, "step": 826 }, { "epoch": 0.04609553536592163, "grad_norm": 0.4906105101108551, "learning_rate": 9.977996296155049e-05, "loss": 1.7463, "step": 827 }, { "epoch": 0.046151273619084776, "grad_norm": 0.43020668625831604, "learning_rate": 9.977912773003206e-05, "loss": 1.6216, "step": 828 }, { "epoch": 0.046207011872247926, "grad_norm": 0.49433162808418274, "learning_rate": 9.977829091980995e-05, "loss": 1.9011, "step": 829 }, { "epoch": 0.04626275012541107, "grad_norm": 0.45222243666648865, "learning_rate": 9.977745253091067e-05, "loss": 1.3583, "step": 830 }, { "epoch": 0.04631848837857422, "grad_norm": 0.4955357015132904, "learning_rate": 9.977661256336081e-05, "loss": 1.7256, "step": 831 }, { "epoch": 0.04637422663173736, "grad_norm": 0.5137125253677368, "learning_rate": 9.977577101718701e-05, "loss": 1.8484, "step": 832 }, { "epoch": 0.046429964884900504, "grad_norm": 0.49741753935813904, "learning_rate": 9.977492789241598e-05, "loss": 1.6564, "step": 833 }, { "epoch": 0.046485703138063654, "grad_norm": 0.4994182586669922, "learning_rate": 9.977408318907444e-05, "loss": 1.721, "step": 834 }, { "epoch": 0.0465414413912268, "grad_norm": 0.539135754108429, "learning_rate": 9.97732369071892e-05, "loss": 2.0474, "step": 835 }, { "epoch": 0.04659717964438995, "grad_norm": 0.49502313137054443, "learning_rate": 9.977238904678707e-05, "loss": 1.4078, "step": 836 }, { "epoch": 0.04665291789755309, "grad_norm": 0.4542715549468994, "learning_rate": 9.977153960789497e-05, "loss": 1.5402, "step": 837 }, { "epoch": 0.04670865615071624, "grad_norm": 0.48588764667510986, "learning_rate": 9.97706885905398e-05, "loss": 1.8641, "step": 838 }, { "epoch": 0.04676439440387938, "grad_norm": 0.529255211353302, "learning_rate": 9.976983599474857e-05, "loss": 1.8055, "step": 839 }, { "epoch": 0.046820132657042525, "grad_norm": 0.4630698561668396, "learning_rate": 9.976898182054832e-05, "loss": 1.5263, "step": 840 }, { "epoch": 0.046875870910205675, "grad_norm": 0.5334575176239014, "learning_rate": 9.976812606796615e-05, "loss": 1.7926, "step": 841 }, { "epoch": 0.04693160916336882, "grad_norm": 0.49275916814804077, "learning_rate": 9.976726873702918e-05, "loss": 1.6341, "step": 842 }, { "epoch": 0.04698734741653197, "grad_norm": 0.5276961326599121, "learning_rate": 9.976640982776461e-05, "loss": 1.882, "step": 843 }, { "epoch": 0.04704308566969511, "grad_norm": 0.49929726123809814, "learning_rate": 9.97655493401997e-05, "loss": 1.6004, "step": 844 }, { "epoch": 0.04709882392285826, "grad_norm": 0.4716168940067291, "learning_rate": 9.97646872743617e-05, "loss": 1.7355, "step": 845 }, { "epoch": 0.0471545621760214, "grad_norm": 0.5293796062469482, "learning_rate": 9.976382363027797e-05, "loss": 1.9073, "step": 846 }, { "epoch": 0.04721030042918455, "grad_norm": 0.47008490562438965, "learning_rate": 9.976295840797589e-05, "loss": 1.6875, "step": 847 }, { "epoch": 0.047266038682347696, "grad_norm": 0.48457372188568115, "learning_rate": 9.976209160748292e-05, "loss": 1.6172, "step": 848 }, { "epoch": 0.04732177693551084, "grad_norm": 0.500151515007019, "learning_rate": 9.976122322882653e-05, "loss": 1.6371, "step": 849 }, { "epoch": 0.04737751518867399, "grad_norm": 0.5459775924682617, "learning_rate": 9.976035327203427e-05, "loss": 1.9283, "step": 850 }, { "epoch": 0.04743325344183713, "grad_norm": 0.5352368950843811, "learning_rate": 9.975948173713374e-05, "loss": 2.0407, "step": 851 }, { "epoch": 0.04748899169500028, "grad_norm": 0.5491572618484497, "learning_rate": 9.975860862415254e-05, "loss": 1.7475, "step": 852 }, { "epoch": 0.047544729948163424, "grad_norm": 0.49011510610580444, "learning_rate": 9.975773393311841e-05, "loss": 1.7922, "step": 853 }, { "epoch": 0.04760046820132657, "grad_norm": 0.5197030305862427, "learning_rate": 9.975685766405906e-05, "loss": 1.7012, "step": 854 }, { "epoch": 0.047656206454489716, "grad_norm": 0.487704336643219, "learning_rate": 9.975597981700228e-05, "loss": 1.6647, "step": 855 }, { "epoch": 0.04771194470765286, "grad_norm": 0.4743403196334839, "learning_rate": 9.975510039197592e-05, "loss": 1.5522, "step": 856 }, { "epoch": 0.04776768296081601, "grad_norm": 0.46670085191726685, "learning_rate": 9.975421938900789e-05, "loss": 1.5235, "step": 857 }, { "epoch": 0.04782342121397915, "grad_norm": 0.48920536041259766, "learning_rate": 9.975333680812609e-05, "loss": 1.8876, "step": 858 }, { "epoch": 0.0478791594671423, "grad_norm": 0.5793198943138123, "learning_rate": 9.975245264935852e-05, "loss": 1.8422, "step": 859 }, { "epoch": 0.047934897720305444, "grad_norm": 0.49111589789390564, "learning_rate": 9.975156691273324e-05, "loss": 1.7702, "step": 860 }, { "epoch": 0.047990635973468594, "grad_norm": 0.5276595950126648, "learning_rate": 9.975067959827833e-05, "loss": 1.9332, "step": 861 }, { "epoch": 0.04804637422663174, "grad_norm": 0.4866962134838104, "learning_rate": 9.974979070602192e-05, "loss": 1.7497, "step": 862 }, { "epoch": 0.04810211247979488, "grad_norm": 0.5197125673294067, "learning_rate": 9.974890023599222e-05, "loss": 2.0405, "step": 863 }, { "epoch": 0.04815785073295803, "grad_norm": 0.49782440066337585, "learning_rate": 9.974800818821746e-05, "loss": 1.7609, "step": 864 }, { "epoch": 0.04821358898612117, "grad_norm": 0.52313232421875, "learning_rate": 9.974711456272593e-05, "loss": 1.9515, "step": 865 }, { "epoch": 0.04826932723928432, "grad_norm": 0.4546637237071991, "learning_rate": 9.974621935954597e-05, "loss": 1.645, "step": 866 }, { "epoch": 0.048325065492447465, "grad_norm": 0.47760143876075745, "learning_rate": 9.974532257870596e-05, "loss": 1.7104, "step": 867 }, { "epoch": 0.048380803745610615, "grad_norm": 0.4868486225605011, "learning_rate": 9.974442422023438e-05, "loss": 1.8043, "step": 868 }, { "epoch": 0.04843654199877376, "grad_norm": 0.5107572078704834, "learning_rate": 9.974352428415968e-05, "loss": 1.9662, "step": 869 }, { "epoch": 0.04849228025193691, "grad_norm": 0.5269783139228821, "learning_rate": 9.974262277051041e-05, "loss": 1.8876, "step": 870 }, { "epoch": 0.04854801850510005, "grad_norm": 0.48782503604888916, "learning_rate": 9.974171967931519e-05, "loss": 1.5996, "step": 871 }, { "epoch": 0.04860375675826319, "grad_norm": 0.5057775974273682, "learning_rate": 9.974081501060259e-05, "loss": 1.6907, "step": 872 }, { "epoch": 0.04865949501142634, "grad_norm": 0.4904307723045349, "learning_rate": 9.973990876440138e-05, "loss": 1.7377, "step": 873 }, { "epoch": 0.048715233264589486, "grad_norm": 0.4725581407546997, "learning_rate": 9.973900094074027e-05, "loss": 1.8001, "step": 874 }, { "epoch": 0.048770971517752636, "grad_norm": 0.527885913848877, "learning_rate": 9.973809153964804e-05, "loss": 1.8128, "step": 875 }, { "epoch": 0.04882670977091578, "grad_norm": 0.5520697236061096, "learning_rate": 9.973718056115354e-05, "loss": 2.0648, "step": 876 }, { "epoch": 0.04888244802407893, "grad_norm": 0.4812840223312378, "learning_rate": 9.973626800528566e-05, "loss": 1.8552, "step": 877 }, { "epoch": 0.04893818627724207, "grad_norm": 0.46856966614723206, "learning_rate": 9.973535387207333e-05, "loss": 1.577, "step": 878 }, { "epoch": 0.048993924530405214, "grad_norm": 0.4921995997428894, "learning_rate": 9.973443816154557e-05, "loss": 1.66, "step": 879 }, { "epoch": 0.049049662783568364, "grad_norm": 0.5018383264541626, "learning_rate": 9.97335208737314e-05, "loss": 1.7623, "step": 880 }, { "epoch": 0.04910540103673151, "grad_norm": 0.5345847010612488, "learning_rate": 9.973260200865991e-05, "loss": 1.8681, "step": 881 }, { "epoch": 0.049161139289894656, "grad_norm": 0.5296522974967957, "learning_rate": 9.973168156636025e-05, "loss": 1.9225, "step": 882 }, { "epoch": 0.0492168775430578, "grad_norm": 0.5303376317024231, "learning_rate": 9.97307595468616e-05, "loss": 1.8308, "step": 883 }, { "epoch": 0.04927261579622095, "grad_norm": 0.45620301365852356, "learning_rate": 9.97298359501932e-05, "loss": 1.5791, "step": 884 }, { "epoch": 0.04932835404938409, "grad_norm": 0.5314328074455261, "learning_rate": 9.972891077638438e-05, "loss": 1.7279, "step": 885 }, { "epoch": 0.049384092302547235, "grad_norm": 0.4765213429927826, "learning_rate": 9.972798402546441e-05, "loss": 1.5131, "step": 886 }, { "epoch": 0.049439830555710385, "grad_norm": 0.4913032054901123, "learning_rate": 9.972705569746274e-05, "loss": 1.6591, "step": 887 }, { "epoch": 0.04949556880887353, "grad_norm": 0.48732152581214905, "learning_rate": 9.972612579240881e-05, "loss": 1.7141, "step": 888 }, { "epoch": 0.04955130706203668, "grad_norm": 0.5283141732215881, "learning_rate": 9.972519431033206e-05, "loss": 1.8636, "step": 889 }, { "epoch": 0.04960704531519982, "grad_norm": 0.5298954844474792, "learning_rate": 9.972426125126209e-05, "loss": 1.7943, "step": 890 }, { "epoch": 0.04966278356836297, "grad_norm": 0.5104478597640991, "learning_rate": 9.972332661522845e-05, "loss": 1.6949, "step": 891 }, { "epoch": 0.04971852182152611, "grad_norm": 0.5439249873161316, "learning_rate": 9.972239040226082e-05, "loss": 1.9313, "step": 892 }, { "epoch": 0.04977426007468926, "grad_norm": 0.4874706566333771, "learning_rate": 9.972145261238884e-05, "loss": 1.8589, "step": 893 }, { "epoch": 0.049829998327852405, "grad_norm": 0.5243585705757141, "learning_rate": 9.972051324564229e-05, "loss": 1.9736, "step": 894 }, { "epoch": 0.04988573658101555, "grad_norm": 0.5669842958450317, "learning_rate": 9.971957230205096e-05, "loss": 2.1093, "step": 895 }, { "epoch": 0.0499414748341787, "grad_norm": 0.4888775050640106, "learning_rate": 9.971862978164466e-05, "loss": 1.6786, "step": 896 }, { "epoch": 0.04999721308734184, "grad_norm": 0.5279240608215332, "learning_rate": 9.971768568445332e-05, "loss": 1.8162, "step": 897 }, { "epoch": 0.05005295134050499, "grad_norm": 0.4473552405834198, "learning_rate": 9.971674001050686e-05, "loss": 1.3044, "step": 898 }, { "epoch": 0.05010868959366813, "grad_norm": 0.4724571704864502, "learning_rate": 9.971579275983527e-05, "loss": 1.7169, "step": 899 }, { "epoch": 0.05016442784683128, "grad_norm": 0.4805344343185425, "learning_rate": 9.971484393246861e-05, "loss": 1.4898, "step": 900 }, { "epoch": 0.050220166099994426, "grad_norm": 0.4852250814437866, "learning_rate": 9.971389352843695e-05, "loss": 1.6325, "step": 901 }, { "epoch": 0.05027590435315757, "grad_norm": 0.49681854248046875, "learning_rate": 9.971294154777044e-05, "loss": 1.5962, "step": 902 }, { "epoch": 0.05033164260632072, "grad_norm": 0.5085350871086121, "learning_rate": 9.971198799049928e-05, "loss": 1.8215, "step": 903 }, { "epoch": 0.05038738085948386, "grad_norm": 0.49748629331588745, "learning_rate": 9.971103285665369e-05, "loss": 1.9647, "step": 904 }, { "epoch": 0.05044311911264701, "grad_norm": 0.4835662543773651, "learning_rate": 9.971007614626397e-05, "loss": 1.6109, "step": 905 }, { "epoch": 0.050498857365810154, "grad_norm": 0.5058585405349731, "learning_rate": 9.970911785936047e-05, "loss": 1.6419, "step": 906 }, { "epoch": 0.050554595618973304, "grad_norm": 0.5386664271354675, "learning_rate": 9.970815799597358e-05, "loss": 1.6144, "step": 907 }, { "epoch": 0.05061033387213645, "grad_norm": 0.5337561964988708, "learning_rate": 9.970719655613373e-05, "loss": 1.7978, "step": 908 }, { "epoch": 0.05066607212529959, "grad_norm": 0.532317578792572, "learning_rate": 9.970623353987141e-05, "loss": 1.8175, "step": 909 }, { "epoch": 0.05072181037846274, "grad_norm": 0.5630917549133301, "learning_rate": 9.97052689472172e-05, "loss": 2.043, "step": 910 }, { "epoch": 0.05077754863162588, "grad_norm": 0.554322361946106, "learning_rate": 9.970430277820165e-05, "loss": 1.9165, "step": 911 }, { "epoch": 0.05083328688478903, "grad_norm": 0.49685636162757874, "learning_rate": 9.970333503285539e-05, "loss": 1.8203, "step": 912 }, { "epoch": 0.050889025137952175, "grad_norm": 0.5380950570106506, "learning_rate": 9.970236571120915e-05, "loss": 1.9429, "step": 913 }, { "epoch": 0.050944763391115325, "grad_norm": 0.5279613733291626, "learning_rate": 9.970139481329364e-05, "loss": 2.0989, "step": 914 }, { "epoch": 0.05100050164427847, "grad_norm": 0.509904682636261, "learning_rate": 9.970042233913968e-05, "loss": 1.7213, "step": 915 }, { "epoch": 0.05105623989744162, "grad_norm": 0.48252367973327637, "learning_rate": 9.96994482887781e-05, "loss": 1.6979, "step": 916 }, { "epoch": 0.05111197815060476, "grad_norm": 0.5245582461357117, "learning_rate": 9.969847266223979e-05, "loss": 1.7629, "step": 917 }, { "epoch": 0.0511677164037679, "grad_norm": 0.48625627160072327, "learning_rate": 9.969749545955567e-05, "loss": 1.7208, "step": 918 }, { "epoch": 0.05122345465693105, "grad_norm": 0.5168225169181824, "learning_rate": 9.969651668075678e-05, "loss": 1.952, "step": 919 }, { "epoch": 0.051279192910094196, "grad_norm": 0.47759923338890076, "learning_rate": 9.969553632587409e-05, "loss": 1.6574, "step": 920 }, { "epoch": 0.051334931163257345, "grad_norm": 0.49498680233955383, "learning_rate": 9.969455439493877e-05, "loss": 1.6173, "step": 921 }, { "epoch": 0.05139066941642049, "grad_norm": 0.48092684149742126, "learning_rate": 9.96935708879819e-05, "loss": 1.6471, "step": 922 }, { "epoch": 0.05144640766958364, "grad_norm": 0.5342095494270325, "learning_rate": 9.969258580503471e-05, "loss": 2.0134, "step": 923 }, { "epoch": 0.05150214592274678, "grad_norm": 0.58601975440979, "learning_rate": 9.969159914612843e-05, "loss": 2.1658, "step": 924 }, { "epoch": 0.051557884175909924, "grad_norm": 0.4867340922355652, "learning_rate": 9.969061091129433e-05, "loss": 1.9766, "step": 925 }, { "epoch": 0.051613622429073074, "grad_norm": 0.4857270121574402, "learning_rate": 9.968962110056379e-05, "loss": 1.678, "step": 926 }, { "epoch": 0.051669360682236216, "grad_norm": 0.5170820355415344, "learning_rate": 9.968862971396816e-05, "loss": 1.8249, "step": 927 }, { "epoch": 0.051725098935399366, "grad_norm": 0.4657866358757019, "learning_rate": 9.96876367515389e-05, "loss": 1.7606, "step": 928 }, { "epoch": 0.05178083718856251, "grad_norm": 0.5119996666908264, "learning_rate": 9.968664221330751e-05, "loss": 1.8612, "step": 929 }, { "epoch": 0.05183657544172566, "grad_norm": 0.5372640490531921, "learning_rate": 9.968564609930553e-05, "loss": 1.8672, "step": 930 }, { "epoch": 0.0518923136948888, "grad_norm": 0.49778059124946594, "learning_rate": 9.968464840956453e-05, "loss": 1.766, "step": 931 }, { "epoch": 0.05194805194805195, "grad_norm": 0.5260003805160522, "learning_rate": 9.968364914411616e-05, "loss": 1.8631, "step": 932 }, { "epoch": 0.052003790201215094, "grad_norm": 0.5278846621513367, "learning_rate": 9.968264830299213e-05, "loss": 1.5441, "step": 933 }, { "epoch": 0.05205952845437824, "grad_norm": 0.5427425503730774, "learning_rate": 9.968164588622415e-05, "loss": 1.7751, "step": 934 }, { "epoch": 0.05211526670754139, "grad_norm": 0.4653323292732239, "learning_rate": 9.968064189384403e-05, "loss": 1.662, "step": 935 }, { "epoch": 0.05217100496070453, "grad_norm": 0.5192728638648987, "learning_rate": 9.967963632588362e-05, "loss": 1.7384, "step": 936 }, { "epoch": 0.05222674321386768, "grad_norm": 0.4995409846305847, "learning_rate": 9.96786291823748e-05, "loss": 1.8133, "step": 937 }, { "epoch": 0.05228248146703082, "grad_norm": 0.5626217722892761, "learning_rate": 9.96776204633495e-05, "loss": 1.8851, "step": 938 }, { "epoch": 0.05233821972019397, "grad_norm": 0.5185354351997375, "learning_rate": 9.967661016883972e-05, "loss": 1.6583, "step": 939 }, { "epoch": 0.052393957973357115, "grad_norm": 0.5034851431846619, "learning_rate": 9.967559829887749e-05, "loss": 1.6385, "step": 940 }, { "epoch": 0.05244969622652026, "grad_norm": 0.4795439541339874, "learning_rate": 9.967458485349492e-05, "loss": 1.6901, "step": 941 }, { "epoch": 0.05250543447968341, "grad_norm": 0.6365668177604675, "learning_rate": 9.967356983272414e-05, "loss": 1.9757, "step": 942 }, { "epoch": 0.05256117273284655, "grad_norm": 0.48566654324531555, "learning_rate": 9.967255323659734e-05, "loss": 1.6266, "step": 943 }, { "epoch": 0.0526169109860097, "grad_norm": 0.4971524775028229, "learning_rate": 9.967153506514677e-05, "loss": 1.6938, "step": 944 }, { "epoch": 0.05267264923917284, "grad_norm": 0.5263299345970154, "learning_rate": 9.967051531840471e-05, "loss": 1.8448, "step": 945 }, { "epoch": 0.05272838749233599, "grad_norm": 0.4903882145881653, "learning_rate": 9.96694939964035e-05, "loss": 1.5313, "step": 946 }, { "epoch": 0.052784125745499136, "grad_norm": 0.5515956878662109, "learning_rate": 9.966847109917555e-05, "loss": 1.9398, "step": 947 }, { "epoch": 0.05283986399866228, "grad_norm": 0.47069814801216125, "learning_rate": 9.966744662675326e-05, "loss": 1.8052, "step": 948 }, { "epoch": 0.05289560225182543, "grad_norm": 0.4904758036136627, "learning_rate": 9.966642057916915e-05, "loss": 1.7875, "step": 949 }, { "epoch": 0.05295134050498857, "grad_norm": 0.5010367035865784, "learning_rate": 9.966539295645576e-05, "loss": 1.6786, "step": 950 }, { "epoch": 0.05300707875815172, "grad_norm": 0.4812747538089752, "learning_rate": 9.966436375864567e-05, "loss": 1.473, "step": 951 }, { "epoch": 0.053062817011314864, "grad_norm": 0.5010087490081787, "learning_rate": 9.966333298577154e-05, "loss": 1.7648, "step": 952 }, { "epoch": 0.053118555264478014, "grad_norm": 0.5247920155525208, "learning_rate": 9.966230063786602e-05, "loss": 1.6435, "step": 953 }, { "epoch": 0.05317429351764116, "grad_norm": 0.5183125734329224, "learning_rate": 9.96612667149619e-05, "loss": 1.762, "step": 954 }, { "epoch": 0.053230031770804306, "grad_norm": 0.5197505950927734, "learning_rate": 9.966023121709192e-05, "loss": 1.8957, "step": 955 }, { "epoch": 0.05328577002396745, "grad_norm": 0.4871842563152313, "learning_rate": 9.965919414428896e-05, "loss": 1.8783, "step": 956 }, { "epoch": 0.05334150827713059, "grad_norm": 0.4965290427207947, "learning_rate": 9.965815549658589e-05, "loss": 1.8575, "step": 957 }, { "epoch": 0.05339724653029374, "grad_norm": 0.5005083680152893, "learning_rate": 9.965711527401567e-05, "loss": 1.7704, "step": 958 }, { "epoch": 0.053452984783456885, "grad_norm": 0.4561206102371216, "learning_rate": 9.965607347661125e-05, "loss": 1.6103, "step": 959 }, { "epoch": 0.053508723036620034, "grad_norm": 0.5352826714515686, "learning_rate": 9.965503010440571e-05, "loss": 1.9864, "step": 960 }, { "epoch": 0.05356446128978318, "grad_norm": 0.4568333327770233, "learning_rate": 9.965398515743212e-05, "loss": 1.7264, "step": 961 }, { "epoch": 0.05362019954294633, "grad_norm": 0.5570031404495239, "learning_rate": 9.965293863572363e-05, "loss": 2.2176, "step": 962 }, { "epoch": 0.05367593779610947, "grad_norm": 0.5380359888076782, "learning_rate": 9.96518905393134e-05, "loss": 2.0434, "step": 963 }, { "epoch": 0.05373167604927261, "grad_norm": 0.46430766582489014, "learning_rate": 9.965084086823472e-05, "loss": 1.4151, "step": 964 }, { "epoch": 0.05378741430243576, "grad_norm": 0.4653235077857971, "learning_rate": 9.964978962252085e-05, "loss": 1.7144, "step": 965 }, { "epoch": 0.053843152555598905, "grad_norm": 0.49018028378486633, "learning_rate": 9.964873680220512e-05, "loss": 1.6531, "step": 966 }, { "epoch": 0.053898890808762055, "grad_norm": 0.5718449354171753, "learning_rate": 9.964768240732093e-05, "loss": 1.9851, "step": 967 }, { "epoch": 0.0539546290619252, "grad_norm": 0.5048679113388062, "learning_rate": 9.964662643790173e-05, "loss": 1.9137, "step": 968 }, { "epoch": 0.05401036731508835, "grad_norm": 0.5291681885719299, "learning_rate": 9.9645568893981e-05, "loss": 1.8972, "step": 969 }, { "epoch": 0.05406610556825149, "grad_norm": 0.5041894316673279, "learning_rate": 9.964450977559226e-05, "loss": 1.5612, "step": 970 }, { "epoch": 0.054121843821414634, "grad_norm": 0.561788022518158, "learning_rate": 9.964344908276914e-05, "loss": 2.0708, "step": 971 }, { "epoch": 0.05417758207457778, "grad_norm": 0.4838697016239166, "learning_rate": 9.964238681554524e-05, "loss": 1.6573, "step": 972 }, { "epoch": 0.054233320327740926, "grad_norm": 0.5092923641204834, "learning_rate": 9.964132297395428e-05, "loss": 1.918, "step": 973 }, { "epoch": 0.054289058580904076, "grad_norm": 0.5128215551376343, "learning_rate": 9.964025755802997e-05, "loss": 1.721, "step": 974 }, { "epoch": 0.05434479683406722, "grad_norm": 0.597062885761261, "learning_rate": 9.963919056780612e-05, "loss": 1.9453, "step": 975 }, { "epoch": 0.05440053508723037, "grad_norm": 0.5623565316200256, "learning_rate": 9.963812200331656e-05, "loss": 1.9271, "step": 976 }, { "epoch": 0.05445627334039351, "grad_norm": 0.4568030834197998, "learning_rate": 9.963705186459517e-05, "loss": 1.5766, "step": 977 }, { "epoch": 0.05451201159355666, "grad_norm": 0.4906899631023407, "learning_rate": 9.963598015167592e-05, "loss": 1.7721, "step": 978 }, { "epoch": 0.054567749846719804, "grad_norm": 0.5041657090187073, "learning_rate": 9.963490686459277e-05, "loss": 1.6293, "step": 979 }, { "epoch": 0.05462348809988295, "grad_norm": 0.533762514591217, "learning_rate": 9.963383200337977e-05, "loss": 1.8723, "step": 980 }, { "epoch": 0.0546792263530461, "grad_norm": 0.4968359172344208, "learning_rate": 9.963275556807098e-05, "loss": 1.7368, "step": 981 }, { "epoch": 0.05473496460620924, "grad_norm": 0.4822302758693695, "learning_rate": 9.963167755870059e-05, "loss": 1.4994, "step": 982 }, { "epoch": 0.05479070285937239, "grad_norm": 0.5066803097724915, "learning_rate": 9.963059797530274e-05, "loss": 1.8058, "step": 983 }, { "epoch": 0.05484644111253553, "grad_norm": 0.518132209777832, "learning_rate": 9.96295168179117e-05, "loss": 1.7393, "step": 984 }, { "epoch": 0.05490217936569868, "grad_norm": 0.5607625842094421, "learning_rate": 9.962843408656176e-05, "loss": 2.149, "step": 985 }, { "epoch": 0.054957917618861825, "grad_norm": 0.5685406923294067, "learning_rate": 9.962734978128723e-05, "loss": 2.1734, "step": 986 }, { "epoch": 0.05501365587202497, "grad_norm": 0.5319599509239197, "learning_rate": 9.962626390212251e-05, "loss": 1.8782, "step": 987 }, { "epoch": 0.05506939412518812, "grad_norm": 0.4679426848888397, "learning_rate": 9.962517644910204e-05, "loss": 1.7033, "step": 988 }, { "epoch": 0.05512513237835126, "grad_norm": 0.5416939854621887, "learning_rate": 9.962408742226032e-05, "loss": 1.969, "step": 989 }, { "epoch": 0.05518087063151441, "grad_norm": 0.49005210399627686, "learning_rate": 9.962299682163186e-05, "loss": 1.8229, "step": 990 }, { "epoch": 0.05523660888467755, "grad_norm": 0.5170348286628723, "learning_rate": 9.962190464725128e-05, "loss": 1.8161, "step": 991 }, { "epoch": 0.0552923471378407, "grad_norm": 0.5188906192779541, "learning_rate": 9.962081089915319e-05, "loss": 1.938, "step": 992 }, { "epoch": 0.055348085391003846, "grad_norm": 0.4945777952671051, "learning_rate": 9.961971557737227e-05, "loss": 1.7414, "step": 993 }, { "epoch": 0.05540382364416699, "grad_norm": 0.511976420879364, "learning_rate": 9.96186186819433e-05, "loss": 1.8595, "step": 994 }, { "epoch": 0.05545956189733014, "grad_norm": 0.5381083488464355, "learning_rate": 9.961752021290103e-05, "loss": 1.8233, "step": 995 }, { "epoch": 0.05551530015049328, "grad_norm": 0.4679305851459503, "learning_rate": 9.961642017028033e-05, "loss": 1.6666, "step": 996 }, { "epoch": 0.05557103840365643, "grad_norm": 0.5513458847999573, "learning_rate": 9.961531855411603e-05, "loss": 2.0589, "step": 997 }, { "epoch": 0.055626776656819574, "grad_norm": 0.5168341994285583, "learning_rate": 9.961421536444313e-05, "loss": 2.0774, "step": 998 }, { "epoch": 0.055682514909982724, "grad_norm": 0.5111126899719238, "learning_rate": 9.961311060129659e-05, "loss": 1.5936, "step": 999 }, { "epoch": 0.055738253163145866, "grad_norm": 0.5352098941802979, "learning_rate": 9.961200426471142e-05, "loss": 1.8414, "step": 1000 }, { "epoch": 0.055793991416309016, "grad_norm": 0.47616758942604065, "learning_rate": 9.961089635472276e-05, "loss": 1.6496, "step": 1001 }, { "epoch": 0.05584972966947216, "grad_norm": 0.4767918288707733, "learning_rate": 9.96097868713657e-05, "loss": 1.3193, "step": 1002 }, { "epoch": 0.0559054679226353, "grad_norm": 0.46608811616897583, "learning_rate": 9.960867581467546e-05, "loss": 1.6453, "step": 1003 }, { "epoch": 0.05596120617579845, "grad_norm": 0.5042111277580261, "learning_rate": 9.960756318468726e-05, "loss": 1.8798, "step": 1004 }, { "epoch": 0.056016944428961594, "grad_norm": 0.5502855777740479, "learning_rate": 9.960644898143639e-05, "loss": 1.9322, "step": 1005 }, { "epoch": 0.056072682682124744, "grad_norm": 0.4749864935874939, "learning_rate": 9.960533320495818e-05, "loss": 1.5659, "step": 1006 }, { "epoch": 0.05612842093528789, "grad_norm": 0.4787498712539673, "learning_rate": 9.960421585528802e-05, "loss": 1.8482, "step": 1007 }, { "epoch": 0.05618415918845104, "grad_norm": 0.578971266746521, "learning_rate": 9.960309693246135e-05, "loss": 1.9905, "step": 1008 }, { "epoch": 0.05623989744161418, "grad_norm": 0.4983009099960327, "learning_rate": 9.960197643651363e-05, "loss": 1.722, "step": 1009 }, { "epoch": 0.05629563569477732, "grad_norm": 0.5528213977813721, "learning_rate": 9.960085436748044e-05, "loss": 1.8293, "step": 1010 }, { "epoch": 0.05635137394794047, "grad_norm": 0.49824774265289307, "learning_rate": 9.959973072539734e-05, "loss": 1.8081, "step": 1011 }, { "epoch": 0.056407112201103615, "grad_norm": 0.49810606241226196, "learning_rate": 9.959860551029996e-05, "loss": 1.5834, "step": 1012 }, { "epoch": 0.056462850454266765, "grad_norm": 0.515215277671814, "learning_rate": 9.9597478722224e-05, "loss": 1.8318, "step": 1013 }, { "epoch": 0.05651858870742991, "grad_norm": 0.5139912962913513, "learning_rate": 9.959635036120518e-05, "loss": 1.7475, "step": 1014 }, { "epoch": 0.05657432696059306, "grad_norm": 0.4912470579147339, "learning_rate": 9.959522042727932e-05, "loss": 1.6809, "step": 1015 }, { "epoch": 0.0566300652137562, "grad_norm": 0.4990215003490448, "learning_rate": 9.959408892048219e-05, "loss": 1.7024, "step": 1016 }, { "epoch": 0.05668580346691934, "grad_norm": 0.5626692771911621, "learning_rate": 9.959295584084974e-05, "loss": 1.9791, "step": 1017 }, { "epoch": 0.05674154172008249, "grad_norm": 0.4737264811992645, "learning_rate": 9.959182118841786e-05, "loss": 1.5592, "step": 1018 }, { "epoch": 0.056797279973245636, "grad_norm": 0.5367196798324585, "learning_rate": 9.959068496322256e-05, "loss": 2.012, "step": 1019 }, { "epoch": 0.056853018226408786, "grad_norm": 0.5062724947929382, "learning_rate": 9.958954716529987e-05, "loss": 1.6301, "step": 1020 }, { "epoch": 0.05690875647957193, "grad_norm": 0.5419873595237732, "learning_rate": 9.958840779468586e-05, "loss": 1.8351, "step": 1021 }, { "epoch": 0.05696449473273508, "grad_norm": 0.5291727781295776, "learning_rate": 9.958726685141668e-05, "loss": 1.8221, "step": 1022 }, { "epoch": 0.05702023298589822, "grad_norm": 0.5285983085632324, "learning_rate": 9.958612433552852e-05, "loss": 1.8575, "step": 1023 }, { "epoch": 0.05707597123906137, "grad_norm": 0.49050652980804443, "learning_rate": 9.95849802470576e-05, "loss": 1.7646, "step": 1024 }, { "epoch": 0.057131709492224514, "grad_norm": 0.49379006028175354, "learning_rate": 9.95838345860402e-05, "loss": 1.6789, "step": 1025 }, { "epoch": 0.05718744774538766, "grad_norm": 0.4859938621520996, "learning_rate": 9.958268735251266e-05, "loss": 1.8542, "step": 1026 }, { "epoch": 0.057243185998550807, "grad_norm": 0.5445101857185364, "learning_rate": 9.958153854651136e-05, "loss": 1.819, "step": 1027 }, { "epoch": 0.05729892425171395, "grad_norm": 0.5075321197509766, "learning_rate": 9.958038816807276e-05, "loss": 1.7872, "step": 1028 }, { "epoch": 0.0573546625048771, "grad_norm": 0.4982723593711853, "learning_rate": 9.957923621723329e-05, "loss": 1.8243, "step": 1029 }, { "epoch": 0.05741040075804024, "grad_norm": 0.49452096223831177, "learning_rate": 9.957808269402954e-05, "loss": 1.7316, "step": 1030 }, { "epoch": 0.05746613901120339, "grad_norm": 0.5450426936149597, "learning_rate": 9.957692759849806e-05, "loss": 2.0758, "step": 1031 }, { "epoch": 0.057521877264366535, "grad_norm": 0.5058251023292542, "learning_rate": 9.957577093067548e-05, "loss": 1.6588, "step": 1032 }, { "epoch": 0.05757761551752968, "grad_norm": 0.4902496039867401, "learning_rate": 9.957461269059851e-05, "loss": 1.8477, "step": 1033 }, { "epoch": 0.05763335377069283, "grad_norm": 0.5185796618461609, "learning_rate": 9.957345287830386e-05, "loss": 1.7541, "step": 1034 }, { "epoch": 0.05768909202385597, "grad_norm": 0.5609437227249146, "learning_rate": 9.95722914938283e-05, "loss": 1.8738, "step": 1035 }, { "epoch": 0.05774483027701912, "grad_norm": 0.47249266505241394, "learning_rate": 9.957112853720871e-05, "loss": 1.6668, "step": 1036 }, { "epoch": 0.05780056853018226, "grad_norm": 0.4762544333934784, "learning_rate": 9.956996400848191e-05, "loss": 1.5023, "step": 1037 }, { "epoch": 0.05785630678334541, "grad_norm": 0.5092499852180481, "learning_rate": 9.956879790768489e-05, "loss": 1.7614, "step": 1038 }, { "epoch": 0.057912045036508555, "grad_norm": 0.4864351451396942, "learning_rate": 9.95676302348546e-05, "loss": 1.7874, "step": 1039 }, { "epoch": 0.0579677832896717, "grad_norm": 0.5312706828117371, "learning_rate": 9.956646099002807e-05, "loss": 1.7864, "step": 1040 }, { "epoch": 0.05802352154283485, "grad_norm": 0.5099919438362122, "learning_rate": 9.95652901732424e-05, "loss": 1.9396, "step": 1041 }, { "epoch": 0.05807925979599799, "grad_norm": 0.4992043375968933, "learning_rate": 9.95641177845347e-05, "loss": 1.8373, "step": 1042 }, { "epoch": 0.05813499804916114, "grad_norm": 0.557106614112854, "learning_rate": 9.956294382394218e-05, "loss": 2.0565, "step": 1043 }, { "epoch": 0.058190736302324284, "grad_norm": 0.5183643102645874, "learning_rate": 9.956176829150204e-05, "loss": 1.837, "step": 1044 }, { "epoch": 0.05824647455548743, "grad_norm": 0.4911157488822937, "learning_rate": 9.956059118725158e-05, "loss": 1.736, "step": 1045 }, { "epoch": 0.058302212808650576, "grad_norm": 0.524387538433075, "learning_rate": 9.955941251122812e-05, "loss": 1.9561, "step": 1046 }, { "epoch": 0.058357951061813726, "grad_norm": 0.4891200065612793, "learning_rate": 9.955823226346905e-05, "loss": 1.723, "step": 1047 }, { "epoch": 0.05841368931497687, "grad_norm": 0.5014610886573792, "learning_rate": 9.95570504440118e-05, "loss": 1.6632, "step": 1048 }, { "epoch": 0.05846942756814001, "grad_norm": 0.46674925088882446, "learning_rate": 9.955586705289386e-05, "loss": 1.5877, "step": 1049 }, { "epoch": 0.05852516582130316, "grad_norm": 0.5613251328468323, "learning_rate": 9.955468209015273e-05, "loss": 2.0043, "step": 1050 }, { "epoch": 0.058580904074466304, "grad_norm": 0.49603840708732605, "learning_rate": 9.9553495555826e-05, "loss": 1.7604, "step": 1051 }, { "epoch": 0.058636642327629454, "grad_norm": 0.5199983716011047, "learning_rate": 9.955230744995132e-05, "loss": 1.8945, "step": 1052 }, { "epoch": 0.0586923805807926, "grad_norm": 0.5177999138832092, "learning_rate": 9.955111777256635e-05, "loss": 1.9154, "step": 1053 }, { "epoch": 0.05874811883395575, "grad_norm": 0.49996909499168396, "learning_rate": 9.954992652370885e-05, "loss": 1.6888, "step": 1054 }, { "epoch": 0.05880385708711889, "grad_norm": 0.5143979787826538, "learning_rate": 9.954873370341656e-05, "loss": 1.7544, "step": 1055 }, { "epoch": 0.05885959534028203, "grad_norm": 0.498963862657547, "learning_rate": 9.954753931172733e-05, "loss": 1.9448, "step": 1056 }, { "epoch": 0.05891533359344518, "grad_norm": 0.5648823976516724, "learning_rate": 9.954634334867902e-05, "loss": 2.0281, "step": 1057 }, { "epoch": 0.058971071846608325, "grad_norm": 0.4741098880767822, "learning_rate": 9.95451458143096e-05, "loss": 1.7383, "step": 1058 }, { "epoch": 0.059026810099771475, "grad_norm": 0.5303511023521423, "learning_rate": 9.9543946708657e-05, "loss": 1.9047, "step": 1059 }, { "epoch": 0.05908254835293462, "grad_norm": 0.6070243716239929, "learning_rate": 9.95427460317593e-05, "loss": 2.1998, "step": 1060 }, { "epoch": 0.05913828660609777, "grad_norm": 0.509857177734375, "learning_rate": 9.954154378365453e-05, "loss": 1.9788, "step": 1061 }, { "epoch": 0.05919402485926091, "grad_norm": 0.4909118711948395, "learning_rate": 9.954033996438084e-05, "loss": 1.7906, "step": 1062 }, { "epoch": 0.05924976311242406, "grad_norm": 0.5275348424911499, "learning_rate": 9.95391345739764e-05, "loss": 1.9644, "step": 1063 }, { "epoch": 0.0593055013655872, "grad_norm": 0.5134482979774475, "learning_rate": 9.953792761247946e-05, "loss": 1.7528, "step": 1064 }, { "epoch": 0.059361239618750346, "grad_norm": 0.4846155345439911, "learning_rate": 9.953671907992827e-05, "loss": 1.7198, "step": 1065 }, { "epoch": 0.059416977871913496, "grad_norm": 0.508575975894928, "learning_rate": 9.953550897636117e-05, "loss": 1.8502, "step": 1066 }, { "epoch": 0.05947271612507664, "grad_norm": 0.6168702244758606, "learning_rate": 9.953429730181653e-05, "loss": 1.8859, "step": 1067 }, { "epoch": 0.05952845437823979, "grad_norm": 0.5224670767784119, "learning_rate": 9.953308405633281e-05, "loss": 1.9667, "step": 1068 }, { "epoch": 0.05958419263140293, "grad_norm": 0.5521063208580017, "learning_rate": 9.953186923994845e-05, "loss": 1.9502, "step": 1069 }, { "epoch": 0.05963993088456608, "grad_norm": 0.5243295431137085, "learning_rate": 9.953065285270198e-05, "loss": 1.7872, "step": 1070 }, { "epoch": 0.059695669137729224, "grad_norm": 0.457383394241333, "learning_rate": 9.952943489463199e-05, "loss": 1.4861, "step": 1071 }, { "epoch": 0.059751407390892367, "grad_norm": 0.5042887330055237, "learning_rate": 9.95282153657771e-05, "loss": 1.8046, "step": 1072 }, { "epoch": 0.059807145644055516, "grad_norm": 0.5393437147140503, "learning_rate": 9.9526994266176e-05, "loss": 2.0209, "step": 1073 }, { "epoch": 0.05986288389721866, "grad_norm": 0.5133099555969238, "learning_rate": 9.952577159586739e-05, "loss": 2.0277, "step": 1074 }, { "epoch": 0.05991862215038181, "grad_norm": 0.538661539554596, "learning_rate": 9.952454735489007e-05, "loss": 1.9108, "step": 1075 }, { "epoch": 0.05997436040354495, "grad_norm": 0.5276675224304199, "learning_rate": 9.952332154328286e-05, "loss": 2.0656, "step": 1076 }, { "epoch": 0.0600300986567081, "grad_norm": 0.5048499703407288, "learning_rate": 9.952209416108461e-05, "loss": 1.757, "step": 1077 }, { "epoch": 0.060085836909871244, "grad_norm": 0.5175162553787231, "learning_rate": 9.952086520833428e-05, "loss": 1.7967, "step": 1078 }, { "epoch": 0.06014157516303439, "grad_norm": 0.5084596276283264, "learning_rate": 9.951963468507084e-05, "loss": 1.705, "step": 1079 }, { "epoch": 0.06019731341619754, "grad_norm": 0.45831501483917236, "learning_rate": 9.95184025913333e-05, "loss": 1.6394, "step": 1080 }, { "epoch": 0.06025305166936068, "grad_norm": 0.47496846318244934, "learning_rate": 9.951716892716074e-05, "loss": 1.5622, "step": 1081 }, { "epoch": 0.06030878992252383, "grad_norm": 0.5142143964767456, "learning_rate": 9.951593369259229e-05, "loss": 1.943, "step": 1082 }, { "epoch": 0.06036452817568697, "grad_norm": 0.4750124216079712, "learning_rate": 9.951469688766712e-05, "loss": 1.7855, "step": 1083 }, { "epoch": 0.06042026642885012, "grad_norm": 0.5169959664344788, "learning_rate": 9.951345851242445e-05, "loss": 1.8589, "step": 1084 }, { "epoch": 0.060476004682013265, "grad_norm": 0.4891696572303772, "learning_rate": 9.951221856690355e-05, "loss": 1.8431, "step": 1085 }, { "epoch": 0.060531742935176415, "grad_norm": 0.49664726853370667, "learning_rate": 9.951097705114378e-05, "loss": 1.8495, "step": 1086 }, { "epoch": 0.06058748118833956, "grad_norm": 0.4737338423728943, "learning_rate": 9.950973396518449e-05, "loss": 1.6244, "step": 1087 }, { "epoch": 0.0606432194415027, "grad_norm": 0.4466894865036011, "learning_rate": 9.950848930906506e-05, "loss": 1.569, "step": 1088 }, { "epoch": 0.06069895769466585, "grad_norm": 0.5531814694404602, "learning_rate": 9.950724308282504e-05, "loss": 1.8739, "step": 1089 }, { "epoch": 0.06075469594782899, "grad_norm": 0.5358182191848755, "learning_rate": 9.95059952865039e-05, "loss": 1.5985, "step": 1090 }, { "epoch": 0.06081043420099214, "grad_norm": 0.5551037788391113, "learning_rate": 9.950474592014123e-05, "loss": 1.9313, "step": 1091 }, { "epoch": 0.060866172454155286, "grad_norm": 0.46842116117477417, "learning_rate": 9.950349498377666e-05, "loss": 1.5846, "step": 1092 }, { "epoch": 0.060921910707318436, "grad_norm": 0.5490810871124268, "learning_rate": 9.950224247744986e-05, "loss": 1.7246, "step": 1093 }, { "epoch": 0.06097764896048158, "grad_norm": 0.46604838967323303, "learning_rate": 9.950098840120055e-05, "loss": 1.3499, "step": 1094 }, { "epoch": 0.06103338721364472, "grad_norm": 0.4957679808139801, "learning_rate": 9.949973275506847e-05, "loss": 1.7099, "step": 1095 }, { "epoch": 0.06108912546680787, "grad_norm": 0.5058358907699585, "learning_rate": 9.94984755390935e-05, "loss": 2.0376, "step": 1096 }, { "epoch": 0.061144863719971014, "grad_norm": 0.5344205498695374, "learning_rate": 9.949721675331546e-05, "loss": 1.8721, "step": 1097 }, { "epoch": 0.061200601973134164, "grad_norm": 0.5005959272384644, "learning_rate": 9.94959563977743e-05, "loss": 1.8502, "step": 1098 }, { "epoch": 0.06125634022629731, "grad_norm": 0.5033101439476013, "learning_rate": 9.949469447250998e-05, "loss": 1.762, "step": 1099 }, { "epoch": 0.061312078479460456, "grad_norm": 0.489114373922348, "learning_rate": 9.949343097756253e-05, "loss": 1.779, "step": 1100 }, { "epoch": 0.0613678167326236, "grad_norm": 0.49902451038360596, "learning_rate": 9.949216591297203e-05, "loss": 1.6705, "step": 1101 }, { "epoch": 0.06142355498578674, "grad_norm": 0.5019201636314392, "learning_rate": 9.949089927877858e-05, "loss": 1.6734, "step": 1102 }, { "epoch": 0.06147929323894989, "grad_norm": 0.5644415020942688, "learning_rate": 9.948963107502235e-05, "loss": 2.0193, "step": 1103 }, { "epoch": 0.061535031492113035, "grad_norm": 0.55086749792099, "learning_rate": 9.948836130174358e-05, "loss": 1.9377, "step": 1104 }, { "epoch": 0.061590769745276185, "grad_norm": 0.48262813687324524, "learning_rate": 9.94870899589825e-05, "loss": 1.6455, "step": 1105 }, { "epoch": 0.06164650799843933, "grad_norm": 0.5041834115982056, "learning_rate": 9.948581704677949e-05, "loss": 1.9186, "step": 1106 }, { "epoch": 0.06170224625160248, "grad_norm": 0.5112140774726868, "learning_rate": 9.948454256517486e-05, "loss": 1.9353, "step": 1107 }, { "epoch": 0.06175798450476562, "grad_norm": 0.5558189749717712, "learning_rate": 9.948326651420907e-05, "loss": 1.6834, "step": 1108 }, { "epoch": 0.06181372275792877, "grad_norm": 0.5652199983596802, "learning_rate": 9.948198889392255e-05, "loss": 1.8998, "step": 1109 }, { "epoch": 0.06186946101109191, "grad_norm": 0.5617989301681519, "learning_rate": 9.948070970435587e-05, "loss": 2.1707, "step": 1110 }, { "epoch": 0.061925199264255056, "grad_norm": 0.5738351941108704, "learning_rate": 9.947942894554956e-05, "loss": 1.9854, "step": 1111 }, { "epoch": 0.061980937517418205, "grad_norm": 0.4870631694793701, "learning_rate": 9.947814661754425e-05, "loss": 1.6627, "step": 1112 }, { "epoch": 0.06203667577058135, "grad_norm": 0.5056869387626648, "learning_rate": 9.947686272038059e-05, "loss": 2.0686, "step": 1113 }, { "epoch": 0.0620924140237445, "grad_norm": 0.47897595167160034, "learning_rate": 9.947557725409934e-05, "loss": 1.7178, "step": 1114 }, { "epoch": 0.06214815227690764, "grad_norm": 0.5754001140594482, "learning_rate": 9.947429021874123e-05, "loss": 1.9185, "step": 1115 }, { "epoch": 0.06220389053007079, "grad_norm": 0.5134566426277161, "learning_rate": 9.94730016143471e-05, "loss": 1.7684, "step": 1116 }, { "epoch": 0.06225962878323393, "grad_norm": 0.5307061076164246, "learning_rate": 9.947171144095779e-05, "loss": 1.8471, "step": 1117 }, { "epoch": 0.062315367036397076, "grad_norm": 0.5750778913497925, "learning_rate": 9.947041969861424e-05, "loss": 2.0452, "step": 1118 }, { "epoch": 0.062371105289560226, "grad_norm": 0.4882142245769501, "learning_rate": 9.946912638735741e-05, "loss": 1.6376, "step": 1119 }, { "epoch": 0.06242684354272337, "grad_norm": 0.5403459668159485, "learning_rate": 9.946783150722832e-05, "loss": 1.7909, "step": 1120 }, { "epoch": 0.06248258179588652, "grad_norm": 0.6261606812477112, "learning_rate": 9.946653505826802e-05, "loss": 2.3971, "step": 1121 }, { "epoch": 0.06253832004904966, "grad_norm": 0.5000771880149841, "learning_rate": 9.946523704051765e-05, "loss": 1.6772, "step": 1122 }, { "epoch": 0.0625940583022128, "grad_norm": 0.5789170265197754, "learning_rate": 9.946393745401836e-05, "loss": 1.5496, "step": 1123 }, { "epoch": 0.06264979655537596, "grad_norm": 0.5486829280853271, "learning_rate": 9.946263629881137e-05, "loss": 1.926, "step": 1124 }, { "epoch": 0.0627055348085391, "grad_norm": 0.4877256751060486, "learning_rate": 9.946133357493794e-05, "loss": 1.8916, "step": 1125 }, { "epoch": 0.06276127306170225, "grad_norm": 0.505279541015625, "learning_rate": 9.946002928243939e-05, "loss": 1.7043, "step": 1126 }, { "epoch": 0.06281701131486539, "grad_norm": 0.5650628805160522, "learning_rate": 9.945872342135709e-05, "loss": 2.0595, "step": 1127 }, { "epoch": 0.06287274956802853, "grad_norm": 0.5424087047576904, "learning_rate": 9.945741599173244e-05, "loss": 1.7227, "step": 1128 }, { "epoch": 0.06292848782119169, "grad_norm": 0.5090418457984924, "learning_rate": 9.945610699360692e-05, "loss": 1.7466, "step": 1129 }, { "epoch": 0.06298422607435483, "grad_norm": 0.5532562732696533, "learning_rate": 9.945479642702203e-05, "loss": 1.9668, "step": 1130 }, { "epoch": 0.06303996432751797, "grad_norm": 0.4829805791378021, "learning_rate": 9.945348429201933e-05, "loss": 1.664, "step": 1131 }, { "epoch": 0.06309570258068112, "grad_norm": 0.5276423096656799, "learning_rate": 9.945217058864045e-05, "loss": 1.7043, "step": 1132 }, { "epoch": 0.06315144083384426, "grad_norm": 0.49455907940864563, "learning_rate": 9.945085531692704e-05, "loss": 1.6095, "step": 1133 }, { "epoch": 0.06320717908700742, "grad_norm": 0.49773842096328735, "learning_rate": 9.944953847692082e-05, "loss": 1.6696, "step": 1134 }, { "epoch": 0.06326291734017056, "grad_norm": 0.5351307988166809, "learning_rate": 9.944822006866356e-05, "loss": 1.8795, "step": 1135 }, { "epoch": 0.0633186555933337, "grad_norm": 0.5688774585723877, "learning_rate": 9.944690009219705e-05, "loss": 1.6658, "step": 1136 }, { "epoch": 0.06337439384649685, "grad_norm": 0.5083485841751099, "learning_rate": 9.944557854756316e-05, "loss": 1.5768, "step": 1137 }, { "epoch": 0.06343013209966, "grad_norm": 0.5670489072799683, "learning_rate": 9.944425543480382e-05, "loss": 1.9228, "step": 1138 }, { "epoch": 0.06348587035282315, "grad_norm": 0.49227067828178406, "learning_rate": 9.944293075396098e-05, "loss": 1.5889, "step": 1139 }, { "epoch": 0.06354160860598629, "grad_norm": 0.5258840918540955, "learning_rate": 9.944160450507665e-05, "loss": 1.7821, "step": 1140 }, { "epoch": 0.06359734685914943, "grad_norm": 0.5238833427429199, "learning_rate": 9.944027668819286e-05, "loss": 1.6987, "step": 1141 }, { "epoch": 0.06365308511231257, "grad_norm": 0.45374488830566406, "learning_rate": 9.943894730335179e-05, "loss": 1.4687, "step": 1142 }, { "epoch": 0.06370882336547573, "grad_norm": 0.496855765581131, "learning_rate": 9.943761635059554e-05, "loss": 1.6539, "step": 1143 }, { "epoch": 0.06376456161863887, "grad_norm": 0.5250856876373291, "learning_rate": 9.943628382996634e-05, "loss": 1.9439, "step": 1144 }, { "epoch": 0.06382029987180202, "grad_norm": 0.49122875928878784, "learning_rate": 9.943494974150644e-05, "loss": 1.6248, "step": 1145 }, { "epoch": 0.06387603812496516, "grad_norm": 0.5038126111030579, "learning_rate": 9.943361408525818e-05, "loss": 1.8027, "step": 1146 }, { "epoch": 0.06393177637812832, "grad_norm": 0.5918904542922974, "learning_rate": 9.94322768612639e-05, "loss": 2.1447, "step": 1147 }, { "epoch": 0.06398751463129146, "grad_norm": 0.46479690074920654, "learning_rate": 9.943093806956601e-05, "loss": 1.8147, "step": 1148 }, { "epoch": 0.0640432528844546, "grad_norm": 0.5129300355911255, "learning_rate": 9.942959771020694e-05, "loss": 1.9251, "step": 1149 }, { "epoch": 0.06409899113761774, "grad_norm": 0.5755007266998291, "learning_rate": 9.942825578322926e-05, "loss": 1.9842, "step": 1150 }, { "epoch": 0.06415472939078089, "grad_norm": 0.4916748106479645, "learning_rate": 9.942691228867546e-05, "loss": 1.7163, "step": 1151 }, { "epoch": 0.06421046764394404, "grad_norm": 0.5524545311927795, "learning_rate": 9.94255672265882e-05, "loss": 1.8273, "step": 1152 }, { "epoch": 0.06426620589710719, "grad_norm": 0.5353971719741821, "learning_rate": 9.942422059701012e-05, "loss": 1.8914, "step": 1153 }, { "epoch": 0.06432194415027033, "grad_norm": 0.48068755865097046, "learning_rate": 9.942287239998392e-05, "loss": 1.7668, "step": 1154 }, { "epoch": 0.06437768240343347, "grad_norm": 0.48459264636039734, "learning_rate": 9.942152263555237e-05, "loss": 1.5809, "step": 1155 }, { "epoch": 0.06443342065659662, "grad_norm": 0.5255505442619324, "learning_rate": 9.942017130375825e-05, "loss": 1.8543, "step": 1156 }, { "epoch": 0.06448915890975977, "grad_norm": 0.5935083627700806, "learning_rate": 9.941881840464447e-05, "loss": 1.7744, "step": 1157 }, { "epoch": 0.06454489716292292, "grad_norm": 0.5216168761253357, "learning_rate": 9.941746393825386e-05, "loss": 1.5802, "step": 1158 }, { "epoch": 0.06460063541608606, "grad_norm": 0.5127310752868652, "learning_rate": 9.941610790462946e-05, "loss": 1.8704, "step": 1159 }, { "epoch": 0.0646563736692492, "grad_norm": 0.5310918688774109, "learning_rate": 9.94147503038142e-05, "loss": 1.7503, "step": 1160 }, { "epoch": 0.06471211192241236, "grad_norm": 0.5417837500572205, "learning_rate": 9.941339113585117e-05, "loss": 1.7069, "step": 1161 }, { "epoch": 0.0647678501755755, "grad_norm": 0.46583306789398193, "learning_rate": 9.94120304007835e-05, "loss": 1.6529, "step": 1162 }, { "epoch": 0.06482358842873864, "grad_norm": 0.5210421681404114, "learning_rate": 9.941066809865429e-05, "loss": 1.8965, "step": 1163 }, { "epoch": 0.06487932668190179, "grad_norm": 0.4983007311820984, "learning_rate": 9.940930422950679e-05, "loss": 1.797, "step": 1164 }, { "epoch": 0.06493506493506493, "grad_norm": 0.5835360884666443, "learning_rate": 9.940793879338424e-05, "loss": 1.9707, "step": 1165 }, { "epoch": 0.06499080318822809, "grad_norm": 0.48875924944877625, "learning_rate": 9.940657179032993e-05, "loss": 1.8563, "step": 1166 }, { "epoch": 0.06504654144139123, "grad_norm": 0.4999620020389557, "learning_rate": 9.940520322038722e-05, "loss": 1.6063, "step": 1167 }, { "epoch": 0.06510227969455437, "grad_norm": 0.49378272891044617, "learning_rate": 9.940383308359951e-05, "loss": 1.8387, "step": 1168 }, { "epoch": 0.06515801794771751, "grad_norm": 0.44992733001708984, "learning_rate": 9.940246138001027e-05, "loss": 1.4808, "step": 1169 }, { "epoch": 0.06521375620088067, "grad_norm": 0.5133140683174133, "learning_rate": 9.9401088109663e-05, "loss": 1.9234, "step": 1170 }, { "epoch": 0.06526949445404381, "grad_norm": 0.6143995523452759, "learning_rate": 9.939971327260122e-05, "loss": 2.1587, "step": 1171 }, { "epoch": 0.06532523270720696, "grad_norm": 0.5144213438034058, "learning_rate": 9.939833686886857e-05, "loss": 1.8453, "step": 1172 }, { "epoch": 0.0653809709603701, "grad_norm": 0.48773664236068726, "learning_rate": 9.939695889850869e-05, "loss": 1.7421, "step": 1173 }, { "epoch": 0.06543670921353324, "grad_norm": 0.48457232117652893, "learning_rate": 9.939557936156527e-05, "loss": 1.7447, "step": 1174 }, { "epoch": 0.0654924474666964, "grad_norm": 0.48477059602737427, "learning_rate": 9.939419825808207e-05, "loss": 1.5579, "step": 1175 }, { "epoch": 0.06554818571985954, "grad_norm": 0.5835525393486023, "learning_rate": 9.93928155881029e-05, "loss": 2.1224, "step": 1176 }, { "epoch": 0.06560392397302268, "grad_norm": 0.5277059078216553, "learning_rate": 9.939143135167158e-05, "loss": 1.8331, "step": 1177 }, { "epoch": 0.06565966222618583, "grad_norm": 0.5046493411064148, "learning_rate": 9.939004554883205e-05, "loss": 1.7895, "step": 1178 }, { "epoch": 0.06571540047934897, "grad_norm": 0.5206563472747803, "learning_rate": 9.938865817962822e-05, "loss": 1.7342, "step": 1179 }, { "epoch": 0.06577113873251213, "grad_norm": 0.43598276376724243, "learning_rate": 9.938726924410412e-05, "loss": 1.5657, "step": 1180 }, { "epoch": 0.06582687698567527, "grad_norm": 0.49584537744522095, "learning_rate": 9.938587874230379e-05, "loss": 1.7487, "step": 1181 }, { "epoch": 0.06588261523883841, "grad_norm": 0.539125382900238, "learning_rate": 9.938448667427131e-05, "loss": 1.8534, "step": 1182 }, { "epoch": 0.06593835349200156, "grad_norm": 0.4833453595638275, "learning_rate": 9.938309304005086e-05, "loss": 1.6074, "step": 1183 }, { "epoch": 0.06599409174516471, "grad_norm": 0.5339459180831909, "learning_rate": 9.938169783968663e-05, "loss": 1.7358, "step": 1184 }, { "epoch": 0.06604982999832786, "grad_norm": 0.5234376788139343, "learning_rate": 9.938030107322283e-05, "loss": 1.5923, "step": 1185 }, { "epoch": 0.066105568251491, "grad_norm": 0.5175224542617798, "learning_rate": 9.93789027407038e-05, "loss": 1.8394, "step": 1186 }, { "epoch": 0.06616130650465414, "grad_norm": 0.5155382752418518, "learning_rate": 9.937750284217389e-05, "loss": 1.6385, "step": 1187 }, { "epoch": 0.06621704475781728, "grad_norm": 0.47023966908454895, "learning_rate": 9.937610137767747e-05, "loss": 1.6236, "step": 1188 }, { "epoch": 0.06627278301098044, "grad_norm": 0.4659249484539032, "learning_rate": 9.937469834725898e-05, "loss": 1.6139, "step": 1189 }, { "epoch": 0.06632852126414358, "grad_norm": 0.4964550733566284, "learning_rate": 9.937329375096297e-05, "loss": 1.62, "step": 1190 }, { "epoch": 0.06638425951730673, "grad_norm": 0.5324812531471252, "learning_rate": 9.937188758883393e-05, "loss": 1.8803, "step": 1191 }, { "epoch": 0.06643999777046987, "grad_norm": 0.5404229164123535, "learning_rate": 9.937047986091646e-05, "loss": 1.9219, "step": 1192 }, { "epoch": 0.06649573602363303, "grad_norm": 0.49228188395500183, "learning_rate": 9.936907056725524e-05, "loss": 1.7777, "step": 1193 }, { "epoch": 0.06655147427679617, "grad_norm": 0.5689822435379028, "learning_rate": 9.936765970789492e-05, "loss": 1.9888, "step": 1194 }, { "epoch": 0.06660721252995931, "grad_norm": 0.5374904274940491, "learning_rate": 9.936624728288029e-05, "loss": 1.6308, "step": 1195 }, { "epoch": 0.06666295078312245, "grad_norm": 0.48381903767585754, "learning_rate": 9.93648332922561e-05, "loss": 1.6621, "step": 1196 }, { "epoch": 0.0667186890362856, "grad_norm": 0.5000702738761902, "learning_rate": 9.936341773606723e-05, "loss": 1.6883, "step": 1197 }, { "epoch": 0.06677442728944875, "grad_norm": 0.4849522113800049, "learning_rate": 9.936200061435857e-05, "loss": 1.6099, "step": 1198 }, { "epoch": 0.0668301655426119, "grad_norm": 0.5355091094970703, "learning_rate": 9.936058192717502e-05, "loss": 1.725, "step": 1199 }, { "epoch": 0.06688590379577504, "grad_norm": 0.4482690095901489, "learning_rate": 9.935916167456163e-05, "loss": 1.5314, "step": 1200 }, { "epoch": 0.06694164204893818, "grad_norm": 0.4166151285171509, "learning_rate": 9.93577398565634e-05, "loss": 1.094, "step": 1201 }, { "epoch": 0.06699738030210133, "grad_norm": 0.569545328617096, "learning_rate": 9.935631647322544e-05, "loss": 1.9806, "step": 1202 }, { "epoch": 0.06705311855526448, "grad_norm": 0.528708279132843, "learning_rate": 9.93548915245929e-05, "loss": 1.7586, "step": 1203 }, { "epoch": 0.06710885680842762, "grad_norm": 0.48107293248176575, "learning_rate": 9.935346501071095e-05, "loss": 1.6344, "step": 1204 }, { "epoch": 0.06716459506159077, "grad_norm": 0.5078762769699097, "learning_rate": 9.935203693162483e-05, "loss": 1.7792, "step": 1205 }, { "epoch": 0.06722033331475391, "grad_norm": 0.4985436797142029, "learning_rate": 9.935060728737986e-05, "loss": 1.8226, "step": 1206 }, { "epoch": 0.06727607156791707, "grad_norm": 0.5001996755599976, "learning_rate": 9.934917607802135e-05, "loss": 1.65, "step": 1207 }, { "epoch": 0.06733180982108021, "grad_norm": 0.4552146792411804, "learning_rate": 9.934774330359471e-05, "loss": 1.5889, "step": 1208 }, { "epoch": 0.06738754807424335, "grad_norm": 0.4674372673034668, "learning_rate": 9.934630896414536e-05, "loss": 1.6367, "step": 1209 }, { "epoch": 0.0674432863274065, "grad_norm": 0.4658129811286926, "learning_rate": 9.93448730597188e-05, "loss": 1.6565, "step": 1210 }, { "epoch": 0.06749902458056964, "grad_norm": 0.4953976273536682, "learning_rate": 9.934343559036056e-05, "loss": 1.7874, "step": 1211 }, { "epoch": 0.0675547628337328, "grad_norm": 0.5296363830566406, "learning_rate": 9.934199655611624e-05, "loss": 1.4178, "step": 1212 }, { "epoch": 0.06761050108689594, "grad_norm": 0.5114982724189758, "learning_rate": 9.934055595703149e-05, "loss": 1.8371, "step": 1213 }, { "epoch": 0.06766623934005908, "grad_norm": 0.54044109582901, "learning_rate": 9.933911379315198e-05, "loss": 1.77, "step": 1214 }, { "epoch": 0.06772197759322222, "grad_norm": 0.5306605100631714, "learning_rate": 9.933767006452341e-05, "loss": 1.7457, "step": 1215 }, { "epoch": 0.06777771584638538, "grad_norm": 0.45446470379829407, "learning_rate": 9.933622477119165e-05, "loss": 1.4759, "step": 1216 }, { "epoch": 0.06783345409954852, "grad_norm": 0.5077145099639893, "learning_rate": 9.933477791320246e-05, "loss": 1.5853, "step": 1217 }, { "epoch": 0.06788919235271167, "grad_norm": 0.4767955541610718, "learning_rate": 9.933332949060177e-05, "loss": 1.624, "step": 1218 }, { "epoch": 0.06794493060587481, "grad_norm": 0.5637747049331665, "learning_rate": 9.93318795034355e-05, "loss": 1.9126, "step": 1219 }, { "epoch": 0.06800066885903795, "grad_norm": 0.5085890889167786, "learning_rate": 9.933042795174963e-05, "loss": 1.7807, "step": 1220 }, { "epoch": 0.06805640711220111, "grad_norm": 0.539089024066925, "learning_rate": 9.93289748355902e-05, "loss": 1.8777, "step": 1221 }, { "epoch": 0.06811214536536425, "grad_norm": 0.557056725025177, "learning_rate": 9.93275201550033e-05, "loss": 1.7479, "step": 1222 }, { "epoch": 0.0681678836185274, "grad_norm": 0.5699108839035034, "learning_rate": 9.932606391003508e-05, "loss": 1.9158, "step": 1223 }, { "epoch": 0.06822362187169054, "grad_norm": 0.5341405868530273, "learning_rate": 9.932460610073167e-05, "loss": 1.7554, "step": 1224 }, { "epoch": 0.06827936012485368, "grad_norm": 0.6143330335617065, "learning_rate": 9.932314672713936e-05, "loss": 1.7927, "step": 1225 }, { "epoch": 0.06833509837801684, "grad_norm": 0.500853419303894, "learning_rate": 9.932168578930439e-05, "loss": 1.7221, "step": 1226 }, { "epoch": 0.06839083663117998, "grad_norm": 0.5622022151947021, "learning_rate": 9.932022328727313e-05, "loss": 2.0262, "step": 1227 }, { "epoch": 0.06844657488434312, "grad_norm": 0.4860107898712158, "learning_rate": 9.931875922109195e-05, "loss": 1.7353, "step": 1228 }, { "epoch": 0.06850231313750627, "grad_norm": 0.5524904131889343, "learning_rate": 9.931729359080726e-05, "loss": 1.8789, "step": 1229 }, { "epoch": 0.06855805139066942, "grad_norm": 0.5192303657531738, "learning_rate": 9.931582639646556e-05, "loss": 1.9549, "step": 1230 }, { "epoch": 0.06861378964383257, "grad_norm": 0.47247666120529175, "learning_rate": 9.931435763811338e-05, "loss": 1.7371, "step": 1231 }, { "epoch": 0.06866952789699571, "grad_norm": 0.5242395401000977, "learning_rate": 9.93128873157973e-05, "loss": 1.8187, "step": 1232 }, { "epoch": 0.06872526615015885, "grad_norm": 0.4895036816596985, "learning_rate": 9.931141542956394e-05, "loss": 1.6269, "step": 1233 }, { "epoch": 0.068781004403322, "grad_norm": 0.5657653212547302, "learning_rate": 9.930994197945999e-05, "loss": 1.9831, "step": 1234 }, { "epoch": 0.06883674265648515, "grad_norm": 0.5430802702903748, "learning_rate": 9.930846696553219e-05, "loss": 1.9577, "step": 1235 }, { "epoch": 0.0688924809096483, "grad_norm": 0.6241572499275208, "learning_rate": 9.930699038782729e-05, "loss": 1.7921, "step": 1236 }, { "epoch": 0.06894821916281144, "grad_norm": 0.5370758175849915, "learning_rate": 9.930551224639215e-05, "loss": 1.921, "step": 1237 }, { "epoch": 0.06900395741597458, "grad_norm": 0.5141679048538208, "learning_rate": 9.930403254127363e-05, "loss": 1.8209, "step": 1238 }, { "epoch": 0.06905969566913774, "grad_norm": 0.511951208114624, "learning_rate": 9.930255127251866e-05, "loss": 1.9209, "step": 1239 }, { "epoch": 0.06911543392230088, "grad_norm": 0.5124894976615906, "learning_rate": 9.93010684401742e-05, "loss": 1.9073, "step": 1240 }, { "epoch": 0.06917117217546402, "grad_norm": 0.49549224972724915, "learning_rate": 9.929958404428732e-05, "loss": 1.6648, "step": 1241 }, { "epoch": 0.06922691042862716, "grad_norm": 0.4937445819377899, "learning_rate": 9.929809808490505e-05, "loss": 1.6878, "step": 1242 }, { "epoch": 0.06928264868179031, "grad_norm": 0.5082506537437439, "learning_rate": 9.929661056207455e-05, "loss": 1.8051, "step": 1243 }, { "epoch": 0.06933838693495346, "grad_norm": 0.5111956596374512, "learning_rate": 9.929512147584297e-05, "loss": 1.7016, "step": 1244 }, { "epoch": 0.0693941251881166, "grad_norm": 0.46468988060951233, "learning_rate": 9.929363082625755e-05, "loss": 1.7512, "step": 1245 }, { "epoch": 0.06944986344127975, "grad_norm": 0.5274616479873657, "learning_rate": 9.929213861336557e-05, "loss": 1.7578, "step": 1246 }, { "epoch": 0.06950560169444289, "grad_norm": 0.5274865031242371, "learning_rate": 9.929064483721435e-05, "loss": 1.7655, "step": 1247 }, { "epoch": 0.06956133994760604, "grad_norm": 0.5010793209075928, "learning_rate": 9.928914949785124e-05, "loss": 1.8085, "step": 1248 }, { "epoch": 0.06961707820076919, "grad_norm": 0.5141963362693787, "learning_rate": 9.928765259532371e-05, "loss": 1.4068, "step": 1249 }, { "epoch": 0.06967281645393233, "grad_norm": 0.5250492691993713, "learning_rate": 9.928615412967919e-05, "loss": 1.9137, "step": 1250 }, { "epoch": 0.06972855470709548, "grad_norm": 0.5868452191352844, "learning_rate": 9.928465410096521e-05, "loss": 1.6562, "step": 1251 }, { "epoch": 0.06978429296025862, "grad_norm": 0.553932785987854, "learning_rate": 9.928315250922937e-05, "loss": 1.7661, "step": 1252 }, { "epoch": 0.06984003121342178, "grad_norm": 0.49618422985076904, "learning_rate": 9.928164935451927e-05, "loss": 1.9336, "step": 1253 }, { "epoch": 0.06989576946658492, "grad_norm": 0.5094950199127197, "learning_rate": 9.928014463688257e-05, "loss": 1.8955, "step": 1254 }, { "epoch": 0.06995150771974806, "grad_norm": 0.5146217942237854, "learning_rate": 9.927863835636703e-05, "loss": 1.7892, "step": 1255 }, { "epoch": 0.0700072459729112, "grad_norm": 0.5579236745834351, "learning_rate": 9.927713051302037e-05, "loss": 1.8628, "step": 1256 }, { "epoch": 0.07006298422607435, "grad_norm": 0.5719481706619263, "learning_rate": 9.927562110689046e-05, "loss": 1.9999, "step": 1257 }, { "epoch": 0.0701187224792375, "grad_norm": 0.5164546966552734, "learning_rate": 9.927411013802512e-05, "loss": 1.6341, "step": 1258 }, { "epoch": 0.07017446073240065, "grad_norm": 0.5111738443374634, "learning_rate": 9.927259760647232e-05, "loss": 1.8801, "step": 1259 }, { "epoch": 0.07023019898556379, "grad_norm": 0.47879326343536377, "learning_rate": 9.927108351227998e-05, "loss": 1.6122, "step": 1260 }, { "epoch": 0.07028593723872693, "grad_norm": 0.6105756759643555, "learning_rate": 9.926956785549616e-05, "loss": 2.0343, "step": 1261 }, { "epoch": 0.07034167549189009, "grad_norm": 0.5080457329750061, "learning_rate": 9.92680506361689e-05, "loss": 1.9449, "step": 1262 }, { "epoch": 0.07039741374505323, "grad_norm": 0.4686660170555115, "learning_rate": 9.926653185434634e-05, "loss": 1.7354, "step": 1263 }, { "epoch": 0.07045315199821638, "grad_norm": 0.5146884322166443, "learning_rate": 9.926501151007662e-05, "loss": 1.8347, "step": 1264 }, { "epoch": 0.07050889025137952, "grad_norm": 0.5533162355422974, "learning_rate": 9.926348960340796e-05, "loss": 1.887, "step": 1265 }, { "epoch": 0.07056462850454266, "grad_norm": 0.5264948606491089, "learning_rate": 9.926196613438865e-05, "loss": 1.8267, "step": 1266 }, { "epoch": 0.07062036675770582, "grad_norm": 0.5064124464988708, "learning_rate": 9.926044110306698e-05, "loss": 1.4021, "step": 1267 }, { "epoch": 0.07067610501086896, "grad_norm": 0.5374730229377747, "learning_rate": 9.925891450949135e-05, "loss": 2.1346, "step": 1268 }, { "epoch": 0.0707318432640321, "grad_norm": 0.5050212144851685, "learning_rate": 9.925738635371011e-05, "loss": 1.7458, "step": 1269 }, { "epoch": 0.07078758151719525, "grad_norm": 0.5477495789527893, "learning_rate": 9.925585663577181e-05, "loss": 1.9184, "step": 1270 }, { "epoch": 0.0708433197703584, "grad_norm": 0.4926922917366028, "learning_rate": 9.92543253557249e-05, "loss": 1.7406, "step": 1271 }, { "epoch": 0.07089905802352155, "grad_norm": 0.5027531981468201, "learning_rate": 9.925279251361795e-05, "loss": 1.6771, "step": 1272 }, { "epoch": 0.07095479627668469, "grad_norm": 0.44907525181770325, "learning_rate": 9.92512581094996e-05, "loss": 1.534, "step": 1273 }, { "epoch": 0.07101053452984783, "grad_norm": 0.4935868978500366, "learning_rate": 9.92497221434185e-05, "loss": 1.6932, "step": 1274 }, { "epoch": 0.07106627278301098, "grad_norm": 0.5403043031692505, "learning_rate": 9.924818461542335e-05, "loss": 1.7863, "step": 1275 }, { "epoch": 0.07112201103617413, "grad_norm": 0.49991410970687866, "learning_rate": 9.924664552556293e-05, "loss": 1.5134, "step": 1276 }, { "epoch": 0.07117774928933727, "grad_norm": 0.5363178849220276, "learning_rate": 9.924510487388603e-05, "loss": 1.7264, "step": 1277 }, { "epoch": 0.07123348754250042, "grad_norm": 0.6076151728630066, "learning_rate": 9.924356266044153e-05, "loss": 2.0642, "step": 1278 }, { "epoch": 0.07128922579566356, "grad_norm": 0.5013806223869324, "learning_rate": 9.924201888527833e-05, "loss": 1.5962, "step": 1279 }, { "epoch": 0.0713449640488267, "grad_norm": 0.4695322513580322, "learning_rate": 9.924047354844539e-05, "loss": 1.657, "step": 1280 }, { "epoch": 0.07140070230198986, "grad_norm": 0.5039030909538269, "learning_rate": 9.923892664999173e-05, "loss": 1.8447, "step": 1281 }, { "epoch": 0.071456440555153, "grad_norm": 0.5190325379371643, "learning_rate": 9.923737818996639e-05, "loss": 1.7732, "step": 1282 }, { "epoch": 0.07151217880831615, "grad_norm": 0.4986951947212219, "learning_rate": 9.92358281684185e-05, "loss": 1.5262, "step": 1283 }, { "epoch": 0.07156791706147929, "grad_norm": 0.5534316897392273, "learning_rate": 9.92342765853972e-05, "loss": 2.0328, "step": 1284 }, { "epoch": 0.07162365531464245, "grad_norm": 0.49968552589416504, "learning_rate": 9.923272344095169e-05, "loss": 1.7766, "step": 1285 }, { "epoch": 0.07167939356780559, "grad_norm": 0.5316057205200195, "learning_rate": 9.923116873513125e-05, "loss": 1.9544, "step": 1286 }, { "epoch": 0.07173513182096873, "grad_norm": 0.49467048048973083, "learning_rate": 9.922961246798516e-05, "loss": 1.6245, "step": 1287 }, { "epoch": 0.07179087007413187, "grad_norm": 0.5283698439598083, "learning_rate": 9.922805463956282e-05, "loss": 1.8113, "step": 1288 }, { "epoch": 0.07184660832729502, "grad_norm": 0.5117636322975159, "learning_rate": 9.922649524991359e-05, "loss": 1.5682, "step": 1289 }, { "epoch": 0.07190234658045817, "grad_norm": 0.524705708026886, "learning_rate": 9.922493429908695e-05, "loss": 1.7724, "step": 1290 }, { "epoch": 0.07195808483362132, "grad_norm": 0.5265300273895264, "learning_rate": 9.922337178713238e-05, "loss": 1.8775, "step": 1291 }, { "epoch": 0.07201382308678446, "grad_norm": 0.4668891429901123, "learning_rate": 9.922180771409945e-05, "loss": 1.6585, "step": 1292 }, { "epoch": 0.0720695613399476, "grad_norm": 0.5392476916313171, "learning_rate": 9.922024208003777e-05, "loss": 1.7811, "step": 1293 }, { "epoch": 0.07212529959311076, "grad_norm": 0.45741191506385803, "learning_rate": 9.921867488499699e-05, "loss": 1.5123, "step": 1294 }, { "epoch": 0.0721810378462739, "grad_norm": 0.5779647827148438, "learning_rate": 9.92171061290268e-05, "loss": 1.798, "step": 1295 }, { "epoch": 0.07223677609943704, "grad_norm": 0.5434536337852478, "learning_rate": 9.921553581217697e-05, "loss": 1.8681, "step": 1296 }, { "epoch": 0.07229251435260019, "grad_norm": 0.47686439752578735, "learning_rate": 9.921396393449727e-05, "loss": 1.5803, "step": 1297 }, { "epoch": 0.07234825260576333, "grad_norm": 0.5182580947875977, "learning_rate": 9.921239049603759e-05, "loss": 1.8512, "step": 1298 }, { "epoch": 0.07240399085892649, "grad_norm": 0.5331408977508545, "learning_rate": 9.921081549684779e-05, "loss": 1.9001, "step": 1299 }, { "epoch": 0.07245972911208963, "grad_norm": 0.49691641330718994, "learning_rate": 9.920923893697786e-05, "loss": 1.718, "step": 1300 }, { "epoch": 0.07251546736525277, "grad_norm": 0.526009202003479, "learning_rate": 9.920766081647779e-05, "loss": 1.6531, "step": 1301 }, { "epoch": 0.07257120561841592, "grad_norm": 0.5836690664291382, "learning_rate": 9.92060811353976e-05, "loss": 1.6522, "step": 1302 }, { "epoch": 0.07262694387157906, "grad_norm": 0.5216406583786011, "learning_rate": 9.920449989378742e-05, "loss": 1.5131, "step": 1303 }, { "epoch": 0.07268268212474222, "grad_norm": 0.4874148964881897, "learning_rate": 9.920291709169737e-05, "loss": 1.5922, "step": 1304 }, { "epoch": 0.07273842037790536, "grad_norm": 0.4904099404811859, "learning_rate": 9.920133272917767e-05, "loss": 1.83, "step": 1305 }, { "epoch": 0.0727941586310685, "grad_norm": 0.5295507907867432, "learning_rate": 9.919974680627856e-05, "loss": 1.8742, "step": 1306 }, { "epoch": 0.07284989688423164, "grad_norm": 0.5288472175598145, "learning_rate": 9.919815932305034e-05, "loss": 1.8706, "step": 1307 }, { "epoch": 0.0729056351373948, "grad_norm": 0.48234906792640686, "learning_rate": 9.919657027954335e-05, "loss": 1.6827, "step": 1308 }, { "epoch": 0.07296137339055794, "grad_norm": 0.5203633904457092, "learning_rate": 9.919497967580798e-05, "loss": 1.7064, "step": 1309 }, { "epoch": 0.07301711164372109, "grad_norm": 0.51950603723526, "learning_rate": 9.919338751189468e-05, "loss": 1.7643, "step": 1310 }, { "epoch": 0.07307284989688423, "grad_norm": 0.5219436883926392, "learning_rate": 9.919179378785396e-05, "loss": 1.928, "step": 1311 }, { "epoch": 0.07312858815004737, "grad_norm": 0.5543720722198486, "learning_rate": 9.919019850373635e-05, "loss": 2.0754, "step": 1312 }, { "epoch": 0.07318432640321053, "grad_norm": 0.4778376817703247, "learning_rate": 9.918860165959243e-05, "loss": 1.652, "step": 1313 }, { "epoch": 0.07324006465637367, "grad_norm": 0.5367230772972107, "learning_rate": 9.918700325547286e-05, "loss": 1.9413, "step": 1314 }, { "epoch": 0.07329580290953681, "grad_norm": 0.5712525248527527, "learning_rate": 9.918540329142831e-05, "loss": 1.7279, "step": 1315 }, { "epoch": 0.07335154116269996, "grad_norm": 0.5032913088798523, "learning_rate": 9.918380176750955e-05, "loss": 1.7546, "step": 1316 }, { "epoch": 0.07340727941586311, "grad_norm": 0.4760904908180237, "learning_rate": 9.918219868376737e-05, "loss": 1.657, "step": 1317 }, { "epoch": 0.07346301766902626, "grad_norm": 0.5059273838996887, "learning_rate": 9.91805940402526e-05, "loss": 1.8728, "step": 1318 }, { "epoch": 0.0735187559221894, "grad_norm": 0.5608049631118774, "learning_rate": 9.917898783701612e-05, "loss": 2.008, "step": 1319 }, { "epoch": 0.07357449417535254, "grad_norm": 0.5329555869102478, "learning_rate": 9.917738007410888e-05, "loss": 1.6254, "step": 1320 }, { "epoch": 0.07363023242851569, "grad_norm": 0.5802140831947327, "learning_rate": 9.917577075158186e-05, "loss": 2.0478, "step": 1321 }, { "epoch": 0.07368597068167884, "grad_norm": 0.5300236940383911, "learning_rate": 9.917415986948612e-05, "loss": 1.8852, "step": 1322 }, { "epoch": 0.07374170893484198, "grad_norm": 0.4858631491661072, "learning_rate": 9.917254742787273e-05, "loss": 1.5704, "step": 1323 }, { "epoch": 0.07379744718800513, "grad_norm": 0.5059242248535156, "learning_rate": 9.917093342679284e-05, "loss": 1.6683, "step": 1324 }, { "epoch": 0.07385318544116827, "grad_norm": 0.4971073567867279, "learning_rate": 9.916931786629761e-05, "loss": 1.6127, "step": 1325 }, { "epoch": 0.07390892369433141, "grad_norm": 0.5727537274360657, "learning_rate": 9.916770074643831e-05, "loss": 1.8274, "step": 1326 }, { "epoch": 0.07396466194749457, "grad_norm": 0.5242769718170166, "learning_rate": 9.91660820672662e-05, "loss": 1.7747, "step": 1327 }, { "epoch": 0.07402040020065771, "grad_norm": 0.5268994569778442, "learning_rate": 9.916446182883264e-05, "loss": 1.8716, "step": 1328 }, { "epoch": 0.07407613845382086, "grad_norm": 0.5069685578346252, "learning_rate": 9.916284003118897e-05, "loss": 1.572, "step": 1329 }, { "epoch": 0.074131876706984, "grad_norm": 0.5535740852355957, "learning_rate": 9.916121667438667e-05, "loss": 1.852, "step": 1330 }, { "epoch": 0.07418761496014716, "grad_norm": 0.5100526213645935, "learning_rate": 9.915959175847723e-05, "loss": 1.8053, "step": 1331 }, { "epoch": 0.0742433532133103, "grad_norm": 0.5486835837364197, "learning_rate": 9.915796528351212e-05, "loss": 1.9061, "step": 1332 }, { "epoch": 0.07429909146647344, "grad_norm": 0.546424150466919, "learning_rate": 9.915633724954299e-05, "loss": 1.8031, "step": 1333 }, { "epoch": 0.07435482971963658, "grad_norm": 0.5596832036972046, "learning_rate": 9.915470765662143e-05, "loss": 1.7918, "step": 1334 }, { "epoch": 0.07441056797279973, "grad_norm": 0.5737068057060242, "learning_rate": 9.915307650479914e-05, "loss": 1.7687, "step": 1335 }, { "epoch": 0.07446630622596288, "grad_norm": 0.5227526426315308, "learning_rate": 9.915144379412784e-05, "loss": 1.6509, "step": 1336 }, { "epoch": 0.07452204447912603, "grad_norm": 0.5172739028930664, "learning_rate": 9.914980952465932e-05, "loss": 1.7922, "step": 1337 }, { "epoch": 0.07457778273228917, "grad_norm": 0.5068166851997375, "learning_rate": 9.91481736964454e-05, "loss": 1.6475, "step": 1338 }, { "epoch": 0.07463352098545231, "grad_norm": 0.5804305076599121, "learning_rate": 9.914653630953797e-05, "loss": 1.9451, "step": 1339 }, { "epoch": 0.07468925923861547, "grad_norm": 0.5118273496627808, "learning_rate": 9.914489736398895e-05, "loss": 1.6014, "step": 1340 }, { "epoch": 0.07474499749177861, "grad_norm": 0.47122183442115784, "learning_rate": 9.914325685985033e-05, "loss": 1.7206, "step": 1341 }, { "epoch": 0.07480073574494175, "grad_norm": 0.5404577851295471, "learning_rate": 9.914161479717413e-05, "loss": 1.984, "step": 1342 }, { "epoch": 0.0748564739981049, "grad_norm": 0.5037184953689575, "learning_rate": 9.91399711760124e-05, "loss": 1.8535, "step": 1343 }, { "epoch": 0.07491221225126804, "grad_norm": 0.5099769830703735, "learning_rate": 9.91383259964173e-05, "loss": 1.7632, "step": 1344 }, { "epoch": 0.0749679505044312, "grad_norm": 0.5458886623382568, "learning_rate": 9.9136679258441e-05, "loss": 2.0607, "step": 1345 }, { "epoch": 0.07502368875759434, "grad_norm": 0.4648517668247223, "learning_rate": 9.913503096213572e-05, "loss": 1.914, "step": 1346 }, { "epoch": 0.07507942701075748, "grad_norm": 0.5120497941970825, "learning_rate": 9.913338110755375e-05, "loss": 1.8349, "step": 1347 }, { "epoch": 0.07513516526392063, "grad_norm": 0.4551779329776764, "learning_rate": 9.913172969474737e-05, "loss": 1.5673, "step": 1348 }, { "epoch": 0.07519090351708377, "grad_norm": 0.5728102326393127, "learning_rate": 9.913007672376899e-05, "loss": 2.1014, "step": 1349 }, { "epoch": 0.07524664177024692, "grad_norm": 0.47414430975914, "learning_rate": 9.912842219467105e-05, "loss": 1.6999, "step": 1350 }, { "epoch": 0.07530238002341007, "grad_norm": 0.5111278891563416, "learning_rate": 9.912676610750598e-05, "loss": 1.9367, "step": 1351 }, { "epoch": 0.07535811827657321, "grad_norm": 0.5118902325630188, "learning_rate": 9.91251084623263e-05, "loss": 1.8136, "step": 1352 }, { "epoch": 0.07541385652973635, "grad_norm": 0.5514450669288635, "learning_rate": 9.912344925918462e-05, "loss": 1.7309, "step": 1353 }, { "epoch": 0.07546959478289951, "grad_norm": 0.4836481511592865, "learning_rate": 9.912178849813353e-05, "loss": 1.2918, "step": 1354 }, { "epoch": 0.07552533303606265, "grad_norm": 0.5168613791465759, "learning_rate": 9.91201261792257e-05, "loss": 1.8673, "step": 1355 }, { "epoch": 0.0755810712892258, "grad_norm": 0.48082637786865234, "learning_rate": 9.911846230251388e-05, "loss": 1.6275, "step": 1356 }, { "epoch": 0.07563680954238894, "grad_norm": 0.504571259021759, "learning_rate": 9.91167968680508e-05, "loss": 1.7718, "step": 1357 }, { "epoch": 0.07569254779555208, "grad_norm": 0.499100923538208, "learning_rate": 9.911512987588932e-05, "loss": 1.7842, "step": 1358 }, { "epoch": 0.07574828604871524, "grad_norm": 0.4926021993160248, "learning_rate": 9.911346132608225e-05, "loss": 1.5556, "step": 1359 }, { "epoch": 0.07580402430187838, "grad_norm": 0.5981921553611755, "learning_rate": 9.911179121868255e-05, "loss": 1.853, "step": 1360 }, { "epoch": 0.07585976255504152, "grad_norm": 0.4938274621963501, "learning_rate": 9.911011955374316e-05, "loss": 1.646, "step": 1361 }, { "epoch": 0.07591550080820467, "grad_norm": 0.4952639937400818, "learning_rate": 9.910844633131713e-05, "loss": 1.6188, "step": 1362 }, { "epoch": 0.07597123906136782, "grad_norm": 0.5024005770683289, "learning_rate": 9.91067715514575e-05, "loss": 1.9164, "step": 1363 }, { "epoch": 0.07602697731453097, "grad_norm": 0.5488448143005371, "learning_rate": 9.910509521421738e-05, "loss": 1.9139, "step": 1364 }, { "epoch": 0.07608271556769411, "grad_norm": 0.5247362852096558, "learning_rate": 9.910341731964996e-05, "loss": 1.8488, "step": 1365 }, { "epoch": 0.07613845382085725, "grad_norm": 0.5229883193969727, "learning_rate": 9.910173786780842e-05, "loss": 1.8503, "step": 1366 }, { "epoch": 0.0761941920740204, "grad_norm": 0.49642667174339294, "learning_rate": 9.910005685874603e-05, "loss": 1.7051, "step": 1367 }, { "epoch": 0.07624993032718355, "grad_norm": 0.48131421208381653, "learning_rate": 9.909837429251614e-05, "loss": 1.4925, "step": 1368 }, { "epoch": 0.0763056685803467, "grad_norm": 0.4743631184101105, "learning_rate": 9.909669016917204e-05, "loss": 1.5833, "step": 1369 }, { "epoch": 0.07636140683350984, "grad_norm": 0.5918928980827332, "learning_rate": 9.909500448876721e-05, "loss": 2.1295, "step": 1370 }, { "epoch": 0.07641714508667298, "grad_norm": 0.5590381622314453, "learning_rate": 9.909331725135509e-05, "loss": 1.862, "step": 1371 }, { "epoch": 0.07647288333983612, "grad_norm": 0.5015060305595398, "learning_rate": 9.909162845698916e-05, "loss": 1.7541, "step": 1372 }, { "epoch": 0.07652862159299928, "grad_norm": 0.5213440656661987, "learning_rate": 9.9089938105723e-05, "loss": 1.7944, "step": 1373 }, { "epoch": 0.07658435984616242, "grad_norm": 0.5424663424491882, "learning_rate": 9.908824619761023e-05, "loss": 1.8207, "step": 1374 }, { "epoch": 0.07664009809932557, "grad_norm": 0.548622727394104, "learning_rate": 9.908655273270449e-05, "loss": 1.8224, "step": 1375 }, { "epoch": 0.07669583635248871, "grad_norm": 0.5018399953842163, "learning_rate": 9.908485771105949e-05, "loss": 1.856, "step": 1376 }, { "epoch": 0.07675157460565186, "grad_norm": 0.5578395128250122, "learning_rate": 9.908316113272897e-05, "loss": 1.7791, "step": 1377 }, { "epoch": 0.07680731285881501, "grad_norm": 0.5207507610321045, "learning_rate": 9.908146299776678e-05, "loss": 1.7608, "step": 1378 }, { "epoch": 0.07686305111197815, "grad_norm": 0.5391795039176941, "learning_rate": 9.907976330622674e-05, "loss": 1.772, "step": 1379 }, { "epoch": 0.0769187893651413, "grad_norm": 0.47418221831321716, "learning_rate": 9.907806205816277e-05, "loss": 1.2319, "step": 1380 }, { "epoch": 0.07697452761830444, "grad_norm": 0.49630096554756165, "learning_rate": 9.90763592536288e-05, "loss": 1.676, "step": 1381 }, { "epoch": 0.0770302658714676, "grad_norm": 0.533801257610321, "learning_rate": 9.907465489267886e-05, "loss": 1.7612, "step": 1382 }, { "epoch": 0.07708600412463074, "grad_norm": 0.5061699748039246, "learning_rate": 9.907294897536699e-05, "loss": 1.8883, "step": 1383 }, { "epoch": 0.07714174237779388, "grad_norm": 0.5732898116111755, "learning_rate": 9.90712415017473e-05, "loss": 1.8195, "step": 1384 }, { "epoch": 0.07719748063095702, "grad_norm": 0.5062339901924133, "learning_rate": 9.906953247187392e-05, "loss": 1.765, "step": 1385 }, { "epoch": 0.07725321888412018, "grad_norm": 0.4672509729862213, "learning_rate": 9.906782188580107e-05, "loss": 1.5199, "step": 1386 }, { "epoch": 0.07730895713728332, "grad_norm": 0.5902494788169861, "learning_rate": 9.9066109743583e-05, "loss": 2.1369, "step": 1387 }, { "epoch": 0.07736469539044646, "grad_norm": 0.4874188005924225, "learning_rate": 9.9064396045274e-05, "loss": 1.6941, "step": 1388 }, { "epoch": 0.0774204336436096, "grad_norm": 0.5620763301849365, "learning_rate": 9.906268079092843e-05, "loss": 1.7395, "step": 1389 }, { "epoch": 0.07747617189677275, "grad_norm": 0.5454680919647217, "learning_rate": 9.906096398060067e-05, "loss": 1.7771, "step": 1390 }, { "epoch": 0.0775319101499359, "grad_norm": 0.5270059704780579, "learning_rate": 9.905924561434519e-05, "loss": 1.8375, "step": 1391 }, { "epoch": 0.07758764840309905, "grad_norm": 0.4714577794075012, "learning_rate": 9.905752569221647e-05, "loss": 1.4259, "step": 1392 }, { "epoch": 0.07764338665626219, "grad_norm": 0.4905398190021515, "learning_rate": 9.905580421426905e-05, "loss": 1.7302, "step": 1393 }, { "epoch": 0.07769912490942534, "grad_norm": 0.5166676640510559, "learning_rate": 9.905408118055755e-05, "loss": 1.665, "step": 1394 }, { "epoch": 0.07775486316258848, "grad_norm": 0.5545955896377563, "learning_rate": 9.905235659113658e-05, "loss": 1.7589, "step": 1395 }, { "epoch": 0.07781060141575163, "grad_norm": 0.5974867343902588, "learning_rate": 9.905063044606088e-05, "loss": 1.9677, "step": 1396 }, { "epoch": 0.07786633966891478, "grad_norm": 0.538375198841095, "learning_rate": 9.904890274538516e-05, "loss": 1.6438, "step": 1397 }, { "epoch": 0.07792207792207792, "grad_norm": 0.5226508378982544, "learning_rate": 9.904717348916421e-05, "loss": 1.8672, "step": 1398 }, { "epoch": 0.07797781617524106, "grad_norm": 0.5076341032981873, "learning_rate": 9.904544267745288e-05, "loss": 1.6942, "step": 1399 }, { "epoch": 0.07803355442840422, "grad_norm": 0.5587323307991028, "learning_rate": 9.904371031030608e-05, "loss": 2.0127, "step": 1400 }, { "epoch": 0.07808929268156736, "grad_norm": 0.5744814276695251, "learning_rate": 9.904197638777872e-05, "loss": 1.6781, "step": 1401 }, { "epoch": 0.0781450309347305, "grad_norm": 0.4966742992401123, "learning_rate": 9.904024090992581e-05, "loss": 1.7314, "step": 1402 }, { "epoch": 0.07820076918789365, "grad_norm": 0.5050981640815735, "learning_rate": 9.903850387680238e-05, "loss": 1.8782, "step": 1403 }, { "epoch": 0.07825650744105679, "grad_norm": 0.518583357334137, "learning_rate": 9.903676528846352e-05, "loss": 1.9028, "step": 1404 }, { "epoch": 0.07831224569421995, "grad_norm": 0.5047330856323242, "learning_rate": 9.903502514496436e-05, "loss": 1.6501, "step": 1405 }, { "epoch": 0.07836798394738309, "grad_norm": 0.5036478042602539, "learning_rate": 9.903328344636012e-05, "loss": 1.7873, "step": 1406 }, { "epoch": 0.07842372220054623, "grad_norm": 0.49196913838386536, "learning_rate": 9.903154019270599e-05, "loss": 1.6404, "step": 1407 }, { "epoch": 0.07847946045370938, "grad_norm": 0.5227888226509094, "learning_rate": 9.90297953840573e-05, "loss": 1.8049, "step": 1408 }, { "epoch": 0.07853519870687253, "grad_norm": 0.5419712662696838, "learning_rate": 9.902804902046935e-05, "loss": 1.8979, "step": 1409 }, { "epoch": 0.07859093696003568, "grad_norm": 0.5512637495994568, "learning_rate": 9.902630110199753e-05, "loss": 1.5322, "step": 1410 }, { "epoch": 0.07864667521319882, "grad_norm": 0.5147241353988647, "learning_rate": 9.90245516286973e-05, "loss": 1.8126, "step": 1411 }, { "epoch": 0.07870241346636196, "grad_norm": 0.5257126092910767, "learning_rate": 9.902280060062413e-05, "loss": 1.9197, "step": 1412 }, { "epoch": 0.0787581517195251, "grad_norm": 0.5739386677742004, "learning_rate": 9.902104801783352e-05, "loss": 2.0767, "step": 1413 }, { "epoch": 0.07881388997268826, "grad_norm": 0.47901228070259094, "learning_rate": 9.90192938803811e-05, "loss": 1.4594, "step": 1414 }, { "epoch": 0.0788696282258514, "grad_norm": 0.4943484663963318, "learning_rate": 9.901753818832248e-05, "loss": 1.6394, "step": 1415 }, { "epoch": 0.07892536647901455, "grad_norm": 0.5033669471740723, "learning_rate": 9.901578094171333e-05, "loss": 1.6963, "step": 1416 }, { "epoch": 0.07898110473217769, "grad_norm": 0.5039759874343872, "learning_rate": 9.90140221406094e-05, "loss": 1.5721, "step": 1417 }, { "epoch": 0.07903684298534083, "grad_norm": 0.49595627188682556, "learning_rate": 9.901226178506646e-05, "loss": 1.7414, "step": 1418 }, { "epoch": 0.07909258123850399, "grad_norm": 0.5233118534088135, "learning_rate": 9.901049987514033e-05, "loss": 1.7728, "step": 1419 }, { "epoch": 0.07914831949166713, "grad_norm": 0.5164638757705688, "learning_rate": 9.90087364108869e-05, "loss": 1.8569, "step": 1420 }, { "epoch": 0.07920405774483028, "grad_norm": 0.5309315323829651, "learning_rate": 9.900697139236209e-05, "loss": 1.7734, "step": 1421 }, { "epoch": 0.07925979599799342, "grad_norm": 0.4936157464981079, "learning_rate": 9.900520481962188e-05, "loss": 1.6859, "step": 1422 }, { "epoch": 0.07931553425115657, "grad_norm": 0.4760551452636719, "learning_rate": 9.90034366927223e-05, "loss": 1.7148, "step": 1423 }, { "epoch": 0.07937127250431972, "grad_norm": 0.5099088549613953, "learning_rate": 9.90016670117194e-05, "loss": 1.7605, "step": 1424 }, { "epoch": 0.07942701075748286, "grad_norm": 0.512695848941803, "learning_rate": 9.899989577666933e-05, "loss": 1.7824, "step": 1425 }, { "epoch": 0.079482749010646, "grad_norm": 0.5051438212394714, "learning_rate": 9.899812298762826e-05, "loss": 1.8003, "step": 1426 }, { "epoch": 0.07953848726380915, "grad_norm": 0.5289508700370789, "learning_rate": 9.899634864465241e-05, "loss": 1.7588, "step": 1427 }, { "epoch": 0.0795942255169723, "grad_norm": 0.4910021424293518, "learning_rate": 9.899457274779804e-05, "loss": 1.7284, "step": 1428 }, { "epoch": 0.07964996377013545, "grad_norm": 0.6068856716156006, "learning_rate": 9.899279529712148e-05, "loss": 1.9947, "step": 1429 }, { "epoch": 0.07970570202329859, "grad_norm": 0.5239669680595398, "learning_rate": 9.899101629267911e-05, "loss": 1.5956, "step": 1430 }, { "epoch": 0.07976144027646173, "grad_norm": 0.5577272176742554, "learning_rate": 9.898923573452734e-05, "loss": 2.0396, "step": 1431 }, { "epoch": 0.07981717852962489, "grad_norm": 0.4893241822719574, "learning_rate": 9.898745362272264e-05, "loss": 1.5054, "step": 1432 }, { "epoch": 0.07987291678278803, "grad_norm": 0.48603859543800354, "learning_rate": 9.898566995732153e-05, "loss": 1.6304, "step": 1433 }, { "epoch": 0.07992865503595117, "grad_norm": 0.5560683012008667, "learning_rate": 9.898388473838056e-05, "loss": 1.8177, "step": 1434 }, { "epoch": 0.07998439328911432, "grad_norm": 0.5030083060264587, "learning_rate": 9.898209796595636e-05, "loss": 1.7325, "step": 1435 }, { "epoch": 0.08004013154227746, "grad_norm": 0.48422524333000183, "learning_rate": 9.898030964010562e-05, "loss": 1.5905, "step": 1436 }, { "epoch": 0.08009586979544062, "grad_norm": 0.5284083485603333, "learning_rate": 9.897851976088501e-05, "loss": 1.672, "step": 1437 }, { "epoch": 0.08015160804860376, "grad_norm": 0.5937215685844421, "learning_rate": 9.897672832835135e-05, "loss": 1.9549, "step": 1438 }, { "epoch": 0.0802073463017669, "grad_norm": 0.4896755516529083, "learning_rate": 9.89749353425614e-05, "loss": 1.7438, "step": 1439 }, { "epoch": 0.08026308455493004, "grad_norm": 0.5281119346618652, "learning_rate": 9.897314080357202e-05, "loss": 1.6437, "step": 1440 }, { "epoch": 0.08031882280809319, "grad_norm": 0.5150919556617737, "learning_rate": 9.897134471144019e-05, "loss": 1.742, "step": 1441 }, { "epoch": 0.08037456106125634, "grad_norm": 0.5028387308120728, "learning_rate": 9.896954706622281e-05, "loss": 1.5031, "step": 1442 }, { "epoch": 0.08043029931441949, "grad_norm": 0.5158771276473999, "learning_rate": 9.896774786797691e-05, "loss": 1.533, "step": 1443 }, { "epoch": 0.08048603756758263, "grad_norm": 0.5377411842346191, "learning_rate": 9.896594711675954e-05, "loss": 2.0242, "step": 1444 }, { "epoch": 0.08054177582074577, "grad_norm": 0.4912663698196411, "learning_rate": 9.896414481262784e-05, "loss": 1.815, "step": 1445 }, { "epoch": 0.08059751407390893, "grad_norm": 0.47936177253723145, "learning_rate": 9.896234095563893e-05, "loss": 1.5458, "step": 1446 }, { "epoch": 0.08065325232707207, "grad_norm": 0.5695403218269348, "learning_rate": 9.896053554585006e-05, "loss": 2.1062, "step": 1447 }, { "epoch": 0.08070899058023522, "grad_norm": 0.5067823529243469, "learning_rate": 9.895872858331843e-05, "loss": 1.7228, "step": 1448 }, { "epoch": 0.08076472883339836, "grad_norm": 0.5249797105789185, "learning_rate": 9.89569200681014e-05, "loss": 1.8915, "step": 1449 }, { "epoch": 0.0808204670865615, "grad_norm": 0.5042678713798523, "learning_rate": 9.895511000025629e-05, "loss": 1.857, "step": 1450 }, { "epoch": 0.08087620533972466, "grad_norm": 0.5119437575340271, "learning_rate": 9.895329837984053e-05, "loss": 1.7033, "step": 1451 }, { "epoch": 0.0809319435928878, "grad_norm": 0.5357143878936768, "learning_rate": 9.895148520691155e-05, "loss": 1.9076, "step": 1452 }, { "epoch": 0.08098768184605094, "grad_norm": 0.47728776931762695, "learning_rate": 9.894967048152688e-05, "loss": 1.4164, "step": 1453 }, { "epoch": 0.08104342009921409, "grad_norm": 0.5269622206687927, "learning_rate": 9.894785420374405e-05, "loss": 1.9833, "step": 1454 }, { "epoch": 0.08109915835237724, "grad_norm": 0.5312412977218628, "learning_rate": 9.894603637362068e-05, "loss": 1.8342, "step": 1455 }, { "epoch": 0.08115489660554039, "grad_norm": 0.5786725282669067, "learning_rate": 9.894421699121439e-05, "loss": 2.1415, "step": 1456 }, { "epoch": 0.08121063485870353, "grad_norm": 0.4990336000919342, "learning_rate": 9.894239605658292e-05, "loss": 1.8387, "step": 1457 }, { "epoch": 0.08126637311186667, "grad_norm": 0.5438005924224854, "learning_rate": 9.8940573569784e-05, "loss": 1.9307, "step": 1458 }, { "epoch": 0.08132211136502981, "grad_norm": 0.5444794297218323, "learning_rate": 9.893874953087543e-05, "loss": 1.7991, "step": 1459 }, { "epoch": 0.08137784961819297, "grad_norm": 0.5221540331840515, "learning_rate": 9.893692393991504e-05, "loss": 1.7898, "step": 1460 }, { "epoch": 0.08143358787135611, "grad_norm": 0.509023129940033, "learning_rate": 9.893509679696077e-05, "loss": 1.8955, "step": 1461 }, { "epoch": 0.08148932612451926, "grad_norm": 0.5018633008003235, "learning_rate": 9.893326810207053e-05, "loss": 1.6774, "step": 1462 }, { "epoch": 0.0815450643776824, "grad_norm": 0.5234403610229492, "learning_rate": 9.893143785530233e-05, "loss": 1.5989, "step": 1463 }, { "epoch": 0.08160080263084554, "grad_norm": 0.5122543573379517, "learning_rate": 9.892960605671421e-05, "loss": 1.6129, "step": 1464 }, { "epoch": 0.0816565408840087, "grad_norm": 0.5005357265472412, "learning_rate": 9.892777270636426e-05, "loss": 1.7568, "step": 1465 }, { "epoch": 0.08171227913717184, "grad_norm": 0.4521070420742035, "learning_rate": 9.892593780431063e-05, "loss": 1.5785, "step": 1466 }, { "epoch": 0.08176801739033498, "grad_norm": 0.5116862058639526, "learning_rate": 9.892410135061151e-05, "loss": 1.6021, "step": 1467 }, { "epoch": 0.08182375564349813, "grad_norm": 0.5345929861068726, "learning_rate": 9.892226334532515e-05, "loss": 1.7185, "step": 1468 }, { "epoch": 0.08187949389666128, "grad_norm": 0.5190909504890442, "learning_rate": 9.892042378850983e-05, "loss": 1.7729, "step": 1469 }, { "epoch": 0.08193523214982443, "grad_norm": 0.5051796436309814, "learning_rate": 9.89185826802239e-05, "loss": 1.7497, "step": 1470 }, { "epoch": 0.08199097040298757, "grad_norm": 0.49057456851005554, "learning_rate": 9.891674002052572e-05, "loss": 1.7032, "step": 1471 }, { "epoch": 0.08204670865615071, "grad_norm": 0.48970887064933777, "learning_rate": 9.891489580947377e-05, "loss": 1.697, "step": 1472 }, { "epoch": 0.08210244690931386, "grad_norm": 0.466226726770401, "learning_rate": 9.891305004712652e-05, "loss": 1.676, "step": 1473 }, { "epoch": 0.08215818516247701, "grad_norm": 0.5120090246200562, "learning_rate": 9.891120273354248e-05, "loss": 1.7862, "step": 1474 }, { "epoch": 0.08221392341564016, "grad_norm": 0.5071076154708862, "learning_rate": 9.890935386878029e-05, "loss": 1.7835, "step": 1475 }, { "epoch": 0.0822696616688033, "grad_norm": 0.5432698726654053, "learning_rate": 9.890750345289855e-05, "loss": 1.9147, "step": 1476 }, { "epoch": 0.08232539992196644, "grad_norm": 0.5131239295005798, "learning_rate": 9.890565148595594e-05, "loss": 1.9944, "step": 1477 }, { "epoch": 0.0823811381751296, "grad_norm": 0.49580785632133484, "learning_rate": 9.890379796801122e-05, "loss": 1.7003, "step": 1478 }, { "epoch": 0.08243687642829274, "grad_norm": 0.5251078605651855, "learning_rate": 9.890194289912315e-05, "loss": 1.5901, "step": 1479 }, { "epoch": 0.08249261468145588, "grad_norm": 0.4522892236709595, "learning_rate": 9.890008627935057e-05, "loss": 1.4628, "step": 1480 }, { "epoch": 0.08254835293461903, "grad_norm": 0.49866771697998047, "learning_rate": 9.889822810875236e-05, "loss": 1.797, "step": 1481 }, { "epoch": 0.08260409118778217, "grad_norm": 0.5042446851730347, "learning_rate": 9.889636838738745e-05, "loss": 1.7715, "step": 1482 }, { "epoch": 0.08265982944094533, "grad_norm": 0.5398827791213989, "learning_rate": 9.889450711531482e-05, "loss": 1.7935, "step": 1483 }, { "epoch": 0.08271556769410847, "grad_norm": 0.5085358023643494, "learning_rate": 9.889264429259351e-05, "loss": 1.7009, "step": 1484 }, { "epoch": 0.08277130594727161, "grad_norm": 0.5344458222389221, "learning_rate": 9.889077991928257e-05, "loss": 1.9159, "step": 1485 }, { "epoch": 0.08282704420043475, "grad_norm": 0.5375879406929016, "learning_rate": 9.888891399544116e-05, "loss": 1.8089, "step": 1486 }, { "epoch": 0.0828827824535979, "grad_norm": 0.5068013668060303, "learning_rate": 9.888704652112841e-05, "loss": 1.81, "step": 1487 }, { "epoch": 0.08293852070676105, "grad_norm": 0.5293126106262207, "learning_rate": 9.88851774964036e-05, "loss": 1.8359, "step": 1488 }, { "epoch": 0.0829942589599242, "grad_norm": 0.538372814655304, "learning_rate": 9.8883306921326e-05, "loss": 1.7542, "step": 1489 }, { "epoch": 0.08304999721308734, "grad_norm": 0.5009732246398926, "learning_rate": 9.888143479595487e-05, "loss": 1.761, "step": 1490 }, { "epoch": 0.08310573546625048, "grad_norm": 0.5073357820510864, "learning_rate": 9.887956112034965e-05, "loss": 1.961, "step": 1491 }, { "epoch": 0.08316147371941364, "grad_norm": 0.5246378779411316, "learning_rate": 9.887768589456973e-05, "loss": 1.6075, "step": 1492 }, { "epoch": 0.08321721197257678, "grad_norm": 0.5965234637260437, "learning_rate": 9.88758091186746e-05, "loss": 1.7721, "step": 1493 }, { "epoch": 0.08327295022573993, "grad_norm": 0.580460250377655, "learning_rate": 9.887393079272378e-05, "loss": 2.0317, "step": 1494 }, { "epoch": 0.08332868847890307, "grad_norm": 0.47487667202949524, "learning_rate": 9.88720509167768e-05, "loss": 1.614, "step": 1495 }, { "epoch": 0.08338442673206621, "grad_norm": 0.511886715888977, "learning_rate": 9.887016949089333e-05, "loss": 1.7988, "step": 1496 }, { "epoch": 0.08344016498522937, "grad_norm": 0.5386150479316711, "learning_rate": 9.886828651513302e-05, "loss": 1.6694, "step": 1497 }, { "epoch": 0.08349590323839251, "grad_norm": 0.5117900967597961, "learning_rate": 9.886640198955557e-05, "loss": 1.9023, "step": 1498 }, { "epoch": 0.08355164149155565, "grad_norm": 0.5726772546768188, "learning_rate": 9.886451591422076e-05, "loss": 1.8974, "step": 1499 }, { "epoch": 0.0836073797447188, "grad_norm": 0.5696210861206055, "learning_rate": 9.886262828918842e-05, "loss": 2.011, "step": 1500 }, { "epoch": 0.08366311799788195, "grad_norm": 0.5422051548957825, "learning_rate": 9.886073911451838e-05, "loss": 1.853, "step": 1501 }, { "epoch": 0.0837188562510451, "grad_norm": 0.5856989622116089, "learning_rate": 9.88588483902706e-05, "loss": 2.0279, "step": 1502 }, { "epoch": 0.08377459450420824, "grad_norm": 0.49369946122169495, "learning_rate": 9.8856956116505e-05, "loss": 1.9006, "step": 1503 }, { "epoch": 0.08383033275737138, "grad_norm": 0.5601094961166382, "learning_rate": 9.88550622932816e-05, "loss": 1.8549, "step": 1504 }, { "epoch": 0.08388607101053452, "grad_norm": 0.5482882857322693, "learning_rate": 9.885316692066048e-05, "loss": 1.6991, "step": 1505 }, { "epoch": 0.08394180926369768, "grad_norm": 0.5111584663391113, "learning_rate": 9.885126999870173e-05, "loss": 1.7942, "step": 1506 }, { "epoch": 0.08399754751686082, "grad_norm": 0.5061234831809998, "learning_rate": 9.884937152746553e-05, "loss": 1.7333, "step": 1507 }, { "epoch": 0.08405328577002397, "grad_norm": 0.5409541726112366, "learning_rate": 9.884747150701207e-05, "loss": 1.8288, "step": 1508 }, { "epoch": 0.08410902402318711, "grad_norm": 0.5025638341903687, "learning_rate": 9.884556993740161e-05, "loss": 1.7986, "step": 1509 }, { "epoch": 0.08416476227635025, "grad_norm": 0.544328510761261, "learning_rate": 9.884366681869447e-05, "loss": 1.9335, "step": 1510 }, { "epoch": 0.08422050052951341, "grad_norm": 0.5425384640693665, "learning_rate": 9.8841762150951e-05, "loss": 1.952, "step": 1511 }, { "epoch": 0.08427623878267655, "grad_norm": 0.546819269657135, "learning_rate": 9.883985593423158e-05, "loss": 1.6983, "step": 1512 }, { "epoch": 0.0843319770358397, "grad_norm": 0.5102137327194214, "learning_rate": 9.88379481685967e-05, "loss": 1.9128, "step": 1513 }, { "epoch": 0.08438771528900284, "grad_norm": 0.5642107725143433, "learning_rate": 9.883603885410686e-05, "loss": 1.8798, "step": 1514 }, { "epoch": 0.084443453542166, "grad_norm": 0.5285095572471619, "learning_rate": 9.88341279908226e-05, "loss": 1.987, "step": 1515 }, { "epoch": 0.08449919179532914, "grad_norm": 0.5712692737579346, "learning_rate": 9.88322155788045e-05, "loss": 1.9272, "step": 1516 }, { "epoch": 0.08455493004849228, "grad_norm": 0.5068216919898987, "learning_rate": 9.883030161811324e-05, "loss": 1.747, "step": 1517 }, { "epoch": 0.08461066830165542, "grad_norm": 0.5292205810546875, "learning_rate": 9.882838610880954e-05, "loss": 1.7361, "step": 1518 }, { "epoch": 0.08466640655481857, "grad_norm": 0.5131486654281616, "learning_rate": 9.88264690509541e-05, "loss": 1.7197, "step": 1519 }, { "epoch": 0.08472214480798172, "grad_norm": 0.5345507860183716, "learning_rate": 9.882455044460773e-05, "loss": 1.6553, "step": 1520 }, { "epoch": 0.08477788306114487, "grad_norm": 0.5729446411132812, "learning_rate": 9.88226302898313e-05, "loss": 1.9354, "step": 1521 }, { "epoch": 0.08483362131430801, "grad_norm": 0.5425586700439453, "learning_rate": 9.882070858668568e-05, "loss": 1.7173, "step": 1522 }, { "epoch": 0.08488935956747115, "grad_norm": 0.5828628540039062, "learning_rate": 9.881878533523185e-05, "loss": 1.5161, "step": 1523 }, { "epoch": 0.08494509782063431, "grad_norm": 0.4496408998966217, "learning_rate": 9.881686053553077e-05, "loss": 1.4486, "step": 1524 }, { "epoch": 0.08500083607379745, "grad_norm": 0.5365184545516968, "learning_rate": 9.88149341876435e-05, "loss": 1.7378, "step": 1525 }, { "epoch": 0.0850565743269606, "grad_norm": 0.5183097720146179, "learning_rate": 9.881300629163113e-05, "loss": 1.7466, "step": 1526 }, { "epoch": 0.08511231258012374, "grad_norm": 0.5500345826148987, "learning_rate": 9.88110768475548e-05, "loss": 2.05, "step": 1527 }, { "epoch": 0.08516805083328688, "grad_norm": 0.5311182141304016, "learning_rate": 9.88091458554757e-05, "loss": 1.9213, "step": 1528 }, { "epoch": 0.08522378908645004, "grad_norm": 0.5297403335571289, "learning_rate": 9.880721331545507e-05, "loss": 1.7725, "step": 1529 }, { "epoch": 0.08527952733961318, "grad_norm": 0.4777231514453888, "learning_rate": 9.880527922755418e-05, "loss": 1.7671, "step": 1530 }, { "epoch": 0.08533526559277632, "grad_norm": 0.5027580261230469, "learning_rate": 9.880334359183441e-05, "loss": 1.5094, "step": 1531 }, { "epoch": 0.08539100384593946, "grad_norm": 0.5496742725372314, "learning_rate": 9.880140640835711e-05, "loss": 1.8291, "step": 1532 }, { "epoch": 0.08544674209910261, "grad_norm": 0.5041139721870422, "learning_rate": 9.879946767718374e-05, "loss": 1.6669, "step": 1533 }, { "epoch": 0.08550248035226576, "grad_norm": 0.5976061820983887, "learning_rate": 9.879752739837578e-05, "loss": 2.1902, "step": 1534 }, { "epoch": 0.0855582186054289, "grad_norm": 0.5422946810722351, "learning_rate": 9.879558557199475e-05, "loss": 1.5727, "step": 1535 }, { "epoch": 0.08561395685859205, "grad_norm": 0.4999959170818329, "learning_rate": 9.879364219810226e-05, "loss": 1.6102, "step": 1536 }, { "epoch": 0.08566969511175519, "grad_norm": 0.5026562213897705, "learning_rate": 9.879169727675991e-05, "loss": 1.7124, "step": 1537 }, { "epoch": 0.08572543336491835, "grad_norm": 0.5175659656524658, "learning_rate": 9.87897508080294e-05, "loss": 1.7585, "step": 1538 }, { "epoch": 0.08578117161808149, "grad_norm": 0.5337525010108948, "learning_rate": 9.878780279197247e-05, "loss": 1.7857, "step": 1539 }, { "epoch": 0.08583690987124463, "grad_norm": 0.5325166583061218, "learning_rate": 9.878585322865087e-05, "loss": 1.865, "step": 1540 }, { "epoch": 0.08589264812440778, "grad_norm": 0.46590784192085266, "learning_rate": 9.878390211812646e-05, "loss": 1.627, "step": 1541 }, { "epoch": 0.08594838637757092, "grad_norm": 0.4856724441051483, "learning_rate": 9.87819494604611e-05, "loss": 1.7221, "step": 1542 }, { "epoch": 0.08600412463073408, "grad_norm": 0.5396975874900818, "learning_rate": 9.877999525571673e-05, "loss": 1.7696, "step": 1543 }, { "epoch": 0.08605986288389722, "grad_norm": 0.49516481161117554, "learning_rate": 9.87780395039553e-05, "loss": 1.6928, "step": 1544 }, { "epoch": 0.08611560113706036, "grad_norm": 0.5212313532829285, "learning_rate": 9.877608220523886e-05, "loss": 1.8461, "step": 1545 }, { "epoch": 0.0861713393902235, "grad_norm": 0.5174347162246704, "learning_rate": 9.877412335962948e-05, "loss": 1.6598, "step": 1546 }, { "epoch": 0.08622707764338666, "grad_norm": 0.5417358875274658, "learning_rate": 9.877216296718929e-05, "loss": 1.8449, "step": 1547 }, { "epoch": 0.0862828158965498, "grad_norm": 0.6204573512077332, "learning_rate": 9.877020102798044e-05, "loss": 2.0521, "step": 1548 }, { "epoch": 0.08633855414971295, "grad_norm": 0.548689067363739, "learning_rate": 9.876823754206517e-05, "loss": 1.8019, "step": 1549 }, { "epoch": 0.08639429240287609, "grad_norm": 0.5634471774101257, "learning_rate": 9.876627250950573e-05, "loss": 1.9138, "step": 1550 }, { "epoch": 0.08645003065603923, "grad_norm": 0.517440915107727, "learning_rate": 9.876430593036445e-05, "loss": 1.6576, "step": 1551 }, { "epoch": 0.08650576890920239, "grad_norm": 0.5255969762802124, "learning_rate": 9.876233780470373e-05, "loss": 1.9165, "step": 1552 }, { "epoch": 0.08656150716236553, "grad_norm": 0.5497751235961914, "learning_rate": 9.876036813258593e-05, "loss": 1.7924, "step": 1553 }, { "epoch": 0.08661724541552868, "grad_norm": 0.49066075682640076, "learning_rate": 9.875839691407355e-05, "loss": 1.7025, "step": 1554 }, { "epoch": 0.08667298366869182, "grad_norm": 0.5411027669906616, "learning_rate": 9.875642414922913e-05, "loss": 1.7742, "step": 1555 }, { "epoch": 0.08672872192185498, "grad_norm": 0.5388767123222351, "learning_rate": 9.875444983811517e-05, "loss": 1.7676, "step": 1556 }, { "epoch": 0.08678446017501812, "grad_norm": 0.540668249130249, "learning_rate": 9.875247398079434e-05, "loss": 1.7824, "step": 1557 }, { "epoch": 0.08684019842818126, "grad_norm": 0.4785401523113251, "learning_rate": 9.875049657732928e-05, "loss": 1.5643, "step": 1558 }, { "epoch": 0.0868959366813444, "grad_norm": 0.4758340120315552, "learning_rate": 9.87485176277827e-05, "loss": 1.7751, "step": 1559 }, { "epoch": 0.08695167493450755, "grad_norm": 0.5260589122772217, "learning_rate": 9.874653713221736e-05, "loss": 1.6758, "step": 1560 }, { "epoch": 0.0870074131876707, "grad_norm": 0.5716840624809265, "learning_rate": 9.874455509069608e-05, "loss": 1.9237, "step": 1561 }, { "epoch": 0.08706315144083385, "grad_norm": 0.5434233546257019, "learning_rate": 9.874257150328171e-05, "loss": 1.8882, "step": 1562 }, { "epoch": 0.08711888969399699, "grad_norm": 0.562435507774353, "learning_rate": 9.874058637003715e-05, "loss": 2.0451, "step": 1563 }, { "epoch": 0.08717462794716013, "grad_norm": 0.5642979741096497, "learning_rate": 9.87385996910254e-05, "loss": 1.924, "step": 1564 }, { "epoch": 0.08723036620032328, "grad_norm": 0.5052669048309326, "learning_rate": 9.87366114663094e-05, "loss": 1.58, "step": 1565 }, { "epoch": 0.08728610445348643, "grad_norm": 0.5220628380775452, "learning_rate": 9.873462169595225e-05, "loss": 1.7895, "step": 1566 }, { "epoch": 0.08734184270664958, "grad_norm": 0.517431378364563, "learning_rate": 9.873263038001706e-05, "loss": 1.6593, "step": 1567 }, { "epoch": 0.08739758095981272, "grad_norm": 0.5140258073806763, "learning_rate": 9.873063751856693e-05, "loss": 1.8271, "step": 1568 }, { "epoch": 0.08745331921297586, "grad_norm": 0.4922142028808594, "learning_rate": 9.872864311166513e-05, "loss": 1.6083, "step": 1569 }, { "epoch": 0.08750905746613902, "grad_norm": 0.5390502214431763, "learning_rate": 9.872664715937485e-05, "loss": 1.4434, "step": 1570 }, { "epoch": 0.08756479571930216, "grad_norm": 0.5033831596374512, "learning_rate": 9.872464966175943e-05, "loss": 1.7666, "step": 1571 }, { "epoch": 0.0876205339724653, "grad_norm": 0.5968888401985168, "learning_rate": 9.872265061888222e-05, "loss": 2.129, "step": 1572 }, { "epoch": 0.08767627222562845, "grad_norm": 0.4963712990283966, "learning_rate": 9.87206500308066e-05, "loss": 1.757, "step": 1573 }, { "epoch": 0.08773201047879159, "grad_norm": 0.561555802822113, "learning_rate": 9.871864789759602e-05, "loss": 1.8953, "step": 1574 }, { "epoch": 0.08778774873195475, "grad_norm": 0.5095016956329346, "learning_rate": 9.871664421931397e-05, "loss": 1.5125, "step": 1575 }, { "epoch": 0.08784348698511789, "grad_norm": 0.5717408061027527, "learning_rate": 9.8714638996024e-05, "loss": 1.9326, "step": 1576 }, { "epoch": 0.08789922523828103, "grad_norm": 0.5086256861686707, "learning_rate": 9.871263222778972e-05, "loss": 1.4956, "step": 1577 }, { "epoch": 0.08795496349144417, "grad_norm": 0.5559898614883423, "learning_rate": 9.871062391467476e-05, "loss": 2.0481, "step": 1578 }, { "epoch": 0.08801070174460733, "grad_norm": 0.511561930179596, "learning_rate": 9.870861405674281e-05, "loss": 1.6748, "step": 1579 }, { "epoch": 0.08806643999777047, "grad_norm": 0.46475693583488464, "learning_rate": 9.87066026540576e-05, "loss": 1.5146, "step": 1580 }, { "epoch": 0.08812217825093362, "grad_norm": 0.619973361492157, "learning_rate": 9.870458970668295e-05, "loss": 1.9752, "step": 1581 }, { "epoch": 0.08817791650409676, "grad_norm": 0.5257066488265991, "learning_rate": 9.870257521468267e-05, "loss": 1.8943, "step": 1582 }, { "epoch": 0.0882336547572599, "grad_norm": 0.48758870363235474, "learning_rate": 9.870055917812066e-05, "loss": 1.7243, "step": 1583 }, { "epoch": 0.08828939301042306, "grad_norm": 0.500957190990448, "learning_rate": 9.869854159706087e-05, "loss": 1.608, "step": 1584 }, { "epoch": 0.0883451312635862, "grad_norm": 0.5307281613349915, "learning_rate": 9.869652247156726e-05, "loss": 1.8326, "step": 1585 }, { "epoch": 0.08840086951674934, "grad_norm": 0.5321508049964905, "learning_rate": 9.869450180170388e-05, "loss": 1.5715, "step": 1586 }, { "epoch": 0.08845660776991249, "grad_norm": 0.512824296951294, "learning_rate": 9.869247958753483e-05, "loss": 1.9452, "step": 1587 }, { "epoch": 0.08851234602307563, "grad_norm": 0.5297205448150635, "learning_rate": 9.86904558291242e-05, "loss": 1.7894, "step": 1588 }, { "epoch": 0.08856808427623879, "grad_norm": 0.5388361215591431, "learning_rate": 9.86884305265362e-05, "loss": 1.8428, "step": 1589 }, { "epoch": 0.08862382252940193, "grad_norm": 0.5642775297164917, "learning_rate": 9.868640367983507e-05, "loss": 1.9602, "step": 1590 }, { "epoch": 0.08867956078256507, "grad_norm": 0.5613628029823303, "learning_rate": 9.868437528908507e-05, "loss": 1.8967, "step": 1591 }, { "epoch": 0.08873529903572822, "grad_norm": 0.4843713641166687, "learning_rate": 9.868234535435052e-05, "loss": 1.5939, "step": 1592 }, { "epoch": 0.08879103728889137, "grad_norm": 0.5549110770225525, "learning_rate": 9.868031387569583e-05, "loss": 1.7461, "step": 1593 }, { "epoch": 0.08884677554205452, "grad_norm": 0.5344760417938232, "learning_rate": 9.867828085318541e-05, "loss": 1.7843, "step": 1594 }, { "epoch": 0.08890251379521766, "grad_norm": 0.49532350897789, "learning_rate": 9.867624628688374e-05, "loss": 1.981, "step": 1595 }, { "epoch": 0.0889582520483808, "grad_norm": 0.48208191990852356, "learning_rate": 9.867421017685531e-05, "loss": 1.3437, "step": 1596 }, { "epoch": 0.08901399030154394, "grad_norm": 0.489444762468338, "learning_rate": 9.867217252316476e-05, "loss": 1.6426, "step": 1597 }, { "epoch": 0.0890697285547071, "grad_norm": 0.5148588418960571, "learning_rate": 9.867013332587667e-05, "loss": 1.5808, "step": 1598 }, { "epoch": 0.08912546680787024, "grad_norm": 0.5365609526634216, "learning_rate": 9.86680925850557e-05, "loss": 1.8197, "step": 1599 }, { "epoch": 0.08918120506103339, "grad_norm": 0.48567450046539307, "learning_rate": 9.86660503007666e-05, "loss": 1.6238, "step": 1600 }, { "epoch": 0.08923694331419653, "grad_norm": 0.515129029750824, "learning_rate": 9.866400647307413e-05, "loss": 1.8063, "step": 1601 }, { "epoch": 0.08929268156735969, "grad_norm": 0.5591225028038025, "learning_rate": 9.86619611020431e-05, "loss": 1.8849, "step": 1602 }, { "epoch": 0.08934841982052283, "grad_norm": 0.4950789213180542, "learning_rate": 9.865991418773837e-05, "loss": 1.5961, "step": 1603 }, { "epoch": 0.08940415807368597, "grad_norm": 0.5623775124549866, "learning_rate": 9.865786573022488e-05, "loss": 1.782, "step": 1604 }, { "epoch": 0.08945989632684911, "grad_norm": 0.5508179664611816, "learning_rate": 9.865581572956759e-05, "loss": 1.9102, "step": 1605 }, { "epoch": 0.08951563458001226, "grad_norm": 0.5296784043312073, "learning_rate": 9.86537641858315e-05, "loss": 1.8494, "step": 1606 }, { "epoch": 0.08957137283317541, "grad_norm": 0.5068146586418152, "learning_rate": 9.865171109908169e-05, "loss": 1.7515, "step": 1607 }, { "epoch": 0.08962711108633856, "grad_norm": 0.5015462636947632, "learning_rate": 9.864965646938326e-05, "loss": 1.6874, "step": 1608 }, { "epoch": 0.0896828493395017, "grad_norm": 0.5293746590614319, "learning_rate": 9.864760029680137e-05, "loss": 1.7417, "step": 1609 }, { "epoch": 0.08973858759266484, "grad_norm": 0.5211681127548218, "learning_rate": 9.864554258140124e-05, "loss": 1.7553, "step": 1610 }, { "epoch": 0.08979432584582799, "grad_norm": 0.7411361336708069, "learning_rate": 9.864348332324811e-05, "loss": 1.7663, "step": 1611 }, { "epoch": 0.08985006409899114, "grad_norm": 0.4988972842693329, "learning_rate": 9.864142252240731e-05, "loss": 1.6, "step": 1612 }, { "epoch": 0.08990580235215428, "grad_norm": 0.5340063571929932, "learning_rate": 9.863936017894418e-05, "loss": 1.8076, "step": 1613 }, { "epoch": 0.08996154060531743, "grad_norm": 0.5994722247123718, "learning_rate": 9.863729629292414e-05, "loss": 1.7864, "step": 1614 }, { "epoch": 0.09001727885848057, "grad_norm": 0.541131854057312, "learning_rate": 9.863523086441264e-05, "loss": 1.931, "step": 1615 }, { "epoch": 0.09007301711164373, "grad_norm": 0.5259929299354553, "learning_rate": 9.863316389347517e-05, "loss": 1.7562, "step": 1616 }, { "epoch": 0.09012875536480687, "grad_norm": 0.5242890119552612, "learning_rate": 9.863109538017729e-05, "loss": 1.6973, "step": 1617 }, { "epoch": 0.09018449361797001, "grad_norm": 0.5834923386573792, "learning_rate": 9.862902532458461e-05, "loss": 2.0494, "step": 1618 }, { "epoch": 0.09024023187113316, "grad_norm": 0.4912288188934326, "learning_rate": 9.862695372676278e-05, "loss": 1.6505, "step": 1619 }, { "epoch": 0.0902959701242963, "grad_norm": 0.5288010239601135, "learning_rate": 9.862488058677748e-05, "loss": 1.734, "step": 1620 }, { "epoch": 0.09035170837745946, "grad_norm": 0.5029554963111877, "learning_rate": 9.862280590469448e-05, "loss": 1.8098, "step": 1621 }, { "epoch": 0.0904074466306226, "grad_norm": 0.531711995601654, "learning_rate": 9.862072968057956e-05, "loss": 1.8394, "step": 1622 }, { "epoch": 0.09046318488378574, "grad_norm": 0.4818442165851593, "learning_rate": 9.861865191449858e-05, "loss": 1.6742, "step": 1623 }, { "epoch": 0.09051892313694888, "grad_norm": 0.4834239184856415, "learning_rate": 9.861657260651742e-05, "loss": 1.6425, "step": 1624 }, { "epoch": 0.09057466139011204, "grad_norm": 0.4923589825630188, "learning_rate": 9.861449175670204e-05, "loss": 1.5693, "step": 1625 }, { "epoch": 0.09063039964327518, "grad_norm": 0.48194825649261475, "learning_rate": 9.861240936511842e-05, "loss": 1.6782, "step": 1626 }, { "epoch": 0.09068613789643833, "grad_norm": 0.5542406439781189, "learning_rate": 9.86103254318326e-05, "loss": 1.9775, "step": 1627 }, { "epoch": 0.09074187614960147, "grad_norm": 0.6013079881668091, "learning_rate": 9.860823995691068e-05, "loss": 1.9425, "step": 1628 }, { "epoch": 0.09079761440276461, "grad_norm": 0.5376304984092712, "learning_rate": 9.860615294041879e-05, "loss": 1.6473, "step": 1629 }, { "epoch": 0.09085335265592777, "grad_norm": 0.5485152006149292, "learning_rate": 9.860406438242313e-05, "loss": 1.6367, "step": 1630 }, { "epoch": 0.09090909090909091, "grad_norm": 0.5142073035240173, "learning_rate": 9.860197428298991e-05, "loss": 1.7602, "step": 1631 }, { "epoch": 0.09096482916225405, "grad_norm": 0.49521228671073914, "learning_rate": 9.859988264218546e-05, "loss": 1.546, "step": 1632 }, { "epoch": 0.0910205674154172, "grad_norm": 0.5011737942695618, "learning_rate": 9.859778946007608e-05, "loss": 1.5578, "step": 1633 }, { "epoch": 0.09107630566858034, "grad_norm": 0.4523265063762665, "learning_rate": 9.859569473672816e-05, "loss": 1.3888, "step": 1634 }, { "epoch": 0.0911320439217435, "grad_norm": 0.48054036498069763, "learning_rate": 9.859359847220815e-05, "loss": 1.7516, "step": 1635 }, { "epoch": 0.09118778217490664, "grad_norm": 0.5349341034889221, "learning_rate": 9.85915006665825e-05, "loss": 1.7055, "step": 1636 }, { "epoch": 0.09124352042806978, "grad_norm": 0.5274312496185303, "learning_rate": 9.858940131991777e-05, "loss": 1.8203, "step": 1637 }, { "epoch": 0.09129925868123293, "grad_norm": 0.4654419720172882, "learning_rate": 9.85873004322805e-05, "loss": 1.5783, "step": 1638 }, { "epoch": 0.09135499693439608, "grad_norm": 0.5258073806762695, "learning_rate": 9.858519800373738e-05, "loss": 1.7707, "step": 1639 }, { "epoch": 0.09141073518755923, "grad_norm": 0.4929850995540619, "learning_rate": 9.858309403435501e-05, "loss": 1.6027, "step": 1640 }, { "epoch": 0.09146647344072237, "grad_norm": 0.5121711492538452, "learning_rate": 9.85809885242002e-05, "loss": 1.7874, "step": 1641 }, { "epoch": 0.09152221169388551, "grad_norm": 0.4955439567565918, "learning_rate": 9.857888147333965e-05, "loss": 1.7223, "step": 1642 }, { "epoch": 0.09157794994704865, "grad_norm": 0.519477903842926, "learning_rate": 9.857677288184022e-05, "loss": 1.8618, "step": 1643 }, { "epoch": 0.09163368820021181, "grad_norm": 0.5247395038604736, "learning_rate": 9.857466274976878e-05, "loss": 1.761, "step": 1644 }, { "epoch": 0.09168942645337495, "grad_norm": 0.4881756901741028, "learning_rate": 9.857255107719225e-05, "loss": 1.7272, "step": 1645 }, { "epoch": 0.0917451647065381, "grad_norm": 0.5688063502311707, "learning_rate": 9.857043786417759e-05, "loss": 1.7532, "step": 1646 }, { "epoch": 0.09180090295970124, "grad_norm": 0.531910240650177, "learning_rate": 9.856832311079183e-05, "loss": 1.9235, "step": 1647 }, { "epoch": 0.0918566412128644, "grad_norm": 0.5271464586257935, "learning_rate": 9.856620681710205e-05, "loss": 1.8481, "step": 1648 }, { "epoch": 0.09191237946602754, "grad_norm": 0.5019913911819458, "learning_rate": 9.856408898317533e-05, "loss": 1.7273, "step": 1649 }, { "epoch": 0.09196811771919068, "grad_norm": 0.5375306010246277, "learning_rate": 9.856196960907887e-05, "loss": 1.8292, "step": 1650 }, { "epoch": 0.09202385597235382, "grad_norm": 0.551287829875946, "learning_rate": 9.855984869487985e-05, "loss": 1.7672, "step": 1651 }, { "epoch": 0.09207959422551697, "grad_norm": 0.5110806226730347, "learning_rate": 9.855772624064557e-05, "loss": 1.7338, "step": 1652 }, { "epoch": 0.09213533247868012, "grad_norm": 0.5807773470878601, "learning_rate": 9.855560224644332e-05, "loss": 1.8558, "step": 1653 }, { "epoch": 0.09219107073184327, "grad_norm": 0.5399064421653748, "learning_rate": 9.855347671234045e-05, "loss": 1.7338, "step": 1654 }, { "epoch": 0.09224680898500641, "grad_norm": 0.5670611262321472, "learning_rate": 9.855134963840441e-05, "loss": 1.9314, "step": 1655 }, { "epoch": 0.09230254723816955, "grad_norm": 0.49795302748680115, "learning_rate": 9.854922102470262e-05, "loss": 1.7196, "step": 1656 }, { "epoch": 0.0923582854913327, "grad_norm": 0.5752295255661011, "learning_rate": 9.85470908713026e-05, "loss": 1.7249, "step": 1657 }, { "epoch": 0.09241402374449585, "grad_norm": 0.4967830181121826, "learning_rate": 9.854495917827191e-05, "loss": 1.7368, "step": 1658 }, { "epoch": 0.092469761997659, "grad_norm": 0.4957406520843506, "learning_rate": 9.854282594567816e-05, "loss": 1.8287, "step": 1659 }, { "epoch": 0.09252550025082214, "grad_norm": 0.49035385251045227, "learning_rate": 9.854069117358899e-05, "loss": 1.743, "step": 1660 }, { "epoch": 0.09258123850398528, "grad_norm": 0.5366220474243164, "learning_rate": 9.853855486207211e-05, "loss": 1.7903, "step": 1661 }, { "epoch": 0.09263697675714844, "grad_norm": 0.5238292217254639, "learning_rate": 9.853641701119525e-05, "loss": 1.6038, "step": 1662 }, { "epoch": 0.09269271501031158, "grad_norm": 0.507854700088501, "learning_rate": 9.853427762102625e-05, "loss": 1.7459, "step": 1663 }, { "epoch": 0.09274845326347472, "grad_norm": 0.5182837247848511, "learning_rate": 9.853213669163293e-05, "loss": 1.7409, "step": 1664 }, { "epoch": 0.09280419151663787, "grad_norm": 0.5023046135902405, "learning_rate": 9.852999422308319e-05, "loss": 1.8207, "step": 1665 }, { "epoch": 0.09285992976980101, "grad_norm": 0.6185427308082581, "learning_rate": 9.852785021544499e-05, "loss": 1.9794, "step": 1666 }, { "epoch": 0.09291566802296417, "grad_norm": 0.5567124485969543, "learning_rate": 9.852570466878632e-05, "loss": 1.8052, "step": 1667 }, { "epoch": 0.09297140627612731, "grad_norm": 0.5299728512763977, "learning_rate": 9.852355758317523e-05, "loss": 1.6414, "step": 1668 }, { "epoch": 0.09302714452929045, "grad_norm": 0.47446316480636597, "learning_rate": 9.85214089586798e-05, "loss": 1.561, "step": 1669 }, { "epoch": 0.0930828827824536, "grad_norm": 0.5260158181190491, "learning_rate": 9.851925879536817e-05, "loss": 1.7192, "step": 1670 }, { "epoch": 0.09313862103561675, "grad_norm": 0.5200673341751099, "learning_rate": 9.851710709330855e-05, "loss": 1.6869, "step": 1671 }, { "epoch": 0.0931943592887799, "grad_norm": 0.5707138180732727, "learning_rate": 9.851495385256915e-05, "loss": 1.7307, "step": 1672 }, { "epoch": 0.09325009754194304, "grad_norm": 0.6008026003837585, "learning_rate": 9.851279907321829e-05, "loss": 1.8593, "step": 1673 }, { "epoch": 0.09330583579510618, "grad_norm": 0.4921055734157562, "learning_rate": 9.851064275532428e-05, "loss": 1.7155, "step": 1674 }, { "epoch": 0.09336157404826932, "grad_norm": 0.48389917612075806, "learning_rate": 9.850848489895553e-05, "loss": 1.7011, "step": 1675 }, { "epoch": 0.09341731230143248, "grad_norm": 0.6712982058525085, "learning_rate": 9.850632550418046e-05, "loss": 1.8851, "step": 1676 }, { "epoch": 0.09347305055459562, "grad_norm": 0.49884751439094543, "learning_rate": 9.850416457106755e-05, "loss": 1.7392, "step": 1677 }, { "epoch": 0.09352878880775876, "grad_norm": 0.5436164736747742, "learning_rate": 9.850200209968535e-05, "loss": 1.8583, "step": 1678 }, { "epoch": 0.09358452706092191, "grad_norm": 0.543387234210968, "learning_rate": 9.849983809010242e-05, "loss": 1.9008, "step": 1679 }, { "epoch": 0.09364026531408505, "grad_norm": 0.5220986604690552, "learning_rate": 9.849767254238741e-05, "loss": 1.8536, "step": 1680 }, { "epoch": 0.0936960035672482, "grad_norm": 0.5086224675178528, "learning_rate": 9.849550545660898e-05, "loss": 1.6492, "step": 1681 }, { "epoch": 0.09375174182041135, "grad_norm": 0.5263844728469849, "learning_rate": 9.849333683283587e-05, "loss": 1.8646, "step": 1682 }, { "epoch": 0.09380748007357449, "grad_norm": 0.48118674755096436, "learning_rate": 9.849116667113684e-05, "loss": 1.6978, "step": 1683 }, { "epoch": 0.09386321832673764, "grad_norm": 0.5442405939102173, "learning_rate": 9.848899497158075e-05, "loss": 1.7446, "step": 1684 }, { "epoch": 0.09391895657990079, "grad_norm": 0.5518308877944946, "learning_rate": 9.848682173423642e-05, "loss": 1.9409, "step": 1685 }, { "epoch": 0.09397469483306393, "grad_norm": 0.5064495205879211, "learning_rate": 9.848464695917283e-05, "loss": 1.9023, "step": 1686 }, { "epoch": 0.09403043308622708, "grad_norm": 0.5437746644020081, "learning_rate": 9.84824706464589e-05, "loss": 1.8456, "step": 1687 }, { "epoch": 0.09408617133939022, "grad_norm": 0.4933926463127136, "learning_rate": 9.848029279616369e-05, "loss": 1.6156, "step": 1688 }, { "epoch": 0.09414190959255336, "grad_norm": 0.5288189649581909, "learning_rate": 9.847811340835625e-05, "loss": 1.8053, "step": 1689 }, { "epoch": 0.09419764784571652, "grad_norm": 0.5238629579544067, "learning_rate": 9.847593248310569e-05, "loss": 1.8396, "step": 1690 }, { "epoch": 0.09425338609887966, "grad_norm": 0.5135747790336609, "learning_rate": 9.847375002048119e-05, "loss": 1.702, "step": 1691 }, { "epoch": 0.0943091243520428, "grad_norm": 0.48049938678741455, "learning_rate": 9.847156602055196e-05, "loss": 1.7258, "step": 1692 }, { "epoch": 0.09436486260520595, "grad_norm": 0.5790214538574219, "learning_rate": 9.846938048338728e-05, "loss": 1.9521, "step": 1693 }, { "epoch": 0.0944206008583691, "grad_norm": 0.49259278178215027, "learning_rate": 9.846719340905643e-05, "loss": 1.7358, "step": 1694 }, { "epoch": 0.09447633911153225, "grad_norm": 0.5396574139595032, "learning_rate": 9.846500479762879e-05, "loss": 1.9847, "step": 1695 }, { "epoch": 0.09453207736469539, "grad_norm": 0.5003666877746582, "learning_rate": 9.846281464917377e-05, "loss": 1.777, "step": 1696 }, { "epoch": 0.09458781561785853, "grad_norm": 0.5158617496490479, "learning_rate": 9.846062296376083e-05, "loss": 1.6861, "step": 1697 }, { "epoch": 0.09464355387102168, "grad_norm": 0.5154086351394653, "learning_rate": 9.845842974145947e-05, "loss": 1.8176, "step": 1698 }, { "epoch": 0.09469929212418483, "grad_norm": 0.5052759051322937, "learning_rate": 9.845623498233926e-05, "loss": 1.6658, "step": 1699 }, { "epoch": 0.09475503037734798, "grad_norm": 0.6677058339118958, "learning_rate": 9.845403868646979e-05, "loss": 1.7287, "step": 1700 }, { "epoch": 0.09481076863051112, "grad_norm": 0.5167236924171448, "learning_rate": 9.845184085392072e-05, "loss": 1.6861, "step": 1701 }, { "epoch": 0.09486650688367426, "grad_norm": 0.57721346616745, "learning_rate": 9.844964148476175e-05, "loss": 1.9309, "step": 1702 }, { "epoch": 0.0949222451368374, "grad_norm": 0.4876415729522705, "learning_rate": 9.844744057906263e-05, "loss": 1.738, "step": 1703 }, { "epoch": 0.09497798339000056, "grad_norm": 0.5089074373245239, "learning_rate": 9.844523813689316e-05, "loss": 1.8729, "step": 1704 }, { "epoch": 0.0950337216431637, "grad_norm": 0.5102959871292114, "learning_rate": 9.844303415832322e-05, "loss": 1.901, "step": 1705 }, { "epoch": 0.09508945989632685, "grad_norm": 0.5445943474769592, "learning_rate": 9.844082864342265e-05, "loss": 1.7838, "step": 1706 }, { "epoch": 0.09514519814948999, "grad_norm": 0.5227236151695251, "learning_rate": 9.843862159226142e-05, "loss": 1.7044, "step": 1707 }, { "epoch": 0.09520093640265315, "grad_norm": 0.5036524534225464, "learning_rate": 9.843641300490956e-05, "loss": 1.6637, "step": 1708 }, { "epoch": 0.09525667465581629, "grad_norm": 0.5071728825569153, "learning_rate": 9.843420288143706e-05, "loss": 1.5714, "step": 1709 }, { "epoch": 0.09531241290897943, "grad_norm": 0.563736081123352, "learning_rate": 9.843199122191404e-05, "loss": 2.0123, "step": 1710 }, { "epoch": 0.09536815116214258, "grad_norm": 0.5531306266784668, "learning_rate": 9.842977802641065e-05, "loss": 1.74, "step": 1711 }, { "epoch": 0.09542388941530572, "grad_norm": 0.5610520243644714, "learning_rate": 9.842756329499704e-05, "loss": 1.8003, "step": 1712 }, { "epoch": 0.09547962766846887, "grad_norm": 0.498121440410614, "learning_rate": 9.842534702774349e-05, "loss": 1.6448, "step": 1713 }, { "epoch": 0.09553536592163202, "grad_norm": 0.5231457948684692, "learning_rate": 9.842312922472028e-05, "loss": 1.8862, "step": 1714 }, { "epoch": 0.09559110417479516, "grad_norm": 0.520879864692688, "learning_rate": 9.842090988599772e-05, "loss": 1.7858, "step": 1715 }, { "epoch": 0.0956468424279583, "grad_norm": 0.5959715247154236, "learning_rate": 9.841868901164622e-05, "loss": 1.8487, "step": 1716 }, { "epoch": 0.09570258068112146, "grad_norm": 0.5337534546852112, "learning_rate": 9.84164666017362e-05, "loss": 1.5147, "step": 1717 }, { "epoch": 0.0957583189342846, "grad_norm": 0.5244635939598083, "learning_rate": 9.841424265633816e-05, "loss": 1.9583, "step": 1718 }, { "epoch": 0.09581405718744775, "grad_norm": 0.5573442578315735, "learning_rate": 9.84120171755226e-05, "loss": 1.7111, "step": 1719 }, { "epoch": 0.09586979544061089, "grad_norm": 0.5416032671928406, "learning_rate": 9.840979015936014e-05, "loss": 1.9152, "step": 1720 }, { "epoch": 0.09592553369377403, "grad_norm": 0.5546048283576965, "learning_rate": 9.840756160792138e-05, "loss": 1.7902, "step": 1721 }, { "epoch": 0.09598127194693719, "grad_norm": 0.5208713412284851, "learning_rate": 9.840533152127697e-05, "loss": 1.864, "step": 1722 }, { "epoch": 0.09603701020010033, "grad_norm": 0.5275363326072693, "learning_rate": 9.840309989949769e-05, "loss": 1.7866, "step": 1723 }, { "epoch": 0.09609274845326347, "grad_norm": 0.5389683246612549, "learning_rate": 9.84008667426543e-05, "loss": 1.8186, "step": 1724 }, { "epoch": 0.09614848670642662, "grad_norm": 0.5352590680122375, "learning_rate": 9.839863205081761e-05, "loss": 1.8207, "step": 1725 }, { "epoch": 0.09620422495958976, "grad_norm": 0.5303811430931091, "learning_rate": 9.839639582405849e-05, "loss": 1.8912, "step": 1726 }, { "epoch": 0.09625996321275292, "grad_norm": 0.4606251120567322, "learning_rate": 9.839415806244785e-05, "loss": 1.6001, "step": 1727 }, { "epoch": 0.09631570146591606, "grad_norm": 0.48041149973869324, "learning_rate": 9.839191876605668e-05, "loss": 1.6385, "step": 1728 }, { "epoch": 0.0963714397190792, "grad_norm": 0.5307428240776062, "learning_rate": 9.838967793495601e-05, "loss": 1.8683, "step": 1729 }, { "epoch": 0.09642717797224234, "grad_norm": 0.48561206459999084, "learning_rate": 9.838743556921688e-05, "loss": 1.7169, "step": 1730 }, { "epoch": 0.0964829162254055, "grad_norm": 0.5501610040664673, "learning_rate": 9.83851916689104e-05, "loss": 1.7714, "step": 1731 }, { "epoch": 0.09653865447856864, "grad_norm": 0.5766540765762329, "learning_rate": 9.838294623410776e-05, "loss": 1.961, "step": 1732 }, { "epoch": 0.09659439273173179, "grad_norm": 0.5572078824043274, "learning_rate": 9.838069926488016e-05, "loss": 1.9466, "step": 1733 }, { "epoch": 0.09665013098489493, "grad_norm": 0.5235105156898499, "learning_rate": 9.837845076129885e-05, "loss": 1.6369, "step": 1734 }, { "epoch": 0.09670586923805807, "grad_norm": 0.49561917781829834, "learning_rate": 9.837620072343514e-05, "loss": 1.6879, "step": 1735 }, { "epoch": 0.09676160749122123, "grad_norm": 0.577617883682251, "learning_rate": 9.83739491513604e-05, "loss": 2.0888, "step": 1736 }, { "epoch": 0.09681734574438437, "grad_norm": 0.559758722782135, "learning_rate": 9.837169604514605e-05, "loss": 2.0155, "step": 1737 }, { "epoch": 0.09687308399754752, "grad_norm": 0.4803854525089264, "learning_rate": 9.83694414048635e-05, "loss": 1.7143, "step": 1738 }, { "epoch": 0.09692882225071066, "grad_norm": 0.5286114811897278, "learning_rate": 9.83671852305843e-05, "loss": 1.7708, "step": 1739 }, { "epoch": 0.09698456050387382, "grad_norm": 0.5186529159545898, "learning_rate": 9.836492752237998e-05, "loss": 1.8367, "step": 1740 }, { "epoch": 0.09704029875703696, "grad_norm": 0.5168614983558655, "learning_rate": 9.836266828032214e-05, "loss": 1.6913, "step": 1741 }, { "epoch": 0.0970960370102001, "grad_norm": 0.5508823990821838, "learning_rate": 9.836040750448246e-05, "loss": 1.8108, "step": 1742 }, { "epoch": 0.09715177526336324, "grad_norm": 0.5152462720870972, "learning_rate": 9.835814519493258e-05, "loss": 1.7643, "step": 1743 }, { "epoch": 0.09720751351652639, "grad_norm": 0.5197470188140869, "learning_rate": 9.835588135174432e-05, "loss": 1.753, "step": 1744 }, { "epoch": 0.09726325176968954, "grad_norm": 0.5595375895500183, "learning_rate": 9.83536159749894e-05, "loss": 1.9646, "step": 1745 }, { "epoch": 0.09731899002285269, "grad_norm": 0.5276100635528564, "learning_rate": 9.835134906473973e-05, "loss": 1.8053, "step": 1746 }, { "epoch": 0.09737472827601583, "grad_norm": 0.543694257736206, "learning_rate": 9.834908062106716e-05, "loss": 1.9073, "step": 1747 }, { "epoch": 0.09743046652917897, "grad_norm": 0.5280660390853882, "learning_rate": 9.834681064404366e-05, "loss": 1.8642, "step": 1748 }, { "epoch": 0.09748620478234211, "grad_norm": 0.5228556394577026, "learning_rate": 9.83445391337412e-05, "loss": 1.7084, "step": 1749 }, { "epoch": 0.09754194303550527, "grad_norm": 0.5147905349731445, "learning_rate": 9.834226609023183e-05, "loss": 1.7273, "step": 1750 }, { "epoch": 0.09759768128866841, "grad_norm": 0.6363779306411743, "learning_rate": 9.833999151358763e-05, "loss": 2.3455, "step": 1751 }, { "epoch": 0.09765341954183156, "grad_norm": 0.4779658317565918, "learning_rate": 9.833771540388074e-05, "loss": 1.5965, "step": 1752 }, { "epoch": 0.0977091577949947, "grad_norm": 0.5493218302726746, "learning_rate": 9.833543776118334e-05, "loss": 1.7655, "step": 1753 }, { "epoch": 0.09776489604815786, "grad_norm": 0.5027639865875244, "learning_rate": 9.833315858556769e-05, "loss": 1.6425, "step": 1754 }, { "epoch": 0.097820634301321, "grad_norm": 0.5259470343589783, "learning_rate": 9.833087787710604e-05, "loss": 1.8848, "step": 1755 }, { "epoch": 0.09787637255448414, "grad_norm": 0.5296250581741333, "learning_rate": 9.832859563587073e-05, "loss": 1.6713, "step": 1756 }, { "epoch": 0.09793211080764729, "grad_norm": 0.5273899435997009, "learning_rate": 9.832631186193414e-05, "loss": 1.7833, "step": 1757 }, { "epoch": 0.09798784906081043, "grad_norm": 0.5987624526023865, "learning_rate": 9.832402655536869e-05, "loss": 2.0934, "step": 1758 }, { "epoch": 0.09804358731397358, "grad_norm": 0.5442295074462891, "learning_rate": 9.83217397162469e-05, "loss": 1.6506, "step": 1759 }, { "epoch": 0.09809932556713673, "grad_norm": 0.6511545181274414, "learning_rate": 9.831945134464123e-05, "loss": 2.1311, "step": 1760 }, { "epoch": 0.09815506382029987, "grad_norm": 0.5505144596099854, "learning_rate": 9.831716144062431e-05, "loss": 1.7606, "step": 1761 }, { "epoch": 0.09821080207346301, "grad_norm": 0.5241886973381042, "learning_rate": 9.831487000426871e-05, "loss": 1.7404, "step": 1762 }, { "epoch": 0.09826654032662617, "grad_norm": 0.5306397080421448, "learning_rate": 9.831257703564715e-05, "loss": 1.7232, "step": 1763 }, { "epoch": 0.09832227857978931, "grad_norm": 0.5829235315322876, "learning_rate": 9.831028253483232e-05, "loss": 1.8867, "step": 1764 }, { "epoch": 0.09837801683295246, "grad_norm": 0.5258575677871704, "learning_rate": 9.8307986501897e-05, "loss": 1.6442, "step": 1765 }, { "epoch": 0.0984337550861156, "grad_norm": 0.5493606328964233, "learning_rate": 9.8305688936914e-05, "loss": 2.025, "step": 1766 }, { "epoch": 0.09848949333927874, "grad_norm": 0.5285725593566895, "learning_rate": 9.83033898399562e-05, "loss": 1.683, "step": 1767 }, { "epoch": 0.0985452315924419, "grad_norm": 0.590203046798706, "learning_rate": 9.830108921109648e-05, "loss": 2.0356, "step": 1768 }, { "epoch": 0.09860096984560504, "grad_norm": 0.47736695408821106, "learning_rate": 9.829878705040784e-05, "loss": 1.2685, "step": 1769 }, { "epoch": 0.09865670809876818, "grad_norm": 0.5433778762817383, "learning_rate": 9.829648335796327e-05, "loss": 1.5734, "step": 1770 }, { "epoch": 0.09871244635193133, "grad_norm": 0.533301591873169, "learning_rate": 9.829417813383584e-05, "loss": 1.6253, "step": 1771 }, { "epoch": 0.09876818460509447, "grad_norm": 0.5619016289710999, "learning_rate": 9.829187137809865e-05, "loss": 1.9336, "step": 1772 }, { "epoch": 0.09882392285825763, "grad_norm": 0.5166584849357605, "learning_rate": 9.828956309082487e-05, "loss": 1.6934, "step": 1773 }, { "epoch": 0.09887966111142077, "grad_norm": 0.550294041633606, "learning_rate": 9.828725327208769e-05, "loss": 1.7357, "step": 1774 }, { "epoch": 0.09893539936458391, "grad_norm": 0.5708268880844116, "learning_rate": 9.828494192196037e-05, "loss": 1.75, "step": 1775 }, { "epoch": 0.09899113761774705, "grad_norm": 0.5142853856086731, "learning_rate": 9.828262904051621e-05, "loss": 1.8905, "step": 1776 }, { "epoch": 0.09904687587091021, "grad_norm": 0.5133590698242188, "learning_rate": 9.828031462782858e-05, "loss": 1.7111, "step": 1777 }, { "epoch": 0.09910261412407335, "grad_norm": 0.491804838180542, "learning_rate": 9.827799868397086e-05, "loss": 1.7898, "step": 1778 }, { "epoch": 0.0991583523772365, "grad_norm": 0.5558345913887024, "learning_rate": 9.827568120901649e-05, "loss": 1.8621, "step": 1779 }, { "epoch": 0.09921409063039964, "grad_norm": 0.5390424132347107, "learning_rate": 9.827336220303898e-05, "loss": 1.5574, "step": 1780 }, { "epoch": 0.09926982888356278, "grad_norm": 0.5201495885848999, "learning_rate": 9.827104166611188e-05, "loss": 1.7218, "step": 1781 }, { "epoch": 0.09932556713672594, "grad_norm": 0.49533358216285706, "learning_rate": 9.826871959830877e-05, "loss": 1.6587, "step": 1782 }, { "epoch": 0.09938130538988908, "grad_norm": 0.5522517561912537, "learning_rate": 9.826639599970331e-05, "loss": 1.9942, "step": 1783 }, { "epoch": 0.09943704364305223, "grad_norm": 0.5211175680160522, "learning_rate": 9.826407087036918e-05, "loss": 1.7953, "step": 1784 }, { "epoch": 0.09949278189621537, "grad_norm": 0.5591548681259155, "learning_rate": 9.82617442103801e-05, "loss": 1.7257, "step": 1785 }, { "epoch": 0.09954852014937852, "grad_norm": 0.5057593584060669, "learning_rate": 9.82594160198099e-05, "loss": 1.6209, "step": 1786 }, { "epoch": 0.09960425840254167, "grad_norm": 0.4974839389324188, "learning_rate": 9.82570862987324e-05, "loss": 1.7242, "step": 1787 }, { "epoch": 0.09965999665570481, "grad_norm": 0.580697238445282, "learning_rate": 9.825475504722147e-05, "loss": 1.8402, "step": 1788 }, { "epoch": 0.09971573490886795, "grad_norm": 0.5298492908477783, "learning_rate": 9.825242226535106e-05, "loss": 1.5434, "step": 1789 }, { "epoch": 0.0997714731620311, "grad_norm": 0.5714828372001648, "learning_rate": 9.825008795319514e-05, "loss": 1.8505, "step": 1790 }, { "epoch": 0.09982721141519425, "grad_norm": 0.5840202569961548, "learning_rate": 9.824775211082776e-05, "loss": 1.9345, "step": 1791 }, { "epoch": 0.0998829496683574, "grad_norm": 0.495969295501709, "learning_rate": 9.824541473832298e-05, "loss": 1.6482, "step": 1792 }, { "epoch": 0.09993868792152054, "grad_norm": 0.537111759185791, "learning_rate": 9.824307583575494e-05, "loss": 1.6791, "step": 1793 }, { "epoch": 0.09999442617468368, "grad_norm": 0.5053449869155884, "learning_rate": 9.82407354031978e-05, "loss": 1.6764, "step": 1794 }, { "epoch": 0.10005016442784682, "grad_norm": 0.5327693223953247, "learning_rate": 9.82383934407258e-05, "loss": 1.7993, "step": 1795 }, { "epoch": 0.10010590268100998, "grad_norm": 0.49914291501045227, "learning_rate": 9.823604994841322e-05, "loss": 1.9674, "step": 1796 }, { "epoch": 0.10016164093417312, "grad_norm": 0.5144324898719788, "learning_rate": 9.823370492633435e-05, "loss": 1.7585, "step": 1797 }, { "epoch": 0.10021737918733627, "grad_norm": 0.5108045935630798, "learning_rate": 9.823135837456362e-05, "loss": 1.7215, "step": 1798 }, { "epoch": 0.10027311744049941, "grad_norm": 0.5693103671073914, "learning_rate": 9.822901029317537e-05, "loss": 1.7812, "step": 1799 }, { "epoch": 0.10032885569366257, "grad_norm": 0.49847400188446045, "learning_rate": 9.822666068224412e-05, "loss": 1.6675, "step": 1800 }, { "epoch": 0.10038459394682571, "grad_norm": 0.5565662384033203, "learning_rate": 9.822430954184439e-05, "loss": 1.8071, "step": 1801 }, { "epoch": 0.10044033219998885, "grad_norm": 0.5412677526473999, "learning_rate": 9.82219568720507e-05, "loss": 1.7311, "step": 1802 }, { "epoch": 0.100496070453152, "grad_norm": 0.5256420373916626, "learning_rate": 9.821960267293771e-05, "loss": 1.8179, "step": 1803 }, { "epoch": 0.10055180870631514, "grad_norm": 0.486968457698822, "learning_rate": 9.821724694458006e-05, "loss": 1.7443, "step": 1804 }, { "epoch": 0.1006075469594783, "grad_norm": 0.5230684280395508, "learning_rate": 9.821488968705246e-05, "loss": 1.8426, "step": 1805 }, { "epoch": 0.10066328521264144, "grad_norm": 0.5057176351547241, "learning_rate": 9.821253090042967e-05, "loss": 1.6857, "step": 1806 }, { "epoch": 0.10071902346580458, "grad_norm": 0.5477109551429749, "learning_rate": 9.821017058478653e-05, "loss": 1.904, "step": 1807 }, { "epoch": 0.10077476171896772, "grad_norm": 0.5054430961608887, "learning_rate": 9.820780874019782e-05, "loss": 1.8538, "step": 1808 }, { "epoch": 0.10083049997213088, "grad_norm": 0.5614181160926819, "learning_rate": 9.82054453667385e-05, "loss": 1.9318, "step": 1809 }, { "epoch": 0.10088623822529402, "grad_norm": 0.49829983711242676, "learning_rate": 9.820308046448353e-05, "loss": 1.6044, "step": 1810 }, { "epoch": 0.10094197647845717, "grad_norm": 0.53876793384552, "learning_rate": 9.820071403350787e-05, "loss": 1.7234, "step": 1811 }, { "epoch": 0.10099771473162031, "grad_norm": 0.5352075695991516, "learning_rate": 9.81983460738866e-05, "loss": 1.7911, "step": 1812 }, { "epoch": 0.10105345298478345, "grad_norm": 0.5328055024147034, "learning_rate": 9.819597658569479e-05, "loss": 1.8147, "step": 1813 }, { "epoch": 0.10110919123794661, "grad_norm": 0.5261515378952026, "learning_rate": 9.819360556900763e-05, "loss": 1.8057, "step": 1814 }, { "epoch": 0.10116492949110975, "grad_norm": 0.5476046204566956, "learning_rate": 9.819123302390027e-05, "loss": 1.7813, "step": 1815 }, { "epoch": 0.1012206677442729, "grad_norm": 0.5293675661087036, "learning_rate": 9.818885895044799e-05, "loss": 1.7398, "step": 1816 }, { "epoch": 0.10127640599743604, "grad_norm": 0.6075041890144348, "learning_rate": 9.818648334872607e-05, "loss": 1.985, "step": 1817 }, { "epoch": 0.10133214425059918, "grad_norm": 0.5815473794937134, "learning_rate": 9.818410621880982e-05, "loss": 1.7932, "step": 1818 }, { "epoch": 0.10138788250376234, "grad_norm": 0.546378493309021, "learning_rate": 9.818172756077466e-05, "loss": 1.8672, "step": 1819 }, { "epoch": 0.10144362075692548, "grad_norm": 0.5089141130447388, "learning_rate": 9.817934737469603e-05, "loss": 1.4847, "step": 1820 }, { "epoch": 0.10149935901008862, "grad_norm": 0.5070534348487854, "learning_rate": 9.81769656606494e-05, "loss": 1.6301, "step": 1821 }, { "epoch": 0.10155509726325176, "grad_norm": 0.5128391981124878, "learning_rate": 9.817458241871032e-05, "loss": 1.8199, "step": 1822 }, { "epoch": 0.10161083551641492, "grad_norm": 0.5569765567779541, "learning_rate": 9.817219764895435e-05, "loss": 1.7238, "step": 1823 }, { "epoch": 0.10166657376957806, "grad_norm": 0.5038780570030212, "learning_rate": 9.816981135145714e-05, "loss": 1.7099, "step": 1824 }, { "epoch": 0.10172231202274121, "grad_norm": 0.5122333765029907, "learning_rate": 9.816742352629437e-05, "loss": 1.7679, "step": 1825 }, { "epoch": 0.10177805027590435, "grad_norm": 0.5544700026512146, "learning_rate": 9.816503417354174e-05, "loss": 2.0049, "step": 1826 }, { "epoch": 0.10183378852906749, "grad_norm": 0.5663131475448608, "learning_rate": 9.816264329327507e-05, "loss": 1.7042, "step": 1827 }, { "epoch": 0.10188952678223065, "grad_norm": 0.5186511278152466, "learning_rate": 9.816025088557015e-05, "loss": 1.7472, "step": 1828 }, { "epoch": 0.10194526503539379, "grad_norm": 0.5595180988311768, "learning_rate": 9.815785695050288e-05, "loss": 1.6525, "step": 1829 }, { "epoch": 0.10200100328855694, "grad_norm": 0.49748462438583374, "learning_rate": 9.815546148814915e-05, "loss": 1.6744, "step": 1830 }, { "epoch": 0.10205674154172008, "grad_norm": 0.47154897451400757, "learning_rate": 9.815306449858497e-05, "loss": 1.6183, "step": 1831 }, { "epoch": 0.10211247979488323, "grad_norm": 0.5415584444999695, "learning_rate": 9.815066598188631e-05, "loss": 1.842, "step": 1832 }, { "epoch": 0.10216821804804638, "grad_norm": 0.5106571912765503, "learning_rate": 9.814826593812928e-05, "loss": 1.6504, "step": 1833 }, { "epoch": 0.10222395630120952, "grad_norm": 0.5451028347015381, "learning_rate": 9.814586436738998e-05, "loss": 1.8817, "step": 1834 }, { "epoch": 0.10227969455437266, "grad_norm": 0.5032516121864319, "learning_rate": 9.814346126974455e-05, "loss": 1.8143, "step": 1835 }, { "epoch": 0.1023354328075358, "grad_norm": 0.4844000041484833, "learning_rate": 9.814105664526925e-05, "loss": 1.8255, "step": 1836 }, { "epoch": 0.10239117106069896, "grad_norm": 0.8231089115142822, "learning_rate": 9.81386504940403e-05, "loss": 1.5754, "step": 1837 }, { "epoch": 0.1024469093138621, "grad_norm": 0.5142394304275513, "learning_rate": 9.813624281613403e-05, "loss": 1.7516, "step": 1838 }, { "epoch": 0.10250264756702525, "grad_norm": 0.5010998249053955, "learning_rate": 9.813383361162678e-05, "loss": 1.7164, "step": 1839 }, { "epoch": 0.10255838582018839, "grad_norm": 0.5169504284858704, "learning_rate": 9.813142288059497e-05, "loss": 1.4974, "step": 1840 }, { "epoch": 0.10261412407335155, "grad_norm": 0.5264306664466858, "learning_rate": 9.812901062311507e-05, "loss": 1.6087, "step": 1841 }, { "epoch": 0.10266986232651469, "grad_norm": 0.5117889642715454, "learning_rate": 9.812659683926355e-05, "loss": 1.734, "step": 1842 }, { "epoch": 0.10272560057967783, "grad_norm": 0.5216721296310425, "learning_rate": 9.812418152911697e-05, "loss": 1.7643, "step": 1843 }, { "epoch": 0.10278133883284098, "grad_norm": 0.5514086484909058, "learning_rate": 9.812176469275196e-05, "loss": 1.7052, "step": 1844 }, { "epoch": 0.10283707708600412, "grad_norm": 0.5310468077659607, "learning_rate": 9.811934633024514e-05, "loss": 1.8478, "step": 1845 }, { "epoch": 0.10289281533916728, "grad_norm": 0.5535829067230225, "learning_rate": 9.811692644167318e-05, "loss": 1.7884, "step": 1846 }, { "epoch": 0.10294855359233042, "grad_norm": 0.5332193374633789, "learning_rate": 9.811450502711288e-05, "loss": 1.7511, "step": 1847 }, { "epoch": 0.10300429184549356, "grad_norm": 0.5547590851783752, "learning_rate": 9.8112082086641e-05, "loss": 1.7348, "step": 1848 }, { "epoch": 0.1030600300986567, "grad_norm": 0.5098549127578735, "learning_rate": 9.810965762033439e-05, "loss": 1.8117, "step": 1849 }, { "epoch": 0.10311576835181985, "grad_norm": 0.4965379238128662, "learning_rate": 9.810723162826994e-05, "loss": 1.6535, "step": 1850 }, { "epoch": 0.103171506604983, "grad_norm": 0.5498190522193909, "learning_rate": 9.810480411052458e-05, "loss": 1.8094, "step": 1851 }, { "epoch": 0.10322724485814615, "grad_norm": 0.5419559478759766, "learning_rate": 9.81023750671753e-05, "loss": 1.8347, "step": 1852 }, { "epoch": 0.10328298311130929, "grad_norm": 0.5136609077453613, "learning_rate": 9.809994449829916e-05, "loss": 1.8038, "step": 1853 }, { "epoch": 0.10333872136447243, "grad_norm": 0.4600328207015991, "learning_rate": 9.809751240397321e-05, "loss": 1.5616, "step": 1854 }, { "epoch": 0.10339445961763559, "grad_norm": 0.5725501775741577, "learning_rate": 9.80950787842746e-05, "loss": 2.0217, "step": 1855 }, { "epoch": 0.10345019787079873, "grad_norm": 0.4968816936016083, "learning_rate": 9.809264363928049e-05, "loss": 1.6151, "step": 1856 }, { "epoch": 0.10350593612396188, "grad_norm": 0.5521273016929626, "learning_rate": 9.809020696906815e-05, "loss": 1.5242, "step": 1857 }, { "epoch": 0.10356167437712502, "grad_norm": 0.526759684085846, "learning_rate": 9.80877687737148e-05, "loss": 1.6917, "step": 1858 }, { "epoch": 0.10361741263028816, "grad_norm": 0.5235029458999634, "learning_rate": 9.808532905329781e-05, "loss": 1.785, "step": 1859 }, { "epoch": 0.10367315088345132, "grad_norm": 0.5284624099731445, "learning_rate": 9.808288780789454e-05, "loss": 1.8857, "step": 1860 }, { "epoch": 0.10372888913661446, "grad_norm": 0.5086808800697327, "learning_rate": 9.80804450375824e-05, "loss": 1.5768, "step": 1861 }, { "epoch": 0.1037846273897776, "grad_norm": 0.6029835343360901, "learning_rate": 9.807800074243888e-05, "loss": 2.1482, "step": 1862 }, { "epoch": 0.10384036564294075, "grad_norm": 0.5451070666313171, "learning_rate": 9.80755549225415e-05, "loss": 1.6884, "step": 1863 }, { "epoch": 0.1038961038961039, "grad_norm": 0.5617519021034241, "learning_rate": 9.807310757796781e-05, "loss": 1.9665, "step": 1864 }, { "epoch": 0.10395184214926705, "grad_norm": 0.6114406585693359, "learning_rate": 9.807065870879544e-05, "loss": 1.9696, "step": 1865 }, { "epoch": 0.10400758040243019, "grad_norm": 0.5124810338020325, "learning_rate": 9.806820831510204e-05, "loss": 1.6848, "step": 1866 }, { "epoch": 0.10406331865559333, "grad_norm": 0.5385152697563171, "learning_rate": 9.806575639696533e-05, "loss": 1.6808, "step": 1867 }, { "epoch": 0.10411905690875647, "grad_norm": 0.49392756819725037, "learning_rate": 9.806330295446307e-05, "loss": 1.8179, "step": 1868 }, { "epoch": 0.10417479516191963, "grad_norm": 0.49383312463760376, "learning_rate": 9.806084798767307e-05, "loss": 1.5517, "step": 1869 }, { "epoch": 0.10423053341508277, "grad_norm": 0.5276709198951721, "learning_rate": 9.805839149667319e-05, "loss": 1.7125, "step": 1870 }, { "epoch": 0.10428627166824592, "grad_norm": 0.5694584250450134, "learning_rate": 9.805593348154131e-05, "loss": 1.9891, "step": 1871 }, { "epoch": 0.10434200992140906, "grad_norm": 0.5705782771110535, "learning_rate": 9.805347394235543e-05, "loss": 1.779, "step": 1872 }, { "epoch": 0.1043977481745722, "grad_norm": 0.543282151222229, "learning_rate": 9.805101287919352e-05, "loss": 1.898, "step": 1873 }, { "epoch": 0.10445348642773536, "grad_norm": 0.5607357025146484, "learning_rate": 9.804855029213365e-05, "loss": 1.9422, "step": 1874 }, { "epoch": 0.1045092246808985, "grad_norm": 0.548055112361908, "learning_rate": 9.804608618125388e-05, "loss": 1.776, "step": 1875 }, { "epoch": 0.10456496293406164, "grad_norm": 0.528634250164032, "learning_rate": 9.804362054663241e-05, "loss": 1.7196, "step": 1876 }, { "epoch": 0.10462070118722479, "grad_norm": 0.5074811577796936, "learning_rate": 9.80411533883474e-05, "loss": 1.6667, "step": 1877 }, { "epoch": 0.10467643944038794, "grad_norm": 0.5272465944290161, "learning_rate": 9.80386847064771e-05, "loss": 1.8897, "step": 1878 }, { "epoch": 0.10473217769355109, "grad_norm": 0.5819423198699951, "learning_rate": 9.80362145010998e-05, "loss": 1.868, "step": 1879 }, { "epoch": 0.10478791594671423, "grad_norm": 0.4952581226825714, "learning_rate": 9.803374277229387e-05, "loss": 1.7449, "step": 1880 }, { "epoch": 0.10484365419987737, "grad_norm": 0.5459893345832825, "learning_rate": 9.803126952013766e-05, "loss": 1.7454, "step": 1881 }, { "epoch": 0.10489939245304052, "grad_norm": 0.4974026381969452, "learning_rate": 9.802879474470964e-05, "loss": 1.5892, "step": 1882 }, { "epoch": 0.10495513070620367, "grad_norm": 0.503982424736023, "learning_rate": 9.802631844608825e-05, "loss": 1.608, "step": 1883 }, { "epoch": 0.10501086895936682, "grad_norm": 0.5444994568824768, "learning_rate": 9.802384062435206e-05, "loss": 1.8286, "step": 1884 }, { "epoch": 0.10506660721252996, "grad_norm": 0.5099791288375854, "learning_rate": 9.802136127957965e-05, "loss": 1.7811, "step": 1885 }, { "epoch": 0.1051223454656931, "grad_norm": 0.5670564770698547, "learning_rate": 9.801888041184963e-05, "loss": 2.0036, "step": 1886 }, { "epoch": 0.10517808371885626, "grad_norm": 0.5026718378067017, "learning_rate": 9.801639802124071e-05, "loss": 1.6716, "step": 1887 }, { "epoch": 0.1052338219720194, "grad_norm": 0.519005298614502, "learning_rate": 9.801391410783161e-05, "loss": 1.6815, "step": 1888 }, { "epoch": 0.10528956022518254, "grad_norm": 0.46930474042892456, "learning_rate": 9.801142867170106e-05, "loss": 1.7429, "step": 1889 }, { "epoch": 0.10534529847834569, "grad_norm": 0.5434656143188477, "learning_rate": 9.800894171292793e-05, "loss": 1.8671, "step": 1890 }, { "epoch": 0.10540103673150883, "grad_norm": 0.5062917470932007, "learning_rate": 9.80064532315911e-05, "loss": 1.6347, "step": 1891 }, { "epoch": 0.10545677498467199, "grad_norm": 0.5208712220191956, "learning_rate": 9.800396322776945e-05, "loss": 1.601, "step": 1892 }, { "epoch": 0.10551251323783513, "grad_norm": 0.49505361914634705, "learning_rate": 9.800147170154199e-05, "loss": 1.7157, "step": 1893 }, { "epoch": 0.10556825149099827, "grad_norm": 0.5282744765281677, "learning_rate": 9.79989786529877e-05, "loss": 1.7322, "step": 1894 }, { "epoch": 0.10562398974416141, "grad_norm": 0.5821601748466492, "learning_rate": 9.799648408218567e-05, "loss": 2.0407, "step": 1895 }, { "epoch": 0.10567972799732456, "grad_norm": 0.5044925212860107, "learning_rate": 9.7993987989215e-05, "loss": 1.6443, "step": 1896 }, { "epoch": 0.10573546625048771, "grad_norm": 0.5207780599594116, "learning_rate": 9.799149037415485e-05, "loss": 1.6341, "step": 1897 }, { "epoch": 0.10579120450365086, "grad_norm": 0.5176671743392944, "learning_rate": 9.798899123708444e-05, "loss": 1.7532, "step": 1898 }, { "epoch": 0.105846942756814, "grad_norm": 0.585341215133667, "learning_rate": 9.798649057808302e-05, "loss": 1.7511, "step": 1899 }, { "epoch": 0.10590268100997714, "grad_norm": 0.5633143782615662, "learning_rate": 9.798398839722991e-05, "loss": 1.8548, "step": 1900 }, { "epoch": 0.1059584192631403, "grad_norm": 0.5425167083740234, "learning_rate": 9.798148469460444e-05, "loss": 1.7457, "step": 1901 }, { "epoch": 0.10601415751630344, "grad_norm": 0.5065333247184753, "learning_rate": 9.797897947028602e-05, "loss": 1.6342, "step": 1902 }, { "epoch": 0.10606989576946659, "grad_norm": 0.4805918037891388, "learning_rate": 9.797647272435413e-05, "loss": 1.6272, "step": 1903 }, { "epoch": 0.10612563402262973, "grad_norm": 0.49736079573631287, "learning_rate": 9.797396445688825e-05, "loss": 1.6666, "step": 1904 }, { "epoch": 0.10618137227579287, "grad_norm": 0.5496745705604553, "learning_rate": 9.797145466796791e-05, "loss": 1.7214, "step": 1905 }, { "epoch": 0.10623711052895603, "grad_norm": 0.5134656429290771, "learning_rate": 9.796894335767272e-05, "loss": 1.7156, "step": 1906 }, { "epoch": 0.10629284878211917, "grad_norm": 0.5449696183204651, "learning_rate": 9.796643052608232e-05, "loss": 1.7284, "step": 1907 }, { "epoch": 0.10634858703528231, "grad_norm": 0.5344961881637573, "learning_rate": 9.796391617327643e-05, "loss": 1.514, "step": 1908 }, { "epoch": 0.10640432528844546, "grad_norm": 0.5717931389808655, "learning_rate": 9.796140029933474e-05, "loss": 1.9562, "step": 1909 }, { "epoch": 0.10646006354160861, "grad_norm": 0.5507314205169678, "learning_rate": 9.795888290433708e-05, "loss": 1.8475, "step": 1910 }, { "epoch": 0.10651580179477176, "grad_norm": 0.4807168245315552, "learning_rate": 9.795636398836328e-05, "loss": 1.4198, "step": 1911 }, { "epoch": 0.1065715400479349, "grad_norm": 0.5163860321044922, "learning_rate": 9.795384355149321e-05, "loss": 1.7098, "step": 1912 }, { "epoch": 0.10662727830109804, "grad_norm": 0.5876139998435974, "learning_rate": 9.795132159380683e-05, "loss": 1.8379, "step": 1913 }, { "epoch": 0.10668301655426118, "grad_norm": 0.5147418975830078, "learning_rate": 9.794879811538409e-05, "loss": 1.8069, "step": 1914 }, { "epoch": 0.10673875480742434, "grad_norm": 0.5539793372154236, "learning_rate": 9.794627311630503e-05, "loss": 1.9336, "step": 1915 }, { "epoch": 0.10679449306058748, "grad_norm": 0.5565729737281799, "learning_rate": 9.794374659664975e-05, "loss": 1.8024, "step": 1916 }, { "epoch": 0.10685023131375063, "grad_norm": 0.509848952293396, "learning_rate": 9.794121855649834e-05, "loss": 1.6553, "step": 1917 }, { "epoch": 0.10690596956691377, "grad_norm": 0.5031093955039978, "learning_rate": 9.793868899593101e-05, "loss": 1.6452, "step": 1918 }, { "epoch": 0.10696170782007691, "grad_norm": 0.5101149082183838, "learning_rate": 9.793615791502794e-05, "loss": 1.5787, "step": 1919 }, { "epoch": 0.10701744607324007, "grad_norm": 0.5462785363197327, "learning_rate": 9.793362531386946e-05, "loss": 1.7273, "step": 1920 }, { "epoch": 0.10707318432640321, "grad_norm": 0.5313560366630554, "learning_rate": 9.793109119253584e-05, "loss": 1.7061, "step": 1921 }, { "epoch": 0.10712892257956635, "grad_norm": 0.49144747853279114, "learning_rate": 9.792855555110747e-05, "loss": 1.6418, "step": 1922 }, { "epoch": 0.1071846608327295, "grad_norm": 0.5435053110122681, "learning_rate": 9.792601838966477e-05, "loss": 1.8774, "step": 1923 }, { "epoch": 0.10724039908589265, "grad_norm": 0.5598286390304565, "learning_rate": 9.792347970828819e-05, "loss": 1.8705, "step": 1924 }, { "epoch": 0.1072961373390558, "grad_norm": 0.5478824377059937, "learning_rate": 9.792093950705824e-05, "loss": 1.6882, "step": 1925 }, { "epoch": 0.10735187559221894, "grad_norm": 0.5779083967208862, "learning_rate": 9.79183977860555e-05, "loss": 1.993, "step": 1926 }, { "epoch": 0.10740761384538208, "grad_norm": 0.5614520907402039, "learning_rate": 9.791585454536054e-05, "loss": 1.7984, "step": 1927 }, { "epoch": 0.10746335209854523, "grad_norm": 0.5752551555633545, "learning_rate": 9.791330978505406e-05, "loss": 1.781, "step": 1928 }, { "epoch": 0.10751909035170838, "grad_norm": 0.5250864624977112, "learning_rate": 9.791076350521675e-05, "loss": 1.8367, "step": 1929 }, { "epoch": 0.10757482860487153, "grad_norm": 0.5408803224563599, "learning_rate": 9.790821570592937e-05, "loss": 1.9812, "step": 1930 }, { "epoch": 0.10763056685803467, "grad_norm": 0.5511845350265503, "learning_rate": 9.790566638727268e-05, "loss": 1.9631, "step": 1931 }, { "epoch": 0.10768630511119781, "grad_norm": 0.5966324806213379, "learning_rate": 9.790311554932758e-05, "loss": 1.6961, "step": 1932 }, { "epoch": 0.10774204336436097, "grad_norm": 0.5062892436981201, "learning_rate": 9.790056319217495e-05, "loss": 1.4829, "step": 1933 }, { "epoch": 0.10779778161752411, "grad_norm": 0.5916358232498169, "learning_rate": 9.789800931589574e-05, "loss": 1.7646, "step": 1934 }, { "epoch": 0.10785351987068725, "grad_norm": 0.5008646845817566, "learning_rate": 9.789545392057093e-05, "loss": 1.6985, "step": 1935 }, { "epoch": 0.1079092581238504, "grad_norm": 0.557442843914032, "learning_rate": 9.789289700628158e-05, "loss": 1.6734, "step": 1936 }, { "epoch": 0.10796499637701354, "grad_norm": 0.5303389430046082, "learning_rate": 9.789033857310876e-05, "loss": 1.8051, "step": 1937 }, { "epoch": 0.1080207346301767, "grad_norm": 0.5422589182853699, "learning_rate": 9.788777862113363e-05, "loss": 1.7073, "step": 1938 }, { "epoch": 0.10807647288333984, "grad_norm": 0.49321499466896057, "learning_rate": 9.788521715043736e-05, "loss": 1.6106, "step": 1939 }, { "epoch": 0.10813221113650298, "grad_norm": 0.5515221953392029, "learning_rate": 9.78826541611012e-05, "loss": 1.9005, "step": 1940 }, { "epoch": 0.10818794938966612, "grad_norm": 0.5055232048034668, "learning_rate": 9.788008965320643e-05, "loss": 1.6169, "step": 1941 }, { "epoch": 0.10824368764282927, "grad_norm": 0.5074330568313599, "learning_rate": 9.787752362683438e-05, "loss": 1.6712, "step": 1942 }, { "epoch": 0.10829942589599242, "grad_norm": 0.5290434956550598, "learning_rate": 9.78749560820664e-05, "loss": 1.6697, "step": 1943 }, { "epoch": 0.10835516414915557, "grad_norm": 0.5382573008537292, "learning_rate": 9.787238701898397e-05, "loss": 1.6955, "step": 1944 }, { "epoch": 0.10841090240231871, "grad_norm": 0.5350417494773865, "learning_rate": 9.786981643766852e-05, "loss": 1.695, "step": 1945 }, { "epoch": 0.10846664065548185, "grad_norm": 0.5305573344230652, "learning_rate": 9.78672443382016e-05, "loss": 1.8205, "step": 1946 }, { "epoch": 0.10852237890864501, "grad_norm": 0.5057222247123718, "learning_rate": 9.786467072066478e-05, "loss": 1.7815, "step": 1947 }, { "epoch": 0.10857811716180815, "grad_norm": 0.5606647729873657, "learning_rate": 9.786209558513968e-05, "loss": 2.0612, "step": 1948 }, { "epoch": 0.1086338554149713, "grad_norm": 0.5300911068916321, "learning_rate": 9.785951893170795e-05, "loss": 1.8648, "step": 1949 }, { "epoch": 0.10868959366813444, "grad_norm": 0.5408658385276794, "learning_rate": 9.785694076045133e-05, "loss": 1.7291, "step": 1950 }, { "epoch": 0.10874533192129758, "grad_norm": 0.5921101570129395, "learning_rate": 9.785436107145156e-05, "loss": 1.9079, "step": 1951 }, { "epoch": 0.10880107017446074, "grad_norm": 0.5365302562713623, "learning_rate": 9.785177986479048e-05, "loss": 1.888, "step": 1952 }, { "epoch": 0.10885680842762388, "grad_norm": 0.5375866293907166, "learning_rate": 9.784919714054993e-05, "loss": 1.7309, "step": 1953 }, { "epoch": 0.10891254668078702, "grad_norm": 0.5292702317237854, "learning_rate": 9.784661289881183e-05, "loss": 1.7366, "step": 1954 }, { "epoch": 0.10896828493395017, "grad_norm": 0.5953987240791321, "learning_rate": 9.784402713965815e-05, "loss": 1.6749, "step": 1955 }, { "epoch": 0.10902402318711332, "grad_norm": 0.5666269659996033, "learning_rate": 9.784143986317084e-05, "loss": 1.8123, "step": 1956 }, { "epoch": 0.10907976144027647, "grad_norm": 0.4942094683647156, "learning_rate": 9.783885106943203e-05, "loss": 1.5919, "step": 1957 }, { "epoch": 0.10913549969343961, "grad_norm": 0.5365981459617615, "learning_rate": 9.783626075852377e-05, "loss": 1.8938, "step": 1958 }, { "epoch": 0.10919123794660275, "grad_norm": 0.4730222523212433, "learning_rate": 9.783366893052822e-05, "loss": 1.6972, "step": 1959 }, { "epoch": 0.1092469761997659, "grad_norm": 0.5012983679771423, "learning_rate": 9.783107558552759e-05, "loss": 1.5967, "step": 1960 }, { "epoch": 0.10930271445292905, "grad_norm": 0.47032400965690613, "learning_rate": 9.782848072360411e-05, "loss": 1.4359, "step": 1961 }, { "epoch": 0.1093584527060922, "grad_norm": 0.6051558256149292, "learning_rate": 9.782588434484008e-05, "loss": 1.8727, "step": 1962 }, { "epoch": 0.10941419095925534, "grad_norm": 0.5087974667549133, "learning_rate": 9.782328644931784e-05, "loss": 1.6863, "step": 1963 }, { "epoch": 0.10946992921241848, "grad_norm": 0.5419572591781616, "learning_rate": 9.782068703711979e-05, "loss": 1.8686, "step": 1964 }, { "epoch": 0.10952566746558162, "grad_norm": 0.5740787386894226, "learning_rate": 9.781808610832837e-05, "loss": 1.8671, "step": 1965 }, { "epoch": 0.10958140571874478, "grad_norm": 0.5375397801399231, "learning_rate": 9.781548366302604e-05, "loss": 1.855, "step": 1966 }, { "epoch": 0.10963714397190792, "grad_norm": 0.5186393857002258, "learning_rate": 9.781287970129536e-05, "loss": 1.8296, "step": 1967 }, { "epoch": 0.10969288222507106, "grad_norm": 0.5058977007865906, "learning_rate": 9.781027422321891e-05, "loss": 1.6181, "step": 1968 }, { "epoch": 0.10974862047823421, "grad_norm": 0.5131574273109436, "learning_rate": 9.78076672288793e-05, "loss": 1.8194, "step": 1969 }, { "epoch": 0.10980435873139736, "grad_norm": 0.5668989419937134, "learning_rate": 9.780505871835924e-05, "loss": 1.857, "step": 1970 }, { "epoch": 0.1098600969845605, "grad_norm": 0.5090118646621704, "learning_rate": 9.780244869174142e-05, "loss": 1.5722, "step": 1971 }, { "epoch": 0.10991583523772365, "grad_norm": 0.5472584962844849, "learning_rate": 9.779983714910865e-05, "loss": 1.7926, "step": 1972 }, { "epoch": 0.10997157349088679, "grad_norm": 0.5904543399810791, "learning_rate": 9.779722409054374e-05, "loss": 1.9054, "step": 1973 }, { "epoch": 0.11002731174404994, "grad_norm": 0.4884478747844696, "learning_rate": 9.779460951612955e-05, "loss": 1.5573, "step": 1974 }, { "epoch": 0.11008304999721309, "grad_norm": 0.6380166411399841, "learning_rate": 9.779199342594902e-05, "loss": 2.0516, "step": 1975 }, { "epoch": 0.11013878825037623, "grad_norm": 0.5148760080337524, "learning_rate": 9.778937582008509e-05, "loss": 1.7119, "step": 1976 }, { "epoch": 0.11019452650353938, "grad_norm": 0.5153675079345703, "learning_rate": 9.77867566986208e-05, "loss": 1.6784, "step": 1977 }, { "epoch": 0.11025026475670252, "grad_norm": 0.5181575417518616, "learning_rate": 9.77841360616392e-05, "loss": 1.4993, "step": 1978 }, { "epoch": 0.11030600300986568, "grad_norm": 0.557270348072052, "learning_rate": 9.778151390922341e-05, "loss": 1.8278, "step": 1979 }, { "epoch": 0.11036174126302882, "grad_norm": 0.570976972579956, "learning_rate": 9.777889024145657e-05, "loss": 1.9032, "step": 1980 }, { "epoch": 0.11041747951619196, "grad_norm": 0.5794844031333923, "learning_rate": 9.777626505842193e-05, "loss": 1.8758, "step": 1981 }, { "epoch": 0.1104732177693551, "grad_norm": 0.5161063075065613, "learning_rate": 9.777363836020268e-05, "loss": 1.8698, "step": 1982 }, { "epoch": 0.11052895602251825, "grad_norm": 0.5546018481254578, "learning_rate": 9.777101014688219e-05, "loss": 1.87, "step": 1983 }, { "epoch": 0.1105846942756814, "grad_norm": 0.5865330696105957, "learning_rate": 9.776838041854377e-05, "loss": 1.9022, "step": 1984 }, { "epoch": 0.11064043252884455, "grad_norm": 0.5667337775230408, "learning_rate": 9.776574917527083e-05, "loss": 2.0603, "step": 1985 }, { "epoch": 0.11069617078200769, "grad_norm": 0.5092570185661316, "learning_rate": 9.776311641714683e-05, "loss": 1.7887, "step": 1986 }, { "epoch": 0.11075190903517083, "grad_norm": 0.5329071879386902, "learning_rate": 9.776048214425525e-05, "loss": 1.7294, "step": 1987 }, { "epoch": 0.11080764728833398, "grad_norm": 0.5048893690109253, "learning_rate": 9.775784635667964e-05, "loss": 1.7357, "step": 1988 }, { "epoch": 0.11086338554149713, "grad_norm": 0.4852405786514282, "learning_rate": 9.77552090545036e-05, "loss": 1.7027, "step": 1989 }, { "epoch": 0.11091912379466028, "grad_norm": 0.5363536477088928, "learning_rate": 9.775257023781074e-05, "loss": 1.9082, "step": 1990 }, { "epoch": 0.11097486204782342, "grad_norm": 0.5514358878135681, "learning_rate": 9.774992990668479e-05, "loss": 1.8572, "step": 1991 }, { "epoch": 0.11103060030098656, "grad_norm": 0.5773457884788513, "learning_rate": 9.774728806120945e-05, "loss": 1.9287, "step": 1992 }, { "epoch": 0.11108633855414972, "grad_norm": 0.5018163323402405, "learning_rate": 9.774464470146851e-05, "loss": 1.6721, "step": 1993 }, { "epoch": 0.11114207680731286, "grad_norm": 0.5004386305809021, "learning_rate": 9.774199982754584e-05, "loss": 1.6999, "step": 1994 }, { "epoch": 0.111197815060476, "grad_norm": 0.5078005194664001, "learning_rate": 9.773935343952527e-05, "loss": 1.6968, "step": 1995 }, { "epoch": 0.11125355331363915, "grad_norm": 0.5355806946754456, "learning_rate": 9.773670553749075e-05, "loss": 1.8122, "step": 1996 }, { "epoch": 0.11130929156680229, "grad_norm": 0.5051989555358887, "learning_rate": 9.773405612152626e-05, "loss": 1.6712, "step": 1997 }, { "epoch": 0.11136502981996545, "grad_norm": 0.5549625754356384, "learning_rate": 9.773140519171582e-05, "loss": 1.8872, "step": 1998 }, { "epoch": 0.11142076807312859, "grad_norm": 0.5879496335983276, "learning_rate": 9.77287527481435e-05, "loss": 1.7659, "step": 1999 }, { "epoch": 0.11147650632629173, "grad_norm": 0.6350980401039124, "learning_rate": 9.772609879089341e-05, "loss": 1.9805, "step": 2000 }, { "epoch": 0.11153224457945488, "grad_norm": 0.5255335569381714, "learning_rate": 9.772344332004975e-05, "loss": 1.7215, "step": 2001 }, { "epoch": 0.11158798283261803, "grad_norm": 0.5538710355758667, "learning_rate": 9.77207863356967e-05, "loss": 1.8071, "step": 2002 }, { "epoch": 0.11164372108578118, "grad_norm": 0.5447118878364563, "learning_rate": 9.771812783791854e-05, "loss": 1.6401, "step": 2003 }, { "epoch": 0.11169945933894432, "grad_norm": 0.5420034527778625, "learning_rate": 9.771546782679959e-05, "loss": 1.7649, "step": 2004 }, { "epoch": 0.11175519759210746, "grad_norm": 0.5717622637748718, "learning_rate": 9.771280630242419e-05, "loss": 1.9245, "step": 2005 }, { "epoch": 0.1118109358452706, "grad_norm": 0.533752977848053, "learning_rate": 9.771014326487675e-05, "loss": 1.6562, "step": 2006 }, { "epoch": 0.11186667409843376, "grad_norm": 0.5668651461601257, "learning_rate": 9.770747871424175e-05, "loss": 1.8504, "step": 2007 }, { "epoch": 0.1119224123515969, "grad_norm": 0.5300382375717163, "learning_rate": 9.770481265060368e-05, "loss": 1.5858, "step": 2008 }, { "epoch": 0.11197815060476005, "grad_norm": 0.5205538272857666, "learning_rate": 9.770214507404709e-05, "loss": 1.8421, "step": 2009 }, { "epoch": 0.11203388885792319, "grad_norm": 0.5641254186630249, "learning_rate": 9.769947598465657e-05, "loss": 1.7521, "step": 2010 }, { "epoch": 0.11208962711108633, "grad_norm": 0.5881509184837341, "learning_rate": 9.76968053825168e-05, "loss": 1.8359, "step": 2011 }, { "epoch": 0.11214536536424949, "grad_norm": 0.5264688730239868, "learning_rate": 9.769413326771243e-05, "loss": 1.7792, "step": 2012 }, { "epoch": 0.11220110361741263, "grad_norm": 0.5596029758453369, "learning_rate": 9.769145964032824e-05, "loss": 1.8502, "step": 2013 }, { "epoch": 0.11225684187057577, "grad_norm": 0.5555474758148193, "learning_rate": 9.768878450044902e-05, "loss": 1.9158, "step": 2014 }, { "epoch": 0.11231258012373892, "grad_norm": 0.5508490800857544, "learning_rate": 9.768610784815959e-05, "loss": 1.5545, "step": 2015 }, { "epoch": 0.11236831837690207, "grad_norm": 0.5072826743125916, "learning_rate": 9.768342968354484e-05, "loss": 1.6679, "step": 2016 }, { "epoch": 0.11242405663006522, "grad_norm": 0.4995681941509247, "learning_rate": 9.768075000668974e-05, "loss": 1.7114, "step": 2017 }, { "epoch": 0.11247979488322836, "grad_norm": 0.5590416789054871, "learning_rate": 9.767806881767923e-05, "loss": 1.8553, "step": 2018 }, { "epoch": 0.1125355331363915, "grad_norm": 0.542676568031311, "learning_rate": 9.767538611659837e-05, "loss": 1.9799, "step": 2019 }, { "epoch": 0.11259127138955465, "grad_norm": 0.6015095710754395, "learning_rate": 9.767270190353221e-05, "loss": 2.0631, "step": 2020 }, { "epoch": 0.1126470096427178, "grad_norm": 0.5182809829711914, "learning_rate": 9.767001617856591e-05, "loss": 1.8081, "step": 2021 }, { "epoch": 0.11270274789588094, "grad_norm": 0.539851725101471, "learning_rate": 9.766732894178463e-05, "loss": 1.6224, "step": 2022 }, { "epoch": 0.11275848614904409, "grad_norm": 0.5738646388053894, "learning_rate": 9.766464019327359e-05, "loss": 1.8425, "step": 2023 }, { "epoch": 0.11281422440220723, "grad_norm": 0.5035516619682312, "learning_rate": 9.766194993311809e-05, "loss": 1.8101, "step": 2024 }, { "epoch": 0.11286996265537039, "grad_norm": 0.4765785038471222, "learning_rate": 9.76592581614034e-05, "loss": 1.7461, "step": 2025 }, { "epoch": 0.11292570090853353, "grad_norm": 0.5692024230957031, "learning_rate": 9.765656487821492e-05, "loss": 1.9905, "step": 2026 }, { "epoch": 0.11298143916169667, "grad_norm": 0.5034509301185608, "learning_rate": 9.765387008363807e-05, "loss": 1.7689, "step": 2027 }, { "epoch": 0.11303717741485982, "grad_norm": 0.5591553449630737, "learning_rate": 9.76511737777583e-05, "loss": 1.7994, "step": 2028 }, { "epoch": 0.11309291566802296, "grad_norm": 0.533530592918396, "learning_rate": 9.764847596066111e-05, "loss": 1.5192, "step": 2029 }, { "epoch": 0.11314865392118612, "grad_norm": 0.5049347281455994, "learning_rate": 9.764577663243209e-05, "loss": 1.5906, "step": 2030 }, { "epoch": 0.11320439217434926, "grad_norm": 0.4710226058959961, "learning_rate": 9.764307579315681e-05, "loss": 1.4503, "step": 2031 }, { "epoch": 0.1132601304275124, "grad_norm": 0.5490729212760925, "learning_rate": 9.764037344292096e-05, "loss": 1.7865, "step": 2032 }, { "epoch": 0.11331586868067554, "grad_norm": 0.5714886784553528, "learning_rate": 9.763766958181022e-05, "loss": 1.6803, "step": 2033 }, { "epoch": 0.11337160693383869, "grad_norm": 0.5637816786766052, "learning_rate": 9.763496420991037e-05, "loss": 1.902, "step": 2034 }, { "epoch": 0.11342734518700184, "grad_norm": 0.5324851870536804, "learning_rate": 9.763225732730716e-05, "loss": 1.7774, "step": 2035 }, { "epoch": 0.11348308344016499, "grad_norm": 0.542209267616272, "learning_rate": 9.762954893408646e-05, "loss": 1.7369, "step": 2036 }, { "epoch": 0.11353882169332813, "grad_norm": 0.5353888273239136, "learning_rate": 9.762683903033419e-05, "loss": 1.914, "step": 2037 }, { "epoch": 0.11359455994649127, "grad_norm": 0.5152493119239807, "learning_rate": 9.762412761613624e-05, "loss": 1.8155, "step": 2038 }, { "epoch": 0.11365029819965443, "grad_norm": 0.4723453223705292, "learning_rate": 9.762141469157865e-05, "loss": 1.6183, "step": 2039 }, { "epoch": 0.11370603645281757, "grad_norm": 0.5671008229255676, "learning_rate": 9.761870025674743e-05, "loss": 1.887, "step": 2040 }, { "epoch": 0.11376177470598071, "grad_norm": 0.5240710377693176, "learning_rate": 9.761598431172868e-05, "loss": 1.7928, "step": 2041 }, { "epoch": 0.11381751295914386, "grad_norm": 0.4852540194988251, "learning_rate": 9.761326685660852e-05, "loss": 1.6132, "step": 2042 }, { "epoch": 0.113873251212307, "grad_norm": 0.46512627601623535, "learning_rate": 9.761054789147315e-05, "loss": 1.4053, "step": 2043 }, { "epoch": 0.11392898946547016, "grad_norm": 0.5127692222595215, "learning_rate": 9.760782741640879e-05, "loss": 1.649, "step": 2044 }, { "epoch": 0.1139847277186333, "grad_norm": 0.5368222594261169, "learning_rate": 9.76051054315017e-05, "loss": 1.7286, "step": 2045 }, { "epoch": 0.11404046597179644, "grad_norm": 0.5699864625930786, "learning_rate": 9.760238193683824e-05, "loss": 1.7911, "step": 2046 }, { "epoch": 0.11409620422495959, "grad_norm": 0.59310382604599, "learning_rate": 9.759965693250477e-05, "loss": 1.7731, "step": 2047 }, { "epoch": 0.11415194247812274, "grad_norm": 0.5524492859840393, "learning_rate": 9.75969304185877e-05, "loss": 1.7917, "step": 2048 }, { "epoch": 0.11420768073128588, "grad_norm": 0.529346227645874, "learning_rate": 9.75942023951735e-05, "loss": 1.7298, "step": 2049 }, { "epoch": 0.11426341898444903, "grad_norm": 0.5188475847244263, "learning_rate": 9.75914728623487e-05, "loss": 1.8422, "step": 2050 }, { "epoch": 0.11431915723761217, "grad_norm": 0.5141621232032776, "learning_rate": 9.758874182019986e-05, "loss": 1.7194, "step": 2051 }, { "epoch": 0.11437489549077531, "grad_norm": 0.5103389024734497, "learning_rate": 9.758600926881358e-05, "loss": 1.782, "step": 2052 }, { "epoch": 0.11443063374393847, "grad_norm": 0.5371511578559875, "learning_rate": 9.758327520827654e-05, "loss": 1.8925, "step": 2053 }, { "epoch": 0.11448637199710161, "grad_norm": 0.528293788433075, "learning_rate": 9.758053963867544e-05, "loss": 1.5632, "step": 2054 }, { "epoch": 0.11454211025026476, "grad_norm": 0.5670381784439087, "learning_rate": 9.757780256009704e-05, "loss": 2.0612, "step": 2055 }, { "epoch": 0.1145978485034279, "grad_norm": 0.4997304677963257, "learning_rate": 9.757506397262814e-05, "loss": 1.4963, "step": 2056 }, { "epoch": 0.11465358675659104, "grad_norm": 0.5154783129692078, "learning_rate": 9.757232387635559e-05, "loss": 1.7024, "step": 2057 }, { "epoch": 0.1147093250097542, "grad_norm": 0.5076404213905334, "learning_rate": 9.75695822713663e-05, "loss": 1.7356, "step": 2058 }, { "epoch": 0.11476506326291734, "grad_norm": 0.5490261912345886, "learning_rate": 9.75668391577472e-05, "loss": 1.9454, "step": 2059 }, { "epoch": 0.11482080151608048, "grad_norm": 0.49244236946105957, "learning_rate": 9.756409453558531e-05, "loss": 1.7741, "step": 2060 }, { "epoch": 0.11487653976924363, "grad_norm": 0.5007554292678833, "learning_rate": 9.756134840496763e-05, "loss": 1.6877, "step": 2061 }, { "epoch": 0.11493227802240678, "grad_norm": 0.5688347816467285, "learning_rate": 9.75586007659813e-05, "loss": 1.8947, "step": 2062 }, { "epoch": 0.11498801627556993, "grad_norm": 0.49076688289642334, "learning_rate": 9.755585161871344e-05, "loss": 1.632, "step": 2063 }, { "epoch": 0.11504375452873307, "grad_norm": 0.5263219475746155, "learning_rate": 9.755310096325123e-05, "loss": 1.8176, "step": 2064 }, { "epoch": 0.11509949278189621, "grad_norm": 0.5379471778869629, "learning_rate": 9.755034879968193e-05, "loss": 1.9844, "step": 2065 }, { "epoch": 0.11515523103505935, "grad_norm": 0.6128638982772827, "learning_rate": 9.754759512809277e-05, "loss": 2.0891, "step": 2066 }, { "epoch": 0.11521096928822251, "grad_norm": 0.513877272605896, "learning_rate": 9.754483994857115e-05, "loss": 1.7906, "step": 2067 }, { "epoch": 0.11526670754138565, "grad_norm": 0.5699423551559448, "learning_rate": 9.75420832612044e-05, "loss": 1.9245, "step": 2068 }, { "epoch": 0.1153224457945488, "grad_norm": 0.49974846839904785, "learning_rate": 9.753932506607995e-05, "loss": 1.5529, "step": 2069 }, { "epoch": 0.11537818404771194, "grad_norm": 0.5551686882972717, "learning_rate": 9.753656536328528e-05, "loss": 1.7138, "step": 2070 }, { "epoch": 0.1154339223008751, "grad_norm": 0.5302468538284302, "learning_rate": 9.753380415290792e-05, "loss": 1.7991, "step": 2071 }, { "epoch": 0.11548966055403824, "grad_norm": 0.5461943745613098, "learning_rate": 9.753104143503544e-05, "loss": 1.6249, "step": 2072 }, { "epoch": 0.11554539880720138, "grad_norm": 0.5242646336555481, "learning_rate": 9.752827720975544e-05, "loss": 1.7194, "step": 2073 }, { "epoch": 0.11560113706036453, "grad_norm": 0.5647328495979309, "learning_rate": 9.75255114771556e-05, "loss": 1.6221, "step": 2074 }, { "epoch": 0.11565687531352767, "grad_norm": 0.5108300447463989, "learning_rate": 9.752274423732364e-05, "loss": 1.5454, "step": 2075 }, { "epoch": 0.11571261356669083, "grad_norm": 0.5370137691497803, "learning_rate": 9.75199754903473e-05, "loss": 1.8162, "step": 2076 }, { "epoch": 0.11576835181985397, "grad_norm": 0.5308608412742615, "learning_rate": 9.75172052363144e-05, "loss": 1.8913, "step": 2077 }, { "epoch": 0.11582409007301711, "grad_norm": 0.5060725808143616, "learning_rate": 9.751443347531279e-05, "loss": 1.6392, "step": 2078 }, { "epoch": 0.11587982832618025, "grad_norm": 0.5402329564094543, "learning_rate": 9.751166020743037e-05, "loss": 1.6481, "step": 2079 }, { "epoch": 0.1159355665793434, "grad_norm": 0.5728126168251038, "learning_rate": 9.750888543275511e-05, "loss": 1.7507, "step": 2080 }, { "epoch": 0.11599130483250655, "grad_norm": 0.5055838227272034, "learning_rate": 9.750610915137502e-05, "loss": 1.7667, "step": 2081 }, { "epoch": 0.1160470430856697, "grad_norm": 0.5178690552711487, "learning_rate": 9.750333136337811e-05, "loss": 1.7303, "step": 2082 }, { "epoch": 0.11610278133883284, "grad_norm": 0.5922085642814636, "learning_rate": 9.750055206885249e-05, "loss": 1.9936, "step": 2083 }, { "epoch": 0.11615851959199598, "grad_norm": 0.5285540223121643, "learning_rate": 9.74977712678863e-05, "loss": 1.8642, "step": 2084 }, { "epoch": 0.11621425784515914, "grad_norm": 0.5517610907554626, "learning_rate": 9.749498896056775e-05, "loss": 1.8, "step": 2085 }, { "epoch": 0.11626999609832228, "grad_norm": 0.519136905670166, "learning_rate": 9.749220514698505e-05, "loss": 1.8553, "step": 2086 }, { "epoch": 0.11632573435148542, "grad_norm": 0.47392770648002625, "learning_rate": 9.748941982722652e-05, "loss": 1.5635, "step": 2087 }, { "epoch": 0.11638147260464857, "grad_norm": 0.5580193400382996, "learning_rate": 9.748663300138046e-05, "loss": 2.0887, "step": 2088 }, { "epoch": 0.11643721085781171, "grad_norm": 0.5110911726951599, "learning_rate": 9.748384466953529e-05, "loss": 1.7254, "step": 2089 }, { "epoch": 0.11649294911097487, "grad_norm": 0.5411677360534668, "learning_rate": 9.748105483177939e-05, "loss": 2.0895, "step": 2090 }, { "epoch": 0.11654868736413801, "grad_norm": 0.5149423480033875, "learning_rate": 9.747826348820129e-05, "loss": 1.6339, "step": 2091 }, { "epoch": 0.11660442561730115, "grad_norm": 0.48806729912757874, "learning_rate": 9.747547063888947e-05, "loss": 1.8714, "step": 2092 }, { "epoch": 0.1166601638704643, "grad_norm": 0.5147302746772766, "learning_rate": 9.747267628393252e-05, "loss": 1.8269, "step": 2093 }, { "epoch": 0.11671590212362745, "grad_norm": 0.512217104434967, "learning_rate": 9.746988042341906e-05, "loss": 1.7604, "step": 2094 }, { "epoch": 0.1167716403767906, "grad_norm": 0.66917484998703, "learning_rate": 9.746708305743778e-05, "loss": 2.2348, "step": 2095 }, { "epoch": 0.11682737862995374, "grad_norm": 0.5376080870628357, "learning_rate": 9.746428418607737e-05, "loss": 1.811, "step": 2096 }, { "epoch": 0.11688311688311688, "grad_norm": 0.5490595102310181, "learning_rate": 9.746148380942661e-05, "loss": 1.7822, "step": 2097 }, { "epoch": 0.11693885513628002, "grad_norm": 0.5195513367652893, "learning_rate": 9.745868192757429e-05, "loss": 1.815, "step": 2098 }, { "epoch": 0.11699459338944318, "grad_norm": 0.4978055953979492, "learning_rate": 9.745587854060929e-05, "loss": 1.6799, "step": 2099 }, { "epoch": 0.11705033164260632, "grad_norm": 0.47539737820625305, "learning_rate": 9.74530736486205e-05, "loss": 1.3444, "step": 2100 }, { "epoch": 0.11710606989576947, "grad_norm": 0.49834421277046204, "learning_rate": 9.74502672516969e-05, "loss": 1.6343, "step": 2101 }, { "epoch": 0.11716180814893261, "grad_norm": 0.5414234399795532, "learning_rate": 9.744745934992747e-05, "loss": 1.8732, "step": 2102 }, { "epoch": 0.11721754640209577, "grad_norm": 0.55171799659729, "learning_rate": 9.744464994340126e-05, "loss": 1.823, "step": 2103 }, { "epoch": 0.11727328465525891, "grad_norm": 0.545732319355011, "learning_rate": 9.744183903220738e-05, "loss": 1.6152, "step": 2104 }, { "epoch": 0.11732902290842205, "grad_norm": 0.5116435885429382, "learning_rate": 9.743902661643498e-05, "loss": 1.8159, "step": 2105 }, { "epoch": 0.1173847611615852, "grad_norm": 0.5736915469169617, "learning_rate": 9.743621269617324e-05, "loss": 2.0891, "step": 2106 }, { "epoch": 0.11744049941474834, "grad_norm": 0.5401880741119385, "learning_rate": 9.74333972715114e-05, "loss": 1.6851, "step": 2107 }, { "epoch": 0.1174962376679115, "grad_norm": 0.4980708658695221, "learning_rate": 9.743058034253876e-05, "loss": 1.7487, "step": 2108 }, { "epoch": 0.11755197592107464, "grad_norm": 0.5513383150100708, "learning_rate": 9.742776190934464e-05, "loss": 1.7077, "step": 2109 }, { "epoch": 0.11760771417423778, "grad_norm": 0.48612821102142334, "learning_rate": 9.742494197201845e-05, "loss": 1.7193, "step": 2110 }, { "epoch": 0.11766345242740092, "grad_norm": 0.5319970846176147, "learning_rate": 9.742212053064959e-05, "loss": 1.8341, "step": 2111 }, { "epoch": 0.11771919068056406, "grad_norm": 0.5188704133033752, "learning_rate": 9.741929758532758e-05, "loss": 1.7452, "step": 2112 }, { "epoch": 0.11777492893372722, "grad_norm": 0.569303035736084, "learning_rate": 9.741647313614191e-05, "loss": 1.7242, "step": 2113 }, { "epoch": 0.11783066718689036, "grad_norm": 0.5230869650840759, "learning_rate": 9.741364718318216e-05, "loss": 1.7484, "step": 2114 }, { "epoch": 0.11788640544005351, "grad_norm": 0.5458916425704956, "learning_rate": 9.741081972653798e-05, "loss": 1.8975, "step": 2115 }, { "epoch": 0.11794214369321665, "grad_norm": 0.5454350113868713, "learning_rate": 9.740799076629902e-05, "loss": 1.7848, "step": 2116 }, { "epoch": 0.1179978819463798, "grad_norm": 0.5229981541633606, "learning_rate": 9.7405160302555e-05, "loss": 1.7087, "step": 2117 }, { "epoch": 0.11805362019954295, "grad_norm": 0.5540334582328796, "learning_rate": 9.740232833539567e-05, "loss": 1.712, "step": 2118 }, { "epoch": 0.11810935845270609, "grad_norm": 0.5371966361999512, "learning_rate": 9.739949486491088e-05, "loss": 1.6682, "step": 2119 }, { "epoch": 0.11816509670586924, "grad_norm": 0.5578680038452148, "learning_rate": 9.739665989119047e-05, "loss": 1.7035, "step": 2120 }, { "epoch": 0.11822083495903238, "grad_norm": 0.49404215812683105, "learning_rate": 9.739382341432434e-05, "loss": 1.6535, "step": 2121 }, { "epoch": 0.11827657321219553, "grad_norm": 0.5198866724967957, "learning_rate": 9.739098543440246e-05, "loss": 1.9483, "step": 2122 }, { "epoch": 0.11833231146535868, "grad_norm": 0.5561308860778809, "learning_rate": 9.738814595151481e-05, "loss": 1.6287, "step": 2123 }, { "epoch": 0.11838804971852182, "grad_norm": 0.5929575562477112, "learning_rate": 9.73853049657515e-05, "loss": 1.8991, "step": 2124 }, { "epoch": 0.11844378797168496, "grad_norm": 0.5198292136192322, "learning_rate": 9.738246247720257e-05, "loss": 1.7004, "step": 2125 }, { "epoch": 0.11849952622484812, "grad_norm": 0.4800911247730255, "learning_rate": 9.73796184859582e-05, "loss": 1.8126, "step": 2126 }, { "epoch": 0.11855526447801126, "grad_norm": 0.5122108459472656, "learning_rate": 9.737677299210857e-05, "loss": 1.6761, "step": 2127 }, { "epoch": 0.1186110027311744, "grad_norm": 0.5015464425086975, "learning_rate": 9.737392599574391e-05, "loss": 1.6405, "step": 2128 }, { "epoch": 0.11866674098433755, "grad_norm": 0.560658872127533, "learning_rate": 9.737107749695456e-05, "loss": 1.8458, "step": 2129 }, { "epoch": 0.11872247923750069, "grad_norm": 0.5312667489051819, "learning_rate": 9.73682274958308e-05, "loss": 1.9419, "step": 2130 }, { "epoch": 0.11877821749066385, "grad_norm": 0.5537664294242859, "learning_rate": 9.736537599246305e-05, "loss": 2.0495, "step": 2131 }, { "epoch": 0.11883395574382699, "grad_norm": 0.5166563391685486, "learning_rate": 9.736252298694172e-05, "loss": 1.7997, "step": 2132 }, { "epoch": 0.11888969399699013, "grad_norm": 0.5567119121551514, "learning_rate": 9.735966847935732e-05, "loss": 2.0086, "step": 2133 }, { "epoch": 0.11894543225015328, "grad_norm": 0.5614973306655884, "learning_rate": 9.735681246980035e-05, "loss": 1.8669, "step": 2134 }, { "epoch": 0.11900117050331642, "grad_norm": 0.4755729138851166, "learning_rate": 9.73539549583614e-05, "loss": 1.4678, "step": 2135 }, { "epoch": 0.11905690875647958, "grad_norm": 0.5338446497917175, "learning_rate": 9.73510959451311e-05, "loss": 1.758, "step": 2136 }, { "epoch": 0.11911264700964272, "grad_norm": 0.5301800966262817, "learning_rate": 9.734823543020009e-05, "loss": 1.6377, "step": 2137 }, { "epoch": 0.11916838526280586, "grad_norm": 0.5584478378295898, "learning_rate": 9.734537341365914e-05, "loss": 1.8973, "step": 2138 }, { "epoch": 0.119224123515969, "grad_norm": 0.5499609112739563, "learning_rate": 9.734250989559896e-05, "loss": 1.8316, "step": 2139 }, { "epoch": 0.11927986176913216, "grad_norm": 0.5567249655723572, "learning_rate": 9.733964487611042e-05, "loss": 1.9231, "step": 2140 }, { "epoch": 0.1193356000222953, "grad_norm": 0.5121795535087585, "learning_rate": 9.733677835528434e-05, "loss": 1.7316, "step": 2141 }, { "epoch": 0.11939133827545845, "grad_norm": 0.5235653519630432, "learning_rate": 9.733391033321164e-05, "loss": 1.7328, "step": 2142 }, { "epoch": 0.11944707652862159, "grad_norm": 0.5482314229011536, "learning_rate": 9.733104080998329e-05, "loss": 1.9832, "step": 2143 }, { "epoch": 0.11950281478178473, "grad_norm": 0.4945628345012665, "learning_rate": 9.732816978569028e-05, "loss": 1.6102, "step": 2144 }, { "epoch": 0.11955855303494789, "grad_norm": 0.532642126083374, "learning_rate": 9.732529726042365e-05, "loss": 1.6543, "step": 2145 }, { "epoch": 0.11961429128811103, "grad_norm": 0.5531574487686157, "learning_rate": 9.732242323427455e-05, "loss": 1.8017, "step": 2146 }, { "epoch": 0.11967002954127418, "grad_norm": 0.595876932144165, "learning_rate": 9.731954770733407e-05, "loss": 2.0041, "step": 2147 }, { "epoch": 0.11972576779443732, "grad_norm": 0.5025404095649719, "learning_rate": 9.731667067969344e-05, "loss": 1.716, "step": 2148 }, { "epoch": 0.11978150604760048, "grad_norm": 0.5070561766624451, "learning_rate": 9.731379215144388e-05, "loss": 1.8201, "step": 2149 }, { "epoch": 0.11983724430076362, "grad_norm": 0.5182836651802063, "learning_rate": 9.73109121226767e-05, "loss": 1.51, "step": 2150 }, { "epoch": 0.11989298255392676, "grad_norm": 0.5657908320426941, "learning_rate": 9.730803059348323e-05, "loss": 2.0817, "step": 2151 }, { "epoch": 0.1199487208070899, "grad_norm": 0.5556692481040955, "learning_rate": 9.730514756395485e-05, "loss": 1.854, "step": 2152 }, { "epoch": 0.12000445906025305, "grad_norm": 0.4503386616706848, "learning_rate": 9.7302263034183e-05, "loss": 1.4719, "step": 2153 }, { "epoch": 0.1200601973134162, "grad_norm": 0.5425733327865601, "learning_rate": 9.729937700425916e-05, "loss": 1.8686, "step": 2154 }, { "epoch": 0.12011593556657935, "grad_norm": 0.5144285559654236, "learning_rate": 9.729648947427484e-05, "loss": 1.8232, "step": 2155 }, { "epoch": 0.12017167381974249, "grad_norm": 0.5346119999885559, "learning_rate": 9.729360044432166e-05, "loss": 1.7735, "step": 2156 }, { "epoch": 0.12022741207290563, "grad_norm": 0.5558546185493469, "learning_rate": 9.729070991449119e-05, "loss": 1.9485, "step": 2157 }, { "epoch": 0.12028315032606877, "grad_norm": 0.495919406414032, "learning_rate": 9.728781788487513e-05, "loss": 1.6713, "step": 2158 }, { "epoch": 0.12033888857923193, "grad_norm": 0.5348759889602661, "learning_rate": 9.72849243555652e-05, "loss": 1.6913, "step": 2159 }, { "epoch": 0.12039462683239507, "grad_norm": 0.5228710174560547, "learning_rate": 9.728202932665316e-05, "loss": 1.6557, "step": 2160 }, { "epoch": 0.12045036508555822, "grad_norm": 0.49766623973846436, "learning_rate": 9.727913279823081e-05, "loss": 1.6087, "step": 2161 }, { "epoch": 0.12050610333872136, "grad_norm": 0.5042500495910645, "learning_rate": 9.727623477039005e-05, "loss": 1.8017, "step": 2162 }, { "epoch": 0.12056184159188452, "grad_norm": 0.5221708416938782, "learning_rate": 9.727333524322274e-05, "loss": 1.7577, "step": 2163 }, { "epoch": 0.12061757984504766, "grad_norm": 0.5310743451118469, "learning_rate": 9.727043421682087e-05, "loss": 1.7025, "step": 2164 }, { "epoch": 0.1206733180982108, "grad_norm": 0.5771050453186035, "learning_rate": 9.726753169127643e-05, "loss": 1.8185, "step": 2165 }, { "epoch": 0.12072905635137395, "grad_norm": 0.4827874004840851, "learning_rate": 9.726462766668147e-05, "loss": 1.5869, "step": 2166 }, { "epoch": 0.12078479460453709, "grad_norm": 0.5001873970031738, "learning_rate": 9.72617221431281e-05, "loss": 1.6207, "step": 2167 }, { "epoch": 0.12084053285770024, "grad_norm": 0.47895923256874084, "learning_rate": 9.725881512070845e-05, "loss": 1.5611, "step": 2168 }, { "epoch": 0.12089627111086339, "grad_norm": 0.5227773785591125, "learning_rate": 9.725590659951473e-05, "loss": 1.7524, "step": 2169 }, { "epoch": 0.12095200936402653, "grad_norm": 0.5513851046562195, "learning_rate": 9.725299657963916e-05, "loss": 1.9093, "step": 2170 }, { "epoch": 0.12100774761718967, "grad_norm": 0.5206924080848694, "learning_rate": 9.725008506117405e-05, "loss": 1.6196, "step": 2171 }, { "epoch": 0.12106348587035283, "grad_norm": 0.5124804377555847, "learning_rate": 9.724717204421175e-05, "loss": 1.5592, "step": 2172 }, { "epoch": 0.12111922412351597, "grad_norm": 0.49579185247421265, "learning_rate": 9.724425752884458e-05, "loss": 1.7796, "step": 2173 }, { "epoch": 0.12117496237667912, "grad_norm": 0.4806743562221527, "learning_rate": 9.724134151516504e-05, "loss": 1.5684, "step": 2174 }, { "epoch": 0.12123070062984226, "grad_norm": 0.5735479593276978, "learning_rate": 9.72384240032656e-05, "loss": 1.9183, "step": 2175 }, { "epoch": 0.1212864388830054, "grad_norm": 0.49125027656555176, "learning_rate": 9.723550499323874e-05, "loss": 1.5609, "step": 2176 }, { "epoch": 0.12134217713616856, "grad_norm": 0.5535476207733154, "learning_rate": 9.723258448517707e-05, "loss": 1.8593, "step": 2177 }, { "epoch": 0.1213979153893317, "grad_norm": 0.5923840403556824, "learning_rate": 9.722966247917322e-05, "loss": 1.8673, "step": 2178 }, { "epoch": 0.12145365364249484, "grad_norm": 0.5120698809623718, "learning_rate": 9.722673897531983e-05, "loss": 1.6219, "step": 2179 }, { "epoch": 0.12150939189565799, "grad_norm": 0.5636369585990906, "learning_rate": 9.722381397370963e-05, "loss": 1.9298, "step": 2180 }, { "epoch": 0.12156513014882113, "grad_norm": 0.5421077609062195, "learning_rate": 9.722088747443539e-05, "loss": 1.4028, "step": 2181 }, { "epoch": 0.12162086840198429, "grad_norm": 0.5058643817901611, "learning_rate": 9.721795947758991e-05, "loss": 1.6988, "step": 2182 }, { "epoch": 0.12167660665514743, "grad_norm": 0.5012438297271729, "learning_rate": 9.721502998326607e-05, "loss": 1.6624, "step": 2183 }, { "epoch": 0.12173234490831057, "grad_norm": 0.47187769412994385, "learning_rate": 9.721209899155675e-05, "loss": 1.5275, "step": 2184 }, { "epoch": 0.12178808316147371, "grad_norm": 0.525303065776825, "learning_rate": 9.720916650255492e-05, "loss": 1.7458, "step": 2185 }, { "epoch": 0.12184382141463687, "grad_norm": 0.586681604385376, "learning_rate": 9.720623251635357e-05, "loss": 1.7205, "step": 2186 }, { "epoch": 0.12189955966780001, "grad_norm": 0.5550994873046875, "learning_rate": 9.720329703304577e-05, "loss": 1.6508, "step": 2187 }, { "epoch": 0.12195529792096316, "grad_norm": 0.5518259406089783, "learning_rate": 9.720036005272459e-05, "loss": 1.7847, "step": 2188 }, { "epoch": 0.1220110361741263, "grad_norm": 0.4833231270313263, "learning_rate": 9.719742157548319e-05, "loss": 1.578, "step": 2189 }, { "epoch": 0.12206677442728944, "grad_norm": 0.5002262592315674, "learning_rate": 9.719448160141476e-05, "loss": 1.7526, "step": 2190 }, { "epoch": 0.1221225126804526, "grad_norm": 0.4701862335205078, "learning_rate": 9.719154013061253e-05, "loss": 1.369, "step": 2191 }, { "epoch": 0.12217825093361574, "grad_norm": 0.5255539417266846, "learning_rate": 9.71885971631698e-05, "loss": 1.9266, "step": 2192 }, { "epoch": 0.12223398918677889, "grad_norm": 0.5181805491447449, "learning_rate": 9.71856526991799e-05, "loss": 1.8049, "step": 2193 }, { "epoch": 0.12228972743994203, "grad_norm": 0.5119277834892273, "learning_rate": 9.71827067387362e-05, "loss": 1.6141, "step": 2194 }, { "epoch": 0.12234546569310518, "grad_norm": 0.46822264790534973, "learning_rate": 9.717975928193214e-05, "loss": 1.4462, "step": 2195 }, { "epoch": 0.12240120394626833, "grad_norm": 0.5520098209381104, "learning_rate": 9.717681032886119e-05, "loss": 1.7872, "step": 2196 }, { "epoch": 0.12245694219943147, "grad_norm": 0.5204572677612305, "learning_rate": 9.717385987961686e-05, "loss": 1.7539, "step": 2197 }, { "epoch": 0.12251268045259461, "grad_norm": 0.5343250036239624, "learning_rate": 9.717090793429276e-05, "loss": 1.8575, "step": 2198 }, { "epoch": 0.12256841870575776, "grad_norm": 0.521108865737915, "learning_rate": 9.716795449298248e-05, "loss": 1.9104, "step": 2199 }, { "epoch": 0.12262415695892091, "grad_norm": 0.49352675676345825, "learning_rate": 9.71649995557797e-05, "loss": 1.6201, "step": 2200 }, { "epoch": 0.12267989521208406, "grad_norm": 0.5716384649276733, "learning_rate": 9.716204312277812e-05, "loss": 1.928, "step": 2201 }, { "epoch": 0.1227356334652472, "grad_norm": 0.5332071781158447, "learning_rate": 9.715908519407149e-05, "loss": 1.6348, "step": 2202 }, { "epoch": 0.12279137171841034, "grad_norm": 0.5008523464202881, "learning_rate": 9.715612576975366e-05, "loss": 1.8211, "step": 2203 }, { "epoch": 0.12284710997157348, "grad_norm": 0.5112088322639465, "learning_rate": 9.715316484991845e-05, "loss": 1.8334, "step": 2204 }, { "epoch": 0.12290284822473664, "grad_norm": 0.5519534349441528, "learning_rate": 9.715020243465976e-05, "loss": 1.8001, "step": 2205 }, { "epoch": 0.12295858647789978, "grad_norm": 0.46493321657180786, "learning_rate": 9.714723852407157e-05, "loss": 1.4173, "step": 2206 }, { "epoch": 0.12301432473106293, "grad_norm": 0.5702951550483704, "learning_rate": 9.714427311824786e-05, "loss": 1.7186, "step": 2207 }, { "epoch": 0.12307006298422607, "grad_norm": 0.5255847573280334, "learning_rate": 9.714130621728266e-05, "loss": 1.6884, "step": 2208 }, { "epoch": 0.12312580123738923, "grad_norm": 0.581146776676178, "learning_rate": 9.713833782127008e-05, "loss": 1.8707, "step": 2209 }, { "epoch": 0.12318153949055237, "grad_norm": 0.5044531226158142, "learning_rate": 9.713536793030429e-05, "loss": 1.555, "step": 2210 }, { "epoch": 0.12323727774371551, "grad_norm": 0.543787956237793, "learning_rate": 9.713239654447943e-05, "loss": 1.8188, "step": 2211 }, { "epoch": 0.12329301599687865, "grad_norm": 0.6438772678375244, "learning_rate": 9.712942366388975e-05, "loss": 1.8096, "step": 2212 }, { "epoch": 0.1233487542500418, "grad_norm": 0.5758397579193115, "learning_rate": 9.712644928862953e-05, "loss": 1.8329, "step": 2213 }, { "epoch": 0.12340449250320495, "grad_norm": 0.5573188066482544, "learning_rate": 9.712347341879311e-05, "loss": 1.8994, "step": 2214 }, { "epoch": 0.1234602307563681, "grad_norm": 0.5477108359336853, "learning_rate": 9.712049605447486e-05, "loss": 1.8856, "step": 2215 }, { "epoch": 0.12351596900953124, "grad_norm": 0.5133275985717773, "learning_rate": 9.711751719576922e-05, "loss": 1.7319, "step": 2216 }, { "epoch": 0.12357170726269438, "grad_norm": 0.5406665802001953, "learning_rate": 9.711453684277063e-05, "loss": 1.9889, "step": 2217 }, { "epoch": 0.12362744551585754, "grad_norm": 0.48421719670295715, "learning_rate": 9.711155499557364e-05, "loss": 1.5177, "step": 2218 }, { "epoch": 0.12368318376902068, "grad_norm": 0.5295604467391968, "learning_rate": 9.710857165427281e-05, "loss": 1.5376, "step": 2219 }, { "epoch": 0.12373892202218383, "grad_norm": 0.5241243243217468, "learning_rate": 9.710558681896274e-05, "loss": 1.7389, "step": 2220 }, { "epoch": 0.12379466027534697, "grad_norm": 0.48620593547821045, "learning_rate": 9.71026004897381e-05, "loss": 1.7281, "step": 2221 }, { "epoch": 0.12385039852851011, "grad_norm": 0.5162755846977234, "learning_rate": 9.70996126666936e-05, "loss": 1.6421, "step": 2222 }, { "epoch": 0.12390613678167327, "grad_norm": 0.5603106021881104, "learning_rate": 9.7096623349924e-05, "loss": 2.0405, "step": 2223 }, { "epoch": 0.12396187503483641, "grad_norm": 0.5636157393455505, "learning_rate": 9.70936325395241e-05, "loss": 1.7629, "step": 2224 }, { "epoch": 0.12401761328799955, "grad_norm": 0.5287961363792419, "learning_rate": 9.709064023558874e-05, "loss": 1.7357, "step": 2225 }, { "epoch": 0.1240733515411627, "grad_norm": 0.5584306120872498, "learning_rate": 9.708764643821284e-05, "loss": 1.905, "step": 2226 }, { "epoch": 0.12412908979432584, "grad_norm": 0.5021309852600098, "learning_rate": 9.708465114749132e-05, "loss": 1.7439, "step": 2227 }, { "epoch": 0.124184828047489, "grad_norm": 0.5482348799705505, "learning_rate": 9.708165436351921e-05, "loss": 1.7851, "step": 2228 }, { "epoch": 0.12424056630065214, "grad_norm": 0.498470276594162, "learning_rate": 9.707865608639152e-05, "loss": 1.494, "step": 2229 }, { "epoch": 0.12429630455381528, "grad_norm": 0.5526018142700195, "learning_rate": 9.707565631620334e-05, "loss": 1.973, "step": 2230 }, { "epoch": 0.12435204280697842, "grad_norm": 0.5773054957389832, "learning_rate": 9.707265505304982e-05, "loss": 1.9693, "step": 2231 }, { "epoch": 0.12440778106014158, "grad_norm": 0.5307757258415222, "learning_rate": 9.706965229702614e-05, "loss": 1.8978, "step": 2232 }, { "epoch": 0.12446351931330472, "grad_norm": 0.5740475654602051, "learning_rate": 9.70666480482275e-05, "loss": 2.0298, "step": 2233 }, { "epoch": 0.12451925756646787, "grad_norm": 0.5156608819961548, "learning_rate": 9.706364230674923e-05, "loss": 1.5383, "step": 2234 }, { "epoch": 0.12457499581963101, "grad_norm": 0.4921102225780487, "learning_rate": 9.706063507268661e-05, "loss": 1.6472, "step": 2235 }, { "epoch": 0.12463073407279415, "grad_norm": 0.5701449513435364, "learning_rate": 9.705762634613502e-05, "loss": 1.7692, "step": 2236 }, { "epoch": 0.12468647232595731, "grad_norm": 0.49713411927223206, "learning_rate": 9.705461612718991e-05, "loss": 1.5998, "step": 2237 }, { "epoch": 0.12474221057912045, "grad_norm": 0.5252828598022461, "learning_rate": 9.705160441594671e-05, "loss": 1.6545, "step": 2238 }, { "epoch": 0.1247979488322836, "grad_norm": 0.543063759803772, "learning_rate": 9.704859121250095e-05, "loss": 1.8984, "step": 2239 }, { "epoch": 0.12485368708544674, "grad_norm": 0.5450255274772644, "learning_rate": 9.704557651694818e-05, "loss": 1.7794, "step": 2240 }, { "epoch": 0.1249094253386099, "grad_norm": 0.4936400353908539, "learning_rate": 9.704256032938403e-05, "loss": 1.4191, "step": 2241 }, { "epoch": 0.12496516359177304, "grad_norm": 0.5075535774230957, "learning_rate": 9.703954264990414e-05, "loss": 1.7634, "step": 2242 }, { "epoch": 0.12502090184493617, "grad_norm": 0.5337166786193848, "learning_rate": 9.703652347860422e-05, "loss": 1.9257, "step": 2243 }, { "epoch": 0.12507664009809932, "grad_norm": 0.5265361666679382, "learning_rate": 9.703350281558002e-05, "loss": 1.8102, "step": 2244 }, { "epoch": 0.12513237835126248, "grad_norm": 0.5706486701965332, "learning_rate": 9.703048066092733e-05, "loss": 2.1658, "step": 2245 }, { "epoch": 0.1251881166044256, "grad_norm": 0.5012516975402832, "learning_rate": 9.7027457014742e-05, "loss": 1.6586, "step": 2246 }, { "epoch": 0.12524385485758877, "grad_norm": 0.5617608428001404, "learning_rate": 9.702443187711992e-05, "loss": 1.7678, "step": 2247 }, { "epoch": 0.12529959311075192, "grad_norm": 0.5820160508155823, "learning_rate": 9.702140524815704e-05, "loss": 1.848, "step": 2248 }, { "epoch": 0.12535533136391505, "grad_norm": 0.5511069297790527, "learning_rate": 9.701837712794932e-05, "loss": 1.8369, "step": 2249 }, { "epoch": 0.1254110696170782, "grad_norm": 0.5301650166511536, "learning_rate": 9.701534751659283e-05, "loss": 1.8621, "step": 2250 }, { "epoch": 0.12546680787024134, "grad_norm": 0.519693911075592, "learning_rate": 9.701231641418363e-05, "loss": 1.7069, "step": 2251 }, { "epoch": 0.1255225461234045, "grad_norm": 0.5177733302116394, "learning_rate": 9.700928382081786e-05, "loss": 1.7311, "step": 2252 }, { "epoch": 0.12557828437656765, "grad_norm": 0.5452710390090942, "learning_rate": 9.700624973659169e-05, "loss": 1.6022, "step": 2253 }, { "epoch": 0.12563402262973078, "grad_norm": 0.49126002192497253, "learning_rate": 9.700321416160134e-05, "loss": 1.6004, "step": 2254 }, { "epoch": 0.12568976088289394, "grad_norm": 0.4859536290168762, "learning_rate": 9.70001770959431e-05, "loss": 1.627, "step": 2255 }, { "epoch": 0.12574549913605707, "grad_norm": 0.5808461308479309, "learning_rate": 9.699713853971324e-05, "loss": 1.9893, "step": 2256 }, { "epoch": 0.12580123738922022, "grad_norm": 0.5044426321983337, "learning_rate": 9.699409849300818e-05, "loss": 1.6269, "step": 2257 }, { "epoch": 0.12585697564238338, "grad_norm": 0.5458354353904724, "learning_rate": 9.69910569559243e-05, "loss": 1.6803, "step": 2258 }, { "epoch": 0.1259127138955465, "grad_norm": 0.5350721478462219, "learning_rate": 9.698801392855808e-05, "loss": 1.7217, "step": 2259 }, { "epoch": 0.12596845214870966, "grad_norm": 0.511223554611206, "learning_rate": 9.698496941100601e-05, "loss": 1.6904, "step": 2260 }, { "epoch": 0.1260241904018728, "grad_norm": 0.46969008445739746, "learning_rate": 9.698192340336468e-05, "loss": 1.5411, "step": 2261 }, { "epoch": 0.12607992865503595, "grad_norm": 0.5638684630393982, "learning_rate": 9.697887590573063e-05, "loss": 1.6144, "step": 2262 }, { "epoch": 0.1261356669081991, "grad_norm": 0.5146279335021973, "learning_rate": 9.697582691820054e-05, "loss": 1.605, "step": 2263 }, { "epoch": 0.12619140516136224, "grad_norm": 0.46321019530296326, "learning_rate": 9.697277644087113e-05, "loss": 1.0444, "step": 2264 }, { "epoch": 0.1262471434145254, "grad_norm": 0.5038657784461975, "learning_rate": 9.69697244738391e-05, "loss": 1.7319, "step": 2265 }, { "epoch": 0.12630288166768852, "grad_norm": 0.593559205532074, "learning_rate": 9.696667101720127e-05, "loss": 1.9173, "step": 2266 }, { "epoch": 0.12635861992085168, "grad_norm": 0.5412843227386475, "learning_rate": 9.696361607105448e-05, "loss": 1.6603, "step": 2267 }, { "epoch": 0.12641435817401483, "grad_norm": 0.5422548055648804, "learning_rate": 9.69605596354956e-05, "loss": 1.7048, "step": 2268 }, { "epoch": 0.12647009642717796, "grad_norm": 0.5455138087272644, "learning_rate": 9.695750171062156e-05, "loss": 1.669, "step": 2269 }, { "epoch": 0.12652583468034112, "grad_norm": 0.5468176007270813, "learning_rate": 9.695444229652935e-05, "loss": 1.6744, "step": 2270 }, { "epoch": 0.12658157293350428, "grad_norm": 0.49385011196136475, "learning_rate": 9.6951381393316e-05, "loss": 1.6182, "step": 2271 }, { "epoch": 0.1266373111866674, "grad_norm": 0.5301021933555603, "learning_rate": 9.694831900107857e-05, "loss": 1.7818, "step": 2272 }, { "epoch": 0.12669304943983056, "grad_norm": 0.6178646087646484, "learning_rate": 9.69452551199142e-05, "loss": 1.9646, "step": 2273 }, { "epoch": 0.1267487876929937, "grad_norm": 0.5421885848045349, "learning_rate": 9.694218974992005e-05, "loss": 1.6862, "step": 2274 }, { "epoch": 0.12680452594615685, "grad_norm": 0.5251665115356445, "learning_rate": 9.693912289119332e-05, "loss": 1.7259, "step": 2275 }, { "epoch": 0.12686026419932, "grad_norm": 0.5069818496704102, "learning_rate": 9.693605454383128e-05, "loss": 1.8426, "step": 2276 }, { "epoch": 0.12691600245248313, "grad_norm": 0.5525764226913452, "learning_rate": 9.693298470793126e-05, "loss": 1.9999, "step": 2277 }, { "epoch": 0.1269717407056463, "grad_norm": 0.5717039108276367, "learning_rate": 9.69299133835906e-05, "loss": 1.736, "step": 2278 }, { "epoch": 0.12702747895880942, "grad_norm": 0.4768933057785034, "learning_rate": 9.69268405709067e-05, "loss": 1.4284, "step": 2279 }, { "epoch": 0.12708321721197258, "grad_norm": 0.5677302479743958, "learning_rate": 9.692376626997703e-05, "loss": 1.8972, "step": 2280 }, { "epoch": 0.12713895546513573, "grad_norm": 0.5202549695968628, "learning_rate": 9.692069048089907e-05, "loss": 1.6173, "step": 2281 }, { "epoch": 0.12719469371829886, "grad_norm": 0.5106683373451233, "learning_rate": 9.691761320377037e-05, "loss": 1.5599, "step": 2282 }, { "epoch": 0.12725043197146202, "grad_norm": 0.5042096376419067, "learning_rate": 9.691453443868854e-05, "loss": 1.7705, "step": 2283 }, { "epoch": 0.12730617022462515, "grad_norm": 0.5391340255737305, "learning_rate": 9.691145418575122e-05, "loss": 1.9065, "step": 2284 }, { "epoch": 0.1273619084777883, "grad_norm": 0.5074059963226318, "learning_rate": 9.690837244505607e-05, "loss": 1.7623, "step": 2285 }, { "epoch": 0.12741764673095146, "grad_norm": 0.5277912616729736, "learning_rate": 9.690528921670084e-05, "loss": 1.7758, "step": 2286 }, { "epoch": 0.1274733849841146, "grad_norm": 0.5068628787994385, "learning_rate": 9.69022045007833e-05, "loss": 1.6409, "step": 2287 }, { "epoch": 0.12752912323727775, "grad_norm": 0.5209136009216309, "learning_rate": 9.689911829740133e-05, "loss": 1.6144, "step": 2288 }, { "epoch": 0.12758486149044088, "grad_norm": 0.5280535221099854, "learning_rate": 9.689603060665273e-05, "loss": 1.8711, "step": 2289 }, { "epoch": 0.12764059974360403, "grad_norm": 0.5511658191680908, "learning_rate": 9.689294142863548e-05, "loss": 1.8228, "step": 2290 }, { "epoch": 0.1276963379967672, "grad_norm": 0.5436153411865234, "learning_rate": 9.688985076344754e-05, "loss": 1.696, "step": 2291 }, { "epoch": 0.12775207624993032, "grad_norm": 0.5065414309501648, "learning_rate": 9.68867586111869e-05, "loss": 1.6989, "step": 2292 }, { "epoch": 0.12780781450309348, "grad_norm": 0.5280441045761108, "learning_rate": 9.688366497195166e-05, "loss": 1.6764, "step": 2293 }, { "epoch": 0.12786355275625663, "grad_norm": 0.46777546405792236, "learning_rate": 9.68805698458399e-05, "loss": 1.4595, "step": 2294 }, { "epoch": 0.12791929100941976, "grad_norm": 0.5001897811889648, "learning_rate": 9.687747323294982e-05, "loss": 1.4642, "step": 2295 }, { "epoch": 0.12797502926258292, "grad_norm": 0.5615783929824829, "learning_rate": 9.687437513337961e-05, "loss": 1.7116, "step": 2296 }, { "epoch": 0.12803076751574605, "grad_norm": 0.5208621621131897, "learning_rate": 9.687127554722749e-05, "loss": 1.637, "step": 2297 }, { "epoch": 0.1280865057689092, "grad_norm": 0.5435874462127686, "learning_rate": 9.68681744745918e-05, "loss": 1.7629, "step": 2298 }, { "epoch": 0.12814224402207236, "grad_norm": 0.5296335220336914, "learning_rate": 9.686507191557089e-05, "loss": 1.827, "step": 2299 }, { "epoch": 0.1281979822752355, "grad_norm": 0.5191251635551453, "learning_rate": 9.686196787026311e-05, "loss": 1.9385, "step": 2300 }, { "epoch": 0.12825372052839865, "grad_norm": 0.5494365096092224, "learning_rate": 9.685886233876695e-05, "loss": 1.8378, "step": 2301 }, { "epoch": 0.12830945878156177, "grad_norm": 0.583207905292511, "learning_rate": 9.685575532118089e-05, "loss": 1.6812, "step": 2302 }, { "epoch": 0.12836519703472493, "grad_norm": 0.5473710894584656, "learning_rate": 9.685264681760345e-05, "loss": 1.9602, "step": 2303 }, { "epoch": 0.1284209352878881, "grad_norm": 0.567272424697876, "learning_rate": 9.684953682813322e-05, "loss": 1.8125, "step": 2304 }, { "epoch": 0.12847667354105122, "grad_norm": 0.4732169806957245, "learning_rate": 9.684642535286885e-05, "loss": 1.5566, "step": 2305 }, { "epoch": 0.12853241179421437, "grad_norm": 0.516720712184906, "learning_rate": 9.684331239190899e-05, "loss": 1.5688, "step": 2306 }, { "epoch": 0.1285881500473775, "grad_norm": 0.5574965476989746, "learning_rate": 9.684019794535237e-05, "loss": 1.7452, "step": 2307 }, { "epoch": 0.12864388830054066, "grad_norm": 0.5443317294120789, "learning_rate": 9.683708201329777e-05, "loss": 1.6624, "step": 2308 }, { "epoch": 0.12869962655370382, "grad_norm": 0.5809649229049683, "learning_rate": 9.683396459584404e-05, "loss": 1.7721, "step": 2309 }, { "epoch": 0.12875536480686695, "grad_norm": 0.5913598537445068, "learning_rate": 9.683084569308997e-05, "loss": 2.1623, "step": 2310 }, { "epoch": 0.1288111030600301, "grad_norm": 0.5404501557350159, "learning_rate": 9.682772530513453e-05, "loss": 1.7165, "step": 2311 }, { "epoch": 0.12886684131319323, "grad_norm": 0.4902174174785614, "learning_rate": 9.682460343207669e-05, "loss": 1.6391, "step": 2312 }, { "epoch": 0.1289225795663564, "grad_norm": 0.5791998505592346, "learning_rate": 9.682148007401541e-05, "loss": 1.891, "step": 2313 }, { "epoch": 0.12897831781951954, "grad_norm": 0.5695587992668152, "learning_rate": 9.681835523104978e-05, "loss": 1.9901, "step": 2314 }, { "epoch": 0.12903405607268267, "grad_norm": 0.6025593876838684, "learning_rate": 9.681522890327889e-05, "loss": 1.7748, "step": 2315 }, { "epoch": 0.12908979432584583, "grad_norm": 0.5111005902290344, "learning_rate": 9.681210109080189e-05, "loss": 1.6, "step": 2316 }, { "epoch": 0.129145532579009, "grad_norm": 0.533204972743988, "learning_rate": 9.680897179371798e-05, "loss": 1.6863, "step": 2317 }, { "epoch": 0.12920127083217212, "grad_norm": 0.5172824859619141, "learning_rate": 9.68058410121264e-05, "loss": 1.7456, "step": 2318 }, { "epoch": 0.12925700908533527, "grad_norm": 0.5905986428260803, "learning_rate": 9.680270874612643e-05, "loss": 1.572, "step": 2319 }, { "epoch": 0.1293127473384984, "grad_norm": 0.5090576410293579, "learning_rate": 9.679957499581742e-05, "loss": 1.7946, "step": 2320 }, { "epoch": 0.12936848559166156, "grad_norm": 0.5587893724441528, "learning_rate": 9.679643976129876e-05, "loss": 1.7792, "step": 2321 }, { "epoch": 0.12942422384482472, "grad_norm": 0.6383116841316223, "learning_rate": 9.679330304266988e-05, "loss": 2.0051, "step": 2322 }, { "epoch": 0.12947996209798784, "grad_norm": 0.5700294375419617, "learning_rate": 9.679016484003023e-05, "loss": 1.8419, "step": 2323 }, { "epoch": 0.129535700351151, "grad_norm": 0.6416967511177063, "learning_rate": 9.678702515347938e-05, "loss": 1.7893, "step": 2324 }, { "epoch": 0.12959143860431413, "grad_norm": 0.5761459469795227, "learning_rate": 9.678388398311686e-05, "loss": 1.8868, "step": 2325 }, { "epoch": 0.1296471768574773, "grad_norm": 0.5779362320899963, "learning_rate": 9.678074132904231e-05, "loss": 1.6472, "step": 2326 }, { "epoch": 0.12970291511064044, "grad_norm": 0.5250251293182373, "learning_rate": 9.677759719135542e-05, "loss": 1.8353, "step": 2327 }, { "epoch": 0.12975865336380357, "grad_norm": 0.5306884050369263, "learning_rate": 9.677445157015585e-05, "loss": 1.8419, "step": 2328 }, { "epoch": 0.12981439161696673, "grad_norm": 0.5761096477508545, "learning_rate": 9.67713044655434e-05, "loss": 1.846, "step": 2329 }, { "epoch": 0.12987012987012986, "grad_norm": 0.5438225269317627, "learning_rate": 9.676815587761787e-05, "loss": 1.734, "step": 2330 }, { "epoch": 0.12992586812329301, "grad_norm": 0.5154998898506165, "learning_rate": 9.676500580647912e-05, "loss": 1.8124, "step": 2331 }, { "epoch": 0.12998160637645617, "grad_norm": 0.5288179516792297, "learning_rate": 9.676185425222704e-05, "loss": 2.0132, "step": 2332 }, { "epoch": 0.1300373446296193, "grad_norm": 0.5507707595825195, "learning_rate": 9.675870121496158e-05, "loss": 1.7686, "step": 2333 }, { "epoch": 0.13009308288278246, "grad_norm": 0.4893222451210022, "learning_rate": 9.675554669478272e-05, "loss": 1.8113, "step": 2334 }, { "epoch": 0.13014882113594559, "grad_norm": 0.5455611944198608, "learning_rate": 9.675239069179056e-05, "loss": 1.7593, "step": 2335 }, { "epoch": 0.13020455938910874, "grad_norm": 0.5068415403366089, "learning_rate": 9.674923320608513e-05, "loss": 1.5302, "step": 2336 }, { "epoch": 0.1302602976422719, "grad_norm": 0.5160056948661804, "learning_rate": 9.674607423776661e-05, "loss": 1.5793, "step": 2337 }, { "epoch": 0.13031603589543503, "grad_norm": 0.5414824485778809, "learning_rate": 9.674291378693515e-05, "loss": 1.6392, "step": 2338 }, { "epoch": 0.13037177414859819, "grad_norm": 0.5210713744163513, "learning_rate": 9.673975185369098e-05, "loss": 1.9403, "step": 2339 }, { "epoch": 0.13042751240176134, "grad_norm": 0.5296798944473267, "learning_rate": 9.673658843813442e-05, "loss": 1.7093, "step": 2340 }, { "epoch": 0.13048325065492447, "grad_norm": 0.5705276131629944, "learning_rate": 9.673342354036574e-05, "loss": 1.7645, "step": 2341 }, { "epoch": 0.13053898890808763, "grad_norm": 0.5289913415908813, "learning_rate": 9.673025716048536e-05, "loss": 1.81, "step": 2342 }, { "epoch": 0.13059472716125076, "grad_norm": 0.5237072706222534, "learning_rate": 9.672708929859368e-05, "loss": 2.0053, "step": 2343 }, { "epoch": 0.1306504654144139, "grad_norm": 0.5144554376602173, "learning_rate": 9.672391995479115e-05, "loss": 1.7236, "step": 2344 }, { "epoch": 0.13070620366757707, "grad_norm": 0.5384603142738342, "learning_rate": 9.672074912917831e-05, "loss": 1.7492, "step": 2345 }, { "epoch": 0.1307619419207402, "grad_norm": 0.5475570559501648, "learning_rate": 9.67175768218557e-05, "loss": 1.9068, "step": 2346 }, { "epoch": 0.13081768017390336, "grad_norm": 0.512937068939209, "learning_rate": 9.671440303292395e-05, "loss": 1.7364, "step": 2347 }, { "epoch": 0.13087341842706648, "grad_norm": 0.48609036207199097, "learning_rate": 9.67112277624837e-05, "loss": 1.5916, "step": 2348 }, { "epoch": 0.13092915668022964, "grad_norm": 0.5132019519805908, "learning_rate": 9.670805101063563e-05, "loss": 1.7222, "step": 2349 }, { "epoch": 0.1309848949333928, "grad_norm": 0.5112780928611755, "learning_rate": 9.670487277748052e-05, "loss": 1.6418, "step": 2350 }, { "epoch": 0.13104063318655593, "grad_norm": 0.531306803226471, "learning_rate": 9.670169306311916e-05, "loss": 1.7323, "step": 2351 }, { "epoch": 0.13109637143971908, "grad_norm": 0.48118212819099426, "learning_rate": 9.669851186765238e-05, "loss": 1.4822, "step": 2352 }, { "epoch": 0.1311521096928822, "grad_norm": 0.5309464931488037, "learning_rate": 9.669532919118108e-05, "loss": 1.767, "step": 2353 }, { "epoch": 0.13120784794604537, "grad_norm": 0.532576322555542, "learning_rate": 9.669214503380617e-05, "loss": 1.7228, "step": 2354 }, { "epoch": 0.13126358619920853, "grad_norm": 0.49597617983818054, "learning_rate": 9.668895939562868e-05, "loss": 1.4792, "step": 2355 }, { "epoch": 0.13131932445237166, "grad_norm": 0.5480032563209534, "learning_rate": 9.66857722767496e-05, "loss": 1.7285, "step": 2356 }, { "epoch": 0.1313750627055348, "grad_norm": 0.5191400647163391, "learning_rate": 9.668258367727002e-05, "loss": 1.5942, "step": 2357 }, { "epoch": 0.13143080095869794, "grad_norm": 0.5335458517074585, "learning_rate": 9.667939359729109e-05, "loss": 1.8991, "step": 2358 }, { "epoch": 0.1314865392118611, "grad_norm": 0.5872248411178589, "learning_rate": 9.667620203691393e-05, "loss": 1.8247, "step": 2359 }, { "epoch": 0.13154227746502425, "grad_norm": 0.5811527967453003, "learning_rate": 9.667300899623976e-05, "loss": 2.0837, "step": 2360 }, { "epoch": 0.13159801571818738, "grad_norm": 0.5214108824729919, "learning_rate": 9.66698144753699e-05, "loss": 1.681, "step": 2361 }, { "epoch": 0.13165375397135054, "grad_norm": 0.5067755579948425, "learning_rate": 9.666661847440563e-05, "loss": 1.7168, "step": 2362 }, { "epoch": 0.1317094922245137, "grad_norm": 0.5883169770240784, "learning_rate": 9.666342099344829e-05, "loss": 1.8355, "step": 2363 }, { "epoch": 0.13176523047767683, "grad_norm": 0.5047624111175537, "learning_rate": 9.666022203259931e-05, "loss": 1.6872, "step": 2364 }, { "epoch": 0.13182096873083998, "grad_norm": 0.5165308117866516, "learning_rate": 9.665702159196013e-05, "loss": 1.6867, "step": 2365 }, { "epoch": 0.1318767069840031, "grad_norm": 0.5131801962852478, "learning_rate": 9.665381967163227e-05, "loss": 1.5836, "step": 2366 }, { "epoch": 0.13193244523716627, "grad_norm": 0.5561967492103577, "learning_rate": 9.665061627171726e-05, "loss": 1.6933, "step": 2367 }, { "epoch": 0.13198818349032942, "grad_norm": 0.6118646860122681, "learning_rate": 9.664741139231668e-05, "loss": 2.0988, "step": 2368 }, { "epoch": 0.13204392174349255, "grad_norm": 0.5255211591720581, "learning_rate": 9.664420503353218e-05, "loss": 1.7087, "step": 2369 }, { "epoch": 0.1320996599966557, "grad_norm": 0.555664598941803, "learning_rate": 9.664099719546547e-05, "loss": 1.8029, "step": 2370 }, { "epoch": 0.13215539824981884, "grad_norm": 0.5417226552963257, "learning_rate": 9.663778787821825e-05, "loss": 1.7483, "step": 2371 }, { "epoch": 0.132211136502982, "grad_norm": 0.5773631930351257, "learning_rate": 9.663457708189232e-05, "loss": 1.7137, "step": 2372 }, { "epoch": 0.13226687475614515, "grad_norm": 0.5354270935058594, "learning_rate": 9.66313648065895e-05, "loss": 1.8748, "step": 2373 }, { "epoch": 0.13232261300930828, "grad_norm": 0.5149551033973694, "learning_rate": 9.662815105241168e-05, "loss": 1.5948, "step": 2374 }, { "epoch": 0.13237835126247144, "grad_norm": 0.5566468238830566, "learning_rate": 9.662493581946074e-05, "loss": 1.7724, "step": 2375 }, { "epoch": 0.13243408951563457, "grad_norm": 0.5304192304611206, "learning_rate": 9.66217191078387e-05, "loss": 1.8068, "step": 2376 }, { "epoch": 0.13248982776879772, "grad_norm": 0.5885264873504639, "learning_rate": 9.661850091764756e-05, "loss": 1.9129, "step": 2377 }, { "epoch": 0.13254556602196088, "grad_norm": 0.4796747863292694, "learning_rate": 9.661528124898937e-05, "loss": 1.6931, "step": 2378 }, { "epoch": 0.132601304275124, "grad_norm": 0.49771320819854736, "learning_rate": 9.661206010196624e-05, "loss": 1.5938, "step": 2379 }, { "epoch": 0.13265704252828717, "grad_norm": 0.530432939529419, "learning_rate": 9.660883747668034e-05, "loss": 2.0283, "step": 2380 }, { "epoch": 0.1327127807814503, "grad_norm": 0.515631914138794, "learning_rate": 9.660561337323385e-05, "loss": 1.8549, "step": 2381 }, { "epoch": 0.13276851903461345, "grad_norm": 0.6954619288444519, "learning_rate": 9.660238779172905e-05, "loss": 2.0152, "step": 2382 }, { "epoch": 0.1328242572877766, "grad_norm": 0.5233824253082275, "learning_rate": 9.65991607322682e-05, "loss": 1.7353, "step": 2383 }, { "epoch": 0.13287999554093974, "grad_norm": 0.5527575016021729, "learning_rate": 9.659593219495368e-05, "loss": 1.6361, "step": 2384 }, { "epoch": 0.1329357337941029, "grad_norm": 0.48741617798805237, "learning_rate": 9.659270217988786e-05, "loss": 1.682, "step": 2385 }, { "epoch": 0.13299147204726605, "grad_norm": 0.5804024338722229, "learning_rate": 9.658947068717316e-05, "loss": 1.5736, "step": 2386 }, { "epoch": 0.13304721030042918, "grad_norm": 0.5614018440246582, "learning_rate": 9.658623771691211e-05, "loss": 1.9172, "step": 2387 }, { "epoch": 0.13310294855359234, "grad_norm": 0.5239617824554443, "learning_rate": 9.658300326920722e-05, "loss": 1.7751, "step": 2388 }, { "epoch": 0.13315868680675547, "grad_norm": 0.5195541381835938, "learning_rate": 9.657976734416106e-05, "loss": 1.875, "step": 2389 }, { "epoch": 0.13321442505991862, "grad_norm": 0.531480610370636, "learning_rate": 9.657652994187625e-05, "loss": 1.7631, "step": 2390 }, { "epoch": 0.13327016331308178, "grad_norm": 0.5037621259689331, "learning_rate": 9.657329106245547e-05, "loss": 1.6134, "step": 2391 }, { "epoch": 0.1333259015662449, "grad_norm": 0.4974221885204315, "learning_rate": 9.657005070600144e-05, "loss": 1.7501, "step": 2392 }, { "epoch": 0.13338163981940807, "grad_norm": 0.5308098196983337, "learning_rate": 9.656680887261693e-05, "loss": 1.7283, "step": 2393 }, { "epoch": 0.1334373780725712, "grad_norm": 0.4996281862258911, "learning_rate": 9.656356556240473e-05, "loss": 1.7897, "step": 2394 }, { "epoch": 0.13349311632573435, "grad_norm": 0.6450517773628235, "learning_rate": 9.656032077546772e-05, "loss": 1.7089, "step": 2395 }, { "epoch": 0.1335488545788975, "grad_norm": 0.5968025326728821, "learning_rate": 9.655707451190883e-05, "loss": 1.8664, "step": 2396 }, { "epoch": 0.13360459283206064, "grad_norm": 0.470813512802124, "learning_rate": 9.655382677183095e-05, "loss": 1.5199, "step": 2397 }, { "epoch": 0.1336603310852238, "grad_norm": 0.5651730298995972, "learning_rate": 9.655057755533712e-05, "loss": 1.9733, "step": 2398 }, { "epoch": 0.13371606933838692, "grad_norm": 0.5370044112205505, "learning_rate": 9.654732686253039e-05, "loss": 1.8281, "step": 2399 }, { "epoch": 0.13377180759155008, "grad_norm": 0.5285357236862183, "learning_rate": 9.654407469351383e-05, "loss": 1.592, "step": 2400 }, { "epoch": 0.13382754584471324, "grad_norm": 0.5265277624130249, "learning_rate": 9.654082104839059e-05, "loss": 1.8503, "step": 2401 }, { "epoch": 0.13388328409787636, "grad_norm": 0.5449655652046204, "learning_rate": 9.653756592726386e-05, "loss": 1.8579, "step": 2402 }, { "epoch": 0.13393902235103952, "grad_norm": 0.5737154483795166, "learning_rate": 9.653430933023689e-05, "loss": 1.8618, "step": 2403 }, { "epoch": 0.13399476060420265, "grad_norm": 0.5164530873298645, "learning_rate": 9.653105125741292e-05, "loss": 1.6213, "step": 2404 }, { "epoch": 0.1340504988573658, "grad_norm": 0.5017974376678467, "learning_rate": 9.65277917088953e-05, "loss": 1.6255, "step": 2405 }, { "epoch": 0.13410623711052896, "grad_norm": 0.5122340321540833, "learning_rate": 9.652453068478741e-05, "loss": 1.5653, "step": 2406 }, { "epoch": 0.1341619753636921, "grad_norm": 0.6067832708358765, "learning_rate": 9.652126818519266e-05, "loss": 2.0985, "step": 2407 }, { "epoch": 0.13421771361685525, "grad_norm": 0.5796366333961487, "learning_rate": 9.651800421021453e-05, "loss": 1.9636, "step": 2408 }, { "epoch": 0.1342734518700184, "grad_norm": 0.5619643926620483, "learning_rate": 9.651473875995651e-05, "loss": 1.7129, "step": 2409 }, { "epoch": 0.13432919012318154, "grad_norm": 0.5060097575187683, "learning_rate": 9.651147183452219e-05, "loss": 1.5304, "step": 2410 }, { "epoch": 0.1343849283763447, "grad_norm": 0.532145619392395, "learning_rate": 9.650820343401515e-05, "loss": 1.7844, "step": 2411 }, { "epoch": 0.13444066662950782, "grad_norm": 0.5342923402786255, "learning_rate": 9.650493355853906e-05, "loss": 1.8585, "step": 2412 }, { "epoch": 0.13449640488267098, "grad_norm": 0.49805736541748047, "learning_rate": 9.650166220819764e-05, "loss": 1.4576, "step": 2413 }, { "epoch": 0.13455214313583413, "grad_norm": 0.5234712362289429, "learning_rate": 9.64983893830946e-05, "loss": 1.6994, "step": 2414 }, { "epoch": 0.13460788138899726, "grad_norm": 0.5124284029006958, "learning_rate": 9.649511508333375e-05, "loss": 1.6614, "step": 2415 }, { "epoch": 0.13466361964216042, "grad_norm": 0.4958679676055908, "learning_rate": 9.649183930901895e-05, "loss": 1.56, "step": 2416 }, { "epoch": 0.13471935789532355, "grad_norm": 0.5191091895103455, "learning_rate": 9.648856206025407e-05, "loss": 1.7004, "step": 2417 }, { "epoch": 0.1347750961484867, "grad_norm": 0.5366125702857971, "learning_rate": 9.648528333714304e-05, "loss": 1.7206, "step": 2418 }, { "epoch": 0.13483083440164986, "grad_norm": 0.5979599952697754, "learning_rate": 9.648200313978986e-05, "loss": 1.757, "step": 2419 }, { "epoch": 0.134886572654813, "grad_norm": 0.5878745317459106, "learning_rate": 9.647872146829855e-05, "loss": 1.7236, "step": 2420 }, { "epoch": 0.13494231090797615, "grad_norm": 0.5160901546478271, "learning_rate": 9.647543832277317e-05, "loss": 1.7274, "step": 2421 }, { "epoch": 0.13499804916113928, "grad_norm": 0.5626492500305176, "learning_rate": 9.647215370331786e-05, "loss": 1.9507, "step": 2422 }, { "epoch": 0.13505378741430243, "grad_norm": 0.5624846816062927, "learning_rate": 9.646886761003679e-05, "loss": 1.9476, "step": 2423 }, { "epoch": 0.1351095256674656, "grad_norm": 0.5468912720680237, "learning_rate": 9.646558004303419e-05, "loss": 1.7836, "step": 2424 }, { "epoch": 0.13516526392062872, "grad_norm": 0.5446691513061523, "learning_rate": 9.646229100241429e-05, "loss": 1.7664, "step": 2425 }, { "epoch": 0.13522100217379188, "grad_norm": 0.5568925738334656, "learning_rate": 9.64590004882814e-05, "loss": 2.0063, "step": 2426 }, { "epoch": 0.135276740426955, "grad_norm": 0.560264527797699, "learning_rate": 9.64557085007399e-05, "loss": 1.8132, "step": 2427 }, { "epoch": 0.13533247868011816, "grad_norm": 0.5093153715133667, "learning_rate": 9.64524150398942e-05, "loss": 1.4198, "step": 2428 }, { "epoch": 0.13538821693328132, "grad_norm": 0.5184745192527771, "learning_rate": 9.64491201058487e-05, "loss": 1.6062, "step": 2429 }, { "epoch": 0.13544395518644445, "grad_norm": 0.5188031792640686, "learning_rate": 9.644582369870794e-05, "loss": 1.8179, "step": 2430 }, { "epoch": 0.1354996934396076, "grad_norm": 0.537381112575531, "learning_rate": 9.644252581857647e-05, "loss": 1.9697, "step": 2431 }, { "epoch": 0.13555543169277076, "grad_norm": 0.5132935047149658, "learning_rate": 9.643922646555883e-05, "loss": 1.6746, "step": 2432 }, { "epoch": 0.1356111699459339, "grad_norm": 0.5265336036682129, "learning_rate": 9.64359256397597e-05, "loss": 1.6561, "step": 2433 }, { "epoch": 0.13566690819909705, "grad_norm": 0.5241510272026062, "learning_rate": 9.643262334128374e-05, "loss": 1.577, "step": 2434 }, { "epoch": 0.13572264645226018, "grad_norm": 0.5073732137680054, "learning_rate": 9.642931957023569e-05, "loss": 1.6821, "step": 2435 }, { "epoch": 0.13577838470542333, "grad_norm": 0.4868320822715759, "learning_rate": 9.642601432672034e-05, "loss": 1.4476, "step": 2436 }, { "epoch": 0.1358341229585865, "grad_norm": 0.5248389840126038, "learning_rate": 9.642270761084249e-05, "loss": 1.9406, "step": 2437 }, { "epoch": 0.13588986121174962, "grad_norm": 0.492227166891098, "learning_rate": 9.641939942270701e-05, "loss": 1.6538, "step": 2438 }, { "epoch": 0.13594559946491278, "grad_norm": 0.5446291565895081, "learning_rate": 9.641608976241883e-05, "loss": 1.8208, "step": 2439 }, { "epoch": 0.1360013377180759, "grad_norm": 0.5214070677757263, "learning_rate": 9.64127786300829e-05, "loss": 1.6889, "step": 2440 }, { "epoch": 0.13605707597123906, "grad_norm": 0.5892273187637329, "learning_rate": 9.640946602580426e-05, "loss": 2.0888, "step": 2441 }, { "epoch": 0.13611281422440222, "grad_norm": 0.5230244994163513, "learning_rate": 9.640615194968791e-05, "loss": 1.7068, "step": 2442 }, { "epoch": 0.13616855247756535, "grad_norm": 0.5090706944465637, "learning_rate": 9.640283640183903e-05, "loss": 1.7328, "step": 2443 }, { "epoch": 0.1362242907307285, "grad_norm": 0.5167303681373596, "learning_rate": 9.639951938236269e-05, "loss": 1.7062, "step": 2444 }, { "epoch": 0.13628002898389163, "grad_norm": 0.5717843770980835, "learning_rate": 9.639620089136413e-05, "loss": 1.8633, "step": 2445 }, { "epoch": 0.1363357672370548, "grad_norm": 0.514242947101593, "learning_rate": 9.63928809289486e-05, "loss": 1.9126, "step": 2446 }, { "epoch": 0.13639150549021795, "grad_norm": 0.5159420371055603, "learning_rate": 9.638955949522137e-05, "loss": 1.6795, "step": 2447 }, { "epoch": 0.13644724374338107, "grad_norm": 0.4026312828063965, "learning_rate": 9.638623659028779e-05, "loss": 1.008, "step": 2448 }, { "epoch": 0.13650298199654423, "grad_norm": 0.5365085601806641, "learning_rate": 9.63829122142532e-05, "loss": 1.9597, "step": 2449 }, { "epoch": 0.13655872024970736, "grad_norm": 0.528103768825531, "learning_rate": 9.637958636722311e-05, "loss": 1.8801, "step": 2450 }, { "epoch": 0.13661445850287052, "grad_norm": 0.5581492185592651, "learning_rate": 9.637625904930292e-05, "loss": 1.6802, "step": 2451 }, { "epoch": 0.13667019675603367, "grad_norm": 0.5182628631591797, "learning_rate": 9.63729302605982e-05, "loss": 1.8041, "step": 2452 }, { "epoch": 0.1367259350091968, "grad_norm": 0.48804765939712524, "learning_rate": 9.636960000121451e-05, "loss": 1.7381, "step": 2453 }, { "epoch": 0.13678167326235996, "grad_norm": 0.5185055136680603, "learning_rate": 9.636626827125745e-05, "loss": 1.8356, "step": 2454 }, { "epoch": 0.13683741151552312, "grad_norm": 0.5890060663223267, "learning_rate": 9.63629350708327e-05, "loss": 1.8636, "step": 2455 }, { "epoch": 0.13689314976868625, "grad_norm": 0.5501379370689392, "learning_rate": 9.635960040004597e-05, "loss": 2.0967, "step": 2456 }, { "epoch": 0.1369488880218494, "grad_norm": 0.5753256678581238, "learning_rate": 9.635626425900301e-05, "loss": 1.8931, "step": 2457 }, { "epoch": 0.13700462627501253, "grad_norm": 0.5230208039283752, "learning_rate": 9.635292664780962e-05, "loss": 1.6546, "step": 2458 }, { "epoch": 0.1370603645281757, "grad_norm": 0.507422149181366, "learning_rate": 9.634958756657165e-05, "loss": 1.7135, "step": 2459 }, { "epoch": 0.13711610278133884, "grad_norm": 0.48532143235206604, "learning_rate": 9.634624701539498e-05, "loss": 1.5297, "step": 2460 }, { "epoch": 0.13717184103450197, "grad_norm": 0.5039069652557373, "learning_rate": 9.63429049943856e-05, "loss": 1.9089, "step": 2461 }, { "epoch": 0.13722757928766513, "grad_norm": 0.5480893850326538, "learning_rate": 9.633956150364947e-05, "loss": 1.7987, "step": 2462 }, { "epoch": 0.13728331754082826, "grad_norm": 0.5339971780776978, "learning_rate": 9.633621654329261e-05, "loss": 1.7035, "step": 2463 }, { "epoch": 0.13733905579399142, "grad_norm": 0.5058174133300781, "learning_rate": 9.633287011342113e-05, "loss": 1.6676, "step": 2464 }, { "epoch": 0.13739479404715457, "grad_norm": 0.5697671175003052, "learning_rate": 9.632952221414116e-05, "loss": 1.9683, "step": 2465 }, { "epoch": 0.1374505323003177, "grad_norm": 0.5071194767951965, "learning_rate": 9.632617284555886e-05, "loss": 1.9232, "step": 2466 }, { "epoch": 0.13750627055348086, "grad_norm": 0.5929427742958069, "learning_rate": 9.632282200778045e-05, "loss": 1.8352, "step": 2467 }, { "epoch": 0.137562008806644, "grad_norm": 0.528889000415802, "learning_rate": 9.631946970091221e-05, "loss": 1.7636, "step": 2468 }, { "epoch": 0.13761774705980714, "grad_norm": 1.3195804357528687, "learning_rate": 9.631611592506046e-05, "loss": 1.7929, "step": 2469 }, { "epoch": 0.1376734853129703, "grad_norm": 0.5272727608680725, "learning_rate": 9.631276068033154e-05, "loss": 1.89, "step": 2470 }, { "epoch": 0.13772922356613343, "grad_norm": 0.5453211665153503, "learning_rate": 9.630940396683188e-05, "loss": 1.6766, "step": 2471 }, { "epoch": 0.1377849618192966, "grad_norm": 0.5383656620979309, "learning_rate": 9.630604578466794e-05, "loss": 1.6168, "step": 2472 }, { "epoch": 0.13784070007245972, "grad_norm": 0.5008901953697205, "learning_rate": 9.63026861339462e-05, "loss": 1.5592, "step": 2473 }, { "epoch": 0.13789643832562287, "grad_norm": 0.5986757874488831, "learning_rate": 9.629932501477321e-05, "loss": 2.0793, "step": 2474 }, { "epoch": 0.13795217657878603, "grad_norm": 0.5368151664733887, "learning_rate": 9.629596242725558e-05, "loss": 1.6693, "step": 2475 }, { "epoch": 0.13800791483194916, "grad_norm": 0.5330533385276794, "learning_rate": 9.629259837149995e-05, "loss": 1.7398, "step": 2476 }, { "epoch": 0.13806365308511231, "grad_norm": 0.5093852877616882, "learning_rate": 9.6289232847613e-05, "loss": 1.6665, "step": 2477 }, { "epoch": 0.13811939133827547, "grad_norm": 0.5469667911529541, "learning_rate": 9.628586585570149e-05, "loss": 1.8411, "step": 2478 }, { "epoch": 0.1381751295914386, "grad_norm": 0.5832191705703735, "learning_rate": 9.628249739587217e-05, "loss": 1.8821, "step": 2479 }, { "epoch": 0.13823086784460176, "grad_norm": 0.5154137015342712, "learning_rate": 9.627912746823187e-05, "loss": 1.6075, "step": 2480 }, { "epoch": 0.13828660609776489, "grad_norm": 0.5499826669692993, "learning_rate": 9.627575607288745e-05, "loss": 1.735, "step": 2481 }, { "epoch": 0.13834234435092804, "grad_norm": 0.6152673959732056, "learning_rate": 9.627238320994589e-05, "loss": 2.0207, "step": 2482 }, { "epoch": 0.1383980826040912, "grad_norm": 0.49340128898620605, "learning_rate": 9.626900887951412e-05, "loss": 1.64, "step": 2483 }, { "epoch": 0.13845382085725433, "grad_norm": 0.5563956499099731, "learning_rate": 9.626563308169914e-05, "loss": 1.9062, "step": 2484 }, { "epoch": 0.13850955911041749, "grad_norm": 0.4945386052131653, "learning_rate": 9.626225581660803e-05, "loss": 1.4852, "step": 2485 }, { "epoch": 0.13856529736358061, "grad_norm": 0.5170808434486389, "learning_rate": 9.625887708434788e-05, "loss": 1.7517, "step": 2486 }, { "epoch": 0.13862103561674377, "grad_norm": 0.5459514260292053, "learning_rate": 9.625549688502589e-05, "loss": 1.6785, "step": 2487 }, { "epoch": 0.13867677386990693, "grad_norm": 0.5073458552360535, "learning_rate": 9.62521152187492e-05, "loss": 1.7213, "step": 2488 }, { "epoch": 0.13873251212307006, "grad_norm": 0.4946017563343048, "learning_rate": 9.624873208562509e-05, "loss": 1.6256, "step": 2489 }, { "epoch": 0.1387882503762332, "grad_norm": 0.5971960425376892, "learning_rate": 9.624534748576085e-05, "loss": 1.9997, "step": 2490 }, { "epoch": 0.13884398862939634, "grad_norm": 0.5135798454284668, "learning_rate": 9.624196141926381e-05, "loss": 1.6544, "step": 2491 }, { "epoch": 0.1388997268825595, "grad_norm": 0.5550069212913513, "learning_rate": 9.623857388624138e-05, "loss": 1.8297, "step": 2492 }, { "epoch": 0.13895546513572266, "grad_norm": 0.5476080179214478, "learning_rate": 9.623518488680095e-05, "loss": 1.9136, "step": 2493 }, { "epoch": 0.13901120338888578, "grad_norm": 0.5327604413032532, "learning_rate": 9.623179442105004e-05, "loss": 1.7471, "step": 2494 }, { "epoch": 0.13906694164204894, "grad_norm": 0.5192773938179016, "learning_rate": 9.622840248909617e-05, "loss": 1.6395, "step": 2495 }, { "epoch": 0.13912267989521207, "grad_norm": 0.5261735916137695, "learning_rate": 9.622500909104689e-05, "loss": 1.6751, "step": 2496 }, { "epoch": 0.13917841814837523, "grad_norm": 0.5256398916244507, "learning_rate": 9.622161422700984e-05, "loss": 1.7681, "step": 2497 }, { "epoch": 0.13923415640153838, "grad_norm": 0.5021438002586365, "learning_rate": 9.621821789709267e-05, "loss": 1.6317, "step": 2498 }, { "epoch": 0.1392898946547015, "grad_norm": 0.5900087952613831, "learning_rate": 9.62148201014031e-05, "loss": 1.8691, "step": 2499 }, { "epoch": 0.13934563290786467, "grad_norm": 0.492544025182724, "learning_rate": 9.621142084004889e-05, "loss": 1.6061, "step": 2500 }, { "epoch": 0.13940137116102783, "grad_norm": 0.5590608716011047, "learning_rate": 9.620802011313785e-05, "loss": 1.9551, "step": 2501 }, { "epoch": 0.13945710941419096, "grad_norm": 0.5163889527320862, "learning_rate": 9.620461792077782e-05, "loss": 1.8419, "step": 2502 }, { "epoch": 0.1395128476673541, "grad_norm": 0.5565062165260315, "learning_rate": 9.620121426307669e-05, "loss": 1.9454, "step": 2503 }, { "epoch": 0.13956858592051724, "grad_norm": 0.5010280013084412, "learning_rate": 9.619780914014242e-05, "loss": 1.6189, "step": 2504 }, { "epoch": 0.1396243241736804, "grad_norm": 0.5342069268226624, "learning_rate": 9.619440255208301e-05, "loss": 1.7667, "step": 2505 }, { "epoch": 0.13968006242684355, "grad_norm": 0.5092571377754211, "learning_rate": 9.619099449900646e-05, "loss": 1.6797, "step": 2506 }, { "epoch": 0.13973580068000668, "grad_norm": 0.5784452557563782, "learning_rate": 9.618758498102089e-05, "loss": 1.9559, "step": 2507 }, { "epoch": 0.13979153893316984, "grad_norm": 0.5389965176582336, "learning_rate": 9.618417399823441e-05, "loss": 1.7971, "step": 2508 }, { "epoch": 0.13984727718633297, "grad_norm": 0.5197558999061584, "learning_rate": 9.618076155075521e-05, "loss": 1.8631, "step": 2509 }, { "epoch": 0.13990301543949613, "grad_norm": 0.5198122262954712, "learning_rate": 9.617734763869151e-05, "loss": 1.7487, "step": 2510 }, { "epoch": 0.13995875369265928, "grad_norm": 0.515998363494873, "learning_rate": 9.617393226215157e-05, "loss": 1.6849, "step": 2511 }, { "epoch": 0.1400144919458224, "grad_norm": 0.5627748370170593, "learning_rate": 9.617051542124371e-05, "loss": 1.7637, "step": 2512 }, { "epoch": 0.14007023019898557, "grad_norm": 0.49436190724372864, "learning_rate": 9.61670971160763e-05, "loss": 1.6303, "step": 2513 }, { "epoch": 0.1401259684521487, "grad_norm": 0.5101426839828491, "learning_rate": 9.616367734675772e-05, "loss": 1.5709, "step": 2514 }, { "epoch": 0.14018170670531185, "grad_norm": 0.5416966080665588, "learning_rate": 9.616025611339647e-05, "loss": 1.8456, "step": 2515 }, { "epoch": 0.140237444958475, "grad_norm": 0.5797568559646606, "learning_rate": 9.615683341610103e-05, "loss": 1.7499, "step": 2516 }, { "epoch": 0.14029318321163814, "grad_norm": 0.5696927905082703, "learning_rate": 9.615340925497995e-05, "loss": 1.6875, "step": 2517 }, { "epoch": 0.1403489214648013, "grad_norm": 0.49985361099243164, "learning_rate": 9.61499836301418e-05, "loss": 1.6336, "step": 2518 }, { "epoch": 0.14040465971796443, "grad_norm": 0.5426433086395264, "learning_rate": 9.614655654169527e-05, "loss": 1.8164, "step": 2519 }, { "epoch": 0.14046039797112758, "grad_norm": 0.562021017074585, "learning_rate": 9.6143127989749e-05, "loss": 1.626, "step": 2520 }, { "epoch": 0.14051613622429074, "grad_norm": 0.5873587727546692, "learning_rate": 9.613969797441173e-05, "loss": 2.0087, "step": 2521 }, { "epoch": 0.14057187447745387, "grad_norm": 0.5239251852035522, "learning_rate": 9.613626649579229e-05, "loss": 1.74, "step": 2522 }, { "epoch": 0.14062761273061702, "grad_norm": 0.613498330116272, "learning_rate": 9.613283355399945e-05, "loss": 1.7088, "step": 2523 }, { "epoch": 0.14068335098378018, "grad_norm": 0.5224273800849915, "learning_rate": 9.61293991491421e-05, "loss": 1.5665, "step": 2524 }, { "epoch": 0.1407390892369433, "grad_norm": 0.5063479542732239, "learning_rate": 9.612596328132915e-05, "loss": 1.3456, "step": 2525 }, { "epoch": 0.14079482749010647, "grad_norm": 0.5042296648025513, "learning_rate": 9.61225259506696e-05, "loss": 1.6111, "step": 2526 }, { "epoch": 0.1408505657432696, "grad_norm": 0.5116347670555115, "learning_rate": 9.611908715727244e-05, "loss": 1.9546, "step": 2527 }, { "epoch": 0.14090630399643275, "grad_norm": 0.5643008351325989, "learning_rate": 9.611564690124672e-05, "loss": 1.8488, "step": 2528 }, { "epoch": 0.1409620422495959, "grad_norm": 0.5275754332542419, "learning_rate": 9.611220518270155e-05, "loss": 1.7367, "step": 2529 }, { "epoch": 0.14101778050275904, "grad_norm": 0.523114800453186, "learning_rate": 9.61087620017461e-05, "loss": 1.5207, "step": 2530 }, { "epoch": 0.1410735187559222, "grad_norm": 0.5141943693161011, "learning_rate": 9.610531735848953e-05, "loss": 1.6592, "step": 2531 }, { "epoch": 0.14112925700908532, "grad_norm": 0.5485236048698425, "learning_rate": 9.610187125304111e-05, "loss": 1.7567, "step": 2532 }, { "epoch": 0.14118499526224848, "grad_norm": 0.537264347076416, "learning_rate": 9.609842368551014e-05, "loss": 1.7151, "step": 2533 }, { "epoch": 0.14124073351541164, "grad_norm": 0.588664174079895, "learning_rate": 9.609497465600595e-05, "loss": 1.9591, "step": 2534 }, { "epoch": 0.14129647176857477, "grad_norm": 0.5192539691925049, "learning_rate": 9.60915241646379e-05, "loss": 1.7296, "step": 2535 }, { "epoch": 0.14135221002173792, "grad_norm": 0.543268620967865, "learning_rate": 9.608807221151543e-05, "loss": 1.7645, "step": 2536 }, { "epoch": 0.14140794827490105, "grad_norm": 0.534324049949646, "learning_rate": 9.608461879674802e-05, "loss": 1.8227, "step": 2537 }, { "epoch": 0.1414636865280642, "grad_norm": 0.5177492499351501, "learning_rate": 9.608116392044521e-05, "loss": 1.6495, "step": 2538 }, { "epoch": 0.14151942478122737, "grad_norm": 0.5617666840553284, "learning_rate": 9.607770758271655e-05, "loss": 1.9329, "step": 2539 }, { "epoch": 0.1415751630343905, "grad_norm": 0.5591059327125549, "learning_rate": 9.607424978367165e-05, "loss": 1.8535, "step": 2540 }, { "epoch": 0.14163090128755365, "grad_norm": 0.5114865899085999, "learning_rate": 9.607079052342018e-05, "loss": 1.6956, "step": 2541 }, { "epoch": 0.1416866395407168, "grad_norm": 0.5444316864013672, "learning_rate": 9.606732980207184e-05, "loss": 1.6842, "step": 2542 }, { "epoch": 0.14174237779387994, "grad_norm": 0.5291377305984497, "learning_rate": 9.606386761973641e-05, "loss": 1.778, "step": 2543 }, { "epoch": 0.1417981160470431, "grad_norm": 0.5469574332237244, "learning_rate": 9.606040397652365e-05, "loss": 1.8492, "step": 2544 }, { "epoch": 0.14185385430020622, "grad_norm": 0.5374149084091187, "learning_rate": 9.605693887254343e-05, "loss": 1.8428, "step": 2545 }, { "epoch": 0.14190959255336938, "grad_norm": 0.5556001663208008, "learning_rate": 9.605347230790565e-05, "loss": 1.786, "step": 2546 }, { "epoch": 0.14196533080653254, "grad_norm": 0.5268534421920776, "learning_rate": 9.605000428272023e-05, "loss": 1.5936, "step": 2547 }, { "epoch": 0.14202106905969566, "grad_norm": 0.5348252058029175, "learning_rate": 9.604653479709717e-05, "loss": 1.8033, "step": 2548 }, { "epoch": 0.14207680731285882, "grad_norm": 0.47919270396232605, "learning_rate": 9.60430638511465e-05, "loss": 1.5892, "step": 2549 }, { "epoch": 0.14213254556602195, "grad_norm": 0.5066027045249939, "learning_rate": 9.603959144497827e-05, "loss": 1.6489, "step": 2550 }, { "epoch": 0.1421882838191851, "grad_norm": 0.512729823589325, "learning_rate": 9.603611757870266e-05, "loss": 1.4806, "step": 2551 }, { "epoch": 0.14224402207234826, "grad_norm": 0.5020458102226257, "learning_rate": 9.603264225242978e-05, "loss": 1.7944, "step": 2552 }, { "epoch": 0.1422997603255114, "grad_norm": 0.5788121819496155, "learning_rate": 9.60291654662699e-05, "loss": 1.828, "step": 2553 }, { "epoch": 0.14235549857867455, "grad_norm": 0.5426775217056274, "learning_rate": 9.602568722033326e-05, "loss": 1.8621, "step": 2554 }, { "epoch": 0.14241123683183768, "grad_norm": 0.5158776044845581, "learning_rate": 9.602220751473015e-05, "loss": 1.8829, "step": 2555 }, { "epoch": 0.14246697508500084, "grad_norm": 0.48226305842399597, "learning_rate": 9.601872634957096e-05, "loss": 1.6547, "step": 2556 }, { "epoch": 0.142522713338164, "grad_norm": 0.5081673860549927, "learning_rate": 9.601524372496608e-05, "loss": 1.6629, "step": 2557 }, { "epoch": 0.14257845159132712, "grad_norm": 0.5080944299697876, "learning_rate": 9.601175964102596e-05, "loss": 1.8285, "step": 2558 }, { "epoch": 0.14263418984449028, "grad_norm": 0.5221143364906311, "learning_rate": 9.600827409786107e-05, "loss": 1.9544, "step": 2559 }, { "epoch": 0.1426899280976534, "grad_norm": 0.5045720338821411, "learning_rate": 9.600478709558199e-05, "loss": 1.5243, "step": 2560 }, { "epoch": 0.14274566635081656, "grad_norm": 0.5300230383872986, "learning_rate": 9.600129863429929e-05, "loss": 1.6888, "step": 2561 }, { "epoch": 0.14280140460397972, "grad_norm": 0.5262769460678101, "learning_rate": 9.599780871412359e-05, "loss": 1.8205, "step": 2562 }, { "epoch": 0.14285714285714285, "grad_norm": 0.5437910556793213, "learning_rate": 9.59943173351656e-05, "loss": 1.69, "step": 2563 }, { "epoch": 0.142912881110306, "grad_norm": 0.5781261324882507, "learning_rate": 9.599082449753602e-05, "loss": 1.918, "step": 2564 }, { "epoch": 0.14296861936346916, "grad_norm": 0.5519402623176575, "learning_rate": 9.598733020134562e-05, "loss": 1.7039, "step": 2565 }, { "epoch": 0.1430243576166323, "grad_norm": 0.5874602198600769, "learning_rate": 9.598383444670526e-05, "loss": 1.6948, "step": 2566 }, { "epoch": 0.14308009586979545, "grad_norm": 0.5131939649581909, "learning_rate": 9.598033723372575e-05, "loss": 1.6666, "step": 2567 }, { "epoch": 0.14313583412295858, "grad_norm": 0.6441419124603271, "learning_rate": 9.597683856251804e-05, "loss": 1.9023, "step": 2568 }, { "epoch": 0.14319157237612173, "grad_norm": 0.48139771819114685, "learning_rate": 9.597333843319309e-05, "loss": 1.6297, "step": 2569 }, { "epoch": 0.1432473106292849, "grad_norm": 0.4975999891757965, "learning_rate": 9.596983684586186e-05, "loss": 1.6558, "step": 2570 }, { "epoch": 0.14330304888244802, "grad_norm": 0.5479779839515686, "learning_rate": 9.596633380063544e-05, "loss": 1.78, "step": 2571 }, { "epoch": 0.14335878713561118, "grad_norm": 0.5358686447143555, "learning_rate": 9.596282929762492e-05, "loss": 1.848, "step": 2572 }, { "epoch": 0.1434145253887743, "grad_norm": 0.5355905890464783, "learning_rate": 9.595932333694142e-05, "loss": 1.847, "step": 2573 }, { "epoch": 0.14347026364193746, "grad_norm": 0.5640880465507507, "learning_rate": 9.595581591869616e-05, "loss": 1.713, "step": 2574 }, { "epoch": 0.14352600189510062, "grad_norm": 0.5763548016548157, "learning_rate": 9.595230704300035e-05, "loss": 1.9647, "step": 2575 }, { "epoch": 0.14358174014826375, "grad_norm": 0.5426276922225952, "learning_rate": 9.594879670996528e-05, "loss": 1.7378, "step": 2576 }, { "epoch": 0.1436374784014269, "grad_norm": 0.5128087997436523, "learning_rate": 9.594528491970228e-05, "loss": 1.7663, "step": 2577 }, { "epoch": 0.14369321665459003, "grad_norm": 0.5331497192382812, "learning_rate": 9.594177167232273e-05, "loss": 1.6068, "step": 2578 }, { "epoch": 0.1437489549077532, "grad_norm": 0.5513312220573425, "learning_rate": 9.593825696793803e-05, "loss": 1.6527, "step": 2579 }, { "epoch": 0.14380469316091635, "grad_norm": 0.5069592595100403, "learning_rate": 9.593474080665968e-05, "loss": 1.5839, "step": 2580 }, { "epoch": 0.14386043141407948, "grad_norm": 0.5478212237358093, "learning_rate": 9.593122318859915e-05, "loss": 1.8217, "step": 2581 }, { "epoch": 0.14391616966724263, "grad_norm": 0.5398098230361938, "learning_rate": 9.592770411386802e-05, "loss": 1.8395, "step": 2582 }, { "epoch": 0.14397190792040576, "grad_norm": 0.535152792930603, "learning_rate": 9.592418358257789e-05, "loss": 1.8477, "step": 2583 }, { "epoch": 0.14402764617356892, "grad_norm": 0.5321324467658997, "learning_rate": 9.592066159484043e-05, "loss": 1.6152, "step": 2584 }, { "epoch": 0.14408338442673208, "grad_norm": 0.525637686252594, "learning_rate": 9.59171381507673e-05, "loss": 1.8558, "step": 2585 }, { "epoch": 0.1441391226798952, "grad_norm": 0.5971347689628601, "learning_rate": 9.591361325047028e-05, "loss": 1.8752, "step": 2586 }, { "epoch": 0.14419486093305836, "grad_norm": 0.5029361844062805, "learning_rate": 9.591008689406114e-05, "loss": 1.6977, "step": 2587 }, { "epoch": 0.14425059918622152, "grad_norm": 0.5642208456993103, "learning_rate": 9.59065590816517e-05, "loss": 1.8379, "step": 2588 }, { "epoch": 0.14430633743938465, "grad_norm": 0.5269021391868591, "learning_rate": 9.590302981335387e-05, "loss": 1.98, "step": 2589 }, { "epoch": 0.1443620756925478, "grad_norm": 0.5572815537452698, "learning_rate": 9.589949908927957e-05, "loss": 1.7123, "step": 2590 }, { "epoch": 0.14441781394571093, "grad_norm": 0.5520729422569275, "learning_rate": 9.589596690954077e-05, "loss": 1.8578, "step": 2591 }, { "epoch": 0.1444735521988741, "grad_norm": 0.5181688070297241, "learning_rate": 9.589243327424951e-05, "loss": 1.7641, "step": 2592 }, { "epoch": 0.14452929045203725, "grad_norm": 0.5066071152687073, "learning_rate": 9.588889818351781e-05, "loss": 1.6991, "step": 2593 }, { "epoch": 0.14458502870520037, "grad_norm": 0.5530059933662415, "learning_rate": 9.588536163745782e-05, "loss": 1.7019, "step": 2594 }, { "epoch": 0.14464076695836353, "grad_norm": 0.5519603490829468, "learning_rate": 9.58818236361817e-05, "loss": 1.6645, "step": 2595 }, { "epoch": 0.14469650521152666, "grad_norm": 0.6039948463439941, "learning_rate": 9.587828417980163e-05, "loss": 2.0606, "step": 2596 }, { "epoch": 0.14475224346468982, "grad_norm": 0.5822129845619202, "learning_rate": 9.587474326842987e-05, "loss": 1.8879, "step": 2597 }, { "epoch": 0.14480798171785297, "grad_norm": 0.5391368865966797, "learning_rate": 9.587120090217874e-05, "loss": 1.6668, "step": 2598 }, { "epoch": 0.1448637199710161, "grad_norm": 0.505940854549408, "learning_rate": 9.586765708116056e-05, "loss": 1.6322, "step": 2599 }, { "epoch": 0.14491945822417926, "grad_norm": 0.5613484978675842, "learning_rate": 9.586411180548771e-05, "loss": 1.7002, "step": 2600 }, { "epoch": 0.1449751964773424, "grad_norm": 0.5343160629272461, "learning_rate": 9.586056507527266e-05, "loss": 1.8232, "step": 2601 }, { "epoch": 0.14503093473050555, "grad_norm": 0.5221366286277771, "learning_rate": 9.585701689062785e-05, "loss": 1.7799, "step": 2602 }, { "epoch": 0.1450866729836687, "grad_norm": 0.503301739692688, "learning_rate": 9.585346725166584e-05, "loss": 1.5724, "step": 2603 }, { "epoch": 0.14514241123683183, "grad_norm": 0.5650082230567932, "learning_rate": 9.584991615849921e-05, "loss": 1.898, "step": 2604 }, { "epoch": 0.145198149489995, "grad_norm": 0.4780997633934021, "learning_rate": 9.584636361124054e-05, "loss": 1.5643, "step": 2605 }, { "epoch": 0.14525388774315812, "grad_norm": 0.5057533979415894, "learning_rate": 9.584280961000253e-05, "loss": 1.575, "step": 2606 }, { "epoch": 0.14530962599632127, "grad_norm": 0.530737578868866, "learning_rate": 9.583925415489787e-05, "loss": 1.7932, "step": 2607 }, { "epoch": 0.14536536424948443, "grad_norm": 0.603374719619751, "learning_rate": 9.583569724603934e-05, "loss": 2.0627, "step": 2608 }, { "epoch": 0.14542110250264756, "grad_norm": 0.5549886226654053, "learning_rate": 9.583213888353972e-05, "loss": 1.7767, "step": 2609 }, { "epoch": 0.14547684075581072, "grad_norm": 0.6217805743217468, "learning_rate": 9.582857906751191e-05, "loss": 2.05, "step": 2610 }, { "epoch": 0.14553257900897387, "grad_norm": 0.5606620907783508, "learning_rate": 9.582501779806874e-05, "loss": 1.7722, "step": 2611 }, { "epoch": 0.145588317262137, "grad_norm": 0.5387722253799438, "learning_rate": 9.582145507532319e-05, "loss": 1.6958, "step": 2612 }, { "epoch": 0.14564405551530016, "grad_norm": 0.557847797870636, "learning_rate": 9.581789089938825e-05, "loss": 1.8401, "step": 2613 }, { "epoch": 0.1456997937684633, "grad_norm": 0.5201898217201233, "learning_rate": 9.581432527037693e-05, "loss": 1.7684, "step": 2614 }, { "epoch": 0.14575553202162644, "grad_norm": 0.5138794183731079, "learning_rate": 9.581075818840234e-05, "loss": 1.7435, "step": 2615 }, { "epoch": 0.1458112702747896, "grad_norm": 0.5721390247344971, "learning_rate": 9.58071896535776e-05, "loss": 1.8191, "step": 2616 }, { "epoch": 0.14586700852795273, "grad_norm": 0.5593292117118835, "learning_rate": 9.580361966601588e-05, "loss": 1.877, "step": 2617 }, { "epoch": 0.1459227467811159, "grad_norm": 0.5009481906890869, "learning_rate": 9.580004822583038e-05, "loss": 1.6282, "step": 2618 }, { "epoch": 0.14597848503427902, "grad_norm": 0.4969474673271179, "learning_rate": 9.579647533313439e-05, "loss": 1.7076, "step": 2619 }, { "epoch": 0.14603422328744217, "grad_norm": 0.5316969156265259, "learning_rate": 9.579290098804122e-05, "loss": 1.6271, "step": 2620 }, { "epoch": 0.14608996154060533, "grad_norm": 0.5574962496757507, "learning_rate": 9.578932519066422e-05, "loss": 1.8687, "step": 2621 }, { "epoch": 0.14614569979376846, "grad_norm": 0.499491423368454, "learning_rate": 9.57857479411168e-05, "loss": 1.6985, "step": 2622 }, { "epoch": 0.14620143804693161, "grad_norm": 0.654602587223053, "learning_rate": 9.57821692395124e-05, "loss": 1.7291, "step": 2623 }, { "epoch": 0.14625717630009474, "grad_norm": 0.5459001660346985, "learning_rate": 9.577858908596451e-05, "loss": 1.729, "step": 2624 }, { "epoch": 0.1463129145532579, "grad_norm": 0.5157297849655151, "learning_rate": 9.57750074805867e-05, "loss": 1.4164, "step": 2625 }, { "epoch": 0.14636865280642106, "grad_norm": 0.5205078125, "learning_rate": 9.577142442349254e-05, "loss": 1.7282, "step": 2626 }, { "epoch": 0.14642439105958419, "grad_norm": 0.563706636428833, "learning_rate": 9.576783991479565e-05, "loss": 1.8092, "step": 2627 }, { "epoch": 0.14648012931274734, "grad_norm": 0.5385141968727112, "learning_rate": 9.576425395460973e-05, "loss": 1.8241, "step": 2628 }, { "epoch": 0.14653586756591047, "grad_norm": 0.6100838780403137, "learning_rate": 9.576066654304849e-05, "loss": 1.9425, "step": 2629 }, { "epoch": 0.14659160581907363, "grad_norm": 0.5153439044952393, "learning_rate": 9.575707768022572e-05, "loss": 1.4287, "step": 2630 }, { "epoch": 0.14664734407223678, "grad_norm": 0.5562304258346558, "learning_rate": 9.575348736625523e-05, "loss": 1.9308, "step": 2631 }, { "epoch": 0.14670308232539991, "grad_norm": 0.5785409808158875, "learning_rate": 9.574989560125087e-05, "loss": 1.8831, "step": 2632 }, { "epoch": 0.14675882057856307, "grad_norm": 0.5315858721733093, "learning_rate": 9.574630238532658e-05, "loss": 1.5871, "step": 2633 }, { "epoch": 0.14681455883172623, "grad_norm": 0.5748802423477173, "learning_rate": 9.574270771859628e-05, "loss": 1.8394, "step": 2634 }, { "epoch": 0.14687029708488936, "grad_norm": 0.5130333304405212, "learning_rate": 9.5739111601174e-05, "loss": 1.8598, "step": 2635 }, { "epoch": 0.1469260353380525, "grad_norm": 0.5098990201950073, "learning_rate": 9.573551403317378e-05, "loss": 1.5862, "step": 2636 }, { "epoch": 0.14698177359121564, "grad_norm": 0.5426929593086243, "learning_rate": 9.573191501470971e-05, "loss": 1.8026, "step": 2637 }, { "epoch": 0.1470375118443788, "grad_norm": 0.5652133226394653, "learning_rate": 9.572831454589592e-05, "loss": 1.7529, "step": 2638 }, { "epoch": 0.14709325009754196, "grad_norm": 0.5370623469352722, "learning_rate": 9.572471262684662e-05, "loss": 1.7851, "step": 2639 }, { "epoch": 0.14714898835070508, "grad_norm": 0.5871500372886658, "learning_rate": 9.572110925767601e-05, "loss": 1.7617, "step": 2640 }, { "epoch": 0.14720472660386824, "grad_norm": 0.5181992053985596, "learning_rate": 9.571750443849841e-05, "loss": 1.6418, "step": 2641 }, { "epoch": 0.14726046485703137, "grad_norm": 0.5635068416595459, "learning_rate": 9.571389816942811e-05, "loss": 2.0309, "step": 2642 }, { "epoch": 0.14731620311019453, "grad_norm": 0.5830138921737671, "learning_rate": 9.571029045057948e-05, "loss": 1.8764, "step": 2643 }, { "epoch": 0.14737194136335768, "grad_norm": 0.5109788179397583, "learning_rate": 9.570668128206697e-05, "loss": 1.6183, "step": 2644 }, { "epoch": 0.1474276796165208, "grad_norm": 0.5681736469268799, "learning_rate": 9.5703070664005e-05, "loss": 1.738, "step": 2645 }, { "epoch": 0.14748341786968397, "grad_norm": 0.5385489463806152, "learning_rate": 9.56994585965081e-05, "loss": 1.7379, "step": 2646 }, { "epoch": 0.1475391561228471, "grad_norm": 0.5935365557670593, "learning_rate": 9.569584507969082e-05, "loss": 1.6596, "step": 2647 }, { "epoch": 0.14759489437601025, "grad_norm": 0.5758340358734131, "learning_rate": 9.569223011366776e-05, "loss": 1.7998, "step": 2648 }, { "epoch": 0.1476506326291734, "grad_norm": 0.5150250196456909, "learning_rate": 9.568861369855357e-05, "loss": 1.5843, "step": 2649 }, { "epoch": 0.14770637088233654, "grad_norm": 0.549801230430603, "learning_rate": 9.568499583446293e-05, "loss": 1.6966, "step": 2650 }, { "epoch": 0.1477621091354997, "grad_norm": 0.5092233419418335, "learning_rate": 9.568137652151059e-05, "loss": 1.7318, "step": 2651 }, { "epoch": 0.14781784738866283, "grad_norm": 0.5549139976501465, "learning_rate": 9.567775575981133e-05, "loss": 1.8252, "step": 2652 }, { "epoch": 0.14787358564182598, "grad_norm": 0.5805264115333557, "learning_rate": 9.567413354947997e-05, "loss": 1.8455, "step": 2653 }, { "epoch": 0.14792932389498914, "grad_norm": 0.5241934657096863, "learning_rate": 9.56705098906314e-05, "loss": 1.8003, "step": 2654 }, { "epoch": 0.14798506214815227, "grad_norm": 0.5738681554794312, "learning_rate": 9.566688478338053e-05, "loss": 1.765, "step": 2655 }, { "epoch": 0.14804080040131543, "grad_norm": 0.5123993158340454, "learning_rate": 9.566325822784232e-05, "loss": 1.686, "step": 2656 }, { "epoch": 0.14809653865447858, "grad_norm": 0.5327409505844116, "learning_rate": 9.56596302241318e-05, "loss": 1.9386, "step": 2657 }, { "epoch": 0.1481522769076417, "grad_norm": 0.4922872483730316, "learning_rate": 9.565600077236403e-05, "loss": 1.6464, "step": 2658 }, { "epoch": 0.14820801516080487, "grad_norm": 0.5839138031005859, "learning_rate": 9.565236987265411e-05, "loss": 2.0237, "step": 2659 }, { "epoch": 0.148263753413968, "grad_norm": 0.5407429933547974, "learning_rate": 9.564873752511718e-05, "loss": 1.9181, "step": 2660 }, { "epoch": 0.14831949166713115, "grad_norm": 0.5354205369949341, "learning_rate": 9.564510372986845e-05, "loss": 1.9004, "step": 2661 }, { "epoch": 0.1483752299202943, "grad_norm": 0.517620325088501, "learning_rate": 9.564146848702316e-05, "loss": 1.4634, "step": 2662 }, { "epoch": 0.14843096817345744, "grad_norm": 0.513761579990387, "learning_rate": 9.56378317966966e-05, "loss": 1.7994, "step": 2663 }, { "epoch": 0.1484867064266206, "grad_norm": 0.520189642906189, "learning_rate": 9.56341936590041e-05, "loss": 1.493, "step": 2664 }, { "epoch": 0.14854244467978373, "grad_norm": 0.5256882905960083, "learning_rate": 9.563055407406104e-05, "loss": 1.747, "step": 2665 }, { "epoch": 0.14859818293294688, "grad_norm": 0.5171797871589661, "learning_rate": 9.562691304198286e-05, "loss": 1.7043, "step": 2666 }, { "epoch": 0.14865392118611004, "grad_norm": 0.5845912098884583, "learning_rate": 9.5623270562885e-05, "loss": 1.8348, "step": 2667 }, { "epoch": 0.14870965943927317, "grad_norm": 0.5168249011039734, "learning_rate": 9.561962663688302e-05, "loss": 1.5255, "step": 2668 }, { "epoch": 0.14876539769243632, "grad_norm": 0.5021228790283203, "learning_rate": 9.561598126409245e-05, "loss": 1.5113, "step": 2669 }, { "epoch": 0.14882113594559945, "grad_norm": 0.5029981732368469, "learning_rate": 9.561233444462894e-05, "loss": 1.5927, "step": 2670 }, { "epoch": 0.1488768741987626, "grad_norm": 0.5585193634033203, "learning_rate": 9.56086861786081e-05, "loss": 1.9007, "step": 2671 }, { "epoch": 0.14893261245192577, "grad_norm": 0.4993244409561157, "learning_rate": 9.560503646614564e-05, "loss": 1.5592, "step": 2672 }, { "epoch": 0.1489883507050889, "grad_norm": 0.4925285875797272, "learning_rate": 9.560138530735734e-05, "loss": 1.5822, "step": 2673 }, { "epoch": 0.14904408895825205, "grad_norm": 0.5714946985244751, "learning_rate": 9.559773270235896e-05, "loss": 1.703, "step": 2674 }, { "epoch": 0.14909982721141518, "grad_norm": 0.5588274598121643, "learning_rate": 9.559407865126636e-05, "loss": 1.7473, "step": 2675 }, { "epoch": 0.14915556546457834, "grad_norm": 0.5327757000923157, "learning_rate": 9.559042315419542e-05, "loss": 1.6382, "step": 2676 }, { "epoch": 0.1492113037177415, "grad_norm": 0.5377374887466431, "learning_rate": 9.558676621126206e-05, "loss": 1.7602, "step": 2677 }, { "epoch": 0.14926704197090462, "grad_norm": 0.5468077659606934, "learning_rate": 9.558310782258227e-05, "loss": 1.7686, "step": 2678 }, { "epoch": 0.14932278022406778, "grad_norm": 0.5344017744064331, "learning_rate": 9.557944798827205e-05, "loss": 1.6661, "step": 2679 }, { "epoch": 0.14937851847723094, "grad_norm": 0.5011274218559265, "learning_rate": 9.557578670844751e-05, "loss": 1.6757, "step": 2680 }, { "epoch": 0.14943425673039407, "grad_norm": 0.5330647826194763, "learning_rate": 9.557212398322473e-05, "loss": 1.8146, "step": 2681 }, { "epoch": 0.14948999498355722, "grad_norm": 0.5211254954338074, "learning_rate": 9.556845981271989e-05, "loss": 1.7437, "step": 2682 }, { "epoch": 0.14954573323672035, "grad_norm": 0.603344738483429, "learning_rate": 9.556479419704918e-05, "loss": 2.0424, "step": 2683 }, { "epoch": 0.1496014714898835, "grad_norm": 0.5117289423942566, "learning_rate": 9.556112713632885e-05, "loss": 1.6523, "step": 2684 }, { "epoch": 0.14965720974304667, "grad_norm": 0.5624164938926697, "learning_rate": 9.555745863067522e-05, "loss": 1.8348, "step": 2685 }, { "epoch": 0.1497129479962098, "grad_norm": 0.4994141459465027, "learning_rate": 9.555378868020461e-05, "loss": 1.6003, "step": 2686 }, { "epoch": 0.14976868624937295, "grad_norm": 0.5267731547355652, "learning_rate": 9.555011728503343e-05, "loss": 1.6412, "step": 2687 }, { "epoch": 0.14982442450253608, "grad_norm": 0.4905613958835602, "learning_rate": 9.554644444527812e-05, "loss": 1.6397, "step": 2688 }, { "epoch": 0.14988016275569924, "grad_norm": 0.5710086226463318, "learning_rate": 9.554277016105512e-05, "loss": 2.0408, "step": 2689 }, { "epoch": 0.1499359010088624, "grad_norm": 0.5375673770904541, "learning_rate": 9.5539094432481e-05, "loss": 1.7599, "step": 2690 }, { "epoch": 0.14999163926202552, "grad_norm": 0.5491001009941101, "learning_rate": 9.55354172596723e-05, "loss": 1.6704, "step": 2691 }, { "epoch": 0.15004737751518868, "grad_norm": 0.5431581139564514, "learning_rate": 9.553173864274567e-05, "loss": 1.7792, "step": 2692 }, { "epoch": 0.1501031157683518, "grad_norm": 0.5338147282600403, "learning_rate": 9.552805858181775e-05, "loss": 1.7461, "step": 2693 }, { "epoch": 0.15015885402151496, "grad_norm": 0.5207554697990417, "learning_rate": 9.552437707700526e-05, "loss": 1.7735, "step": 2694 }, { "epoch": 0.15021459227467812, "grad_norm": 0.515975296497345, "learning_rate": 9.552069412842495e-05, "loss": 1.6318, "step": 2695 }, { "epoch": 0.15027033052784125, "grad_norm": 0.5207625031471252, "learning_rate": 9.551700973619364e-05, "loss": 1.665, "step": 2696 }, { "epoch": 0.1503260687810044, "grad_norm": 0.5158435702323914, "learning_rate": 9.551332390042816e-05, "loss": 1.743, "step": 2697 }, { "epoch": 0.15038180703416754, "grad_norm": 0.5647339224815369, "learning_rate": 9.55096366212454e-05, "loss": 1.9245, "step": 2698 }, { "epoch": 0.1504375452873307, "grad_norm": 0.545265793800354, "learning_rate": 9.55059478987623e-05, "loss": 1.5553, "step": 2699 }, { "epoch": 0.15049328354049385, "grad_norm": 0.5328176617622375, "learning_rate": 9.550225773309586e-05, "loss": 1.4489, "step": 2700 }, { "epoch": 0.15054902179365698, "grad_norm": 0.5154641270637512, "learning_rate": 9.54985661243631e-05, "loss": 1.9052, "step": 2701 }, { "epoch": 0.15060476004682014, "grad_norm": 0.5019435286521912, "learning_rate": 9.54948730726811e-05, "loss": 1.5049, "step": 2702 }, { "epoch": 0.1506604982999833, "grad_norm": 0.557501494884491, "learning_rate": 9.549117857816697e-05, "loss": 1.8818, "step": 2703 }, { "epoch": 0.15071623655314642, "grad_norm": 0.5352375507354736, "learning_rate": 9.548748264093789e-05, "loss": 1.6683, "step": 2704 }, { "epoch": 0.15077197480630958, "grad_norm": 0.5106709599494934, "learning_rate": 9.548378526111108e-05, "loss": 1.6966, "step": 2705 }, { "epoch": 0.1508277130594727, "grad_norm": 0.5565862655639648, "learning_rate": 9.54800864388038e-05, "loss": 1.8303, "step": 2706 }, { "epoch": 0.15088345131263586, "grad_norm": 0.5492972135543823, "learning_rate": 9.547638617413333e-05, "loss": 1.8624, "step": 2707 }, { "epoch": 0.15093918956579902, "grad_norm": 0.50017249584198, "learning_rate": 9.547268446721702e-05, "loss": 1.5654, "step": 2708 }, { "epoch": 0.15099492781896215, "grad_norm": 0.48998236656188965, "learning_rate": 9.54689813181723e-05, "loss": 1.6074, "step": 2709 }, { "epoch": 0.1510506660721253, "grad_norm": 0.5397832989692688, "learning_rate": 9.54652767271166e-05, "loss": 1.8095, "step": 2710 }, { "epoch": 0.15110640432528843, "grad_norm": 0.5553854703903198, "learning_rate": 9.54615706941674e-05, "loss": 1.8065, "step": 2711 }, { "epoch": 0.1511621425784516, "grad_norm": 0.5286390781402588, "learning_rate": 9.545786321944223e-05, "loss": 1.5857, "step": 2712 }, { "epoch": 0.15121788083161475, "grad_norm": 0.4900679588317871, "learning_rate": 9.545415430305869e-05, "loss": 1.5847, "step": 2713 }, { "epoch": 0.15127361908477788, "grad_norm": 0.5456913113594055, "learning_rate": 9.545044394513439e-05, "loss": 1.7911, "step": 2714 }, { "epoch": 0.15132935733794103, "grad_norm": 0.5544347763061523, "learning_rate": 9.544673214578698e-05, "loss": 1.7341, "step": 2715 }, { "epoch": 0.15138509559110416, "grad_norm": 0.5260149836540222, "learning_rate": 9.544301890513423e-05, "loss": 1.6531, "step": 2716 }, { "epoch": 0.15144083384426732, "grad_norm": 0.5473960638046265, "learning_rate": 9.543930422329386e-05, "loss": 1.7704, "step": 2717 }, { "epoch": 0.15149657209743048, "grad_norm": 0.5335630178451538, "learning_rate": 9.543558810038368e-05, "loss": 1.6427, "step": 2718 }, { "epoch": 0.1515523103505936, "grad_norm": 0.558547854423523, "learning_rate": 9.543187053652156e-05, "loss": 1.9572, "step": 2719 }, { "epoch": 0.15160804860375676, "grad_norm": 0.5423372983932495, "learning_rate": 9.54281515318254e-05, "loss": 1.6761, "step": 2720 }, { "epoch": 0.1516637868569199, "grad_norm": 0.5132402181625366, "learning_rate": 9.542443108641312e-05, "loss": 1.8216, "step": 2721 }, { "epoch": 0.15171952511008305, "grad_norm": 0.491897314786911, "learning_rate": 9.542070920040274e-05, "loss": 1.5411, "step": 2722 }, { "epoch": 0.1517752633632462, "grad_norm": 0.5645871758460999, "learning_rate": 9.541698587391229e-05, "loss": 1.848, "step": 2723 }, { "epoch": 0.15183100161640933, "grad_norm": 0.5238233208656311, "learning_rate": 9.541326110705983e-05, "loss": 1.7717, "step": 2724 }, { "epoch": 0.1518867398695725, "grad_norm": 0.5333484411239624, "learning_rate": 9.540953489996354e-05, "loss": 1.6865, "step": 2725 }, { "epoch": 0.15194247812273565, "grad_norm": 0.5394174456596375, "learning_rate": 9.540580725274153e-05, "loss": 1.7526, "step": 2726 }, { "epoch": 0.15199821637589878, "grad_norm": 0.5119402408599854, "learning_rate": 9.540207816551206e-05, "loss": 1.7543, "step": 2727 }, { "epoch": 0.15205395462906193, "grad_norm": 0.4968518912792206, "learning_rate": 9.539834763839337e-05, "loss": 1.4261, "step": 2728 }, { "epoch": 0.15210969288222506, "grad_norm": 0.5909052491188049, "learning_rate": 9.539461567150378e-05, "loss": 1.9545, "step": 2729 }, { "epoch": 0.15216543113538822, "grad_norm": 0.5353077054023743, "learning_rate": 9.539088226496167e-05, "loss": 1.7021, "step": 2730 }, { "epoch": 0.15222116938855138, "grad_norm": 0.526706874370575, "learning_rate": 9.538714741888541e-05, "loss": 1.7132, "step": 2731 }, { "epoch": 0.1522769076417145, "grad_norm": 0.5296183228492737, "learning_rate": 9.538341113339346e-05, "loss": 1.6896, "step": 2732 }, { "epoch": 0.15233264589487766, "grad_norm": 0.5836046934127808, "learning_rate": 9.537967340860432e-05, "loss": 1.7815, "step": 2733 }, { "epoch": 0.1523883841480408, "grad_norm": 0.5508841872215271, "learning_rate": 9.537593424463651e-05, "loss": 1.8918, "step": 2734 }, { "epoch": 0.15244412240120395, "grad_norm": 0.522796630859375, "learning_rate": 9.537219364160863e-05, "loss": 1.7225, "step": 2735 }, { "epoch": 0.1524998606543671, "grad_norm": 0.48475125432014465, "learning_rate": 9.536845159963932e-05, "loss": 1.5232, "step": 2736 }, { "epoch": 0.15255559890753023, "grad_norm": 0.5141192674636841, "learning_rate": 9.536470811884723e-05, "loss": 1.8193, "step": 2737 }, { "epoch": 0.1526113371606934, "grad_norm": 0.5721970796585083, "learning_rate": 9.536096319935108e-05, "loss": 1.9167, "step": 2738 }, { "epoch": 0.15266707541385652, "grad_norm": 0.53280109167099, "learning_rate": 9.535721684126967e-05, "loss": 1.8613, "step": 2739 }, { "epoch": 0.15272281366701967, "grad_norm": 0.5099390745162964, "learning_rate": 9.535346904472177e-05, "loss": 1.6646, "step": 2740 }, { "epoch": 0.15277855192018283, "grad_norm": 0.8719338774681091, "learning_rate": 9.53497198098263e-05, "loss": 1.7495, "step": 2741 }, { "epoch": 0.15283429017334596, "grad_norm": 0.6453019380569458, "learning_rate": 9.53459691367021e-05, "loss": 1.9952, "step": 2742 }, { "epoch": 0.15289002842650912, "grad_norm": 0.5782769322395325, "learning_rate": 9.534221702546814e-05, "loss": 1.9164, "step": 2743 }, { "epoch": 0.15294576667967225, "grad_norm": 0.4970633387565613, "learning_rate": 9.533846347624343e-05, "loss": 1.7106, "step": 2744 }, { "epoch": 0.1530015049328354, "grad_norm": 0.5226539373397827, "learning_rate": 9.533470848914698e-05, "loss": 1.6197, "step": 2745 }, { "epoch": 0.15305724318599856, "grad_norm": 0.5139595866203308, "learning_rate": 9.533095206429792e-05, "loss": 1.7638, "step": 2746 }, { "epoch": 0.1531129814391617, "grad_norm": 0.5007668733596802, "learning_rate": 9.532719420181535e-05, "loss": 1.5744, "step": 2747 }, { "epoch": 0.15316871969232485, "grad_norm": 0.5414915084838867, "learning_rate": 9.532343490181845e-05, "loss": 1.748, "step": 2748 }, { "epoch": 0.153224457945488, "grad_norm": 0.6250778436660767, "learning_rate": 9.531967416442646e-05, "loss": 1.8845, "step": 2749 }, { "epoch": 0.15328019619865113, "grad_norm": 0.5204728245735168, "learning_rate": 9.531591198975863e-05, "loss": 1.7691, "step": 2750 }, { "epoch": 0.1533359344518143, "grad_norm": 0.5631746649742126, "learning_rate": 9.531214837793429e-05, "loss": 1.6964, "step": 2751 }, { "epoch": 0.15339167270497742, "grad_norm": 0.49102160334587097, "learning_rate": 9.530838332907278e-05, "loss": 1.6693, "step": 2752 }, { "epoch": 0.15344741095814057, "grad_norm": 0.5530296564102173, "learning_rate": 9.530461684329352e-05, "loss": 1.932, "step": 2753 }, { "epoch": 0.15350314921130373, "grad_norm": 0.4979936480522156, "learning_rate": 9.530084892071596e-05, "loss": 1.6084, "step": 2754 }, { "epoch": 0.15355888746446686, "grad_norm": 0.5499585270881653, "learning_rate": 9.52970795614596e-05, "loss": 1.8431, "step": 2755 }, { "epoch": 0.15361462571763002, "grad_norm": 0.5399606227874756, "learning_rate": 9.529330876564398e-05, "loss": 1.7747, "step": 2756 }, { "epoch": 0.15367036397079314, "grad_norm": 0.5473707914352417, "learning_rate": 9.528953653338867e-05, "loss": 1.7633, "step": 2757 }, { "epoch": 0.1537261022239563, "grad_norm": 0.5312392711639404, "learning_rate": 9.528576286481332e-05, "loss": 1.7155, "step": 2758 }, { "epoch": 0.15378184047711946, "grad_norm": 0.5812214016914368, "learning_rate": 9.52819877600376e-05, "loss": 1.7427, "step": 2759 }, { "epoch": 0.1538375787302826, "grad_norm": 0.5881000757217407, "learning_rate": 9.527821121918126e-05, "loss": 1.9338, "step": 2760 }, { "epoch": 0.15389331698344574, "grad_norm": 0.4990249574184418, "learning_rate": 9.527443324236403e-05, "loss": 1.6865, "step": 2761 }, { "epoch": 0.15394905523660887, "grad_norm": 0.5099406242370605, "learning_rate": 9.527065382970576e-05, "loss": 1.4843, "step": 2762 }, { "epoch": 0.15400479348977203, "grad_norm": 0.555368959903717, "learning_rate": 9.52668729813263e-05, "loss": 1.7174, "step": 2763 }, { "epoch": 0.1540605317429352, "grad_norm": 0.5384423136711121, "learning_rate": 9.526309069734553e-05, "loss": 1.8855, "step": 2764 }, { "epoch": 0.15411626999609832, "grad_norm": 0.5143032073974609, "learning_rate": 9.525930697788345e-05, "loss": 1.7095, "step": 2765 }, { "epoch": 0.15417200824926147, "grad_norm": 0.4992869794368744, "learning_rate": 9.525552182306003e-05, "loss": 1.5436, "step": 2766 }, { "epoch": 0.1542277465024246, "grad_norm": 0.5122644901275635, "learning_rate": 9.525173523299531e-05, "loss": 1.8488, "step": 2767 }, { "epoch": 0.15428348475558776, "grad_norm": 0.49027514457702637, "learning_rate": 9.524794720780938e-05, "loss": 1.6764, "step": 2768 }, { "epoch": 0.15433922300875091, "grad_norm": 0.5170779824256897, "learning_rate": 9.524415774762239e-05, "loss": 1.7393, "step": 2769 }, { "epoch": 0.15439496126191404, "grad_norm": 0.5226306319236755, "learning_rate": 9.52403668525545e-05, "loss": 1.6587, "step": 2770 }, { "epoch": 0.1544506995150772, "grad_norm": 0.5146019458770752, "learning_rate": 9.523657452272594e-05, "loss": 1.5704, "step": 2771 }, { "epoch": 0.15450643776824036, "grad_norm": 0.5141226649284363, "learning_rate": 9.5232780758257e-05, "loss": 1.6701, "step": 2772 }, { "epoch": 0.15456217602140349, "grad_norm": 0.5106475353240967, "learning_rate": 9.522898555926796e-05, "loss": 1.7997, "step": 2773 }, { "epoch": 0.15461791427456664, "grad_norm": 0.4933443069458008, "learning_rate": 9.52251889258792e-05, "loss": 1.4629, "step": 2774 }, { "epoch": 0.15467365252772977, "grad_norm": 0.547154426574707, "learning_rate": 9.522139085821113e-05, "loss": 1.7481, "step": 2775 }, { "epoch": 0.15472939078089293, "grad_norm": 0.5420608520507812, "learning_rate": 9.521759135638422e-05, "loss": 1.781, "step": 2776 }, { "epoch": 0.15478512903405608, "grad_norm": 0.5556414723396301, "learning_rate": 9.521379042051894e-05, "loss": 1.5232, "step": 2777 }, { "epoch": 0.1548408672872192, "grad_norm": 0.546357274055481, "learning_rate": 9.520998805073584e-05, "loss": 1.663, "step": 2778 }, { "epoch": 0.15489660554038237, "grad_norm": 0.5195935964584351, "learning_rate": 9.52061842471555e-05, "loss": 1.632, "step": 2779 }, { "epoch": 0.1549523437935455, "grad_norm": 0.5412857532501221, "learning_rate": 9.520237900989858e-05, "loss": 1.7983, "step": 2780 }, { "epoch": 0.15500808204670866, "grad_norm": 0.5480208992958069, "learning_rate": 9.519857233908574e-05, "loss": 2.0205, "step": 2781 }, { "epoch": 0.1550638202998718, "grad_norm": 0.5754556655883789, "learning_rate": 9.519476423483771e-05, "loss": 1.9992, "step": 2782 }, { "epoch": 0.15511955855303494, "grad_norm": 0.560160756111145, "learning_rate": 9.519095469727527e-05, "loss": 1.8583, "step": 2783 }, { "epoch": 0.1551752968061981, "grad_norm": 0.5757945775985718, "learning_rate": 9.518714372651922e-05, "loss": 1.9257, "step": 2784 }, { "epoch": 0.15523103505936123, "grad_norm": 0.861761212348938, "learning_rate": 9.518333132269043e-05, "loss": 1.8291, "step": 2785 }, { "epoch": 0.15528677331252438, "grad_norm": 0.5081753134727478, "learning_rate": 9.517951748590983e-05, "loss": 1.5859, "step": 2786 }, { "epoch": 0.15534251156568754, "grad_norm": 0.5519318580627441, "learning_rate": 9.517570221629833e-05, "loss": 1.7556, "step": 2787 }, { "epoch": 0.15539824981885067, "grad_norm": 0.5754350423812866, "learning_rate": 9.517188551397695e-05, "loss": 1.8201, "step": 2788 }, { "epoch": 0.15545398807201383, "grad_norm": 0.5522143840789795, "learning_rate": 9.516806737906674e-05, "loss": 1.7392, "step": 2789 }, { "epoch": 0.15550972632517696, "grad_norm": 0.5845313668251038, "learning_rate": 9.516424781168877e-05, "loss": 1.7216, "step": 2790 }, { "epoch": 0.1555654645783401, "grad_norm": 0.57271808385849, "learning_rate": 9.516042681196419e-05, "loss": 1.561, "step": 2791 }, { "epoch": 0.15562120283150327, "grad_norm": 0.5778896808624268, "learning_rate": 9.515660438001417e-05, "loss": 2.061, "step": 2792 }, { "epoch": 0.1556769410846664, "grad_norm": 0.5089336633682251, "learning_rate": 9.515278051595996e-05, "loss": 1.5716, "step": 2793 }, { "epoch": 0.15573267933782955, "grad_norm": 0.5174574255943298, "learning_rate": 9.514895521992278e-05, "loss": 1.5369, "step": 2794 }, { "epoch": 0.1557884175909927, "grad_norm": 0.5474531650543213, "learning_rate": 9.5145128492024e-05, "loss": 1.9497, "step": 2795 }, { "epoch": 0.15584415584415584, "grad_norm": 0.5397194027900696, "learning_rate": 9.514130033238494e-05, "loss": 1.7145, "step": 2796 }, { "epoch": 0.155899894097319, "grad_norm": 0.5489051938056946, "learning_rate": 9.513747074112705e-05, "loss": 1.599, "step": 2797 }, { "epoch": 0.15595563235048213, "grad_norm": 0.5342767834663391, "learning_rate": 9.513363971837174e-05, "loss": 1.6787, "step": 2798 }, { "epoch": 0.15601137060364528, "grad_norm": 0.5298926830291748, "learning_rate": 9.512980726424052e-05, "loss": 1.6852, "step": 2799 }, { "epoch": 0.15606710885680844, "grad_norm": 0.5444782376289368, "learning_rate": 9.512597337885496e-05, "loss": 1.6972, "step": 2800 }, { "epoch": 0.15612284710997157, "grad_norm": 0.5541877150535583, "learning_rate": 9.51221380623366e-05, "loss": 1.6794, "step": 2801 }, { "epoch": 0.15617858536313473, "grad_norm": 0.6140812039375305, "learning_rate": 9.511830131480712e-05, "loss": 1.6826, "step": 2802 }, { "epoch": 0.15623432361629785, "grad_norm": 0.5042434930801392, "learning_rate": 9.511446313638819e-05, "loss": 1.6276, "step": 2803 }, { "epoch": 0.156290061869461, "grad_norm": 0.5544094443321228, "learning_rate": 9.51106235272015e-05, "loss": 1.7685, "step": 2804 }, { "epoch": 0.15634580012262417, "grad_norm": 0.49621298909187317, "learning_rate": 9.510678248736887e-05, "loss": 1.6194, "step": 2805 }, { "epoch": 0.1564015383757873, "grad_norm": 0.5988842248916626, "learning_rate": 9.510294001701208e-05, "loss": 1.8121, "step": 2806 }, { "epoch": 0.15645727662895045, "grad_norm": 0.5324400067329407, "learning_rate": 9.509909611625298e-05, "loss": 1.7674, "step": 2807 }, { "epoch": 0.15651301488211358, "grad_norm": 0.5413124561309814, "learning_rate": 9.509525078521353e-05, "loss": 1.5738, "step": 2808 }, { "epoch": 0.15656875313527674, "grad_norm": 0.5253452658653259, "learning_rate": 9.509140402401563e-05, "loss": 1.7126, "step": 2809 }, { "epoch": 0.1566244913884399, "grad_norm": 0.5672581791877747, "learning_rate": 9.508755583278131e-05, "loss": 1.8056, "step": 2810 }, { "epoch": 0.15668022964160302, "grad_norm": 0.49362093210220337, "learning_rate": 9.508370621163259e-05, "loss": 1.7569, "step": 2811 }, { "epoch": 0.15673596789476618, "grad_norm": 0.5672383308410645, "learning_rate": 9.507985516069154e-05, "loss": 2.0115, "step": 2812 }, { "epoch": 0.1567917061479293, "grad_norm": 0.576835036277771, "learning_rate": 9.507600268008034e-05, "loss": 2.0173, "step": 2813 }, { "epoch": 0.15684744440109247, "grad_norm": 0.5514403581619263, "learning_rate": 9.507214876992116e-05, "loss": 1.711, "step": 2814 }, { "epoch": 0.15690318265425562, "grad_norm": 0.5197775363922119, "learning_rate": 9.506829343033619e-05, "loss": 1.7613, "step": 2815 }, { "epoch": 0.15695892090741875, "grad_norm": 0.5949315428733826, "learning_rate": 9.506443666144773e-05, "loss": 1.9146, "step": 2816 }, { "epoch": 0.1570146591605819, "grad_norm": 0.5169588923454285, "learning_rate": 9.506057846337808e-05, "loss": 1.5925, "step": 2817 }, { "epoch": 0.15707039741374507, "grad_norm": 0.5083977580070496, "learning_rate": 9.505671883624959e-05, "loss": 1.7269, "step": 2818 }, { "epoch": 0.1571261356669082, "grad_norm": 0.5890203714370728, "learning_rate": 9.505285778018469e-05, "loss": 1.9239, "step": 2819 }, { "epoch": 0.15718187392007135, "grad_norm": 0.5113581418991089, "learning_rate": 9.504899529530582e-05, "loss": 1.4883, "step": 2820 }, { "epoch": 0.15723761217323448, "grad_norm": 0.5035502314567566, "learning_rate": 9.504513138173547e-05, "loss": 1.5673, "step": 2821 }, { "epoch": 0.15729335042639764, "grad_norm": 0.5176184773445129, "learning_rate": 9.504126603959618e-05, "loss": 1.492, "step": 2822 }, { "epoch": 0.1573490886795608, "grad_norm": 0.5595249533653259, "learning_rate": 9.503739926901055e-05, "loss": 1.916, "step": 2823 }, { "epoch": 0.15740482693272392, "grad_norm": 0.5306408405303955, "learning_rate": 9.50335310701012e-05, "loss": 1.8255, "step": 2824 }, { "epoch": 0.15746056518588708, "grad_norm": 0.5166139602661133, "learning_rate": 9.50296614429908e-05, "loss": 1.9614, "step": 2825 }, { "epoch": 0.1575163034390502, "grad_norm": 0.5143607258796692, "learning_rate": 9.502579038780207e-05, "loss": 1.5858, "step": 2826 }, { "epoch": 0.15757204169221337, "grad_norm": 0.5186240673065186, "learning_rate": 9.50219179046578e-05, "loss": 1.6746, "step": 2827 }, { "epoch": 0.15762777994537652, "grad_norm": 0.5193765759468079, "learning_rate": 9.50180439936808e-05, "loss": 1.5768, "step": 2828 }, { "epoch": 0.15768351819853965, "grad_norm": 0.5847373604774475, "learning_rate": 9.501416865499391e-05, "loss": 2.0199, "step": 2829 }, { "epoch": 0.1577392564517028, "grad_norm": 0.5198137760162354, "learning_rate": 9.501029188872004e-05, "loss": 1.6215, "step": 2830 }, { "epoch": 0.15779499470486594, "grad_norm": 0.5044419169425964, "learning_rate": 9.500641369498214e-05, "loss": 1.6355, "step": 2831 }, { "epoch": 0.1578507329580291, "grad_norm": 0.6085756421089172, "learning_rate": 9.50025340739032e-05, "loss": 2.107, "step": 2832 }, { "epoch": 0.15790647121119225, "grad_norm": 0.5201433300971985, "learning_rate": 9.499865302560626e-05, "loss": 1.5787, "step": 2833 }, { "epoch": 0.15796220946435538, "grad_norm": 0.5003561973571777, "learning_rate": 9.49947705502144e-05, "loss": 1.6343, "step": 2834 }, { "epoch": 0.15801794771751854, "grad_norm": 0.5781692862510681, "learning_rate": 9.499088664785077e-05, "loss": 1.8281, "step": 2835 }, { "epoch": 0.15807368597068167, "grad_norm": 0.5135318636894226, "learning_rate": 9.498700131863853e-05, "loss": 1.7294, "step": 2836 }, { "epoch": 0.15812942422384482, "grad_norm": 0.5199892520904541, "learning_rate": 9.49831145627009e-05, "loss": 1.6611, "step": 2837 }, { "epoch": 0.15818516247700798, "grad_norm": 0.49417805671691895, "learning_rate": 9.497922638016114e-05, "loss": 1.4057, "step": 2838 }, { "epoch": 0.1582409007301711, "grad_norm": 0.5626333951950073, "learning_rate": 9.497533677114257e-05, "loss": 1.7803, "step": 2839 }, { "epoch": 0.15829663898333426, "grad_norm": 0.5851137042045593, "learning_rate": 9.497144573576855e-05, "loss": 1.7828, "step": 2840 }, { "epoch": 0.15835237723649742, "grad_norm": 0.5782892107963562, "learning_rate": 9.496755327416245e-05, "loss": 1.9224, "step": 2841 }, { "epoch": 0.15840811548966055, "grad_norm": 0.519010603427887, "learning_rate": 9.496365938644775e-05, "loss": 1.6932, "step": 2842 }, { "epoch": 0.1584638537428237, "grad_norm": 0.588720440864563, "learning_rate": 9.495976407274794e-05, "loss": 1.7235, "step": 2843 }, { "epoch": 0.15851959199598684, "grad_norm": 0.530684769153595, "learning_rate": 9.495586733318654e-05, "loss": 1.7368, "step": 2844 }, { "epoch": 0.15857533024915, "grad_norm": 0.5223602652549744, "learning_rate": 9.495196916788714e-05, "loss": 1.5822, "step": 2845 }, { "epoch": 0.15863106850231315, "grad_norm": 0.5282277464866638, "learning_rate": 9.494806957697337e-05, "loss": 1.7119, "step": 2846 }, { "epoch": 0.15868680675547628, "grad_norm": 0.5861890912055969, "learning_rate": 9.49441685605689e-05, "loss": 1.7597, "step": 2847 }, { "epoch": 0.15874254500863944, "grad_norm": 0.6072325110435486, "learning_rate": 9.494026611879744e-05, "loss": 2.1445, "step": 2848 }, { "epoch": 0.15879828326180256, "grad_norm": 0.5348519086837769, "learning_rate": 9.493636225178276e-05, "loss": 1.5885, "step": 2849 }, { "epoch": 0.15885402151496572, "grad_norm": 0.5133005976676941, "learning_rate": 9.493245695964866e-05, "loss": 1.7934, "step": 2850 }, { "epoch": 0.15890975976812888, "grad_norm": 0.5469639897346497, "learning_rate": 9.492855024251901e-05, "loss": 1.7025, "step": 2851 }, { "epoch": 0.158965498021292, "grad_norm": 0.5326577425003052, "learning_rate": 9.492464210051771e-05, "loss": 1.6258, "step": 2852 }, { "epoch": 0.15902123627445516, "grad_norm": 0.6941805481910706, "learning_rate": 9.492073253376865e-05, "loss": 1.9171, "step": 2853 }, { "epoch": 0.1590769745276183, "grad_norm": 0.5997553467750549, "learning_rate": 9.491682154239589e-05, "loss": 1.9891, "step": 2854 }, { "epoch": 0.15913271278078145, "grad_norm": 0.5727251172065735, "learning_rate": 9.491290912652344e-05, "loss": 1.9522, "step": 2855 }, { "epoch": 0.1591884510339446, "grad_norm": 0.5947685837745667, "learning_rate": 9.490899528627536e-05, "loss": 2.0334, "step": 2856 }, { "epoch": 0.15924418928710773, "grad_norm": 0.5425087809562683, "learning_rate": 9.490508002177579e-05, "loss": 1.8532, "step": 2857 }, { "epoch": 0.1592999275402709, "grad_norm": 0.5523599982261658, "learning_rate": 9.490116333314889e-05, "loss": 1.6041, "step": 2858 }, { "epoch": 0.15935566579343402, "grad_norm": 0.5558710098266602, "learning_rate": 9.489724522051888e-05, "loss": 1.9383, "step": 2859 }, { "epoch": 0.15941140404659718, "grad_norm": 0.5611505508422852, "learning_rate": 9.489332568401004e-05, "loss": 1.8919, "step": 2860 }, { "epoch": 0.15946714229976033, "grad_norm": 0.5016571283340454, "learning_rate": 9.488940472374663e-05, "loss": 1.8347, "step": 2861 }, { "epoch": 0.15952288055292346, "grad_norm": 0.5290272831916809, "learning_rate": 9.488548233985305e-05, "loss": 1.697, "step": 2862 }, { "epoch": 0.15957861880608662, "grad_norm": 0.5488302707672119, "learning_rate": 9.488155853245366e-05, "loss": 1.9557, "step": 2863 }, { "epoch": 0.15963435705924978, "grad_norm": 0.5422006845474243, "learning_rate": 9.487763330167291e-05, "loss": 1.6364, "step": 2864 }, { "epoch": 0.1596900953124129, "grad_norm": 0.5467256307601929, "learning_rate": 9.487370664763529e-05, "loss": 1.7917, "step": 2865 }, { "epoch": 0.15974583356557606, "grad_norm": 0.538063108921051, "learning_rate": 9.486977857046532e-05, "loss": 1.8552, "step": 2866 }, { "epoch": 0.1598015718187392, "grad_norm": 0.5502356886863708, "learning_rate": 9.486584907028758e-05, "loss": 1.6089, "step": 2867 }, { "epoch": 0.15985731007190235, "grad_norm": 0.526684582233429, "learning_rate": 9.48619181472267e-05, "loss": 1.5357, "step": 2868 }, { "epoch": 0.1599130483250655, "grad_norm": 0.5427432656288147, "learning_rate": 9.485798580140735e-05, "loss": 1.7628, "step": 2869 }, { "epoch": 0.15996878657822863, "grad_norm": 0.5465673208236694, "learning_rate": 9.485405203295421e-05, "loss": 1.6318, "step": 2870 }, { "epoch": 0.1600245248313918, "grad_norm": 0.5261492729187012, "learning_rate": 9.485011684199207e-05, "loss": 1.6422, "step": 2871 }, { "epoch": 0.16008026308455492, "grad_norm": 0.571042001247406, "learning_rate": 9.484618022864571e-05, "loss": 1.5466, "step": 2872 }, { "epoch": 0.16013600133771808, "grad_norm": 0.5928837656974792, "learning_rate": 9.484224219304e-05, "loss": 2.0925, "step": 2873 }, { "epoch": 0.16019173959088123, "grad_norm": 0.4875600337982178, "learning_rate": 9.48383027352998e-05, "loss": 1.6183, "step": 2874 }, { "epoch": 0.16024747784404436, "grad_norm": 0.5074633955955505, "learning_rate": 9.483436185555007e-05, "loss": 1.5593, "step": 2875 }, { "epoch": 0.16030321609720752, "grad_norm": 0.553817093372345, "learning_rate": 9.483041955391578e-05, "loss": 1.7093, "step": 2876 }, { "epoch": 0.16035895435037065, "grad_norm": 0.5676888823509216, "learning_rate": 9.482647583052196e-05, "loss": 1.7555, "step": 2877 }, { "epoch": 0.1604146926035338, "grad_norm": 0.5311009883880615, "learning_rate": 9.48225306854937e-05, "loss": 1.7709, "step": 2878 }, { "epoch": 0.16047043085669696, "grad_norm": 0.5391182899475098, "learning_rate": 9.481858411895608e-05, "loss": 1.7296, "step": 2879 }, { "epoch": 0.1605261691098601, "grad_norm": 0.5432226657867432, "learning_rate": 9.481463613103429e-05, "loss": 1.7808, "step": 2880 }, { "epoch": 0.16058190736302325, "grad_norm": 0.5264506936073303, "learning_rate": 9.481068672185353e-05, "loss": 1.6362, "step": 2881 }, { "epoch": 0.16063764561618638, "grad_norm": 0.5308744311332703, "learning_rate": 9.480673589153904e-05, "loss": 1.5913, "step": 2882 }, { "epoch": 0.16069338386934953, "grad_norm": 0.4966695308685303, "learning_rate": 9.480278364021614e-05, "loss": 1.6744, "step": 2883 }, { "epoch": 0.1607491221225127, "grad_norm": 0.5250310301780701, "learning_rate": 9.479882996801017e-05, "loss": 1.5185, "step": 2884 }, { "epoch": 0.16080486037567582, "grad_norm": 0.5288892388343811, "learning_rate": 9.479487487504649e-05, "loss": 1.5259, "step": 2885 }, { "epoch": 0.16086059862883897, "grad_norm": 0.5666532516479492, "learning_rate": 9.479091836145057e-05, "loss": 1.7626, "step": 2886 }, { "epoch": 0.16091633688200213, "grad_norm": 0.5458130836486816, "learning_rate": 9.478696042734785e-05, "loss": 1.6936, "step": 2887 }, { "epoch": 0.16097207513516526, "grad_norm": 0.5105459690093994, "learning_rate": 9.478300107286389e-05, "loss": 1.4811, "step": 2888 }, { "epoch": 0.16102781338832842, "grad_norm": 0.5251494646072388, "learning_rate": 9.477904029812422e-05, "loss": 1.7184, "step": 2889 }, { "epoch": 0.16108355164149155, "grad_norm": 0.5484756231307983, "learning_rate": 9.477507810325448e-05, "loss": 1.4053, "step": 2890 }, { "epoch": 0.1611392898946547, "grad_norm": 0.5894975066184998, "learning_rate": 9.477111448838031e-05, "loss": 2.0827, "step": 2891 }, { "epoch": 0.16119502814781786, "grad_norm": 0.5738565921783447, "learning_rate": 9.476714945362745e-05, "loss": 1.8864, "step": 2892 }, { "epoch": 0.161250766400981, "grad_norm": 0.6212289333343506, "learning_rate": 9.47631829991216e-05, "loss": 1.9475, "step": 2893 }, { "epoch": 0.16130650465414414, "grad_norm": 0.6506125330924988, "learning_rate": 9.475921512498857e-05, "loss": 1.9044, "step": 2894 }, { "epoch": 0.16136224290730727, "grad_norm": 0.5559994578361511, "learning_rate": 9.475524583135421e-05, "loss": 1.5211, "step": 2895 }, { "epoch": 0.16141798116047043, "grad_norm": 0.5860363841056824, "learning_rate": 9.475127511834438e-05, "loss": 1.7724, "step": 2896 }, { "epoch": 0.1614737194136336, "grad_norm": 0.5559065341949463, "learning_rate": 9.474730298608504e-05, "loss": 1.8392, "step": 2897 }, { "epoch": 0.16152945766679672, "grad_norm": 0.5526688694953918, "learning_rate": 9.474332943470213e-05, "loss": 1.7909, "step": 2898 }, { "epoch": 0.16158519591995987, "grad_norm": 0.5582461357116699, "learning_rate": 9.47393544643217e-05, "loss": 1.9106, "step": 2899 }, { "epoch": 0.161640934173123, "grad_norm": 0.5841467380523682, "learning_rate": 9.473537807506977e-05, "loss": 1.922, "step": 2900 }, { "epoch": 0.16169667242628616, "grad_norm": 0.5061233043670654, "learning_rate": 9.47314002670725e-05, "loss": 1.5719, "step": 2901 }, { "epoch": 0.16175241067944932, "grad_norm": 0.4959016442298889, "learning_rate": 9.472742104045599e-05, "loss": 1.6517, "step": 2902 }, { "epoch": 0.16180814893261244, "grad_norm": 0.5075359344482422, "learning_rate": 9.472344039534646e-05, "loss": 1.7661, "step": 2903 }, { "epoch": 0.1618638871857756, "grad_norm": 0.5135536193847656, "learning_rate": 9.471945833187018e-05, "loss": 1.6874, "step": 2904 }, { "epoch": 0.16191962543893873, "grad_norm": 0.5618202090263367, "learning_rate": 9.471547485015341e-05, "loss": 1.6745, "step": 2905 }, { "epoch": 0.1619753636921019, "grad_norm": 0.5325173139572144, "learning_rate": 9.471148995032247e-05, "loss": 1.7141, "step": 2906 }, { "epoch": 0.16203110194526504, "grad_norm": 0.521827220916748, "learning_rate": 9.470750363250378e-05, "loss": 1.595, "step": 2907 }, { "epoch": 0.16208684019842817, "grad_norm": 0.5489259362220764, "learning_rate": 9.470351589682372e-05, "loss": 1.8687, "step": 2908 }, { "epoch": 0.16214257845159133, "grad_norm": 0.5823487043380737, "learning_rate": 9.469952674340877e-05, "loss": 1.8964, "step": 2909 }, { "epoch": 0.16219831670475449, "grad_norm": 0.5378115773200989, "learning_rate": 9.469553617238546e-05, "loss": 1.6171, "step": 2910 }, { "epoch": 0.16225405495791762, "grad_norm": 0.500411331653595, "learning_rate": 9.469154418388034e-05, "loss": 1.7592, "step": 2911 }, { "epoch": 0.16230979321108077, "grad_norm": 0.49383944272994995, "learning_rate": 9.468755077801999e-05, "loss": 1.6709, "step": 2912 }, { "epoch": 0.1623655314642439, "grad_norm": 0.5428176522254944, "learning_rate": 9.468355595493109e-05, "loss": 1.7304, "step": 2913 }, { "epoch": 0.16242126971740706, "grad_norm": 0.537581205368042, "learning_rate": 9.467955971474031e-05, "loss": 1.7252, "step": 2914 }, { "epoch": 0.16247700797057021, "grad_norm": 0.5622221231460571, "learning_rate": 9.46755620575744e-05, "loss": 1.7643, "step": 2915 }, { "epoch": 0.16253274622373334, "grad_norm": 0.5474369525909424, "learning_rate": 9.467156298356015e-05, "loss": 1.7263, "step": 2916 }, { "epoch": 0.1625884844768965, "grad_norm": 0.5429725646972656, "learning_rate": 9.466756249282435e-05, "loss": 1.7771, "step": 2917 }, { "epoch": 0.16264422273005963, "grad_norm": 0.5385332107543945, "learning_rate": 9.466356058549393e-05, "loss": 1.7372, "step": 2918 }, { "epoch": 0.16269996098322279, "grad_norm": 0.5135955214500427, "learning_rate": 9.465955726169575e-05, "loss": 1.7296, "step": 2919 }, { "epoch": 0.16275569923638594, "grad_norm": 0.5584880709648132, "learning_rate": 9.46555525215568e-05, "loss": 1.7907, "step": 2920 }, { "epoch": 0.16281143748954907, "grad_norm": 0.5609123706817627, "learning_rate": 9.46515463652041e-05, "loss": 1.8558, "step": 2921 }, { "epoch": 0.16286717574271223, "grad_norm": 0.5887969732284546, "learning_rate": 9.464753879276467e-05, "loss": 1.8673, "step": 2922 }, { "epoch": 0.16292291399587536, "grad_norm": 0.5207127332687378, "learning_rate": 9.464352980436562e-05, "loss": 1.8252, "step": 2923 }, { "epoch": 0.1629786522490385, "grad_norm": 0.4879356622695923, "learning_rate": 9.463951940013411e-05, "loss": 1.564, "step": 2924 }, { "epoch": 0.16303439050220167, "grad_norm": 0.5253145098686218, "learning_rate": 9.46355075801973e-05, "loss": 1.731, "step": 2925 }, { "epoch": 0.1630901287553648, "grad_norm": 0.5216013789176941, "learning_rate": 9.463149434468244e-05, "loss": 1.7954, "step": 2926 }, { "epoch": 0.16314586700852796, "grad_norm": 0.5162796974182129, "learning_rate": 9.46274796937168e-05, "loss": 1.6639, "step": 2927 }, { "epoch": 0.16320160526169109, "grad_norm": 0.5164597630500793, "learning_rate": 9.462346362742767e-05, "loss": 1.5104, "step": 2928 }, { "epoch": 0.16325734351485424, "grad_norm": 0.5458294153213501, "learning_rate": 9.461944614594248e-05, "loss": 1.7081, "step": 2929 }, { "epoch": 0.1633130817680174, "grad_norm": 0.525484025478363, "learning_rate": 9.461542724938859e-05, "loss": 1.8709, "step": 2930 }, { "epoch": 0.16336882002118053, "grad_norm": 0.5675646662712097, "learning_rate": 9.461140693789349e-05, "loss": 1.7861, "step": 2931 }, { "epoch": 0.16342455827434368, "grad_norm": 0.5174034833908081, "learning_rate": 9.460738521158466e-05, "loss": 1.745, "step": 2932 }, { "epoch": 0.16348029652750684, "grad_norm": 0.5687560439109802, "learning_rate": 9.460336207058964e-05, "loss": 1.8071, "step": 2933 }, { "epoch": 0.16353603478066997, "grad_norm": 0.5177374482154846, "learning_rate": 9.459933751503604e-05, "loss": 1.7359, "step": 2934 }, { "epoch": 0.16359177303383313, "grad_norm": 0.5742724537849426, "learning_rate": 9.459531154505147e-05, "loss": 1.6545, "step": 2935 }, { "epoch": 0.16364751128699626, "grad_norm": 0.555439293384552, "learning_rate": 9.459128416076365e-05, "loss": 1.5666, "step": 2936 }, { "epoch": 0.1637032495401594, "grad_norm": 0.5305073857307434, "learning_rate": 9.458725536230027e-05, "loss": 1.8546, "step": 2937 }, { "epoch": 0.16375898779332257, "grad_norm": 0.517587423324585, "learning_rate": 9.458322514978912e-05, "loss": 1.6707, "step": 2938 }, { "epoch": 0.1638147260464857, "grad_norm": 0.5396296977996826, "learning_rate": 9.4579193523358e-05, "loss": 1.6807, "step": 2939 }, { "epoch": 0.16387046429964885, "grad_norm": 0.545603334903717, "learning_rate": 9.457516048313478e-05, "loss": 1.7966, "step": 2940 }, { "epoch": 0.16392620255281198, "grad_norm": 0.5535080432891846, "learning_rate": 9.457112602924735e-05, "loss": 1.8103, "step": 2941 }, { "epoch": 0.16398194080597514, "grad_norm": 0.5278719663619995, "learning_rate": 9.456709016182368e-05, "loss": 1.7992, "step": 2942 }, { "epoch": 0.1640376790591383, "grad_norm": 0.5094558000564575, "learning_rate": 9.456305288099174e-05, "loss": 1.8232, "step": 2943 }, { "epoch": 0.16409341731230143, "grad_norm": 0.5989511013031006, "learning_rate": 9.45590141868796e-05, "loss": 1.8106, "step": 2944 }, { "epoch": 0.16414915556546458, "grad_norm": 0.5221716165542603, "learning_rate": 9.455497407961532e-05, "loss": 1.6316, "step": 2945 }, { "epoch": 0.1642048938186277, "grad_norm": 0.4996791481971741, "learning_rate": 9.455093255932704e-05, "loss": 1.4846, "step": 2946 }, { "epoch": 0.16426063207179087, "grad_norm": 0.5217500329017639, "learning_rate": 9.454688962614293e-05, "loss": 1.7717, "step": 2947 }, { "epoch": 0.16431637032495403, "grad_norm": 0.5416474938392639, "learning_rate": 9.45428452801912e-05, "loss": 1.8829, "step": 2948 }, { "epoch": 0.16437210857811715, "grad_norm": 0.5558078289031982, "learning_rate": 9.453879952160013e-05, "loss": 1.8933, "step": 2949 }, { "epoch": 0.1644278468312803, "grad_norm": 0.5439289808273315, "learning_rate": 9.4534752350498e-05, "loss": 1.6009, "step": 2950 }, { "epoch": 0.16448358508444344, "grad_norm": 0.5921631455421448, "learning_rate": 9.45307037670132e-05, "loss": 1.9932, "step": 2951 }, { "epoch": 0.1645393233376066, "grad_norm": 0.5491567850112915, "learning_rate": 9.452665377127409e-05, "loss": 1.9729, "step": 2952 }, { "epoch": 0.16459506159076975, "grad_norm": 0.6129978895187378, "learning_rate": 9.452260236340915e-05, "loss": 1.8995, "step": 2953 }, { "epoch": 0.16465079984393288, "grad_norm": 0.6029583215713501, "learning_rate": 9.451854954354684e-05, "loss": 1.8313, "step": 2954 }, { "epoch": 0.16470653809709604, "grad_norm": 0.5197410583496094, "learning_rate": 9.451449531181572e-05, "loss": 1.6307, "step": 2955 }, { "epoch": 0.1647622763502592, "grad_norm": 0.5214848518371582, "learning_rate": 9.451043966834431e-05, "loss": 1.7253, "step": 2956 }, { "epoch": 0.16481801460342232, "grad_norm": 0.48953381180763245, "learning_rate": 9.450638261326128e-05, "loss": 1.5122, "step": 2957 }, { "epoch": 0.16487375285658548, "grad_norm": 0.5038783550262451, "learning_rate": 9.450232414669528e-05, "loss": 1.7602, "step": 2958 }, { "epoch": 0.1649294911097486, "grad_norm": 0.5723398327827454, "learning_rate": 9.449826426877504e-05, "loss": 1.9841, "step": 2959 }, { "epoch": 0.16498522936291177, "grad_norm": 0.5200619101524353, "learning_rate": 9.44942029796293e-05, "loss": 1.7965, "step": 2960 }, { "epoch": 0.16504096761607492, "grad_norm": 0.6376471519470215, "learning_rate": 9.449014027938685e-05, "loss": 2.1267, "step": 2961 }, { "epoch": 0.16509670586923805, "grad_norm": 0.5397600531578064, "learning_rate": 9.448607616817655e-05, "loss": 1.7952, "step": 2962 }, { "epoch": 0.1651524441224012, "grad_norm": 0.5907739996910095, "learning_rate": 9.448201064612728e-05, "loss": 1.8026, "step": 2963 }, { "epoch": 0.16520818237556434, "grad_norm": 0.5700837969779968, "learning_rate": 9.447794371336799e-05, "loss": 2.1377, "step": 2964 }, { "epoch": 0.1652639206287275, "grad_norm": 0.5404232740402222, "learning_rate": 9.447387537002765e-05, "loss": 1.9586, "step": 2965 }, { "epoch": 0.16531965888189065, "grad_norm": 0.5181935429573059, "learning_rate": 9.446980561623527e-05, "loss": 1.4828, "step": 2966 }, { "epoch": 0.16537539713505378, "grad_norm": 0.6044127941131592, "learning_rate": 9.446573445211994e-05, "loss": 1.789, "step": 2967 }, { "epoch": 0.16543113538821694, "grad_norm": 0.5353678464889526, "learning_rate": 9.446166187781077e-05, "loss": 1.709, "step": 2968 }, { "epoch": 0.16548687364138007, "grad_norm": 0.5155282020568848, "learning_rate": 9.445758789343691e-05, "loss": 1.6335, "step": 2969 }, { "epoch": 0.16554261189454322, "grad_norm": 0.5247118473052979, "learning_rate": 9.445351249912757e-05, "loss": 1.6666, "step": 2970 }, { "epoch": 0.16559835014770638, "grad_norm": 0.5768206119537354, "learning_rate": 9.4449435695012e-05, "loss": 1.9109, "step": 2971 }, { "epoch": 0.1656540884008695, "grad_norm": 0.5591040849685669, "learning_rate": 9.444535748121949e-05, "loss": 1.781, "step": 2972 }, { "epoch": 0.16570982665403267, "grad_norm": 0.5098216533660889, "learning_rate": 9.444127785787938e-05, "loss": 1.7213, "step": 2973 }, { "epoch": 0.1657655649071958, "grad_norm": 0.5072734355926514, "learning_rate": 9.443719682512102e-05, "loss": 1.8224, "step": 2974 }, { "epoch": 0.16582130316035895, "grad_norm": 0.5172891020774841, "learning_rate": 9.443311438307389e-05, "loss": 1.8449, "step": 2975 }, { "epoch": 0.1658770414135221, "grad_norm": 0.557597815990448, "learning_rate": 9.442903053186743e-05, "loss": 1.6679, "step": 2976 }, { "epoch": 0.16593277966668524, "grad_norm": 0.518157422542572, "learning_rate": 9.442494527163115e-05, "loss": 1.6812, "step": 2977 }, { "epoch": 0.1659885179198484, "grad_norm": 0.5476084351539612, "learning_rate": 9.442085860249461e-05, "loss": 1.7849, "step": 2978 }, { "epoch": 0.16604425617301155, "grad_norm": 0.5458279252052307, "learning_rate": 9.441677052458745e-05, "loss": 1.8582, "step": 2979 }, { "epoch": 0.16609999442617468, "grad_norm": 0.592612624168396, "learning_rate": 9.441268103803928e-05, "loss": 2.0226, "step": 2980 }, { "epoch": 0.16615573267933784, "grad_norm": 0.5498427748680115, "learning_rate": 9.440859014297982e-05, "loss": 1.577, "step": 2981 }, { "epoch": 0.16621147093250097, "grad_norm": 0.5673382878303528, "learning_rate": 9.440449783953883e-05, "loss": 1.7272, "step": 2982 }, { "epoch": 0.16626720918566412, "grad_norm": 0.565617024898529, "learning_rate": 9.440040412784603e-05, "loss": 1.7481, "step": 2983 }, { "epoch": 0.16632294743882728, "grad_norm": 0.6157540678977966, "learning_rate": 9.439630900803129e-05, "loss": 1.9244, "step": 2984 }, { "epoch": 0.1663786856919904, "grad_norm": 0.4916851818561554, "learning_rate": 9.439221248022447e-05, "loss": 1.5845, "step": 2985 }, { "epoch": 0.16643442394515356, "grad_norm": 0.573154091835022, "learning_rate": 9.43881145445555e-05, "loss": 1.8841, "step": 2986 }, { "epoch": 0.1664901621983167, "grad_norm": 0.5438728332519531, "learning_rate": 9.438401520115434e-05, "loss": 1.7537, "step": 2987 }, { "epoch": 0.16654590045147985, "grad_norm": 0.5793212652206421, "learning_rate": 9.4379914450151e-05, "loss": 1.9331, "step": 2988 }, { "epoch": 0.166601638704643, "grad_norm": 0.5194965600967407, "learning_rate": 9.437581229167551e-05, "loss": 1.5948, "step": 2989 }, { "epoch": 0.16665737695780614, "grad_norm": 0.5872880816459656, "learning_rate": 9.4371708725858e-05, "loss": 1.7629, "step": 2990 }, { "epoch": 0.1667131152109693, "grad_norm": 0.519842803478241, "learning_rate": 9.436760375282859e-05, "loss": 1.766, "step": 2991 }, { "epoch": 0.16676885346413242, "grad_norm": 0.5351104736328125, "learning_rate": 9.436349737271745e-05, "loss": 1.8319, "step": 2992 }, { "epoch": 0.16682459171729558, "grad_norm": 0.5584455728530884, "learning_rate": 9.435938958565487e-05, "loss": 1.7975, "step": 2993 }, { "epoch": 0.16688032997045874, "grad_norm": 0.4804225564002991, "learning_rate": 9.435528039177105e-05, "loss": 1.7058, "step": 2994 }, { "epoch": 0.16693606822362186, "grad_norm": 0.5311334133148193, "learning_rate": 9.435116979119635e-05, "loss": 1.7305, "step": 2995 }, { "epoch": 0.16699180647678502, "grad_norm": 0.5292813777923584, "learning_rate": 9.434705778406114e-05, "loss": 1.6901, "step": 2996 }, { "epoch": 0.16704754472994815, "grad_norm": 0.5105124711990356, "learning_rate": 9.434294437049582e-05, "loss": 1.7462, "step": 2997 }, { "epoch": 0.1671032829831113, "grad_norm": 0.5604652762413025, "learning_rate": 9.433882955063084e-05, "loss": 1.7997, "step": 2998 }, { "epoch": 0.16715902123627446, "grad_norm": 0.555237889289856, "learning_rate": 9.43347133245967e-05, "loss": 1.923, "step": 2999 }, { "epoch": 0.1672147594894376, "grad_norm": 0.5382326245307922, "learning_rate": 9.433059569252394e-05, "loss": 1.7263, "step": 3000 }, { "epoch": 0.16727049774260075, "grad_norm": 0.6488143801689148, "learning_rate": 9.432647665454315e-05, "loss": 1.5881, "step": 3001 }, { "epoch": 0.1673262359957639, "grad_norm": 0.55712890625, "learning_rate": 9.432235621078497e-05, "loss": 1.9409, "step": 3002 }, { "epoch": 0.16738197424892703, "grad_norm": 0.5540611147880554, "learning_rate": 9.431823436138005e-05, "loss": 1.8471, "step": 3003 }, { "epoch": 0.1674377125020902, "grad_norm": 0.5297248959541321, "learning_rate": 9.431411110645915e-05, "loss": 1.6844, "step": 3004 }, { "epoch": 0.16749345075525332, "grad_norm": 0.5368382334709167, "learning_rate": 9.4309986446153e-05, "loss": 1.7333, "step": 3005 }, { "epoch": 0.16754918900841648, "grad_norm": 0.5433456897735596, "learning_rate": 9.430586038059244e-05, "loss": 1.9837, "step": 3006 }, { "epoch": 0.16760492726157963, "grad_norm": 0.5077199339866638, "learning_rate": 9.430173290990829e-05, "loss": 1.7391, "step": 3007 }, { "epoch": 0.16766066551474276, "grad_norm": 0.49970632791519165, "learning_rate": 9.429760403423148e-05, "loss": 1.5325, "step": 3008 }, { "epoch": 0.16771640376790592, "grad_norm": 0.5068593621253967, "learning_rate": 9.429347375369295e-05, "loss": 1.5849, "step": 3009 }, { "epoch": 0.16777214202106905, "grad_norm": 0.5405229330062866, "learning_rate": 9.428934206842365e-05, "loss": 1.7995, "step": 3010 }, { "epoch": 0.1678278802742322, "grad_norm": 0.5368816256523132, "learning_rate": 9.428520897855469e-05, "loss": 1.7941, "step": 3011 }, { "epoch": 0.16788361852739536, "grad_norm": 0.5910351872444153, "learning_rate": 9.428107448421708e-05, "loss": 1.8987, "step": 3012 }, { "epoch": 0.1679393567805585, "grad_norm": 0.5387074947357178, "learning_rate": 9.427693858554196e-05, "loss": 1.2377, "step": 3013 }, { "epoch": 0.16799509503372165, "grad_norm": 0.5382748246192932, "learning_rate": 9.42728012826605e-05, "loss": 1.8915, "step": 3014 }, { "epoch": 0.16805083328688478, "grad_norm": 0.5706035494804382, "learning_rate": 9.426866257570391e-05, "loss": 1.9298, "step": 3015 }, { "epoch": 0.16810657154004793, "grad_norm": 0.517613410949707, "learning_rate": 9.426452246480347e-05, "loss": 1.6459, "step": 3016 }, { "epoch": 0.1681623097932111, "grad_norm": 0.5248231291770935, "learning_rate": 9.426038095009042e-05, "loss": 1.8506, "step": 3017 }, { "epoch": 0.16821804804637422, "grad_norm": 0.49280843138694763, "learning_rate": 9.425623803169616e-05, "loss": 1.5642, "step": 3018 }, { "epoch": 0.16827378629953738, "grad_norm": 0.5404548048973083, "learning_rate": 9.425209370975208e-05, "loss": 1.7475, "step": 3019 }, { "epoch": 0.1683295245527005, "grad_norm": 0.5196406245231628, "learning_rate": 9.424794798438958e-05, "loss": 1.8123, "step": 3020 }, { "epoch": 0.16838526280586366, "grad_norm": 0.5767018795013428, "learning_rate": 9.424380085574015e-05, "loss": 1.9773, "step": 3021 }, { "epoch": 0.16844100105902682, "grad_norm": 0.5589628219604492, "learning_rate": 9.423965232393532e-05, "loss": 1.8269, "step": 3022 }, { "epoch": 0.16849673931218995, "grad_norm": 0.5162323117256165, "learning_rate": 9.423550238910666e-05, "loss": 1.7838, "step": 3023 }, { "epoch": 0.1685524775653531, "grad_norm": 0.5301263332366943, "learning_rate": 9.423135105138577e-05, "loss": 1.7805, "step": 3024 }, { "epoch": 0.16860821581851626, "grad_norm": 0.5383440256118774, "learning_rate": 9.42271983109043e-05, "loss": 1.8054, "step": 3025 }, { "epoch": 0.1686639540716794, "grad_norm": 0.572410523891449, "learning_rate": 9.422304416779397e-05, "loss": 1.7666, "step": 3026 }, { "epoch": 0.16871969232484255, "grad_norm": 0.5496928691864014, "learning_rate": 9.421888862218651e-05, "loss": 1.8725, "step": 3027 }, { "epoch": 0.16877543057800568, "grad_norm": 0.5649563670158386, "learning_rate": 9.421473167421373e-05, "loss": 1.873, "step": 3028 }, { "epoch": 0.16883116883116883, "grad_norm": 0.5560464262962341, "learning_rate": 9.421057332400744e-05, "loss": 1.6385, "step": 3029 }, { "epoch": 0.168886907084332, "grad_norm": 0.5245364904403687, "learning_rate": 9.420641357169954e-05, "loss": 1.758, "step": 3030 }, { "epoch": 0.16894264533749512, "grad_norm": 0.5251185297966003, "learning_rate": 9.420225241742193e-05, "loss": 1.829, "step": 3031 }, { "epoch": 0.16899838359065827, "grad_norm": 0.5360503792762756, "learning_rate": 9.419808986130661e-05, "loss": 1.7447, "step": 3032 }, { "epoch": 0.1690541218438214, "grad_norm": 0.579368531703949, "learning_rate": 9.419392590348555e-05, "loss": 1.7367, "step": 3033 }, { "epoch": 0.16910986009698456, "grad_norm": 0.5943927764892578, "learning_rate": 9.418976054409084e-05, "loss": 1.8542, "step": 3034 }, { "epoch": 0.16916559835014772, "grad_norm": 0.5310322642326355, "learning_rate": 9.418559378325457e-05, "loss": 1.5941, "step": 3035 }, { "epoch": 0.16922133660331085, "grad_norm": 0.5201945304870605, "learning_rate": 9.418142562110888e-05, "loss": 1.6894, "step": 3036 }, { "epoch": 0.169277074856474, "grad_norm": 0.49601128697395325, "learning_rate": 9.417725605778598e-05, "loss": 1.5647, "step": 3037 }, { "epoch": 0.16933281310963713, "grad_norm": 0.5370486378669739, "learning_rate": 9.417308509341806e-05, "loss": 1.7843, "step": 3038 }, { "epoch": 0.1693885513628003, "grad_norm": 0.5515000820159912, "learning_rate": 9.416891272813747e-05, "loss": 1.8156, "step": 3039 }, { "epoch": 0.16944428961596344, "grad_norm": 0.5245648622512817, "learning_rate": 9.416473896207645e-05, "loss": 1.7029, "step": 3040 }, { "epoch": 0.16950002786912657, "grad_norm": 0.6024215817451477, "learning_rate": 9.416056379536744e-05, "loss": 1.8892, "step": 3041 }, { "epoch": 0.16955576612228973, "grad_norm": 0.5456023812294006, "learning_rate": 9.415638722814279e-05, "loss": 1.7344, "step": 3042 }, { "epoch": 0.16961150437545286, "grad_norm": 0.47283026576042175, "learning_rate": 9.415220926053501e-05, "loss": 1.4281, "step": 3043 }, { "epoch": 0.16966724262861602, "grad_norm": 0.5906921029090881, "learning_rate": 9.414802989267657e-05, "loss": 1.772, "step": 3044 }, { "epoch": 0.16972298088177917, "grad_norm": 0.5549463033676147, "learning_rate": 9.414384912470002e-05, "loss": 1.6814, "step": 3045 }, { "epoch": 0.1697787191349423, "grad_norm": 0.5007080435752869, "learning_rate": 9.413966695673795e-05, "loss": 1.7041, "step": 3046 }, { "epoch": 0.16983445738810546, "grad_norm": 0.5527877807617188, "learning_rate": 9.413548338892301e-05, "loss": 1.8597, "step": 3047 }, { "epoch": 0.16989019564126862, "grad_norm": 0.5755193829536438, "learning_rate": 9.413129842138786e-05, "loss": 2.115, "step": 3048 }, { "epoch": 0.16994593389443174, "grad_norm": 0.5897433161735535, "learning_rate": 9.412711205426521e-05, "loss": 1.5559, "step": 3049 }, { "epoch": 0.1700016721475949, "grad_norm": 0.5253439545631409, "learning_rate": 9.412292428768787e-05, "loss": 1.8423, "step": 3050 }, { "epoch": 0.17005741040075803, "grad_norm": 0.5220539569854736, "learning_rate": 9.411873512178862e-05, "loss": 1.6792, "step": 3051 }, { "epoch": 0.1701131486539212, "grad_norm": 0.5669887661933899, "learning_rate": 9.41145445567003e-05, "loss": 1.8432, "step": 3052 }, { "epoch": 0.17016888690708434, "grad_norm": 0.5661007761955261, "learning_rate": 9.411035259255585e-05, "loss": 1.9316, "step": 3053 }, { "epoch": 0.17022462516024747, "grad_norm": 0.5614895820617676, "learning_rate": 9.41061592294882e-05, "loss": 1.8668, "step": 3054 }, { "epoch": 0.17028036341341063, "grad_norm": 0.541671872138977, "learning_rate": 9.410196446763034e-05, "loss": 1.9025, "step": 3055 }, { "epoch": 0.17033610166657376, "grad_norm": 0.54454106092453, "learning_rate": 9.409776830711528e-05, "loss": 1.7351, "step": 3056 }, { "epoch": 0.17039183991973691, "grad_norm": 0.581135094165802, "learning_rate": 9.409357074807612e-05, "loss": 2.0981, "step": 3057 }, { "epoch": 0.17044757817290007, "grad_norm": 0.5024539232254028, "learning_rate": 9.4089371790646e-05, "loss": 1.74, "step": 3058 }, { "epoch": 0.1705033164260632, "grad_norm": 0.527542233467102, "learning_rate": 9.408517143495806e-05, "loss": 1.7409, "step": 3059 }, { "epoch": 0.17055905467922636, "grad_norm": 0.5976712107658386, "learning_rate": 9.40809696811455e-05, "loss": 1.6624, "step": 3060 }, { "epoch": 0.1706147929323895, "grad_norm": 0.5328633785247803, "learning_rate": 9.40767665293416e-05, "loss": 1.7723, "step": 3061 }, { "epoch": 0.17067053118555264, "grad_norm": 0.5550236701965332, "learning_rate": 9.407256197967965e-05, "loss": 1.771, "step": 3062 }, { "epoch": 0.1707262694387158, "grad_norm": 0.5482365489006042, "learning_rate": 9.4068356032293e-05, "loss": 1.5427, "step": 3063 }, { "epoch": 0.17078200769187893, "grad_norm": 0.5379420518875122, "learning_rate": 9.406414868731502e-05, "loss": 1.7884, "step": 3064 }, { "epoch": 0.17083774594504209, "grad_norm": 0.5322206020355225, "learning_rate": 9.405993994487917e-05, "loss": 1.7756, "step": 3065 }, { "epoch": 0.17089348419820521, "grad_norm": 0.5303000807762146, "learning_rate": 9.40557298051189e-05, "loss": 1.7589, "step": 3066 }, { "epoch": 0.17094922245136837, "grad_norm": 0.5660407543182373, "learning_rate": 9.405151826816776e-05, "loss": 1.7427, "step": 3067 }, { "epoch": 0.17100496070453153, "grad_norm": 0.5341696739196777, "learning_rate": 9.404730533415929e-05, "loss": 1.8757, "step": 3068 }, { "epoch": 0.17106069895769466, "grad_norm": 0.533214271068573, "learning_rate": 9.40430910032271e-05, "loss": 1.8219, "step": 3069 }, { "epoch": 0.1711164372108578, "grad_norm": 0.6056374311447144, "learning_rate": 9.403887527550486e-05, "loss": 1.9808, "step": 3070 }, { "epoch": 0.17117217546402097, "grad_norm": 0.5189699530601501, "learning_rate": 9.403465815112626e-05, "loss": 1.6841, "step": 3071 }, { "epoch": 0.1712279137171841, "grad_norm": 0.5255261659622192, "learning_rate": 9.403043963022505e-05, "loss": 1.5559, "step": 3072 }, { "epoch": 0.17128365197034726, "grad_norm": 0.8432055115699768, "learning_rate": 9.4026219712935e-05, "loss": 1.8316, "step": 3073 }, { "epoch": 0.17133939022351038, "grad_norm": 0.5276064276695251, "learning_rate": 9.402199839938996e-05, "loss": 1.678, "step": 3074 }, { "epoch": 0.17139512847667354, "grad_norm": 0.5075768232345581, "learning_rate": 9.401777568972379e-05, "loss": 1.5931, "step": 3075 }, { "epoch": 0.1714508667298367, "grad_norm": 0.5471227169036865, "learning_rate": 9.401355158407042e-05, "loss": 1.8761, "step": 3076 }, { "epoch": 0.17150660498299983, "grad_norm": 0.5062270760536194, "learning_rate": 9.400932608256381e-05, "loss": 1.6682, "step": 3077 }, { "epoch": 0.17156234323616298, "grad_norm": 0.5492522716522217, "learning_rate": 9.400509918533798e-05, "loss": 1.6889, "step": 3078 }, { "epoch": 0.1716180814893261, "grad_norm": 0.5703136324882507, "learning_rate": 9.400087089252695e-05, "loss": 1.6925, "step": 3079 }, { "epoch": 0.17167381974248927, "grad_norm": 0.5027966499328613, "learning_rate": 9.399664120426484e-05, "loss": 1.4425, "step": 3080 }, { "epoch": 0.17172955799565243, "grad_norm": 0.558413028717041, "learning_rate": 9.39924101206858e-05, "loss": 1.6485, "step": 3081 }, { "epoch": 0.17178529624881556, "grad_norm": 0.6047654151916504, "learning_rate": 9.3988177641924e-05, "loss": 1.835, "step": 3082 }, { "epoch": 0.1718410345019787, "grad_norm": 0.5760734677314758, "learning_rate": 9.398394376811368e-05, "loss": 1.7104, "step": 3083 }, { "epoch": 0.17189677275514184, "grad_norm": 0.5076540112495422, "learning_rate": 9.397970849938911e-05, "loss": 1.5808, "step": 3084 }, { "epoch": 0.171952511008305, "grad_norm": 0.5645167827606201, "learning_rate": 9.39754718358846e-05, "loss": 1.771, "step": 3085 }, { "epoch": 0.17200824926146815, "grad_norm": 0.5443428158760071, "learning_rate": 9.397123377773451e-05, "loss": 1.8713, "step": 3086 }, { "epoch": 0.17206398751463128, "grad_norm": 0.513888418674469, "learning_rate": 9.396699432507325e-05, "loss": 1.5279, "step": 3087 }, { "epoch": 0.17211972576779444, "grad_norm": 0.5408303141593933, "learning_rate": 9.396275347803529e-05, "loss": 1.8924, "step": 3088 }, { "epoch": 0.1721754640209576, "grad_norm": 0.5284982323646545, "learning_rate": 9.395851123675512e-05, "loss": 1.7562, "step": 3089 }, { "epoch": 0.17223120227412073, "grad_norm": 0.5364746451377869, "learning_rate": 9.395426760136726e-05, "loss": 1.599, "step": 3090 }, { "epoch": 0.17228694052728388, "grad_norm": 0.5527182817459106, "learning_rate": 9.39500225720063e-05, "loss": 1.7657, "step": 3091 }, { "epoch": 0.172342678780447, "grad_norm": 0.5294612646102905, "learning_rate": 9.394577614880687e-05, "loss": 1.684, "step": 3092 }, { "epoch": 0.17239841703361017, "grad_norm": 0.5614673495292664, "learning_rate": 9.394152833190364e-05, "loss": 1.8619, "step": 3093 }, { "epoch": 0.17245415528677333, "grad_norm": 0.5280752182006836, "learning_rate": 9.393727912143134e-05, "loss": 1.6454, "step": 3094 }, { "epoch": 0.17250989353993645, "grad_norm": 0.5236919522285461, "learning_rate": 9.39330285175247e-05, "loss": 1.6498, "step": 3095 }, { "epoch": 0.1725656317930996, "grad_norm": 0.5192380547523499, "learning_rate": 9.392877652031855e-05, "loss": 1.8345, "step": 3096 }, { "epoch": 0.17262137004626274, "grad_norm": 0.5223302841186523, "learning_rate": 9.392452312994773e-05, "loss": 1.5056, "step": 3097 }, { "epoch": 0.1726771082994259, "grad_norm": 0.5231219530105591, "learning_rate": 9.392026834654714e-05, "loss": 1.5868, "step": 3098 }, { "epoch": 0.17273284655258905, "grad_norm": 0.5619219541549683, "learning_rate": 9.39160121702517e-05, "loss": 1.8988, "step": 3099 }, { "epoch": 0.17278858480575218, "grad_norm": 0.5591604709625244, "learning_rate": 9.391175460119642e-05, "loss": 1.7228, "step": 3100 }, { "epoch": 0.17284432305891534, "grad_norm": 0.5290101766586304, "learning_rate": 9.39074956395163e-05, "loss": 1.6436, "step": 3101 }, { "epoch": 0.17290006131207847, "grad_norm": 0.5596829056739807, "learning_rate": 9.390323528534641e-05, "loss": 1.498, "step": 3102 }, { "epoch": 0.17295579956524162, "grad_norm": 0.5178213119506836, "learning_rate": 9.389897353882188e-05, "loss": 1.6834, "step": 3103 }, { "epoch": 0.17301153781840478, "grad_norm": 0.46845757961273193, "learning_rate": 9.389471040007784e-05, "loss": 1.4012, "step": 3104 }, { "epoch": 0.1730672760715679, "grad_norm": 0.5671401619911194, "learning_rate": 9.389044586924953e-05, "loss": 1.7005, "step": 3105 }, { "epoch": 0.17312301432473107, "grad_norm": 0.5250539779663086, "learning_rate": 9.388617994647218e-05, "loss": 1.6934, "step": 3106 }, { "epoch": 0.1731787525778942, "grad_norm": 0.5091891884803772, "learning_rate": 9.388191263188107e-05, "loss": 1.5041, "step": 3107 }, { "epoch": 0.17323449083105735, "grad_norm": 0.5298328995704651, "learning_rate": 9.387764392561153e-05, "loss": 1.6184, "step": 3108 }, { "epoch": 0.1732902290842205, "grad_norm": 0.5605019330978394, "learning_rate": 9.387337382779894e-05, "loss": 1.8302, "step": 3109 }, { "epoch": 0.17334596733738364, "grad_norm": 0.554153561592102, "learning_rate": 9.386910233857875e-05, "loss": 1.6565, "step": 3110 }, { "epoch": 0.1734017055905468, "grad_norm": 0.5952569246292114, "learning_rate": 9.386482945808641e-05, "loss": 1.5957, "step": 3111 }, { "epoch": 0.17345744384370995, "grad_norm": 0.6842632293701172, "learning_rate": 9.386055518645742e-05, "loss": 1.7147, "step": 3112 }, { "epoch": 0.17351318209687308, "grad_norm": 0.6011619567871094, "learning_rate": 9.385627952382736e-05, "loss": 2.0401, "step": 3113 }, { "epoch": 0.17356892035003624, "grad_norm": 0.5976441502571106, "learning_rate": 9.38520024703318e-05, "loss": 1.9242, "step": 3114 }, { "epoch": 0.17362465860319937, "grad_norm": 0.4991317689418793, "learning_rate": 9.38477240261064e-05, "loss": 1.689, "step": 3115 }, { "epoch": 0.17368039685636252, "grad_norm": 0.5823774337768555, "learning_rate": 9.384344419128684e-05, "loss": 1.7896, "step": 3116 }, { "epoch": 0.17373613510952568, "grad_norm": 0.584511399269104, "learning_rate": 9.383916296600886e-05, "loss": 1.7828, "step": 3117 }, { "epoch": 0.1737918733626888, "grad_norm": 0.5839495062828064, "learning_rate": 9.383488035040821e-05, "loss": 1.9487, "step": 3118 }, { "epoch": 0.17384761161585197, "grad_norm": 0.5381820201873779, "learning_rate": 9.383059634462077e-05, "loss": 1.6792, "step": 3119 }, { "epoch": 0.1739033498690151, "grad_norm": 0.5147883892059326, "learning_rate": 9.382631094878234e-05, "loss": 1.6627, "step": 3120 }, { "epoch": 0.17395908812217825, "grad_norm": 0.6467978358268738, "learning_rate": 9.382202416302885e-05, "loss": 1.7446, "step": 3121 }, { "epoch": 0.1740148263753414, "grad_norm": 0.5035672187805176, "learning_rate": 9.381773598749626e-05, "loss": 1.6078, "step": 3122 }, { "epoch": 0.17407056462850454, "grad_norm": 0.5837130546569824, "learning_rate": 9.381344642232056e-05, "loss": 1.792, "step": 3123 }, { "epoch": 0.1741263028816677, "grad_norm": 0.5331088900566101, "learning_rate": 9.380915546763778e-05, "loss": 1.788, "step": 3124 }, { "epoch": 0.17418204113483082, "grad_norm": 0.5427802801132202, "learning_rate": 9.380486312358402e-05, "loss": 1.8515, "step": 3125 }, { "epoch": 0.17423777938799398, "grad_norm": 0.4916117489337921, "learning_rate": 9.380056939029541e-05, "loss": 1.5184, "step": 3126 }, { "epoch": 0.17429351764115714, "grad_norm": 0.559158980846405, "learning_rate": 9.379627426790812e-05, "loss": 1.8659, "step": 3127 }, { "epoch": 0.17434925589432027, "grad_norm": 0.5941457152366638, "learning_rate": 9.379197775655833e-05, "loss": 1.7891, "step": 3128 }, { "epoch": 0.17440499414748342, "grad_norm": 0.4794413447380066, "learning_rate": 9.378767985638235e-05, "loss": 1.4975, "step": 3129 }, { "epoch": 0.17446073240064655, "grad_norm": 0.5934321284294128, "learning_rate": 9.378338056751647e-05, "loss": 1.9019, "step": 3130 }, { "epoch": 0.1745164706538097, "grad_norm": 0.5290476679801941, "learning_rate": 9.377907989009702e-05, "loss": 1.7563, "step": 3131 }, { "epoch": 0.17457220890697286, "grad_norm": 0.5909081101417542, "learning_rate": 9.37747778242604e-05, "loss": 2.009, "step": 3132 }, { "epoch": 0.174627947160136, "grad_norm": 0.5411567687988281, "learning_rate": 9.377047437014308e-05, "loss": 1.8264, "step": 3133 }, { "epoch": 0.17468368541329915, "grad_norm": 0.5046765208244324, "learning_rate": 9.376616952788149e-05, "loss": 1.6131, "step": 3134 }, { "epoch": 0.1747394236664623, "grad_norm": 0.528154194355011, "learning_rate": 9.376186329761219e-05, "loss": 1.7159, "step": 3135 }, { "epoch": 0.17479516191962544, "grad_norm": 0.5536481142044067, "learning_rate": 9.375755567947173e-05, "loss": 1.5203, "step": 3136 }, { "epoch": 0.1748509001727886, "grad_norm": 0.5683685541152954, "learning_rate": 9.375324667359673e-05, "loss": 1.7154, "step": 3137 }, { "epoch": 0.17490663842595172, "grad_norm": 0.4969169497489929, "learning_rate": 9.374893628012384e-05, "loss": 1.7277, "step": 3138 }, { "epoch": 0.17496237667911488, "grad_norm": 0.548058032989502, "learning_rate": 9.374462449918976e-05, "loss": 1.7931, "step": 3139 }, { "epoch": 0.17501811493227803, "grad_norm": 0.5391299724578857, "learning_rate": 9.374031133093124e-05, "loss": 1.8076, "step": 3140 }, { "epoch": 0.17507385318544116, "grad_norm": 0.5356679558753967, "learning_rate": 9.373599677548508e-05, "loss": 1.7212, "step": 3141 }, { "epoch": 0.17512959143860432, "grad_norm": 0.5841724276542664, "learning_rate": 9.373168083298809e-05, "loss": 1.9175, "step": 3142 }, { "epoch": 0.17518532969176745, "grad_norm": 0.5568740963935852, "learning_rate": 9.372736350357717e-05, "loss": 1.842, "step": 3143 }, { "epoch": 0.1752410679449306, "grad_norm": 0.5539031028747559, "learning_rate": 9.372304478738922e-05, "loss": 1.8881, "step": 3144 }, { "epoch": 0.17529680619809376, "grad_norm": 0.5519389510154724, "learning_rate": 9.371872468456122e-05, "loss": 1.7381, "step": 3145 }, { "epoch": 0.1753525444512569, "grad_norm": 0.5324805378913879, "learning_rate": 9.371440319523016e-05, "loss": 1.745, "step": 3146 }, { "epoch": 0.17540828270442005, "grad_norm": 0.5449910759925842, "learning_rate": 9.37100803195331e-05, "loss": 1.8071, "step": 3147 }, { "epoch": 0.17546402095758318, "grad_norm": 0.5846375823020935, "learning_rate": 9.370575605760716e-05, "loss": 1.8659, "step": 3148 }, { "epoch": 0.17551975921074633, "grad_norm": 0.4958127737045288, "learning_rate": 9.370143040958943e-05, "loss": 1.5791, "step": 3149 }, { "epoch": 0.1755754974639095, "grad_norm": 0.5119603276252747, "learning_rate": 9.369710337561714e-05, "loss": 1.7657, "step": 3150 }, { "epoch": 0.17563123571707262, "grad_norm": 0.5698620080947876, "learning_rate": 9.36927749558275e-05, "loss": 2.0541, "step": 3151 }, { "epoch": 0.17568697397023578, "grad_norm": 0.5704925656318665, "learning_rate": 9.368844515035779e-05, "loss": 1.762, "step": 3152 }, { "epoch": 0.1757427122233989, "grad_norm": 0.5676224231719971, "learning_rate": 9.368411395934533e-05, "loss": 1.5928, "step": 3153 }, { "epoch": 0.17579845047656206, "grad_norm": 0.5878868699073792, "learning_rate": 9.367978138292747e-05, "loss": 1.9292, "step": 3154 }, { "epoch": 0.17585418872972522, "grad_norm": 0.5323675274848938, "learning_rate": 9.36754474212416e-05, "loss": 1.8832, "step": 3155 }, { "epoch": 0.17590992698288835, "grad_norm": 0.49846091866493225, "learning_rate": 9.36711120744252e-05, "loss": 1.4679, "step": 3156 }, { "epoch": 0.1759656652360515, "grad_norm": 0.5483475923538208, "learning_rate": 9.366677534261572e-05, "loss": 1.7744, "step": 3157 }, { "epoch": 0.17602140348921466, "grad_norm": 0.5628114938735962, "learning_rate": 9.366243722595074e-05, "loss": 1.8169, "step": 3158 }, { "epoch": 0.1760771417423778, "grad_norm": 0.5500927567481995, "learning_rate": 9.365809772456782e-05, "loss": 1.8714, "step": 3159 }, { "epoch": 0.17613287999554095, "grad_norm": 0.5269673466682434, "learning_rate": 9.365375683860458e-05, "loss": 1.797, "step": 3160 }, { "epoch": 0.17618861824870408, "grad_norm": 0.5542075037956238, "learning_rate": 9.36494145681987e-05, "loss": 1.8027, "step": 3161 }, { "epoch": 0.17624435650186723, "grad_norm": 0.5421326756477356, "learning_rate": 9.364507091348788e-05, "loss": 1.7254, "step": 3162 }, { "epoch": 0.1763000947550304, "grad_norm": 0.4979914128780365, "learning_rate": 9.364072587460988e-05, "loss": 1.7505, "step": 3163 }, { "epoch": 0.17635583300819352, "grad_norm": 0.5363655686378479, "learning_rate": 9.363637945170249e-05, "loss": 1.7651, "step": 3164 }, { "epoch": 0.17641157126135668, "grad_norm": 0.5159875750541687, "learning_rate": 9.363203164490356e-05, "loss": 1.7096, "step": 3165 }, { "epoch": 0.1764673095145198, "grad_norm": 0.590908408164978, "learning_rate": 9.362768245435098e-05, "loss": 2.0557, "step": 3166 }, { "epoch": 0.17652304776768296, "grad_norm": 0.5476133823394775, "learning_rate": 9.362333188018269e-05, "loss": 1.6362, "step": 3167 }, { "epoch": 0.17657878602084612, "grad_norm": 0.5187797546386719, "learning_rate": 9.361897992253665e-05, "loss": 1.6019, "step": 3168 }, { "epoch": 0.17663452427400925, "grad_norm": 0.5152827501296997, "learning_rate": 9.361462658155089e-05, "loss": 1.7042, "step": 3169 }, { "epoch": 0.1766902625271724, "grad_norm": 0.5961150527000427, "learning_rate": 9.361027185736346e-05, "loss": 1.7224, "step": 3170 }, { "epoch": 0.17674600078033553, "grad_norm": 0.5234068632125854, "learning_rate": 9.360591575011245e-05, "loss": 1.6534, "step": 3171 }, { "epoch": 0.1768017390334987, "grad_norm": 0.5417289137840271, "learning_rate": 9.360155825993607e-05, "loss": 1.8964, "step": 3172 }, { "epoch": 0.17685747728666185, "grad_norm": 0.535892903804779, "learning_rate": 9.359719938697246e-05, "loss": 1.7262, "step": 3173 }, { "epoch": 0.17691321553982498, "grad_norm": 0.5440612435340881, "learning_rate": 9.359283913135988e-05, "loss": 1.7775, "step": 3174 }, { "epoch": 0.17696895379298813, "grad_norm": 0.6108183264732361, "learning_rate": 9.358847749323659e-05, "loss": 1.9756, "step": 3175 }, { "epoch": 0.17702469204615126, "grad_norm": 0.5500672459602356, "learning_rate": 9.358411447274094e-05, "loss": 1.7427, "step": 3176 }, { "epoch": 0.17708043029931442, "grad_norm": 0.5370178818702698, "learning_rate": 9.357975007001129e-05, "loss": 1.8076, "step": 3177 }, { "epoch": 0.17713616855247757, "grad_norm": 0.5063850283622742, "learning_rate": 9.357538428518607e-05, "loss": 1.6826, "step": 3178 }, { "epoch": 0.1771919068056407, "grad_norm": 0.5165611505508423, "learning_rate": 9.357101711840372e-05, "loss": 1.6545, "step": 3179 }, { "epoch": 0.17724764505880386, "grad_norm": 0.521656334400177, "learning_rate": 9.356664856980273e-05, "loss": 1.4337, "step": 3180 }, { "epoch": 0.17730338331196702, "grad_norm": 0.527341902256012, "learning_rate": 9.356227863952168e-05, "loss": 1.7241, "step": 3181 }, { "epoch": 0.17735912156513015, "grad_norm": 0.494210422039032, "learning_rate": 9.355790732769911e-05, "loss": 1.5474, "step": 3182 }, { "epoch": 0.1774148598182933, "grad_norm": 0.5171836614608765, "learning_rate": 9.35535346344737e-05, "loss": 1.5795, "step": 3183 }, { "epoch": 0.17747059807145643, "grad_norm": 0.5571975111961365, "learning_rate": 9.354916055998409e-05, "loss": 1.8093, "step": 3184 }, { "epoch": 0.1775263363246196, "grad_norm": 0.5996416807174683, "learning_rate": 9.354478510436902e-05, "loss": 2.0041, "step": 3185 }, { "epoch": 0.17758207457778274, "grad_norm": 0.4972604811191559, "learning_rate": 9.354040826776727e-05, "loss": 1.7329, "step": 3186 }, { "epoch": 0.17763781283094587, "grad_norm": 0.5599552392959595, "learning_rate": 9.35360300503176e-05, "loss": 1.7857, "step": 3187 }, { "epoch": 0.17769355108410903, "grad_norm": 0.5476880669593811, "learning_rate": 9.35316504521589e-05, "loss": 1.7757, "step": 3188 }, { "epoch": 0.17774928933727216, "grad_norm": 0.5362497568130493, "learning_rate": 9.352726947343006e-05, "loss": 1.7656, "step": 3189 }, { "epoch": 0.17780502759043532, "grad_norm": 0.5269262194633484, "learning_rate": 9.352288711427001e-05, "loss": 1.7716, "step": 3190 }, { "epoch": 0.17786076584359847, "grad_norm": 0.5733572244644165, "learning_rate": 9.351850337481773e-05, "loss": 1.7623, "step": 3191 }, { "epoch": 0.1779165040967616, "grad_norm": 0.5491241812705994, "learning_rate": 9.351411825521228e-05, "loss": 1.7835, "step": 3192 }, { "epoch": 0.17797224234992476, "grad_norm": 0.5553460121154785, "learning_rate": 9.350973175559267e-05, "loss": 1.9064, "step": 3193 }, { "epoch": 0.1780279806030879, "grad_norm": 0.5257185101509094, "learning_rate": 9.350534387609807e-05, "loss": 1.7245, "step": 3194 }, { "epoch": 0.17808371885625104, "grad_norm": 0.5201014876365662, "learning_rate": 9.35009546168676e-05, "loss": 1.6991, "step": 3195 }, { "epoch": 0.1781394571094142, "grad_norm": 0.5365905165672302, "learning_rate": 9.34965639780405e-05, "loss": 1.747, "step": 3196 }, { "epoch": 0.17819519536257733, "grad_norm": 0.5471792221069336, "learning_rate": 9.349217195975598e-05, "loss": 1.8114, "step": 3197 }, { "epoch": 0.1782509336157405, "grad_norm": 0.5407313704490662, "learning_rate": 9.348777856215334e-05, "loss": 1.7719, "step": 3198 }, { "epoch": 0.17830667186890362, "grad_norm": 0.5418484807014465, "learning_rate": 9.348338378537192e-05, "loss": 1.7989, "step": 3199 }, { "epoch": 0.17836241012206677, "grad_norm": 0.5235376954078674, "learning_rate": 9.347898762955109e-05, "loss": 1.5998, "step": 3200 }, { "epoch": 0.17841814837522993, "grad_norm": 0.5582895874977112, "learning_rate": 9.347459009483028e-05, "loss": 1.7352, "step": 3201 }, { "epoch": 0.17847388662839306, "grad_norm": 0.5512102246284485, "learning_rate": 9.347019118134893e-05, "loss": 1.8595, "step": 3202 }, { "epoch": 0.17852962488155621, "grad_norm": 0.5874474048614502, "learning_rate": 9.346579088924658e-05, "loss": 1.8312, "step": 3203 }, { "epoch": 0.17858536313471937, "grad_norm": 0.5523637533187866, "learning_rate": 9.346138921866276e-05, "loss": 1.9124, "step": 3204 }, { "epoch": 0.1786411013878825, "grad_norm": 0.5245184898376465, "learning_rate": 9.345698616973707e-05, "loss": 1.8279, "step": 3205 }, { "epoch": 0.17869683964104566, "grad_norm": 0.5538264513015747, "learning_rate": 9.345258174260915e-05, "loss": 1.8218, "step": 3206 }, { "epoch": 0.1787525778942088, "grad_norm": 0.5474498271942139, "learning_rate": 9.344817593741868e-05, "loss": 1.6772, "step": 3207 }, { "epoch": 0.17880831614737194, "grad_norm": 0.5437337756156921, "learning_rate": 9.344376875430539e-05, "loss": 1.8402, "step": 3208 }, { "epoch": 0.1788640544005351, "grad_norm": 0.6069798469543457, "learning_rate": 9.343936019340906e-05, "loss": 2.0245, "step": 3209 }, { "epoch": 0.17891979265369823, "grad_norm": 0.5451731085777283, "learning_rate": 9.343495025486948e-05, "loss": 1.7243, "step": 3210 }, { "epoch": 0.17897553090686139, "grad_norm": 0.5307853817939758, "learning_rate": 9.343053893882654e-05, "loss": 1.8062, "step": 3211 }, { "epoch": 0.17903126916002451, "grad_norm": 0.5642760992050171, "learning_rate": 9.34261262454201e-05, "loss": 1.9111, "step": 3212 }, { "epoch": 0.17908700741318767, "grad_norm": 0.5641029477119446, "learning_rate": 9.342171217479014e-05, "loss": 1.892, "step": 3213 }, { "epoch": 0.17914274566635083, "grad_norm": 0.5118708610534668, "learning_rate": 9.341729672707664e-05, "loss": 1.7303, "step": 3214 }, { "epoch": 0.17919848391951396, "grad_norm": 0.5048193335533142, "learning_rate": 9.341287990241962e-05, "loss": 1.5011, "step": 3215 }, { "epoch": 0.1792542221726771, "grad_norm": 0.5508407950401306, "learning_rate": 9.340846170095917e-05, "loss": 1.8355, "step": 3216 }, { "epoch": 0.17930996042584024, "grad_norm": 0.5779476165771484, "learning_rate": 9.34040421228354e-05, "loss": 1.8892, "step": 3217 }, { "epoch": 0.1793656986790034, "grad_norm": 0.5211353898048401, "learning_rate": 9.339962116818848e-05, "loss": 1.6359, "step": 3218 }, { "epoch": 0.17942143693216656, "grad_norm": 0.5479955077171326, "learning_rate": 9.339519883715862e-05, "loss": 1.7594, "step": 3219 }, { "epoch": 0.17947717518532968, "grad_norm": 0.49651384353637695, "learning_rate": 9.339077512988606e-05, "loss": 1.5873, "step": 3220 }, { "epoch": 0.17953291343849284, "grad_norm": 0.569810152053833, "learning_rate": 9.338635004651108e-05, "loss": 1.6675, "step": 3221 }, { "epoch": 0.17958865169165597, "grad_norm": 0.5437332987785339, "learning_rate": 9.338192358717406e-05, "loss": 1.8268, "step": 3222 }, { "epoch": 0.17964438994481913, "grad_norm": 0.5670780539512634, "learning_rate": 9.337749575201535e-05, "loss": 1.6647, "step": 3223 }, { "epoch": 0.17970012819798228, "grad_norm": 0.5969633460044861, "learning_rate": 9.337306654117538e-05, "loss": 1.7202, "step": 3224 }, { "epoch": 0.1797558664511454, "grad_norm": 0.48552221059799194, "learning_rate": 9.336863595479462e-05, "loss": 1.4645, "step": 3225 }, { "epoch": 0.17981160470430857, "grad_norm": 0.5412662625312805, "learning_rate": 9.33642039930136e-05, "loss": 1.8443, "step": 3226 }, { "epoch": 0.17986734295747173, "grad_norm": 0.5973519682884216, "learning_rate": 9.335977065597285e-05, "loss": 1.98, "step": 3227 }, { "epoch": 0.17992308121063486, "grad_norm": 0.5288311243057251, "learning_rate": 9.335533594381297e-05, "loss": 1.5549, "step": 3228 }, { "epoch": 0.179978819463798, "grad_norm": 0.5504105687141418, "learning_rate": 9.335089985667463e-05, "loss": 1.5479, "step": 3229 }, { "epoch": 0.18003455771696114, "grad_norm": 0.4889037609100342, "learning_rate": 9.334646239469848e-05, "loss": 1.7899, "step": 3230 }, { "epoch": 0.1800902959701243, "grad_norm": 0.5372660756111145, "learning_rate": 9.334202355802528e-05, "loss": 1.7351, "step": 3231 }, { "epoch": 0.18014603422328745, "grad_norm": 0.5164480209350586, "learning_rate": 9.333758334679581e-05, "loss": 1.6461, "step": 3232 }, { "epoch": 0.18020177247645058, "grad_norm": 0.539726972579956, "learning_rate": 9.333314176115084e-05, "loss": 1.6368, "step": 3233 }, { "epoch": 0.18025751072961374, "grad_norm": 0.6785762310028076, "learning_rate": 9.33286988012313e-05, "loss": 2.0446, "step": 3234 }, { "epoch": 0.18031324898277687, "grad_norm": 0.580847442150116, "learning_rate": 9.332425446717803e-05, "loss": 1.8455, "step": 3235 }, { "epoch": 0.18036898723594003, "grad_norm": 0.5236613154411316, "learning_rate": 9.331980875913202e-05, "loss": 1.4925, "step": 3236 }, { "epoch": 0.18042472548910318, "grad_norm": 0.5626049637794495, "learning_rate": 9.331536167723423e-05, "loss": 1.7695, "step": 3237 }, { "epoch": 0.1804804637422663, "grad_norm": 0.5435861349105835, "learning_rate": 9.331091322162573e-05, "loss": 1.8594, "step": 3238 }, { "epoch": 0.18053620199542947, "grad_norm": 0.5868507027626038, "learning_rate": 9.330646339244759e-05, "loss": 1.8194, "step": 3239 }, { "epoch": 0.1805919402485926, "grad_norm": 0.5488845705986023, "learning_rate": 9.330201218984092e-05, "loss": 1.6584, "step": 3240 }, { "epoch": 0.18064767850175575, "grad_norm": 0.5238907933235168, "learning_rate": 9.329755961394688e-05, "loss": 1.757, "step": 3241 }, { "epoch": 0.1807034167549189, "grad_norm": 0.5120671987533569, "learning_rate": 9.32931056649067e-05, "loss": 1.6786, "step": 3242 }, { "epoch": 0.18075915500808204, "grad_norm": 0.49454161524772644, "learning_rate": 9.328865034286161e-05, "loss": 1.457, "step": 3243 }, { "epoch": 0.1808148932612452, "grad_norm": 0.5296444892883301, "learning_rate": 9.328419364795295e-05, "loss": 1.691, "step": 3244 }, { "epoch": 0.18087063151440833, "grad_norm": 0.5104671120643616, "learning_rate": 9.327973558032201e-05, "loss": 1.6702, "step": 3245 }, { "epoch": 0.18092636976757148, "grad_norm": 0.5683085322380066, "learning_rate": 9.32752761401102e-05, "loss": 1.6912, "step": 3246 }, { "epoch": 0.18098210802073464, "grad_norm": 0.5360772609710693, "learning_rate": 9.327081532745896e-05, "loss": 1.7894, "step": 3247 }, { "epoch": 0.18103784627389777, "grad_norm": 0.6272693872451782, "learning_rate": 9.326635314250971e-05, "loss": 2.0331, "step": 3248 }, { "epoch": 0.18109358452706092, "grad_norm": 0.5494347810745239, "learning_rate": 9.326188958540403e-05, "loss": 1.8261, "step": 3249 }, { "epoch": 0.18114932278022408, "grad_norm": 0.5473103523254395, "learning_rate": 9.325742465628342e-05, "loss": 1.5244, "step": 3250 }, { "epoch": 0.1812050610333872, "grad_norm": 0.5626412034034729, "learning_rate": 9.325295835528953e-05, "loss": 1.8512, "step": 3251 }, { "epoch": 0.18126079928655037, "grad_norm": 0.5165623426437378, "learning_rate": 9.324849068256397e-05, "loss": 1.8405, "step": 3252 }, { "epoch": 0.1813165375397135, "grad_norm": 0.5183326601982117, "learning_rate": 9.324402163824846e-05, "loss": 1.7193, "step": 3253 }, { "epoch": 0.18137227579287665, "grad_norm": 0.5188653469085693, "learning_rate": 9.323955122248468e-05, "loss": 1.6715, "step": 3254 }, { "epoch": 0.1814280140460398, "grad_norm": 0.5316330194473267, "learning_rate": 9.323507943541447e-05, "loss": 1.5796, "step": 3255 }, { "epoch": 0.18148375229920294, "grad_norm": 0.5456557869911194, "learning_rate": 9.323060627717961e-05, "loss": 1.7856, "step": 3256 }, { "epoch": 0.1815394905523661, "grad_norm": 0.5671826004981995, "learning_rate": 9.322613174792197e-05, "loss": 1.7715, "step": 3257 }, { "epoch": 0.18159522880552922, "grad_norm": 0.5530715584754944, "learning_rate": 9.322165584778347e-05, "loss": 1.9437, "step": 3258 }, { "epoch": 0.18165096705869238, "grad_norm": 0.5097282528877258, "learning_rate": 9.321717857690601e-05, "loss": 1.5789, "step": 3259 }, { "epoch": 0.18170670531185554, "grad_norm": 0.5106785297393799, "learning_rate": 9.321269993543166e-05, "loss": 1.7718, "step": 3260 }, { "epoch": 0.18176244356501867, "grad_norm": 0.5174189209938049, "learning_rate": 9.320821992350239e-05, "loss": 1.6088, "step": 3261 }, { "epoch": 0.18181818181818182, "grad_norm": 0.5284159779548645, "learning_rate": 9.320373854126032e-05, "loss": 1.6355, "step": 3262 }, { "epoch": 0.18187392007134495, "grad_norm": 0.5431947708129883, "learning_rate": 9.319925578884754e-05, "loss": 1.8282, "step": 3263 }, { "epoch": 0.1819296583245081, "grad_norm": 0.5244488716125488, "learning_rate": 9.319477166640626e-05, "loss": 1.8765, "step": 3264 }, { "epoch": 0.18198539657767127, "grad_norm": 0.5338707566261292, "learning_rate": 9.319028617407865e-05, "loss": 1.7684, "step": 3265 }, { "epoch": 0.1820411348308344, "grad_norm": 0.5929536819458008, "learning_rate": 9.318579931200697e-05, "loss": 1.9083, "step": 3266 }, { "epoch": 0.18209687308399755, "grad_norm": 0.5214221477508545, "learning_rate": 9.318131108033355e-05, "loss": 1.6844, "step": 3267 }, { "epoch": 0.18215261133716068, "grad_norm": 0.5370472073554993, "learning_rate": 9.31768214792007e-05, "loss": 1.9451, "step": 3268 }, { "epoch": 0.18220834959032384, "grad_norm": 0.5181378722190857, "learning_rate": 9.31723305087508e-05, "loss": 1.7534, "step": 3269 }, { "epoch": 0.182264087843487, "grad_norm": 0.5766522884368896, "learning_rate": 9.316783816912629e-05, "loss": 1.876, "step": 3270 }, { "epoch": 0.18231982609665012, "grad_norm": 0.5224177241325378, "learning_rate": 9.316334446046966e-05, "loss": 1.7254, "step": 3271 }, { "epoch": 0.18237556434981328, "grad_norm": 0.5871415138244629, "learning_rate": 9.315884938292339e-05, "loss": 1.6292, "step": 3272 }, { "epoch": 0.18243130260297644, "grad_norm": 0.5917293429374695, "learning_rate": 9.315435293663005e-05, "loss": 2.0649, "step": 3273 }, { "epoch": 0.18248704085613957, "grad_norm": 0.5843697190284729, "learning_rate": 9.314985512173223e-05, "loss": 1.8282, "step": 3274 }, { "epoch": 0.18254277910930272, "grad_norm": 0.5423409938812256, "learning_rate": 9.31453559383726e-05, "loss": 1.7501, "step": 3275 }, { "epoch": 0.18259851736246585, "grad_norm": 0.5610026717185974, "learning_rate": 9.314085538669383e-05, "loss": 1.7287, "step": 3276 }, { "epoch": 0.182654255615629, "grad_norm": 0.5071337223052979, "learning_rate": 9.313635346683865e-05, "loss": 1.6779, "step": 3277 }, { "epoch": 0.18270999386879216, "grad_norm": 0.5492652058601379, "learning_rate": 9.313185017894985e-05, "loss": 1.7884, "step": 3278 }, { "epoch": 0.1827657321219553, "grad_norm": 0.4901118874549866, "learning_rate": 9.312734552317023e-05, "loss": 1.5747, "step": 3279 }, { "epoch": 0.18282147037511845, "grad_norm": 0.515848696231842, "learning_rate": 9.312283949964267e-05, "loss": 1.4992, "step": 3280 }, { "epoch": 0.18287720862828158, "grad_norm": 0.497324675321579, "learning_rate": 9.311833210851007e-05, "loss": 1.5226, "step": 3281 }, { "epoch": 0.18293294688144474, "grad_norm": 0.5232150554656982, "learning_rate": 9.311382334991536e-05, "loss": 1.6106, "step": 3282 }, { "epoch": 0.1829886851346079, "grad_norm": 0.6029054522514343, "learning_rate": 9.310931322400156e-05, "loss": 1.9531, "step": 3283 }, { "epoch": 0.18304442338777102, "grad_norm": 0.70119708776474, "learning_rate": 9.310480173091168e-05, "loss": 1.9566, "step": 3284 }, { "epoch": 0.18310016164093418, "grad_norm": 0.5252953767776489, "learning_rate": 9.31002888707888e-05, "loss": 1.8004, "step": 3285 }, { "epoch": 0.1831558998940973, "grad_norm": 0.5744017958641052, "learning_rate": 9.309577464377606e-05, "loss": 1.8203, "step": 3286 }, { "epoch": 0.18321163814726046, "grad_norm": 0.5286086797714233, "learning_rate": 9.309125905001659e-05, "loss": 1.8127, "step": 3287 }, { "epoch": 0.18326737640042362, "grad_norm": 0.5180408954620361, "learning_rate": 9.308674208965364e-05, "loss": 1.5432, "step": 3288 }, { "epoch": 0.18332311465358675, "grad_norm": 0.568420946598053, "learning_rate": 9.308222376283045e-05, "loss": 1.853, "step": 3289 }, { "epoch": 0.1833788529067499, "grad_norm": 0.9352191090583801, "learning_rate": 9.30777040696903e-05, "loss": 1.531, "step": 3290 }, { "epoch": 0.18343459115991304, "grad_norm": 0.5612093210220337, "learning_rate": 9.307318301037656e-05, "loss": 2.0149, "step": 3291 }, { "epoch": 0.1834903294130762, "grad_norm": 0.5616469979286194, "learning_rate": 9.306866058503257e-05, "loss": 1.6388, "step": 3292 }, { "epoch": 0.18354606766623935, "grad_norm": 0.5579656958580017, "learning_rate": 9.306413679380177e-05, "loss": 1.8719, "step": 3293 }, { "epoch": 0.18360180591940248, "grad_norm": 0.5343957543373108, "learning_rate": 9.305961163682764e-05, "loss": 1.7592, "step": 3294 }, { "epoch": 0.18365754417256563, "grad_norm": 0.5974972248077393, "learning_rate": 9.305508511425367e-05, "loss": 1.834, "step": 3295 }, { "epoch": 0.1837132824257288, "grad_norm": 0.5827033519744873, "learning_rate": 9.305055722622344e-05, "loss": 1.8606, "step": 3296 }, { "epoch": 0.18376902067889192, "grad_norm": 0.5568636059761047, "learning_rate": 9.304602797288054e-05, "loss": 1.8952, "step": 3297 }, { "epoch": 0.18382475893205508, "grad_norm": 0.6066376566886902, "learning_rate": 9.30414973543686e-05, "loss": 1.9215, "step": 3298 }, { "epoch": 0.1838804971852182, "grad_norm": 0.5111042261123657, "learning_rate": 9.303696537083132e-05, "loss": 1.5506, "step": 3299 }, { "epoch": 0.18393623543838136, "grad_norm": 0.501711905002594, "learning_rate": 9.303243202241242e-05, "loss": 1.5003, "step": 3300 }, { "epoch": 0.18399197369154452, "grad_norm": 0.543425977230072, "learning_rate": 9.302789730925567e-05, "loss": 1.5837, "step": 3301 }, { "epoch": 0.18404771194470765, "grad_norm": 0.5619440674781799, "learning_rate": 9.30233612315049e-05, "loss": 1.8285, "step": 3302 }, { "epoch": 0.1841034501978708, "grad_norm": 0.5294018387794495, "learning_rate": 9.301882378930394e-05, "loss": 1.6032, "step": 3303 }, { "epoch": 0.18415918845103393, "grad_norm": 0.6101817488670349, "learning_rate": 9.301428498279671e-05, "loss": 1.9998, "step": 3304 }, { "epoch": 0.1842149267041971, "grad_norm": 0.5133767127990723, "learning_rate": 9.300974481212715e-05, "loss": 1.6816, "step": 3305 }, { "epoch": 0.18427066495736025, "grad_norm": 0.5289322137832642, "learning_rate": 9.300520327743924e-05, "loss": 1.4649, "step": 3306 }, { "epoch": 0.18432640321052338, "grad_norm": 0.5560780763626099, "learning_rate": 9.300066037887704e-05, "loss": 1.6704, "step": 3307 }, { "epoch": 0.18438214146368653, "grad_norm": 0.5855201482772827, "learning_rate": 9.29961161165846e-05, "loss": 1.9368, "step": 3308 }, { "epoch": 0.18443787971684966, "grad_norm": 0.5227165818214417, "learning_rate": 9.299157049070603e-05, "loss": 1.663, "step": 3309 }, { "epoch": 0.18449361797001282, "grad_norm": 0.555633008480072, "learning_rate": 9.298702350138551e-05, "loss": 1.6634, "step": 3310 }, { "epoch": 0.18454935622317598, "grad_norm": 0.5284892916679382, "learning_rate": 9.298247514876724e-05, "loss": 1.7772, "step": 3311 }, { "epoch": 0.1846050944763391, "grad_norm": 0.5455605983734131, "learning_rate": 9.297792543299545e-05, "loss": 1.7826, "step": 3312 }, { "epoch": 0.18466083272950226, "grad_norm": 0.6630359292030334, "learning_rate": 9.297337435421446e-05, "loss": 2.0859, "step": 3313 }, { "epoch": 0.1847165709826654, "grad_norm": 0.4958614408969879, "learning_rate": 9.296882191256857e-05, "loss": 1.6861, "step": 3314 }, { "epoch": 0.18477230923582855, "grad_norm": 0.506952702999115, "learning_rate": 9.29642681082022e-05, "loss": 1.6616, "step": 3315 }, { "epoch": 0.1848280474889917, "grad_norm": 0.5598859190940857, "learning_rate": 9.295971294125973e-05, "loss": 1.8831, "step": 3316 }, { "epoch": 0.18488378574215483, "grad_norm": 0.5533158183097839, "learning_rate": 9.295515641188563e-05, "loss": 1.6373, "step": 3317 }, { "epoch": 0.184939523995318, "grad_norm": 0.5264914035797119, "learning_rate": 9.295059852022443e-05, "loss": 1.6668, "step": 3318 }, { "epoch": 0.18499526224848115, "grad_norm": 0.542248010635376, "learning_rate": 9.294603926642064e-05, "loss": 1.5566, "step": 3319 }, { "epoch": 0.18505100050164427, "grad_norm": 0.5599246621131897, "learning_rate": 9.294147865061891e-05, "loss": 1.8183, "step": 3320 }, { "epoch": 0.18510673875480743, "grad_norm": 0.48394709825515747, "learning_rate": 9.293691667296382e-05, "loss": 1.4792, "step": 3321 }, { "epoch": 0.18516247700797056, "grad_norm": 0.5670637488365173, "learning_rate": 9.293235333360009e-05, "loss": 1.8202, "step": 3322 }, { "epoch": 0.18521821526113372, "grad_norm": 0.5079344511032104, "learning_rate": 9.29277886326724e-05, "loss": 1.698, "step": 3323 }, { "epoch": 0.18527395351429687, "grad_norm": 0.6303577423095703, "learning_rate": 9.292322257032555e-05, "loss": 1.8882, "step": 3324 }, { "epoch": 0.18532969176746, "grad_norm": 0.5548877716064453, "learning_rate": 9.291865514670435e-05, "loss": 1.8684, "step": 3325 }, { "epoch": 0.18538543002062316, "grad_norm": 0.5407868027687073, "learning_rate": 9.291408636195364e-05, "loss": 1.7726, "step": 3326 }, { "epoch": 0.1854411682737863, "grad_norm": 0.5434556007385254, "learning_rate": 9.29095162162183e-05, "loss": 1.8152, "step": 3327 }, { "epoch": 0.18549690652694945, "grad_norm": 0.5405827164649963, "learning_rate": 9.290494470964332e-05, "loss": 1.7364, "step": 3328 }, { "epoch": 0.1855526447801126, "grad_norm": 0.4682316184043884, "learning_rate": 9.290037184237362e-05, "loss": 1.6331, "step": 3329 }, { "epoch": 0.18560838303327573, "grad_norm": 0.5418784618377686, "learning_rate": 9.289579761455426e-05, "loss": 1.9186, "step": 3330 }, { "epoch": 0.1856641212864389, "grad_norm": 0.6001595854759216, "learning_rate": 9.289122202633029e-05, "loss": 1.8436, "step": 3331 }, { "epoch": 0.18571985953960202, "grad_norm": 0.5514225363731384, "learning_rate": 9.288664507784686e-05, "loss": 1.8193, "step": 3332 }, { "epoch": 0.18577559779276517, "grad_norm": 0.5329412817955017, "learning_rate": 9.288206676924906e-05, "loss": 1.5945, "step": 3333 }, { "epoch": 0.18583133604592833, "grad_norm": 0.5613374710083008, "learning_rate": 9.287748710068214e-05, "loss": 1.8746, "step": 3334 }, { "epoch": 0.18588707429909146, "grad_norm": 0.5720524191856384, "learning_rate": 9.287290607229131e-05, "loss": 1.6635, "step": 3335 }, { "epoch": 0.18594281255225462, "grad_norm": 0.5446194410324097, "learning_rate": 9.286832368422187e-05, "loss": 1.6587, "step": 3336 }, { "epoch": 0.18599855080541774, "grad_norm": 0.5358483791351318, "learning_rate": 9.286373993661916e-05, "loss": 1.8244, "step": 3337 }, { "epoch": 0.1860542890585809, "grad_norm": 0.5477625727653503, "learning_rate": 9.28591548296285e-05, "loss": 1.8085, "step": 3338 }, { "epoch": 0.18611002731174406, "grad_norm": 0.528417706489563, "learning_rate": 9.285456836339537e-05, "loss": 1.7652, "step": 3339 }, { "epoch": 0.1861657655649072, "grad_norm": 0.5157662630081177, "learning_rate": 9.284998053806516e-05, "loss": 1.7365, "step": 3340 }, { "epoch": 0.18622150381807034, "grad_norm": 0.5836164951324463, "learning_rate": 9.284539135378341e-05, "loss": 1.8217, "step": 3341 }, { "epoch": 0.1862772420712335, "grad_norm": 0.5283136963844299, "learning_rate": 9.284080081069565e-05, "loss": 1.7073, "step": 3342 }, { "epoch": 0.18633298032439663, "grad_norm": 0.5611073970794678, "learning_rate": 9.283620890894749e-05, "loss": 1.6885, "step": 3343 }, { "epoch": 0.1863887185775598, "grad_norm": 0.5854252576828003, "learning_rate": 9.283161564868452e-05, "loss": 1.8512, "step": 3344 }, { "epoch": 0.18644445683072292, "grad_norm": 0.5314401984214783, "learning_rate": 9.282702103005243e-05, "loss": 1.8003, "step": 3345 }, { "epoch": 0.18650019508388607, "grad_norm": 0.5689622759819031, "learning_rate": 9.282242505319693e-05, "loss": 1.7775, "step": 3346 }, { "epoch": 0.18655593333704923, "grad_norm": 0.5099941492080688, "learning_rate": 9.281782771826378e-05, "loss": 1.4253, "step": 3347 }, { "epoch": 0.18661167159021236, "grad_norm": 0.557032585144043, "learning_rate": 9.281322902539878e-05, "loss": 1.7682, "step": 3348 }, { "epoch": 0.18666740984337551, "grad_norm": 0.5229087471961975, "learning_rate": 9.280862897474776e-05, "loss": 1.5904, "step": 3349 }, { "epoch": 0.18672314809653864, "grad_norm": 0.5913739800453186, "learning_rate": 9.280402756645663e-05, "loss": 1.9147, "step": 3350 }, { "epoch": 0.1867788863497018, "grad_norm": 0.5528784990310669, "learning_rate": 9.279942480067131e-05, "loss": 1.7212, "step": 3351 }, { "epoch": 0.18683462460286496, "grad_norm": 0.5475696921348572, "learning_rate": 9.279482067753777e-05, "loss": 1.8177, "step": 3352 }, { "epoch": 0.18689036285602809, "grad_norm": 0.5523363947868347, "learning_rate": 9.279021519720203e-05, "loss": 1.7726, "step": 3353 }, { "epoch": 0.18694610110919124, "grad_norm": 0.4846109449863434, "learning_rate": 9.278560835981016e-05, "loss": 1.7335, "step": 3354 }, { "epoch": 0.18700183936235437, "grad_norm": 0.5322748422622681, "learning_rate": 9.278100016550825e-05, "loss": 1.8071, "step": 3355 }, { "epoch": 0.18705757761551753, "grad_norm": 0.5510337352752686, "learning_rate": 9.277639061444244e-05, "loss": 1.7673, "step": 3356 }, { "epoch": 0.18711331586868069, "grad_norm": 0.5218777060508728, "learning_rate": 9.277177970675893e-05, "loss": 1.686, "step": 3357 }, { "epoch": 0.18716905412184381, "grad_norm": 0.5483778715133667, "learning_rate": 9.276716744260392e-05, "loss": 1.8069, "step": 3358 }, { "epoch": 0.18722479237500697, "grad_norm": 0.5690082907676697, "learning_rate": 9.276255382212373e-05, "loss": 1.7838, "step": 3359 }, { "epoch": 0.1872805306281701, "grad_norm": 0.5564740896224976, "learning_rate": 9.275793884546465e-05, "loss": 1.6363, "step": 3360 }, { "epoch": 0.18733626888133326, "grad_norm": 0.5689534544944763, "learning_rate": 9.275332251277305e-05, "loss": 1.7624, "step": 3361 }, { "epoch": 0.1873920071344964, "grad_norm": 0.5340893864631653, "learning_rate": 9.274870482419533e-05, "loss": 1.785, "step": 3362 }, { "epoch": 0.18744774538765954, "grad_norm": 0.556954562664032, "learning_rate": 9.274408577987792e-05, "loss": 1.7629, "step": 3363 }, { "epoch": 0.1875034836408227, "grad_norm": 0.5275453329086304, "learning_rate": 9.273946537996734e-05, "loss": 1.6675, "step": 3364 }, { "epoch": 0.18755922189398586, "grad_norm": 0.5510149598121643, "learning_rate": 9.273484362461011e-05, "loss": 1.8703, "step": 3365 }, { "epoch": 0.18761496014714898, "grad_norm": 0.5040173530578613, "learning_rate": 9.273022051395278e-05, "loss": 1.646, "step": 3366 }, { "epoch": 0.18767069840031214, "grad_norm": 0.5532334446907043, "learning_rate": 9.272559604814201e-05, "loss": 1.7221, "step": 3367 }, { "epoch": 0.18772643665347527, "grad_norm": 0.5305314064025879, "learning_rate": 9.272097022732443e-05, "loss": 1.5933, "step": 3368 }, { "epoch": 0.18778217490663843, "grad_norm": 0.5466606020927429, "learning_rate": 9.271634305164675e-05, "loss": 1.7312, "step": 3369 }, { "epoch": 0.18783791315980158, "grad_norm": 0.5373468995094299, "learning_rate": 9.271171452125575e-05, "loss": 1.7442, "step": 3370 }, { "epoch": 0.1878936514129647, "grad_norm": 0.5270282626152039, "learning_rate": 9.270708463629815e-05, "loss": 1.7939, "step": 3371 }, { "epoch": 0.18794938966612787, "grad_norm": 0.5657024383544922, "learning_rate": 9.270245339692086e-05, "loss": 1.8941, "step": 3372 }, { "epoch": 0.188005127919291, "grad_norm": 0.5092267990112305, "learning_rate": 9.269782080327071e-05, "loss": 1.6895, "step": 3373 }, { "epoch": 0.18806086617245416, "grad_norm": 0.5645020008087158, "learning_rate": 9.269318685549463e-05, "loss": 1.6734, "step": 3374 }, { "epoch": 0.1881166044256173, "grad_norm": 0.5031103491783142, "learning_rate": 9.268855155373957e-05, "loss": 1.848, "step": 3375 }, { "epoch": 0.18817234267878044, "grad_norm": 0.5133728981018066, "learning_rate": 9.268391489815257e-05, "loss": 1.4297, "step": 3376 }, { "epoch": 0.1882280809319436, "grad_norm": 0.5471519231796265, "learning_rate": 9.267927688888062e-05, "loss": 1.8073, "step": 3377 }, { "epoch": 0.18828381918510673, "grad_norm": 0.545860230922699, "learning_rate": 9.267463752607089e-05, "loss": 1.751, "step": 3378 }, { "epoch": 0.18833955743826988, "grad_norm": 0.4829151928424835, "learning_rate": 9.266999680987043e-05, "loss": 1.498, "step": 3379 }, { "epoch": 0.18839529569143304, "grad_norm": 0.5440730452537537, "learning_rate": 9.266535474042647e-05, "loss": 1.4733, "step": 3380 }, { "epoch": 0.18845103394459617, "grad_norm": 0.7026723623275757, "learning_rate": 9.266071131788621e-05, "loss": 1.904, "step": 3381 }, { "epoch": 0.18850677219775933, "grad_norm": 0.49864065647125244, "learning_rate": 9.26560665423969e-05, "loss": 1.8644, "step": 3382 }, { "epoch": 0.18856251045092245, "grad_norm": 0.5199279189109802, "learning_rate": 9.265142041410589e-05, "loss": 1.6917, "step": 3383 }, { "epoch": 0.1886182487040856, "grad_norm": 0.5546734929084778, "learning_rate": 9.26467729331605e-05, "loss": 1.7944, "step": 3384 }, { "epoch": 0.18867398695724877, "grad_norm": 0.5777541399002075, "learning_rate": 9.26421240997081e-05, "loss": 1.9372, "step": 3385 }, { "epoch": 0.1887297252104119, "grad_norm": 0.6016680598258972, "learning_rate": 9.263747391389615e-05, "loss": 1.9591, "step": 3386 }, { "epoch": 0.18878546346357505, "grad_norm": 0.5046743154525757, "learning_rate": 9.263282237587213e-05, "loss": 1.5718, "step": 3387 }, { "epoch": 0.1888412017167382, "grad_norm": 0.5458966493606567, "learning_rate": 9.262816948578354e-05, "loss": 1.7829, "step": 3388 }, { "epoch": 0.18889693996990134, "grad_norm": 0.5983991026878357, "learning_rate": 9.262351524377797e-05, "loss": 1.8848, "step": 3389 }, { "epoch": 0.1889526782230645, "grad_norm": 0.5047475099563599, "learning_rate": 9.261885965000298e-05, "loss": 1.3356, "step": 3390 }, { "epoch": 0.18900841647622763, "grad_norm": 0.5353848338127136, "learning_rate": 9.261420270460628e-05, "loss": 1.7632, "step": 3391 }, { "epoch": 0.18906415472939078, "grad_norm": 0.5097886919975281, "learning_rate": 9.26095444077355e-05, "loss": 1.6608, "step": 3392 }, { "epoch": 0.18911989298255394, "grad_norm": 0.5497481226921082, "learning_rate": 9.260488475953842e-05, "loss": 1.8704, "step": 3393 }, { "epoch": 0.18917563123571707, "grad_norm": 0.5084047317504883, "learning_rate": 9.26002237601628e-05, "loss": 1.515, "step": 3394 }, { "epoch": 0.18923136948888022, "grad_norm": 0.5252576470375061, "learning_rate": 9.259556140975644e-05, "loss": 1.448, "step": 3395 }, { "epoch": 0.18928710774204335, "grad_norm": 0.5760124325752258, "learning_rate": 9.259089770846723e-05, "loss": 1.7052, "step": 3396 }, { "epoch": 0.1893428459952065, "grad_norm": 0.5604876279830933, "learning_rate": 9.258623265644309e-05, "loss": 1.8782, "step": 3397 }, { "epoch": 0.18939858424836967, "grad_norm": 0.5331717133522034, "learning_rate": 9.258156625383192e-05, "loss": 1.6754, "step": 3398 }, { "epoch": 0.1894543225015328, "grad_norm": 0.5478466153144836, "learning_rate": 9.257689850078174e-05, "loss": 1.7709, "step": 3399 }, { "epoch": 0.18951006075469595, "grad_norm": 0.5751819014549255, "learning_rate": 9.257222939744059e-05, "loss": 1.6806, "step": 3400 }, { "epoch": 0.18956579900785908, "grad_norm": 0.557999849319458, "learning_rate": 9.256755894395652e-05, "loss": 1.6614, "step": 3401 }, { "epoch": 0.18962153726102224, "grad_norm": 0.6242285370826721, "learning_rate": 9.256288714047767e-05, "loss": 1.9115, "step": 3402 }, { "epoch": 0.1896772755141854, "grad_norm": 0.5403860807418823, "learning_rate": 9.255821398715221e-05, "loss": 1.6686, "step": 3403 }, { "epoch": 0.18973301376734852, "grad_norm": 0.5129532814025879, "learning_rate": 9.255353948412833e-05, "loss": 1.5406, "step": 3404 }, { "epoch": 0.18978875202051168, "grad_norm": 0.5771991014480591, "learning_rate": 9.254886363155429e-05, "loss": 1.8979, "step": 3405 }, { "epoch": 0.1898444902736748, "grad_norm": 0.5569978356361389, "learning_rate": 9.254418642957835e-05, "loss": 1.7284, "step": 3406 }, { "epoch": 0.18990022852683797, "grad_norm": 0.5016009211540222, "learning_rate": 9.253950787834889e-05, "loss": 1.7517, "step": 3407 }, { "epoch": 0.18995596678000112, "grad_norm": 0.47752997279167175, "learning_rate": 9.253482797801425e-05, "loss": 1.7069, "step": 3408 }, { "epoch": 0.19001170503316425, "grad_norm": 0.4722379446029663, "learning_rate": 9.253014672872285e-05, "loss": 1.4309, "step": 3409 }, { "epoch": 0.1900674432863274, "grad_norm": 0.516113817691803, "learning_rate": 9.252546413062319e-05, "loss": 1.6337, "step": 3410 }, { "epoch": 0.19012318153949057, "grad_norm": 0.4841940402984619, "learning_rate": 9.252078018386374e-05, "loss": 1.4486, "step": 3411 }, { "epoch": 0.1901789197926537, "grad_norm": 0.566828191280365, "learning_rate": 9.251609488859304e-05, "loss": 1.524, "step": 3412 }, { "epoch": 0.19023465804581685, "grad_norm": 0.5277671813964844, "learning_rate": 9.251140824495972e-05, "loss": 1.6331, "step": 3413 }, { "epoch": 0.19029039629897998, "grad_norm": 0.5212645530700684, "learning_rate": 9.250672025311237e-05, "loss": 1.6409, "step": 3414 }, { "epoch": 0.19034613455214314, "grad_norm": 0.5892760753631592, "learning_rate": 9.250203091319968e-05, "loss": 1.7712, "step": 3415 }, { "epoch": 0.1904018728053063, "grad_norm": 0.5454036593437195, "learning_rate": 9.24973402253704e-05, "loss": 1.888, "step": 3416 }, { "epoch": 0.19045761105846942, "grad_norm": 0.5001441836357117, "learning_rate": 9.249264818977324e-05, "loss": 1.6808, "step": 3417 }, { "epoch": 0.19051334931163258, "grad_norm": 0.5732707977294922, "learning_rate": 9.248795480655704e-05, "loss": 1.8398, "step": 3418 }, { "epoch": 0.1905690875647957, "grad_norm": 0.5356916785240173, "learning_rate": 9.248326007587063e-05, "loss": 1.7295, "step": 3419 }, { "epoch": 0.19062482581795887, "grad_norm": 0.5317162275314331, "learning_rate": 9.247856399786292e-05, "loss": 1.7717, "step": 3420 }, { "epoch": 0.19068056407112202, "grad_norm": 0.5117460489273071, "learning_rate": 9.247386657268283e-05, "loss": 1.5417, "step": 3421 }, { "epoch": 0.19073630232428515, "grad_norm": 0.5263468623161316, "learning_rate": 9.24691678004793e-05, "loss": 1.8882, "step": 3422 }, { "epoch": 0.1907920405774483, "grad_norm": 0.5721904635429382, "learning_rate": 9.24644676814014e-05, "loss": 1.8083, "step": 3423 }, { "epoch": 0.19084777883061144, "grad_norm": 0.5673632025718689, "learning_rate": 9.245976621559817e-05, "loss": 1.8532, "step": 3424 }, { "epoch": 0.1909035170837746, "grad_norm": 0.5096221566200256, "learning_rate": 9.24550634032187e-05, "loss": 1.5365, "step": 3425 }, { "epoch": 0.19095925533693775, "grad_norm": 0.545087456703186, "learning_rate": 9.245035924441217e-05, "loss": 1.854, "step": 3426 }, { "epoch": 0.19101499359010088, "grad_norm": 0.5424298644065857, "learning_rate": 9.244565373932774e-05, "loss": 1.7373, "step": 3427 }, { "epoch": 0.19107073184326404, "grad_norm": 0.5558550357818604, "learning_rate": 9.244094688811465e-05, "loss": 1.746, "step": 3428 }, { "epoch": 0.19112647009642716, "grad_norm": 0.49283209443092346, "learning_rate": 9.243623869092218e-05, "loss": 1.3836, "step": 3429 }, { "epoch": 0.19118220834959032, "grad_norm": 0.5955911874771118, "learning_rate": 9.24315291478996e-05, "loss": 1.8499, "step": 3430 }, { "epoch": 0.19123794660275348, "grad_norm": 0.5249252319335938, "learning_rate": 9.242681825919635e-05, "loss": 1.6767, "step": 3431 }, { "epoch": 0.1912936848559166, "grad_norm": 0.5496412515640259, "learning_rate": 9.242210602496178e-05, "loss": 1.7963, "step": 3432 }, { "epoch": 0.19134942310907976, "grad_norm": 0.5590277910232544, "learning_rate": 9.241739244534534e-05, "loss": 1.7885, "step": 3433 }, { "epoch": 0.19140516136224292, "grad_norm": 0.5826262831687927, "learning_rate": 9.241267752049653e-05, "loss": 1.7971, "step": 3434 }, { "epoch": 0.19146089961540605, "grad_norm": 0.5477822422981262, "learning_rate": 9.240796125056486e-05, "loss": 1.7376, "step": 3435 }, { "epoch": 0.1915166378685692, "grad_norm": 0.5088443756103516, "learning_rate": 9.240324363569992e-05, "loss": 1.6705, "step": 3436 }, { "epoch": 0.19157237612173234, "grad_norm": 0.5802351832389832, "learning_rate": 9.239852467605132e-05, "loss": 1.8505, "step": 3437 }, { "epoch": 0.1916281143748955, "grad_norm": 0.5589656829833984, "learning_rate": 9.239380437176872e-05, "loss": 1.7993, "step": 3438 }, { "epoch": 0.19168385262805865, "grad_norm": 0.5384811162948608, "learning_rate": 9.238908272300181e-05, "loss": 1.803, "step": 3439 }, { "epoch": 0.19173959088122178, "grad_norm": 0.5251903533935547, "learning_rate": 9.238435972990036e-05, "loss": 1.6364, "step": 3440 }, { "epoch": 0.19179532913438493, "grad_norm": 0.5536593794822693, "learning_rate": 9.237963539261412e-05, "loss": 1.8069, "step": 3441 }, { "epoch": 0.19185106738754806, "grad_norm": 0.49031203985214233, "learning_rate": 9.237490971129294e-05, "loss": 1.6969, "step": 3442 }, { "epoch": 0.19190680564071122, "grad_norm": 0.5111910700798035, "learning_rate": 9.23701826860867e-05, "loss": 1.7135, "step": 3443 }, { "epoch": 0.19196254389387438, "grad_norm": 0.5502627491950989, "learning_rate": 9.236545431714529e-05, "loss": 1.8724, "step": 3444 }, { "epoch": 0.1920182821470375, "grad_norm": 0.5772512555122375, "learning_rate": 9.236072460461867e-05, "loss": 1.7944, "step": 3445 }, { "epoch": 0.19207402040020066, "grad_norm": 0.6393208503723145, "learning_rate": 9.235599354865686e-05, "loss": 1.5557, "step": 3446 }, { "epoch": 0.1921297586533638, "grad_norm": 0.5822187066078186, "learning_rate": 9.235126114940989e-05, "loss": 1.8263, "step": 3447 }, { "epoch": 0.19218549690652695, "grad_norm": 0.5391358733177185, "learning_rate": 9.234652740702781e-05, "loss": 1.7186, "step": 3448 }, { "epoch": 0.1922412351596901, "grad_norm": 0.4919295907020569, "learning_rate": 9.23417923216608e-05, "loss": 1.5176, "step": 3449 }, { "epoch": 0.19229697341285323, "grad_norm": 0.547146737575531, "learning_rate": 9.233705589345902e-05, "loss": 1.8129, "step": 3450 }, { "epoch": 0.1923527116660164, "grad_norm": 0.4958893358707428, "learning_rate": 9.233231812257265e-05, "loss": 1.5314, "step": 3451 }, { "epoch": 0.19240844991917952, "grad_norm": 0.4873281419277191, "learning_rate": 9.232757900915197e-05, "loss": 1.6043, "step": 3452 }, { "epoch": 0.19246418817234268, "grad_norm": 0.5672634840011597, "learning_rate": 9.232283855334727e-05, "loss": 1.8168, "step": 3453 }, { "epoch": 0.19251992642550583, "grad_norm": 0.514673113822937, "learning_rate": 9.231809675530888e-05, "loss": 1.7076, "step": 3454 }, { "epoch": 0.19257566467866896, "grad_norm": 0.5566558241844177, "learning_rate": 9.23133536151872e-05, "loss": 1.8021, "step": 3455 }, { "epoch": 0.19263140293183212, "grad_norm": 0.5627939701080322, "learning_rate": 9.230860913313266e-05, "loss": 1.659, "step": 3456 }, { "epoch": 0.19268714118499528, "grad_norm": 0.5632688403129578, "learning_rate": 9.23038633092957e-05, "loss": 1.8172, "step": 3457 }, { "epoch": 0.1927428794381584, "grad_norm": 0.5149570107460022, "learning_rate": 9.229911614382685e-05, "loss": 1.6086, "step": 3458 }, { "epoch": 0.19279861769132156, "grad_norm": 0.5687461495399475, "learning_rate": 9.229436763687665e-05, "loss": 1.7102, "step": 3459 }, { "epoch": 0.1928543559444847, "grad_norm": 0.527733325958252, "learning_rate": 9.228961778859572e-05, "loss": 1.6291, "step": 3460 }, { "epoch": 0.19291009419764785, "grad_norm": 0.5713732242584229, "learning_rate": 9.228486659913467e-05, "loss": 1.7628, "step": 3461 }, { "epoch": 0.192965832450811, "grad_norm": 0.5368852019309998, "learning_rate": 9.228011406864417e-05, "loss": 1.6604, "step": 3462 }, { "epoch": 0.19302157070397413, "grad_norm": 0.5099670886993408, "learning_rate": 9.227536019727498e-05, "loss": 1.6571, "step": 3463 }, { "epoch": 0.1930773089571373, "grad_norm": 0.5792325735092163, "learning_rate": 9.227060498517785e-05, "loss": 1.6586, "step": 3464 }, { "epoch": 0.19313304721030042, "grad_norm": 0.5870433449745178, "learning_rate": 9.226584843250357e-05, "loss": 1.6693, "step": 3465 }, { "epoch": 0.19318878546346357, "grad_norm": 0.5723249316215515, "learning_rate": 9.226109053940302e-05, "loss": 1.8516, "step": 3466 }, { "epoch": 0.19324452371662673, "grad_norm": 0.5492411255836487, "learning_rate": 9.225633130602707e-05, "loss": 1.8369, "step": 3467 }, { "epoch": 0.19330026196978986, "grad_norm": 0.5040132403373718, "learning_rate": 9.225157073252666e-05, "loss": 1.7936, "step": 3468 }, { "epoch": 0.19335600022295302, "grad_norm": 0.5484923124313354, "learning_rate": 9.224680881905279e-05, "loss": 1.8398, "step": 3469 }, { "epoch": 0.19341173847611615, "grad_norm": 0.6042559146881104, "learning_rate": 9.224204556575644e-05, "loss": 1.8699, "step": 3470 }, { "epoch": 0.1934674767292793, "grad_norm": 0.5580307841300964, "learning_rate": 9.22372809727887e-05, "loss": 1.6961, "step": 3471 }, { "epoch": 0.19352321498244246, "grad_norm": 0.5399236679077148, "learning_rate": 9.223251504030066e-05, "loss": 1.6302, "step": 3472 }, { "epoch": 0.1935789532356056, "grad_norm": 0.5522669553756714, "learning_rate": 9.222774776844349e-05, "loss": 1.765, "step": 3473 }, { "epoch": 0.19363469148876875, "grad_norm": 0.5530064105987549, "learning_rate": 9.222297915736834e-05, "loss": 1.783, "step": 3474 }, { "epoch": 0.19369042974193187, "grad_norm": 0.5082196593284607, "learning_rate": 9.22182092072265e-05, "loss": 1.6188, "step": 3475 }, { "epoch": 0.19374616799509503, "grad_norm": 0.5311219692230225, "learning_rate": 9.221343791816918e-05, "loss": 1.7017, "step": 3476 }, { "epoch": 0.1938019062482582, "grad_norm": 0.542589545249939, "learning_rate": 9.220866529034776e-05, "loss": 1.7064, "step": 3477 }, { "epoch": 0.19385764450142132, "grad_norm": 0.5327942967414856, "learning_rate": 9.220389132391356e-05, "loss": 1.7807, "step": 3478 }, { "epoch": 0.19391338275458447, "grad_norm": 0.523639976978302, "learning_rate": 9.219911601901799e-05, "loss": 1.5785, "step": 3479 }, { "epoch": 0.19396912100774763, "grad_norm": 0.5756027102470398, "learning_rate": 9.21943393758125e-05, "loss": 2.0297, "step": 3480 }, { "epoch": 0.19402485926091076, "grad_norm": 0.5392191410064697, "learning_rate": 9.218956139444858e-05, "loss": 1.6824, "step": 3481 }, { "epoch": 0.19408059751407392, "grad_norm": 0.536055326461792, "learning_rate": 9.218478207507775e-05, "loss": 1.7264, "step": 3482 }, { "epoch": 0.19413633576723704, "grad_norm": 0.5701099634170532, "learning_rate": 9.218000141785158e-05, "loss": 1.7967, "step": 3483 }, { "epoch": 0.1941920740204002, "grad_norm": 0.586493194103241, "learning_rate": 9.21752194229217e-05, "loss": 2.0026, "step": 3484 }, { "epoch": 0.19424781227356336, "grad_norm": 0.5607553124427795, "learning_rate": 9.217043609043975e-05, "loss": 1.8374, "step": 3485 }, { "epoch": 0.1943035505267265, "grad_norm": 0.5268848538398743, "learning_rate": 9.216565142055745e-05, "loss": 1.6248, "step": 3486 }, { "epoch": 0.19435928877988964, "grad_norm": 0.563528299331665, "learning_rate": 9.216086541342652e-05, "loss": 1.8659, "step": 3487 }, { "epoch": 0.19441502703305277, "grad_norm": 0.5309708714485168, "learning_rate": 9.215607806919877e-05, "loss": 1.7026, "step": 3488 }, { "epoch": 0.19447076528621593, "grad_norm": 0.5582777857780457, "learning_rate": 9.2151289388026e-05, "loss": 1.8766, "step": 3489 }, { "epoch": 0.1945265035393791, "grad_norm": 0.5012943744659424, "learning_rate": 9.214649937006008e-05, "loss": 1.372, "step": 3490 }, { "epoch": 0.19458224179254222, "grad_norm": 0.5534226298332214, "learning_rate": 9.214170801545294e-05, "loss": 1.8491, "step": 3491 }, { "epoch": 0.19463798004570537, "grad_norm": 0.5312340259552002, "learning_rate": 9.213691532435654e-05, "loss": 1.4738, "step": 3492 }, { "epoch": 0.1946937182988685, "grad_norm": 0.5233004093170166, "learning_rate": 9.213212129692284e-05, "loss": 1.5871, "step": 3493 }, { "epoch": 0.19474945655203166, "grad_norm": 0.5227805972099304, "learning_rate": 9.212732593330389e-05, "loss": 1.6355, "step": 3494 }, { "epoch": 0.19480519480519481, "grad_norm": 0.5237340927124023, "learning_rate": 9.21225292336518e-05, "loss": 1.8903, "step": 3495 }, { "epoch": 0.19486093305835794, "grad_norm": 0.5420545935630798, "learning_rate": 9.211773119811867e-05, "loss": 1.9006, "step": 3496 }, { "epoch": 0.1949166713115211, "grad_norm": 0.534702718257904, "learning_rate": 9.211293182685667e-05, "loss": 1.5601, "step": 3497 }, { "epoch": 0.19497240956468423, "grad_norm": 0.5968030095100403, "learning_rate": 9.210813112001802e-05, "loss": 1.7871, "step": 3498 }, { "epoch": 0.19502814781784739, "grad_norm": 0.5270916223526001, "learning_rate": 9.210332907775494e-05, "loss": 1.69, "step": 3499 }, { "epoch": 0.19508388607101054, "grad_norm": 0.5496137142181396, "learning_rate": 9.209852570021975e-05, "loss": 1.916, "step": 3500 }, { "epoch": 0.19513962432417367, "grad_norm": 0.5198974013328552, "learning_rate": 9.209372098756476e-05, "loss": 1.6651, "step": 3501 }, { "epoch": 0.19519536257733683, "grad_norm": 0.5615696907043457, "learning_rate": 9.208891493994239e-05, "loss": 1.7589, "step": 3502 }, { "epoch": 0.19525110083049999, "grad_norm": 0.5367715954780579, "learning_rate": 9.208410755750501e-05, "loss": 1.5889, "step": 3503 }, { "epoch": 0.19530683908366311, "grad_norm": 0.6133012771606445, "learning_rate": 9.207929884040511e-05, "loss": 1.8472, "step": 3504 }, { "epoch": 0.19536257733682627, "grad_norm": 0.6582043170928955, "learning_rate": 9.20744887887952e-05, "loss": 1.6471, "step": 3505 }, { "epoch": 0.1954183155899894, "grad_norm": 0.5180196762084961, "learning_rate": 9.206967740282783e-05, "loss": 1.5727, "step": 3506 }, { "epoch": 0.19547405384315256, "grad_norm": 0.5526701807975769, "learning_rate": 9.206486468265555e-05, "loss": 1.635, "step": 3507 }, { "epoch": 0.1955297920963157, "grad_norm": 0.6198756694793701, "learning_rate": 9.206005062843102e-05, "loss": 1.7088, "step": 3508 }, { "epoch": 0.19558553034947884, "grad_norm": 0.5373274683952332, "learning_rate": 9.205523524030693e-05, "loss": 1.7032, "step": 3509 }, { "epoch": 0.195641268602642, "grad_norm": 0.5724993944168091, "learning_rate": 9.205041851843596e-05, "loss": 1.8822, "step": 3510 }, { "epoch": 0.19569700685580513, "grad_norm": 0.5542033314704895, "learning_rate": 9.20456004629709e-05, "loss": 1.333, "step": 3511 }, { "epoch": 0.19575274510896828, "grad_norm": 0.5784552693367004, "learning_rate": 9.204078107406454e-05, "loss": 1.8277, "step": 3512 }, { "epoch": 0.19580848336213144, "grad_norm": 0.5339728593826294, "learning_rate": 9.203596035186969e-05, "loss": 1.5545, "step": 3513 }, { "epoch": 0.19586422161529457, "grad_norm": 0.5574887990951538, "learning_rate": 9.203113829653927e-05, "loss": 1.6811, "step": 3514 }, { "epoch": 0.19591995986845773, "grad_norm": 0.48576298356056213, "learning_rate": 9.202631490822622e-05, "loss": 1.548, "step": 3515 }, { "epoch": 0.19597569812162086, "grad_norm": 0.516997218132019, "learning_rate": 9.202149018708347e-05, "loss": 1.6624, "step": 3516 }, { "epoch": 0.196031436374784, "grad_norm": 0.5537340641021729, "learning_rate": 9.201666413326408e-05, "loss": 1.8557, "step": 3517 }, { "epoch": 0.19608717462794717, "grad_norm": 0.5295738577842712, "learning_rate": 9.201183674692107e-05, "loss": 1.5435, "step": 3518 }, { "epoch": 0.1961429128811103, "grad_norm": 0.47536125779151917, "learning_rate": 9.200700802820754e-05, "loss": 1.4683, "step": 3519 }, { "epoch": 0.19619865113427346, "grad_norm": 0.546451985836029, "learning_rate": 9.200217797727662e-05, "loss": 1.8706, "step": 3520 }, { "epoch": 0.19625438938743658, "grad_norm": 0.5166674256324768, "learning_rate": 9.199734659428152e-05, "loss": 1.5608, "step": 3521 }, { "epoch": 0.19631012764059974, "grad_norm": 0.5700700879096985, "learning_rate": 9.199251387937545e-05, "loss": 1.7221, "step": 3522 }, { "epoch": 0.1963658658937629, "grad_norm": 0.6089435815811157, "learning_rate": 9.198767983271166e-05, "loss": 1.7989, "step": 3523 }, { "epoch": 0.19642160414692603, "grad_norm": 0.6160342693328857, "learning_rate": 9.198284445444348e-05, "loss": 2.0033, "step": 3524 }, { "epoch": 0.19647734240008918, "grad_norm": 0.6272563338279724, "learning_rate": 9.197800774472426e-05, "loss": 1.9705, "step": 3525 }, { "epoch": 0.19653308065325234, "grad_norm": 0.4671235680580139, "learning_rate": 9.197316970370737e-05, "loss": 1.0644, "step": 3526 }, { "epoch": 0.19658881890641547, "grad_norm": 0.5911363959312439, "learning_rate": 9.196833033154625e-05, "loss": 1.662, "step": 3527 }, { "epoch": 0.19664455715957863, "grad_norm": 0.552719235420227, "learning_rate": 9.19634896283944e-05, "loss": 1.7743, "step": 3528 }, { "epoch": 0.19670029541274175, "grad_norm": 0.5252164006233215, "learning_rate": 9.195864759440531e-05, "loss": 1.7682, "step": 3529 }, { "epoch": 0.1967560336659049, "grad_norm": 0.53560471534729, "learning_rate": 9.195380422973257e-05, "loss": 1.6731, "step": 3530 }, { "epoch": 0.19681177191906807, "grad_norm": 0.5091952085494995, "learning_rate": 9.194895953452976e-05, "loss": 1.4618, "step": 3531 }, { "epoch": 0.1968675101722312, "grad_norm": 0.5449403524398804, "learning_rate": 9.194411350895053e-05, "loss": 1.7007, "step": 3532 }, { "epoch": 0.19692324842539435, "grad_norm": 0.5258320569992065, "learning_rate": 9.193926615314857e-05, "loss": 1.8571, "step": 3533 }, { "epoch": 0.19697898667855748, "grad_norm": 0.5018019080162048, "learning_rate": 9.193441746727762e-05, "loss": 1.4968, "step": 3534 }, { "epoch": 0.19703472493172064, "grad_norm": 0.570955753326416, "learning_rate": 9.192956745149144e-05, "loss": 1.8938, "step": 3535 }, { "epoch": 0.1970904631848838, "grad_norm": 0.595371663570404, "learning_rate": 9.192471610594384e-05, "loss": 2.0865, "step": 3536 }, { "epoch": 0.19714620143804693, "grad_norm": 0.5452008247375488, "learning_rate": 9.191986343078868e-05, "loss": 1.7354, "step": 3537 }, { "epoch": 0.19720193969121008, "grad_norm": 0.5002971887588501, "learning_rate": 9.191500942617988e-05, "loss": 1.5218, "step": 3538 }, { "epoch": 0.1972576779443732, "grad_norm": 0.5388283133506775, "learning_rate": 9.191015409227134e-05, "loss": 1.6676, "step": 3539 }, { "epoch": 0.19731341619753637, "grad_norm": 0.5798291563987732, "learning_rate": 9.190529742921707e-05, "loss": 1.8602, "step": 3540 }, { "epoch": 0.19736915445069952, "grad_norm": 0.5622314214706421, "learning_rate": 9.190043943717111e-05, "loss": 1.7324, "step": 3541 }, { "epoch": 0.19742489270386265, "grad_norm": 0.5845619440078735, "learning_rate": 9.189558011628749e-05, "loss": 1.8098, "step": 3542 }, { "epoch": 0.1974806309570258, "grad_norm": 0.5707986354827881, "learning_rate": 9.189071946672033e-05, "loss": 1.9953, "step": 3543 }, { "epoch": 0.19753636921018894, "grad_norm": 0.5030776858329773, "learning_rate": 9.18858574886238e-05, "loss": 1.6697, "step": 3544 }, { "epoch": 0.1975921074633521, "grad_norm": 0.5452118515968323, "learning_rate": 9.188099418215208e-05, "loss": 1.4443, "step": 3545 }, { "epoch": 0.19764784571651525, "grad_norm": 0.5277875065803528, "learning_rate": 9.187612954745942e-05, "loss": 1.738, "step": 3546 }, { "epoch": 0.19770358396967838, "grad_norm": 0.563870906829834, "learning_rate": 9.187126358470006e-05, "loss": 1.7746, "step": 3547 }, { "epoch": 0.19775932222284154, "grad_norm": 0.5097183585166931, "learning_rate": 9.186639629402836e-05, "loss": 1.5869, "step": 3548 }, { "epoch": 0.1978150604760047, "grad_norm": 0.5304349660873413, "learning_rate": 9.186152767559866e-05, "loss": 1.4967, "step": 3549 }, { "epoch": 0.19787079872916782, "grad_norm": 0.5379878878593445, "learning_rate": 9.185665772956539e-05, "loss": 1.7457, "step": 3550 }, { "epoch": 0.19792653698233098, "grad_norm": 0.5299242734909058, "learning_rate": 9.185178645608297e-05, "loss": 1.6194, "step": 3551 }, { "epoch": 0.1979822752354941, "grad_norm": 0.5131285190582275, "learning_rate": 9.184691385530588e-05, "loss": 1.8616, "step": 3552 }, { "epoch": 0.19803801348865727, "grad_norm": 0.5294276475906372, "learning_rate": 9.184203992738869e-05, "loss": 1.5835, "step": 3553 }, { "epoch": 0.19809375174182042, "grad_norm": 0.544457197189331, "learning_rate": 9.183716467248593e-05, "loss": 1.6874, "step": 3554 }, { "epoch": 0.19814948999498355, "grad_norm": 0.5258937478065491, "learning_rate": 9.183228809075223e-05, "loss": 1.7946, "step": 3555 }, { "epoch": 0.1982052282481467, "grad_norm": 0.5388005971908569, "learning_rate": 9.182741018234228e-05, "loss": 1.6509, "step": 3556 }, { "epoch": 0.19826096650130984, "grad_norm": 0.5726017951965332, "learning_rate": 9.182253094741073e-05, "loss": 1.6885, "step": 3557 }, { "epoch": 0.198316704754473, "grad_norm": 0.5634879469871521, "learning_rate": 9.181765038611234e-05, "loss": 1.7431, "step": 3558 }, { "epoch": 0.19837244300763615, "grad_norm": 0.5139129161834717, "learning_rate": 9.18127684986019e-05, "loss": 1.763, "step": 3559 }, { "epoch": 0.19842818126079928, "grad_norm": 0.5589642524719238, "learning_rate": 9.180788528503423e-05, "loss": 1.9388, "step": 3560 }, { "epoch": 0.19848391951396244, "grad_norm": 0.538282036781311, "learning_rate": 9.18030007455642e-05, "loss": 1.8491, "step": 3561 }, { "epoch": 0.19853965776712557, "grad_norm": 0.5197616219520569, "learning_rate": 9.179811488034671e-05, "loss": 1.657, "step": 3562 }, { "epoch": 0.19859539602028872, "grad_norm": 0.569980800151825, "learning_rate": 9.17932276895367e-05, "loss": 1.8632, "step": 3563 }, { "epoch": 0.19865113427345188, "grad_norm": 0.6533870100975037, "learning_rate": 9.17883391732892e-05, "loss": 2.2768, "step": 3564 }, { "epoch": 0.198706872526615, "grad_norm": 0.5272773504257202, "learning_rate": 9.178344933175922e-05, "loss": 1.7145, "step": 3565 }, { "epoch": 0.19876261077977816, "grad_norm": 0.5350964069366455, "learning_rate": 9.177855816510184e-05, "loss": 1.6678, "step": 3566 }, { "epoch": 0.1988183490329413, "grad_norm": 0.5308762788772583, "learning_rate": 9.177366567347216e-05, "loss": 1.6745, "step": 3567 }, { "epoch": 0.19887408728610445, "grad_norm": 0.552905261516571, "learning_rate": 9.176877185702539e-05, "loss": 1.7337, "step": 3568 }, { "epoch": 0.1989298255392676, "grad_norm": 0.5350809693336487, "learning_rate": 9.17638767159167e-05, "loss": 1.754, "step": 3569 }, { "epoch": 0.19898556379243074, "grad_norm": 0.5393645167350769, "learning_rate": 9.175898025030134e-05, "loss": 1.6508, "step": 3570 }, { "epoch": 0.1990413020455939, "grad_norm": 0.5781660079956055, "learning_rate": 9.175408246033458e-05, "loss": 1.7258, "step": 3571 }, { "epoch": 0.19909704029875705, "grad_norm": 0.5230069160461426, "learning_rate": 9.17491833461718e-05, "loss": 1.5918, "step": 3572 }, { "epoch": 0.19915277855192018, "grad_norm": 0.54449063539505, "learning_rate": 9.174428290796833e-05, "loss": 1.4328, "step": 3573 }, { "epoch": 0.19920851680508334, "grad_norm": 0.5652233958244324, "learning_rate": 9.173938114587957e-05, "loss": 1.6627, "step": 3574 }, { "epoch": 0.19926425505824646, "grad_norm": 0.5487927198410034, "learning_rate": 9.173447806006102e-05, "loss": 1.6238, "step": 3575 }, { "epoch": 0.19931999331140962, "grad_norm": 0.5450085401535034, "learning_rate": 9.172957365066815e-05, "loss": 1.8033, "step": 3576 }, { "epoch": 0.19937573156457278, "grad_norm": 0.5951147079467773, "learning_rate": 9.17246679178565e-05, "loss": 2.0117, "step": 3577 }, { "epoch": 0.1994314698177359, "grad_norm": 0.5555893778800964, "learning_rate": 9.171976086178164e-05, "loss": 1.6994, "step": 3578 }, { "epoch": 0.19948720807089906, "grad_norm": 0.4888277053833008, "learning_rate": 9.171485248259924e-05, "loss": 1.555, "step": 3579 }, { "epoch": 0.1995429463240622, "grad_norm": 0.5293035507202148, "learning_rate": 9.170994278046492e-05, "loss": 1.7463, "step": 3580 }, { "epoch": 0.19959868457722535, "grad_norm": 0.544032096862793, "learning_rate": 9.17050317555344e-05, "loss": 1.8112, "step": 3581 }, { "epoch": 0.1996544228303885, "grad_norm": 0.5483592748641968, "learning_rate": 9.170011940796341e-05, "loss": 1.7906, "step": 3582 }, { "epoch": 0.19971016108355163, "grad_norm": 0.6069881319999695, "learning_rate": 9.16952057379078e-05, "loss": 2.0624, "step": 3583 }, { "epoch": 0.1997658993367148, "grad_norm": 0.5667694211006165, "learning_rate": 9.169029074552333e-05, "loss": 1.8233, "step": 3584 }, { "epoch": 0.19982163758987792, "grad_norm": 0.5053529739379883, "learning_rate": 9.168537443096592e-05, "loss": 1.6512, "step": 3585 }, { "epoch": 0.19987737584304108, "grad_norm": 0.5334288477897644, "learning_rate": 9.168045679439149e-05, "loss": 1.5675, "step": 3586 }, { "epoch": 0.19993311409620423, "grad_norm": 0.61188805103302, "learning_rate": 9.167553783595597e-05, "loss": 1.8834, "step": 3587 }, { "epoch": 0.19998885234936736, "grad_norm": 0.5691487193107605, "learning_rate": 9.167061755581539e-05, "loss": 1.7663, "step": 3588 }, { "epoch": 0.20004459060253052, "grad_norm": 0.5586220026016235, "learning_rate": 9.166569595412575e-05, "loss": 1.8832, "step": 3589 }, { "epoch": 0.20010032885569365, "grad_norm": 0.4987550675868988, "learning_rate": 9.166077303104319e-05, "loss": 1.661, "step": 3590 }, { "epoch": 0.2001560671088568, "grad_norm": 0.5463746190071106, "learning_rate": 9.165584878672378e-05, "loss": 1.6764, "step": 3591 }, { "epoch": 0.20021180536201996, "grad_norm": 0.5752919316291809, "learning_rate": 9.165092322132374e-05, "loss": 1.6847, "step": 3592 }, { "epoch": 0.2002675436151831, "grad_norm": 0.5271925330162048, "learning_rate": 9.164599633499925e-05, "loss": 1.7428, "step": 3593 }, { "epoch": 0.20032328186834625, "grad_norm": 0.4875536262989044, "learning_rate": 9.164106812790657e-05, "loss": 1.3011, "step": 3594 }, { "epoch": 0.2003790201215094, "grad_norm": 0.5993346571922302, "learning_rate": 9.1636138600202e-05, "loss": 1.8065, "step": 3595 }, { "epoch": 0.20043475837467253, "grad_norm": 0.5418604612350464, "learning_rate": 9.163120775204187e-05, "loss": 1.6812, "step": 3596 }, { "epoch": 0.2004904966278357, "grad_norm": 0.5411487817764282, "learning_rate": 9.162627558358255e-05, "loss": 1.8109, "step": 3597 }, { "epoch": 0.20054623488099882, "grad_norm": 0.5583702325820923, "learning_rate": 9.162134209498046e-05, "loss": 1.8183, "step": 3598 }, { "epoch": 0.20060197313416198, "grad_norm": 0.6028481721878052, "learning_rate": 9.161640728639207e-05, "loss": 1.8642, "step": 3599 }, { "epoch": 0.20065771138732513, "grad_norm": 0.5424187183380127, "learning_rate": 9.161147115797388e-05, "loss": 1.8178, "step": 3600 }, { "epoch": 0.20071344964048826, "grad_norm": 0.6147588491439819, "learning_rate": 9.160653370988243e-05, "loss": 1.7343, "step": 3601 }, { "epoch": 0.20076918789365142, "grad_norm": 0.5581020712852478, "learning_rate": 9.160159494227434e-05, "loss": 1.713, "step": 3602 }, { "epoch": 0.20082492614681455, "grad_norm": 0.5363709330558777, "learning_rate": 9.15966548553062e-05, "loss": 1.8839, "step": 3603 }, { "epoch": 0.2008806643999777, "grad_norm": 0.5731095671653748, "learning_rate": 9.159171344913469e-05, "loss": 1.8919, "step": 3604 }, { "epoch": 0.20093640265314086, "grad_norm": 0.5256056785583496, "learning_rate": 9.158677072391653e-05, "loss": 1.7236, "step": 3605 }, { "epoch": 0.200992140906304, "grad_norm": 0.5467107892036438, "learning_rate": 9.158182667980846e-05, "loss": 1.6551, "step": 3606 }, { "epoch": 0.20104787915946715, "grad_norm": 0.5082773566246033, "learning_rate": 9.157688131696729e-05, "loss": 1.6537, "step": 3607 }, { "epoch": 0.20110361741263028, "grad_norm": 0.5320789813995361, "learning_rate": 9.157193463554986e-05, "loss": 1.6112, "step": 3608 }, { "epoch": 0.20115935566579343, "grad_norm": 0.5658825635910034, "learning_rate": 9.156698663571305e-05, "loss": 1.6377, "step": 3609 }, { "epoch": 0.2012150939189566, "grad_norm": 0.593096137046814, "learning_rate": 9.156203731761376e-05, "loss": 1.7296, "step": 3610 }, { "epoch": 0.20127083217211972, "grad_norm": 0.5300352573394775, "learning_rate": 9.155708668140899e-05, "loss": 1.4073, "step": 3611 }, { "epoch": 0.20132657042528287, "grad_norm": 0.5179193019866943, "learning_rate": 9.155213472725571e-05, "loss": 1.5432, "step": 3612 }, { "epoch": 0.201382308678446, "grad_norm": 0.5618082880973816, "learning_rate": 9.154718145531098e-05, "loss": 1.79, "step": 3613 }, { "epoch": 0.20143804693160916, "grad_norm": 0.47643256187438965, "learning_rate": 9.15422268657319e-05, "loss": 1.4084, "step": 3614 }, { "epoch": 0.20149378518477232, "grad_norm": 0.6415194272994995, "learning_rate": 9.15372709586756e-05, "loss": 1.9196, "step": 3615 }, { "epoch": 0.20154952343793545, "grad_norm": 0.5599740147590637, "learning_rate": 9.153231373429922e-05, "loss": 1.8508, "step": 3616 }, { "epoch": 0.2016052616910986, "grad_norm": 0.5777899622917175, "learning_rate": 9.152735519276002e-05, "loss": 1.8367, "step": 3617 }, { "epoch": 0.20166099994426176, "grad_norm": 0.5653935670852661, "learning_rate": 9.152239533421523e-05, "loss": 1.5819, "step": 3618 }, { "epoch": 0.2017167381974249, "grad_norm": 0.5558584928512573, "learning_rate": 9.151743415882215e-05, "loss": 1.8245, "step": 3619 }, { "epoch": 0.20177247645058805, "grad_norm": 0.5481976866722107, "learning_rate": 9.151247166673811e-05, "loss": 1.6422, "step": 3620 }, { "epoch": 0.20182821470375117, "grad_norm": 0.49504461884498596, "learning_rate": 9.150750785812052e-05, "loss": 1.5992, "step": 3621 }, { "epoch": 0.20188395295691433, "grad_norm": 0.6056009531021118, "learning_rate": 9.150254273312677e-05, "loss": 1.7729, "step": 3622 }, { "epoch": 0.2019396912100775, "grad_norm": 0.5418253540992737, "learning_rate": 9.149757629191436e-05, "loss": 1.8279, "step": 3623 }, { "epoch": 0.20199542946324062, "grad_norm": 0.5427140593528748, "learning_rate": 9.149260853464077e-05, "loss": 1.6135, "step": 3624 }, { "epoch": 0.20205116771640377, "grad_norm": 0.5552391409873962, "learning_rate": 9.148763946146354e-05, "loss": 1.6617, "step": 3625 }, { "epoch": 0.2021069059695669, "grad_norm": 0.5886726975440979, "learning_rate": 9.148266907254031e-05, "loss": 1.9072, "step": 3626 }, { "epoch": 0.20216264422273006, "grad_norm": 0.587967038154602, "learning_rate": 9.147769736802864e-05, "loss": 1.7807, "step": 3627 }, { "epoch": 0.20221838247589322, "grad_norm": 0.5265384912490845, "learning_rate": 9.147272434808627e-05, "loss": 1.5633, "step": 3628 }, { "epoch": 0.20227412072905634, "grad_norm": 0.5282620191574097, "learning_rate": 9.146775001287088e-05, "loss": 1.579, "step": 3629 }, { "epoch": 0.2023298589822195, "grad_norm": 0.5758345723152161, "learning_rate": 9.146277436254022e-05, "loss": 1.8881, "step": 3630 }, { "epoch": 0.20238559723538263, "grad_norm": 0.5375788807868958, "learning_rate": 9.145779739725213e-05, "loss": 1.7915, "step": 3631 }, { "epoch": 0.2024413354885458, "grad_norm": 0.5047256350517273, "learning_rate": 9.14528191171644e-05, "loss": 1.8006, "step": 3632 }, { "epoch": 0.20249707374170894, "grad_norm": 0.5424186587333679, "learning_rate": 9.144783952243493e-05, "loss": 1.5753, "step": 3633 }, { "epoch": 0.20255281199487207, "grad_norm": 0.5288758277893066, "learning_rate": 9.144285861322166e-05, "loss": 1.7276, "step": 3634 }, { "epoch": 0.20260855024803523, "grad_norm": 0.638491690158844, "learning_rate": 9.143787638968254e-05, "loss": 1.8898, "step": 3635 }, { "epoch": 0.20266428850119836, "grad_norm": 0.5804757475852966, "learning_rate": 9.143289285197558e-05, "loss": 1.9973, "step": 3636 }, { "epoch": 0.20272002675436152, "grad_norm": 0.6197081804275513, "learning_rate": 9.142790800025883e-05, "loss": 1.7459, "step": 3637 }, { "epoch": 0.20277576500752467, "grad_norm": 0.6034955382347107, "learning_rate": 9.142292183469039e-05, "loss": 1.9412, "step": 3638 }, { "epoch": 0.2028315032606878, "grad_norm": 0.5404736995697021, "learning_rate": 9.141793435542836e-05, "loss": 1.6073, "step": 3639 }, { "epoch": 0.20288724151385096, "grad_norm": 0.48670318722724915, "learning_rate": 9.141294556263096e-05, "loss": 1.5109, "step": 3640 }, { "epoch": 0.20294297976701411, "grad_norm": 0.5840024948120117, "learning_rate": 9.140795545645636e-05, "loss": 1.7593, "step": 3641 }, { "epoch": 0.20299871802017724, "grad_norm": 0.5371603965759277, "learning_rate": 9.140296403706284e-05, "loss": 1.6055, "step": 3642 }, { "epoch": 0.2030544562733404, "grad_norm": 1.0509992837905884, "learning_rate": 9.13979713046087e-05, "loss": 2.0113, "step": 3643 }, { "epoch": 0.20311019452650353, "grad_norm": 0.49479854106903076, "learning_rate": 9.139297725925229e-05, "loss": 1.516, "step": 3644 }, { "epoch": 0.20316593277966669, "grad_norm": 0.5389636754989624, "learning_rate": 9.138798190115196e-05, "loss": 1.9002, "step": 3645 }, { "epoch": 0.20322167103282984, "grad_norm": 0.5524114370346069, "learning_rate": 9.138298523046617e-05, "loss": 1.6288, "step": 3646 }, { "epoch": 0.20327740928599297, "grad_norm": 0.49681249260902405, "learning_rate": 9.137798724735336e-05, "loss": 1.4397, "step": 3647 }, { "epoch": 0.20333314753915613, "grad_norm": 0.6418421268463135, "learning_rate": 9.137298795197204e-05, "loss": 2.1691, "step": 3648 }, { "epoch": 0.20338888579231926, "grad_norm": 0.5589434504508972, "learning_rate": 9.136798734448077e-05, "loss": 1.781, "step": 3649 }, { "epoch": 0.20344462404548241, "grad_norm": 0.5447176694869995, "learning_rate": 9.136298542503814e-05, "loss": 1.6205, "step": 3650 }, { "epoch": 0.20350036229864557, "grad_norm": 0.5343891978263855, "learning_rate": 9.135798219380276e-05, "loss": 1.7727, "step": 3651 }, { "epoch": 0.2035561005518087, "grad_norm": 0.5254631638526917, "learning_rate": 9.135297765093333e-05, "loss": 1.7057, "step": 3652 }, { "epoch": 0.20361183880497186, "grad_norm": 0.5393111109733582, "learning_rate": 9.134797179658854e-05, "loss": 1.7132, "step": 3653 }, { "epoch": 0.20366757705813499, "grad_norm": 0.5616254806518555, "learning_rate": 9.134296463092717e-05, "loss": 1.9128, "step": 3654 }, { "epoch": 0.20372331531129814, "grad_norm": 0.5558052659034729, "learning_rate": 9.133795615410801e-05, "loss": 1.6986, "step": 3655 }, { "epoch": 0.2037790535644613, "grad_norm": 0.5616979002952576, "learning_rate": 9.13329463662899e-05, "loss": 1.9381, "step": 3656 }, { "epoch": 0.20383479181762443, "grad_norm": 0.5200750827789307, "learning_rate": 9.132793526763171e-05, "loss": 1.6176, "step": 3657 }, { "epoch": 0.20389053007078758, "grad_norm": 0.5086760520935059, "learning_rate": 9.132292285829237e-05, "loss": 1.5035, "step": 3658 }, { "epoch": 0.20394626832395074, "grad_norm": 0.5122929215431213, "learning_rate": 9.131790913843086e-05, "loss": 1.6288, "step": 3659 }, { "epoch": 0.20400200657711387, "grad_norm": 0.5770255923271179, "learning_rate": 9.131289410820616e-05, "loss": 1.71, "step": 3660 }, { "epoch": 0.20405774483027703, "grad_norm": 0.5811052322387695, "learning_rate": 9.130787776777734e-05, "loss": 1.9395, "step": 3661 }, { "epoch": 0.20411348308344016, "grad_norm": 0.5475841164588928, "learning_rate": 9.130286011730347e-05, "loss": 1.8358, "step": 3662 }, { "epoch": 0.2041692213366033, "grad_norm": 0.5167744755744934, "learning_rate": 9.129784115694369e-05, "loss": 1.602, "step": 3663 }, { "epoch": 0.20422495958976647, "grad_norm": 0.5313771963119507, "learning_rate": 9.129282088685718e-05, "loss": 1.7868, "step": 3664 }, { "epoch": 0.2042806978429296, "grad_norm": 0.5149242877960205, "learning_rate": 9.128779930720313e-05, "loss": 1.6943, "step": 3665 }, { "epoch": 0.20433643609609276, "grad_norm": 0.5548785924911499, "learning_rate": 9.128277641814082e-05, "loss": 1.9083, "step": 3666 }, { "epoch": 0.20439217434925588, "grad_norm": 0.5865716338157654, "learning_rate": 9.127775221982954e-05, "loss": 1.9183, "step": 3667 }, { "epoch": 0.20444791260241904, "grad_norm": 0.5036227703094482, "learning_rate": 9.127272671242861e-05, "loss": 1.6097, "step": 3668 }, { "epoch": 0.2045036508555822, "grad_norm": 0.5178596377372742, "learning_rate": 9.126769989609745e-05, "loss": 1.7121, "step": 3669 }, { "epoch": 0.20455938910874533, "grad_norm": 0.585189938545227, "learning_rate": 9.126267177099543e-05, "loss": 1.8327, "step": 3670 }, { "epoch": 0.20461512736190848, "grad_norm": 0.5853554606437683, "learning_rate": 9.125764233728206e-05, "loss": 1.9047, "step": 3671 }, { "epoch": 0.2046708656150716, "grad_norm": 0.5730652213096619, "learning_rate": 9.125261159511682e-05, "loss": 1.8311, "step": 3672 }, { "epoch": 0.20472660386823477, "grad_norm": 0.5045105814933777, "learning_rate": 9.124757954465925e-05, "loss": 1.5241, "step": 3673 }, { "epoch": 0.20478234212139793, "grad_norm": 0.5725773572921753, "learning_rate": 9.124254618606897e-05, "loss": 1.6949, "step": 3674 }, { "epoch": 0.20483808037456105, "grad_norm": 0.5756915211677551, "learning_rate": 9.123751151950557e-05, "loss": 1.8553, "step": 3675 }, { "epoch": 0.2048938186277242, "grad_norm": 0.5354653000831604, "learning_rate": 9.123247554512873e-05, "loss": 1.7906, "step": 3676 }, { "epoch": 0.20494955688088734, "grad_norm": 0.5941489934921265, "learning_rate": 9.122743826309819e-05, "loss": 1.7721, "step": 3677 }, { "epoch": 0.2050052951340505, "grad_norm": 0.5832119584083557, "learning_rate": 9.122239967357366e-05, "loss": 1.9673, "step": 3678 }, { "epoch": 0.20506103338721365, "grad_norm": 0.6178232431411743, "learning_rate": 9.121735977671495e-05, "loss": 2.0516, "step": 3679 }, { "epoch": 0.20511677164037678, "grad_norm": 0.5315244197845459, "learning_rate": 9.121231857268191e-05, "loss": 1.5958, "step": 3680 }, { "epoch": 0.20517250989353994, "grad_norm": 0.5662999153137207, "learning_rate": 9.120727606163442e-05, "loss": 1.6989, "step": 3681 }, { "epoch": 0.2052282481467031, "grad_norm": 0.49450522661209106, "learning_rate": 9.120223224373238e-05, "loss": 1.4492, "step": 3682 }, { "epoch": 0.20528398639986623, "grad_norm": 0.572935163974762, "learning_rate": 9.119718711913575e-05, "loss": 1.6674, "step": 3683 }, { "epoch": 0.20533972465302938, "grad_norm": 0.5418963432312012, "learning_rate": 9.119214068800456e-05, "loss": 1.6326, "step": 3684 }, { "epoch": 0.2053954629061925, "grad_norm": 0.5970882773399353, "learning_rate": 9.118709295049883e-05, "loss": 1.9069, "step": 3685 }, { "epoch": 0.20545120115935567, "grad_norm": 0.5530537962913513, "learning_rate": 9.118204390677863e-05, "loss": 1.6096, "step": 3686 }, { "epoch": 0.20550693941251882, "grad_norm": 0.5641506314277649, "learning_rate": 9.117699355700412e-05, "loss": 1.7118, "step": 3687 }, { "epoch": 0.20556267766568195, "grad_norm": 0.6086058616638184, "learning_rate": 9.117194190133545e-05, "loss": 1.713, "step": 3688 }, { "epoch": 0.2056184159188451, "grad_norm": 0.577290952205658, "learning_rate": 9.116688893993284e-05, "loss": 1.8858, "step": 3689 }, { "epoch": 0.20567415417200824, "grad_norm": 0.5066075325012207, "learning_rate": 9.116183467295651e-05, "loss": 1.5922, "step": 3690 }, { "epoch": 0.2057298924251714, "grad_norm": 0.5287824273109436, "learning_rate": 9.115677910056681e-05, "loss": 1.4441, "step": 3691 }, { "epoch": 0.20578563067833455, "grad_norm": 0.62456214427948, "learning_rate": 9.115172222292401e-05, "loss": 1.9545, "step": 3692 }, { "epoch": 0.20584136893149768, "grad_norm": 0.5801160335540771, "learning_rate": 9.114666404018853e-05, "loss": 2.0095, "step": 3693 }, { "epoch": 0.20589710718466084, "grad_norm": 0.5162177085876465, "learning_rate": 9.114160455252074e-05, "loss": 1.7295, "step": 3694 }, { "epoch": 0.20595284543782397, "grad_norm": 0.5912075042724609, "learning_rate": 9.113654376008115e-05, "loss": 1.787, "step": 3695 }, { "epoch": 0.20600858369098712, "grad_norm": 0.5578693747520447, "learning_rate": 9.113148166303023e-05, "loss": 1.6167, "step": 3696 }, { "epoch": 0.20606432194415028, "grad_norm": 0.5576518177986145, "learning_rate": 9.112641826152853e-05, "loss": 1.7855, "step": 3697 }, { "epoch": 0.2061200601973134, "grad_norm": 0.5475178360939026, "learning_rate": 9.11213535557366e-05, "loss": 1.7013, "step": 3698 }, { "epoch": 0.20617579845047657, "grad_norm": 0.5434138178825378, "learning_rate": 9.111628754581512e-05, "loss": 1.7804, "step": 3699 }, { "epoch": 0.2062315367036397, "grad_norm": 0.5596892237663269, "learning_rate": 9.111122023192471e-05, "loss": 1.8347, "step": 3700 }, { "epoch": 0.20628727495680285, "grad_norm": 0.5505380034446716, "learning_rate": 9.110615161422609e-05, "loss": 1.878, "step": 3701 }, { "epoch": 0.206343013209966, "grad_norm": 0.6178278923034668, "learning_rate": 9.110108169288001e-05, "loss": 1.7626, "step": 3702 }, { "epoch": 0.20639875146312914, "grad_norm": 0.5460211038589478, "learning_rate": 9.109601046804726e-05, "loss": 1.8064, "step": 3703 }, { "epoch": 0.2064544897162923, "grad_norm": 0.5765804052352905, "learning_rate": 9.109093793988865e-05, "loss": 1.8228, "step": 3704 }, { "epoch": 0.20651022796945545, "grad_norm": 0.5335574746131897, "learning_rate": 9.108586410856508e-05, "loss": 1.8011, "step": 3705 }, { "epoch": 0.20656596622261858, "grad_norm": 0.5536273717880249, "learning_rate": 9.108078897423743e-05, "loss": 1.8751, "step": 3706 }, { "epoch": 0.20662170447578174, "grad_norm": 0.5405413508415222, "learning_rate": 9.107571253706668e-05, "loss": 1.8607, "step": 3707 }, { "epoch": 0.20667744272894487, "grad_norm": 0.5240110158920288, "learning_rate": 9.107063479721383e-05, "loss": 1.4375, "step": 3708 }, { "epoch": 0.20673318098210802, "grad_norm": 0.4756803512573242, "learning_rate": 9.106555575483988e-05, "loss": 1.3254, "step": 3709 }, { "epoch": 0.20678891923527118, "grad_norm": 0.5738046765327454, "learning_rate": 9.106047541010593e-05, "loss": 1.776, "step": 3710 }, { "epoch": 0.2068446574884343, "grad_norm": 0.5442799925804138, "learning_rate": 9.105539376317312e-05, "loss": 1.7099, "step": 3711 }, { "epoch": 0.20690039574159746, "grad_norm": 0.5695345401763916, "learning_rate": 9.105031081420259e-05, "loss": 1.6337, "step": 3712 }, { "epoch": 0.2069561339947606, "grad_norm": 0.4725694954395294, "learning_rate": 9.104522656335553e-05, "loss": 1.4172, "step": 3713 }, { "epoch": 0.20701187224792375, "grad_norm": 0.5137088894844055, "learning_rate": 9.10401410107932e-05, "loss": 1.6826, "step": 3714 }, { "epoch": 0.2070676105010869, "grad_norm": 0.5813738703727722, "learning_rate": 9.103505415667686e-05, "loss": 1.9881, "step": 3715 }, { "epoch": 0.20712334875425004, "grad_norm": 0.5776458382606506, "learning_rate": 9.102996600116786e-05, "loss": 1.8194, "step": 3716 }, { "epoch": 0.2071790870074132, "grad_norm": 0.6059629917144775, "learning_rate": 9.102487654442758e-05, "loss": 1.9822, "step": 3717 }, { "epoch": 0.20723482526057632, "grad_norm": 0.5408186912536621, "learning_rate": 9.101978578661738e-05, "loss": 1.8422, "step": 3718 }, { "epoch": 0.20729056351373948, "grad_norm": 0.5199152231216431, "learning_rate": 9.101469372789874e-05, "loss": 1.6269, "step": 3719 }, { "epoch": 0.20734630176690264, "grad_norm": 0.4990878105163574, "learning_rate": 9.100960036843317e-05, "loss": 1.6431, "step": 3720 }, { "epoch": 0.20740204002006576, "grad_norm": 0.539283812046051, "learning_rate": 9.100450570838216e-05, "loss": 1.6332, "step": 3721 }, { "epoch": 0.20745777827322892, "grad_norm": 0.4963357150554657, "learning_rate": 9.09994097479073e-05, "loss": 1.4083, "step": 3722 }, { "epoch": 0.20751351652639205, "grad_norm": 0.5257975459098816, "learning_rate": 9.099431248717022e-05, "loss": 1.673, "step": 3723 }, { "epoch": 0.2075692547795552, "grad_norm": 0.5869825482368469, "learning_rate": 9.098921392633255e-05, "loss": 1.8618, "step": 3724 }, { "epoch": 0.20762499303271836, "grad_norm": 0.5818216800689697, "learning_rate": 9.0984114065556e-05, "loss": 1.761, "step": 3725 }, { "epoch": 0.2076807312858815, "grad_norm": 0.5281986594200134, "learning_rate": 9.097901290500231e-05, "loss": 1.5652, "step": 3726 }, { "epoch": 0.20773646953904465, "grad_norm": 0.5425719618797302, "learning_rate": 9.097391044483325e-05, "loss": 1.6899, "step": 3727 }, { "epoch": 0.2077922077922078, "grad_norm": 0.5924318432807922, "learning_rate": 9.096880668521066e-05, "loss": 2.0674, "step": 3728 }, { "epoch": 0.20784794604537093, "grad_norm": 0.5444379448890686, "learning_rate": 9.096370162629637e-05, "loss": 1.8427, "step": 3729 }, { "epoch": 0.2079036842985341, "grad_norm": 0.5292965769767761, "learning_rate": 9.09585952682523e-05, "loss": 1.6487, "step": 3730 }, { "epoch": 0.20795942255169722, "grad_norm": 0.5337923765182495, "learning_rate": 9.09534876112404e-05, "loss": 1.7153, "step": 3731 }, { "epoch": 0.20801516080486038, "grad_norm": 0.5366414785385132, "learning_rate": 9.094837865542265e-05, "loss": 1.7336, "step": 3732 }, { "epoch": 0.20807089905802353, "grad_norm": 0.5158184766769409, "learning_rate": 9.094326840096106e-05, "loss": 1.4747, "step": 3733 }, { "epoch": 0.20812663731118666, "grad_norm": 0.5793300867080688, "learning_rate": 9.093815684801772e-05, "loss": 1.67, "step": 3734 }, { "epoch": 0.20818237556434982, "grad_norm": 0.57293701171875, "learning_rate": 9.093304399675474e-05, "loss": 1.8801, "step": 3735 }, { "epoch": 0.20823811381751295, "grad_norm": 0.514213502407074, "learning_rate": 9.092792984733425e-05, "loss": 1.5878, "step": 3736 }, { "epoch": 0.2082938520706761, "grad_norm": 0.5890586376190186, "learning_rate": 9.092281439991846e-05, "loss": 1.9247, "step": 3737 }, { "epoch": 0.20834959032383926, "grad_norm": 0.5602766275405884, "learning_rate": 9.091769765466959e-05, "loss": 1.7421, "step": 3738 }, { "epoch": 0.2084053285770024, "grad_norm": 0.586161196231842, "learning_rate": 9.091257961174991e-05, "loss": 2.0567, "step": 3739 }, { "epoch": 0.20846106683016555, "grad_norm": 0.5134695768356323, "learning_rate": 9.090746027132175e-05, "loss": 1.6464, "step": 3740 }, { "epoch": 0.20851680508332868, "grad_norm": 0.5447134375572205, "learning_rate": 9.090233963354746e-05, "loss": 1.8313, "step": 3741 }, { "epoch": 0.20857254333649183, "grad_norm": 0.5118534564971924, "learning_rate": 9.089721769858943e-05, "loss": 1.629, "step": 3742 }, { "epoch": 0.208628281589655, "grad_norm": 0.5482544898986816, "learning_rate": 9.08920944666101e-05, "loss": 1.6353, "step": 3743 }, { "epoch": 0.20868401984281812, "grad_norm": 0.542334258556366, "learning_rate": 9.088696993777194e-05, "loss": 1.6882, "step": 3744 }, { "epoch": 0.20873975809598128, "grad_norm": 0.527746319770813, "learning_rate": 9.08818441122375e-05, "loss": 1.5986, "step": 3745 }, { "epoch": 0.2087954963491444, "grad_norm": 0.5480045080184937, "learning_rate": 9.08767169901693e-05, "loss": 1.6445, "step": 3746 }, { "epoch": 0.20885123460230756, "grad_norm": 0.5573908686637878, "learning_rate": 9.087158857172999e-05, "loss": 1.851, "step": 3747 }, { "epoch": 0.20890697285547072, "grad_norm": 0.5698862671852112, "learning_rate": 9.086645885708218e-05, "loss": 1.6359, "step": 3748 }, { "epoch": 0.20896271110863385, "grad_norm": 0.557510256767273, "learning_rate": 9.086132784638857e-05, "loss": 1.7563, "step": 3749 }, { "epoch": 0.209018449361797, "grad_norm": 0.5576832890510559, "learning_rate": 9.085619553981187e-05, "loss": 1.8104, "step": 3750 }, { "epoch": 0.20907418761496016, "grad_norm": 0.5342584848403931, "learning_rate": 9.085106193751485e-05, "loss": 1.4561, "step": 3751 }, { "epoch": 0.2091299258681233, "grad_norm": 0.5547382235527039, "learning_rate": 9.084592703966033e-05, "loss": 1.6986, "step": 3752 }, { "epoch": 0.20918566412128645, "grad_norm": 0.5614180564880371, "learning_rate": 9.084079084641115e-05, "loss": 1.7837, "step": 3753 }, { "epoch": 0.20924140237444958, "grad_norm": 0.5065221786499023, "learning_rate": 9.083565335793021e-05, "loss": 1.7262, "step": 3754 }, { "epoch": 0.20929714062761273, "grad_norm": 0.5504621863365173, "learning_rate": 9.083051457438043e-05, "loss": 1.7848, "step": 3755 }, { "epoch": 0.2093528788807759, "grad_norm": 0.5882393717765808, "learning_rate": 9.082537449592479e-05, "loss": 2.0356, "step": 3756 }, { "epoch": 0.20940861713393902, "grad_norm": 0.6157543063163757, "learning_rate": 9.08202331227263e-05, "loss": 1.9959, "step": 3757 }, { "epoch": 0.20946435538710217, "grad_norm": 0.5493510961532593, "learning_rate": 9.0815090454948e-05, "loss": 1.7899, "step": 3758 }, { "epoch": 0.2095200936402653, "grad_norm": 0.5107924938201904, "learning_rate": 9.0809946492753e-05, "loss": 1.4062, "step": 3759 }, { "epoch": 0.20957583189342846, "grad_norm": 0.5571010112762451, "learning_rate": 9.080480123630444e-05, "loss": 1.6807, "step": 3760 }, { "epoch": 0.20963157014659162, "grad_norm": 0.5510861277580261, "learning_rate": 9.07996546857655e-05, "loss": 1.9714, "step": 3761 }, { "epoch": 0.20968730839975475, "grad_norm": 0.531609296798706, "learning_rate": 9.07945068412994e-05, "loss": 1.7811, "step": 3762 }, { "epoch": 0.2097430466529179, "grad_norm": 0.5203907489776611, "learning_rate": 9.078935770306938e-05, "loss": 1.7003, "step": 3763 }, { "epoch": 0.20979878490608103, "grad_norm": 0.5677714347839355, "learning_rate": 9.078420727123874e-05, "loss": 2.0188, "step": 3764 }, { "epoch": 0.2098545231592442, "grad_norm": 0.5568066239356995, "learning_rate": 9.077905554597086e-05, "loss": 1.7745, "step": 3765 }, { "epoch": 0.20991026141240735, "grad_norm": 0.564201831817627, "learning_rate": 9.077390252742907e-05, "loss": 1.7723, "step": 3766 }, { "epoch": 0.20996599966557047, "grad_norm": 0.569828450679779, "learning_rate": 9.076874821577683e-05, "loss": 1.7731, "step": 3767 }, { "epoch": 0.21002173791873363, "grad_norm": 0.5601812601089478, "learning_rate": 9.07635926111776e-05, "loss": 1.6495, "step": 3768 }, { "epoch": 0.21007747617189676, "grad_norm": 0.6098621487617493, "learning_rate": 9.075843571379488e-05, "loss": 1.9732, "step": 3769 }, { "epoch": 0.21013321442505992, "grad_norm": 0.5688888430595398, "learning_rate": 9.075327752379221e-05, "loss": 1.8381, "step": 3770 }, { "epoch": 0.21018895267822307, "grad_norm": 0.5635893940925598, "learning_rate": 9.074811804133318e-05, "loss": 1.7662, "step": 3771 }, { "epoch": 0.2102446909313862, "grad_norm": 0.5132915377616882, "learning_rate": 9.074295726658144e-05, "loss": 1.6434, "step": 3772 }, { "epoch": 0.21030042918454936, "grad_norm": 0.5504310727119446, "learning_rate": 9.073779519970065e-05, "loss": 1.7831, "step": 3773 }, { "epoch": 0.21035616743771252, "grad_norm": 0.5861356258392334, "learning_rate": 9.07326318408545e-05, "loss": 1.9085, "step": 3774 }, { "epoch": 0.21041190569087564, "grad_norm": 0.5746229887008667, "learning_rate": 9.072746719020676e-05, "loss": 1.851, "step": 3775 }, { "epoch": 0.2104676439440388, "grad_norm": 0.5618278980255127, "learning_rate": 9.072230124792121e-05, "loss": 1.9007, "step": 3776 }, { "epoch": 0.21052338219720193, "grad_norm": 0.5574671030044556, "learning_rate": 9.07171340141617e-05, "loss": 1.7664, "step": 3777 }, { "epoch": 0.2105791204503651, "grad_norm": 0.5418394207954407, "learning_rate": 9.071196548909208e-05, "loss": 1.5912, "step": 3778 }, { "epoch": 0.21063485870352824, "grad_norm": 0.5579066872596741, "learning_rate": 9.070679567287631e-05, "loss": 1.8595, "step": 3779 }, { "epoch": 0.21069059695669137, "grad_norm": 0.5038254261016846, "learning_rate": 9.07016245656783e-05, "loss": 1.5864, "step": 3780 }, { "epoch": 0.21074633520985453, "grad_norm": 0.5985908508300781, "learning_rate": 9.069645216766208e-05, "loss": 1.8166, "step": 3781 }, { "epoch": 0.21080207346301766, "grad_norm": 0.5343535542488098, "learning_rate": 9.069127847899166e-05, "loss": 1.7482, "step": 3782 }, { "epoch": 0.21085781171618082, "grad_norm": 0.513039231300354, "learning_rate": 9.068610349983113e-05, "loss": 1.7065, "step": 3783 }, { "epoch": 0.21091354996934397, "grad_norm": 0.5761904716491699, "learning_rate": 9.068092723034462e-05, "loss": 1.7781, "step": 3784 }, { "epoch": 0.2109692882225071, "grad_norm": 0.5832051634788513, "learning_rate": 9.067574967069628e-05, "loss": 1.7871, "step": 3785 }, { "epoch": 0.21102502647567026, "grad_norm": 0.9756948947906494, "learning_rate": 9.067057082105032e-05, "loss": 1.9512, "step": 3786 }, { "epoch": 0.2110807647288334, "grad_norm": 0.5692908763885498, "learning_rate": 9.066539068157098e-05, "loss": 1.4585, "step": 3787 }, { "epoch": 0.21113650298199654, "grad_norm": 0.5954088568687439, "learning_rate": 9.066020925242256e-05, "loss": 1.9236, "step": 3788 }, { "epoch": 0.2111922412351597, "grad_norm": 0.5660640597343445, "learning_rate": 9.065502653376935e-05, "loss": 1.67, "step": 3789 }, { "epoch": 0.21124797948832283, "grad_norm": 0.5779823064804077, "learning_rate": 9.064984252577573e-05, "loss": 1.8769, "step": 3790 }, { "epoch": 0.21130371774148599, "grad_norm": 0.5380722880363464, "learning_rate": 9.064465722860611e-05, "loss": 1.6658, "step": 3791 }, { "epoch": 0.21135945599464911, "grad_norm": 0.5925493836402893, "learning_rate": 9.063947064242495e-05, "loss": 1.7003, "step": 3792 }, { "epoch": 0.21141519424781227, "grad_norm": 0.5475820899009705, "learning_rate": 9.063428276739671e-05, "loss": 1.7658, "step": 3793 }, { "epoch": 0.21147093250097543, "grad_norm": 0.5608733296394348, "learning_rate": 9.062909360368595e-05, "loss": 1.7443, "step": 3794 }, { "epoch": 0.21152667075413856, "grad_norm": 0.5024067163467407, "learning_rate": 9.062390315145723e-05, "loss": 1.4051, "step": 3795 }, { "epoch": 0.2115824090073017, "grad_norm": 0.5922258496284485, "learning_rate": 9.061871141087515e-05, "loss": 1.6788, "step": 3796 }, { "epoch": 0.21163814726046487, "grad_norm": 0.5388537645339966, "learning_rate": 9.061351838210434e-05, "loss": 1.7306, "step": 3797 }, { "epoch": 0.211693885513628, "grad_norm": 0.5710194706916809, "learning_rate": 9.060832406530955e-05, "loss": 1.7759, "step": 3798 }, { "epoch": 0.21174962376679116, "grad_norm": 0.5648775696754456, "learning_rate": 9.060312846065548e-05, "loss": 1.8155, "step": 3799 }, { "epoch": 0.21180536201995429, "grad_norm": 0.5653148293495178, "learning_rate": 9.059793156830691e-05, "loss": 1.9103, "step": 3800 }, { "epoch": 0.21186110027311744, "grad_norm": 0.5372900366783142, "learning_rate": 9.059273338842864e-05, "loss": 1.6333, "step": 3801 }, { "epoch": 0.2119168385262806, "grad_norm": 0.6031267046928406, "learning_rate": 9.058753392118555e-05, "loss": 1.9328, "step": 3802 }, { "epoch": 0.21197257677944373, "grad_norm": 0.5510583519935608, "learning_rate": 9.058233316674252e-05, "loss": 1.6296, "step": 3803 }, { "epoch": 0.21202831503260688, "grad_norm": 0.5591006875038147, "learning_rate": 9.057713112526449e-05, "loss": 1.6512, "step": 3804 }, { "epoch": 0.21208405328577, "grad_norm": 0.5050638318061829, "learning_rate": 9.057192779691645e-05, "loss": 1.793, "step": 3805 }, { "epoch": 0.21213979153893317, "grad_norm": 0.5485342144966125, "learning_rate": 9.05667231818634e-05, "loss": 1.8989, "step": 3806 }, { "epoch": 0.21219552979209633, "grad_norm": 0.536729633808136, "learning_rate": 9.056151728027042e-05, "loss": 1.7235, "step": 3807 }, { "epoch": 0.21225126804525946, "grad_norm": 0.5280648469924927, "learning_rate": 9.055631009230262e-05, "loss": 1.779, "step": 3808 }, { "epoch": 0.2123070062984226, "grad_norm": 0.5353644490242004, "learning_rate": 9.05511016181251e-05, "loss": 1.9023, "step": 3809 }, { "epoch": 0.21236274455158574, "grad_norm": 0.5420893430709839, "learning_rate": 9.054589185790305e-05, "loss": 1.6484, "step": 3810 }, { "epoch": 0.2124184828047489, "grad_norm": 0.49997881054878235, "learning_rate": 9.054068081180173e-05, "loss": 1.6866, "step": 3811 }, { "epoch": 0.21247422105791205, "grad_norm": 0.540344774723053, "learning_rate": 9.05354684799864e-05, "loss": 1.6013, "step": 3812 }, { "epoch": 0.21252995931107518, "grad_norm": 0.5512065291404724, "learning_rate": 9.053025486262231e-05, "loss": 1.7741, "step": 3813 }, { "epoch": 0.21258569756423834, "grad_norm": 0.5562829375267029, "learning_rate": 9.052503995987488e-05, "loss": 1.7829, "step": 3814 }, { "epoch": 0.21264143581740147, "grad_norm": 0.528271496295929, "learning_rate": 9.051982377190944e-05, "loss": 1.6395, "step": 3815 }, { "epoch": 0.21269717407056463, "grad_norm": 0.5158810019493103, "learning_rate": 9.051460629889144e-05, "loss": 1.4752, "step": 3816 }, { "epoch": 0.21275291232372778, "grad_norm": 0.5320451855659485, "learning_rate": 9.050938754098635e-05, "loss": 1.7896, "step": 3817 }, { "epoch": 0.2128086505768909, "grad_norm": 0.503190279006958, "learning_rate": 9.050416749835968e-05, "loss": 1.5488, "step": 3818 }, { "epoch": 0.21286438883005407, "grad_norm": 0.561086893081665, "learning_rate": 9.049894617117696e-05, "loss": 1.9669, "step": 3819 }, { "epoch": 0.21292012708321723, "grad_norm": 0.5414785146713257, "learning_rate": 9.04937235596038e-05, "loss": 1.761, "step": 3820 }, { "epoch": 0.21297586533638035, "grad_norm": 0.5729870796203613, "learning_rate": 9.04884996638058e-05, "loss": 1.7399, "step": 3821 }, { "epoch": 0.2130316035895435, "grad_norm": 0.5905429720878601, "learning_rate": 9.048327448394868e-05, "loss": 1.863, "step": 3822 }, { "epoch": 0.21308734184270664, "grad_norm": 0.5414051413536072, "learning_rate": 9.047804802019813e-05, "loss": 1.4662, "step": 3823 }, { "epoch": 0.2131430800958698, "grad_norm": 0.5677713751792908, "learning_rate": 9.047282027271988e-05, "loss": 1.7569, "step": 3824 }, { "epoch": 0.21319881834903295, "grad_norm": 0.5208271145820618, "learning_rate": 9.046759124167976e-05, "loss": 1.5647, "step": 3825 }, { "epoch": 0.21325455660219608, "grad_norm": 0.5930595397949219, "learning_rate": 9.046236092724357e-05, "loss": 1.8287, "step": 3826 }, { "epoch": 0.21331029485535924, "grad_norm": 0.5405799150466919, "learning_rate": 9.045712932957722e-05, "loss": 1.7175, "step": 3827 }, { "epoch": 0.21336603310852237, "grad_norm": 0.5690011382102966, "learning_rate": 9.045189644884661e-05, "loss": 1.9759, "step": 3828 }, { "epoch": 0.21342177136168552, "grad_norm": 0.5739631652832031, "learning_rate": 9.04466622852177e-05, "loss": 1.7102, "step": 3829 }, { "epoch": 0.21347750961484868, "grad_norm": 0.5377629399299622, "learning_rate": 9.044142683885645e-05, "loss": 1.647, "step": 3830 }, { "epoch": 0.2135332478680118, "grad_norm": 0.6439347267150879, "learning_rate": 9.043619010992897e-05, "loss": 2.2611, "step": 3831 }, { "epoch": 0.21358898612117497, "grad_norm": 0.527803897857666, "learning_rate": 9.043095209860128e-05, "loss": 1.7279, "step": 3832 }, { "epoch": 0.2136447243743381, "grad_norm": 0.549921452999115, "learning_rate": 9.042571280503951e-05, "loss": 1.7293, "step": 3833 }, { "epoch": 0.21370046262750125, "grad_norm": 0.5477808713912964, "learning_rate": 9.042047222940985e-05, "loss": 1.8327, "step": 3834 }, { "epoch": 0.2137562008806644, "grad_norm": 0.6052964329719543, "learning_rate": 9.041523037187847e-05, "loss": 1.6961, "step": 3835 }, { "epoch": 0.21381193913382754, "grad_norm": 0.5640259385108948, "learning_rate": 9.04099872326116e-05, "loss": 1.8019, "step": 3836 }, { "epoch": 0.2138676773869907, "grad_norm": 0.5238528251647949, "learning_rate": 9.040474281177557e-05, "loss": 1.7182, "step": 3837 }, { "epoch": 0.21392341564015382, "grad_norm": 0.561298668384552, "learning_rate": 9.039949710953665e-05, "loss": 1.903, "step": 3838 }, { "epoch": 0.21397915389331698, "grad_norm": 0.5629448890686035, "learning_rate": 9.039425012606125e-05, "loss": 1.6652, "step": 3839 }, { "epoch": 0.21403489214648014, "grad_norm": 0.5578324794769287, "learning_rate": 9.038900186151575e-05, "loss": 1.8062, "step": 3840 }, { "epoch": 0.21409063039964327, "grad_norm": 0.5517327785491943, "learning_rate": 9.038375231606659e-05, "loss": 1.7376, "step": 3841 }, { "epoch": 0.21414636865280642, "grad_norm": 0.5653707385063171, "learning_rate": 9.037850148988026e-05, "loss": 1.7724, "step": 3842 }, { "epoch": 0.21420210690596958, "grad_norm": 0.6022188663482666, "learning_rate": 9.037324938312327e-05, "loss": 1.9338, "step": 3843 }, { "epoch": 0.2142578451591327, "grad_norm": 0.5128300189971924, "learning_rate": 9.036799599596222e-05, "loss": 1.6685, "step": 3844 }, { "epoch": 0.21431358341229587, "grad_norm": 0.5680099129676819, "learning_rate": 9.036274132856368e-05, "loss": 1.6111, "step": 3845 }, { "epoch": 0.214369321665459, "grad_norm": 0.5332833528518677, "learning_rate": 9.035748538109433e-05, "loss": 1.7406, "step": 3846 }, { "epoch": 0.21442505991862215, "grad_norm": 0.5845235586166382, "learning_rate": 9.035222815372084e-05, "loss": 2.0365, "step": 3847 }, { "epoch": 0.2144807981717853, "grad_norm": 0.536208987236023, "learning_rate": 9.034696964660996e-05, "loss": 1.7869, "step": 3848 }, { "epoch": 0.21453653642494844, "grad_norm": 0.6078736782073975, "learning_rate": 9.034170985992843e-05, "loss": 1.9884, "step": 3849 }, { "epoch": 0.2145922746781116, "grad_norm": 0.5227762460708618, "learning_rate": 9.033644879384307e-05, "loss": 1.7483, "step": 3850 }, { "epoch": 0.21464801293127472, "grad_norm": 0.555255115032196, "learning_rate": 9.033118644852073e-05, "loss": 1.7319, "step": 3851 }, { "epoch": 0.21470375118443788, "grad_norm": 0.5747233033180237, "learning_rate": 9.032592282412831e-05, "loss": 1.806, "step": 3852 }, { "epoch": 0.21475948943760104, "grad_norm": 0.5099679231643677, "learning_rate": 9.032065792083271e-05, "loss": 1.7784, "step": 3853 }, { "epoch": 0.21481522769076417, "grad_norm": 0.583080530166626, "learning_rate": 9.031539173880095e-05, "loss": 1.8283, "step": 3854 }, { "epoch": 0.21487096594392732, "grad_norm": 0.5755245089530945, "learning_rate": 9.031012427820003e-05, "loss": 1.8088, "step": 3855 }, { "epoch": 0.21492670419709045, "grad_norm": 0.6300316452980042, "learning_rate": 9.030485553919696e-05, "loss": 2.021, "step": 3856 }, { "epoch": 0.2149824424502536, "grad_norm": 0.48787984251976013, "learning_rate": 9.029958552195889e-05, "loss": 1.7416, "step": 3857 }, { "epoch": 0.21503818070341676, "grad_norm": 0.5602289438247681, "learning_rate": 9.029431422665292e-05, "loss": 1.7158, "step": 3858 }, { "epoch": 0.2150939189565799, "grad_norm": 0.6266565918922424, "learning_rate": 9.028904165344622e-05, "loss": 1.904, "step": 3859 }, { "epoch": 0.21514965720974305, "grad_norm": 0.5256897211074829, "learning_rate": 9.028376780250605e-05, "loss": 1.5227, "step": 3860 }, { "epoch": 0.21520539546290618, "grad_norm": 0.5775957107543945, "learning_rate": 9.027849267399962e-05, "loss": 1.8613, "step": 3861 }, { "epoch": 0.21526113371606934, "grad_norm": 0.5759565830230713, "learning_rate": 9.027321626809425e-05, "loss": 1.7657, "step": 3862 }, { "epoch": 0.2153168719692325, "grad_norm": 0.5388328433036804, "learning_rate": 9.026793858495727e-05, "loss": 1.7117, "step": 3863 }, { "epoch": 0.21537261022239562, "grad_norm": 0.5749773383140564, "learning_rate": 9.026265962475605e-05, "loss": 1.781, "step": 3864 }, { "epoch": 0.21542834847555878, "grad_norm": 0.5567165017127991, "learning_rate": 9.025737938765801e-05, "loss": 1.8402, "step": 3865 }, { "epoch": 0.21548408672872194, "grad_norm": 0.5531468987464905, "learning_rate": 9.025209787383062e-05, "loss": 1.637, "step": 3866 }, { "epoch": 0.21553982498188506, "grad_norm": 0.5598788261413574, "learning_rate": 9.024681508344138e-05, "loss": 1.7487, "step": 3867 }, { "epoch": 0.21559556323504822, "grad_norm": 0.5670254826545715, "learning_rate": 9.024153101665782e-05, "loss": 1.8556, "step": 3868 }, { "epoch": 0.21565130148821135, "grad_norm": 0.5822195410728455, "learning_rate": 9.02362456736475e-05, "loss": 1.8144, "step": 3869 }, { "epoch": 0.2157070397413745, "grad_norm": 0.5438206791877747, "learning_rate": 9.023095905457807e-05, "loss": 1.7709, "step": 3870 }, { "epoch": 0.21576277799453766, "grad_norm": 0.5582990646362305, "learning_rate": 9.022567115961718e-05, "loss": 1.7373, "step": 3871 }, { "epoch": 0.2158185162477008, "grad_norm": 0.5481442809104919, "learning_rate": 9.022038198893254e-05, "loss": 1.642, "step": 3872 }, { "epoch": 0.21587425450086395, "grad_norm": 0.5365943312644958, "learning_rate": 9.021509154269187e-05, "loss": 1.7393, "step": 3873 }, { "epoch": 0.21592999275402708, "grad_norm": 0.5595213174819946, "learning_rate": 9.0209799821063e-05, "loss": 1.7803, "step": 3874 }, { "epoch": 0.21598573100719023, "grad_norm": 0.5171288251876831, "learning_rate": 9.020450682421368e-05, "loss": 1.6007, "step": 3875 }, { "epoch": 0.2160414692603534, "grad_norm": 0.5536861419677734, "learning_rate": 9.019921255231183e-05, "loss": 1.7964, "step": 3876 }, { "epoch": 0.21609720751351652, "grad_norm": 0.5218709707260132, "learning_rate": 9.019391700552533e-05, "loss": 1.7572, "step": 3877 }, { "epoch": 0.21615294576667968, "grad_norm": 0.5276560187339783, "learning_rate": 9.018862018402214e-05, "loss": 1.7768, "step": 3878 }, { "epoch": 0.2162086840198428, "grad_norm": 0.509636640548706, "learning_rate": 9.018332208797023e-05, "loss": 1.8262, "step": 3879 }, { "epoch": 0.21626442227300596, "grad_norm": 0.5426955819129944, "learning_rate": 9.017802271753763e-05, "loss": 1.7966, "step": 3880 }, { "epoch": 0.21632016052616912, "grad_norm": 0.5915662050247192, "learning_rate": 9.017272207289241e-05, "loss": 1.7047, "step": 3881 }, { "epoch": 0.21637589877933225, "grad_norm": 0.5025647282600403, "learning_rate": 9.016742015420264e-05, "loss": 1.6662, "step": 3882 }, { "epoch": 0.2164316370324954, "grad_norm": 0.5097705125808716, "learning_rate": 9.016211696163651e-05, "loss": 1.6667, "step": 3883 }, { "epoch": 0.21648737528565853, "grad_norm": 0.5540134310722351, "learning_rate": 9.015681249536219e-05, "loss": 1.7085, "step": 3884 }, { "epoch": 0.2165431135388217, "grad_norm": 0.5509772300720215, "learning_rate": 9.015150675554791e-05, "loss": 1.7739, "step": 3885 }, { "epoch": 0.21659885179198485, "grad_norm": 0.519534170627594, "learning_rate": 9.014619974236195e-05, "loss": 1.5412, "step": 3886 }, { "epoch": 0.21665459004514798, "grad_norm": 0.5313923954963684, "learning_rate": 9.014089145597259e-05, "loss": 1.6956, "step": 3887 }, { "epoch": 0.21671032829831113, "grad_norm": 0.5057397484779358, "learning_rate": 9.013558189654819e-05, "loss": 1.6772, "step": 3888 }, { "epoch": 0.2167660665514743, "grad_norm": 0.5538941621780396, "learning_rate": 9.013027106425713e-05, "loss": 1.7071, "step": 3889 }, { "epoch": 0.21682180480463742, "grad_norm": 0.5932080149650574, "learning_rate": 9.012495895926786e-05, "loss": 1.9886, "step": 3890 }, { "epoch": 0.21687754305780058, "grad_norm": 0.5497404932975769, "learning_rate": 9.011964558174884e-05, "loss": 1.6111, "step": 3891 }, { "epoch": 0.2169332813109637, "grad_norm": 0.5296292304992676, "learning_rate": 9.011433093186856e-05, "loss": 1.7192, "step": 3892 }, { "epoch": 0.21698901956412686, "grad_norm": 0.5682234168052673, "learning_rate": 9.01090150097956e-05, "loss": 1.727, "step": 3893 }, { "epoch": 0.21704475781729002, "grad_norm": 0.49014294147491455, "learning_rate": 9.010369781569854e-05, "loss": 1.5865, "step": 3894 }, { "epoch": 0.21710049607045315, "grad_norm": 0.5291064381599426, "learning_rate": 9.009837934974598e-05, "loss": 1.6708, "step": 3895 }, { "epoch": 0.2171562343236163, "grad_norm": 0.5380057096481323, "learning_rate": 9.009305961210664e-05, "loss": 1.816, "step": 3896 }, { "epoch": 0.21721197257677943, "grad_norm": 0.5304032564163208, "learning_rate": 9.008773860294921e-05, "loss": 1.6085, "step": 3897 }, { "epoch": 0.2172677108299426, "grad_norm": 0.5649582147598267, "learning_rate": 9.008241632244243e-05, "loss": 2.0664, "step": 3898 }, { "epoch": 0.21732344908310575, "grad_norm": 0.5284783840179443, "learning_rate": 9.00770927707551e-05, "loss": 1.6078, "step": 3899 }, { "epoch": 0.21737918733626888, "grad_norm": 0.5097172856330872, "learning_rate": 9.007176794805606e-05, "loss": 1.6985, "step": 3900 }, { "epoch": 0.21743492558943203, "grad_norm": 0.5433828830718994, "learning_rate": 9.006644185451416e-05, "loss": 1.824, "step": 3901 }, { "epoch": 0.21749066384259516, "grad_norm": 0.5155694484710693, "learning_rate": 9.006111449029835e-05, "loss": 1.674, "step": 3902 }, { "epoch": 0.21754640209575832, "grad_norm": 0.4952467978000641, "learning_rate": 9.005578585557754e-05, "loss": 1.5491, "step": 3903 }, { "epoch": 0.21760214034892147, "grad_norm": 0.5352423191070557, "learning_rate": 9.005045595052077e-05, "loss": 1.7583, "step": 3904 }, { "epoch": 0.2176578786020846, "grad_norm": 0.5036554336547852, "learning_rate": 9.004512477529702e-05, "loss": 1.6147, "step": 3905 }, { "epoch": 0.21771361685524776, "grad_norm": 0.5414397120475769, "learning_rate": 9.003979233007541e-05, "loss": 1.7576, "step": 3906 }, { "epoch": 0.2177693551084109, "grad_norm": 0.51963871717453, "learning_rate": 9.003445861502502e-05, "loss": 1.7114, "step": 3907 }, { "epoch": 0.21782509336157405, "grad_norm": 0.5667458176612854, "learning_rate": 9.002912363031504e-05, "loss": 1.904, "step": 3908 }, { "epoch": 0.2178808316147372, "grad_norm": 0.5066022872924805, "learning_rate": 9.002378737611463e-05, "loss": 1.5851, "step": 3909 }, { "epoch": 0.21793656986790033, "grad_norm": 0.5155694484710693, "learning_rate": 9.001844985259303e-05, "loss": 1.6766, "step": 3910 }, { "epoch": 0.2179923081210635, "grad_norm": 0.5910778641700745, "learning_rate": 9.001311105991954e-05, "loss": 1.6309, "step": 3911 }, { "epoch": 0.21804804637422665, "grad_norm": 0.5524371862411499, "learning_rate": 9.000777099826345e-05, "loss": 1.5347, "step": 3912 }, { "epoch": 0.21810378462738977, "grad_norm": 0.5852683186531067, "learning_rate": 9.000242966779412e-05, "loss": 1.7077, "step": 3913 }, { "epoch": 0.21815952288055293, "grad_norm": 0.511112630367279, "learning_rate": 8.999708706868097e-05, "loss": 1.4288, "step": 3914 }, { "epoch": 0.21821526113371606, "grad_norm": 0.553582489490509, "learning_rate": 8.999174320109343e-05, "loss": 1.6114, "step": 3915 }, { "epoch": 0.21827099938687922, "grad_norm": 0.5207599401473999, "learning_rate": 8.998639806520092e-05, "loss": 1.6002, "step": 3916 }, { "epoch": 0.21832673764004237, "grad_norm": 0.520836591720581, "learning_rate": 8.998105166117304e-05, "loss": 1.7308, "step": 3917 }, { "epoch": 0.2183824758932055, "grad_norm": 0.5346881151199341, "learning_rate": 8.99757039891793e-05, "loss": 1.7732, "step": 3918 }, { "epoch": 0.21843821414636866, "grad_norm": 0.5407224893569946, "learning_rate": 8.997035504938928e-05, "loss": 1.6927, "step": 3919 }, { "epoch": 0.2184939523995318, "grad_norm": 0.6079891324043274, "learning_rate": 8.996500484197266e-05, "loss": 1.7503, "step": 3920 }, { "epoch": 0.21854969065269494, "grad_norm": 0.5896045565605164, "learning_rate": 8.995965336709908e-05, "loss": 1.8189, "step": 3921 }, { "epoch": 0.2186054289058581, "grad_norm": 0.5681061148643494, "learning_rate": 8.99543006249383e-05, "loss": 1.9138, "step": 3922 }, { "epoch": 0.21866116715902123, "grad_norm": 0.5397033095359802, "learning_rate": 8.994894661566004e-05, "loss": 1.6947, "step": 3923 }, { "epoch": 0.2187169054121844, "grad_norm": 0.5442162752151489, "learning_rate": 8.994359133943411e-05, "loss": 1.7947, "step": 3924 }, { "epoch": 0.21877264366534752, "grad_norm": 0.5366693735122681, "learning_rate": 8.993823479643036e-05, "loss": 1.8557, "step": 3925 }, { "epoch": 0.21882838191851067, "grad_norm": 0.5018730163574219, "learning_rate": 8.993287698681867e-05, "loss": 1.6033, "step": 3926 }, { "epoch": 0.21888412017167383, "grad_norm": 0.5234804749488831, "learning_rate": 8.992751791076893e-05, "loss": 1.6927, "step": 3927 }, { "epoch": 0.21893985842483696, "grad_norm": 0.5351289510726929, "learning_rate": 8.992215756845111e-05, "loss": 1.6108, "step": 3928 }, { "epoch": 0.21899559667800012, "grad_norm": 0.5499307513237, "learning_rate": 8.991679596003521e-05, "loss": 1.821, "step": 3929 }, { "epoch": 0.21905133493116324, "grad_norm": 0.5461710691452026, "learning_rate": 8.991143308569129e-05, "loss": 1.6755, "step": 3930 }, { "epoch": 0.2191070731843264, "grad_norm": 0.557220458984375, "learning_rate": 8.990606894558942e-05, "loss": 1.7568, "step": 3931 }, { "epoch": 0.21916281143748956, "grad_norm": 0.5313843488693237, "learning_rate": 8.99007035398997e-05, "loss": 1.5701, "step": 3932 }, { "epoch": 0.2192185496906527, "grad_norm": 0.5466028451919556, "learning_rate": 8.98953368687923e-05, "loss": 1.7533, "step": 3933 }, { "epoch": 0.21927428794381584, "grad_norm": 0.5278179049491882, "learning_rate": 8.988996893243742e-05, "loss": 1.6604, "step": 3934 }, { "epoch": 0.219330026196979, "grad_norm": 0.5555846095085144, "learning_rate": 8.988459973100529e-05, "loss": 1.9101, "step": 3935 }, { "epoch": 0.21938576445014213, "grad_norm": 0.5475595593452454, "learning_rate": 8.987922926466621e-05, "loss": 1.6784, "step": 3936 }, { "epoch": 0.21944150270330529, "grad_norm": 0.5606985092163086, "learning_rate": 8.98738575335905e-05, "loss": 1.8496, "step": 3937 }, { "epoch": 0.21949724095646841, "grad_norm": 0.5272994041442871, "learning_rate": 8.986848453794849e-05, "loss": 1.6477, "step": 3938 }, { "epoch": 0.21955297920963157, "grad_norm": 0.5808579325675964, "learning_rate": 8.986311027791061e-05, "loss": 1.9312, "step": 3939 }, { "epoch": 0.21960871746279473, "grad_norm": 0.5892482399940491, "learning_rate": 8.985773475364729e-05, "loss": 1.8278, "step": 3940 }, { "epoch": 0.21966445571595786, "grad_norm": 0.5204423069953918, "learning_rate": 8.9852357965329e-05, "loss": 1.5689, "step": 3941 }, { "epoch": 0.219720193969121, "grad_norm": 0.5408873558044434, "learning_rate": 8.984697991312629e-05, "loss": 1.6719, "step": 3942 }, { "epoch": 0.21977593222228414, "grad_norm": 0.4690547585487366, "learning_rate": 8.98416005972097e-05, "loss": 1.4167, "step": 3943 }, { "epoch": 0.2198316704754473, "grad_norm": 0.5128321647644043, "learning_rate": 8.98362200177498e-05, "loss": 1.5936, "step": 3944 }, { "epoch": 0.21988740872861046, "grad_norm": 0.5651824474334717, "learning_rate": 8.98308381749173e-05, "loss": 1.7715, "step": 3945 }, { "epoch": 0.21994314698177359, "grad_norm": 0.49932271242141724, "learning_rate": 8.982545506888282e-05, "loss": 1.5167, "step": 3946 }, { "epoch": 0.21999888523493674, "grad_norm": 0.5488872528076172, "learning_rate": 8.982007069981711e-05, "loss": 1.6694, "step": 3947 }, { "epoch": 0.22005462348809987, "grad_norm": 0.5529676079750061, "learning_rate": 8.981468506789093e-05, "loss": 1.7098, "step": 3948 }, { "epoch": 0.22011036174126303, "grad_norm": 0.555151104927063, "learning_rate": 8.980929817327509e-05, "loss": 1.8188, "step": 3949 }, { "epoch": 0.22016609999442618, "grad_norm": 0.5413922667503357, "learning_rate": 8.980391001614039e-05, "loss": 1.6947, "step": 3950 }, { "epoch": 0.2202218382475893, "grad_norm": 0.5880113244056702, "learning_rate": 8.979852059665774e-05, "loss": 1.8565, "step": 3951 }, { "epoch": 0.22027757650075247, "grad_norm": 0.5404399037361145, "learning_rate": 8.979312991499807e-05, "loss": 1.6119, "step": 3952 }, { "epoch": 0.2203333147539156, "grad_norm": 0.5193542838096619, "learning_rate": 8.97877379713323e-05, "loss": 1.5012, "step": 3953 }, { "epoch": 0.22038905300707876, "grad_norm": 0.5563862323760986, "learning_rate": 8.97823447658315e-05, "loss": 1.7968, "step": 3954 }, { "epoch": 0.2204447912602419, "grad_norm": 0.5796663165092468, "learning_rate": 8.977695029866665e-05, "loss": 1.6924, "step": 3955 }, { "epoch": 0.22050052951340504, "grad_norm": 0.5060169100761414, "learning_rate": 8.977155457000886e-05, "loss": 1.6837, "step": 3956 }, { "epoch": 0.2205562677665682, "grad_norm": 0.5254307389259338, "learning_rate": 8.976615758002925e-05, "loss": 1.5339, "step": 3957 }, { "epoch": 0.22061200601973135, "grad_norm": 0.4909488260746002, "learning_rate": 8.976075932889896e-05, "loss": 1.406, "step": 3958 }, { "epoch": 0.22066774427289448, "grad_norm": 0.521052896976471, "learning_rate": 8.97553598167892e-05, "loss": 1.6203, "step": 3959 }, { "epoch": 0.22072348252605764, "grad_norm": 0.5382006764411926, "learning_rate": 8.974995904387123e-05, "loss": 1.6984, "step": 3960 }, { "epoch": 0.22077922077922077, "grad_norm": 0.5354267954826355, "learning_rate": 8.97445570103163e-05, "loss": 1.7722, "step": 3961 }, { "epoch": 0.22083495903238393, "grad_norm": 0.5725782513618469, "learning_rate": 8.973915371629577e-05, "loss": 1.8308, "step": 3962 }, { "epoch": 0.22089069728554708, "grad_norm": 0.5183130502700806, "learning_rate": 8.973374916198096e-05, "loss": 1.6487, "step": 3963 }, { "epoch": 0.2209464355387102, "grad_norm": 0.5026050209999084, "learning_rate": 8.972834334754331e-05, "loss": 1.4931, "step": 3964 }, { "epoch": 0.22100217379187337, "grad_norm": 0.5589287281036377, "learning_rate": 8.972293627315424e-05, "loss": 1.9263, "step": 3965 }, { "epoch": 0.2210579120450365, "grad_norm": 0.5776212811470032, "learning_rate": 8.971752793898522e-05, "loss": 1.8374, "step": 3966 }, { "epoch": 0.22111365029819965, "grad_norm": 0.5569107532501221, "learning_rate": 8.971211834520779e-05, "loss": 1.7221, "step": 3967 }, { "epoch": 0.2211693885513628, "grad_norm": 0.527186930179596, "learning_rate": 8.970670749199351e-05, "loss": 1.713, "step": 3968 }, { "epoch": 0.22122512680452594, "grad_norm": 0.5234454274177551, "learning_rate": 8.970129537951395e-05, "loss": 1.6519, "step": 3969 }, { "epoch": 0.2212808650576891, "grad_norm": 0.5419970154762268, "learning_rate": 8.969588200794079e-05, "loss": 1.5816, "step": 3970 }, { "epoch": 0.22133660331085223, "grad_norm": 0.5328260660171509, "learning_rate": 8.969046737744571e-05, "loss": 1.8442, "step": 3971 }, { "epoch": 0.22139234156401538, "grad_norm": 0.5527640581130981, "learning_rate": 8.968505148820039e-05, "loss": 1.5886, "step": 3972 }, { "epoch": 0.22144807981717854, "grad_norm": 0.5386121869087219, "learning_rate": 8.967963434037663e-05, "loss": 1.8938, "step": 3973 }, { "epoch": 0.22150381807034167, "grad_norm": 0.60856693983078, "learning_rate": 8.967421593414622e-05, "loss": 1.7739, "step": 3974 }, { "epoch": 0.22155955632350482, "grad_norm": 0.5383316278457642, "learning_rate": 8.966879626968099e-05, "loss": 1.5916, "step": 3975 }, { "epoch": 0.22161529457666795, "grad_norm": 0.5469935536384583, "learning_rate": 8.966337534715284e-05, "loss": 1.6879, "step": 3976 }, { "epoch": 0.2216710328298311, "grad_norm": 0.5624483227729797, "learning_rate": 8.965795316673366e-05, "loss": 1.5465, "step": 3977 }, { "epoch": 0.22172677108299427, "grad_norm": 0.571090817451477, "learning_rate": 8.965252972859545e-05, "loss": 1.8477, "step": 3978 }, { "epoch": 0.2217825093361574, "grad_norm": 0.5622638463973999, "learning_rate": 8.964710503291018e-05, "loss": 1.7961, "step": 3979 }, { "epoch": 0.22183824758932055, "grad_norm": 0.54639732837677, "learning_rate": 8.964167907984988e-05, "loss": 1.7795, "step": 3980 }, { "epoch": 0.2218939858424837, "grad_norm": 0.5762872099876404, "learning_rate": 8.963625186958666e-05, "loss": 1.7824, "step": 3981 }, { "epoch": 0.22194972409564684, "grad_norm": 0.5208929777145386, "learning_rate": 8.963082340229263e-05, "loss": 1.7521, "step": 3982 }, { "epoch": 0.22200546234881, "grad_norm": 0.49496889114379883, "learning_rate": 8.962539367813993e-05, "loss": 1.5493, "step": 3983 }, { "epoch": 0.22206120060197312, "grad_norm": 0.4936692714691162, "learning_rate": 8.961996269730078e-05, "loss": 1.5015, "step": 3984 }, { "epoch": 0.22211693885513628, "grad_norm": 0.5555882453918457, "learning_rate": 8.961453045994742e-05, "loss": 1.7563, "step": 3985 }, { "epoch": 0.22217267710829944, "grad_norm": 0.5514853596687317, "learning_rate": 8.960909696625213e-05, "loss": 1.6671, "step": 3986 }, { "epoch": 0.22222841536146257, "grad_norm": 0.5259945392608643, "learning_rate": 8.960366221638721e-05, "loss": 1.7181, "step": 3987 }, { "epoch": 0.22228415361462572, "grad_norm": 0.5564213395118713, "learning_rate": 8.959822621052502e-05, "loss": 1.8017, "step": 3988 }, { "epoch": 0.22233989186778885, "grad_norm": 0.5879985094070435, "learning_rate": 8.959278894883797e-05, "loss": 1.8768, "step": 3989 }, { "epoch": 0.222395630120952, "grad_norm": 0.5429808497428894, "learning_rate": 8.958735043149852e-05, "loss": 1.6246, "step": 3990 }, { "epoch": 0.22245136837411517, "grad_norm": 0.5388792753219604, "learning_rate": 8.958191065867912e-05, "loss": 1.8083, "step": 3991 }, { "epoch": 0.2225071066272783, "grad_norm": 0.5783261060714722, "learning_rate": 8.957646963055227e-05, "loss": 1.9074, "step": 3992 }, { "epoch": 0.22256284488044145, "grad_norm": 0.5076984167098999, "learning_rate": 8.957102734729057e-05, "loss": 1.6518, "step": 3993 }, { "epoch": 0.22261858313360458, "grad_norm": 0.6677889823913574, "learning_rate": 8.956558380906659e-05, "loss": 2.3105, "step": 3994 }, { "epoch": 0.22267432138676774, "grad_norm": 0.5451659560203552, "learning_rate": 8.956013901605299e-05, "loss": 1.7229, "step": 3995 }, { "epoch": 0.2227300596399309, "grad_norm": 0.5508718490600586, "learning_rate": 8.955469296842241e-05, "loss": 1.641, "step": 3996 }, { "epoch": 0.22278579789309402, "grad_norm": 0.5317922234535217, "learning_rate": 8.95492456663476e-05, "loss": 1.6717, "step": 3997 }, { "epoch": 0.22284153614625718, "grad_norm": 0.5446794033050537, "learning_rate": 8.954379711000129e-05, "loss": 1.7382, "step": 3998 }, { "epoch": 0.2228972743994203, "grad_norm": 0.5360628962516785, "learning_rate": 8.95383472995563e-05, "loss": 1.7489, "step": 3999 }, { "epoch": 0.22295301265258347, "grad_norm": 0.5646945238113403, "learning_rate": 8.953289623518545e-05, "loss": 1.7241, "step": 4000 }, { "epoch": 0.22300875090574662, "grad_norm": 0.5079129338264465, "learning_rate": 8.952744391706165e-05, "loss": 1.6683, "step": 4001 }, { "epoch": 0.22306448915890975, "grad_norm": 0.5274491906166077, "learning_rate": 8.952199034535778e-05, "loss": 1.6086, "step": 4002 }, { "epoch": 0.2231202274120729, "grad_norm": 0.5475561618804932, "learning_rate": 8.95165355202468e-05, "loss": 1.9497, "step": 4003 }, { "epoch": 0.22317596566523606, "grad_norm": 0.5520079135894775, "learning_rate": 8.951107944190171e-05, "loss": 1.9735, "step": 4004 }, { "epoch": 0.2232317039183992, "grad_norm": 0.5097377300262451, "learning_rate": 8.950562211049556e-05, "loss": 1.5424, "step": 4005 }, { "epoch": 0.22328744217156235, "grad_norm": 0.5405047535896301, "learning_rate": 8.950016352620139e-05, "loss": 1.6966, "step": 4006 }, { "epoch": 0.22334318042472548, "grad_norm": 0.5254392027854919, "learning_rate": 8.949470368919235e-05, "loss": 1.6651, "step": 4007 }, { "epoch": 0.22339891867788864, "grad_norm": 0.5582841634750366, "learning_rate": 8.948924259964157e-05, "loss": 1.7668, "step": 4008 }, { "epoch": 0.2234546569310518, "grad_norm": 0.5375759601593018, "learning_rate": 8.948378025772227e-05, "loss": 1.7271, "step": 4009 }, { "epoch": 0.22351039518421492, "grad_norm": 0.5370509028434753, "learning_rate": 8.947831666360765e-05, "loss": 1.7851, "step": 4010 }, { "epoch": 0.22356613343737808, "grad_norm": 0.5874437093734741, "learning_rate": 8.947285181747098e-05, "loss": 1.8569, "step": 4011 }, { "epoch": 0.2236218716905412, "grad_norm": 0.566886305809021, "learning_rate": 8.946738571948562e-05, "loss": 1.6114, "step": 4012 }, { "epoch": 0.22367760994370436, "grad_norm": 0.5747610926628113, "learning_rate": 8.946191836982489e-05, "loss": 1.8552, "step": 4013 }, { "epoch": 0.22373334819686752, "grad_norm": 0.5414125919342041, "learning_rate": 8.945644976866219e-05, "loss": 1.5846, "step": 4014 }, { "epoch": 0.22378908645003065, "grad_norm": 0.5818209648132324, "learning_rate": 8.945097991617096e-05, "loss": 1.8305, "step": 4015 }, { "epoch": 0.2238448247031938, "grad_norm": 0.5896833539009094, "learning_rate": 8.944550881252465e-05, "loss": 1.6642, "step": 4016 }, { "epoch": 0.22390056295635694, "grad_norm": 0.5750831365585327, "learning_rate": 8.944003645789678e-05, "loss": 1.7286, "step": 4017 }, { "epoch": 0.2239563012095201, "grad_norm": 0.514319896697998, "learning_rate": 8.943456285246091e-05, "loss": 1.6254, "step": 4018 }, { "epoch": 0.22401203946268325, "grad_norm": 0.48393240571022034, "learning_rate": 8.942908799639062e-05, "loss": 1.4306, "step": 4019 }, { "epoch": 0.22406777771584638, "grad_norm": 0.5655490756034851, "learning_rate": 8.942361188985957e-05, "loss": 1.8686, "step": 4020 }, { "epoch": 0.22412351596900953, "grad_norm": 0.7101614475250244, "learning_rate": 8.941813453304138e-05, "loss": 1.6244, "step": 4021 }, { "epoch": 0.22417925422217266, "grad_norm": 0.5121461153030396, "learning_rate": 8.941265592610979e-05, "loss": 1.5336, "step": 4022 }, { "epoch": 0.22423499247533582, "grad_norm": 0.5167136192321777, "learning_rate": 8.940717606923857e-05, "loss": 1.5896, "step": 4023 }, { "epoch": 0.22429073072849898, "grad_norm": 0.5683619379997253, "learning_rate": 8.940169496260144e-05, "loss": 1.8004, "step": 4024 }, { "epoch": 0.2243464689816621, "grad_norm": 0.5303056240081787, "learning_rate": 8.939621260637231e-05, "loss": 1.6034, "step": 4025 }, { "epoch": 0.22440220723482526, "grad_norm": 0.5514824986457825, "learning_rate": 8.9390729000725e-05, "loss": 1.7099, "step": 4026 }, { "epoch": 0.22445794548798842, "grad_norm": 0.5117455720901489, "learning_rate": 8.938524414583343e-05, "loss": 1.8367, "step": 4027 }, { "epoch": 0.22451368374115155, "grad_norm": 0.5556350946426392, "learning_rate": 8.937975804187156e-05, "loss": 1.6737, "step": 4028 }, { "epoch": 0.2245694219943147, "grad_norm": 0.5511283874511719, "learning_rate": 8.937427068901335e-05, "loss": 1.7541, "step": 4029 }, { "epoch": 0.22462516024747783, "grad_norm": 0.5651305317878723, "learning_rate": 8.936878208743285e-05, "loss": 1.7383, "step": 4030 }, { "epoch": 0.224680898500641, "grad_norm": 0.5192481875419617, "learning_rate": 8.93632922373041e-05, "loss": 1.5392, "step": 4031 }, { "epoch": 0.22473663675380415, "grad_norm": 0.5942433476448059, "learning_rate": 8.935780113880125e-05, "loss": 1.9703, "step": 4032 }, { "epoch": 0.22479237500696728, "grad_norm": 0.5313376188278198, "learning_rate": 8.93523087920984e-05, "loss": 1.7827, "step": 4033 }, { "epoch": 0.22484811326013043, "grad_norm": 0.5464789271354675, "learning_rate": 8.934681519736977e-05, "loss": 1.8036, "step": 4034 }, { "epoch": 0.22490385151329356, "grad_norm": 0.5823439955711365, "learning_rate": 8.934132035478955e-05, "loss": 1.9969, "step": 4035 }, { "epoch": 0.22495958976645672, "grad_norm": 0.5518758296966553, "learning_rate": 8.933582426453205e-05, "loss": 1.7836, "step": 4036 }, { "epoch": 0.22501532801961988, "grad_norm": 0.529864490032196, "learning_rate": 8.933032692677153e-05, "loss": 1.8767, "step": 4037 }, { "epoch": 0.225071066272783, "grad_norm": 0.5450250506401062, "learning_rate": 8.932482834168237e-05, "loss": 1.6584, "step": 4038 }, { "epoch": 0.22512680452594616, "grad_norm": 0.5210989713668823, "learning_rate": 8.931932850943892e-05, "loss": 1.6707, "step": 4039 }, { "epoch": 0.2251825427791093, "grad_norm": 0.5319432616233826, "learning_rate": 8.931382743021562e-05, "loss": 1.5798, "step": 4040 }, { "epoch": 0.22523828103227245, "grad_norm": 0.502311110496521, "learning_rate": 8.930832510418692e-05, "loss": 1.5718, "step": 4041 }, { "epoch": 0.2252940192854356, "grad_norm": 0.5432561635971069, "learning_rate": 8.930282153152734e-05, "loss": 1.7996, "step": 4042 }, { "epoch": 0.22534975753859873, "grad_norm": 0.5339439511299133, "learning_rate": 8.92973167124114e-05, "loss": 1.8783, "step": 4043 }, { "epoch": 0.2254054957917619, "grad_norm": 0.5929161310195923, "learning_rate": 8.92918106470137e-05, "loss": 1.9278, "step": 4044 }, { "epoch": 0.22546123404492502, "grad_norm": 0.5356025695800781, "learning_rate": 8.928630333550886e-05, "loss": 1.6555, "step": 4045 }, { "epoch": 0.22551697229808818, "grad_norm": 0.6173697113990784, "learning_rate": 8.928079477807155e-05, "loss": 1.6326, "step": 4046 }, { "epoch": 0.22557271055125133, "grad_norm": 0.5391169786453247, "learning_rate": 8.927528497487642e-05, "loss": 1.7983, "step": 4047 }, { "epoch": 0.22562844880441446, "grad_norm": 0.541691780090332, "learning_rate": 8.926977392609826e-05, "loss": 1.9013, "step": 4048 }, { "epoch": 0.22568418705757762, "grad_norm": 0.5518167018890381, "learning_rate": 8.926426163191182e-05, "loss": 1.8038, "step": 4049 }, { "epoch": 0.22573992531074077, "grad_norm": 0.5680546164512634, "learning_rate": 8.925874809249193e-05, "loss": 1.893, "step": 4050 }, { "epoch": 0.2257956635639039, "grad_norm": 0.531597912311554, "learning_rate": 8.925323330801345e-05, "loss": 1.6987, "step": 4051 }, { "epoch": 0.22585140181706706, "grad_norm": 0.5005265474319458, "learning_rate": 8.924771727865126e-05, "loss": 1.4703, "step": 4052 }, { "epoch": 0.2259071400702302, "grad_norm": 0.4409901201725006, "learning_rate": 8.924220000458032e-05, "loss": 1.1188, "step": 4053 }, { "epoch": 0.22596287832339335, "grad_norm": 0.5583540797233582, "learning_rate": 8.92366814859756e-05, "loss": 1.8899, "step": 4054 }, { "epoch": 0.2260186165765565, "grad_norm": 0.5503487586975098, "learning_rate": 8.923116172301208e-05, "loss": 1.7006, "step": 4055 }, { "epoch": 0.22607435482971963, "grad_norm": 0.5401930212974548, "learning_rate": 8.922564071586487e-05, "loss": 1.7435, "step": 4056 }, { "epoch": 0.2261300930828828, "grad_norm": 0.5470068454742432, "learning_rate": 8.922011846470903e-05, "loss": 1.7926, "step": 4057 }, { "epoch": 0.22618583133604592, "grad_norm": 0.5655896663665771, "learning_rate": 8.921459496971971e-05, "loss": 1.8028, "step": 4058 }, { "epoch": 0.22624156958920907, "grad_norm": 0.520338237285614, "learning_rate": 8.920907023107208e-05, "loss": 1.7713, "step": 4059 }, { "epoch": 0.22629730784237223, "grad_norm": 0.5628316402435303, "learning_rate": 8.920354424894133e-05, "loss": 1.8308, "step": 4060 }, { "epoch": 0.22635304609553536, "grad_norm": 0.5436638593673706, "learning_rate": 8.919801702350272e-05, "loss": 1.7824, "step": 4061 }, { "epoch": 0.22640878434869852, "grad_norm": 0.6150013208389282, "learning_rate": 8.919248855493156e-05, "loss": 1.6801, "step": 4062 }, { "epoch": 0.22646452260186165, "grad_norm": 0.5413832068443298, "learning_rate": 8.918695884340318e-05, "loss": 1.7266, "step": 4063 }, { "epoch": 0.2265202608550248, "grad_norm": 0.6004742980003357, "learning_rate": 8.918142788909294e-05, "loss": 1.9331, "step": 4064 }, { "epoch": 0.22657599910818796, "grad_norm": 0.5428612232208252, "learning_rate": 8.917589569217624e-05, "loss": 1.8074, "step": 4065 }, { "epoch": 0.2266317373613511, "grad_norm": 0.5653241276741028, "learning_rate": 8.917036225282855e-05, "loss": 1.8719, "step": 4066 }, { "epoch": 0.22668747561451424, "grad_norm": 0.5411580801010132, "learning_rate": 8.916482757122535e-05, "loss": 1.7155, "step": 4067 }, { "epoch": 0.22674321386767737, "grad_norm": 0.5733420252799988, "learning_rate": 8.915929164754215e-05, "loss": 1.8401, "step": 4068 }, { "epoch": 0.22679895212084053, "grad_norm": 0.5870828032493591, "learning_rate": 8.915375448195455e-05, "loss": 1.6825, "step": 4069 }, { "epoch": 0.2268546903740037, "grad_norm": 0.5373989939689636, "learning_rate": 8.914821607463814e-05, "loss": 1.6471, "step": 4070 }, { "epoch": 0.22691042862716682, "grad_norm": 0.5650984048843384, "learning_rate": 8.914267642576857e-05, "loss": 2.0078, "step": 4071 }, { "epoch": 0.22696616688032997, "grad_norm": 0.5647602677345276, "learning_rate": 8.91371355355215e-05, "loss": 1.8949, "step": 4072 }, { "epoch": 0.22702190513349313, "grad_norm": 0.5225738286972046, "learning_rate": 8.913159340407269e-05, "loss": 1.787, "step": 4073 }, { "epoch": 0.22707764338665626, "grad_norm": 0.4927429258823395, "learning_rate": 8.912605003159788e-05, "loss": 1.6022, "step": 4074 }, { "epoch": 0.22713338163981941, "grad_norm": 0.5242977738380432, "learning_rate": 8.912050541827291e-05, "loss": 1.6286, "step": 4075 }, { "epoch": 0.22718911989298254, "grad_norm": 0.5272535681724548, "learning_rate": 8.911495956427357e-05, "loss": 1.8091, "step": 4076 }, { "epoch": 0.2272448581461457, "grad_norm": 0.5660970211029053, "learning_rate": 8.910941246977577e-05, "loss": 1.7518, "step": 4077 }, { "epoch": 0.22730059639930886, "grad_norm": 0.5166184902191162, "learning_rate": 8.910386413495544e-05, "loss": 1.7051, "step": 4078 }, { "epoch": 0.227356334652472, "grad_norm": 0.5315423607826233, "learning_rate": 8.909831455998854e-05, "loss": 1.5667, "step": 4079 }, { "epoch": 0.22741207290563514, "grad_norm": 0.5121911764144897, "learning_rate": 8.909276374505104e-05, "loss": 1.6594, "step": 4080 }, { "epoch": 0.22746781115879827, "grad_norm": 0.5725307464599609, "learning_rate": 8.908721169031901e-05, "loss": 1.7931, "step": 4081 }, { "epoch": 0.22752354941196143, "grad_norm": 0.6129924058914185, "learning_rate": 8.908165839596852e-05, "loss": 2.0539, "step": 4082 }, { "epoch": 0.22757928766512459, "grad_norm": 0.6019653677940369, "learning_rate": 8.907610386217568e-05, "loss": 2.1055, "step": 4083 }, { "epoch": 0.22763502591828771, "grad_norm": 0.5589843392372131, "learning_rate": 8.907054808911668e-05, "loss": 1.8536, "step": 4084 }, { "epoch": 0.22769076417145087, "grad_norm": 0.5030215382575989, "learning_rate": 8.906499107696766e-05, "loss": 1.5868, "step": 4085 }, { "epoch": 0.227746502424614, "grad_norm": 0.5388656258583069, "learning_rate": 8.90594328259049e-05, "loss": 1.611, "step": 4086 }, { "epoch": 0.22780224067777716, "grad_norm": 0.5835996270179749, "learning_rate": 8.905387333610466e-05, "loss": 1.3946, "step": 4087 }, { "epoch": 0.2278579789309403, "grad_norm": 0.5778213739395142, "learning_rate": 8.904831260774327e-05, "loss": 1.9145, "step": 4088 }, { "epoch": 0.22791371718410344, "grad_norm": 0.5685307383537292, "learning_rate": 8.904275064099708e-05, "loss": 1.8516, "step": 4089 }, { "epoch": 0.2279694554372666, "grad_norm": 0.5906243324279785, "learning_rate": 8.903718743604244e-05, "loss": 1.7872, "step": 4090 }, { "epoch": 0.22802519369042973, "grad_norm": 0.5142653584480286, "learning_rate": 8.903162299305585e-05, "loss": 1.5771, "step": 4091 }, { "epoch": 0.22808093194359289, "grad_norm": 0.5752720832824707, "learning_rate": 8.902605731221373e-05, "loss": 1.7952, "step": 4092 }, { "epoch": 0.22813667019675604, "grad_norm": 0.5666948556900024, "learning_rate": 8.902049039369261e-05, "loss": 1.7417, "step": 4093 }, { "epoch": 0.22819240844991917, "grad_norm": 0.5241186618804932, "learning_rate": 8.901492223766906e-05, "loss": 1.6605, "step": 4094 }, { "epoch": 0.22824814670308233, "grad_norm": 0.548561155796051, "learning_rate": 8.900935284431961e-05, "loss": 1.8027, "step": 4095 }, { "epoch": 0.22830388495624548, "grad_norm": 0.5435733795166016, "learning_rate": 8.900378221382097e-05, "loss": 1.6941, "step": 4096 }, { "epoch": 0.2283596232094086, "grad_norm": 0.5925113558769226, "learning_rate": 8.899821034634974e-05, "loss": 1.9182, "step": 4097 }, { "epoch": 0.22841536146257177, "grad_norm": 0.5289484262466431, "learning_rate": 8.899263724208266e-05, "loss": 1.7512, "step": 4098 }, { "epoch": 0.2284710997157349, "grad_norm": 0.5516422390937805, "learning_rate": 8.898706290119647e-05, "loss": 1.8606, "step": 4099 }, { "epoch": 0.22852683796889806, "grad_norm": 0.5578961372375488, "learning_rate": 8.898148732386795e-05, "loss": 1.7136, "step": 4100 }, { "epoch": 0.2285825762220612, "grad_norm": 0.5643925666809082, "learning_rate": 8.897591051027394e-05, "loss": 1.8315, "step": 4101 }, { "epoch": 0.22863831447522434, "grad_norm": 0.4974330961704254, "learning_rate": 8.89703324605913e-05, "loss": 1.4505, "step": 4102 }, { "epoch": 0.2286940527283875, "grad_norm": 0.5316607356071472, "learning_rate": 8.896475317499691e-05, "loss": 1.662, "step": 4103 }, { "epoch": 0.22874979098155063, "grad_norm": 0.48880115151405334, "learning_rate": 8.895917265366773e-05, "loss": 1.6713, "step": 4104 }, { "epoch": 0.22880552923471378, "grad_norm": 0.5647329092025757, "learning_rate": 8.895359089678075e-05, "loss": 1.6645, "step": 4105 }, { "epoch": 0.22886126748787694, "grad_norm": 0.588045060634613, "learning_rate": 8.894800790451298e-05, "loss": 1.7344, "step": 4106 }, { "epoch": 0.22891700574104007, "grad_norm": 0.5201917290687561, "learning_rate": 8.894242367704149e-05, "loss": 1.7137, "step": 4107 }, { "epoch": 0.22897274399420323, "grad_norm": 0.5581889152526855, "learning_rate": 8.893683821454335e-05, "loss": 1.689, "step": 4108 }, { "epoch": 0.22902848224736636, "grad_norm": 0.533208429813385, "learning_rate": 8.893125151719574e-05, "loss": 1.7345, "step": 4109 }, { "epoch": 0.2290842205005295, "grad_norm": 0.5409815907478333, "learning_rate": 8.89256635851758e-05, "loss": 1.6921, "step": 4110 }, { "epoch": 0.22913995875369267, "grad_norm": 0.5371890664100647, "learning_rate": 8.892007441866076e-05, "loss": 1.7282, "step": 4111 }, { "epoch": 0.2291956970068558, "grad_norm": 0.5628719925880432, "learning_rate": 8.89144840178279e-05, "loss": 1.6771, "step": 4112 }, { "epoch": 0.22925143526001895, "grad_norm": 0.5631751418113708, "learning_rate": 8.89088923828545e-05, "loss": 1.9474, "step": 4113 }, { "epoch": 0.22930717351318208, "grad_norm": 0.5464017987251282, "learning_rate": 8.890329951391787e-05, "loss": 1.7969, "step": 4114 }, { "epoch": 0.22936291176634524, "grad_norm": 0.5662708878517151, "learning_rate": 8.88977054111954e-05, "loss": 1.6611, "step": 4115 }, { "epoch": 0.2294186500195084, "grad_norm": 0.607832670211792, "learning_rate": 8.889211007486451e-05, "loss": 1.6558, "step": 4116 }, { "epoch": 0.22947438827267153, "grad_norm": 0.5683878064155579, "learning_rate": 8.888651350510265e-05, "loss": 1.712, "step": 4117 }, { "epoch": 0.22953012652583468, "grad_norm": 0.5762284398078918, "learning_rate": 8.888091570208729e-05, "loss": 1.8012, "step": 4118 }, { "epoch": 0.22958586477899784, "grad_norm": 0.5987650752067566, "learning_rate": 8.887531666599598e-05, "loss": 2.0303, "step": 4119 }, { "epoch": 0.22964160303216097, "grad_norm": 0.5141220092773438, "learning_rate": 8.88697163970063e-05, "loss": 1.6133, "step": 4120 }, { "epoch": 0.22969734128532412, "grad_norm": 0.5571396946907043, "learning_rate": 8.886411489529583e-05, "loss": 1.6117, "step": 4121 }, { "epoch": 0.22975307953848725, "grad_norm": 0.5717421770095825, "learning_rate": 8.885851216104222e-05, "loss": 1.8159, "step": 4122 }, { "epoch": 0.2298088177916504, "grad_norm": 0.5314472913742065, "learning_rate": 8.885290819442319e-05, "loss": 1.8198, "step": 4123 }, { "epoch": 0.22986455604481357, "grad_norm": 0.5760038495063782, "learning_rate": 8.884730299561642e-05, "loss": 1.8839, "step": 4124 }, { "epoch": 0.2299202942979767, "grad_norm": 0.5187524557113647, "learning_rate": 8.88416965647997e-05, "loss": 1.5981, "step": 4125 }, { "epoch": 0.22997603255113985, "grad_norm": 0.5539306998252869, "learning_rate": 8.883608890215083e-05, "loss": 1.5802, "step": 4126 }, { "epoch": 0.23003177080430298, "grad_norm": 0.5440337061882019, "learning_rate": 8.883048000784764e-05, "loss": 1.7884, "step": 4127 }, { "epoch": 0.23008750905746614, "grad_norm": 0.6190919876098633, "learning_rate": 8.882486988206803e-05, "loss": 1.8968, "step": 4128 }, { "epoch": 0.2301432473106293, "grad_norm": 0.5481730103492737, "learning_rate": 8.881925852498991e-05, "loss": 1.5026, "step": 4129 }, { "epoch": 0.23019898556379242, "grad_norm": 0.5920677185058594, "learning_rate": 8.881364593679124e-05, "loss": 2.02, "step": 4130 }, { "epoch": 0.23025472381695558, "grad_norm": 0.580629289150238, "learning_rate": 8.880803211765003e-05, "loss": 1.8447, "step": 4131 }, { "epoch": 0.2303104620701187, "grad_norm": 0.5800060033798218, "learning_rate": 8.880241706774431e-05, "loss": 1.8952, "step": 4132 }, { "epoch": 0.23036620032328187, "grad_norm": 0.5633650422096252, "learning_rate": 8.879680078725214e-05, "loss": 1.79, "step": 4133 }, { "epoch": 0.23042193857644502, "grad_norm": 0.503121554851532, "learning_rate": 8.879118327635165e-05, "loss": 1.31, "step": 4134 }, { "epoch": 0.23047767682960815, "grad_norm": 0.5033895373344421, "learning_rate": 8.8785564535221e-05, "loss": 1.388, "step": 4135 }, { "epoch": 0.2305334150827713, "grad_norm": 0.5460697412490845, "learning_rate": 8.877994456403838e-05, "loss": 1.8455, "step": 4136 }, { "epoch": 0.23058915333593444, "grad_norm": 0.5005971193313599, "learning_rate": 8.877432336298201e-05, "loss": 1.513, "step": 4137 }, { "epoch": 0.2306448915890976, "grad_norm": 0.5267760753631592, "learning_rate": 8.876870093223019e-05, "loss": 1.6449, "step": 4138 }, { "epoch": 0.23070062984226075, "grad_norm": 0.5714914202690125, "learning_rate": 8.87630772719612e-05, "loss": 2.0891, "step": 4139 }, { "epoch": 0.23075636809542388, "grad_norm": 0.5814961194992065, "learning_rate": 8.875745238235341e-05, "loss": 1.6314, "step": 4140 }, { "epoch": 0.23081210634858704, "grad_norm": 0.5237919092178345, "learning_rate": 8.87518262635852e-05, "loss": 1.5437, "step": 4141 }, { "epoch": 0.2308678446017502, "grad_norm": 0.5390162467956543, "learning_rate": 8.8746198915835e-05, "loss": 1.8075, "step": 4142 }, { "epoch": 0.23092358285491332, "grad_norm": 0.5281346440315247, "learning_rate": 8.874057033928128e-05, "loss": 1.7196, "step": 4143 }, { "epoch": 0.23097932110807648, "grad_norm": 0.5769410133361816, "learning_rate": 8.873494053410254e-05, "loss": 1.7623, "step": 4144 }, { "epoch": 0.2310350593612396, "grad_norm": 0.5773770213127136, "learning_rate": 8.872930950047733e-05, "loss": 1.6683, "step": 4145 }, { "epoch": 0.23109079761440277, "grad_norm": 0.5479909777641296, "learning_rate": 8.872367723858422e-05, "loss": 1.8277, "step": 4146 }, { "epoch": 0.23114653586756592, "grad_norm": 0.5558038949966431, "learning_rate": 8.871804374860185e-05, "loss": 1.9413, "step": 4147 }, { "epoch": 0.23120227412072905, "grad_norm": 0.5571532249450684, "learning_rate": 8.871240903070888e-05, "loss": 1.7471, "step": 4148 }, { "epoch": 0.2312580123738922, "grad_norm": 0.63371741771698, "learning_rate": 8.870677308508399e-05, "loss": 2.0195, "step": 4149 }, { "epoch": 0.23131375062705534, "grad_norm": 0.5300304889678955, "learning_rate": 8.870113591190595e-05, "loss": 1.5686, "step": 4150 }, { "epoch": 0.2313694888802185, "grad_norm": 0.6006084680557251, "learning_rate": 8.869549751135352e-05, "loss": 1.7178, "step": 4151 }, { "epoch": 0.23142522713338165, "grad_norm": 0.5930531024932861, "learning_rate": 8.868985788360551e-05, "loss": 1.6998, "step": 4152 }, { "epoch": 0.23148096538654478, "grad_norm": 0.5450523495674133, "learning_rate": 8.868421702884077e-05, "loss": 1.5045, "step": 4153 }, { "epoch": 0.23153670363970794, "grad_norm": 0.519468367099762, "learning_rate": 8.867857494723824e-05, "loss": 1.6035, "step": 4154 }, { "epoch": 0.23159244189287106, "grad_norm": 0.5567930936813354, "learning_rate": 8.867293163897681e-05, "loss": 1.8108, "step": 4155 }, { "epoch": 0.23164818014603422, "grad_norm": 0.5138580799102783, "learning_rate": 8.866728710423547e-05, "loss": 1.5952, "step": 4156 }, { "epoch": 0.23170391839919738, "grad_norm": 0.5398350954055786, "learning_rate": 8.866164134319323e-05, "loss": 1.8621, "step": 4157 }, { "epoch": 0.2317596566523605, "grad_norm": 0.5708958506584167, "learning_rate": 8.865599435602915e-05, "loss": 1.5408, "step": 4158 }, { "epoch": 0.23181539490552366, "grad_norm": 0.62980717420578, "learning_rate": 8.86503461429223e-05, "loss": 2.2779, "step": 4159 }, { "epoch": 0.2318711331586868, "grad_norm": 0.5782346129417419, "learning_rate": 8.86446967040518e-05, "loss": 1.6574, "step": 4160 }, { "epoch": 0.23192687141184995, "grad_norm": 0.5406448841094971, "learning_rate": 8.863904603959686e-05, "loss": 1.6591, "step": 4161 }, { "epoch": 0.2319826096650131, "grad_norm": 0.533285915851593, "learning_rate": 8.863339414973664e-05, "loss": 1.7869, "step": 4162 }, { "epoch": 0.23203834791817624, "grad_norm": 0.5359031558036804, "learning_rate": 8.862774103465042e-05, "loss": 1.8322, "step": 4163 }, { "epoch": 0.2320940861713394, "grad_norm": 0.5305787920951843, "learning_rate": 8.862208669451748e-05, "loss": 1.5869, "step": 4164 }, { "epoch": 0.23214982442450255, "grad_norm": 0.5482218861579895, "learning_rate": 8.861643112951712e-05, "loss": 1.9482, "step": 4165 }, { "epoch": 0.23220556267766568, "grad_norm": 0.5915202498435974, "learning_rate": 8.86107743398287e-05, "loss": 1.9292, "step": 4166 }, { "epoch": 0.23226130093082883, "grad_norm": 0.5175179243087769, "learning_rate": 8.860511632563166e-05, "loss": 1.5677, "step": 4167 }, { "epoch": 0.23231703918399196, "grad_norm": 0.5698404908180237, "learning_rate": 8.85994570871054e-05, "loss": 1.8537, "step": 4168 }, { "epoch": 0.23237277743715512, "grad_norm": 0.5476871728897095, "learning_rate": 8.859379662442941e-05, "loss": 1.7031, "step": 4169 }, { "epoch": 0.23242851569031828, "grad_norm": 0.5611745119094849, "learning_rate": 8.858813493778322e-05, "loss": 1.9365, "step": 4170 }, { "epoch": 0.2324842539434814, "grad_norm": 0.5908852219581604, "learning_rate": 8.858247202734637e-05, "loss": 1.7084, "step": 4171 }, { "epoch": 0.23253999219664456, "grad_norm": 0.5042490363121033, "learning_rate": 8.857680789329844e-05, "loss": 1.6353, "step": 4172 }, { "epoch": 0.2325957304498077, "grad_norm": 0.535675048828125, "learning_rate": 8.85711425358191e-05, "loss": 1.523, "step": 4173 }, { "epoch": 0.23265146870297085, "grad_norm": 0.5372074246406555, "learning_rate": 8.8565475955088e-05, "loss": 1.38, "step": 4174 }, { "epoch": 0.232707206956134, "grad_norm": 0.554507315158844, "learning_rate": 8.855980815128486e-05, "loss": 1.5261, "step": 4175 }, { "epoch": 0.23276294520929713, "grad_norm": 0.5450062155723572, "learning_rate": 8.85541391245894e-05, "loss": 1.7725, "step": 4176 }, { "epoch": 0.2328186834624603, "grad_norm": 0.5121927857398987, "learning_rate": 8.854846887518147e-05, "loss": 1.4857, "step": 4177 }, { "epoch": 0.23287442171562342, "grad_norm": 0.5284276008605957, "learning_rate": 8.854279740324086e-05, "loss": 1.8393, "step": 4178 }, { "epoch": 0.23293015996878658, "grad_norm": 0.5464218258857727, "learning_rate": 8.85371247089474e-05, "loss": 1.6455, "step": 4179 }, { "epoch": 0.23298589822194973, "grad_norm": 0.515756368637085, "learning_rate": 8.853145079248106e-05, "loss": 1.6739, "step": 4180 }, { "epoch": 0.23304163647511286, "grad_norm": 0.5167007446289062, "learning_rate": 8.852577565402175e-05, "loss": 1.6312, "step": 4181 }, { "epoch": 0.23309737472827602, "grad_norm": 0.5863040089607239, "learning_rate": 8.852009929374945e-05, "loss": 1.8519, "step": 4182 }, { "epoch": 0.23315311298143915, "grad_norm": 0.5061371922492981, "learning_rate": 8.851442171184418e-05, "loss": 1.6562, "step": 4183 }, { "epoch": 0.2332088512346023, "grad_norm": 0.5501469969749451, "learning_rate": 8.850874290848603e-05, "loss": 1.7597, "step": 4184 }, { "epoch": 0.23326458948776546, "grad_norm": 0.5034657716751099, "learning_rate": 8.850306288385505e-05, "loss": 1.7217, "step": 4185 }, { "epoch": 0.2333203277409286, "grad_norm": 0.5563570857048035, "learning_rate": 8.849738163813143e-05, "loss": 1.7315, "step": 4186 }, { "epoch": 0.23337606599409175, "grad_norm": 0.5461295247077942, "learning_rate": 8.849169917149531e-05, "loss": 1.7419, "step": 4187 }, { "epoch": 0.2334318042472549, "grad_norm": 0.5286983251571655, "learning_rate": 8.848601548412691e-05, "loss": 1.8088, "step": 4188 }, { "epoch": 0.23348754250041803, "grad_norm": 0.5308994650840759, "learning_rate": 8.848033057620651e-05, "loss": 1.6436, "step": 4189 }, { "epoch": 0.2335432807535812, "grad_norm": 0.5667473673820496, "learning_rate": 8.847464444791435e-05, "loss": 1.6382, "step": 4190 }, { "epoch": 0.23359901900674432, "grad_norm": 0.5432576537132263, "learning_rate": 8.846895709943082e-05, "loss": 1.8993, "step": 4191 }, { "epoch": 0.23365475725990748, "grad_norm": 0.6006546020507812, "learning_rate": 8.846326853093623e-05, "loss": 1.7459, "step": 4192 }, { "epoch": 0.23371049551307063, "grad_norm": 0.5638506412506104, "learning_rate": 8.845757874261104e-05, "loss": 1.618, "step": 4193 }, { "epoch": 0.23376623376623376, "grad_norm": 0.5464212894439697, "learning_rate": 8.845188773463566e-05, "loss": 1.6731, "step": 4194 }, { "epoch": 0.23382197201939692, "grad_norm": 0.5781604051589966, "learning_rate": 8.84461955071906e-05, "loss": 1.8368, "step": 4195 }, { "epoch": 0.23387771027256005, "grad_norm": 0.5308955907821655, "learning_rate": 8.844050206045637e-05, "loss": 1.805, "step": 4196 }, { "epoch": 0.2339334485257232, "grad_norm": 0.5154343843460083, "learning_rate": 8.843480739461356e-05, "loss": 1.4806, "step": 4197 }, { "epoch": 0.23398918677888636, "grad_norm": 0.5477091073989868, "learning_rate": 8.842911150984272e-05, "loss": 1.7506, "step": 4198 }, { "epoch": 0.2340449250320495, "grad_norm": 0.5401119589805603, "learning_rate": 8.842341440632454e-05, "loss": 1.8434, "step": 4199 }, { "epoch": 0.23410066328521265, "grad_norm": 0.5683028697967529, "learning_rate": 8.841771608423967e-05, "loss": 1.6289, "step": 4200 }, { "epoch": 0.23415640153837577, "grad_norm": 0.5980592370033264, "learning_rate": 8.841201654376883e-05, "loss": 1.782, "step": 4201 }, { "epoch": 0.23421213979153893, "grad_norm": 0.5431941151618958, "learning_rate": 8.84063157850928e-05, "loss": 1.7904, "step": 4202 }, { "epoch": 0.2342678780447021, "grad_norm": 0.6389545202255249, "learning_rate": 8.840061380839235e-05, "loss": 1.5506, "step": 4203 }, { "epoch": 0.23432361629786522, "grad_norm": 0.5594901442527771, "learning_rate": 8.839491061384832e-05, "loss": 1.7914, "step": 4204 }, { "epoch": 0.23437935455102837, "grad_norm": 0.5211427211761475, "learning_rate": 8.838920620164157e-05, "loss": 1.5682, "step": 4205 }, { "epoch": 0.23443509280419153, "grad_norm": 0.5244554281234741, "learning_rate": 8.838350057195304e-05, "loss": 1.6598, "step": 4206 }, { "epoch": 0.23449083105735466, "grad_norm": 0.5590394735336304, "learning_rate": 8.837779372496367e-05, "loss": 1.6682, "step": 4207 }, { "epoch": 0.23454656931051782, "grad_norm": 0.5445299744606018, "learning_rate": 8.837208566085441e-05, "loss": 1.8047, "step": 4208 }, { "epoch": 0.23460230756368095, "grad_norm": 0.5209025144577026, "learning_rate": 8.836637637980636e-05, "loss": 1.6225, "step": 4209 }, { "epoch": 0.2346580458168441, "grad_norm": 0.5524556040763855, "learning_rate": 8.836066588200051e-05, "loss": 1.7139, "step": 4210 }, { "epoch": 0.23471378407000726, "grad_norm": 0.5641475915908813, "learning_rate": 8.8354954167618e-05, "loss": 1.7928, "step": 4211 }, { "epoch": 0.2347695223231704, "grad_norm": 0.57920241355896, "learning_rate": 8.834924123683998e-05, "loss": 1.7035, "step": 4212 }, { "epoch": 0.23482526057633354, "grad_norm": 0.5374131202697754, "learning_rate": 8.834352708984762e-05, "loss": 1.6887, "step": 4213 }, { "epoch": 0.23488099882949667, "grad_norm": 0.5739797353744507, "learning_rate": 8.833781172682214e-05, "loss": 1.7476, "step": 4214 }, { "epoch": 0.23493673708265983, "grad_norm": 0.5460266470909119, "learning_rate": 8.833209514794479e-05, "loss": 1.569, "step": 4215 }, { "epoch": 0.234992475335823, "grad_norm": 0.5776944160461426, "learning_rate": 8.832637735339688e-05, "loss": 1.6762, "step": 4216 }, { "epoch": 0.23504821358898612, "grad_norm": 0.593519926071167, "learning_rate": 8.832065834335973e-05, "loss": 1.6699, "step": 4217 }, { "epoch": 0.23510395184214927, "grad_norm": 0.5690516233444214, "learning_rate": 8.831493811801472e-05, "loss": 1.8292, "step": 4218 }, { "epoch": 0.2351596900953124, "grad_norm": 0.5436887741088867, "learning_rate": 8.830921667754328e-05, "loss": 1.6958, "step": 4219 }, { "epoch": 0.23521542834847556, "grad_norm": 0.54433673620224, "learning_rate": 8.830349402212683e-05, "loss": 1.7544, "step": 4220 }, { "epoch": 0.23527116660163871, "grad_norm": 0.5694179534912109, "learning_rate": 8.82977701519469e-05, "loss": 1.676, "step": 4221 }, { "epoch": 0.23532690485480184, "grad_norm": 0.5544805526733398, "learning_rate": 8.829204506718496e-05, "loss": 1.7395, "step": 4222 }, { "epoch": 0.235382643107965, "grad_norm": 0.586121141910553, "learning_rate": 8.828631876802263e-05, "loss": 1.8418, "step": 4223 }, { "epoch": 0.23543838136112813, "grad_norm": 0.5376494526863098, "learning_rate": 8.828059125464148e-05, "loss": 1.5981, "step": 4224 }, { "epoch": 0.2354941196142913, "grad_norm": 0.5764834880828857, "learning_rate": 8.827486252722316e-05, "loss": 1.9862, "step": 4225 }, { "epoch": 0.23554985786745444, "grad_norm": 0.6348791122436523, "learning_rate": 8.826913258594937e-05, "loss": 1.9931, "step": 4226 }, { "epoch": 0.23560559612061757, "grad_norm": 0.5736886262893677, "learning_rate": 8.826340143100182e-05, "loss": 1.8651, "step": 4227 }, { "epoch": 0.23566133437378073, "grad_norm": 0.5940203070640564, "learning_rate": 8.825766906256228e-05, "loss": 1.6837, "step": 4228 }, { "epoch": 0.23571707262694389, "grad_norm": 0.5036525726318359, "learning_rate": 8.825193548081252e-05, "loss": 1.4064, "step": 4229 }, { "epoch": 0.23577281088010701, "grad_norm": 0.5096335411071777, "learning_rate": 8.824620068593439e-05, "loss": 1.7501, "step": 4230 }, { "epoch": 0.23582854913327017, "grad_norm": 0.5474448204040527, "learning_rate": 8.824046467810976e-05, "loss": 1.7263, "step": 4231 }, { "epoch": 0.2358842873864333, "grad_norm": 0.5364823937416077, "learning_rate": 8.823472745752055e-05, "loss": 1.7752, "step": 4232 }, { "epoch": 0.23594002563959646, "grad_norm": 0.5261183977127075, "learning_rate": 8.822898902434873e-05, "loss": 1.7809, "step": 4233 }, { "epoch": 0.2359957638927596, "grad_norm": 0.5040357708930969, "learning_rate": 8.822324937877624e-05, "loss": 1.5033, "step": 4234 }, { "epoch": 0.23605150214592274, "grad_norm": 0.534517228603363, "learning_rate": 8.821750852098515e-05, "loss": 1.735, "step": 4235 }, { "epoch": 0.2361072403990859, "grad_norm": 0.5336146950721741, "learning_rate": 8.821176645115752e-05, "loss": 1.8211, "step": 4236 }, { "epoch": 0.23616297865224903, "grad_norm": 0.5576988458633423, "learning_rate": 8.820602316947544e-05, "loss": 1.6501, "step": 4237 }, { "epoch": 0.23621871690541218, "grad_norm": 0.6140468716621399, "learning_rate": 8.820027867612107e-05, "loss": 1.9297, "step": 4238 }, { "epoch": 0.23627445515857534, "grad_norm": 0.6102777123451233, "learning_rate": 8.819453297127657e-05, "loss": 1.7881, "step": 4239 }, { "epoch": 0.23633019341173847, "grad_norm": 0.5396928787231445, "learning_rate": 8.818878605512418e-05, "loss": 1.7629, "step": 4240 }, { "epoch": 0.23638593166490163, "grad_norm": 0.5476622581481934, "learning_rate": 8.818303792784615e-05, "loss": 1.939, "step": 4241 }, { "epoch": 0.23644166991806476, "grad_norm": 0.5725302696228027, "learning_rate": 8.817728858962478e-05, "loss": 1.7058, "step": 4242 }, { "epoch": 0.2364974081712279, "grad_norm": 0.5522921085357666, "learning_rate": 8.817153804064241e-05, "loss": 1.6284, "step": 4243 }, { "epoch": 0.23655314642439107, "grad_norm": 0.5554071664810181, "learning_rate": 8.81657862810814e-05, "loss": 1.7203, "step": 4244 }, { "epoch": 0.2366088846775542, "grad_norm": 0.6202051639556885, "learning_rate": 8.816003331112419e-05, "loss": 2.0629, "step": 4245 }, { "epoch": 0.23666462293071736, "grad_norm": 0.5647374391555786, "learning_rate": 8.81542791309532e-05, "loss": 1.7256, "step": 4246 }, { "epoch": 0.23672036118388048, "grad_norm": 0.5261071920394897, "learning_rate": 8.814852374075093e-05, "loss": 1.6476, "step": 4247 }, { "epoch": 0.23677609943704364, "grad_norm": 0.5051866173744202, "learning_rate": 8.81427671406999e-05, "loss": 1.57, "step": 4248 }, { "epoch": 0.2368318376902068, "grad_norm": 0.5553388595581055, "learning_rate": 8.81370093309827e-05, "loss": 1.497, "step": 4249 }, { "epoch": 0.23688757594336993, "grad_norm": 0.6159742474555969, "learning_rate": 8.813125031178191e-05, "loss": 1.9324, "step": 4250 }, { "epoch": 0.23694331419653308, "grad_norm": 0.5158507227897644, "learning_rate": 8.812549008328017e-05, "loss": 1.7841, "step": 4251 }, { "epoch": 0.23699905244969624, "grad_norm": 0.5447210073471069, "learning_rate": 8.811972864566018e-05, "loss": 1.6966, "step": 4252 }, { "epoch": 0.23705479070285937, "grad_norm": 0.5115744471549988, "learning_rate": 8.811396599910467e-05, "loss": 1.6449, "step": 4253 }, { "epoch": 0.23711052895602253, "grad_norm": 0.5265628695487976, "learning_rate": 8.810820214379636e-05, "loss": 1.8372, "step": 4254 }, { "epoch": 0.23716626720918565, "grad_norm": 0.5546838045120239, "learning_rate": 8.810243707991805e-05, "loss": 1.9996, "step": 4255 }, { "epoch": 0.2372220054623488, "grad_norm": 0.5540011525154114, "learning_rate": 8.809667080765262e-05, "loss": 1.7619, "step": 4256 }, { "epoch": 0.23727774371551197, "grad_norm": 0.5753396153450012, "learning_rate": 8.809090332718288e-05, "loss": 1.8621, "step": 4257 }, { "epoch": 0.2373334819686751, "grad_norm": 0.5528965592384338, "learning_rate": 8.808513463869179e-05, "loss": 1.6625, "step": 4258 }, { "epoch": 0.23738922022183825, "grad_norm": 0.5542230010032654, "learning_rate": 8.80793647423623e-05, "loss": 1.5929, "step": 4259 }, { "epoch": 0.23744495847500138, "grad_norm": 0.6071727275848389, "learning_rate": 8.807359363837734e-05, "loss": 1.7551, "step": 4260 }, { "epoch": 0.23750069672816454, "grad_norm": 0.5722533464431763, "learning_rate": 8.806782132691999e-05, "loss": 1.9474, "step": 4261 }, { "epoch": 0.2375564349813277, "grad_norm": 0.5362473130226135, "learning_rate": 8.806204780817331e-05, "loss": 1.6914, "step": 4262 }, { "epoch": 0.23761217323449083, "grad_norm": 0.519892156124115, "learning_rate": 8.805627308232036e-05, "loss": 1.4148, "step": 4263 }, { "epoch": 0.23766791148765398, "grad_norm": 0.5315799713134766, "learning_rate": 8.805049714954434e-05, "loss": 1.8304, "step": 4264 }, { "epoch": 0.2377236497408171, "grad_norm": 0.5093747973442078, "learning_rate": 8.804472001002839e-05, "loss": 1.4575, "step": 4265 }, { "epoch": 0.23777938799398027, "grad_norm": 0.5335510969161987, "learning_rate": 8.803894166395574e-05, "loss": 1.515, "step": 4266 }, { "epoch": 0.23783512624714342, "grad_norm": 0.5546256303787231, "learning_rate": 8.803316211150964e-05, "loss": 1.657, "step": 4267 }, { "epoch": 0.23789086450030655, "grad_norm": 0.5256768465042114, "learning_rate": 8.802738135287338e-05, "loss": 1.5228, "step": 4268 }, { "epoch": 0.2379466027534697, "grad_norm": 0.5291659235954285, "learning_rate": 8.802159938823031e-05, "loss": 1.5667, "step": 4269 }, { "epoch": 0.23800234100663284, "grad_norm": 0.5859813094139099, "learning_rate": 8.801581621776379e-05, "loss": 1.9385, "step": 4270 }, { "epoch": 0.238058079259796, "grad_norm": 0.6084904670715332, "learning_rate": 8.801003184165722e-05, "loss": 1.9139, "step": 4271 }, { "epoch": 0.23811381751295915, "grad_norm": 0.5245258212089539, "learning_rate": 8.800424626009407e-05, "loss": 1.8107, "step": 4272 }, { "epoch": 0.23816955576612228, "grad_norm": 0.5182399749755859, "learning_rate": 8.799845947325777e-05, "loss": 1.72, "step": 4273 }, { "epoch": 0.23822529401928544, "grad_norm": 0.5252156257629395, "learning_rate": 8.799267148133192e-05, "loss": 1.6711, "step": 4274 }, { "epoch": 0.2382810322724486, "grad_norm": 0.49757280945777893, "learning_rate": 8.798688228450002e-05, "loss": 1.5716, "step": 4275 }, { "epoch": 0.23833677052561172, "grad_norm": 0.5291200876235962, "learning_rate": 8.798109188294572e-05, "loss": 1.6498, "step": 4276 }, { "epoch": 0.23839250877877488, "grad_norm": 0.5830451250076294, "learning_rate": 8.797530027685261e-05, "loss": 1.8761, "step": 4277 }, { "epoch": 0.238448247031938, "grad_norm": 0.5453559756278992, "learning_rate": 8.796950746640439e-05, "loss": 1.6984, "step": 4278 }, { "epoch": 0.23850398528510117, "grad_norm": 0.5068353414535522, "learning_rate": 8.796371345178476e-05, "loss": 1.3414, "step": 4279 }, { "epoch": 0.23855972353826432, "grad_norm": 0.5567828416824341, "learning_rate": 8.79579182331775e-05, "loss": 1.716, "step": 4280 }, { "epoch": 0.23861546179142745, "grad_norm": 0.5418634414672852, "learning_rate": 8.795212181076638e-05, "loss": 1.6889, "step": 4281 }, { "epoch": 0.2386712000445906, "grad_norm": 0.5291851162910461, "learning_rate": 8.794632418473522e-05, "loss": 1.6941, "step": 4282 }, { "epoch": 0.23872693829775374, "grad_norm": 0.5776856541633606, "learning_rate": 8.794052535526792e-05, "loss": 1.756, "step": 4283 }, { "epoch": 0.2387826765509169, "grad_norm": 0.5982547998428345, "learning_rate": 8.793472532254836e-05, "loss": 1.8349, "step": 4284 }, { "epoch": 0.23883841480408005, "grad_norm": 0.5404837727546692, "learning_rate": 8.792892408676048e-05, "loss": 1.6617, "step": 4285 }, { "epoch": 0.23889415305724318, "grad_norm": 0.5049643516540527, "learning_rate": 8.792312164808827e-05, "loss": 1.5132, "step": 4286 }, { "epoch": 0.23894989131040634, "grad_norm": 0.5474380254745483, "learning_rate": 8.791731800671575e-05, "loss": 1.7937, "step": 4287 }, { "epoch": 0.23900562956356947, "grad_norm": 0.5853757858276367, "learning_rate": 8.791151316282698e-05, "loss": 1.8488, "step": 4288 }, { "epoch": 0.23906136781673262, "grad_norm": 0.574220597743988, "learning_rate": 8.790570711660604e-05, "loss": 1.7211, "step": 4289 }, { "epoch": 0.23911710606989578, "grad_norm": 0.580944836139679, "learning_rate": 8.789989986823707e-05, "loss": 1.6015, "step": 4290 }, { "epoch": 0.2391728443230589, "grad_norm": 0.5716251730918884, "learning_rate": 8.789409141790426e-05, "loss": 1.7375, "step": 4291 }, { "epoch": 0.23922858257622207, "grad_norm": 0.5204554200172424, "learning_rate": 8.788828176579182e-05, "loss": 1.7231, "step": 4292 }, { "epoch": 0.2392843208293852, "grad_norm": 0.529961884021759, "learning_rate": 8.788247091208397e-05, "loss": 1.7355, "step": 4293 }, { "epoch": 0.23934005908254835, "grad_norm": 0.5950244665145874, "learning_rate": 8.787665885696502e-05, "loss": 2.0786, "step": 4294 }, { "epoch": 0.2393957973357115, "grad_norm": 0.5200558304786682, "learning_rate": 8.78708456006193e-05, "loss": 1.6045, "step": 4295 }, { "epoch": 0.23945153558887464, "grad_norm": 0.5256621241569519, "learning_rate": 8.786503114323113e-05, "loss": 1.6679, "step": 4296 }, { "epoch": 0.2395072738420378, "grad_norm": 0.5340785980224609, "learning_rate": 8.785921548498494e-05, "loss": 1.6646, "step": 4297 }, { "epoch": 0.23956301209520095, "grad_norm": 0.5381552577018738, "learning_rate": 8.785339862606521e-05, "loss": 1.7888, "step": 4298 }, { "epoch": 0.23961875034836408, "grad_norm": 0.6692368984222412, "learning_rate": 8.784758056665634e-05, "loss": 1.9363, "step": 4299 }, { "epoch": 0.23967448860152724, "grad_norm": 0.5429602265357971, "learning_rate": 8.784176130694289e-05, "loss": 1.8477, "step": 4300 }, { "epoch": 0.23973022685469036, "grad_norm": 0.5760909914970398, "learning_rate": 8.783594084710941e-05, "loss": 1.9106, "step": 4301 }, { "epoch": 0.23978596510785352, "grad_norm": 0.5410770773887634, "learning_rate": 8.783011918734048e-05, "loss": 1.7685, "step": 4302 }, { "epoch": 0.23984170336101668, "grad_norm": 0.6343144774436951, "learning_rate": 8.782429632782073e-05, "loss": 1.6641, "step": 4303 }, { "epoch": 0.2398974416141798, "grad_norm": 0.5951781868934631, "learning_rate": 8.781847226873484e-05, "loss": 1.8908, "step": 4304 }, { "epoch": 0.23995317986734296, "grad_norm": 0.5187268257141113, "learning_rate": 8.78126470102675e-05, "loss": 1.5571, "step": 4305 }, { "epoch": 0.2400089181205061, "grad_norm": 0.5376867651939392, "learning_rate": 8.780682055260348e-05, "loss": 1.514, "step": 4306 }, { "epoch": 0.24006465637366925, "grad_norm": 0.5534177422523499, "learning_rate": 8.780099289592751e-05, "loss": 1.581, "step": 4307 }, { "epoch": 0.2401203946268324, "grad_norm": 0.5672261714935303, "learning_rate": 8.779516404042446e-05, "loss": 1.7344, "step": 4308 }, { "epoch": 0.24017613287999554, "grad_norm": 0.5509449243545532, "learning_rate": 8.778933398627915e-05, "loss": 1.7162, "step": 4309 }, { "epoch": 0.2402318711331587, "grad_norm": 0.5842772126197815, "learning_rate": 8.778350273367653e-05, "loss": 1.7958, "step": 4310 }, { "epoch": 0.24028760938632182, "grad_norm": 0.50345379114151, "learning_rate": 8.777767028280145e-05, "loss": 1.4958, "step": 4311 }, { "epoch": 0.24034334763948498, "grad_norm": 0.5337620377540588, "learning_rate": 8.777183663383896e-05, "loss": 1.6876, "step": 4312 }, { "epoch": 0.24039908589264813, "grad_norm": 0.5183177590370178, "learning_rate": 8.776600178697402e-05, "loss": 1.7538, "step": 4313 }, { "epoch": 0.24045482414581126, "grad_norm": 0.5510264039039612, "learning_rate": 8.776016574239171e-05, "loss": 1.7722, "step": 4314 }, { "epoch": 0.24051056239897442, "grad_norm": 0.5638562440872192, "learning_rate": 8.77543285002771e-05, "loss": 1.8447, "step": 4315 }, { "epoch": 0.24056630065213755, "grad_norm": 0.6304780840873718, "learning_rate": 8.774849006081529e-05, "loss": 2.111, "step": 4316 }, { "epoch": 0.2406220389053007, "grad_norm": 0.5731822848320007, "learning_rate": 8.774265042419148e-05, "loss": 1.9022, "step": 4317 }, { "epoch": 0.24067777715846386, "grad_norm": 0.5105111002922058, "learning_rate": 8.773680959059086e-05, "loss": 1.4723, "step": 4318 }, { "epoch": 0.240733515411627, "grad_norm": 0.5694832801818848, "learning_rate": 8.773096756019866e-05, "loss": 1.8138, "step": 4319 }, { "epoch": 0.24078925366479015, "grad_norm": 0.5039976835250854, "learning_rate": 8.772512433320014e-05, "loss": 1.5152, "step": 4320 }, { "epoch": 0.2408449919179533, "grad_norm": 0.5481953024864197, "learning_rate": 8.771927990978063e-05, "loss": 1.7373, "step": 4321 }, { "epoch": 0.24090073017111643, "grad_norm": 0.5046210885047913, "learning_rate": 8.771343429012549e-05, "loss": 1.3736, "step": 4322 }, { "epoch": 0.2409564684242796, "grad_norm": 0.5144927501678467, "learning_rate": 8.77075874744201e-05, "loss": 1.7854, "step": 4323 }, { "epoch": 0.24101220667744272, "grad_norm": 0.5863038301467896, "learning_rate": 8.770173946284987e-05, "loss": 1.9596, "step": 4324 }, { "epoch": 0.24106794493060588, "grad_norm": 0.5546390414237976, "learning_rate": 8.76958902556003e-05, "loss": 1.6905, "step": 4325 }, { "epoch": 0.24112368318376903, "grad_norm": 0.5615156888961792, "learning_rate": 8.769003985285686e-05, "loss": 1.8015, "step": 4326 }, { "epoch": 0.24117942143693216, "grad_norm": 0.5112027525901794, "learning_rate": 8.76841882548051e-05, "loss": 1.7408, "step": 4327 }, { "epoch": 0.24123515969009532, "grad_norm": 0.523891270160675, "learning_rate": 8.767833546163062e-05, "loss": 1.6473, "step": 4328 }, { "epoch": 0.24129089794325845, "grad_norm": 0.5263711214065552, "learning_rate": 8.767248147351902e-05, "loss": 1.724, "step": 4329 }, { "epoch": 0.2413466361964216, "grad_norm": 0.5724520683288574, "learning_rate": 8.766662629065594e-05, "loss": 1.7469, "step": 4330 }, { "epoch": 0.24140237444958476, "grad_norm": 0.5471790432929993, "learning_rate": 8.76607699132271e-05, "loss": 1.7262, "step": 4331 }, { "epoch": 0.2414581127027479, "grad_norm": 0.6246349215507507, "learning_rate": 8.76549123414182e-05, "loss": 2.0055, "step": 4332 }, { "epoch": 0.24151385095591105, "grad_norm": 0.5492396354675293, "learning_rate": 8.764905357541505e-05, "loss": 1.7602, "step": 4333 }, { "epoch": 0.24156958920907418, "grad_norm": 0.5340796113014221, "learning_rate": 8.76431936154034e-05, "loss": 1.7666, "step": 4334 }, { "epoch": 0.24162532746223733, "grad_norm": 0.5311720967292786, "learning_rate": 8.763733246156913e-05, "loss": 1.5892, "step": 4335 }, { "epoch": 0.2416810657154005, "grad_norm": 0.5926803350448608, "learning_rate": 8.763147011409811e-05, "loss": 1.9398, "step": 4336 }, { "epoch": 0.24173680396856362, "grad_norm": 0.5204175710678101, "learning_rate": 8.762560657317629e-05, "loss": 1.4019, "step": 4337 }, { "epoch": 0.24179254222172678, "grad_norm": 0.5834428071975708, "learning_rate": 8.761974183898957e-05, "loss": 1.7063, "step": 4338 }, { "epoch": 0.2418482804748899, "grad_norm": 0.5776971578598022, "learning_rate": 8.7613875911724e-05, "loss": 1.7957, "step": 4339 }, { "epoch": 0.24190401872805306, "grad_norm": 0.5160627365112305, "learning_rate": 8.760800879156558e-05, "loss": 1.5686, "step": 4340 }, { "epoch": 0.24195975698121622, "grad_norm": 0.5783469676971436, "learning_rate": 8.760214047870039e-05, "loss": 2.0046, "step": 4341 }, { "epoch": 0.24201549523437935, "grad_norm": 0.5625891089439392, "learning_rate": 8.759627097331455e-05, "loss": 1.6902, "step": 4342 }, { "epoch": 0.2420712334875425, "grad_norm": 0.5326409935951233, "learning_rate": 8.759040027559418e-05, "loss": 1.9046, "step": 4343 }, { "epoch": 0.24212697174070566, "grad_norm": 0.5869771838188171, "learning_rate": 8.758452838572551e-05, "loss": 1.7593, "step": 4344 }, { "epoch": 0.2421827099938688, "grad_norm": 0.6008633971214294, "learning_rate": 8.75786553038947e-05, "loss": 2.0021, "step": 4345 }, { "epoch": 0.24223844824703195, "grad_norm": 0.48187822103500366, "learning_rate": 8.757278103028806e-05, "loss": 1.1718, "step": 4346 }, { "epoch": 0.24229418650019507, "grad_norm": 0.5490634441375732, "learning_rate": 8.756690556509186e-05, "loss": 1.6083, "step": 4347 }, { "epoch": 0.24234992475335823, "grad_norm": 0.5408362746238708, "learning_rate": 8.756102890849246e-05, "loss": 1.6982, "step": 4348 }, { "epoch": 0.2424056630065214, "grad_norm": 0.5706157684326172, "learning_rate": 8.75551510606762e-05, "loss": 1.8505, "step": 4349 }, { "epoch": 0.24246140125968452, "grad_norm": 0.573557436466217, "learning_rate": 8.754927202182953e-05, "loss": 1.8455, "step": 4350 }, { "epoch": 0.24251713951284767, "grad_norm": 0.5338667035102844, "learning_rate": 8.754339179213886e-05, "loss": 1.5964, "step": 4351 }, { "epoch": 0.2425728777660108, "grad_norm": 0.5258156061172485, "learning_rate": 8.753751037179073e-05, "loss": 1.7428, "step": 4352 }, { "epoch": 0.24262861601917396, "grad_norm": 0.5279545783996582, "learning_rate": 8.75316277609716e-05, "loss": 1.7279, "step": 4353 }, { "epoch": 0.24268435427233712, "grad_norm": 0.5074349045753479, "learning_rate": 8.752574395986806e-05, "loss": 1.508, "step": 4354 }, { "epoch": 0.24274009252550025, "grad_norm": 0.5738914012908936, "learning_rate": 8.751985896866672e-05, "loss": 1.8978, "step": 4355 }, { "epoch": 0.2427958307786634, "grad_norm": 0.6244510412216187, "learning_rate": 8.75139727875542e-05, "loss": 1.94, "step": 4356 }, { "epoch": 0.24285156903182653, "grad_norm": 0.5642906427383423, "learning_rate": 8.75080854167172e-05, "loss": 1.9239, "step": 4357 }, { "epoch": 0.2429073072849897, "grad_norm": 0.5614916086196899, "learning_rate": 8.75021968563424e-05, "loss": 1.6965, "step": 4358 }, { "epoch": 0.24296304553815284, "grad_norm": 0.5800240635871887, "learning_rate": 8.749630710661658e-05, "loss": 1.7979, "step": 4359 }, { "epoch": 0.24301878379131597, "grad_norm": 0.5255259871482849, "learning_rate": 8.749041616772653e-05, "loss": 1.753, "step": 4360 }, { "epoch": 0.24307452204447913, "grad_norm": 0.5205305814743042, "learning_rate": 8.748452403985905e-05, "loss": 1.518, "step": 4361 }, { "epoch": 0.24313026029764226, "grad_norm": 0.5705804824829102, "learning_rate": 8.747863072320102e-05, "loss": 1.7267, "step": 4362 }, { "epoch": 0.24318599855080542, "grad_norm": 0.5209723114967346, "learning_rate": 8.747273621793932e-05, "loss": 1.6697, "step": 4363 }, { "epoch": 0.24324173680396857, "grad_norm": 0.5164801478385925, "learning_rate": 8.746684052426093e-05, "loss": 1.628, "step": 4364 }, { "epoch": 0.2432974750571317, "grad_norm": 0.6018537282943726, "learning_rate": 8.74609436423528e-05, "loss": 1.8611, "step": 4365 }, { "epoch": 0.24335321331029486, "grad_norm": 0.5693862438201904, "learning_rate": 8.745504557240195e-05, "loss": 1.8587, "step": 4366 }, { "epoch": 0.24340895156345801, "grad_norm": 0.5834870338439941, "learning_rate": 8.744914631459544e-05, "loss": 1.82, "step": 4367 }, { "epoch": 0.24346468981662114, "grad_norm": 0.5055362582206726, "learning_rate": 8.744324586912033e-05, "loss": 1.5662, "step": 4368 }, { "epoch": 0.2435204280697843, "grad_norm": 0.5283217430114746, "learning_rate": 8.74373442361638e-05, "loss": 1.618, "step": 4369 }, { "epoch": 0.24357616632294743, "grad_norm": 0.5035987496376038, "learning_rate": 8.743144141591297e-05, "loss": 1.6436, "step": 4370 }, { "epoch": 0.2436319045761106, "grad_norm": 0.5793476700782776, "learning_rate": 8.742553740855506e-05, "loss": 1.9764, "step": 4371 }, { "epoch": 0.24368764282927374, "grad_norm": 0.5031444430351257, "learning_rate": 8.741963221427732e-05, "loss": 1.4643, "step": 4372 }, { "epoch": 0.24374338108243687, "grad_norm": 0.5925171971321106, "learning_rate": 8.7413725833267e-05, "loss": 1.7132, "step": 4373 }, { "epoch": 0.24379911933560003, "grad_norm": 0.5252764225006104, "learning_rate": 8.740781826571144e-05, "loss": 1.613, "step": 4374 }, { "epoch": 0.24385485758876316, "grad_norm": 0.5435476899147034, "learning_rate": 8.740190951179799e-05, "loss": 1.7225, "step": 4375 }, { "epoch": 0.24391059584192631, "grad_norm": 0.5505743026733398, "learning_rate": 8.739599957171404e-05, "loss": 1.7796, "step": 4376 }, { "epoch": 0.24396633409508947, "grad_norm": 0.5711907148361206, "learning_rate": 8.7390088445647e-05, "loss": 1.8918, "step": 4377 }, { "epoch": 0.2440220723482526, "grad_norm": 0.617215096950531, "learning_rate": 8.738417613378439e-05, "loss": 1.6408, "step": 4378 }, { "epoch": 0.24407781060141576, "grad_norm": 0.5194396376609802, "learning_rate": 8.737826263631363e-05, "loss": 1.5007, "step": 4379 }, { "epoch": 0.24413354885457889, "grad_norm": NaN, "learning_rate": 8.737826263631363e-05, "loss": 1.8818, "step": 4380 }, { "epoch": 0.24418928710774204, "grad_norm": 0.5449255704879761, "learning_rate": 8.737234795342234e-05, "loss": 1.6008, "step": 4381 }, { "epoch": 0.2442450253609052, "grad_norm": 0.517254650592804, "learning_rate": 8.736643208529807e-05, "loss": 1.5589, "step": 4382 }, { "epoch": 0.24430076361406833, "grad_norm": 0.5613778829574585, "learning_rate": 8.736051503212843e-05, "loss": 1.8349, "step": 4383 }, { "epoch": 0.24435650186723148, "grad_norm": 0.5578374266624451, "learning_rate": 8.735459679410108e-05, "loss": 1.6444, "step": 4384 }, { "epoch": 0.2444122401203946, "grad_norm": 0.5179364681243896, "learning_rate": 8.734867737140371e-05, "loss": 1.5685, "step": 4385 }, { "epoch": 0.24446797837355777, "grad_norm": 0.5676231980323792, "learning_rate": 8.734275676422406e-05, "loss": 1.7138, "step": 4386 }, { "epoch": 0.24452371662672093, "grad_norm": 0.5979743599891663, "learning_rate": 8.73368349727499e-05, "loss": 1.8035, "step": 4387 }, { "epoch": 0.24457945487988406, "grad_norm": 0.566631555557251, "learning_rate": 8.733091199716899e-05, "loss": 1.7692, "step": 4388 }, { "epoch": 0.2446351931330472, "grad_norm": 0.5594037175178528, "learning_rate": 8.732498783766923e-05, "loss": 1.7145, "step": 4389 }, { "epoch": 0.24469093138621037, "grad_norm": 0.47728872299194336, "learning_rate": 8.731906249443847e-05, "loss": 1.3759, "step": 4390 }, { "epoch": 0.2447466696393735, "grad_norm": 0.5077241063117981, "learning_rate": 8.731313596766461e-05, "loss": 1.6403, "step": 4391 }, { "epoch": 0.24480240789253666, "grad_norm": 0.51840740442276, "learning_rate": 8.730720825753567e-05, "loss": 1.7304, "step": 4392 }, { "epoch": 0.24485814614569978, "grad_norm": 0.555458664894104, "learning_rate": 8.730127936423957e-05, "loss": 1.7039, "step": 4393 }, { "epoch": 0.24491388439886294, "grad_norm": 0.530720591545105, "learning_rate": 8.729534928796438e-05, "loss": 1.87, "step": 4394 }, { "epoch": 0.2449696226520261, "grad_norm": 0.5183333158493042, "learning_rate": 8.728941802889816e-05, "loss": 1.6194, "step": 4395 }, { "epoch": 0.24502536090518923, "grad_norm": 0.5418990254402161, "learning_rate": 8.728348558722901e-05, "loss": 1.6804, "step": 4396 }, { "epoch": 0.24508109915835238, "grad_norm": 0.5377148985862732, "learning_rate": 8.727755196314507e-05, "loss": 1.5289, "step": 4397 }, { "epoch": 0.2451368374115155, "grad_norm": 0.5729206800460815, "learning_rate": 8.727161715683452e-05, "loss": 1.7488, "step": 4398 }, { "epoch": 0.24519257566467867, "grad_norm": 0.5957255363464355, "learning_rate": 8.726568116848559e-05, "loss": 1.4552, "step": 4399 }, { "epoch": 0.24524831391784183, "grad_norm": 0.6279282569885254, "learning_rate": 8.725974399828653e-05, "loss": 1.8822, "step": 4400 }, { "epoch": 0.24530405217100495, "grad_norm": 0.5379980802536011, "learning_rate": 8.725380564642563e-05, "loss": 1.7286, "step": 4401 }, { "epoch": 0.2453597904241681, "grad_norm": 0.506988525390625, "learning_rate": 8.724786611309123e-05, "loss": 1.5182, "step": 4402 }, { "epoch": 0.24541552867733124, "grad_norm": 0.5806999206542969, "learning_rate": 8.724192539847167e-05, "loss": 1.7967, "step": 4403 }, { "epoch": 0.2454712669304944, "grad_norm": 0.6368009448051453, "learning_rate": 8.723598350275537e-05, "loss": 1.8081, "step": 4404 }, { "epoch": 0.24552700518365755, "grad_norm": 0.6073201894760132, "learning_rate": 8.723004042613079e-05, "loss": 1.8369, "step": 4405 }, { "epoch": 0.24558274343682068, "grad_norm": 0.5500373244285583, "learning_rate": 8.722409616878637e-05, "loss": 1.6556, "step": 4406 }, { "epoch": 0.24563848168998384, "grad_norm": 0.5122720003128052, "learning_rate": 8.721815073091068e-05, "loss": 1.5745, "step": 4407 }, { "epoch": 0.24569421994314697, "grad_norm": 0.5759167671203613, "learning_rate": 8.721220411269222e-05, "loss": 1.8282, "step": 4408 }, { "epoch": 0.24574995819631013, "grad_norm": 0.5656915307044983, "learning_rate": 8.720625631431963e-05, "loss": 1.6782, "step": 4409 }, { "epoch": 0.24580569644947328, "grad_norm": 0.5352250933647156, "learning_rate": 8.72003073359815e-05, "loss": 1.7703, "step": 4410 }, { "epoch": 0.2458614347026364, "grad_norm": 0.6013755798339844, "learning_rate": 8.719435717786653e-05, "loss": 1.4931, "step": 4411 }, { "epoch": 0.24591717295579957, "grad_norm": 0.5831592082977295, "learning_rate": 8.718840584016339e-05, "loss": 1.8267, "step": 4412 }, { "epoch": 0.24597291120896272, "grad_norm": 0.5686485767364502, "learning_rate": 8.718245332306086e-05, "loss": 1.7073, "step": 4413 }, { "epoch": 0.24602864946212585, "grad_norm": 0.5540615320205688, "learning_rate": 8.717649962674768e-05, "loss": 1.7481, "step": 4414 }, { "epoch": 0.246084387715289, "grad_norm": 0.4984779953956604, "learning_rate": 8.71705447514127e-05, "loss": 1.4674, "step": 4415 }, { "epoch": 0.24614012596845214, "grad_norm": 0.5658791065216064, "learning_rate": 8.716458869724475e-05, "loss": 1.7044, "step": 4416 }, { "epoch": 0.2461958642216153, "grad_norm": 0.6222524046897888, "learning_rate": 8.715863146443273e-05, "loss": 1.9216, "step": 4417 }, { "epoch": 0.24625160247477845, "grad_norm": 0.5234952569007874, "learning_rate": 8.715267305316558e-05, "loss": 1.3814, "step": 4418 }, { "epoch": 0.24630734072794158, "grad_norm": 0.5298272371292114, "learning_rate": 8.714671346363226e-05, "loss": 1.7245, "step": 4419 }, { "epoch": 0.24636307898110474, "grad_norm": 0.5426690578460693, "learning_rate": 8.714075269602176e-05, "loss": 1.7225, "step": 4420 }, { "epoch": 0.24641881723426787, "grad_norm": 0.5064488649368286, "learning_rate": 8.713479075052312e-05, "loss": 1.637, "step": 4421 }, { "epoch": 0.24647455548743102, "grad_norm": 0.6294771432876587, "learning_rate": 8.712882762732543e-05, "loss": 2.0957, "step": 4422 }, { "epoch": 0.24653029374059418, "grad_norm": 0.5518829226493835, "learning_rate": 8.712286332661783e-05, "loss": 1.8551, "step": 4423 }, { "epoch": 0.2465860319937573, "grad_norm": 0.5775428414344788, "learning_rate": 8.711689784858943e-05, "loss": 2.0364, "step": 4424 }, { "epoch": 0.24664177024692047, "grad_norm": 0.585757851600647, "learning_rate": 8.711093119342944e-05, "loss": 1.9078, "step": 4425 }, { "epoch": 0.2466975085000836, "grad_norm": 0.49010977149009705, "learning_rate": 8.710496336132707e-05, "loss": 1.7235, "step": 4426 }, { "epoch": 0.24675324675324675, "grad_norm": 0.4925966262817383, "learning_rate": 8.709899435247162e-05, "loss": 1.5281, "step": 4427 }, { "epoch": 0.2468089850064099, "grad_norm": 0.5210297107696533, "learning_rate": 8.709302416705235e-05, "loss": 1.6194, "step": 4428 }, { "epoch": 0.24686472325957304, "grad_norm": 0.5486511588096619, "learning_rate": 8.708705280525863e-05, "loss": 1.8987, "step": 4429 }, { "epoch": 0.2469204615127362, "grad_norm": 0.5911165475845337, "learning_rate": 8.708108026727983e-05, "loss": 1.8762, "step": 4430 }, { "epoch": 0.24697619976589932, "grad_norm": 0.557861864566803, "learning_rate": 8.707510655330535e-05, "loss": 1.7246, "step": 4431 }, { "epoch": 0.24703193801906248, "grad_norm": 0.5598505139350891, "learning_rate": 8.706913166352468e-05, "loss": 1.7012, "step": 4432 }, { "epoch": 0.24708767627222564, "grad_norm": 0.523493230342865, "learning_rate": 8.706315559812725e-05, "loss": 1.6476, "step": 4433 }, { "epoch": 0.24714341452538877, "grad_norm": 0.5727233290672302, "learning_rate": 8.705717835730263e-05, "loss": 1.7085, "step": 4434 }, { "epoch": 0.24719915277855192, "grad_norm": 0.5231149792671204, "learning_rate": 8.705119994124038e-05, "loss": 1.6553, "step": 4435 }, { "epoch": 0.24725489103171508, "grad_norm": 0.5807697176933289, "learning_rate": 8.70452203501301e-05, "loss": 1.9495, "step": 4436 }, { "epoch": 0.2473106292848782, "grad_norm": 0.538212239742279, "learning_rate": 8.703923958416141e-05, "loss": 1.6201, "step": 4437 }, { "epoch": 0.24736636753804137, "grad_norm": 0.5267363786697388, "learning_rate": 8.703325764352397e-05, "loss": 1.6372, "step": 4438 }, { "epoch": 0.2474221057912045, "grad_norm": 0.5570881962776184, "learning_rate": 8.702727452840753e-05, "loss": 1.7135, "step": 4439 }, { "epoch": 0.24747784404436765, "grad_norm": 0.5702007412910461, "learning_rate": 8.702129023900184e-05, "loss": 1.7636, "step": 4440 }, { "epoch": 0.2475335822975308, "grad_norm": 0.5725893974304199, "learning_rate": 8.701530477549666e-05, "loss": 1.7144, "step": 4441 }, { "epoch": 0.24758932055069394, "grad_norm": 0.5385577082633972, "learning_rate": 8.700931813808182e-05, "loss": 1.7915, "step": 4442 }, { "epoch": 0.2476450588038571, "grad_norm": 0.625249981880188, "learning_rate": 8.700333032694721e-05, "loss": 1.8956, "step": 4443 }, { "epoch": 0.24770079705702022, "grad_norm": 0.6568485498428345, "learning_rate": 8.69973413422827e-05, "loss": 2.0, "step": 4444 }, { "epoch": 0.24775653531018338, "grad_norm": 0.5595792531967163, "learning_rate": 8.699135118427821e-05, "loss": 1.7215, "step": 4445 }, { "epoch": 0.24781227356334654, "grad_norm": 0.5085048675537109, "learning_rate": 8.698535985312376e-05, "loss": 1.5958, "step": 4446 }, { "epoch": 0.24786801181650966, "grad_norm": 0.5155544281005859, "learning_rate": 8.697936734900932e-05, "loss": 1.7741, "step": 4447 }, { "epoch": 0.24792375006967282, "grad_norm": 0.5145729780197144, "learning_rate": 8.697337367212496e-05, "loss": 1.7966, "step": 4448 }, { "epoch": 0.24797948832283595, "grad_norm": 0.5527476072311401, "learning_rate": 8.696737882266076e-05, "loss": 1.8026, "step": 4449 }, { "epoch": 0.2480352265759991, "grad_norm": 0.5763035416603088, "learning_rate": 8.696138280080684e-05, "loss": 1.7823, "step": 4450 }, { "epoch": 0.24809096482916226, "grad_norm": 0.5513672828674316, "learning_rate": 8.695538560675334e-05, "loss": 1.5817, "step": 4451 }, { "epoch": 0.2481467030823254, "grad_norm": 0.5553067922592163, "learning_rate": 8.694938724069048e-05, "loss": 1.8425, "step": 4452 }, { "epoch": 0.24820244133548855, "grad_norm": 0.49385184049606323, "learning_rate": 8.69433877028085e-05, "loss": 1.6939, "step": 4453 }, { "epoch": 0.24825817958865168, "grad_norm": 0.5889978408813477, "learning_rate": 8.693738699329765e-05, "loss": 1.6874, "step": 4454 }, { "epoch": 0.24831391784181484, "grad_norm": 0.556433916091919, "learning_rate": 8.693138511234825e-05, "loss": 1.7339, "step": 4455 }, { "epoch": 0.248369656094978, "grad_norm": 0.5483202338218689, "learning_rate": 8.692538206015062e-05, "loss": 1.8301, "step": 4456 }, { "epoch": 0.24842539434814112, "grad_norm": 0.5633078813552856, "learning_rate": 8.691937783689518e-05, "loss": 1.7435, "step": 4457 }, { "epoch": 0.24848113260130428, "grad_norm": 0.5544833540916443, "learning_rate": 8.691337244277231e-05, "loss": 1.6348, "step": 4458 }, { "epoch": 0.24853687085446743, "grad_norm": 0.5703203082084656, "learning_rate": 8.69073658779725e-05, "loss": 1.6839, "step": 4459 }, { "epoch": 0.24859260910763056, "grad_norm": 0.5441849231719971, "learning_rate": 8.690135814268623e-05, "loss": 1.7292, "step": 4460 }, { "epoch": 0.24864834736079372, "grad_norm": 0.5759615302085876, "learning_rate": 8.689534923710403e-05, "loss": 1.8113, "step": 4461 }, { "epoch": 0.24870408561395685, "grad_norm": 0.568762481212616, "learning_rate": 8.688933916141647e-05, "loss": 1.9261, "step": 4462 }, { "epoch": 0.24875982386712, "grad_norm": 0.5397505164146423, "learning_rate": 8.688332791581415e-05, "loss": 1.8136, "step": 4463 }, { "epoch": 0.24881556212028316, "grad_norm": 0.5890788435935974, "learning_rate": 8.68773155004877e-05, "loss": 1.6383, "step": 4464 }, { "epoch": 0.2488713003734463, "grad_norm": 0.5507654547691345, "learning_rate": 8.687130191562782e-05, "loss": 1.7313, "step": 4465 }, { "epoch": 0.24892703862660945, "grad_norm": 0.5670168399810791, "learning_rate": 8.686528716142523e-05, "loss": 1.7355, "step": 4466 }, { "epoch": 0.24898277687977258, "grad_norm": 0.5866429805755615, "learning_rate": 8.685927123807065e-05, "loss": 1.7786, "step": 4467 }, { "epoch": 0.24903851513293573, "grad_norm": 0.5706139206886292, "learning_rate": 8.68532541457549e-05, "loss": 1.8995, "step": 4468 }, { "epoch": 0.2490942533860989, "grad_norm": 0.5574220418930054, "learning_rate": 8.68472358846688e-05, "loss": 1.86, "step": 4469 }, { "epoch": 0.24914999163926202, "grad_norm": 0.5442642569541931, "learning_rate": 8.684121645500322e-05, "loss": 1.803, "step": 4470 }, { "epoch": 0.24920572989242518, "grad_norm": 0.5070736408233643, "learning_rate": 8.683519585694903e-05, "loss": 1.5786, "step": 4471 }, { "epoch": 0.2492614681455883, "grad_norm": 0.5622973442077637, "learning_rate": 8.682917409069721e-05, "loss": 1.8524, "step": 4472 }, { "epoch": 0.24931720639875146, "grad_norm": 0.5547112226486206, "learning_rate": 8.682315115643872e-05, "loss": 1.7891, "step": 4473 }, { "epoch": 0.24937294465191462, "grad_norm": 0.5251905918121338, "learning_rate": 8.681712705436455e-05, "loss": 1.3104, "step": 4474 }, { "epoch": 0.24942868290507775, "grad_norm": 0.5507151484489441, "learning_rate": 8.68111017846658e-05, "loss": 1.7571, "step": 4475 }, { "epoch": 0.2494844211582409, "grad_norm": 0.628353476524353, "learning_rate": 8.68050753475335e-05, "loss": 1.7915, "step": 4476 }, { "epoch": 0.24954015941140403, "grad_norm": 0.5899398922920227, "learning_rate": 8.67990477431588e-05, "loss": 1.7928, "step": 4477 }, { "epoch": 0.2495958976645672, "grad_norm": 0.5376555919647217, "learning_rate": 8.679301897173287e-05, "loss": 1.6592, "step": 4478 }, { "epoch": 0.24965163591773035, "grad_norm": 0.5241808891296387, "learning_rate": 8.678698903344689e-05, "loss": 1.6482, "step": 4479 }, { "epoch": 0.24970737417089348, "grad_norm": 0.6054913997650146, "learning_rate": 8.67809579284921e-05, "loss": 1.7838, "step": 4480 }, { "epoch": 0.24976311242405663, "grad_norm": 0.56617671251297, "learning_rate": 8.677492565705976e-05, "loss": 1.7705, "step": 4481 }, { "epoch": 0.2498188506772198, "grad_norm": 0.549431324005127, "learning_rate": 8.676889221934121e-05, "loss": 1.8349, "step": 4482 }, { "epoch": 0.24987458893038292, "grad_norm": 0.5290791392326355, "learning_rate": 8.676285761552775e-05, "loss": 1.6761, "step": 4483 }, { "epoch": 0.24993032718354607, "grad_norm": 0.7188482880592346, "learning_rate": 8.675682184581081e-05, "loss": 1.6409, "step": 4484 }, { "epoch": 0.2499860654367092, "grad_norm": 0.5338848233222961, "learning_rate": 8.67507849103818e-05, "loss": 1.4604, "step": 4485 }, { "epoch": 0.25004180368987233, "grad_norm": 0.5384326577186584, "learning_rate": 8.674474680943215e-05, "loss": 1.5605, "step": 4486 }, { "epoch": 0.2500975419430355, "grad_norm": 0.521425724029541, "learning_rate": 8.673870754315336e-05, "loss": 1.625, "step": 4487 }, { "epoch": 0.25015328019619865, "grad_norm": 0.5739718079566956, "learning_rate": 8.673266711173698e-05, "loss": 1.7826, "step": 4488 }, { "epoch": 0.2502090184493618, "grad_norm": 0.5505213141441345, "learning_rate": 8.672662551537457e-05, "loss": 1.595, "step": 4489 }, { "epoch": 0.25026475670252496, "grad_norm": 0.5271283388137817, "learning_rate": 8.672058275425772e-05, "loss": 1.5468, "step": 4490 }, { "epoch": 0.2503204949556881, "grad_norm": 0.5678611993789673, "learning_rate": 8.671453882857808e-05, "loss": 1.8862, "step": 4491 }, { "epoch": 0.2503762332088512, "grad_norm": 0.6000241041183472, "learning_rate": 8.670849373852734e-05, "loss": 1.6133, "step": 4492 }, { "epoch": 0.2504319714620144, "grad_norm": 0.5662490129470825, "learning_rate": 8.670244748429719e-05, "loss": 1.7045, "step": 4493 }, { "epoch": 0.25048770971517753, "grad_norm": 0.5680144429206848, "learning_rate": 8.66964000660794e-05, "loss": 1.6462, "step": 4494 }, { "epoch": 0.25054344796834066, "grad_norm": 0.5490357279777527, "learning_rate": 8.669035148406577e-05, "loss": 1.5736, "step": 4495 }, { "epoch": 0.25059918622150384, "grad_norm": 0.5800120234489441, "learning_rate": 8.668430173844808e-05, "loss": 1.8931, "step": 4496 }, { "epoch": 0.250654924474667, "grad_norm": 0.5286765694618225, "learning_rate": 8.667825082941826e-05, "loss": 1.6553, "step": 4497 }, { "epoch": 0.2507106627278301, "grad_norm": 0.5452672839164734, "learning_rate": 8.667219875716814e-05, "loss": 1.7692, "step": 4498 }, { "epoch": 0.25076640098099323, "grad_norm": 0.5615769028663635, "learning_rate": 8.66661455218897e-05, "loss": 1.8116, "step": 4499 }, { "epoch": 0.2508221392341564, "grad_norm": 0.5832181572914124, "learning_rate": 8.666009112377491e-05, "loss": 1.938, "step": 4500 }, { "epoch": 0.25087787748731954, "grad_norm": 0.5258188247680664, "learning_rate": 8.665403556301576e-05, "loss": 1.6026, "step": 4501 }, { "epoch": 0.2509336157404827, "grad_norm": 0.6271452307701111, "learning_rate": 8.664797883980434e-05, "loss": 1.6589, "step": 4502 }, { "epoch": 0.25098935399364586, "grad_norm": 0.5411872267723083, "learning_rate": 8.664192095433266e-05, "loss": 1.7016, "step": 4503 }, { "epoch": 0.251045092246809, "grad_norm": 0.5610190629959106, "learning_rate": 8.663586190679291e-05, "loss": 1.8425, "step": 4504 }, { "epoch": 0.2511008304999721, "grad_norm": 0.5276908278465271, "learning_rate": 8.662980169737723e-05, "loss": 1.6105, "step": 4505 }, { "epoch": 0.2511565687531353, "grad_norm": 0.5493645668029785, "learning_rate": 8.662374032627778e-05, "loss": 1.9352, "step": 4506 }, { "epoch": 0.25121230700629843, "grad_norm": 0.5296374559402466, "learning_rate": 8.661767779368683e-05, "loss": 1.7867, "step": 4507 }, { "epoch": 0.25126804525946156, "grad_norm": 0.6600750684738159, "learning_rate": 8.661161409979665e-05, "loss": 1.6947, "step": 4508 }, { "epoch": 0.2513237835126247, "grad_norm": 0.5515453815460205, "learning_rate": 8.66055492447995e-05, "loss": 1.796, "step": 4509 }, { "epoch": 0.25137952176578787, "grad_norm": 0.5651318430900574, "learning_rate": 8.659948322888777e-05, "loss": 1.6343, "step": 4510 }, { "epoch": 0.251435260018951, "grad_norm": 0.5783109664916992, "learning_rate": 8.659341605225384e-05, "loss": 1.8057, "step": 4511 }, { "epoch": 0.25149099827211413, "grad_norm": 0.5711765885353088, "learning_rate": 8.65873477150901e-05, "loss": 1.8123, "step": 4512 }, { "epoch": 0.2515467365252773, "grad_norm": 0.5652083158493042, "learning_rate": 8.658127821758899e-05, "loss": 1.7952, "step": 4513 }, { "epoch": 0.25160247477844044, "grad_norm": 0.5652216076850891, "learning_rate": 8.657520755994305e-05, "loss": 1.8295, "step": 4514 }, { "epoch": 0.2516582130316036, "grad_norm": 0.5443994998931885, "learning_rate": 8.656913574234474e-05, "loss": 1.6294, "step": 4515 }, { "epoch": 0.25171395128476676, "grad_norm": 0.5845414400100708, "learning_rate": 8.656306276498667e-05, "loss": 1.8597, "step": 4516 }, { "epoch": 0.2517696895379299, "grad_norm": 0.5372679233551025, "learning_rate": 8.655698862806143e-05, "loss": 1.7067, "step": 4517 }, { "epoch": 0.251825427791093, "grad_norm": 0.5330473780632019, "learning_rate": 8.655091333176165e-05, "loss": 1.7043, "step": 4518 }, { "epoch": 0.2518811660442562, "grad_norm": 0.5988831520080566, "learning_rate": 8.654483687628002e-05, "loss": 1.7418, "step": 4519 }, { "epoch": 0.25193690429741933, "grad_norm": 0.5914613604545593, "learning_rate": 8.65387592618092e-05, "loss": 1.6442, "step": 4520 }, { "epoch": 0.25199264255058246, "grad_norm": 0.5800835490226746, "learning_rate": 8.653268048854201e-05, "loss": 1.7816, "step": 4521 }, { "epoch": 0.2520483808037456, "grad_norm": 0.5335732102394104, "learning_rate": 8.652660055667117e-05, "loss": 1.5046, "step": 4522 }, { "epoch": 0.25210411905690877, "grad_norm": 0.48013389110565186, "learning_rate": 8.652051946638953e-05, "loss": 1.582, "step": 4523 }, { "epoch": 0.2521598573100719, "grad_norm": 0.6047071814537048, "learning_rate": 8.651443721788996e-05, "loss": 1.6199, "step": 4524 }, { "epoch": 0.25221559556323503, "grad_norm": 0.5248143672943115, "learning_rate": 8.650835381136533e-05, "loss": 1.5345, "step": 4525 }, { "epoch": 0.2522713338163982, "grad_norm": 0.5078330636024475, "learning_rate": 8.650226924700855e-05, "loss": 1.6656, "step": 4526 }, { "epoch": 0.25232707206956134, "grad_norm": 0.5320603251457214, "learning_rate": 8.649618352501264e-05, "loss": 1.598, "step": 4527 }, { "epoch": 0.25238281032272447, "grad_norm": 0.49775633215904236, "learning_rate": 8.649009664557057e-05, "loss": 1.3941, "step": 4528 }, { "epoch": 0.25243854857588766, "grad_norm": 0.5565609931945801, "learning_rate": 8.648400860887538e-05, "loss": 1.7144, "step": 4529 }, { "epoch": 0.2524942868290508, "grad_norm": 0.5529298782348633, "learning_rate": 8.647791941512016e-05, "loss": 1.8223, "step": 4530 }, { "epoch": 0.2525500250822139, "grad_norm": 0.5692974328994751, "learning_rate": 8.6471829064498e-05, "loss": 1.6577, "step": 4531 }, { "epoch": 0.25260576333537704, "grad_norm": 0.49695706367492676, "learning_rate": 8.646573755720209e-05, "loss": 1.6222, "step": 4532 }, { "epoch": 0.2526615015885402, "grad_norm": 0.5647556781768799, "learning_rate": 8.645964489342557e-05, "loss": 1.8348, "step": 4533 }, { "epoch": 0.25271723984170336, "grad_norm": 0.5597743391990662, "learning_rate": 8.645355107336171e-05, "loss": 1.7095, "step": 4534 }, { "epoch": 0.2527729780948665, "grad_norm": 0.5715233683586121, "learning_rate": 8.644745609720375e-05, "loss": 1.9243, "step": 4535 }, { "epoch": 0.25282871634802967, "grad_norm": 0.5817229747772217, "learning_rate": 8.644135996514498e-05, "loss": 1.782, "step": 4536 }, { "epoch": 0.2528844546011928, "grad_norm": 0.5697113275527954, "learning_rate": 8.643526267737873e-05, "loss": 1.6014, "step": 4537 }, { "epoch": 0.2529401928543559, "grad_norm": 0.5716366767883301, "learning_rate": 8.642916423409839e-05, "loss": 1.6435, "step": 4538 }, { "epoch": 0.2529959311075191, "grad_norm": 0.5631042718887329, "learning_rate": 8.642306463549736e-05, "loss": 1.7686, "step": 4539 }, { "epoch": 0.25305166936068224, "grad_norm": 0.596517026424408, "learning_rate": 8.641696388176907e-05, "loss": 1.8116, "step": 4540 }, { "epoch": 0.25310740761384537, "grad_norm": 0.47129639983177185, "learning_rate": 8.641086197310703e-05, "loss": 1.4985, "step": 4541 }, { "epoch": 0.25316314586700855, "grad_norm": 0.551607072353363, "learning_rate": 8.640475890970471e-05, "loss": 1.7948, "step": 4542 }, { "epoch": 0.2532188841201717, "grad_norm": 0.559027910232544, "learning_rate": 8.639865469175572e-05, "loss": 1.5825, "step": 4543 }, { "epoch": 0.2532746223733348, "grad_norm": 0.5063076019287109, "learning_rate": 8.639254931945362e-05, "loss": 1.4125, "step": 4544 }, { "epoch": 0.25333036062649794, "grad_norm": 0.5271062254905701, "learning_rate": 8.638644279299202e-05, "loss": 1.6964, "step": 4545 }, { "epoch": 0.2533860988796611, "grad_norm": 0.4922122657299042, "learning_rate": 8.638033511256462e-05, "loss": 1.6725, "step": 4546 }, { "epoch": 0.25344183713282425, "grad_norm": 0.5734017491340637, "learning_rate": 8.637422627836509e-05, "loss": 2.0334, "step": 4547 }, { "epoch": 0.2534975753859874, "grad_norm": 0.4978555738925934, "learning_rate": 8.636811629058718e-05, "loss": 1.6665, "step": 4548 }, { "epoch": 0.25355331363915057, "grad_norm": 0.5637436509132385, "learning_rate": 8.636200514942467e-05, "loss": 1.5875, "step": 4549 }, { "epoch": 0.2536090518923137, "grad_norm": 0.5382322072982788, "learning_rate": 8.635589285507135e-05, "loss": 1.838, "step": 4550 }, { "epoch": 0.2536647901454768, "grad_norm": 0.518650233745575, "learning_rate": 8.634977940772108e-05, "loss": 1.7802, "step": 4551 }, { "epoch": 0.25372052839864, "grad_norm": 0.5153575539588928, "learning_rate": 8.634366480756774e-05, "loss": 1.6153, "step": 4552 }, { "epoch": 0.25377626665180314, "grad_norm": 0.5355269908905029, "learning_rate": 8.633754905480527e-05, "loss": 1.8255, "step": 4553 }, { "epoch": 0.25383200490496627, "grad_norm": 0.5261843204498291, "learning_rate": 8.63314321496276e-05, "loss": 1.6177, "step": 4554 }, { "epoch": 0.2538877431581294, "grad_norm": 0.557314395904541, "learning_rate": 8.632531409222872e-05, "loss": 1.8342, "step": 4555 }, { "epoch": 0.2539434814112926, "grad_norm": 0.5285095572471619, "learning_rate": 8.631919488280267e-05, "loss": 1.6217, "step": 4556 }, { "epoch": 0.2539992196644557, "grad_norm": 0.5471826195716858, "learning_rate": 8.631307452154352e-05, "loss": 1.5318, "step": 4557 }, { "epoch": 0.25405495791761884, "grad_norm": 0.5375044941902161, "learning_rate": 8.630695300864536e-05, "loss": 1.7415, "step": 4558 }, { "epoch": 0.254110696170782, "grad_norm": 0.566832423210144, "learning_rate": 8.630083034430232e-05, "loss": 1.9215, "step": 4559 }, { "epoch": 0.25416643442394515, "grad_norm": 0.5262976884841919, "learning_rate": 8.629470652870861e-05, "loss": 1.5432, "step": 4560 }, { "epoch": 0.2542221726771083, "grad_norm": 0.5495408177375793, "learning_rate": 8.628858156205842e-05, "loss": 1.9161, "step": 4561 }, { "epoch": 0.25427791093027147, "grad_norm": 0.5776422023773193, "learning_rate": 8.6282455444546e-05, "loss": 1.8547, "step": 4562 }, { "epoch": 0.2543336491834346, "grad_norm": 0.5136664509773254, "learning_rate": 8.627632817636563e-05, "loss": 1.3558, "step": 4563 }, { "epoch": 0.2543893874365977, "grad_norm": 0.5449255108833313, "learning_rate": 8.627019975771165e-05, "loss": 1.7991, "step": 4564 }, { "epoch": 0.2544451256897609, "grad_norm": 0.49720707535743713, "learning_rate": 8.626407018877837e-05, "loss": 1.5515, "step": 4565 }, { "epoch": 0.25450086394292404, "grad_norm": 0.5493996739387512, "learning_rate": 8.625793946976026e-05, "loss": 1.7666, "step": 4566 }, { "epoch": 0.25455660219608717, "grad_norm": 0.5458593368530273, "learning_rate": 8.625180760085167e-05, "loss": 1.9701, "step": 4567 }, { "epoch": 0.2546123404492503, "grad_norm": 0.5866237878799438, "learning_rate": 8.624567458224713e-05, "loss": 1.7123, "step": 4568 }, { "epoch": 0.2546680787024135, "grad_norm": 0.5610763430595398, "learning_rate": 8.62395404141411e-05, "loss": 1.8511, "step": 4569 }, { "epoch": 0.2547238169555766, "grad_norm": 0.5264028906822205, "learning_rate": 8.623340509672817e-05, "loss": 1.6913, "step": 4570 }, { "epoch": 0.25477955520873974, "grad_norm": 0.5024250745773315, "learning_rate": 8.622726863020285e-05, "loss": 1.6337, "step": 4571 }, { "epoch": 0.2548352934619029, "grad_norm": 0.6130850315093994, "learning_rate": 8.622113101475982e-05, "loss": 1.8858, "step": 4572 }, { "epoch": 0.25489103171506605, "grad_norm": 0.5543071627616882, "learning_rate": 8.621499225059369e-05, "loss": 1.6353, "step": 4573 }, { "epoch": 0.2549467699682292, "grad_norm": 0.5286437273025513, "learning_rate": 8.620885233789914e-05, "loss": 1.4418, "step": 4574 }, { "epoch": 0.25500250822139237, "grad_norm": 0.5485914349555969, "learning_rate": 8.620271127687092e-05, "loss": 1.7161, "step": 4575 }, { "epoch": 0.2550582464745555, "grad_norm": 0.612994909286499, "learning_rate": 8.619656906770377e-05, "loss": 1.8467, "step": 4576 }, { "epoch": 0.2551139847277186, "grad_norm": 0.5447350740432739, "learning_rate": 8.619042571059248e-05, "loss": 1.7528, "step": 4577 }, { "epoch": 0.25516972298088175, "grad_norm": 0.5236079096794128, "learning_rate": 8.61842812057319e-05, "loss": 1.5648, "step": 4578 }, { "epoch": 0.25522546123404494, "grad_norm": 0.534354567527771, "learning_rate": 8.617813555331689e-05, "loss": 1.5093, "step": 4579 }, { "epoch": 0.25528119948720807, "grad_norm": 0.5146899819374084, "learning_rate": 8.617198875354235e-05, "loss": 1.6445, "step": 4580 }, { "epoch": 0.2553369377403712, "grad_norm": 0.5606057047843933, "learning_rate": 8.616584080660323e-05, "loss": 1.6225, "step": 4581 }, { "epoch": 0.2553926759935344, "grad_norm": 0.557131290435791, "learning_rate": 8.615969171269449e-05, "loss": 1.8017, "step": 4582 }, { "epoch": 0.2554484142466975, "grad_norm": 0.5046922564506531, "learning_rate": 8.615354147201116e-05, "loss": 1.6034, "step": 4583 }, { "epoch": 0.25550415249986064, "grad_norm": 0.5313592553138733, "learning_rate": 8.614739008474829e-05, "loss": 1.481, "step": 4584 }, { "epoch": 0.2555598907530238, "grad_norm": 0.5347174406051636, "learning_rate": 8.614123755110096e-05, "loss": 1.6323, "step": 4585 }, { "epoch": 0.25561562900618695, "grad_norm": 0.5261495113372803, "learning_rate": 8.61350838712643e-05, "loss": 1.4896, "step": 4586 }, { "epoch": 0.2556713672593501, "grad_norm": 0.5374502539634705, "learning_rate": 8.612892904543344e-05, "loss": 1.6488, "step": 4587 }, { "epoch": 0.25572710551251326, "grad_norm": 0.5835258960723877, "learning_rate": 8.612277307380361e-05, "loss": 1.7467, "step": 4588 }, { "epoch": 0.2557828437656764, "grad_norm": 0.519822359085083, "learning_rate": 8.611661595657004e-05, "loss": 1.4627, "step": 4589 }, { "epoch": 0.2558385820188395, "grad_norm": 0.5837191343307495, "learning_rate": 8.611045769392796e-05, "loss": 1.654, "step": 4590 }, { "epoch": 0.25589432027200265, "grad_norm": 0.5844641327857971, "learning_rate": 8.610429828607271e-05, "loss": 1.6177, "step": 4591 }, { "epoch": 0.25595005852516584, "grad_norm": 0.5927681922912598, "learning_rate": 8.609813773319963e-05, "loss": 1.9184, "step": 4592 }, { "epoch": 0.25600579677832896, "grad_norm": 0.6149387955665588, "learning_rate": 8.609197603550409e-05, "loss": 1.6321, "step": 4593 }, { "epoch": 0.2560615350314921, "grad_norm": 0.5619008541107178, "learning_rate": 8.608581319318148e-05, "loss": 1.6094, "step": 4594 }, { "epoch": 0.2561172732846553, "grad_norm": 0.5645739436149597, "learning_rate": 8.607964920642728e-05, "loss": 1.7111, "step": 4595 }, { "epoch": 0.2561730115378184, "grad_norm": 0.5264320373535156, "learning_rate": 8.607348407543699e-05, "loss": 1.5206, "step": 4596 }, { "epoch": 0.25622874979098154, "grad_norm": 0.5533236861228943, "learning_rate": 8.606731780040608e-05, "loss": 1.9129, "step": 4597 }, { "epoch": 0.2562844880441447, "grad_norm": 0.5276892781257629, "learning_rate": 8.606115038153015e-05, "loss": 1.7739, "step": 4598 }, { "epoch": 0.25634022629730785, "grad_norm": 0.5314942598342896, "learning_rate": 8.605498181900477e-05, "loss": 1.6853, "step": 4599 }, { "epoch": 0.256395964550471, "grad_norm": 0.540059506893158, "learning_rate": 8.604881211302559e-05, "loss": 1.8345, "step": 4600 }, { "epoch": 0.2564517028036341, "grad_norm": 0.5306822657585144, "learning_rate": 8.604264126378827e-05, "loss": 1.9012, "step": 4601 }, { "epoch": 0.2565074410567973, "grad_norm": 0.5294952988624573, "learning_rate": 8.603646927148849e-05, "loss": 1.5109, "step": 4602 }, { "epoch": 0.2565631793099604, "grad_norm": 0.5673249959945679, "learning_rate": 8.603029613632205e-05, "loss": 1.758, "step": 4603 }, { "epoch": 0.25661891756312355, "grad_norm": 0.5006965398788452, "learning_rate": 8.602412185848466e-05, "loss": 1.6211, "step": 4604 }, { "epoch": 0.25667465581628673, "grad_norm": 0.5873995423316956, "learning_rate": 8.601794643817216e-05, "loss": 1.8896, "step": 4605 }, { "epoch": 0.25673039406944986, "grad_norm": 0.56819748878479, "learning_rate": 8.601176987558041e-05, "loss": 1.6733, "step": 4606 }, { "epoch": 0.256786132322613, "grad_norm": 0.5610432624816895, "learning_rate": 8.600559217090529e-05, "loss": 1.824, "step": 4607 }, { "epoch": 0.2568418705757762, "grad_norm": 0.5451894998550415, "learning_rate": 8.599941332434269e-05, "loss": 1.7229, "step": 4608 }, { "epoch": 0.2568976088289393, "grad_norm": 0.9107519388198853, "learning_rate": 8.599323333608861e-05, "loss": 1.846, "step": 4609 }, { "epoch": 0.25695334708210243, "grad_norm": 0.5975711941719055, "learning_rate": 8.598705220633903e-05, "loss": 1.7334, "step": 4610 }, { "epoch": 0.2570090853352656, "grad_norm": 0.5969035625457764, "learning_rate": 8.598086993528996e-05, "loss": 1.9449, "step": 4611 }, { "epoch": 0.25706482358842875, "grad_norm": 0.6146485805511475, "learning_rate": 8.597468652313747e-05, "loss": 1.8884, "step": 4612 }, { "epoch": 0.2571205618415919, "grad_norm": 0.5359372496604919, "learning_rate": 8.596850197007767e-05, "loss": 1.6199, "step": 4613 }, { "epoch": 0.257176300094755, "grad_norm": 0.5491176247596741, "learning_rate": 8.596231627630671e-05, "loss": 1.5702, "step": 4614 }, { "epoch": 0.2572320383479182, "grad_norm": 0.5316644310951233, "learning_rate": 8.595612944202076e-05, "loss": 1.6538, "step": 4615 }, { "epoch": 0.2572877766010813, "grad_norm": 0.5944792032241821, "learning_rate": 8.5949941467416e-05, "loss": 1.79, "step": 4616 }, { "epoch": 0.25734351485424445, "grad_norm": 0.5629575848579407, "learning_rate": 8.594375235268872e-05, "loss": 2.0629, "step": 4617 }, { "epoch": 0.25739925310740763, "grad_norm": 0.5681300163269043, "learning_rate": 8.593756209803518e-05, "loss": 1.7105, "step": 4618 }, { "epoch": 0.25745499136057076, "grad_norm": 0.5259959697723389, "learning_rate": 8.59313707036517e-05, "loss": 1.7797, "step": 4619 }, { "epoch": 0.2575107296137339, "grad_norm": 0.5173026323318481, "learning_rate": 8.592517816973462e-05, "loss": 1.6879, "step": 4620 }, { "epoch": 0.2575664678668971, "grad_norm": 0.5310641527175903, "learning_rate": 8.591898449648035e-05, "loss": 1.6947, "step": 4621 }, { "epoch": 0.2576222061200602, "grad_norm": 0.5746062397956848, "learning_rate": 8.591278968408532e-05, "loss": 1.8276, "step": 4622 }, { "epoch": 0.25767794437322333, "grad_norm": 0.5601612329483032, "learning_rate": 8.590659373274599e-05, "loss": 1.6054, "step": 4623 }, { "epoch": 0.25773368262638646, "grad_norm": 0.5777058601379395, "learning_rate": 8.590039664265885e-05, "loss": 1.612, "step": 4624 }, { "epoch": 0.25778942087954965, "grad_norm": 0.6337921023368835, "learning_rate": 8.589419841402047e-05, "loss": 2.1569, "step": 4625 }, { "epoch": 0.2578451591327128, "grad_norm": 0.5203370451927185, "learning_rate": 8.588799904702736e-05, "loss": 1.4849, "step": 4626 }, { "epoch": 0.2579008973858759, "grad_norm": 0.55791175365448, "learning_rate": 8.588179854187616e-05, "loss": 1.882, "step": 4627 }, { "epoch": 0.2579566356390391, "grad_norm": 0.581343948841095, "learning_rate": 8.587559689876354e-05, "loss": 1.7811, "step": 4628 }, { "epoch": 0.2580123738922022, "grad_norm": 0.6163395047187805, "learning_rate": 8.586939411788615e-05, "loss": 1.8589, "step": 4629 }, { "epoch": 0.25806811214536535, "grad_norm": 0.5277383327484131, "learning_rate": 8.586319019944071e-05, "loss": 1.5817, "step": 4630 }, { "epoch": 0.25812385039852853, "grad_norm": 0.5042583346366882, "learning_rate": 8.585698514362397e-05, "loss": 1.4472, "step": 4631 }, { "epoch": 0.25817958865169166, "grad_norm": 0.5802309513092041, "learning_rate": 8.585077895063271e-05, "loss": 1.9396, "step": 4632 }, { "epoch": 0.2582353269048548, "grad_norm": 0.5798273682594299, "learning_rate": 8.58445716206638e-05, "loss": 1.6806, "step": 4633 }, { "epoch": 0.258291065158018, "grad_norm": 0.5102317333221436, "learning_rate": 8.583836315391403e-05, "loss": 1.5884, "step": 4634 }, { "epoch": 0.2583468034111811, "grad_norm": 0.6215993165969849, "learning_rate": 8.583215355058035e-05, "loss": 2.001, "step": 4635 }, { "epoch": 0.25840254166434423, "grad_norm": 0.5116714835166931, "learning_rate": 8.582594281085967e-05, "loss": 1.6639, "step": 4636 }, { "epoch": 0.25845827991750736, "grad_norm": 0.5677070617675781, "learning_rate": 8.581973093494897e-05, "loss": 1.841, "step": 4637 }, { "epoch": 0.25851401817067055, "grad_norm": 0.5552488565444946, "learning_rate": 8.581351792304524e-05, "loss": 1.6623, "step": 4638 }, { "epoch": 0.2585697564238337, "grad_norm": 0.5567041635513306, "learning_rate": 8.580730377534554e-05, "loss": 1.5144, "step": 4639 }, { "epoch": 0.2586254946769968, "grad_norm": 0.5067396759986877, "learning_rate": 8.580108849204693e-05, "loss": 1.4875, "step": 4640 }, { "epoch": 0.25868123293016, "grad_norm": 0.5226799845695496, "learning_rate": 8.579487207334653e-05, "loss": 1.7197, "step": 4641 }, { "epoch": 0.2587369711833231, "grad_norm": 0.5152204036712646, "learning_rate": 8.578865451944148e-05, "loss": 1.4488, "step": 4642 }, { "epoch": 0.25879270943648625, "grad_norm": 0.5446513295173645, "learning_rate": 8.578243583052897e-05, "loss": 1.7116, "step": 4643 }, { "epoch": 0.25884844768964943, "grad_norm": 0.5753796696662903, "learning_rate": 8.577621600680623e-05, "loss": 1.5765, "step": 4644 }, { "epoch": 0.25890418594281256, "grad_norm": 0.53980952501297, "learning_rate": 8.57699950484705e-05, "loss": 1.7881, "step": 4645 }, { "epoch": 0.2589599241959757, "grad_norm": 0.5444200038909912, "learning_rate": 8.57637729557191e-05, "loss": 1.8373, "step": 4646 }, { "epoch": 0.2590156624491388, "grad_norm": 0.5415917634963989, "learning_rate": 8.575754972874931e-05, "loss": 1.6772, "step": 4647 }, { "epoch": 0.259071400702302, "grad_norm": 0.5910305380821228, "learning_rate": 8.575132536775853e-05, "loss": 1.8558, "step": 4648 }, { "epoch": 0.25912713895546513, "grad_norm": 0.5802417397499084, "learning_rate": 8.574509987294417e-05, "loss": 1.9364, "step": 4649 }, { "epoch": 0.25918287720862826, "grad_norm": 0.573726236820221, "learning_rate": 8.573887324450364e-05, "loss": 1.8956, "step": 4650 }, { "epoch": 0.25923861546179144, "grad_norm": 0.5909465551376343, "learning_rate": 8.573264548263442e-05, "loss": 1.7338, "step": 4651 }, { "epoch": 0.2592943537149546, "grad_norm": 0.6169442534446716, "learning_rate": 8.572641658753404e-05, "loss": 1.5941, "step": 4652 }, { "epoch": 0.2593500919681177, "grad_norm": 0.5135464668273926, "learning_rate": 8.572018655940001e-05, "loss": 1.7035, "step": 4653 }, { "epoch": 0.2594058302212809, "grad_norm": 0.5379095077514648, "learning_rate": 8.571395539842992e-05, "loss": 1.7387, "step": 4654 }, { "epoch": 0.259461568474444, "grad_norm": 0.5439580678939819, "learning_rate": 8.570772310482141e-05, "loss": 1.7089, "step": 4655 }, { "epoch": 0.25951730672760714, "grad_norm": 0.5132806301116943, "learning_rate": 8.57014896787721e-05, "loss": 1.5298, "step": 4656 }, { "epoch": 0.25957304498077033, "grad_norm": 0.5612521171569824, "learning_rate": 8.569525512047969e-05, "loss": 1.7676, "step": 4657 }, { "epoch": 0.25962878323393346, "grad_norm": 0.5397217273712158, "learning_rate": 8.56890194301419e-05, "loss": 1.636, "step": 4658 }, { "epoch": 0.2596845214870966, "grad_norm": 0.6334729194641113, "learning_rate": 8.56827826079565e-05, "loss": 1.8281, "step": 4659 }, { "epoch": 0.2597402597402597, "grad_norm": 0.5931346416473389, "learning_rate": 8.56765446541213e-05, "loss": 1.7335, "step": 4660 }, { "epoch": 0.2597959979934229, "grad_norm": 0.5085331201553345, "learning_rate": 8.567030556883408e-05, "loss": 1.8524, "step": 4661 }, { "epoch": 0.25985173624658603, "grad_norm": 0.5508363246917725, "learning_rate": 8.566406535229276e-05, "loss": 1.7883, "step": 4662 }, { "epoch": 0.25990747449974916, "grad_norm": 0.5742567181587219, "learning_rate": 8.565782400469522e-05, "loss": 1.7011, "step": 4663 }, { "epoch": 0.25996321275291234, "grad_norm": 0.4922592043876648, "learning_rate": 8.56515815262394e-05, "loss": 1.4828, "step": 4664 }, { "epoch": 0.26001895100607547, "grad_norm": 0.5450266003608704, "learning_rate": 8.564533791712328e-05, "loss": 1.7885, "step": 4665 }, { "epoch": 0.2600746892592386, "grad_norm": 0.5942632555961609, "learning_rate": 8.563909317754487e-05, "loss": 1.9297, "step": 4666 }, { "epoch": 0.2601304275124018, "grad_norm": 0.5638509392738342, "learning_rate": 8.563284730770221e-05, "loss": 1.9536, "step": 4667 }, { "epoch": 0.2601861657655649, "grad_norm": 0.5848171710968018, "learning_rate": 8.56266003077934e-05, "loss": 2.003, "step": 4668 }, { "epoch": 0.26024190401872804, "grad_norm": 0.5629677176475525, "learning_rate": 8.562035217801652e-05, "loss": 2.0024, "step": 4669 }, { "epoch": 0.26029764227189117, "grad_norm": 0.5268816351890564, "learning_rate": 8.561410291856977e-05, "loss": 1.5865, "step": 4670 }, { "epoch": 0.26035338052505436, "grad_norm": 0.545254647731781, "learning_rate": 8.560785252965131e-05, "loss": 1.7586, "step": 4671 }, { "epoch": 0.2604091187782175, "grad_norm": 0.5406084060668945, "learning_rate": 8.560160101145937e-05, "loss": 1.9274, "step": 4672 }, { "epoch": 0.2604648570313806, "grad_norm": 0.5519586801528931, "learning_rate": 8.559534836419224e-05, "loss": 1.7652, "step": 4673 }, { "epoch": 0.2605205952845438, "grad_norm": 0.5398983955383301, "learning_rate": 8.558909458804818e-05, "loss": 1.9096, "step": 4674 }, { "epoch": 0.26057633353770693, "grad_norm": 0.5414653420448303, "learning_rate": 8.558283968322555e-05, "loss": 1.6586, "step": 4675 }, { "epoch": 0.26063207179087006, "grad_norm": 0.5628217458724976, "learning_rate": 8.55765836499227e-05, "loss": 1.606, "step": 4676 }, { "epoch": 0.26068781004403324, "grad_norm": 0.5232682228088379, "learning_rate": 8.557032648833804e-05, "loss": 1.698, "step": 4677 }, { "epoch": 0.26074354829719637, "grad_norm": 0.588845431804657, "learning_rate": 8.556406819867001e-05, "loss": 1.9568, "step": 4678 }, { "epoch": 0.2607992865503595, "grad_norm": 0.5363548994064331, "learning_rate": 8.55578087811171e-05, "loss": 1.6827, "step": 4679 }, { "epoch": 0.2608550248035227, "grad_norm": 0.514584481716156, "learning_rate": 8.55515482358778e-05, "loss": 1.631, "step": 4680 }, { "epoch": 0.2609107630566858, "grad_norm": 0.5446624159812927, "learning_rate": 8.554528656315069e-05, "loss": 1.7978, "step": 4681 }, { "epoch": 0.26096650130984894, "grad_norm": 0.5160642266273499, "learning_rate": 8.55390237631343e-05, "loss": 1.4935, "step": 4682 }, { "epoch": 0.26102223956301207, "grad_norm": 0.5020194053649902, "learning_rate": 8.553275983602732e-05, "loss": 1.3459, "step": 4683 }, { "epoch": 0.26107797781617526, "grad_norm": 0.5197760462760925, "learning_rate": 8.552649478202834e-05, "loss": 1.8008, "step": 4684 }, { "epoch": 0.2611337160693384, "grad_norm": 0.5080288648605347, "learning_rate": 8.55202286013361e-05, "loss": 1.5853, "step": 4685 }, { "epoch": 0.2611894543225015, "grad_norm": 0.5232203602790833, "learning_rate": 8.551396129414928e-05, "loss": 1.7352, "step": 4686 }, { "epoch": 0.2612451925756647, "grad_norm": 0.5843389630317688, "learning_rate": 8.550769286066669e-05, "loss": 1.5833, "step": 4687 }, { "epoch": 0.2613009308288278, "grad_norm": 0.5756316184997559, "learning_rate": 8.55014233010871e-05, "loss": 1.8692, "step": 4688 }, { "epoch": 0.26135666908199096, "grad_norm": 0.5456770658493042, "learning_rate": 8.549515261560937e-05, "loss": 1.6987, "step": 4689 }, { "epoch": 0.26141240733515414, "grad_norm": 0.5343070030212402, "learning_rate": 8.548888080443231e-05, "loss": 1.4492, "step": 4690 }, { "epoch": 0.26146814558831727, "grad_norm": 0.546418309211731, "learning_rate": 8.54826078677549e-05, "loss": 1.7292, "step": 4691 }, { "epoch": 0.2615238838414804, "grad_norm": 0.5571802258491516, "learning_rate": 8.547633380577604e-05, "loss": 1.9054, "step": 4692 }, { "epoch": 0.2615796220946435, "grad_norm": 0.5529661774635315, "learning_rate": 8.54700586186947e-05, "loss": 1.8537, "step": 4693 }, { "epoch": 0.2616353603478067, "grad_norm": 0.5503031611442566, "learning_rate": 8.546378230670992e-05, "loss": 1.7507, "step": 4694 }, { "epoch": 0.26169109860096984, "grad_norm": 0.5290326476097107, "learning_rate": 8.545750487002073e-05, "loss": 1.5895, "step": 4695 }, { "epoch": 0.26174683685413297, "grad_norm": 0.5247073769569397, "learning_rate": 8.54512263088262e-05, "loss": 1.5736, "step": 4696 }, { "epoch": 0.26180257510729615, "grad_norm": 0.575093686580658, "learning_rate": 8.544494662332548e-05, "loss": 1.5192, "step": 4697 }, { "epoch": 0.2618583133604593, "grad_norm": 0.5360473990440369, "learning_rate": 8.543866581371771e-05, "loss": 1.7796, "step": 4698 }, { "epoch": 0.2619140516136224, "grad_norm": 0.5478860139846802, "learning_rate": 8.54323838802021e-05, "loss": 1.756, "step": 4699 }, { "epoch": 0.2619697898667856, "grad_norm": 0.5454539060592651, "learning_rate": 8.542610082297783e-05, "loss": 1.7589, "step": 4700 }, { "epoch": 0.2620255281199487, "grad_norm": 0.5187868475914001, "learning_rate": 8.541981664224421e-05, "loss": 1.5043, "step": 4701 }, { "epoch": 0.26208126637311185, "grad_norm": 0.5362755060195923, "learning_rate": 8.54135313382005e-05, "loss": 1.731, "step": 4702 }, { "epoch": 0.26213700462627504, "grad_norm": 0.5599364638328552, "learning_rate": 8.540724491104606e-05, "loss": 1.6976, "step": 4703 }, { "epoch": 0.26219274287943817, "grad_norm": 0.5924205183982849, "learning_rate": 8.540095736098026e-05, "loss": 1.8049, "step": 4704 }, { "epoch": 0.2622484811326013, "grad_norm": 0.5288107395172119, "learning_rate": 8.539466868820247e-05, "loss": 1.5834, "step": 4705 }, { "epoch": 0.2623042193857644, "grad_norm": 0.5498400330543518, "learning_rate": 8.538837889291218e-05, "loss": 1.6546, "step": 4706 }, { "epoch": 0.2623599576389276, "grad_norm": 0.5080811381340027, "learning_rate": 8.538208797530883e-05, "loss": 1.434, "step": 4707 }, { "epoch": 0.26241569589209074, "grad_norm": 0.5125556588172913, "learning_rate": 8.537579593559195e-05, "loss": 1.6628, "step": 4708 }, { "epoch": 0.26247143414525387, "grad_norm": 0.5489838123321533, "learning_rate": 8.536950277396106e-05, "loss": 1.5702, "step": 4709 }, { "epoch": 0.26252717239841705, "grad_norm": 0.5346508622169495, "learning_rate": 8.536320849061577e-05, "loss": 1.7829, "step": 4710 }, { "epoch": 0.2625829106515802, "grad_norm": 0.5648466944694519, "learning_rate": 8.535691308575569e-05, "loss": 1.8271, "step": 4711 }, { "epoch": 0.2626386489047433, "grad_norm": 0.5875536203384399, "learning_rate": 8.535061655958048e-05, "loss": 1.888, "step": 4712 }, { "epoch": 0.2626943871579065, "grad_norm": 0.5403586626052856, "learning_rate": 8.534431891228981e-05, "loss": 1.5633, "step": 4713 }, { "epoch": 0.2627501254110696, "grad_norm": 0.5541427135467529, "learning_rate": 8.533802014408341e-05, "loss": 1.7778, "step": 4714 }, { "epoch": 0.26280586366423275, "grad_norm": 0.5390727519989014, "learning_rate": 8.533172025516106e-05, "loss": 1.6732, "step": 4715 }, { "epoch": 0.2628616019173959, "grad_norm": 0.5591700077056885, "learning_rate": 8.532541924572254e-05, "loss": 1.7714, "step": 4716 }, { "epoch": 0.26291734017055907, "grad_norm": 0.5306904911994934, "learning_rate": 8.531911711596767e-05, "loss": 1.7311, "step": 4717 }, { "epoch": 0.2629730784237222, "grad_norm": 0.5665531158447266, "learning_rate": 8.531281386609633e-05, "loss": 1.684, "step": 4718 }, { "epoch": 0.2630288166768853, "grad_norm": 0.5404395461082458, "learning_rate": 8.530650949630844e-05, "loss": 1.7727, "step": 4719 }, { "epoch": 0.2630845549300485, "grad_norm": 0.5549681782722473, "learning_rate": 8.530020400680392e-05, "loss": 1.6802, "step": 4720 }, { "epoch": 0.26314029318321164, "grad_norm": 0.5529362559318542, "learning_rate": 8.529389739778272e-05, "loss": 1.6691, "step": 4721 }, { "epoch": 0.26319603143637477, "grad_norm": 0.5257294178009033, "learning_rate": 8.528758966944489e-05, "loss": 1.6649, "step": 4722 }, { "epoch": 0.26325176968953795, "grad_norm": 0.5499683022499084, "learning_rate": 8.528128082199046e-05, "loss": 1.8637, "step": 4723 }, { "epoch": 0.2633075079427011, "grad_norm": 0.5676036477088928, "learning_rate": 8.527497085561949e-05, "loss": 1.6409, "step": 4724 }, { "epoch": 0.2633632461958642, "grad_norm": 0.5784804821014404, "learning_rate": 8.526865977053211e-05, "loss": 1.8414, "step": 4725 }, { "epoch": 0.2634189844490274, "grad_norm": 0.592461884021759, "learning_rate": 8.52623475669285e-05, "loss": 1.725, "step": 4726 }, { "epoch": 0.2634747227021905, "grad_norm": 0.5251427888870239, "learning_rate": 8.52560342450088e-05, "loss": 1.5888, "step": 4727 }, { "epoch": 0.26353046095535365, "grad_norm": 0.5062176585197449, "learning_rate": 8.524971980497325e-05, "loss": 1.5588, "step": 4728 }, { "epoch": 0.2635861992085168, "grad_norm": 0.5686171054840088, "learning_rate": 8.524340424702211e-05, "loss": 1.6186, "step": 4729 }, { "epoch": 0.26364193746167996, "grad_norm": 0.5521769523620605, "learning_rate": 8.523708757135567e-05, "loss": 1.6917, "step": 4730 }, { "epoch": 0.2636976757148431, "grad_norm": 0.5489006042480469, "learning_rate": 8.523076977817426e-05, "loss": 1.8079, "step": 4731 }, { "epoch": 0.2637534139680062, "grad_norm": 0.5295306444168091, "learning_rate": 8.522445086767826e-05, "loss": 1.6814, "step": 4732 }, { "epoch": 0.2638091522211694, "grad_norm": 0.5596312284469604, "learning_rate": 8.521813084006802e-05, "loss": 1.7971, "step": 4733 }, { "epoch": 0.26386489047433254, "grad_norm": 0.535030722618103, "learning_rate": 8.5211809695544e-05, "loss": 1.6389, "step": 4734 }, { "epoch": 0.26392062872749567, "grad_norm": 0.5560666918754578, "learning_rate": 8.520548743430673e-05, "loss": 1.8107, "step": 4735 }, { "epoch": 0.26397636698065885, "grad_norm": 0.5749865770339966, "learning_rate": 8.51991640565566e-05, "loss": 1.7698, "step": 4736 }, { "epoch": 0.264032105233822, "grad_norm": 0.603252649307251, "learning_rate": 8.519283956249424e-05, "loss": 1.9701, "step": 4737 }, { "epoch": 0.2640878434869851, "grad_norm": 0.562053918838501, "learning_rate": 8.51865139523202e-05, "loss": 1.7033, "step": 4738 }, { "epoch": 0.26414358174014824, "grad_norm": 0.5553662776947021, "learning_rate": 8.518018722623509e-05, "loss": 1.6353, "step": 4739 }, { "epoch": 0.2641993199933114, "grad_norm": 0.5916672945022583, "learning_rate": 8.517385938443955e-05, "loss": 1.8496, "step": 4740 }, { "epoch": 0.26425505824647455, "grad_norm": 0.549395740032196, "learning_rate": 8.516753042713426e-05, "loss": 1.612, "step": 4741 }, { "epoch": 0.2643107964996377, "grad_norm": 0.5560966730117798, "learning_rate": 8.516120035451996e-05, "loss": 1.5978, "step": 4742 }, { "epoch": 0.26436653475280086, "grad_norm": 0.5934261679649353, "learning_rate": 8.515486916679738e-05, "loss": 1.9667, "step": 4743 }, { "epoch": 0.264422273005964, "grad_norm": 0.5441667437553406, "learning_rate": 8.514853686416732e-05, "loss": 1.639, "step": 4744 }, { "epoch": 0.2644780112591271, "grad_norm": 0.5780582427978516, "learning_rate": 8.51422034468306e-05, "loss": 1.6839, "step": 4745 }, { "epoch": 0.2645337495122903, "grad_norm": 0.5739880204200745, "learning_rate": 8.513586891498809e-05, "loss": 1.6927, "step": 4746 }, { "epoch": 0.26458948776545343, "grad_norm": 0.5097702145576477, "learning_rate": 8.512953326884066e-05, "loss": 1.5131, "step": 4747 }, { "epoch": 0.26464522601861656, "grad_norm": 0.5593822598457336, "learning_rate": 8.512319650858926e-05, "loss": 1.8373, "step": 4748 }, { "epoch": 0.26470096427177975, "grad_norm": 0.546627938747406, "learning_rate": 8.511685863443484e-05, "loss": 1.723, "step": 4749 }, { "epoch": 0.2647567025249429, "grad_norm": 0.5196560621261597, "learning_rate": 8.511051964657842e-05, "loss": 1.6108, "step": 4750 }, { "epoch": 0.264812440778106, "grad_norm": 0.548095166683197, "learning_rate": 8.510417954522102e-05, "loss": 1.6268, "step": 4751 }, { "epoch": 0.26486817903126914, "grad_norm": 0.5570634007453918, "learning_rate": 8.509783833056373e-05, "loss": 1.828, "step": 4752 }, { "epoch": 0.2649239172844323, "grad_norm": 0.5177022814750671, "learning_rate": 8.509149600280762e-05, "loss": 1.6537, "step": 4753 }, { "epoch": 0.26497965553759545, "grad_norm": 0.5529354810714722, "learning_rate": 8.508515256215389e-05, "loss": 1.6702, "step": 4754 }, { "epoch": 0.2650353937907586, "grad_norm": 0.6287319660186768, "learning_rate": 8.507880800880364e-05, "loss": 1.7545, "step": 4755 }, { "epoch": 0.26509113204392176, "grad_norm": 0.5878986716270447, "learning_rate": 8.507246234295814e-05, "loss": 1.9199, "step": 4756 }, { "epoch": 0.2651468702970849, "grad_norm": 0.560119092464447, "learning_rate": 8.506611556481862e-05, "loss": 1.645, "step": 4757 }, { "epoch": 0.265202608550248, "grad_norm": 0.5107282996177673, "learning_rate": 8.505976767458636e-05, "loss": 1.8503, "step": 4758 }, { "epoch": 0.2652583468034112, "grad_norm": 0.5514339208602905, "learning_rate": 8.50534186724627e-05, "loss": 1.6562, "step": 4759 }, { "epoch": 0.26531408505657433, "grad_norm": 0.541807234287262, "learning_rate": 8.504706855864897e-05, "loss": 1.7167, "step": 4760 }, { "epoch": 0.26536982330973746, "grad_norm": 0.5748420357704163, "learning_rate": 8.504071733334656e-05, "loss": 1.955, "step": 4761 }, { "epoch": 0.2654255615629006, "grad_norm": 0.5451623201370239, "learning_rate": 8.503436499675687e-05, "loss": 1.7336, "step": 4762 }, { "epoch": 0.2654812998160638, "grad_norm": 0.5036576986312866, "learning_rate": 8.502801154908142e-05, "loss": 1.7619, "step": 4763 }, { "epoch": 0.2655370380692269, "grad_norm": 0.5252074003219604, "learning_rate": 8.502165699052168e-05, "loss": 1.6425, "step": 4764 }, { "epoch": 0.26559277632239003, "grad_norm": 0.5452297925949097, "learning_rate": 8.501530132127915e-05, "loss": 1.5942, "step": 4765 }, { "epoch": 0.2656485145755532, "grad_norm": 0.5282885432243347, "learning_rate": 8.500894454155541e-05, "loss": 1.4847, "step": 4766 }, { "epoch": 0.26570425282871635, "grad_norm": 0.6032153367996216, "learning_rate": 8.500258665155207e-05, "loss": 1.8069, "step": 4767 }, { "epoch": 0.2657599910818795, "grad_norm": 0.6232243776321411, "learning_rate": 8.499622765147078e-05, "loss": 1.9243, "step": 4768 }, { "epoch": 0.26581572933504266, "grad_norm": 0.5226832032203674, "learning_rate": 8.498986754151316e-05, "loss": 1.5832, "step": 4769 }, { "epoch": 0.2658714675882058, "grad_norm": 0.653657853603363, "learning_rate": 8.498350632188097e-05, "loss": 1.7387, "step": 4770 }, { "epoch": 0.2659272058413689, "grad_norm": 0.6087796688079834, "learning_rate": 8.497714399277592e-05, "loss": 1.7853, "step": 4771 }, { "epoch": 0.2659829440945321, "grad_norm": 0.5050531029701233, "learning_rate": 8.49707805543998e-05, "loss": 1.4848, "step": 4772 }, { "epoch": 0.26603868234769523, "grad_norm": 0.5245751738548279, "learning_rate": 8.496441600695441e-05, "loss": 1.615, "step": 4773 }, { "epoch": 0.26609442060085836, "grad_norm": 0.5427295565605164, "learning_rate": 8.495805035064159e-05, "loss": 1.8508, "step": 4774 }, { "epoch": 0.2661501588540215, "grad_norm": 0.5052759647369385, "learning_rate": 8.495168358566325e-05, "loss": 1.6307, "step": 4775 }, { "epoch": 0.2662058971071847, "grad_norm": 0.5618288516998291, "learning_rate": 8.494531571222128e-05, "loss": 1.7516, "step": 4776 }, { "epoch": 0.2662616353603478, "grad_norm": 0.5743941068649292, "learning_rate": 8.493894673051765e-05, "loss": 1.9439, "step": 4777 }, { "epoch": 0.26631737361351093, "grad_norm": 0.5246620178222656, "learning_rate": 8.493257664075433e-05, "loss": 1.7159, "step": 4778 }, { "epoch": 0.2663731118666741, "grad_norm": 0.5409666895866394, "learning_rate": 8.492620544313335e-05, "loss": 1.6972, "step": 4779 }, { "epoch": 0.26642885011983725, "grad_norm": 0.5137554407119751, "learning_rate": 8.491983313785676e-05, "loss": 1.6285, "step": 4780 }, { "epoch": 0.2664845883730004, "grad_norm": 0.6102763414382935, "learning_rate": 8.491345972512668e-05, "loss": 1.7433, "step": 4781 }, { "epoch": 0.26654032662616356, "grad_norm": 0.6035791039466858, "learning_rate": 8.490708520514519e-05, "loss": 1.8665, "step": 4782 }, { "epoch": 0.2665960648793267, "grad_norm": 0.5769240856170654, "learning_rate": 8.490070957811449e-05, "loss": 1.7147, "step": 4783 }, { "epoch": 0.2666518031324898, "grad_norm": 0.5191882252693176, "learning_rate": 8.489433284423678e-05, "loss": 1.5935, "step": 4784 }, { "epoch": 0.26670754138565295, "grad_norm": 0.575363039970398, "learning_rate": 8.488795500371427e-05, "loss": 1.8616, "step": 4785 }, { "epoch": 0.26676327963881613, "grad_norm": 0.5380163788795471, "learning_rate": 8.488157605674925e-05, "loss": 1.5693, "step": 4786 }, { "epoch": 0.26681901789197926, "grad_norm": 0.5527309775352478, "learning_rate": 8.487519600354399e-05, "loss": 1.797, "step": 4787 }, { "epoch": 0.2668747561451424, "grad_norm": 0.5432277321815491, "learning_rate": 8.486881484430085e-05, "loss": 1.7024, "step": 4788 }, { "epoch": 0.2669304943983056, "grad_norm": 0.5643296837806702, "learning_rate": 8.486243257922221e-05, "loss": 1.6602, "step": 4789 }, { "epoch": 0.2669862326514687, "grad_norm": 0.5539331436157227, "learning_rate": 8.485604920851049e-05, "loss": 1.7195, "step": 4790 }, { "epoch": 0.26704197090463183, "grad_norm": 0.5279936790466309, "learning_rate": 8.48496647323681e-05, "loss": 1.6503, "step": 4791 }, { "epoch": 0.267097709157795, "grad_norm": 0.5447912812232971, "learning_rate": 8.484327915099752e-05, "loss": 1.7975, "step": 4792 }, { "epoch": 0.26715344741095814, "grad_norm": 0.6047879457473755, "learning_rate": 8.48368924646013e-05, "loss": 1.8362, "step": 4793 }, { "epoch": 0.2672091856641213, "grad_norm": 0.5555823445320129, "learning_rate": 8.483050467338194e-05, "loss": 1.7033, "step": 4794 }, { "epoch": 0.26726492391728446, "grad_norm": 0.5324097871780396, "learning_rate": 8.482411577754205e-05, "loss": 1.828, "step": 4795 }, { "epoch": 0.2673206621704476, "grad_norm": 0.5133151412010193, "learning_rate": 8.481772577728426e-05, "loss": 1.6922, "step": 4796 }, { "epoch": 0.2673764004236107, "grad_norm": 0.5466338396072388, "learning_rate": 8.48113346728112e-05, "loss": 1.7228, "step": 4797 }, { "epoch": 0.26743213867677385, "grad_norm": 0.5190402269363403, "learning_rate": 8.480494246432557e-05, "loss": 1.7192, "step": 4798 }, { "epoch": 0.26748787692993703, "grad_norm": 0.4959962069988251, "learning_rate": 8.47985491520301e-05, "loss": 1.5593, "step": 4799 }, { "epoch": 0.26754361518310016, "grad_norm": 0.5530042052268982, "learning_rate": 8.479215473612754e-05, "loss": 1.7545, "step": 4800 }, { "epoch": 0.2675993534362633, "grad_norm": 0.6360591650009155, "learning_rate": 8.478575921682066e-05, "loss": 1.9369, "step": 4801 }, { "epoch": 0.26765509168942647, "grad_norm": 0.5604984164237976, "learning_rate": 8.477936259431235e-05, "loss": 1.6485, "step": 4802 }, { "epoch": 0.2677108299425896, "grad_norm": 0.568709671497345, "learning_rate": 8.477296486880541e-05, "loss": 1.6459, "step": 4803 }, { "epoch": 0.26776656819575273, "grad_norm": 0.6228764653205872, "learning_rate": 8.476656604050277e-05, "loss": 1.8825, "step": 4804 }, { "epoch": 0.2678223064489159, "grad_norm": 0.5803889036178589, "learning_rate": 8.476016610960736e-05, "loss": 1.8011, "step": 4805 }, { "epoch": 0.26787804470207904, "grad_norm": 0.5778336524963379, "learning_rate": 8.475376507632215e-05, "loss": 1.726, "step": 4806 }, { "epoch": 0.2679337829552422, "grad_norm": 0.5755890011787415, "learning_rate": 8.474736294085014e-05, "loss": 1.6394, "step": 4807 }, { "epoch": 0.2679895212084053, "grad_norm": 0.5545676350593567, "learning_rate": 8.474095970339436e-05, "loss": 1.7973, "step": 4808 }, { "epoch": 0.2680452594615685, "grad_norm": 0.5003368854522705, "learning_rate": 8.473455536415789e-05, "loss": 1.6653, "step": 4809 }, { "epoch": 0.2681009977147316, "grad_norm": 0.5292695164680481, "learning_rate": 8.472814992334386e-05, "loss": 1.7463, "step": 4810 }, { "epoch": 0.26815673596789474, "grad_norm": 0.604960560798645, "learning_rate": 8.472174338115537e-05, "loss": 1.9016, "step": 4811 }, { "epoch": 0.26821247422105793, "grad_norm": 0.5484800338745117, "learning_rate": 8.471533573779564e-05, "loss": 1.6117, "step": 4812 }, { "epoch": 0.26826821247422106, "grad_norm": 0.5383596420288086, "learning_rate": 8.470892699346786e-05, "loss": 1.6871, "step": 4813 }, { "epoch": 0.2683239507273842, "grad_norm": 0.5479928851127625, "learning_rate": 8.470251714837529e-05, "loss": 1.7255, "step": 4814 }, { "epoch": 0.26837968898054737, "grad_norm": 0.5112576484680176, "learning_rate": 8.46961062027212e-05, "loss": 1.414, "step": 4815 }, { "epoch": 0.2684354272337105, "grad_norm": 0.547825038433075, "learning_rate": 8.46896941567089e-05, "loss": 1.835, "step": 4816 }, { "epoch": 0.26849116548687363, "grad_norm": 0.5121808648109436, "learning_rate": 8.468328101054177e-05, "loss": 1.5269, "step": 4817 }, { "epoch": 0.2685469037400368, "grad_norm": 0.5761928558349609, "learning_rate": 8.467686676442318e-05, "loss": 1.7195, "step": 4818 }, { "epoch": 0.26860264199319994, "grad_norm": 0.547089159488678, "learning_rate": 8.467045141855656e-05, "loss": 1.6714, "step": 4819 }, { "epoch": 0.26865838024636307, "grad_norm": 0.5228059887886047, "learning_rate": 8.466403497314537e-05, "loss": 1.6444, "step": 4820 }, { "epoch": 0.2687141184995262, "grad_norm": 0.5589326620101929, "learning_rate": 8.465761742839307e-05, "loss": 1.9121, "step": 4821 }, { "epoch": 0.2687698567526894, "grad_norm": 0.5607814192771912, "learning_rate": 8.465119878450324e-05, "loss": 1.8351, "step": 4822 }, { "epoch": 0.2688255950058525, "grad_norm": 0.591454029083252, "learning_rate": 8.46447790416794e-05, "loss": 1.8308, "step": 4823 }, { "epoch": 0.26888133325901564, "grad_norm": 0.5167153477668762, "learning_rate": 8.463835820012517e-05, "loss": 1.6928, "step": 4824 }, { "epoch": 0.2689370715121788, "grad_norm": 0.5741368532180786, "learning_rate": 8.463193626004418e-05, "loss": 1.8407, "step": 4825 }, { "epoch": 0.26899280976534196, "grad_norm": 0.563448965549469, "learning_rate": 8.462551322164007e-05, "loss": 1.7246, "step": 4826 }, { "epoch": 0.2690485480185051, "grad_norm": 0.5690648555755615, "learning_rate": 8.461908908511657e-05, "loss": 1.7408, "step": 4827 }, { "epoch": 0.26910428627166827, "grad_norm": 0.5448554754257202, "learning_rate": 8.461266385067741e-05, "loss": 1.6012, "step": 4828 }, { "epoch": 0.2691600245248314, "grad_norm": 0.5054116249084473, "learning_rate": 8.460623751852637e-05, "loss": 1.6175, "step": 4829 }, { "epoch": 0.2692157627779945, "grad_norm": 0.5798751711845398, "learning_rate": 8.459981008886721e-05, "loss": 1.7742, "step": 4830 }, { "epoch": 0.26927150103115766, "grad_norm": 0.5339779257774353, "learning_rate": 8.459338156190384e-05, "loss": 1.6737, "step": 4831 }, { "epoch": 0.26932723928432084, "grad_norm": 0.5387359261512756, "learning_rate": 8.45869519378401e-05, "loss": 1.6606, "step": 4832 }, { "epoch": 0.26938297753748397, "grad_norm": 0.646202802658081, "learning_rate": 8.458052121687987e-05, "loss": 1.9741, "step": 4833 }, { "epoch": 0.2694387157906471, "grad_norm": 0.5640881061553955, "learning_rate": 8.457408939922715e-05, "loss": 1.7103, "step": 4834 }, { "epoch": 0.2694944540438103, "grad_norm": 0.567292332649231, "learning_rate": 8.456765648508589e-05, "loss": 1.7605, "step": 4835 }, { "epoch": 0.2695501922969734, "grad_norm": 0.6057398319244385, "learning_rate": 8.456122247466009e-05, "loss": 1.6074, "step": 4836 }, { "epoch": 0.26960593055013654, "grad_norm": 0.6216564178466797, "learning_rate": 8.455478736815385e-05, "loss": 1.6341, "step": 4837 }, { "epoch": 0.2696616688032997, "grad_norm": 0.53920978307724, "learning_rate": 8.454835116577122e-05, "loss": 1.792, "step": 4838 }, { "epoch": 0.26971740705646285, "grad_norm": 0.5827376842498779, "learning_rate": 8.45419138677163e-05, "loss": 1.5826, "step": 4839 }, { "epoch": 0.269773145309626, "grad_norm": 0.5303118228912354, "learning_rate": 8.453547547419329e-05, "loss": 1.7387, "step": 4840 }, { "epoch": 0.26982888356278917, "grad_norm": 0.5183376669883728, "learning_rate": 8.452903598540634e-05, "loss": 1.532, "step": 4841 }, { "epoch": 0.2698846218159523, "grad_norm": 0.5537537336349487, "learning_rate": 8.452259540155968e-05, "loss": 1.7955, "step": 4842 }, { "epoch": 0.2699403600691154, "grad_norm": 0.5679836273193359, "learning_rate": 8.451615372285758e-05, "loss": 1.7329, "step": 4843 }, { "epoch": 0.26999609832227855, "grad_norm": 0.5696743726730347, "learning_rate": 8.450971094950433e-05, "loss": 1.7294, "step": 4844 }, { "epoch": 0.27005183657544174, "grad_norm": 0.5818564295768738, "learning_rate": 8.450326708170426e-05, "loss": 2.0301, "step": 4845 }, { "epoch": 0.27010757482860487, "grad_norm": 0.5044540762901306, "learning_rate": 8.449682211966172e-05, "loss": 1.5171, "step": 4846 }, { "epoch": 0.270163313081768, "grad_norm": 0.5692309141159058, "learning_rate": 8.449037606358111e-05, "loss": 1.776, "step": 4847 }, { "epoch": 0.2702190513349312, "grad_norm": 0.5652437210083008, "learning_rate": 8.448392891366688e-05, "loss": 1.8956, "step": 4848 }, { "epoch": 0.2702747895880943, "grad_norm": 0.5531434416770935, "learning_rate": 8.447748067012345e-05, "loss": 1.7156, "step": 4849 }, { "epoch": 0.27033052784125744, "grad_norm": 0.5418469309806824, "learning_rate": 8.447103133315537e-05, "loss": 1.6983, "step": 4850 }, { "epoch": 0.2703862660944206, "grad_norm": 0.5276792049407959, "learning_rate": 8.446458090296716e-05, "loss": 1.6147, "step": 4851 }, { "epoch": 0.27044200434758375, "grad_norm": 0.5772181749343872, "learning_rate": 8.445812937976338e-05, "loss": 1.677, "step": 4852 }, { "epoch": 0.2704977426007469, "grad_norm": 0.5323836803436279, "learning_rate": 8.445167676374865e-05, "loss": 1.4833, "step": 4853 }, { "epoch": 0.27055348085391, "grad_norm": 0.5478299260139465, "learning_rate": 8.444522305512757e-05, "loss": 1.5832, "step": 4854 }, { "epoch": 0.2706092191070732, "grad_norm": 0.5325939655303955, "learning_rate": 8.443876825410488e-05, "loss": 1.4971, "step": 4855 }, { "epoch": 0.2706649573602363, "grad_norm": 0.5912976861000061, "learning_rate": 8.443231236088524e-05, "loss": 1.7624, "step": 4856 }, { "epoch": 0.27072069561339945, "grad_norm": 0.5368456244468689, "learning_rate": 8.44258553756734e-05, "loss": 1.5509, "step": 4857 }, { "epoch": 0.27077643386656264, "grad_norm": 0.5713909864425659, "learning_rate": 8.441939729867415e-05, "loss": 1.8286, "step": 4858 }, { "epoch": 0.27083217211972577, "grad_norm": 0.5259481072425842, "learning_rate": 8.44129381300923e-05, "loss": 1.7291, "step": 4859 }, { "epoch": 0.2708879103728889, "grad_norm": 0.5365427136421204, "learning_rate": 8.440647787013268e-05, "loss": 1.6051, "step": 4860 }, { "epoch": 0.2709436486260521, "grad_norm": 0.5223046541213989, "learning_rate": 8.44000165190002e-05, "loss": 1.5241, "step": 4861 }, { "epoch": 0.2709993868792152, "grad_norm": 0.5721556544303894, "learning_rate": 8.439355407689975e-05, "loss": 1.8138, "step": 4862 }, { "epoch": 0.27105512513237834, "grad_norm": 0.527158260345459, "learning_rate": 8.43870905440363e-05, "loss": 1.5114, "step": 4863 }, { "epoch": 0.2711108633855415, "grad_norm": 0.5364054441452026, "learning_rate": 8.438062592061485e-05, "loss": 1.5331, "step": 4864 }, { "epoch": 0.27116660163870465, "grad_norm": 0.5465856790542603, "learning_rate": 8.437416020684036e-05, "loss": 1.5122, "step": 4865 }, { "epoch": 0.2712223398918678, "grad_norm": 0.5655773282051086, "learning_rate": 8.436769340291794e-05, "loss": 1.8776, "step": 4866 }, { "epoch": 0.2712780781450309, "grad_norm": 0.5278435349464417, "learning_rate": 8.436122550905266e-05, "loss": 1.6388, "step": 4867 }, { "epoch": 0.2713338163981941, "grad_norm": 0.5141345262527466, "learning_rate": 8.435475652544967e-05, "loss": 1.5203, "step": 4868 }, { "epoch": 0.2713895546513572, "grad_norm": 0.5731988549232483, "learning_rate": 8.434828645231407e-05, "loss": 1.8796, "step": 4869 }, { "epoch": 0.27144529290452035, "grad_norm": 0.5262272357940674, "learning_rate": 8.434181528985112e-05, "loss": 1.711, "step": 4870 }, { "epoch": 0.27150103115768354, "grad_norm": 0.5410183668136597, "learning_rate": 8.4335343038266e-05, "loss": 1.5739, "step": 4871 }, { "epoch": 0.27155676941084667, "grad_norm": 0.5376774072647095, "learning_rate": 8.432886969776398e-05, "loss": 1.7037, "step": 4872 }, { "epoch": 0.2716125076640098, "grad_norm": 0.4998942017555237, "learning_rate": 8.432239526855036e-05, "loss": 1.566, "step": 4873 }, { "epoch": 0.271668245917173, "grad_norm": 0.562468945980072, "learning_rate": 8.431591975083049e-05, "loss": 1.7742, "step": 4874 }, { "epoch": 0.2717239841703361, "grad_norm": 0.5608972907066345, "learning_rate": 8.430944314480973e-05, "loss": 1.7467, "step": 4875 }, { "epoch": 0.27177972242349924, "grad_norm": 0.6075250506401062, "learning_rate": 8.430296545069345e-05, "loss": 1.5414, "step": 4876 }, { "epoch": 0.27183546067666237, "grad_norm": 0.5488311052322388, "learning_rate": 8.429648666868713e-05, "loss": 1.7401, "step": 4877 }, { "epoch": 0.27189119892982555, "grad_norm": 0.5740364193916321, "learning_rate": 8.429000679899619e-05, "loss": 1.6739, "step": 4878 }, { "epoch": 0.2719469371829887, "grad_norm": 0.5271220207214355, "learning_rate": 8.428352584182617e-05, "loss": 1.6982, "step": 4879 }, { "epoch": 0.2720026754361518, "grad_norm": 0.5354405045509338, "learning_rate": 8.42770437973826e-05, "loss": 1.6927, "step": 4880 }, { "epoch": 0.272058413689315, "grad_norm": 0.569052517414093, "learning_rate": 8.427056066587105e-05, "loss": 1.6674, "step": 4881 }, { "epoch": 0.2721141519424781, "grad_norm": 0.5651227831840515, "learning_rate": 8.426407644749711e-05, "loss": 1.8356, "step": 4882 }, { "epoch": 0.27216989019564125, "grad_norm": 0.5364747643470764, "learning_rate": 8.425759114246647e-05, "loss": 1.749, "step": 4883 }, { "epoch": 0.27222562844880444, "grad_norm": 0.48416903614997864, "learning_rate": 8.425110475098476e-05, "loss": 1.4771, "step": 4884 }, { "epoch": 0.27228136670196756, "grad_norm": 0.5686883926391602, "learning_rate": 8.42446172732577e-05, "loss": 1.6603, "step": 4885 }, { "epoch": 0.2723371049551307, "grad_norm": 0.5875502824783325, "learning_rate": 8.423812870949104e-05, "loss": 1.8797, "step": 4886 }, { "epoch": 0.2723928432082939, "grad_norm": 0.5201019644737244, "learning_rate": 8.423163905989055e-05, "loss": 1.649, "step": 4887 }, { "epoch": 0.272448581461457, "grad_norm": 0.566376268863678, "learning_rate": 8.422514832466206e-05, "loss": 1.7182, "step": 4888 }, { "epoch": 0.27250431971462014, "grad_norm": 0.5158393979072571, "learning_rate": 8.421865650401143e-05, "loss": 1.6317, "step": 4889 }, { "epoch": 0.27256005796778326, "grad_norm": 0.5439308881759644, "learning_rate": 8.421216359814451e-05, "loss": 1.7071, "step": 4890 }, { "epoch": 0.27261579622094645, "grad_norm": 0.5321268439292908, "learning_rate": 8.420566960726723e-05, "loss": 1.6561, "step": 4891 }, { "epoch": 0.2726715344741096, "grad_norm": 0.4758521616458893, "learning_rate": 8.419917453158554e-05, "loss": 1.5538, "step": 4892 }, { "epoch": 0.2727272727272727, "grad_norm": 0.4964730441570282, "learning_rate": 8.419267837130544e-05, "loss": 1.5957, "step": 4893 }, { "epoch": 0.2727830109804359, "grad_norm": 0.555168628692627, "learning_rate": 8.418618112663292e-05, "loss": 1.6552, "step": 4894 }, { "epoch": 0.272838749233599, "grad_norm": 0.5903061032295227, "learning_rate": 8.417968279777409e-05, "loss": 1.8649, "step": 4895 }, { "epoch": 0.27289448748676215, "grad_norm": 0.584933876991272, "learning_rate": 8.417318338493497e-05, "loss": 1.8317, "step": 4896 }, { "epoch": 0.27295022573992533, "grad_norm": 0.6088751554489136, "learning_rate": 8.416668288832173e-05, "loss": 1.775, "step": 4897 }, { "epoch": 0.27300596399308846, "grad_norm": 0.6300697326660156, "learning_rate": 8.41601813081405e-05, "loss": 1.9256, "step": 4898 }, { "epoch": 0.2730617022462516, "grad_norm": 0.5516534447669983, "learning_rate": 8.415367864459751e-05, "loss": 1.6553, "step": 4899 }, { "epoch": 0.2731174404994147, "grad_norm": 0.5985352993011475, "learning_rate": 8.414717489789894e-05, "loss": 1.8121, "step": 4900 }, { "epoch": 0.2731731787525779, "grad_norm": 0.5280508399009705, "learning_rate": 8.414067006825108e-05, "loss": 1.657, "step": 4901 }, { "epoch": 0.27322891700574103, "grad_norm": 0.6586048007011414, "learning_rate": 8.413416415586024e-05, "loss": 2.2447, "step": 4902 }, { "epoch": 0.27328465525890416, "grad_norm": 0.5527061223983765, "learning_rate": 8.412765716093272e-05, "loss": 1.5666, "step": 4903 }, { "epoch": 0.27334039351206735, "grad_norm": 0.5549877882003784, "learning_rate": 8.412114908367488e-05, "loss": 1.5972, "step": 4904 }, { "epoch": 0.2733961317652305, "grad_norm": 0.5879062414169312, "learning_rate": 8.411463992429314e-05, "loss": 1.8609, "step": 4905 }, { "epoch": 0.2734518700183936, "grad_norm": 0.5397518873214722, "learning_rate": 8.41081296829939e-05, "loss": 1.8211, "step": 4906 }, { "epoch": 0.2735076082715568, "grad_norm": 0.5364968776702881, "learning_rate": 8.410161835998369e-05, "loss": 1.7879, "step": 4907 }, { "epoch": 0.2735633465247199, "grad_norm": 0.5714520215988159, "learning_rate": 8.409510595546894e-05, "loss": 1.9543, "step": 4908 }, { "epoch": 0.27361908477788305, "grad_norm": 0.5671858787536621, "learning_rate": 8.408859246965623e-05, "loss": 1.8165, "step": 4909 }, { "epoch": 0.27367482303104623, "grad_norm": 0.6034393906593323, "learning_rate": 8.408207790275213e-05, "loss": 1.8084, "step": 4910 }, { "epoch": 0.27373056128420936, "grad_norm": 0.5954535007476807, "learning_rate": 8.407556225496322e-05, "loss": 1.782, "step": 4911 }, { "epoch": 0.2737862995373725, "grad_norm": 0.5597085952758789, "learning_rate": 8.406904552649614e-05, "loss": 1.7673, "step": 4912 }, { "epoch": 0.2738420377905356, "grad_norm": 0.7730258107185364, "learning_rate": 8.406252771755758e-05, "loss": 1.9742, "step": 4913 }, { "epoch": 0.2738977760436988, "grad_norm": 0.5349806547164917, "learning_rate": 8.405600882835425e-05, "loss": 1.6226, "step": 4914 }, { "epoch": 0.27395351429686193, "grad_norm": 0.5271722674369812, "learning_rate": 8.404948885909288e-05, "loss": 1.7948, "step": 4915 }, { "epoch": 0.27400925255002506, "grad_norm": 0.6604454517364502, "learning_rate": 8.404296780998022e-05, "loss": 1.5653, "step": 4916 }, { "epoch": 0.27406499080318825, "grad_norm": 0.5219733119010925, "learning_rate": 8.403644568122313e-05, "loss": 1.6596, "step": 4917 }, { "epoch": 0.2741207290563514, "grad_norm": 0.5320934653282166, "learning_rate": 8.402992247302842e-05, "loss": 1.7119, "step": 4918 }, { "epoch": 0.2741764673095145, "grad_norm": 0.5232207179069519, "learning_rate": 8.402339818560296e-05, "loss": 1.7161, "step": 4919 }, { "epoch": 0.2742322055626777, "grad_norm": 0.5363631844520569, "learning_rate": 8.401687281915371e-05, "loss": 1.7174, "step": 4920 }, { "epoch": 0.2742879438158408, "grad_norm": 0.5237067937850952, "learning_rate": 8.401034637388758e-05, "loss": 1.5517, "step": 4921 }, { "epoch": 0.27434368206900395, "grad_norm": 0.5529504418373108, "learning_rate": 8.400381885001155e-05, "loss": 1.7067, "step": 4922 }, { "epoch": 0.2743994203221671, "grad_norm": 0.5712334513664246, "learning_rate": 8.399729024773264e-05, "loss": 1.7333, "step": 4923 }, { "epoch": 0.27445515857533026, "grad_norm": 0.5530427098274231, "learning_rate": 8.39907605672579e-05, "loss": 1.7721, "step": 4924 }, { "epoch": 0.2745108968284934, "grad_norm": 0.5096892714500427, "learning_rate": 8.398422980879442e-05, "loss": 1.5788, "step": 4925 }, { "epoch": 0.2745666350816565, "grad_norm": 0.5875157713890076, "learning_rate": 8.39776979725493e-05, "loss": 1.7782, "step": 4926 }, { "epoch": 0.2746223733348197, "grad_norm": 0.5620753169059753, "learning_rate": 8.397116505872973e-05, "loss": 1.6911, "step": 4927 }, { "epoch": 0.27467811158798283, "grad_norm": 0.5037546157836914, "learning_rate": 8.396463106754285e-05, "loss": 1.7944, "step": 4928 }, { "epoch": 0.27473384984114596, "grad_norm": 0.5311979055404663, "learning_rate": 8.395809599919591e-05, "loss": 1.8542, "step": 4929 }, { "epoch": 0.27478958809430915, "grad_norm": 0.5294662714004517, "learning_rate": 8.395155985389615e-05, "loss": 1.582, "step": 4930 }, { "epoch": 0.2748453263474723, "grad_norm": 0.5880303382873535, "learning_rate": 8.394502263185087e-05, "loss": 1.8807, "step": 4931 }, { "epoch": 0.2749010646006354, "grad_norm": 0.5946251153945923, "learning_rate": 8.393848433326736e-05, "loss": 1.8139, "step": 4932 }, { "epoch": 0.2749568028537986, "grad_norm": 0.5572118759155273, "learning_rate": 8.393194495835304e-05, "loss": 1.9141, "step": 4933 }, { "epoch": 0.2750125411069617, "grad_norm": 0.5573039054870605, "learning_rate": 8.392540450731522e-05, "loss": 1.7951, "step": 4934 }, { "epoch": 0.27506827936012485, "grad_norm": 0.540758490562439, "learning_rate": 8.39188629803614e-05, "loss": 1.7804, "step": 4935 }, { "epoch": 0.275124017613288, "grad_norm": 0.5271297693252563, "learning_rate": 8.3912320377699e-05, "loss": 1.82, "step": 4936 }, { "epoch": 0.27517975586645116, "grad_norm": 0.5359855890274048, "learning_rate": 8.390577669953552e-05, "loss": 1.7678, "step": 4937 }, { "epoch": 0.2752354941196143, "grad_norm": 0.5025729537010193, "learning_rate": 8.389923194607849e-05, "loss": 1.5144, "step": 4938 }, { "epoch": 0.2752912323727774, "grad_norm": 0.5402054190635681, "learning_rate": 8.389268611753546e-05, "loss": 1.6204, "step": 4939 }, { "epoch": 0.2753469706259406, "grad_norm": 0.5499907732009888, "learning_rate": 8.388613921411404e-05, "loss": 1.6948, "step": 4940 }, { "epoch": 0.27540270887910373, "grad_norm": 0.6044038534164429, "learning_rate": 8.387959123602185e-05, "loss": 1.5522, "step": 4941 }, { "epoch": 0.27545844713226686, "grad_norm": 0.5463374853134155, "learning_rate": 8.387304218346656e-05, "loss": 1.6392, "step": 4942 }, { "epoch": 0.27551418538543004, "grad_norm": 0.5164476633071899, "learning_rate": 8.386649205665586e-05, "loss": 1.674, "step": 4943 }, { "epoch": 0.2755699236385932, "grad_norm": 0.6093559861183167, "learning_rate": 8.385994085579751e-05, "loss": 2.0767, "step": 4944 }, { "epoch": 0.2756256618917563, "grad_norm": 0.5542387366294861, "learning_rate": 8.385338858109922e-05, "loss": 1.8275, "step": 4945 }, { "epoch": 0.27568140014491943, "grad_norm": 0.5787892937660217, "learning_rate": 8.384683523276885e-05, "loss": 1.5918, "step": 4946 }, { "epoch": 0.2757371383980826, "grad_norm": 0.5294553637504578, "learning_rate": 8.38402808110142e-05, "loss": 1.6857, "step": 4947 }, { "epoch": 0.27579287665124574, "grad_norm": 0.5397957563400269, "learning_rate": 8.383372531604314e-05, "loss": 1.6894, "step": 4948 }, { "epoch": 0.2758486149044089, "grad_norm": 0.5266357660293579, "learning_rate": 8.382716874806357e-05, "loss": 1.7214, "step": 4949 }, { "epoch": 0.27590435315757206, "grad_norm": 0.5046342611312866, "learning_rate": 8.382061110728345e-05, "loss": 1.4341, "step": 4950 }, { "epoch": 0.2759600914107352, "grad_norm": 0.5609323382377625, "learning_rate": 8.381405239391074e-05, "loss": 1.7528, "step": 4951 }, { "epoch": 0.2760158296638983, "grad_norm": 0.5804145336151123, "learning_rate": 8.38074926081534e-05, "loss": 1.8709, "step": 4952 }, { "epoch": 0.2760715679170615, "grad_norm": 0.5542110204696655, "learning_rate": 8.380093175021953e-05, "loss": 1.8472, "step": 4953 }, { "epoch": 0.27612730617022463, "grad_norm": 0.5371457934379578, "learning_rate": 8.379436982031718e-05, "loss": 1.5508, "step": 4954 }, { "epoch": 0.27618304442338776, "grad_norm": 0.6307567358016968, "learning_rate": 8.378780681865445e-05, "loss": 1.7762, "step": 4955 }, { "epoch": 0.27623878267655094, "grad_norm": 0.6115426421165466, "learning_rate": 8.37812427454395e-05, "loss": 1.8666, "step": 4956 }, { "epoch": 0.27629452092971407, "grad_norm": 0.5419024229049683, "learning_rate": 8.377467760088046e-05, "loss": 1.6681, "step": 4957 }, { "epoch": 0.2763502591828772, "grad_norm": 0.5587498545646667, "learning_rate": 8.376811138518558e-05, "loss": 1.8999, "step": 4958 }, { "epoch": 0.27640599743604033, "grad_norm": 0.6416218876838684, "learning_rate": 8.376154409856309e-05, "loss": 2.1091, "step": 4959 }, { "epoch": 0.2764617356892035, "grad_norm": 0.5992975234985352, "learning_rate": 8.375497574122127e-05, "loss": 1.837, "step": 4960 }, { "epoch": 0.27651747394236664, "grad_norm": 0.5807574987411499, "learning_rate": 8.374840631336842e-05, "loss": 1.643, "step": 4961 }, { "epoch": 0.27657321219552977, "grad_norm": 0.5473943948745728, "learning_rate": 8.374183581521288e-05, "loss": 1.6044, "step": 4962 }, { "epoch": 0.27662895044869296, "grad_norm": 0.5294444561004639, "learning_rate": 8.373526424696305e-05, "loss": 1.7088, "step": 4963 }, { "epoch": 0.2766846887018561, "grad_norm": 0.5424871444702148, "learning_rate": 8.372869160882733e-05, "loss": 1.5888, "step": 4964 }, { "epoch": 0.2767404269550192, "grad_norm": 0.5405928492546082, "learning_rate": 8.372211790101414e-05, "loss": 1.6905, "step": 4965 }, { "epoch": 0.2767961652081824, "grad_norm": 0.5668782591819763, "learning_rate": 8.3715543123732e-05, "loss": 1.7584, "step": 4966 }, { "epoch": 0.2768519034613455, "grad_norm": 0.586342990398407, "learning_rate": 8.370896727718942e-05, "loss": 1.7863, "step": 4967 }, { "epoch": 0.27690764171450866, "grad_norm": 0.6017349362373352, "learning_rate": 8.370239036159493e-05, "loss": 1.8825, "step": 4968 }, { "epoch": 0.2769633799676718, "grad_norm": 0.5821561813354492, "learning_rate": 8.36958123771571e-05, "loss": 1.9587, "step": 4969 }, { "epoch": 0.27701911822083497, "grad_norm": 0.5764045119285583, "learning_rate": 8.368923332408459e-05, "loss": 1.8635, "step": 4970 }, { "epoch": 0.2770748564739981, "grad_norm": 0.595043957233429, "learning_rate": 8.368265320258598e-05, "loss": 1.7843, "step": 4971 }, { "epoch": 0.27713059472716123, "grad_norm": 0.5718355774879456, "learning_rate": 8.367607201287002e-05, "loss": 1.6231, "step": 4972 }, { "epoch": 0.2771863329803244, "grad_norm": 0.5044475793838501, "learning_rate": 8.366948975514539e-05, "loss": 1.5014, "step": 4973 }, { "epoch": 0.27724207123348754, "grad_norm": 0.5001023411750793, "learning_rate": 8.366290642962087e-05, "loss": 1.522, "step": 4974 }, { "epoch": 0.27729780948665067, "grad_norm": 0.7615741491317749, "learning_rate": 8.36563220365052e-05, "loss": 1.5344, "step": 4975 }, { "epoch": 0.27735354773981385, "grad_norm": 0.47964903712272644, "learning_rate": 8.364973657600724e-05, "loss": 1.4201, "step": 4976 }, { "epoch": 0.277409285992977, "grad_norm": 0.5713698863983154, "learning_rate": 8.364315004833583e-05, "loss": 1.7664, "step": 4977 }, { "epoch": 0.2774650242461401, "grad_norm": 0.5541187524795532, "learning_rate": 8.363656245369984e-05, "loss": 1.75, "step": 4978 }, { "epoch": 0.2775207624993033, "grad_norm": 0.543755054473877, "learning_rate": 8.362997379230822e-05, "loss": 1.6432, "step": 4979 }, { "epoch": 0.2775765007524664, "grad_norm": 0.5810009241104126, "learning_rate": 8.36233840643699e-05, "loss": 1.948, "step": 4980 }, { "epoch": 0.27763223900562956, "grad_norm": 0.5693858861923218, "learning_rate": 8.361679327009388e-05, "loss": 1.8148, "step": 4981 }, { "epoch": 0.2776879772587927, "grad_norm": 0.5942829251289368, "learning_rate": 8.361020140968919e-05, "loss": 1.9087, "step": 4982 }, { "epoch": 0.27774371551195587, "grad_norm": 0.548213541507721, "learning_rate": 8.360360848336484e-05, "loss": 1.7628, "step": 4983 }, { "epoch": 0.277799453765119, "grad_norm": 0.5708996057510376, "learning_rate": 8.359701449132998e-05, "loss": 1.8127, "step": 4984 }, { "epoch": 0.2778551920182821, "grad_norm": 0.5608772039413452, "learning_rate": 8.359041943379369e-05, "loss": 1.5508, "step": 4985 }, { "epoch": 0.2779109302714453, "grad_norm": 0.5337716937065125, "learning_rate": 8.358382331096514e-05, "loss": 1.6666, "step": 4986 }, { "epoch": 0.27796666852460844, "grad_norm": 0.5663906335830688, "learning_rate": 8.357722612305353e-05, "loss": 1.8808, "step": 4987 }, { "epoch": 0.27802240677777157, "grad_norm": 0.5678949952125549, "learning_rate": 8.357062787026805e-05, "loss": 1.7122, "step": 4988 }, { "epoch": 0.27807814503093475, "grad_norm": 0.5173599720001221, "learning_rate": 8.356402855281802e-05, "loss": 1.6552, "step": 4989 }, { "epoch": 0.2781338832840979, "grad_norm": 0.5319927334785461, "learning_rate": 8.355742817091268e-05, "loss": 1.4913, "step": 4990 }, { "epoch": 0.278189621537261, "grad_norm": 0.5666325092315674, "learning_rate": 8.355082672476136e-05, "loss": 1.7334, "step": 4991 }, { "epoch": 0.27824535979042414, "grad_norm": 0.6288278698921204, "learning_rate": 8.354422421457346e-05, "loss": 2.005, "step": 4992 }, { "epoch": 0.2783010980435873, "grad_norm": 0.4918287992477417, "learning_rate": 8.353762064055833e-05, "loss": 1.6484, "step": 4993 }, { "epoch": 0.27835683629675045, "grad_norm": 0.6033855676651001, "learning_rate": 8.353101600292541e-05, "loss": 1.7403, "step": 4994 }, { "epoch": 0.2784125745499136, "grad_norm": 0.5309021472930908, "learning_rate": 8.352441030188417e-05, "loss": 1.6779, "step": 4995 }, { "epoch": 0.27846831280307677, "grad_norm": 0.5141871571540833, "learning_rate": 8.351780353764408e-05, "loss": 1.7298, "step": 4996 }, { "epoch": 0.2785240510562399, "grad_norm": 0.5200504064559937, "learning_rate": 8.351119571041468e-05, "loss": 1.594, "step": 4997 }, { "epoch": 0.278579789309403, "grad_norm": 0.5325762033462524, "learning_rate": 8.350458682040556e-05, "loss": 1.7623, "step": 4998 }, { "epoch": 0.2786355275625662, "grad_norm": 0.539318859577179, "learning_rate": 8.349797686782627e-05, "loss": 1.6779, "step": 4999 }, { "epoch": 0.27869126581572934, "grad_norm": 0.5733089447021484, "learning_rate": 8.349136585288648e-05, "loss": 1.8159, "step": 5000 }, { "epoch": 0.27874700406889247, "grad_norm": 0.5516615509986877, "learning_rate": 8.348475377579583e-05, "loss": 1.6049, "step": 5001 }, { "epoch": 0.27880274232205565, "grad_norm": 0.5449507236480713, "learning_rate": 8.3478140636764e-05, "loss": 1.661, "step": 5002 }, { "epoch": 0.2788584805752188, "grad_norm": 0.5257706642150879, "learning_rate": 8.347152643600076e-05, "loss": 1.6633, "step": 5003 }, { "epoch": 0.2789142188283819, "grad_norm": 0.5481857657432556, "learning_rate": 8.346491117371584e-05, "loss": 1.7599, "step": 5004 }, { "epoch": 0.27896995708154504, "grad_norm": 0.5461267232894897, "learning_rate": 8.345829485011906e-05, "loss": 1.6645, "step": 5005 }, { "epoch": 0.2790256953347082, "grad_norm": 0.5450317859649658, "learning_rate": 8.345167746542024e-05, "loss": 1.7965, "step": 5006 }, { "epoch": 0.27908143358787135, "grad_norm": 0.5598206520080566, "learning_rate": 8.344505901982926e-05, "loss": 1.8171, "step": 5007 }, { "epoch": 0.2791371718410345, "grad_norm": 0.5036829113960266, "learning_rate": 8.343843951355599e-05, "loss": 1.5853, "step": 5008 }, { "epoch": 0.27919291009419767, "grad_norm": 0.5530052185058594, "learning_rate": 8.34318189468104e-05, "loss": 1.8362, "step": 5009 }, { "epoch": 0.2792486483473608, "grad_norm": 0.5920783877372742, "learning_rate": 8.34251973198024e-05, "loss": 1.7712, "step": 5010 }, { "epoch": 0.2793043866005239, "grad_norm": 0.5592779517173767, "learning_rate": 8.341857463274204e-05, "loss": 1.729, "step": 5011 }, { "epoch": 0.2793601248536871, "grad_norm": 0.5464910864830017, "learning_rate": 8.341195088583934e-05, "loss": 1.9075, "step": 5012 }, { "epoch": 0.27941586310685024, "grad_norm": 0.5421869158744812, "learning_rate": 8.340532607930435e-05, "loss": 1.6845, "step": 5013 }, { "epoch": 0.27947160136001337, "grad_norm": 0.6448494791984558, "learning_rate": 8.339870021334721e-05, "loss": 1.677, "step": 5014 }, { "epoch": 0.2795273396131765, "grad_norm": 0.551950991153717, "learning_rate": 8.339207328817801e-05, "loss": 1.7604, "step": 5015 }, { "epoch": 0.2795830778663397, "grad_norm": 0.5297108292579651, "learning_rate": 8.338544530400694e-05, "loss": 1.8327, "step": 5016 }, { "epoch": 0.2796388161195028, "grad_norm": 0.5589694976806641, "learning_rate": 8.337881626104418e-05, "loss": 1.8363, "step": 5017 }, { "epoch": 0.27969455437266594, "grad_norm": 0.5295442342758179, "learning_rate": 8.337218615949999e-05, "loss": 1.5949, "step": 5018 }, { "epoch": 0.2797502926258291, "grad_norm": 0.5680721998214722, "learning_rate": 8.336555499958463e-05, "loss": 1.7101, "step": 5019 }, { "epoch": 0.27980603087899225, "grad_norm": 0.5222816467285156, "learning_rate": 8.33589227815084e-05, "loss": 1.6419, "step": 5020 }, { "epoch": 0.2798617691321554, "grad_norm": 0.5572875142097473, "learning_rate": 8.335228950548164e-05, "loss": 1.5752, "step": 5021 }, { "epoch": 0.27991750738531856, "grad_norm": 0.5234338641166687, "learning_rate": 8.334565517171471e-05, "loss": 1.608, "step": 5022 }, { "epoch": 0.2799732456384817, "grad_norm": 0.5773409008979797, "learning_rate": 8.333901978041801e-05, "loss": 1.8295, "step": 5023 }, { "epoch": 0.2800289838916448, "grad_norm": 0.6236357092857361, "learning_rate": 8.3332383331802e-05, "loss": 2.1082, "step": 5024 }, { "epoch": 0.280084722144808, "grad_norm": 0.5226585865020752, "learning_rate": 8.332574582607712e-05, "loss": 1.5637, "step": 5025 }, { "epoch": 0.28014046039797114, "grad_norm": 0.5552464723587036, "learning_rate": 8.331910726345389e-05, "loss": 1.565, "step": 5026 }, { "epoch": 0.28019619865113427, "grad_norm": 0.5889436602592468, "learning_rate": 8.331246764414282e-05, "loss": 1.6853, "step": 5027 }, { "epoch": 0.2802519369042974, "grad_norm": 0.5935594439506531, "learning_rate": 8.330582696835453e-05, "loss": 1.8281, "step": 5028 }, { "epoch": 0.2803076751574606, "grad_norm": 0.5328096747398376, "learning_rate": 8.329918523629958e-05, "loss": 1.5658, "step": 5029 }, { "epoch": 0.2803634134106237, "grad_norm": 0.5282544493675232, "learning_rate": 8.329254244818862e-05, "loss": 1.5369, "step": 5030 }, { "epoch": 0.28041915166378684, "grad_norm": 0.5771158337593079, "learning_rate": 8.328589860423234e-05, "loss": 1.718, "step": 5031 }, { "epoch": 0.28047488991695, "grad_norm": 0.5074672698974609, "learning_rate": 8.327925370464142e-05, "loss": 1.5096, "step": 5032 }, { "epoch": 0.28053062817011315, "grad_norm": 0.5818241834640503, "learning_rate": 8.32726077496266e-05, "loss": 1.8082, "step": 5033 }, { "epoch": 0.2805863664232763, "grad_norm": 0.5617592930793762, "learning_rate": 8.326596073939865e-05, "loss": 1.885, "step": 5034 }, { "epoch": 0.28064210467643946, "grad_norm": 0.5317988991737366, "learning_rate": 8.325931267416837e-05, "loss": 1.6933, "step": 5035 }, { "epoch": 0.2806978429296026, "grad_norm": 0.5429521799087524, "learning_rate": 8.325266355414663e-05, "loss": 1.7869, "step": 5036 }, { "epoch": 0.2807535811827657, "grad_norm": 0.5846121311187744, "learning_rate": 8.324601337954427e-05, "loss": 1.8213, "step": 5037 }, { "epoch": 0.28080931943592885, "grad_norm": 0.5202860236167908, "learning_rate": 8.323936215057219e-05, "loss": 1.5685, "step": 5038 }, { "epoch": 0.28086505768909203, "grad_norm": 0.5208321213722229, "learning_rate": 8.323270986744136e-05, "loss": 1.6801, "step": 5039 }, { "epoch": 0.28092079594225516, "grad_norm": 0.5601228475570679, "learning_rate": 8.322605653036273e-05, "loss": 1.7527, "step": 5040 }, { "epoch": 0.2809765341954183, "grad_norm": 0.5703938603401184, "learning_rate": 8.32194021395473e-05, "loss": 1.7583, "step": 5041 }, { "epoch": 0.2810322724485815, "grad_norm": 0.5135952234268188, "learning_rate": 8.321274669520613e-05, "loss": 1.6603, "step": 5042 }, { "epoch": 0.2810880107017446, "grad_norm": 0.5345764756202698, "learning_rate": 8.320609019755025e-05, "loss": 1.8041, "step": 5043 }, { "epoch": 0.28114374895490774, "grad_norm": 0.5866489410400391, "learning_rate": 8.319943264679082e-05, "loss": 1.8187, "step": 5044 }, { "epoch": 0.2811994872080709, "grad_norm": 0.5317565202713013, "learning_rate": 8.319277404313895e-05, "loss": 1.627, "step": 5045 }, { "epoch": 0.28125522546123405, "grad_norm": 0.5532716512680054, "learning_rate": 8.318611438680581e-05, "loss": 1.7922, "step": 5046 }, { "epoch": 0.2813109637143972, "grad_norm": 0.5880955457687378, "learning_rate": 8.317945367800262e-05, "loss": 1.9276, "step": 5047 }, { "epoch": 0.28136670196756036, "grad_norm": 0.5237969160079956, "learning_rate": 8.31727919169406e-05, "loss": 1.6415, "step": 5048 }, { "epoch": 0.2814224402207235, "grad_norm": 0.5675956010818481, "learning_rate": 8.316612910383104e-05, "loss": 1.7371, "step": 5049 }, { "epoch": 0.2814781784738866, "grad_norm": 0.5321084260940552, "learning_rate": 8.315946523888523e-05, "loss": 1.5045, "step": 5050 }, { "epoch": 0.28153391672704975, "grad_norm": 0.5198732614517212, "learning_rate": 8.31528003223145e-05, "loss": 1.7094, "step": 5051 }, { "epoch": 0.28158965498021293, "grad_norm": 0.5548423528671265, "learning_rate": 8.314613435433025e-05, "loss": 1.7824, "step": 5052 }, { "epoch": 0.28164539323337606, "grad_norm": 0.5975722074508667, "learning_rate": 8.313946733514388e-05, "loss": 1.6823, "step": 5053 }, { "epoch": 0.2817011314865392, "grad_norm": 0.5505688190460205, "learning_rate": 8.313279926496682e-05, "loss": 1.6891, "step": 5054 }, { "epoch": 0.2817568697397024, "grad_norm": 0.535331666469574, "learning_rate": 8.312613014401053e-05, "loss": 1.6879, "step": 5055 }, { "epoch": 0.2818126079928655, "grad_norm": 0.5429748296737671, "learning_rate": 8.311945997248656e-05, "loss": 1.7741, "step": 5056 }, { "epoch": 0.28186834624602863, "grad_norm": 0.5404984354972839, "learning_rate": 8.31127887506064e-05, "loss": 1.5888, "step": 5057 }, { "epoch": 0.2819240844991918, "grad_norm": 0.6144102811813354, "learning_rate": 8.310611647858164e-05, "loss": 1.8173, "step": 5058 }, { "epoch": 0.28197982275235495, "grad_norm": 0.5709677934646606, "learning_rate": 8.30994431566239e-05, "loss": 1.6492, "step": 5059 }, { "epoch": 0.2820355610055181, "grad_norm": 0.5943745374679565, "learning_rate": 8.309276878494481e-05, "loss": 1.9265, "step": 5060 }, { "epoch": 0.28209129925868126, "grad_norm": 0.5663633942604065, "learning_rate": 8.308609336375601e-05, "loss": 1.5966, "step": 5061 }, { "epoch": 0.2821470375118444, "grad_norm": 0.5235463380813599, "learning_rate": 8.307941689326926e-05, "loss": 1.6598, "step": 5062 }, { "epoch": 0.2822027757650075, "grad_norm": 0.5473840832710266, "learning_rate": 8.307273937369627e-05, "loss": 1.3741, "step": 5063 }, { "epoch": 0.28225851401817065, "grad_norm": 0.6380063891410828, "learning_rate": 8.30660608052488e-05, "loss": 1.7855, "step": 5064 }, { "epoch": 0.28231425227133383, "grad_norm": 0.5315070748329163, "learning_rate": 8.305938118813868e-05, "loss": 1.6285, "step": 5065 }, { "epoch": 0.28236999052449696, "grad_norm": 0.571528971195221, "learning_rate": 8.305270052257773e-05, "loss": 1.8315, "step": 5066 }, { "epoch": 0.2824257287776601, "grad_norm": 0.5939456820487976, "learning_rate": 8.304601880877784e-05, "loss": 1.8598, "step": 5067 }, { "epoch": 0.2824814670308233, "grad_norm": 0.5018705129623413, "learning_rate": 8.30393360469509e-05, "loss": 1.5472, "step": 5068 }, { "epoch": 0.2825372052839864, "grad_norm": 0.5844521522521973, "learning_rate": 8.303265223730885e-05, "loss": 1.8186, "step": 5069 }, { "epoch": 0.28259294353714953, "grad_norm": 0.5360279083251953, "learning_rate": 8.302596738006367e-05, "loss": 1.7101, "step": 5070 }, { "epoch": 0.2826486817903127, "grad_norm": 0.5614787340164185, "learning_rate": 8.301928147542736e-05, "loss": 1.6207, "step": 5071 }, { "epoch": 0.28270442004347585, "grad_norm": 0.5616874098777771, "learning_rate": 8.301259452361197e-05, "loss": 1.7829, "step": 5072 }, { "epoch": 0.282760158296639, "grad_norm": 0.6129429340362549, "learning_rate": 8.300590652482954e-05, "loss": 1.844, "step": 5073 }, { "epoch": 0.2828158965498021, "grad_norm": 0.5966079831123352, "learning_rate": 8.29992174792922e-05, "loss": 1.9242, "step": 5074 }, { "epoch": 0.2828716348029653, "grad_norm": 0.5461622476577759, "learning_rate": 8.299252738721206e-05, "loss": 1.7337, "step": 5075 }, { "epoch": 0.2829273730561284, "grad_norm": 0.5274501442909241, "learning_rate": 8.298583624880135e-05, "loss": 1.6531, "step": 5076 }, { "epoch": 0.28298311130929155, "grad_norm": 0.6280329823493958, "learning_rate": 8.29791440642722e-05, "loss": 1.6198, "step": 5077 }, { "epoch": 0.28303884956245473, "grad_norm": 0.5429005026817322, "learning_rate": 8.297245083383689e-05, "loss": 1.7574, "step": 5078 }, { "epoch": 0.28309458781561786, "grad_norm": 0.586188018321991, "learning_rate": 8.296575655770768e-05, "loss": 1.7325, "step": 5079 }, { "epoch": 0.283150326068781, "grad_norm": 0.48814016580581665, "learning_rate": 8.295906123609688e-05, "loss": 1.6964, "step": 5080 }, { "epoch": 0.2832060643219442, "grad_norm": 0.518273651599884, "learning_rate": 8.295236486921685e-05, "loss": 1.6128, "step": 5081 }, { "epoch": 0.2832618025751073, "grad_norm": 0.5701366066932678, "learning_rate": 8.29456674572799e-05, "loss": 1.8898, "step": 5082 }, { "epoch": 0.28331754082827043, "grad_norm": 0.522463858127594, "learning_rate": 8.293896900049846e-05, "loss": 1.513, "step": 5083 }, { "epoch": 0.2833732790814336, "grad_norm": 0.5641170144081116, "learning_rate": 8.293226949908499e-05, "loss": 1.658, "step": 5084 }, { "epoch": 0.28342901733459674, "grad_norm": 0.5498567223548889, "learning_rate": 8.292556895325194e-05, "loss": 1.6148, "step": 5085 }, { "epoch": 0.2834847555877599, "grad_norm": 0.5941603183746338, "learning_rate": 8.29188673632118e-05, "loss": 1.7469, "step": 5086 }, { "epoch": 0.283540493840923, "grad_norm": 0.5746224522590637, "learning_rate": 8.291216472917714e-05, "loss": 1.6819, "step": 5087 }, { "epoch": 0.2835962320940862, "grad_norm": 0.6701369285583496, "learning_rate": 8.290546105136048e-05, "loss": 1.3384, "step": 5088 }, { "epoch": 0.2836519703472493, "grad_norm": 0.5807752013206482, "learning_rate": 8.289875632997446e-05, "loss": 1.6534, "step": 5089 }, { "epoch": 0.28370770860041244, "grad_norm": 0.5432621240615845, "learning_rate": 8.289205056523168e-05, "loss": 1.6963, "step": 5090 }, { "epoch": 0.28376344685357563, "grad_norm": 0.5509108901023865, "learning_rate": 8.288534375734486e-05, "loss": 1.6027, "step": 5091 }, { "epoch": 0.28381918510673876, "grad_norm": 0.5456513166427612, "learning_rate": 8.287863590652666e-05, "loss": 1.6362, "step": 5092 }, { "epoch": 0.2838749233599019, "grad_norm": 0.5441727042198181, "learning_rate": 8.287192701298982e-05, "loss": 1.5781, "step": 5093 }, { "epoch": 0.28393066161306507, "grad_norm": 0.5558503866195679, "learning_rate": 8.286521707694712e-05, "loss": 1.8077, "step": 5094 }, { "epoch": 0.2839863998662282, "grad_norm": 0.5933700799942017, "learning_rate": 8.285850609861134e-05, "loss": 1.8407, "step": 5095 }, { "epoch": 0.28404213811939133, "grad_norm": 0.557685375213623, "learning_rate": 8.285179407819534e-05, "loss": 1.579, "step": 5096 }, { "epoch": 0.28409787637255446, "grad_norm": 0.5183169841766357, "learning_rate": 8.284508101591198e-05, "loss": 1.3955, "step": 5097 }, { "epoch": 0.28415361462571764, "grad_norm": 0.5807473659515381, "learning_rate": 8.283836691197413e-05, "loss": 1.8429, "step": 5098 }, { "epoch": 0.28420935287888077, "grad_norm": 0.6236990690231323, "learning_rate": 8.283165176659474e-05, "loss": 1.8281, "step": 5099 }, { "epoch": 0.2842650911320439, "grad_norm": 0.5581399202346802, "learning_rate": 8.282493557998678e-05, "loss": 1.764, "step": 5100 }, { "epoch": 0.2843208293852071, "grad_norm": 0.5508102774620056, "learning_rate": 8.281821835236325e-05, "loss": 1.8694, "step": 5101 }, { "epoch": 0.2843765676383702, "grad_norm": 0.6012663841247559, "learning_rate": 8.281150008393718e-05, "loss": 1.8829, "step": 5102 }, { "epoch": 0.28443230589153334, "grad_norm": 0.5453019738197327, "learning_rate": 8.280478077492163e-05, "loss": 1.8996, "step": 5103 }, { "epoch": 0.28448804414469653, "grad_norm": 0.5334420204162598, "learning_rate": 8.27980604255297e-05, "loss": 1.7342, "step": 5104 }, { "epoch": 0.28454378239785966, "grad_norm": 0.5454635620117188, "learning_rate": 8.279133903597451e-05, "loss": 1.7496, "step": 5105 }, { "epoch": 0.2845995206510228, "grad_norm": 0.5557402968406677, "learning_rate": 8.278461660646925e-05, "loss": 1.63, "step": 5106 }, { "epoch": 0.28465525890418597, "grad_norm": 0.5542622208595276, "learning_rate": 8.27778931372271e-05, "loss": 1.6639, "step": 5107 }, { "epoch": 0.2847109971573491, "grad_norm": 0.565591037273407, "learning_rate": 8.277116862846126e-05, "loss": 1.9303, "step": 5108 }, { "epoch": 0.28476673541051223, "grad_norm": 0.6099279522895813, "learning_rate": 8.276444308038504e-05, "loss": 1.7833, "step": 5109 }, { "epoch": 0.28482247366367536, "grad_norm": 0.6192046999931335, "learning_rate": 8.27577164932117e-05, "loss": 1.9167, "step": 5110 }, { "epoch": 0.28487821191683854, "grad_norm": 0.5659559965133667, "learning_rate": 8.275098886715462e-05, "loss": 1.7716, "step": 5111 }, { "epoch": 0.28493395017000167, "grad_norm": 0.6038410067558289, "learning_rate": 8.274426020242709e-05, "loss": 1.9078, "step": 5112 }, { "epoch": 0.2849896884231648, "grad_norm": 0.5924156904220581, "learning_rate": 8.273753049924256e-05, "loss": 1.7014, "step": 5113 }, { "epoch": 0.285045426676328, "grad_norm": 0.5436737537384033, "learning_rate": 8.273079975781442e-05, "loss": 1.6482, "step": 5114 }, { "epoch": 0.2851011649294911, "grad_norm": 0.5460022687911987, "learning_rate": 8.272406797835614e-05, "loss": 1.7304, "step": 5115 }, { "epoch": 0.28515690318265424, "grad_norm": 0.5954405069351196, "learning_rate": 8.271733516108125e-05, "loss": 1.6698, "step": 5116 }, { "epoch": 0.2852126414358174, "grad_norm": 0.638888418674469, "learning_rate": 8.27106013062032e-05, "loss": 2.0553, "step": 5117 }, { "epoch": 0.28526837968898056, "grad_norm": 0.5477131605148315, "learning_rate": 8.270386641393564e-05, "loss": 1.5031, "step": 5118 }, { "epoch": 0.2853241179421437, "grad_norm": 0.5998544692993164, "learning_rate": 8.269713048449208e-05, "loss": 1.9087, "step": 5119 }, { "epoch": 0.2853798561953068, "grad_norm": 0.5584544539451599, "learning_rate": 8.26903935180862e-05, "loss": 1.8125, "step": 5120 }, { "epoch": 0.28543559444847, "grad_norm": 0.5390369892120361, "learning_rate": 8.268365551493161e-05, "loss": 1.6459, "step": 5121 }, { "epoch": 0.2854913327016331, "grad_norm": 0.5171942710876465, "learning_rate": 8.267691647524206e-05, "loss": 1.6801, "step": 5122 }, { "epoch": 0.28554707095479626, "grad_norm": 0.4894436299800873, "learning_rate": 8.26701763992312e-05, "loss": 1.4172, "step": 5123 }, { "epoch": 0.28560280920795944, "grad_norm": 0.5318630337715149, "learning_rate": 8.266343528711285e-05, "loss": 1.6956, "step": 5124 }, { "epoch": 0.28565854746112257, "grad_norm": 0.513378918170929, "learning_rate": 8.265669313910077e-05, "loss": 1.5235, "step": 5125 }, { "epoch": 0.2857142857142857, "grad_norm": 0.6027741432189941, "learning_rate": 8.264994995540878e-05, "loss": 1.9089, "step": 5126 }, { "epoch": 0.2857700239674489, "grad_norm": 0.5300361514091492, "learning_rate": 8.264320573625075e-05, "loss": 1.6013, "step": 5127 }, { "epoch": 0.285825762220612, "grad_norm": 0.5484519600868225, "learning_rate": 8.263646048184055e-05, "loss": 1.6596, "step": 5128 }, { "epoch": 0.28588150047377514, "grad_norm": 0.6186813116073608, "learning_rate": 8.26297141923921e-05, "loss": 1.7786, "step": 5129 }, { "epoch": 0.2859372387269383, "grad_norm": 0.5475611686706543, "learning_rate": 8.262296686811936e-05, "loss": 1.6151, "step": 5130 }, { "epoch": 0.28599297698010145, "grad_norm": 0.612417995929718, "learning_rate": 8.261621850923634e-05, "loss": 1.587, "step": 5131 }, { "epoch": 0.2860487152332646, "grad_norm": 0.5619268417358398, "learning_rate": 8.260946911595701e-05, "loss": 1.6915, "step": 5132 }, { "epoch": 0.2861044534864277, "grad_norm": 0.5510770678520203, "learning_rate": 8.260271868849547e-05, "loss": 1.9188, "step": 5133 }, { "epoch": 0.2861601917395909, "grad_norm": 0.5569331049919128, "learning_rate": 8.259596722706575e-05, "loss": 1.7657, "step": 5134 }, { "epoch": 0.286215929992754, "grad_norm": 0.48364466428756714, "learning_rate": 8.258921473188202e-05, "loss": 1.3247, "step": 5135 }, { "epoch": 0.28627166824591715, "grad_norm": 0.5114015936851501, "learning_rate": 8.25824612031584e-05, "loss": 1.6025, "step": 5136 }, { "epoch": 0.28632740649908034, "grad_norm": 0.5254806876182556, "learning_rate": 8.257570664110907e-05, "loss": 1.7264, "step": 5137 }, { "epoch": 0.28638314475224347, "grad_norm": 0.5384583473205566, "learning_rate": 8.256895104594828e-05, "loss": 1.802, "step": 5138 }, { "epoch": 0.2864388830054066, "grad_norm": 0.5924034118652344, "learning_rate": 8.256219441789022e-05, "loss": 1.9493, "step": 5139 }, { "epoch": 0.2864946212585698, "grad_norm": 0.5453627705574036, "learning_rate": 8.255543675714923e-05, "loss": 1.5655, "step": 5140 }, { "epoch": 0.2865503595117329, "grad_norm": 0.535179853439331, "learning_rate": 8.254867806393957e-05, "loss": 1.5492, "step": 5141 }, { "epoch": 0.28660609776489604, "grad_norm": 0.5418823957443237, "learning_rate": 8.254191833847564e-05, "loss": 1.7343, "step": 5142 }, { "epoch": 0.28666183601805917, "grad_norm": 0.5330826044082642, "learning_rate": 8.253515758097179e-05, "loss": 1.6551, "step": 5143 }, { "epoch": 0.28671757427122235, "grad_norm": 0.6033239960670471, "learning_rate": 8.252839579164243e-05, "loss": 1.8227, "step": 5144 }, { "epoch": 0.2867733125243855, "grad_norm": 0.5882185697555542, "learning_rate": 8.252163297070201e-05, "loss": 1.9731, "step": 5145 }, { "epoch": 0.2868290507775486, "grad_norm": 0.537185788154602, "learning_rate": 8.251486911836501e-05, "loss": 1.5992, "step": 5146 }, { "epoch": 0.2868847890307118, "grad_norm": 0.5307870507240295, "learning_rate": 8.250810423484592e-05, "loss": 1.5641, "step": 5147 }, { "epoch": 0.2869405272838749, "grad_norm": 0.5483027696609497, "learning_rate": 8.25013383203593e-05, "loss": 1.759, "step": 5148 }, { "epoch": 0.28699626553703805, "grad_norm": 0.5503141283988953, "learning_rate": 8.249457137511976e-05, "loss": 1.7229, "step": 5149 }, { "epoch": 0.28705200379020124, "grad_norm": 0.5450831651687622, "learning_rate": 8.248780339934183e-05, "loss": 1.6758, "step": 5150 }, { "epoch": 0.28710774204336437, "grad_norm": 0.5555149912834167, "learning_rate": 8.248103439324022e-05, "loss": 1.7173, "step": 5151 }, { "epoch": 0.2871634802965275, "grad_norm": 0.5960267186164856, "learning_rate": 8.247426435702956e-05, "loss": 1.8327, "step": 5152 }, { "epoch": 0.2872192185496907, "grad_norm": 0.5497944951057434, "learning_rate": 8.246749329092458e-05, "loss": 1.6373, "step": 5153 }, { "epoch": 0.2872749568028538, "grad_norm": 0.6035077571868896, "learning_rate": 8.246072119514e-05, "loss": 2.0384, "step": 5154 }, { "epoch": 0.28733069505601694, "grad_norm": 0.5685641765594482, "learning_rate": 8.245394806989062e-05, "loss": 1.9093, "step": 5155 }, { "epoch": 0.28738643330918007, "grad_norm": 0.5542479753494263, "learning_rate": 8.244717391539124e-05, "loss": 1.6794, "step": 5156 }, { "epoch": 0.28744217156234325, "grad_norm": 0.5434539318084717, "learning_rate": 8.244039873185664e-05, "loss": 1.6624, "step": 5157 }, { "epoch": 0.2874979098155064, "grad_norm": 0.5240741968154907, "learning_rate": 8.243362251950177e-05, "loss": 1.7119, "step": 5158 }, { "epoch": 0.2875536480686695, "grad_norm": 0.5400795340538025, "learning_rate": 8.242684527854148e-05, "loss": 1.7379, "step": 5159 }, { "epoch": 0.2876093863218327, "grad_norm": 0.5450997352600098, "learning_rate": 8.242006700919072e-05, "loss": 1.648, "step": 5160 }, { "epoch": 0.2876651245749958, "grad_norm": 0.5497955679893494, "learning_rate": 8.241328771166446e-05, "loss": 1.8969, "step": 5161 }, { "epoch": 0.28772086282815895, "grad_norm": 0.556607186794281, "learning_rate": 8.24065073861777e-05, "loss": 1.7941, "step": 5162 }, { "epoch": 0.28777660108132214, "grad_norm": 0.5775546431541443, "learning_rate": 8.239972603294546e-05, "loss": 1.7996, "step": 5163 }, { "epoch": 0.28783233933448527, "grad_norm": 0.5500494241714478, "learning_rate": 8.239294365218282e-05, "loss": 1.486, "step": 5164 }, { "epoch": 0.2878880775876484, "grad_norm": 0.5263432860374451, "learning_rate": 8.238616024410486e-05, "loss": 1.8011, "step": 5165 }, { "epoch": 0.2879438158408115, "grad_norm": 0.580796480178833, "learning_rate": 8.237937580892674e-05, "loss": 1.7308, "step": 5166 }, { "epoch": 0.2879995540939747, "grad_norm": 0.5561580657958984, "learning_rate": 8.237259034686359e-05, "loss": 1.7732, "step": 5167 }, { "epoch": 0.28805529234713784, "grad_norm": 0.5456521511077881, "learning_rate": 8.236580385813062e-05, "loss": 1.6932, "step": 5168 }, { "epoch": 0.28811103060030097, "grad_norm": 0.5676544904708862, "learning_rate": 8.235901634294306e-05, "loss": 1.8033, "step": 5169 }, { "epoch": 0.28816676885346415, "grad_norm": 0.5046932697296143, "learning_rate": 8.235222780151616e-05, "loss": 1.5637, "step": 5170 }, { "epoch": 0.2882225071066273, "grad_norm": 0.5261063575744629, "learning_rate": 8.234543823406525e-05, "loss": 1.5763, "step": 5171 }, { "epoch": 0.2882782453597904, "grad_norm": 0.5619118809700012, "learning_rate": 8.23386476408056e-05, "loss": 1.7251, "step": 5172 }, { "epoch": 0.2883339836129536, "grad_norm": 0.5556089282035828, "learning_rate": 8.233185602195259e-05, "loss": 1.7168, "step": 5173 }, { "epoch": 0.2883897218661167, "grad_norm": 0.5449663400650024, "learning_rate": 8.232506337772163e-05, "loss": 1.7282, "step": 5174 }, { "epoch": 0.28844546011927985, "grad_norm": 0.5821020007133484, "learning_rate": 8.231826970832812e-05, "loss": 2.0267, "step": 5175 }, { "epoch": 0.28850119837244304, "grad_norm": 0.5104268193244934, "learning_rate": 8.231147501398753e-05, "loss": 1.4387, "step": 5176 }, { "epoch": 0.28855693662560616, "grad_norm": 0.548219621181488, "learning_rate": 8.230467929491534e-05, "loss": 1.7042, "step": 5177 }, { "epoch": 0.2886126748787693, "grad_norm": 0.5711565017700195, "learning_rate": 8.229788255132706e-05, "loss": 1.6752, "step": 5178 }, { "epoch": 0.2886684131319324, "grad_norm": 0.526942789554596, "learning_rate": 8.229108478343827e-05, "loss": 1.5905, "step": 5179 }, { "epoch": 0.2887241513850956, "grad_norm": 0.5535737872123718, "learning_rate": 8.228428599146453e-05, "loss": 1.6857, "step": 5180 }, { "epoch": 0.28877988963825874, "grad_norm": 0.5093039870262146, "learning_rate": 8.227748617562147e-05, "loss": 1.6489, "step": 5181 }, { "epoch": 0.28883562789142186, "grad_norm": 0.5642322301864624, "learning_rate": 8.227068533612475e-05, "loss": 1.8709, "step": 5182 }, { "epoch": 0.28889136614458505, "grad_norm": 0.5547685623168945, "learning_rate": 8.226388347319004e-05, "loss": 1.7088, "step": 5183 }, { "epoch": 0.2889471043977482, "grad_norm": 0.5316441059112549, "learning_rate": 8.225708058703305e-05, "loss": 1.59, "step": 5184 }, { "epoch": 0.2890028426509113, "grad_norm": 0.5305221080780029, "learning_rate": 8.225027667786955e-05, "loss": 1.4301, "step": 5185 }, { "epoch": 0.2890585809040745, "grad_norm": 0.5498524904251099, "learning_rate": 8.224347174591529e-05, "loss": 1.533, "step": 5186 }, { "epoch": 0.2891143191572376, "grad_norm": 0.5519589781761169, "learning_rate": 8.22366657913861e-05, "loss": 1.7171, "step": 5187 }, { "epoch": 0.28917005741040075, "grad_norm": 0.5893858075141907, "learning_rate": 8.222985881449783e-05, "loss": 1.7751, "step": 5188 }, { "epoch": 0.2892257956635639, "grad_norm": 0.5334852933883667, "learning_rate": 8.222305081546635e-05, "loss": 1.6905, "step": 5189 }, { "epoch": 0.28928153391672706, "grad_norm": 0.5692505836486816, "learning_rate": 8.221624179450757e-05, "loss": 1.6461, "step": 5190 }, { "epoch": 0.2893372721698902, "grad_norm": 0.5988993644714355, "learning_rate": 8.220943175183743e-05, "loss": 2.0131, "step": 5191 }, { "epoch": 0.2893930104230533, "grad_norm": 0.6873819231987, "learning_rate": 8.220262068767191e-05, "loss": 1.977, "step": 5192 }, { "epoch": 0.2894487486762165, "grad_norm": 0.5408362746238708, "learning_rate": 8.219580860222701e-05, "loss": 1.6866, "step": 5193 }, { "epoch": 0.28950448692937963, "grad_norm": 0.8928006291389465, "learning_rate": 8.218899549571878e-05, "loss": 1.6639, "step": 5194 }, { "epoch": 0.28956022518254276, "grad_norm": 0.5256812572479248, "learning_rate": 8.218218136836331e-05, "loss": 1.435, "step": 5195 }, { "epoch": 0.28961596343570595, "grad_norm": 0.5350750684738159, "learning_rate": 8.217536622037667e-05, "loss": 1.6317, "step": 5196 }, { "epoch": 0.2896717016888691, "grad_norm": 0.5534375309944153, "learning_rate": 8.2168550051975e-05, "loss": 1.7473, "step": 5197 }, { "epoch": 0.2897274399420322, "grad_norm": 0.5433312058448792, "learning_rate": 8.216173286337448e-05, "loss": 1.8094, "step": 5198 }, { "epoch": 0.2897831781951954, "grad_norm": 0.5386417508125305, "learning_rate": 8.215491465479133e-05, "loss": 1.5757, "step": 5199 }, { "epoch": 0.2898389164483585, "grad_norm": 0.6519530415534973, "learning_rate": 8.214809542644173e-05, "loss": 1.9404, "step": 5200 }, { "epoch": 0.28989465470152165, "grad_norm": 0.6092321872711182, "learning_rate": 8.214127517854199e-05, "loss": 1.8751, "step": 5201 }, { "epoch": 0.2899503929546848, "grad_norm": 0.5904344320297241, "learning_rate": 8.213445391130841e-05, "loss": 1.8278, "step": 5202 }, { "epoch": 0.29000613120784796, "grad_norm": 0.6538552045822144, "learning_rate": 8.212763162495729e-05, "loss": 1.683, "step": 5203 }, { "epoch": 0.2900618694610111, "grad_norm": 0.5683111548423767, "learning_rate": 8.212080831970503e-05, "loss": 1.6758, "step": 5204 }, { "epoch": 0.2901176077141742, "grad_norm": 0.5633412599563599, "learning_rate": 8.2113983995768e-05, "loss": 1.7229, "step": 5205 }, { "epoch": 0.2901733459673374, "grad_norm": 0.5722443461418152, "learning_rate": 8.210715865336263e-05, "loss": 1.8076, "step": 5206 }, { "epoch": 0.29022908422050053, "grad_norm": 0.562892496585846, "learning_rate": 8.21003322927054e-05, "loss": 1.672, "step": 5207 }, { "epoch": 0.29028482247366366, "grad_norm": 0.5266914367675781, "learning_rate": 8.209350491401277e-05, "loss": 1.6009, "step": 5208 }, { "epoch": 0.29034056072682685, "grad_norm": 0.576404869556427, "learning_rate": 8.20866765175013e-05, "loss": 1.8675, "step": 5209 }, { "epoch": 0.29039629897999, "grad_norm": 0.6091673374176025, "learning_rate": 8.207984710338752e-05, "loss": 1.7122, "step": 5210 }, { "epoch": 0.2904520372331531, "grad_norm": 0.590103030204773, "learning_rate": 8.207301667188803e-05, "loss": 1.5629, "step": 5211 }, { "epoch": 0.29050777548631623, "grad_norm": 0.5491459369659424, "learning_rate": 8.206618522321945e-05, "loss": 1.6373, "step": 5212 }, { "epoch": 0.2905635137394794, "grad_norm": 0.5361247062683105, "learning_rate": 8.205935275759842e-05, "loss": 1.7587, "step": 5213 }, { "epoch": 0.29061925199264255, "grad_norm": 0.5602622628211975, "learning_rate": 8.205251927524164e-05, "loss": 1.6596, "step": 5214 }, { "epoch": 0.2906749902458057, "grad_norm": 0.5763882994651794, "learning_rate": 8.204568477636585e-05, "loss": 1.7195, "step": 5215 }, { "epoch": 0.29073072849896886, "grad_norm": 0.5280525088310242, "learning_rate": 8.203884926118777e-05, "loss": 1.6929, "step": 5216 }, { "epoch": 0.290786466752132, "grad_norm": 0.5279143452644348, "learning_rate": 8.203201272992419e-05, "loss": 1.4884, "step": 5217 }, { "epoch": 0.2908422050052951, "grad_norm": 0.5360000729560852, "learning_rate": 8.202517518279193e-05, "loss": 1.6383, "step": 5218 }, { "epoch": 0.2908979432584583, "grad_norm": 0.5178120732307434, "learning_rate": 8.201833662000781e-05, "loss": 1.3916, "step": 5219 }, { "epoch": 0.29095368151162143, "grad_norm": 0.5441476702690125, "learning_rate": 8.201149704178875e-05, "loss": 1.8316, "step": 5220 }, { "epoch": 0.29100941976478456, "grad_norm": 0.5272539854049683, "learning_rate": 8.200465644835165e-05, "loss": 1.479, "step": 5221 }, { "epoch": 0.29106515801794774, "grad_norm": 0.5858429074287415, "learning_rate": 8.199781483991345e-05, "loss": 1.8735, "step": 5222 }, { "epoch": 0.2911208962711109, "grad_norm": 0.5939355492591858, "learning_rate": 8.19909722166911e-05, "loss": 1.8911, "step": 5223 }, { "epoch": 0.291176634524274, "grad_norm": 0.6942164301872253, "learning_rate": 8.198412857890166e-05, "loss": 1.5865, "step": 5224 }, { "epoch": 0.29123237277743713, "grad_norm": 0.5283763408660889, "learning_rate": 8.197728392676211e-05, "loss": 1.518, "step": 5225 }, { "epoch": 0.2912881110306003, "grad_norm": 0.5898897051811218, "learning_rate": 8.197043826048957e-05, "loss": 1.4729, "step": 5226 }, { "epoch": 0.29134384928376345, "grad_norm": 0.6161963939666748, "learning_rate": 8.196359158030113e-05, "loss": 1.7724, "step": 5227 }, { "epoch": 0.2913995875369266, "grad_norm": 0.5693463683128357, "learning_rate": 8.195674388641393e-05, "loss": 1.7379, "step": 5228 }, { "epoch": 0.29145532579008976, "grad_norm": 0.5397728681564331, "learning_rate": 8.194989517904513e-05, "loss": 1.694, "step": 5229 }, { "epoch": 0.2915110640432529, "grad_norm": 0.5856531858444214, "learning_rate": 8.194304545841193e-05, "loss": 1.7607, "step": 5230 }, { "epoch": 0.291566802296416, "grad_norm": 0.5777943730354309, "learning_rate": 8.19361947247316e-05, "loss": 1.7321, "step": 5231 }, { "epoch": 0.2916225405495792, "grad_norm": 0.5896830558776855, "learning_rate": 8.192934297822133e-05, "loss": 1.7183, "step": 5232 }, { "epoch": 0.29167827880274233, "grad_norm": 0.6119521260261536, "learning_rate": 8.192249021909847e-05, "loss": 1.9229, "step": 5233 }, { "epoch": 0.29173401705590546, "grad_norm": 0.5776544213294983, "learning_rate": 8.191563644758037e-05, "loss": 1.8151, "step": 5234 }, { "epoch": 0.2917897553090686, "grad_norm": 0.510097086429596, "learning_rate": 8.190878166388435e-05, "loss": 1.6619, "step": 5235 }, { "epoch": 0.2918454935622318, "grad_norm": 0.5378518104553223, "learning_rate": 8.19019258682278e-05, "loss": 1.8347, "step": 5236 }, { "epoch": 0.2919012318153949, "grad_norm": 0.5934120416641235, "learning_rate": 8.189506906082818e-05, "loss": 1.7583, "step": 5237 }, { "epoch": 0.29195697006855803, "grad_norm": 0.49861982464790344, "learning_rate": 8.188821124190293e-05, "loss": 1.4644, "step": 5238 }, { "epoch": 0.2920127083217212, "grad_norm": 0.5318624377250671, "learning_rate": 8.188135241166953e-05, "loss": 1.6562, "step": 5239 }, { "epoch": 0.29206844657488434, "grad_norm": 0.5517171621322632, "learning_rate": 8.187449257034552e-05, "loss": 1.6493, "step": 5240 }, { "epoch": 0.2921241848280475, "grad_norm": 0.5400835275650024, "learning_rate": 8.186763171814845e-05, "loss": 1.5672, "step": 5241 }, { "epoch": 0.29217992308121066, "grad_norm": 0.5250990986824036, "learning_rate": 8.186076985529589e-05, "loss": 1.6091, "step": 5242 }, { "epoch": 0.2922356613343738, "grad_norm": 0.5855765342712402, "learning_rate": 8.18539069820055e-05, "loss": 1.8457, "step": 5243 }, { "epoch": 0.2922913995875369, "grad_norm": 0.6245700716972351, "learning_rate": 8.184704309849487e-05, "loss": 1.5562, "step": 5244 }, { "epoch": 0.2923471378407001, "grad_norm": 0.583342432975769, "learning_rate": 8.184017820498173e-05, "loss": 1.8421, "step": 5245 }, { "epoch": 0.29240287609386323, "grad_norm": 0.576387345790863, "learning_rate": 8.183331230168377e-05, "loss": 1.7761, "step": 5246 }, { "epoch": 0.29245861434702636, "grad_norm": 0.5464752316474915, "learning_rate": 8.182644538881873e-05, "loss": 1.6677, "step": 5247 }, { "epoch": 0.2925143526001895, "grad_norm": 0.602606475353241, "learning_rate": 8.181957746660445e-05, "loss": 2.0468, "step": 5248 }, { "epoch": 0.29257009085335267, "grad_norm": 0.535839855670929, "learning_rate": 8.181270853525866e-05, "loss": 1.5903, "step": 5249 }, { "epoch": 0.2926258291065158, "grad_norm": 0.5617656707763672, "learning_rate": 8.180583859499923e-05, "loss": 1.6818, "step": 5250 }, { "epoch": 0.29268156735967893, "grad_norm": 0.5979596972465515, "learning_rate": 8.179896764604407e-05, "loss": 1.7915, "step": 5251 }, { "epoch": 0.2927373056128421, "grad_norm": 0.5312914848327637, "learning_rate": 8.179209568861104e-05, "loss": 1.4523, "step": 5252 }, { "epoch": 0.29279304386600524, "grad_norm": 0.5243698358535767, "learning_rate": 8.178522272291809e-05, "loss": 1.5611, "step": 5253 }, { "epoch": 0.29284878211916837, "grad_norm": 0.5564961433410645, "learning_rate": 8.17783487491832e-05, "loss": 1.7228, "step": 5254 }, { "epoch": 0.29290452037233156, "grad_norm": 0.5704841613769531, "learning_rate": 8.177147376762437e-05, "loss": 1.8324, "step": 5255 }, { "epoch": 0.2929602586254947, "grad_norm": 0.5011201500892639, "learning_rate": 8.176459777845964e-05, "loss": 1.6782, "step": 5256 }, { "epoch": 0.2930159968786578, "grad_norm": 0.4964855909347534, "learning_rate": 8.175772078190707e-05, "loss": 1.4567, "step": 5257 }, { "epoch": 0.29307173513182094, "grad_norm": 0.547637403011322, "learning_rate": 8.175084277818472e-05, "loss": 1.6129, "step": 5258 }, { "epoch": 0.2931274733849841, "grad_norm": 0.5082324743270874, "learning_rate": 8.174396376751079e-05, "loss": 1.5253, "step": 5259 }, { "epoch": 0.29318321163814726, "grad_norm": 0.535663366317749, "learning_rate": 8.173708375010342e-05, "loss": 1.574, "step": 5260 }, { "epoch": 0.2932389498913104, "grad_norm": 0.5733945965766907, "learning_rate": 8.173020272618078e-05, "loss": 1.8022, "step": 5261 }, { "epoch": 0.29329468814447357, "grad_norm": 0.5937253832817078, "learning_rate": 8.172332069596111e-05, "loss": 1.952, "step": 5262 }, { "epoch": 0.2933504263976367, "grad_norm": 0.5622910261154175, "learning_rate": 8.171643765966266e-05, "loss": 1.6838, "step": 5263 }, { "epoch": 0.29340616465079983, "grad_norm": 0.5633754730224609, "learning_rate": 8.170955361750373e-05, "loss": 1.8205, "step": 5264 }, { "epoch": 0.293461902903963, "grad_norm": 0.5639583468437195, "learning_rate": 8.170266856970264e-05, "loss": 1.6995, "step": 5265 }, { "epoch": 0.29351764115712614, "grad_norm": 0.5767412781715393, "learning_rate": 8.169578251647775e-05, "loss": 1.8193, "step": 5266 }, { "epoch": 0.29357337941028927, "grad_norm": 0.5323848128318787, "learning_rate": 8.168889545804743e-05, "loss": 1.6137, "step": 5267 }, { "epoch": 0.29362911766345245, "grad_norm": 0.5105542540550232, "learning_rate": 8.16820073946301e-05, "loss": 1.3883, "step": 5268 }, { "epoch": 0.2936848559166156, "grad_norm": 0.5348597168922424, "learning_rate": 8.167511832644423e-05, "loss": 1.7465, "step": 5269 }, { "epoch": 0.2937405941697787, "grad_norm": 0.5634239315986633, "learning_rate": 8.166822825370828e-05, "loss": 1.8121, "step": 5270 }, { "epoch": 0.29379633242294184, "grad_norm": 0.5704219937324524, "learning_rate": 8.166133717664075e-05, "loss": 1.8007, "step": 5271 }, { "epoch": 0.293852070676105, "grad_norm": 0.5514686703681946, "learning_rate": 8.165444509546023e-05, "loss": 1.7627, "step": 5272 }, { "epoch": 0.29390780892926816, "grad_norm": 0.5763065218925476, "learning_rate": 8.164755201038525e-05, "loss": 1.8668, "step": 5273 }, { "epoch": 0.2939635471824313, "grad_norm": 0.5290045738220215, "learning_rate": 8.164065792163445e-05, "loss": 1.6992, "step": 5274 }, { "epoch": 0.29401928543559447, "grad_norm": 0.5327118039131165, "learning_rate": 8.163376282942645e-05, "loss": 1.6882, "step": 5275 }, { "epoch": 0.2940750236887576, "grad_norm": 0.5230002403259277, "learning_rate": 8.162686673397995e-05, "loss": 1.6314, "step": 5276 }, { "epoch": 0.2941307619419207, "grad_norm": 0.5596842765808105, "learning_rate": 8.161996963551361e-05, "loss": 1.8543, "step": 5277 }, { "epoch": 0.2941865001950839, "grad_norm": 0.4837280809879303, "learning_rate": 8.16130715342462e-05, "loss": 1.407, "step": 5278 }, { "epoch": 0.29424223844824704, "grad_norm": 0.5188647508621216, "learning_rate": 8.160617243039648e-05, "loss": 1.6469, "step": 5279 }, { "epoch": 0.29429797670141017, "grad_norm": 0.5345882177352905, "learning_rate": 8.159927232418325e-05, "loss": 1.762, "step": 5280 }, { "epoch": 0.2943537149545733, "grad_norm": 0.6385248303413391, "learning_rate": 8.159237121582532e-05, "loss": 1.725, "step": 5281 }, { "epoch": 0.2944094532077365, "grad_norm": 0.532394289970398, "learning_rate": 8.158546910554159e-05, "loss": 1.59, "step": 5282 }, { "epoch": 0.2944651914608996, "grad_norm": 0.5918634533882141, "learning_rate": 8.157856599355093e-05, "loss": 1.8722, "step": 5283 }, { "epoch": 0.29452092971406274, "grad_norm": 0.5643036365509033, "learning_rate": 8.157166188007228e-05, "loss": 1.6608, "step": 5284 }, { "epoch": 0.2945766679672259, "grad_norm": 0.5480226874351501, "learning_rate": 8.156475676532458e-05, "loss": 1.6745, "step": 5285 }, { "epoch": 0.29463240622038905, "grad_norm": 0.5562642216682434, "learning_rate": 8.155785064952683e-05, "loss": 1.9036, "step": 5286 }, { "epoch": 0.2946881444735522, "grad_norm": 0.5737085938453674, "learning_rate": 8.155094353289807e-05, "loss": 1.6749, "step": 5287 }, { "epoch": 0.29474388272671537, "grad_norm": 0.537407398223877, "learning_rate": 8.154403541565732e-05, "loss": 1.5855, "step": 5288 }, { "epoch": 0.2947996209798785, "grad_norm": 0.5637186169624329, "learning_rate": 8.153712629802369e-05, "loss": 1.6667, "step": 5289 }, { "epoch": 0.2948553592330416, "grad_norm": 0.587086021900177, "learning_rate": 8.153021618021628e-05, "loss": 1.709, "step": 5290 }, { "epoch": 0.2949110974862048, "grad_norm": 0.5255305767059326, "learning_rate": 8.152330506245425e-05, "loss": 1.4982, "step": 5291 }, { "epoch": 0.29496683573936794, "grad_norm": 0.5582296848297119, "learning_rate": 8.151639294495678e-05, "loss": 1.6915, "step": 5292 }, { "epoch": 0.29502257399253107, "grad_norm": 0.5476033687591553, "learning_rate": 8.150947982794307e-05, "loss": 1.4827, "step": 5293 }, { "epoch": 0.2950783122456942, "grad_norm": 0.548763632774353, "learning_rate": 8.150256571163238e-05, "loss": 1.805, "step": 5294 }, { "epoch": 0.2951340504988574, "grad_norm": 0.58586585521698, "learning_rate": 8.149565059624398e-05, "loss": 1.7433, "step": 5295 }, { "epoch": 0.2951897887520205, "grad_norm": 0.5618621110916138, "learning_rate": 8.148873448199717e-05, "loss": 1.7681, "step": 5296 }, { "epoch": 0.29524552700518364, "grad_norm": 0.5388831496238708, "learning_rate": 8.148181736911129e-05, "loss": 1.582, "step": 5297 }, { "epoch": 0.2953012652583468, "grad_norm": 0.5742696523666382, "learning_rate": 8.147489925780572e-05, "loss": 1.8182, "step": 5298 }, { "epoch": 0.29535700351150995, "grad_norm": 0.5271889567375183, "learning_rate": 8.146798014829986e-05, "loss": 1.4823, "step": 5299 }, { "epoch": 0.2954127417646731, "grad_norm": 0.5565046072006226, "learning_rate": 8.146106004081315e-05, "loss": 1.6328, "step": 5300 }, { "epoch": 0.29546848001783627, "grad_norm": 0.5434616804122925, "learning_rate": 8.145413893556503e-05, "loss": 1.5871, "step": 5301 }, { "epoch": 0.2955242182709994, "grad_norm": 0.5343239903450012, "learning_rate": 8.144721683277504e-05, "loss": 1.6328, "step": 5302 }, { "epoch": 0.2955799565241625, "grad_norm": 0.5372942686080933, "learning_rate": 8.144029373266264e-05, "loss": 1.6885, "step": 5303 }, { "epoch": 0.29563569477732565, "grad_norm": 0.5881915092468262, "learning_rate": 8.143336963544746e-05, "loss": 1.8579, "step": 5304 }, { "epoch": 0.29569143303048884, "grad_norm": 0.5892425179481506, "learning_rate": 8.142644454134905e-05, "loss": 1.8771, "step": 5305 }, { "epoch": 0.29574717128365197, "grad_norm": 0.5286465287208557, "learning_rate": 8.141951845058707e-05, "loss": 1.6766, "step": 5306 }, { "epoch": 0.2958029095368151, "grad_norm": 0.5843679904937744, "learning_rate": 8.141259136338113e-05, "loss": 1.7359, "step": 5307 }, { "epoch": 0.2958586477899783, "grad_norm": 0.6178736090660095, "learning_rate": 8.140566327995094e-05, "loss": 1.9672, "step": 5308 }, { "epoch": 0.2959143860431414, "grad_norm": 0.5524381399154663, "learning_rate": 8.139873420051623e-05, "loss": 1.5947, "step": 5309 }, { "epoch": 0.29597012429630454, "grad_norm": 0.5591756105422974, "learning_rate": 8.139180412529674e-05, "loss": 1.7245, "step": 5310 }, { "epoch": 0.2960258625494677, "grad_norm": 0.5642113089561462, "learning_rate": 8.138487305451224e-05, "loss": 1.7156, "step": 5311 }, { "epoch": 0.29608160080263085, "grad_norm": 0.5767959356307983, "learning_rate": 8.137794098838257e-05, "loss": 1.78, "step": 5312 }, { "epoch": 0.296137339055794, "grad_norm": 0.5422171950340271, "learning_rate": 8.137100792712755e-05, "loss": 1.9258, "step": 5313 }, { "epoch": 0.29619307730895716, "grad_norm": 0.5860824584960938, "learning_rate": 8.136407387096704e-05, "loss": 1.7132, "step": 5314 }, { "epoch": 0.2962488155621203, "grad_norm": 0.6460077166557312, "learning_rate": 8.135713882012102e-05, "loss": 1.8024, "step": 5315 }, { "epoch": 0.2963045538152834, "grad_norm": 0.5744182467460632, "learning_rate": 8.135020277480934e-05, "loss": 1.7025, "step": 5316 }, { "epoch": 0.29636029206844655, "grad_norm": 0.560867965221405, "learning_rate": 8.134326573525202e-05, "loss": 1.7402, "step": 5317 }, { "epoch": 0.29641603032160974, "grad_norm": 0.5005339980125427, "learning_rate": 8.133632770166907e-05, "loss": 1.585, "step": 5318 }, { "epoch": 0.29647176857477286, "grad_norm": 0.5216720700263977, "learning_rate": 8.13293886742805e-05, "loss": 1.7313, "step": 5319 }, { "epoch": 0.296527506827936, "grad_norm": 0.5353510975837708, "learning_rate": 8.132244865330638e-05, "loss": 1.7854, "step": 5320 }, { "epoch": 0.2965832450810992, "grad_norm": 0.5222895741462708, "learning_rate": 8.131550763896682e-05, "loss": 1.6821, "step": 5321 }, { "epoch": 0.2966389833342623, "grad_norm": 0.5571734309196472, "learning_rate": 8.130856563148193e-05, "loss": 1.6151, "step": 5322 }, { "epoch": 0.29669472158742544, "grad_norm": 0.5494416952133179, "learning_rate": 8.130162263107189e-05, "loss": 1.7497, "step": 5323 }, { "epoch": 0.2967504598405886, "grad_norm": 0.5263827443122864, "learning_rate": 8.129467863795688e-05, "loss": 1.7157, "step": 5324 }, { "epoch": 0.29680619809375175, "grad_norm": 0.5756681561470032, "learning_rate": 8.128773365235711e-05, "loss": 1.6488, "step": 5325 }, { "epoch": 0.2968619363469149, "grad_norm": 0.5204091668128967, "learning_rate": 8.128078767449287e-05, "loss": 1.6868, "step": 5326 }, { "epoch": 0.296917674600078, "grad_norm": 0.5748211145401001, "learning_rate": 8.127384070458442e-05, "loss": 1.9352, "step": 5327 }, { "epoch": 0.2969734128532412, "grad_norm": 0.5648884773254395, "learning_rate": 8.126689274285207e-05, "loss": 1.9085, "step": 5328 }, { "epoch": 0.2970291511064043, "grad_norm": 0.5396182537078857, "learning_rate": 8.125994378951619e-05, "loss": 1.715, "step": 5329 }, { "epoch": 0.29708488935956745, "grad_norm": 0.5755982398986816, "learning_rate": 8.125299384479714e-05, "loss": 1.7472, "step": 5330 }, { "epoch": 0.29714062761273063, "grad_norm": 0.5721607804298401, "learning_rate": 8.124604290891535e-05, "loss": 1.8646, "step": 5331 }, { "epoch": 0.29719636586589376, "grad_norm": 0.5612310171127319, "learning_rate": 8.123909098209126e-05, "loss": 1.6506, "step": 5332 }, { "epoch": 0.2972521041190569, "grad_norm": 0.5630115866661072, "learning_rate": 8.123213806454535e-05, "loss": 1.805, "step": 5333 }, { "epoch": 0.2973078423722201, "grad_norm": 0.5319987535476685, "learning_rate": 8.122518415649808e-05, "loss": 1.6501, "step": 5334 }, { "epoch": 0.2973635806253832, "grad_norm": 0.5346727967262268, "learning_rate": 8.121822925817006e-05, "loss": 1.7944, "step": 5335 }, { "epoch": 0.29741931887854633, "grad_norm": 0.5356037616729736, "learning_rate": 8.121127336978183e-05, "loss": 1.5578, "step": 5336 }, { "epoch": 0.2974750571317095, "grad_norm": 0.5593723058700562, "learning_rate": 8.120431649155396e-05, "loss": 1.7118, "step": 5337 }, { "epoch": 0.29753079538487265, "grad_norm": 0.5361452102661133, "learning_rate": 8.11973586237071e-05, "loss": 1.7363, "step": 5338 }, { "epoch": 0.2975865336380358, "grad_norm": 0.5503700971603394, "learning_rate": 8.119039976646192e-05, "loss": 1.74, "step": 5339 }, { "epoch": 0.2976422718911989, "grad_norm": 0.5040326714515686, "learning_rate": 8.118343992003913e-05, "loss": 1.5712, "step": 5340 }, { "epoch": 0.2976980101443621, "grad_norm": 0.5251342058181763, "learning_rate": 8.117647908465942e-05, "loss": 1.5346, "step": 5341 }, { "epoch": 0.2977537483975252, "grad_norm": 0.5664347410202026, "learning_rate": 8.116951726054358e-05, "loss": 2.0871, "step": 5342 }, { "epoch": 0.29780948665068835, "grad_norm": 0.5798686742782593, "learning_rate": 8.116255444791237e-05, "loss": 1.5362, "step": 5343 }, { "epoch": 0.29786522490385153, "grad_norm": 0.5248550772666931, "learning_rate": 8.115559064698662e-05, "loss": 1.5788, "step": 5344 }, { "epoch": 0.29792096315701466, "grad_norm": 0.6149808764457703, "learning_rate": 8.11486258579872e-05, "loss": 1.7055, "step": 5345 }, { "epoch": 0.2979767014101778, "grad_norm": 0.6035127639770508, "learning_rate": 8.114166008113498e-05, "loss": 1.8135, "step": 5346 }, { "epoch": 0.298032439663341, "grad_norm": 0.5967592000961304, "learning_rate": 8.113469331665085e-05, "loss": 1.655, "step": 5347 }, { "epoch": 0.2980881779165041, "grad_norm": 0.5948666334152222, "learning_rate": 8.112772556475579e-05, "loss": 2.0929, "step": 5348 }, { "epoch": 0.29814391616966723, "grad_norm": 0.5955588221549988, "learning_rate": 8.112075682567075e-05, "loss": 1.6594, "step": 5349 }, { "epoch": 0.29819965442283036, "grad_norm": 0.5304718017578125, "learning_rate": 8.111378709961676e-05, "loss": 1.7254, "step": 5350 }, { "epoch": 0.29825539267599355, "grad_norm": 0.5426492691040039, "learning_rate": 8.110681638681485e-05, "loss": 1.7559, "step": 5351 }, { "epoch": 0.2983111309291567, "grad_norm": 0.6616886258125305, "learning_rate": 8.109984468748608e-05, "loss": 1.6271, "step": 5352 }, { "epoch": 0.2983668691823198, "grad_norm": 0.537685751914978, "learning_rate": 8.109287200185157e-05, "loss": 1.6231, "step": 5353 }, { "epoch": 0.298422607435483, "grad_norm": 0.5190281867980957, "learning_rate": 8.108589833013245e-05, "loss": 1.5838, "step": 5354 }, { "epoch": 0.2984783456886461, "grad_norm": 0.5232527852058411, "learning_rate": 8.107892367254986e-05, "loss": 1.5132, "step": 5355 }, { "epoch": 0.29853408394180925, "grad_norm": 0.5797703266143799, "learning_rate": 8.107194802932503e-05, "loss": 1.811, "step": 5356 }, { "epoch": 0.29858982219497243, "grad_norm": 0.5324226021766663, "learning_rate": 8.106497140067916e-05, "loss": 1.8477, "step": 5357 }, { "epoch": 0.29864556044813556, "grad_norm": 0.5274566411972046, "learning_rate": 8.105799378683353e-05, "loss": 1.5521, "step": 5358 }, { "epoch": 0.2987012987012987, "grad_norm": 0.5862823128700256, "learning_rate": 8.10510151880094e-05, "loss": 1.6123, "step": 5359 }, { "epoch": 0.2987570369544619, "grad_norm": 0.5503446459770203, "learning_rate": 8.104403560442813e-05, "loss": 1.6369, "step": 5360 }, { "epoch": 0.298812775207625, "grad_norm": 0.5560075044631958, "learning_rate": 8.103705503631104e-05, "loss": 1.762, "step": 5361 }, { "epoch": 0.29886851346078813, "grad_norm": 0.5699611306190491, "learning_rate": 8.103007348387952e-05, "loss": 1.9896, "step": 5362 }, { "epoch": 0.29892425171395126, "grad_norm": 0.5774125456809998, "learning_rate": 8.102309094735498e-05, "loss": 1.7463, "step": 5363 }, { "epoch": 0.29897998996711445, "grad_norm": 0.5046089887619019, "learning_rate": 8.101610742695889e-05, "loss": 1.4381, "step": 5364 }, { "epoch": 0.2990357282202776, "grad_norm": 0.5611773133277893, "learning_rate": 8.100912292291269e-05, "loss": 1.8118, "step": 5365 }, { "epoch": 0.2990914664734407, "grad_norm": 0.5826941132545471, "learning_rate": 8.100213743543793e-05, "loss": 1.7309, "step": 5366 }, { "epoch": 0.2991472047266039, "grad_norm": 0.5598444938659668, "learning_rate": 8.099515096475611e-05, "loss": 1.7422, "step": 5367 }, { "epoch": 0.299202942979767, "grad_norm": 0.5191280841827393, "learning_rate": 8.098816351108881e-05, "loss": 1.5088, "step": 5368 }, { "epoch": 0.29925868123293015, "grad_norm": 0.589454174041748, "learning_rate": 8.098117507465765e-05, "loss": 1.4643, "step": 5369 }, { "epoch": 0.29931441948609333, "grad_norm": 0.5066042542457581, "learning_rate": 8.097418565568424e-05, "loss": 1.3811, "step": 5370 }, { "epoch": 0.29937015773925646, "grad_norm": 0.5717688798904419, "learning_rate": 8.096719525439026e-05, "loss": 1.5929, "step": 5371 }, { "epoch": 0.2994258959924196, "grad_norm": 0.5810229778289795, "learning_rate": 8.096020387099739e-05, "loss": 1.5428, "step": 5372 }, { "epoch": 0.2994816342455827, "grad_norm": 0.5295297503471375, "learning_rate": 8.095321150572738e-05, "loss": 1.5148, "step": 5373 }, { "epoch": 0.2995373724987459, "grad_norm": 0.6027771234512329, "learning_rate": 8.094621815880197e-05, "loss": 1.898, "step": 5374 }, { "epoch": 0.29959311075190903, "grad_norm": 0.5107868909835815, "learning_rate": 8.093922383044293e-05, "loss": 1.4073, "step": 5375 }, { "epoch": 0.29964884900507216, "grad_norm": 0.5989086031913757, "learning_rate": 8.09322285208721e-05, "loss": 1.7551, "step": 5376 }, { "epoch": 0.29970458725823534, "grad_norm": 0.5706072449684143, "learning_rate": 8.092523223031134e-05, "loss": 1.8272, "step": 5377 }, { "epoch": 0.2997603255113985, "grad_norm": 0.5593813061714172, "learning_rate": 8.091823495898251e-05, "loss": 1.6346, "step": 5378 }, { "epoch": 0.2998160637645616, "grad_norm": 0.5510803461074829, "learning_rate": 8.091123670710754e-05, "loss": 1.7025, "step": 5379 }, { "epoch": 0.2998718020177248, "grad_norm": 0.5860506892204285, "learning_rate": 8.090423747490836e-05, "loss": 1.6895, "step": 5380 }, { "epoch": 0.2999275402708879, "grad_norm": 0.5655683875083923, "learning_rate": 8.089723726260696e-05, "loss": 1.8338, "step": 5381 }, { "epoch": 0.29998327852405104, "grad_norm": 0.5369336605072021, "learning_rate": 8.089023607042534e-05, "loss": 1.65, "step": 5382 }, { "epoch": 0.30003901677721423, "grad_norm": 0.5484170317649841, "learning_rate": 8.088323389858552e-05, "loss": 1.433, "step": 5383 }, { "epoch": 0.30009475503037736, "grad_norm": 0.5139251947402954, "learning_rate": 8.08762307473096e-05, "loss": 1.3703, "step": 5384 }, { "epoch": 0.3001504932835405, "grad_norm": 0.6160516142845154, "learning_rate": 8.086922661681966e-05, "loss": 2.1215, "step": 5385 }, { "epoch": 0.3002062315367036, "grad_norm": 0.5299053192138672, "learning_rate": 8.086222150733782e-05, "loss": 1.5703, "step": 5386 }, { "epoch": 0.3002619697898668, "grad_norm": 0.5320441722869873, "learning_rate": 8.085521541908627e-05, "loss": 1.5785, "step": 5387 }, { "epoch": 0.30031770804302993, "grad_norm": 0.5633600354194641, "learning_rate": 8.084820835228717e-05, "loss": 1.799, "step": 5388 }, { "epoch": 0.30037344629619306, "grad_norm": 0.5468734502792358, "learning_rate": 8.084120030716275e-05, "loss": 1.6782, "step": 5389 }, { "epoch": 0.30042918454935624, "grad_norm": 0.5711122751235962, "learning_rate": 8.083419128393528e-05, "loss": 1.6544, "step": 5390 }, { "epoch": 0.30048492280251937, "grad_norm": 0.5407732129096985, "learning_rate": 8.082718128282705e-05, "loss": 1.7962, "step": 5391 }, { "epoch": 0.3005406610556825, "grad_norm": 0.5521290898323059, "learning_rate": 8.082017030406037e-05, "loss": 1.7551, "step": 5392 }, { "epoch": 0.3005963993088457, "grad_norm": 0.5816917419433594, "learning_rate": 8.081315834785756e-05, "loss": 1.8789, "step": 5393 }, { "epoch": 0.3006521375620088, "grad_norm": 0.5271922945976257, "learning_rate": 8.080614541444103e-05, "loss": 1.7545, "step": 5394 }, { "epoch": 0.30070787581517194, "grad_norm": 0.543911337852478, "learning_rate": 8.079913150403318e-05, "loss": 1.6059, "step": 5395 }, { "epoch": 0.3007636140683351, "grad_norm": 0.547044038772583, "learning_rate": 8.079211661685644e-05, "loss": 2.0125, "step": 5396 }, { "epoch": 0.30081935232149826, "grad_norm": 0.6385172605514526, "learning_rate": 8.07851007531333e-05, "loss": 1.8713, "step": 5397 }, { "epoch": 0.3008750905746614, "grad_norm": 0.5882077813148499, "learning_rate": 8.077808391308626e-05, "loss": 1.6547, "step": 5398 }, { "epoch": 0.3009308288278245, "grad_norm": 0.5390593409538269, "learning_rate": 8.077106609693784e-05, "loss": 1.5186, "step": 5399 }, { "epoch": 0.3009865670809877, "grad_norm": 0.5759447813034058, "learning_rate": 8.076404730491061e-05, "loss": 1.8402, "step": 5400 }, { "epoch": 0.30104230533415083, "grad_norm": 0.5196195244789124, "learning_rate": 8.075702753722718e-05, "loss": 1.656, "step": 5401 }, { "epoch": 0.30109804358731396, "grad_norm": 0.5357980728149414, "learning_rate": 8.075000679411014e-05, "loss": 1.6743, "step": 5402 }, { "epoch": 0.30115378184047714, "grad_norm": 0.5370086431503296, "learning_rate": 8.074298507578218e-05, "loss": 1.7567, "step": 5403 }, { "epoch": 0.30120952009364027, "grad_norm": 0.5173280835151672, "learning_rate": 8.073596238246599e-05, "loss": 1.5783, "step": 5404 }, { "epoch": 0.3012652583468034, "grad_norm": 0.5284645557403564, "learning_rate": 8.072893871438428e-05, "loss": 1.7135, "step": 5405 }, { "epoch": 0.3013209965999666, "grad_norm": 0.5838817954063416, "learning_rate": 8.072191407175976e-05, "loss": 1.8845, "step": 5406 }, { "epoch": 0.3013767348531297, "grad_norm": 0.5520975589752197, "learning_rate": 8.071488845481528e-05, "loss": 1.6139, "step": 5407 }, { "epoch": 0.30143247310629284, "grad_norm": 0.5155717730522156, "learning_rate": 8.07078618637736e-05, "loss": 1.4973, "step": 5408 }, { "epoch": 0.30148821135945597, "grad_norm": 0.5581832528114319, "learning_rate": 8.070083429885758e-05, "loss": 1.7224, "step": 5409 }, { "epoch": 0.30154394961261916, "grad_norm": 0.5734993815422058, "learning_rate": 8.069380576029011e-05, "loss": 1.508, "step": 5410 }, { "epoch": 0.3015996878657823, "grad_norm": 0.5819764733314514, "learning_rate": 8.068677624829406e-05, "loss": 2.0365, "step": 5411 }, { "epoch": 0.3016554261189454, "grad_norm": 0.538995623588562, "learning_rate": 8.067974576309241e-05, "loss": 1.8489, "step": 5412 }, { "epoch": 0.3017111643721086, "grad_norm": 0.5447677373886108, "learning_rate": 8.067271430490809e-05, "loss": 1.7361, "step": 5413 }, { "epoch": 0.3017669026252717, "grad_norm": 0.5370633602142334, "learning_rate": 8.066568187396409e-05, "loss": 1.5648, "step": 5414 }, { "epoch": 0.30182264087843486, "grad_norm": 0.5709346532821655, "learning_rate": 8.065864847048346e-05, "loss": 1.7308, "step": 5415 }, { "epoch": 0.30187837913159804, "grad_norm": 0.5642514824867249, "learning_rate": 8.065161409468925e-05, "loss": 1.9456, "step": 5416 }, { "epoch": 0.30193411738476117, "grad_norm": 0.5522916316986084, "learning_rate": 8.064457874680457e-05, "loss": 1.8213, "step": 5417 }, { "epoch": 0.3019898556379243, "grad_norm": 0.5913909077644348, "learning_rate": 8.06375424270525e-05, "loss": 1.8837, "step": 5418 }, { "epoch": 0.3020455938910874, "grad_norm": 0.596079409122467, "learning_rate": 8.063050513565624e-05, "loss": 1.9783, "step": 5419 }, { "epoch": 0.3021013321442506, "grad_norm": 0.5493654012680054, "learning_rate": 8.062346687283892e-05, "loss": 1.8092, "step": 5420 }, { "epoch": 0.30215707039741374, "grad_norm": 0.5493000745773315, "learning_rate": 8.06164276388238e-05, "loss": 1.6994, "step": 5421 }, { "epoch": 0.30221280865057687, "grad_norm": 0.4986167550086975, "learning_rate": 8.060938743383408e-05, "loss": 1.5504, "step": 5422 }, { "epoch": 0.30226854690374005, "grad_norm": 0.5836266875267029, "learning_rate": 8.060234625809306e-05, "loss": 1.8898, "step": 5423 }, { "epoch": 0.3023242851569032, "grad_norm": 0.5557297468185425, "learning_rate": 8.059530411182406e-05, "loss": 1.7518, "step": 5424 }, { "epoch": 0.3023800234100663, "grad_norm": 0.5643293261528015, "learning_rate": 8.058826099525039e-05, "loss": 1.92, "step": 5425 }, { "epoch": 0.3024357616632295, "grad_norm": 0.5600275993347168, "learning_rate": 8.058121690859541e-05, "loss": 1.7421, "step": 5426 }, { "epoch": 0.3024914999163926, "grad_norm": 0.5405864119529724, "learning_rate": 8.057417185208254e-05, "loss": 1.7487, "step": 5427 }, { "epoch": 0.30254723816955575, "grad_norm": 0.5578258633613586, "learning_rate": 8.056712582593519e-05, "loss": 1.7268, "step": 5428 }, { "epoch": 0.30260297642271894, "grad_norm": 0.5377827286720276, "learning_rate": 8.056007883037682e-05, "loss": 1.8249, "step": 5429 }, { "epoch": 0.30265871467588207, "grad_norm": 0.5574936270713806, "learning_rate": 8.055303086563095e-05, "loss": 1.8337, "step": 5430 }, { "epoch": 0.3027144529290452, "grad_norm": 0.594794511795044, "learning_rate": 8.054598193192106e-05, "loss": 2.0531, "step": 5431 }, { "epoch": 0.3027701911822083, "grad_norm": 0.509722888469696, "learning_rate": 8.053893202947074e-05, "loss": 1.6712, "step": 5432 }, { "epoch": 0.3028259294353715, "grad_norm": 0.5056367516517639, "learning_rate": 8.053188115850354e-05, "loss": 1.5738, "step": 5433 }, { "epoch": 0.30288166768853464, "grad_norm": 0.5353802442550659, "learning_rate": 8.052482931924308e-05, "loss": 1.8257, "step": 5434 }, { "epoch": 0.30293740594169777, "grad_norm": 0.535033106803894, "learning_rate": 8.051777651191299e-05, "loss": 1.7261, "step": 5435 }, { "epoch": 0.30299314419486095, "grad_norm": 0.5537331700325012, "learning_rate": 8.051072273673698e-05, "loss": 1.7634, "step": 5436 }, { "epoch": 0.3030488824480241, "grad_norm": 0.538147509098053, "learning_rate": 8.050366799393874e-05, "loss": 1.5592, "step": 5437 }, { "epoch": 0.3031046207011872, "grad_norm": 0.5110997557640076, "learning_rate": 8.049661228374199e-05, "loss": 1.7104, "step": 5438 }, { "epoch": 0.3031603589543504, "grad_norm": 0.5138676166534424, "learning_rate": 8.04895556063705e-05, "loss": 1.7344, "step": 5439 }, { "epoch": 0.3032160972075135, "grad_norm": 0.5240350961685181, "learning_rate": 8.048249796204808e-05, "loss": 1.6345, "step": 5440 }, { "epoch": 0.30327183546067665, "grad_norm": 0.5258268713951111, "learning_rate": 8.047543935099855e-05, "loss": 1.542, "step": 5441 }, { "epoch": 0.3033275737138398, "grad_norm": 0.5549874901771545, "learning_rate": 8.046837977344577e-05, "loss": 1.8106, "step": 5442 }, { "epoch": 0.30338331196700297, "grad_norm": 0.5787036418914795, "learning_rate": 8.046131922961362e-05, "loss": 1.8995, "step": 5443 }, { "epoch": 0.3034390502201661, "grad_norm": 0.5319430828094482, "learning_rate": 8.045425771972603e-05, "loss": 1.471, "step": 5444 }, { "epoch": 0.3034947884733292, "grad_norm": 0.5467014312744141, "learning_rate": 8.044719524400694e-05, "loss": 1.6613, "step": 5445 }, { "epoch": 0.3035505267264924, "grad_norm": 0.5461364388465881, "learning_rate": 8.044013180268034e-05, "loss": 1.7442, "step": 5446 }, { "epoch": 0.30360626497965554, "grad_norm": 0.5711673498153687, "learning_rate": 8.043306739597024e-05, "loss": 1.7848, "step": 5447 }, { "epoch": 0.30366200323281867, "grad_norm": 0.5382382273674011, "learning_rate": 8.042600202410066e-05, "loss": 1.5744, "step": 5448 }, { "epoch": 0.30371774148598185, "grad_norm": 0.5482212901115417, "learning_rate": 8.041893568729573e-05, "loss": 1.6689, "step": 5449 }, { "epoch": 0.303773479739145, "grad_norm": 0.5345839262008667, "learning_rate": 8.041186838577949e-05, "loss": 1.6285, "step": 5450 }, { "epoch": 0.3038292179923081, "grad_norm": 0.5510614514350891, "learning_rate": 8.04048001197761e-05, "loss": 1.5176, "step": 5451 }, { "epoch": 0.3038849562454713, "grad_norm": 0.5475590825080872, "learning_rate": 8.039773088950973e-05, "loss": 1.6778, "step": 5452 }, { "epoch": 0.3039406944986344, "grad_norm": 0.5662024021148682, "learning_rate": 8.039066069520455e-05, "loss": 1.9253, "step": 5453 }, { "epoch": 0.30399643275179755, "grad_norm": 0.6412192583084106, "learning_rate": 8.038358953708482e-05, "loss": 1.8921, "step": 5454 }, { "epoch": 0.3040521710049607, "grad_norm": 0.5427385568618774, "learning_rate": 8.037651741537478e-05, "loss": 1.6157, "step": 5455 }, { "epoch": 0.30410790925812387, "grad_norm": 0.5492942333221436, "learning_rate": 8.03694443302987e-05, "loss": 1.6204, "step": 5456 }, { "epoch": 0.304163647511287, "grad_norm": 0.5571532249450684, "learning_rate": 8.036237028208092e-05, "loss": 1.6984, "step": 5457 }, { "epoch": 0.3042193857644501, "grad_norm": 0.5320706963539124, "learning_rate": 8.035529527094578e-05, "loss": 1.5733, "step": 5458 }, { "epoch": 0.3042751240176133, "grad_norm": 0.5525981187820435, "learning_rate": 8.034821929711767e-05, "loss": 1.6158, "step": 5459 }, { "epoch": 0.30433086227077644, "grad_norm": 0.5780904293060303, "learning_rate": 8.034114236082098e-05, "loss": 1.8269, "step": 5460 }, { "epoch": 0.30438660052393957, "grad_norm": 0.5405531525611877, "learning_rate": 8.033406446228014e-05, "loss": 1.8742, "step": 5461 }, { "epoch": 0.30444233877710275, "grad_norm": 0.5742613077163696, "learning_rate": 8.032698560171964e-05, "loss": 1.9496, "step": 5462 }, { "epoch": 0.3044980770302659, "grad_norm": 0.49316903948783875, "learning_rate": 8.031990577936398e-05, "loss": 1.5899, "step": 5463 }, { "epoch": 0.304553815283429, "grad_norm": 0.5170844197273254, "learning_rate": 8.031282499543769e-05, "loss": 1.6575, "step": 5464 }, { "epoch": 0.30460955353659214, "grad_norm": 0.5051673650741577, "learning_rate": 8.030574325016532e-05, "loss": 1.5878, "step": 5465 }, { "epoch": 0.3046652917897553, "grad_norm": 0.493794709444046, "learning_rate": 8.029866054377148e-05, "loss": 1.5681, "step": 5466 }, { "epoch": 0.30472103004291845, "grad_norm": 0.5372213125228882, "learning_rate": 8.029157687648077e-05, "loss": 1.6819, "step": 5467 }, { "epoch": 0.3047767682960816, "grad_norm": 0.559104323387146, "learning_rate": 8.028449224851785e-05, "loss": 1.8688, "step": 5468 }, { "epoch": 0.30483250654924476, "grad_norm": 0.558225691318512, "learning_rate": 8.027740666010741e-05, "loss": 1.7629, "step": 5469 }, { "epoch": 0.3048882448024079, "grad_norm": 0.511577844619751, "learning_rate": 8.027032011147417e-05, "loss": 1.594, "step": 5470 }, { "epoch": 0.304943983055571, "grad_norm": 0.5308223962783813, "learning_rate": 8.026323260284286e-05, "loss": 1.6677, "step": 5471 }, { "epoch": 0.3049997213087342, "grad_norm": 0.5670995712280273, "learning_rate": 8.025614413443824e-05, "loss": 1.5382, "step": 5472 }, { "epoch": 0.30505545956189734, "grad_norm": 0.553377091884613, "learning_rate": 8.024905470648516e-05, "loss": 1.59, "step": 5473 }, { "epoch": 0.30511119781506046, "grad_norm": 0.5147939324378967, "learning_rate": 8.024196431920841e-05, "loss": 1.6797, "step": 5474 }, { "epoch": 0.30516693606822365, "grad_norm": 0.5732524394989014, "learning_rate": 8.023487297283289e-05, "loss": 1.7703, "step": 5475 }, { "epoch": 0.3052226743213868, "grad_norm": 0.5088878870010376, "learning_rate": 8.022778066758348e-05, "loss": 1.5239, "step": 5476 }, { "epoch": 0.3052784125745499, "grad_norm": 0.5896703600883484, "learning_rate": 8.02206874036851e-05, "loss": 1.8356, "step": 5477 }, { "epoch": 0.30533415082771304, "grad_norm": 0.5752948522567749, "learning_rate": 8.021359318136273e-05, "loss": 1.8527, "step": 5478 }, { "epoch": 0.3053898890808762, "grad_norm": 0.5507591366767883, "learning_rate": 8.020649800084133e-05, "loss": 1.7682, "step": 5479 }, { "epoch": 0.30544562733403935, "grad_norm": 0.5891523957252502, "learning_rate": 8.019940186234591e-05, "loss": 1.7112, "step": 5480 }, { "epoch": 0.3055013655872025, "grad_norm": 0.5745503306388855, "learning_rate": 8.019230476610155e-05, "loss": 1.7824, "step": 5481 }, { "epoch": 0.30555710384036566, "grad_norm": 0.6154142022132874, "learning_rate": 8.018520671233333e-05, "loss": 1.8217, "step": 5482 }, { "epoch": 0.3056128420935288, "grad_norm": 0.5336470603942871, "learning_rate": 8.017810770126633e-05, "loss": 1.572, "step": 5483 }, { "epoch": 0.3056685803466919, "grad_norm": 0.6083388328552246, "learning_rate": 8.017100773312572e-05, "loss": 1.8889, "step": 5484 }, { "epoch": 0.3057243185998551, "grad_norm": 0.5398688912391663, "learning_rate": 8.016390680813664e-05, "loss": 1.8318, "step": 5485 }, { "epoch": 0.30578005685301823, "grad_norm": 0.5180187225341797, "learning_rate": 8.015680492652432e-05, "loss": 1.4898, "step": 5486 }, { "epoch": 0.30583579510618136, "grad_norm": 0.5112860798835754, "learning_rate": 8.014970208851395e-05, "loss": 1.622, "step": 5487 }, { "epoch": 0.3058915333593445, "grad_norm": 0.5450818538665771, "learning_rate": 8.014259829433082e-05, "loss": 1.5932, "step": 5488 }, { "epoch": 0.3059472716125077, "grad_norm": 0.5598384737968445, "learning_rate": 8.013549354420022e-05, "loss": 1.7663, "step": 5489 }, { "epoch": 0.3060030098656708, "grad_norm": 0.574329137802124, "learning_rate": 8.012838783834749e-05, "loss": 1.7812, "step": 5490 }, { "epoch": 0.30605874811883393, "grad_norm": 0.5636276006698608, "learning_rate": 8.012128117699793e-05, "loss": 1.8031, "step": 5491 }, { "epoch": 0.3061144863719971, "grad_norm": 0.5229976177215576, "learning_rate": 8.011417356037697e-05, "loss": 1.7483, "step": 5492 }, { "epoch": 0.30617022462516025, "grad_norm": 0.5263829231262207, "learning_rate": 8.010706498870997e-05, "loss": 1.6449, "step": 5493 }, { "epoch": 0.3062259628783234, "grad_norm": 0.5461215376853943, "learning_rate": 8.009995546222242e-05, "loss": 1.5837, "step": 5494 }, { "epoch": 0.30628170113148656, "grad_norm": 0.541483998298645, "learning_rate": 8.009284498113979e-05, "loss": 1.7239, "step": 5495 }, { "epoch": 0.3063374393846497, "grad_norm": 0.540389358997345, "learning_rate": 8.008573354568756e-05, "loss": 1.6928, "step": 5496 }, { "epoch": 0.3063931776378128, "grad_norm": 0.550672709941864, "learning_rate": 8.007862115609129e-05, "loss": 1.7299, "step": 5497 }, { "epoch": 0.306448915890976, "grad_norm": 0.532590389251709, "learning_rate": 8.007150781257651e-05, "loss": 1.6299, "step": 5498 }, { "epoch": 0.30650465414413913, "grad_norm": 0.5489155650138855, "learning_rate": 8.006439351536883e-05, "loss": 1.6814, "step": 5499 }, { "epoch": 0.30656039239730226, "grad_norm": 0.5809459090232849, "learning_rate": 8.005727826469389e-05, "loss": 1.7617, "step": 5500 }, { "epoch": 0.3066161306504654, "grad_norm": 0.5688945055007935, "learning_rate": 8.005016206077731e-05, "loss": 1.913, "step": 5501 }, { "epoch": 0.3066718689036286, "grad_norm": 0.5430113673210144, "learning_rate": 8.004304490384482e-05, "loss": 1.6782, "step": 5502 }, { "epoch": 0.3067276071567917, "grad_norm": 0.5550969243049622, "learning_rate": 8.003592679412208e-05, "loss": 1.4965, "step": 5503 }, { "epoch": 0.30678334540995483, "grad_norm": 0.5173535943031311, "learning_rate": 8.00288077318349e-05, "loss": 1.4724, "step": 5504 }, { "epoch": 0.306839083663118, "grad_norm": 0.5464041233062744, "learning_rate": 8.0021687717209e-05, "loss": 1.6722, "step": 5505 }, { "epoch": 0.30689482191628115, "grad_norm": 0.5555015206336975, "learning_rate": 8.001456675047019e-05, "loss": 1.8088, "step": 5506 }, { "epoch": 0.3069505601694443, "grad_norm": 0.5883082747459412, "learning_rate": 8.000744483184433e-05, "loss": 1.5916, "step": 5507 }, { "epoch": 0.30700629842260746, "grad_norm": 0.5937238931655884, "learning_rate": 8.000032196155726e-05, "loss": 1.8253, "step": 5508 }, { "epoch": 0.3070620366757706, "grad_norm": 0.5752248764038086, "learning_rate": 7.999319813983492e-05, "loss": 1.7183, "step": 5509 }, { "epoch": 0.3071177749289337, "grad_norm": 0.5927345156669617, "learning_rate": 7.99860733669032e-05, "loss": 1.8415, "step": 5510 }, { "epoch": 0.30717351318209685, "grad_norm": 0.597845196723938, "learning_rate": 7.997894764298806e-05, "loss": 1.8575, "step": 5511 }, { "epoch": 0.30722925143526003, "grad_norm": 0.5484491586685181, "learning_rate": 7.997182096831548e-05, "loss": 1.6398, "step": 5512 }, { "epoch": 0.30728498968842316, "grad_norm": 0.5977261662483215, "learning_rate": 7.99646933431115e-05, "loss": 2.0446, "step": 5513 }, { "epoch": 0.3073407279415863, "grad_norm": 0.5897913575172424, "learning_rate": 7.995756476760214e-05, "loss": 1.7335, "step": 5514 }, { "epoch": 0.3073964661947495, "grad_norm": 0.5303786396980286, "learning_rate": 7.995043524201351e-05, "loss": 1.6374, "step": 5515 }, { "epoch": 0.3074522044479126, "grad_norm": 0.6054732799530029, "learning_rate": 7.994330476657168e-05, "loss": 1.8542, "step": 5516 }, { "epoch": 0.30750794270107573, "grad_norm": 0.5825492739677429, "learning_rate": 7.993617334150282e-05, "loss": 1.74, "step": 5517 }, { "epoch": 0.3075636809542389, "grad_norm": 0.5496809482574463, "learning_rate": 7.992904096703307e-05, "loss": 1.6844, "step": 5518 }, { "epoch": 0.30761941920740205, "grad_norm": 0.5574871301651001, "learning_rate": 7.992190764338864e-05, "loss": 1.7397, "step": 5519 }, { "epoch": 0.3076751574605652, "grad_norm": 0.5654902458190918, "learning_rate": 7.991477337079576e-05, "loss": 1.7361, "step": 5520 }, { "epoch": 0.30773089571372836, "grad_norm": 0.5748382806777954, "learning_rate": 7.990763814948068e-05, "loss": 1.8819, "step": 5521 }, { "epoch": 0.3077866339668915, "grad_norm": 0.5120726823806763, "learning_rate": 7.99005019796697e-05, "loss": 1.5405, "step": 5522 }, { "epoch": 0.3078423722200546, "grad_norm": 0.5529910326004028, "learning_rate": 7.989336486158912e-05, "loss": 1.6712, "step": 5523 }, { "epoch": 0.30789811047321775, "grad_norm": 0.5775067210197449, "learning_rate": 7.988622679546529e-05, "loss": 2.0319, "step": 5524 }, { "epoch": 0.30795384872638093, "grad_norm": 0.5432143211364746, "learning_rate": 7.987908778152462e-05, "loss": 1.5891, "step": 5525 }, { "epoch": 0.30800958697954406, "grad_norm": 0.5764423608779907, "learning_rate": 7.987194781999345e-05, "loss": 1.865, "step": 5526 }, { "epoch": 0.3080653252327072, "grad_norm": 0.5256220698356628, "learning_rate": 7.98648069110983e-05, "loss": 1.5777, "step": 5527 }, { "epoch": 0.3081210634858704, "grad_norm": 0.5597642064094543, "learning_rate": 7.985766505506559e-05, "loss": 1.8957, "step": 5528 }, { "epoch": 0.3081768017390335, "grad_norm": 0.5411173701286316, "learning_rate": 7.985052225212181e-05, "loss": 1.7575, "step": 5529 }, { "epoch": 0.30823253999219663, "grad_norm": 0.5252230763435364, "learning_rate": 7.984337850249352e-05, "loss": 1.7377, "step": 5530 }, { "epoch": 0.3082882782453598, "grad_norm": 0.5985997915267944, "learning_rate": 7.983623380640729e-05, "loss": 1.7941, "step": 5531 }, { "epoch": 0.30834401649852294, "grad_norm": 0.5696808099746704, "learning_rate": 7.982908816408963e-05, "loss": 1.8425, "step": 5532 }, { "epoch": 0.3083997547516861, "grad_norm": 0.5184767246246338, "learning_rate": 7.982194157576723e-05, "loss": 1.6765, "step": 5533 }, { "epoch": 0.3084554930048492, "grad_norm": 0.5509563088417053, "learning_rate": 7.981479404166672e-05, "loss": 1.8554, "step": 5534 }, { "epoch": 0.3085112312580124, "grad_norm": 0.5477381944656372, "learning_rate": 7.980764556201478e-05, "loss": 1.6513, "step": 5535 }, { "epoch": 0.3085669695111755, "grad_norm": 0.5575202107429504, "learning_rate": 7.980049613703811e-05, "loss": 1.7565, "step": 5536 }, { "epoch": 0.30862270776433864, "grad_norm": 0.578071117401123, "learning_rate": 7.979334576696344e-05, "loss": 1.6711, "step": 5537 }, { "epoch": 0.30867844601750183, "grad_norm": 0.5293973684310913, "learning_rate": 7.978619445201756e-05, "loss": 1.8865, "step": 5538 }, { "epoch": 0.30873418427066496, "grad_norm": 0.5793629288673401, "learning_rate": 7.977904219242724e-05, "loss": 1.9338, "step": 5539 }, { "epoch": 0.3087899225238281, "grad_norm": 0.5701123476028442, "learning_rate": 7.977188898841936e-05, "loss": 1.778, "step": 5540 }, { "epoch": 0.30884566077699127, "grad_norm": 0.5166484117507935, "learning_rate": 7.976473484022071e-05, "loss": 1.6528, "step": 5541 }, { "epoch": 0.3089013990301544, "grad_norm": 0.5501734018325806, "learning_rate": 7.975757974805824e-05, "loss": 1.6939, "step": 5542 }, { "epoch": 0.30895713728331753, "grad_norm": 0.5325387716293335, "learning_rate": 7.975042371215881e-05, "loss": 1.5085, "step": 5543 }, { "epoch": 0.3090128755364807, "grad_norm": 0.5717397928237915, "learning_rate": 7.974326673274943e-05, "loss": 1.7745, "step": 5544 }, { "epoch": 0.30906861378964384, "grad_norm": 0.5344177484512329, "learning_rate": 7.973610881005702e-05, "loss": 1.6344, "step": 5545 }, { "epoch": 0.30912435204280697, "grad_norm": 0.5647115707397461, "learning_rate": 7.972894994430862e-05, "loss": 1.8173, "step": 5546 }, { "epoch": 0.3091800902959701, "grad_norm": 0.5356699824333191, "learning_rate": 7.972179013573125e-05, "loss": 1.6173, "step": 5547 }, { "epoch": 0.3092358285491333, "grad_norm": 0.5651494860649109, "learning_rate": 7.971462938455199e-05, "loss": 1.5781, "step": 5548 }, { "epoch": 0.3092915668022964, "grad_norm": 0.5726121664047241, "learning_rate": 7.970746769099795e-05, "loss": 1.5528, "step": 5549 }, { "epoch": 0.30934730505545954, "grad_norm": 0.6116449236869812, "learning_rate": 7.970030505529624e-05, "loss": 1.9145, "step": 5550 }, { "epoch": 0.3094030433086227, "grad_norm": 0.5738492012023926, "learning_rate": 7.969314147767399e-05, "loss": 1.7875, "step": 5551 }, { "epoch": 0.30945878156178586, "grad_norm": 0.5894981026649475, "learning_rate": 7.968597695835844e-05, "loss": 1.5879, "step": 5552 }, { "epoch": 0.309514519814949, "grad_norm": 0.5126131772994995, "learning_rate": 7.967881149757678e-05, "loss": 1.6178, "step": 5553 }, { "epoch": 0.30957025806811217, "grad_norm": 0.5616469979286194, "learning_rate": 7.967164509555624e-05, "loss": 1.7701, "step": 5554 }, { "epoch": 0.3096259963212753, "grad_norm": 0.5041468739509583, "learning_rate": 7.966447775252415e-05, "loss": 1.5632, "step": 5555 }, { "epoch": 0.3096817345744384, "grad_norm": 0.5093483328819275, "learning_rate": 7.965730946870775e-05, "loss": 1.7161, "step": 5556 }, { "epoch": 0.30973747282760156, "grad_norm": 0.6104699373245239, "learning_rate": 7.965014024433443e-05, "loss": 1.7959, "step": 5557 }, { "epoch": 0.30979321108076474, "grad_norm": 0.5576456189155579, "learning_rate": 7.964297007963151e-05, "loss": 1.8631, "step": 5558 }, { "epoch": 0.30984894933392787, "grad_norm": 0.5558076500892639, "learning_rate": 7.963579897482642e-05, "loss": 1.7503, "step": 5559 }, { "epoch": 0.309904687587091, "grad_norm": 0.5433835983276367, "learning_rate": 7.96286269301466e-05, "loss": 1.6935, "step": 5560 }, { "epoch": 0.3099604258402542, "grad_norm": 0.5542037487030029, "learning_rate": 7.962145394581944e-05, "loss": 1.7342, "step": 5561 }, { "epoch": 0.3100161640934173, "grad_norm": 0.5680848360061646, "learning_rate": 7.961428002207249e-05, "loss": 1.6875, "step": 5562 }, { "epoch": 0.31007190234658044, "grad_norm": 0.5349116921424866, "learning_rate": 7.960710515913323e-05, "loss": 1.6991, "step": 5563 }, { "epoch": 0.3101276405997436, "grad_norm": 0.5729091167449951, "learning_rate": 7.959992935722924e-05, "loss": 1.8622, "step": 5564 }, { "epoch": 0.31018337885290675, "grad_norm": 0.558594286441803, "learning_rate": 7.959275261658804e-05, "loss": 1.8244, "step": 5565 }, { "epoch": 0.3102391171060699, "grad_norm": 0.5720626711845398, "learning_rate": 7.958557493743728e-05, "loss": 1.796, "step": 5566 }, { "epoch": 0.31029485535923307, "grad_norm": 0.7089996933937073, "learning_rate": 7.957839632000457e-05, "loss": 2.2928, "step": 5567 }, { "epoch": 0.3103505936123962, "grad_norm": 0.51308274269104, "learning_rate": 7.957121676451759e-05, "loss": 1.5466, "step": 5568 }, { "epoch": 0.3104063318655593, "grad_norm": 0.5389419794082642, "learning_rate": 7.956403627120403e-05, "loss": 1.7847, "step": 5569 }, { "epoch": 0.31046207011872246, "grad_norm": 0.5362538695335388, "learning_rate": 7.95568548402916e-05, "loss": 1.752, "step": 5570 }, { "epoch": 0.31051780837188564, "grad_norm": 0.5565882921218872, "learning_rate": 7.954967247200806e-05, "loss": 1.7436, "step": 5571 }, { "epoch": 0.31057354662504877, "grad_norm": 0.5700491070747375, "learning_rate": 7.95424891665812e-05, "loss": 1.3893, "step": 5572 }, { "epoch": 0.3106292848782119, "grad_norm": 0.5634492635726929, "learning_rate": 7.953530492423884e-05, "loss": 1.5228, "step": 5573 }, { "epoch": 0.3106850231313751, "grad_norm": 0.5454849004745483, "learning_rate": 7.95281197452088e-05, "loss": 1.7454, "step": 5574 }, { "epoch": 0.3107407613845382, "grad_norm": 0.5382822751998901, "learning_rate": 7.952093362971897e-05, "loss": 1.6264, "step": 5575 }, { "epoch": 0.31079649963770134, "grad_norm": 0.5650563836097717, "learning_rate": 7.951374657799724e-05, "loss": 1.4175, "step": 5576 }, { "epoch": 0.3108522378908645, "grad_norm": 0.570775032043457, "learning_rate": 7.950655859027154e-05, "loss": 1.6686, "step": 5577 }, { "epoch": 0.31090797614402765, "grad_norm": 0.5498449206352234, "learning_rate": 7.949936966676984e-05, "loss": 1.7351, "step": 5578 }, { "epoch": 0.3109637143971908, "grad_norm": 0.6256487369537354, "learning_rate": 7.949217980772012e-05, "loss": 1.9914, "step": 5579 }, { "epoch": 0.3110194526503539, "grad_norm": 0.6062150001525879, "learning_rate": 7.948498901335042e-05, "loss": 1.9362, "step": 5580 }, { "epoch": 0.3110751909035171, "grad_norm": 0.5351932048797607, "learning_rate": 7.947779728388878e-05, "loss": 1.6922, "step": 5581 }, { "epoch": 0.3111309291566802, "grad_norm": 0.6049745678901672, "learning_rate": 7.947060461956329e-05, "loss": 2.146, "step": 5582 }, { "epoch": 0.31118666740984335, "grad_norm": 0.5465789437294006, "learning_rate": 7.946341102060202e-05, "loss": 1.7858, "step": 5583 }, { "epoch": 0.31124240566300654, "grad_norm": 0.5127213597297668, "learning_rate": 7.945621648723313e-05, "loss": 1.6921, "step": 5584 }, { "epoch": 0.31129814391616967, "grad_norm": 0.5576222538948059, "learning_rate": 7.944902101968482e-05, "loss": 1.7601, "step": 5585 }, { "epoch": 0.3113538821693328, "grad_norm": 0.5145538449287415, "learning_rate": 7.944182461818525e-05, "loss": 1.6861, "step": 5586 }, { "epoch": 0.311409620422496, "grad_norm": 0.5060127973556519, "learning_rate": 7.943462728296266e-05, "loss": 1.4954, "step": 5587 }, { "epoch": 0.3114653586756591, "grad_norm": 0.5226243138313293, "learning_rate": 7.942742901424531e-05, "loss": 1.7086, "step": 5588 }, { "epoch": 0.31152109692882224, "grad_norm": 0.5711196064949036, "learning_rate": 7.942022981226149e-05, "loss": 1.7788, "step": 5589 }, { "epoch": 0.3115768351819854, "grad_norm": 0.511813759803772, "learning_rate": 7.941302967723951e-05, "loss": 1.3316, "step": 5590 }, { "epoch": 0.31163257343514855, "grad_norm": 0.5399052500724792, "learning_rate": 7.940582860940771e-05, "loss": 1.6683, "step": 5591 }, { "epoch": 0.3116883116883117, "grad_norm": 0.5305676460266113, "learning_rate": 7.939862660899448e-05, "loss": 1.7344, "step": 5592 }, { "epoch": 0.3117440499414748, "grad_norm": 0.5254833698272705, "learning_rate": 7.939142367622823e-05, "loss": 1.5524, "step": 5593 }, { "epoch": 0.311799788194638, "grad_norm": 0.5858429074287415, "learning_rate": 7.938421981133738e-05, "loss": 1.7415, "step": 5594 }, { "epoch": 0.3118555264478011, "grad_norm": 0.6082313656806946, "learning_rate": 7.937701501455039e-05, "loss": 1.5333, "step": 5595 }, { "epoch": 0.31191126470096425, "grad_norm": 0.5757048726081848, "learning_rate": 7.936980928609577e-05, "loss": 1.8723, "step": 5596 }, { "epoch": 0.31196700295412744, "grad_norm": 0.6089504957199097, "learning_rate": 7.936260262620205e-05, "loss": 1.8915, "step": 5597 }, { "epoch": 0.31202274120729057, "grad_norm": 0.588326096534729, "learning_rate": 7.935539503509775e-05, "loss": 1.8353, "step": 5598 }, { "epoch": 0.3120784794604537, "grad_norm": 0.5930234789848328, "learning_rate": 7.934818651301148e-05, "loss": 1.832, "step": 5599 }, { "epoch": 0.3121342177136169, "grad_norm": 0.5394973158836365, "learning_rate": 7.934097706017185e-05, "loss": 1.7301, "step": 5600 }, { "epoch": 0.31218995596678, "grad_norm": 0.5147609114646912, "learning_rate": 7.93337666768075e-05, "loss": 1.7095, "step": 5601 }, { "epoch": 0.31224569421994314, "grad_norm": 0.5531661510467529, "learning_rate": 7.932655536314708e-05, "loss": 1.6071, "step": 5602 }, { "epoch": 0.31230143247310627, "grad_norm": 0.5388891696929932, "learning_rate": 7.931934311941933e-05, "loss": 1.5759, "step": 5603 }, { "epoch": 0.31235717072626945, "grad_norm": 0.5236558318138123, "learning_rate": 7.931212994585294e-05, "loss": 1.5492, "step": 5604 }, { "epoch": 0.3124129089794326, "grad_norm": 0.6088682413101196, "learning_rate": 7.93049158426767e-05, "loss": 1.7768, "step": 5605 }, { "epoch": 0.3124686472325957, "grad_norm": 0.5254512429237366, "learning_rate": 7.92977008101194e-05, "loss": 1.6003, "step": 5606 }, { "epoch": 0.3125243854857589, "grad_norm": 0.5747987031936646, "learning_rate": 7.929048484840984e-05, "loss": 1.7666, "step": 5607 }, { "epoch": 0.312580123738922, "grad_norm": 0.5682463645935059, "learning_rate": 7.928326795777688e-05, "loss": 1.7861, "step": 5608 }, { "epoch": 0.31263586199208515, "grad_norm": 0.5339683890342712, "learning_rate": 7.927605013844939e-05, "loss": 1.614, "step": 5609 }, { "epoch": 0.31269160024524834, "grad_norm": 0.5913909673690796, "learning_rate": 7.926883139065627e-05, "loss": 1.7949, "step": 5610 }, { "epoch": 0.31274733849841146, "grad_norm": 0.5656397342681885, "learning_rate": 7.926161171462648e-05, "loss": 1.8147, "step": 5611 }, { "epoch": 0.3128030767515746, "grad_norm": 0.5707045197486877, "learning_rate": 7.925439111058897e-05, "loss": 1.7117, "step": 5612 }, { "epoch": 0.3128588150047378, "grad_norm": 0.5682026743888855, "learning_rate": 7.924716957877275e-05, "loss": 1.6873, "step": 5613 }, { "epoch": 0.3129145532579009, "grad_norm": 0.6239393353462219, "learning_rate": 7.92399471194068e-05, "loss": 2.136, "step": 5614 }, { "epoch": 0.31297029151106404, "grad_norm": 0.5405849814414978, "learning_rate": 7.923272373272024e-05, "loss": 1.7105, "step": 5615 }, { "epoch": 0.31302602976422716, "grad_norm": 0.5093609094619751, "learning_rate": 7.922549941894212e-05, "loss": 1.7117, "step": 5616 }, { "epoch": 0.31308176801739035, "grad_norm": 0.5615028738975525, "learning_rate": 7.921827417830155e-05, "loss": 1.7621, "step": 5617 }, { "epoch": 0.3131375062705535, "grad_norm": 0.5841954946517944, "learning_rate": 7.921104801102766e-05, "loss": 1.7155, "step": 5618 }, { "epoch": 0.3131932445237166, "grad_norm": 0.5684096217155457, "learning_rate": 7.920382091734966e-05, "loss": 1.5615, "step": 5619 }, { "epoch": 0.3132489827768798, "grad_norm": 0.5647116303443909, "learning_rate": 7.919659289749673e-05, "loss": 1.6964, "step": 5620 }, { "epoch": 0.3133047210300429, "grad_norm": 0.5479496121406555, "learning_rate": 7.918936395169809e-05, "loss": 1.6701, "step": 5621 }, { "epoch": 0.31336045928320605, "grad_norm": 0.5465035438537598, "learning_rate": 7.918213408018302e-05, "loss": 1.8372, "step": 5622 }, { "epoch": 0.31341619753636923, "grad_norm": 0.5440232157707214, "learning_rate": 7.91749032831808e-05, "loss": 1.6181, "step": 5623 }, { "epoch": 0.31347193578953236, "grad_norm": 0.5956066846847534, "learning_rate": 7.916767156092073e-05, "loss": 1.8816, "step": 5624 }, { "epoch": 0.3135276740426955, "grad_norm": 0.4970141053199768, "learning_rate": 7.916043891363221e-05, "loss": 1.331, "step": 5625 }, { "epoch": 0.3135834122958586, "grad_norm": 0.5314142107963562, "learning_rate": 7.915320534154457e-05, "loss": 1.7526, "step": 5626 }, { "epoch": 0.3136391505490218, "grad_norm": 0.5765748620033264, "learning_rate": 7.914597084488723e-05, "loss": 1.7204, "step": 5627 }, { "epoch": 0.31369488880218493, "grad_norm": 0.5975958704948425, "learning_rate": 7.913873542388963e-05, "loss": 1.8833, "step": 5628 }, { "epoch": 0.31375062705534806, "grad_norm": 0.5788082480430603, "learning_rate": 7.913149907878123e-05, "loss": 1.9049, "step": 5629 }, { "epoch": 0.31380636530851125, "grad_norm": 0.6019555330276489, "learning_rate": 7.912426180979152e-05, "loss": 2.005, "step": 5630 }, { "epoch": 0.3138621035616744, "grad_norm": 0.5763736963272095, "learning_rate": 7.911702361715006e-05, "loss": 1.7476, "step": 5631 }, { "epoch": 0.3139178418148375, "grad_norm": 0.5758547782897949, "learning_rate": 7.910978450108634e-05, "loss": 1.69, "step": 5632 }, { "epoch": 0.3139735800680007, "grad_norm": 0.5762767791748047, "learning_rate": 7.910254446183e-05, "loss": 1.7354, "step": 5633 }, { "epoch": 0.3140293183211638, "grad_norm": 0.5475091338157654, "learning_rate": 7.909530349961062e-05, "loss": 1.803, "step": 5634 }, { "epoch": 0.31408505657432695, "grad_norm": 0.5797522664070129, "learning_rate": 7.908806161465785e-05, "loss": 1.8425, "step": 5635 }, { "epoch": 0.31414079482749013, "grad_norm": 0.5494913458824158, "learning_rate": 7.908081880720137e-05, "loss": 1.7041, "step": 5636 }, { "epoch": 0.31419653308065326, "grad_norm": 0.5253703594207764, "learning_rate": 7.907357507747087e-05, "loss": 1.5982, "step": 5637 }, { "epoch": 0.3142522713338164, "grad_norm": 0.5663535594940186, "learning_rate": 7.906633042569607e-05, "loss": 1.6506, "step": 5638 }, { "epoch": 0.3143080095869795, "grad_norm": 0.5768305659294128, "learning_rate": 7.905908485210674e-05, "loss": 1.675, "step": 5639 }, { "epoch": 0.3143637478401427, "grad_norm": 0.5730108022689819, "learning_rate": 7.905183835693266e-05, "loss": 1.6702, "step": 5640 }, { "epoch": 0.31441948609330583, "grad_norm": 0.5377948880195618, "learning_rate": 7.904459094040366e-05, "loss": 1.8156, "step": 5641 }, { "epoch": 0.31447522434646896, "grad_norm": 0.5925690531730652, "learning_rate": 7.903734260274958e-05, "loss": 1.8198, "step": 5642 }, { "epoch": 0.31453096259963215, "grad_norm": 0.5221425294876099, "learning_rate": 7.903009334420027e-05, "loss": 1.6291, "step": 5643 }, { "epoch": 0.3145867008527953, "grad_norm": 0.5379535555839539, "learning_rate": 7.902284316498567e-05, "loss": 1.6026, "step": 5644 }, { "epoch": 0.3146424391059584, "grad_norm": 0.5477253198623657, "learning_rate": 7.901559206533571e-05, "loss": 1.9096, "step": 5645 }, { "epoch": 0.3146981773591216, "grad_norm": 0.6306549310684204, "learning_rate": 7.900834004548034e-05, "loss": 1.9637, "step": 5646 }, { "epoch": 0.3147539156122847, "grad_norm": 0.5738115906715393, "learning_rate": 7.900108710564954e-05, "loss": 1.8217, "step": 5647 }, { "epoch": 0.31480965386544785, "grad_norm": 0.5737825036048889, "learning_rate": 7.899383324607336e-05, "loss": 1.7018, "step": 5648 }, { "epoch": 0.314865392118611, "grad_norm": 0.5575332641601562, "learning_rate": 7.898657846698183e-05, "loss": 1.823, "step": 5649 }, { "epoch": 0.31492113037177416, "grad_norm": 0.5665508508682251, "learning_rate": 7.897932276860502e-05, "loss": 1.8531, "step": 5650 }, { "epoch": 0.3149768686249373, "grad_norm": 0.6147223711013794, "learning_rate": 7.897206615117307e-05, "loss": 1.8, "step": 5651 }, { "epoch": 0.3150326068781004, "grad_norm": 0.5605811476707458, "learning_rate": 7.89648086149161e-05, "loss": 1.8554, "step": 5652 }, { "epoch": 0.3150883451312636, "grad_norm": 0.5749962329864502, "learning_rate": 7.895755016006427e-05, "loss": 1.9814, "step": 5653 }, { "epoch": 0.31514408338442673, "grad_norm": 0.6655054688453674, "learning_rate": 7.895029078684779e-05, "loss": 1.6895, "step": 5654 }, { "epoch": 0.31519982163758986, "grad_norm": 0.5131604671478271, "learning_rate": 7.894303049549687e-05, "loss": 1.4731, "step": 5655 }, { "epoch": 0.31525555989075305, "grad_norm": 0.5364745855331421, "learning_rate": 7.893576928624178e-05, "loss": 1.819, "step": 5656 }, { "epoch": 0.3153112981439162, "grad_norm": 0.563586413860321, "learning_rate": 7.89285071593128e-05, "loss": 1.6023, "step": 5657 }, { "epoch": 0.3153670363970793, "grad_norm": 0.5618447065353394, "learning_rate": 7.892124411494022e-05, "loss": 1.5903, "step": 5658 }, { "epoch": 0.3154227746502425, "grad_norm": 0.5073031783103943, "learning_rate": 7.891398015335442e-05, "loss": 1.646, "step": 5659 }, { "epoch": 0.3154785129034056, "grad_norm": 0.5081502795219421, "learning_rate": 7.890671527478574e-05, "loss": 1.3751, "step": 5660 }, { "epoch": 0.31553425115656875, "grad_norm": 0.524069607257843, "learning_rate": 7.88994494794646e-05, "loss": 1.6491, "step": 5661 }, { "epoch": 0.3155899894097319, "grad_norm": 0.5874504446983337, "learning_rate": 7.88921827676214e-05, "loss": 1.5753, "step": 5662 }, { "epoch": 0.31564572766289506, "grad_norm": 0.5709517002105713, "learning_rate": 7.888491513948661e-05, "loss": 1.8023, "step": 5663 }, { "epoch": 0.3157014659160582, "grad_norm": 0.5294995903968811, "learning_rate": 7.887764659529073e-05, "loss": 1.6754, "step": 5664 }, { "epoch": 0.3157572041692213, "grad_norm": 0.5117160677909851, "learning_rate": 7.887037713526428e-05, "loss": 1.6262, "step": 5665 }, { "epoch": 0.3158129424223845, "grad_norm": 0.49994394183158875, "learning_rate": 7.88631067596378e-05, "loss": 1.5649, "step": 5666 }, { "epoch": 0.31586868067554763, "grad_norm": 0.486306756734848, "learning_rate": 7.885583546864184e-05, "loss": 1.4968, "step": 5667 }, { "epoch": 0.31592441892871076, "grad_norm": 0.5242376327514648, "learning_rate": 7.884856326250703e-05, "loss": 1.5559, "step": 5668 }, { "epoch": 0.31598015718187394, "grad_norm": 0.5692494511604309, "learning_rate": 7.884129014146397e-05, "loss": 1.8384, "step": 5669 }, { "epoch": 0.3160358954350371, "grad_norm": 0.5784143209457397, "learning_rate": 7.883401610574336e-05, "loss": 1.9506, "step": 5670 }, { "epoch": 0.3160916336882002, "grad_norm": 0.5659399032592773, "learning_rate": 7.882674115557587e-05, "loss": 1.6864, "step": 5671 }, { "epoch": 0.31614737194136333, "grad_norm": 0.6336827278137207, "learning_rate": 7.881946529119223e-05, "loss": 1.9635, "step": 5672 }, { "epoch": 0.3162031101945265, "grad_norm": 0.5327314734458923, "learning_rate": 7.881218851282317e-05, "loss": 1.5806, "step": 5673 }, { "epoch": 0.31625884844768964, "grad_norm": 0.5700320601463318, "learning_rate": 7.880491082069949e-05, "loss": 1.7419, "step": 5674 }, { "epoch": 0.3163145867008528, "grad_norm": 0.569348156452179, "learning_rate": 7.879763221505197e-05, "loss": 1.7392, "step": 5675 }, { "epoch": 0.31637032495401596, "grad_norm": 0.5255264639854431, "learning_rate": 7.879035269611146e-05, "loss": 1.6862, "step": 5676 }, { "epoch": 0.3164260632071791, "grad_norm": 0.5734140872955322, "learning_rate": 7.878307226410882e-05, "loss": 1.8253, "step": 5677 }, { "epoch": 0.3164818014603422, "grad_norm": 0.5915566086769104, "learning_rate": 7.877579091927496e-05, "loss": 1.7754, "step": 5678 }, { "epoch": 0.3165375397135054, "grad_norm": 0.5272923707962036, "learning_rate": 7.876850866184077e-05, "loss": 1.7315, "step": 5679 }, { "epoch": 0.31659327796666853, "grad_norm": 0.5072640180587769, "learning_rate": 7.876122549203723e-05, "loss": 1.5367, "step": 5680 }, { "epoch": 0.31664901621983166, "grad_norm": 0.5453153848648071, "learning_rate": 7.87539414100953e-05, "loss": 1.7551, "step": 5681 }, { "epoch": 0.31670475447299484, "grad_norm": 0.5492895245552063, "learning_rate": 7.874665641624599e-05, "loss": 1.7739, "step": 5682 }, { "epoch": 0.31676049272615797, "grad_norm": 0.5405164957046509, "learning_rate": 7.873937051072035e-05, "loss": 1.747, "step": 5683 }, { "epoch": 0.3168162309793211, "grad_norm": 0.5549308061599731, "learning_rate": 7.873208369374943e-05, "loss": 1.8224, "step": 5684 }, { "epoch": 0.31687196923248423, "grad_norm": 0.5366522669792175, "learning_rate": 7.872479596556435e-05, "loss": 1.6589, "step": 5685 }, { "epoch": 0.3169277074856474, "grad_norm": 0.527472734451294, "learning_rate": 7.871750732639621e-05, "loss": 1.6122, "step": 5686 }, { "epoch": 0.31698344573881054, "grad_norm": 0.5421255826950073, "learning_rate": 7.871021777647618e-05, "loss": 1.766, "step": 5687 }, { "epoch": 0.31703918399197367, "grad_norm": 0.5596272945404053, "learning_rate": 7.870292731603544e-05, "loss": 1.765, "step": 5688 }, { "epoch": 0.31709492224513686, "grad_norm": 0.5629613995552063, "learning_rate": 7.869563594530517e-05, "loss": 1.6374, "step": 5689 }, { "epoch": 0.3171506604983, "grad_norm": 0.5471567511558533, "learning_rate": 7.868834366451665e-05, "loss": 1.8048, "step": 5690 }, { "epoch": 0.3172063987514631, "grad_norm": 0.6505834460258484, "learning_rate": 7.868105047390113e-05, "loss": 2.1298, "step": 5691 }, { "epoch": 0.3172621370046263, "grad_norm": 0.5665611624717712, "learning_rate": 7.867375637368993e-05, "loss": 1.6, "step": 5692 }, { "epoch": 0.31731787525778943, "grad_norm": 0.5327755212783813, "learning_rate": 7.866646136411433e-05, "loss": 1.7876, "step": 5693 }, { "epoch": 0.31737361351095256, "grad_norm": 0.5993742942810059, "learning_rate": 7.865916544540573e-05, "loss": 1.7237, "step": 5694 }, { "epoch": 0.3174293517641157, "grad_norm": 0.5317041873931885, "learning_rate": 7.865186861779548e-05, "loss": 1.5221, "step": 5695 }, { "epoch": 0.31748509001727887, "grad_norm": 0.5825653076171875, "learning_rate": 7.864457088151502e-05, "loss": 1.7575, "step": 5696 }, { "epoch": 0.317540828270442, "grad_norm": 0.5435444116592407, "learning_rate": 7.863727223679578e-05, "loss": 1.789, "step": 5697 }, { "epoch": 0.31759656652360513, "grad_norm": 0.5559577941894531, "learning_rate": 7.862997268386924e-05, "loss": 1.802, "step": 5698 }, { "epoch": 0.3176523047767683, "grad_norm": 0.6636247634887695, "learning_rate": 7.862267222296687e-05, "loss": 2.0765, "step": 5699 }, { "epoch": 0.31770804302993144, "grad_norm": 0.49671420454978943, "learning_rate": 7.861537085432025e-05, "loss": 1.5644, "step": 5700 }, { "epoch": 0.31776378128309457, "grad_norm": 0.5270445942878723, "learning_rate": 7.860806857816088e-05, "loss": 1.7291, "step": 5701 }, { "epoch": 0.31781951953625776, "grad_norm": 0.6097070574760437, "learning_rate": 7.860076539472037e-05, "loss": 1.9244, "step": 5702 }, { "epoch": 0.3178752577894209, "grad_norm": 0.537875235080719, "learning_rate": 7.859346130423035e-05, "loss": 1.7579, "step": 5703 }, { "epoch": 0.317930996042584, "grad_norm": 0.5384728908538818, "learning_rate": 7.858615630692244e-05, "loss": 1.5755, "step": 5704 }, { "epoch": 0.3179867342957472, "grad_norm": 0.5751199722290039, "learning_rate": 7.857885040302833e-05, "loss": 1.6979, "step": 5705 }, { "epoch": 0.3180424725489103, "grad_norm": 0.5749076008796692, "learning_rate": 7.857154359277972e-05, "loss": 1.6744, "step": 5706 }, { "epoch": 0.31809821080207346, "grad_norm": 0.5693714022636414, "learning_rate": 7.85642358764083e-05, "loss": 1.8986, "step": 5707 }, { "epoch": 0.3181539490552366, "grad_norm": 0.504147469997406, "learning_rate": 7.855692725414587e-05, "loss": 1.5641, "step": 5708 }, { "epoch": 0.31820968730839977, "grad_norm": 0.5494616031646729, "learning_rate": 7.854961772622423e-05, "loss": 1.6743, "step": 5709 }, { "epoch": 0.3182654255615629, "grad_norm": 0.49635690450668335, "learning_rate": 7.854230729287515e-05, "loss": 1.5466, "step": 5710 }, { "epoch": 0.318321163814726, "grad_norm": 0.569781482219696, "learning_rate": 7.853499595433049e-05, "loss": 1.7647, "step": 5711 }, { "epoch": 0.3183769020678892, "grad_norm": 0.540679931640625, "learning_rate": 7.852768371082215e-05, "loss": 1.6237, "step": 5712 }, { "epoch": 0.31843264032105234, "grad_norm": 0.5818458795547485, "learning_rate": 7.852037056258199e-05, "loss": 1.9955, "step": 5713 }, { "epoch": 0.31848837857421547, "grad_norm": 0.5366159081459045, "learning_rate": 7.851305650984197e-05, "loss": 1.5985, "step": 5714 }, { "epoch": 0.31854411682737865, "grad_norm": 0.7078673839569092, "learning_rate": 7.850574155283404e-05, "loss": 1.6371, "step": 5715 }, { "epoch": 0.3185998550805418, "grad_norm": 0.6395692825317383, "learning_rate": 7.849842569179017e-05, "loss": 2.0647, "step": 5716 }, { "epoch": 0.3186555933337049, "grad_norm": 0.5583460927009583, "learning_rate": 7.849110892694242e-05, "loss": 1.8005, "step": 5717 }, { "epoch": 0.31871133158686804, "grad_norm": 0.6016951203346252, "learning_rate": 7.848379125852282e-05, "loss": 1.9861, "step": 5718 }, { "epoch": 0.3187670698400312, "grad_norm": 0.5291598439216614, "learning_rate": 7.847647268676341e-05, "loss": 1.6806, "step": 5719 }, { "epoch": 0.31882280809319435, "grad_norm": 0.5864149332046509, "learning_rate": 7.846915321189632e-05, "loss": 1.7323, "step": 5720 }, { "epoch": 0.3188785463463575, "grad_norm": 0.5477664470672607, "learning_rate": 7.846183283415367e-05, "loss": 1.7307, "step": 5721 }, { "epoch": 0.31893428459952067, "grad_norm": 0.5449158549308777, "learning_rate": 7.845451155376764e-05, "loss": 1.679, "step": 5722 }, { "epoch": 0.3189900228526838, "grad_norm": 0.5383809804916382, "learning_rate": 7.844718937097039e-05, "loss": 1.6991, "step": 5723 }, { "epoch": 0.3190457611058469, "grad_norm": 0.4735757112503052, "learning_rate": 7.843986628599416e-05, "loss": 1.4701, "step": 5724 }, { "epoch": 0.3191014993590101, "grad_norm": 0.5248317122459412, "learning_rate": 7.843254229907119e-05, "loss": 1.7293, "step": 5725 }, { "epoch": 0.31915723761217324, "grad_norm": 0.5262721180915833, "learning_rate": 7.842521741043375e-05, "loss": 1.6067, "step": 5726 }, { "epoch": 0.31921297586533637, "grad_norm": 0.5584807991981506, "learning_rate": 7.841789162031415e-05, "loss": 1.8573, "step": 5727 }, { "epoch": 0.31926871411849955, "grad_norm": 0.5617311596870422, "learning_rate": 7.84105649289447e-05, "loss": 1.7482, "step": 5728 }, { "epoch": 0.3193244523716627, "grad_norm": 0.5431827902793884, "learning_rate": 7.840323733655778e-05, "loss": 1.8564, "step": 5729 }, { "epoch": 0.3193801906248258, "grad_norm": 0.5269571542739868, "learning_rate": 7.839590884338579e-05, "loss": 1.4677, "step": 5730 }, { "epoch": 0.31943592887798894, "grad_norm": 0.5726506114006042, "learning_rate": 7.838857944966113e-05, "loss": 1.7656, "step": 5731 }, { "epoch": 0.3194916671311521, "grad_norm": 0.5350455641746521, "learning_rate": 7.838124915561623e-05, "loss": 1.525, "step": 5732 }, { "epoch": 0.31954740538431525, "grad_norm": 0.6093659996986389, "learning_rate": 7.837391796148359e-05, "loss": 1.9737, "step": 5733 }, { "epoch": 0.3196031436374784, "grad_norm": 0.5513406991958618, "learning_rate": 7.83665858674957e-05, "loss": 1.6783, "step": 5734 }, { "epoch": 0.31965888189064157, "grad_norm": 0.5465078949928284, "learning_rate": 7.835925287388511e-05, "loss": 1.5786, "step": 5735 }, { "epoch": 0.3197146201438047, "grad_norm": 0.5756266713142395, "learning_rate": 7.835191898088435e-05, "loss": 1.7969, "step": 5736 }, { "epoch": 0.3197703583969678, "grad_norm": 0.5218703150749207, "learning_rate": 7.8344584188726e-05, "loss": 1.619, "step": 5737 }, { "epoch": 0.319826096650131, "grad_norm": 0.5465853810310364, "learning_rate": 7.833724849764273e-05, "loss": 1.6193, "step": 5738 }, { "epoch": 0.31988183490329414, "grad_norm": 0.596364438533783, "learning_rate": 7.832991190786716e-05, "loss": 1.7853, "step": 5739 }, { "epoch": 0.31993757315645727, "grad_norm": 0.544185221195221, "learning_rate": 7.832257441963195e-05, "loss": 1.8835, "step": 5740 }, { "epoch": 0.3199933114096204, "grad_norm": 0.6070075631141663, "learning_rate": 7.83152360331698e-05, "loss": 2.1082, "step": 5741 }, { "epoch": 0.3200490496627836, "grad_norm": 0.5382431745529175, "learning_rate": 7.830789674871346e-05, "loss": 1.7184, "step": 5742 }, { "epoch": 0.3201047879159467, "grad_norm": 0.5074361562728882, "learning_rate": 7.830055656649568e-05, "loss": 1.5133, "step": 5743 }, { "epoch": 0.32016052616910984, "grad_norm": 0.5396546125411987, "learning_rate": 7.829321548674926e-05, "loss": 1.6203, "step": 5744 }, { "epoch": 0.320216264422273, "grad_norm": 0.5758295059204102, "learning_rate": 7.8285873509707e-05, "loss": 1.8658, "step": 5745 }, { "epoch": 0.32027200267543615, "grad_norm": 0.506420910358429, "learning_rate": 7.827853063560175e-05, "loss": 1.509, "step": 5746 }, { "epoch": 0.3203277409285993, "grad_norm": 0.5390977263450623, "learning_rate": 7.82711868646664e-05, "loss": 1.8333, "step": 5747 }, { "epoch": 0.32038347918176246, "grad_norm": 0.5680609345436096, "learning_rate": 7.82638421971338e-05, "loss": 1.6984, "step": 5748 }, { "epoch": 0.3204392174349256, "grad_norm": 0.5344312191009521, "learning_rate": 7.825649663323693e-05, "loss": 1.6667, "step": 5749 }, { "epoch": 0.3204949556880887, "grad_norm": 0.610658586025238, "learning_rate": 7.824915017320874e-05, "loss": 1.7763, "step": 5750 }, { "epoch": 0.3205506939412519, "grad_norm": 0.5463300943374634, "learning_rate": 7.824180281728222e-05, "loss": 1.5632, "step": 5751 }, { "epoch": 0.32060643219441504, "grad_norm": 0.5856190919876099, "learning_rate": 7.823445456569036e-05, "loss": 1.8129, "step": 5752 }, { "epoch": 0.32066217044757817, "grad_norm": 0.7068459987640381, "learning_rate": 7.822710541866622e-05, "loss": 1.8126, "step": 5753 }, { "epoch": 0.3207179087007413, "grad_norm": 0.6159639954566956, "learning_rate": 7.821975537644286e-05, "loss": 1.7802, "step": 5754 }, { "epoch": 0.3207736469539045, "grad_norm": 0.583821177482605, "learning_rate": 7.821240443925341e-05, "loss": 1.9406, "step": 5755 }, { "epoch": 0.3208293852070676, "grad_norm": 0.49633607268333435, "learning_rate": 7.820505260733098e-05, "loss": 1.4748, "step": 5756 }, { "epoch": 0.32088512346023074, "grad_norm": 0.5159478187561035, "learning_rate": 7.819769988090873e-05, "loss": 1.716, "step": 5757 }, { "epoch": 0.3209408617133939, "grad_norm": 0.5665544867515564, "learning_rate": 7.819034626021983e-05, "loss": 1.8005, "step": 5758 }, { "epoch": 0.32099659996655705, "grad_norm": 0.567043125629425, "learning_rate": 7.818299174549752e-05, "loss": 1.675, "step": 5759 }, { "epoch": 0.3210523382197202, "grad_norm": 0.5980729460716248, "learning_rate": 7.817563633697503e-05, "loss": 1.9635, "step": 5760 }, { "epoch": 0.32110807647288336, "grad_norm": 0.5714271068572998, "learning_rate": 7.816828003488563e-05, "loss": 1.7265, "step": 5761 }, { "epoch": 0.3211638147260465, "grad_norm": 0.5386238694190979, "learning_rate": 7.816092283946261e-05, "loss": 1.6653, "step": 5762 }, { "epoch": 0.3212195529792096, "grad_norm": 0.5798346400260925, "learning_rate": 7.815356475093931e-05, "loss": 1.6578, "step": 5763 }, { "epoch": 0.32127529123237275, "grad_norm": 0.5155278444290161, "learning_rate": 7.81462057695491e-05, "loss": 1.787, "step": 5764 }, { "epoch": 0.32133102948553594, "grad_norm": 0.49146315455436707, "learning_rate": 7.813884589552534e-05, "loss": 1.5927, "step": 5765 }, { "epoch": 0.32138676773869906, "grad_norm": 0.553433895111084, "learning_rate": 7.813148512910144e-05, "loss": 1.7973, "step": 5766 }, { "epoch": 0.3214425059918622, "grad_norm": 0.5665645003318787, "learning_rate": 7.812412347051083e-05, "loss": 1.7949, "step": 5767 }, { "epoch": 0.3214982442450254, "grad_norm": 0.5180385708808899, "learning_rate": 7.811676091998704e-05, "loss": 1.7011, "step": 5768 }, { "epoch": 0.3215539824981885, "grad_norm": 0.581295371055603, "learning_rate": 7.81093974777635e-05, "loss": 1.7513, "step": 5769 }, { "epoch": 0.32160972075135164, "grad_norm": 0.5677274465560913, "learning_rate": 7.810203314407377e-05, "loss": 1.9528, "step": 5770 }, { "epoch": 0.3216654590045148, "grad_norm": 0.5377728939056396, "learning_rate": 7.80946679191514e-05, "loss": 1.6544, "step": 5771 }, { "epoch": 0.32172119725767795, "grad_norm": 0.533319354057312, "learning_rate": 7.808730180322996e-05, "loss": 1.6561, "step": 5772 }, { "epoch": 0.3217769355108411, "grad_norm": 0.5324406623840332, "learning_rate": 7.807993479654307e-05, "loss": 1.6776, "step": 5773 }, { "epoch": 0.32183267376400426, "grad_norm": 0.5995755195617676, "learning_rate": 7.807256689932435e-05, "loss": 1.6976, "step": 5774 }, { "epoch": 0.3218884120171674, "grad_norm": 0.5474086999893188, "learning_rate": 7.806519811180751e-05, "loss": 1.4983, "step": 5775 }, { "epoch": 0.3219441502703305, "grad_norm": 0.5364895462989807, "learning_rate": 7.805782843422618e-05, "loss": 1.7632, "step": 5776 }, { "epoch": 0.32199988852349365, "grad_norm": 0.5104418396949768, "learning_rate": 7.805045786681415e-05, "loss": 1.6873, "step": 5777 }, { "epoch": 0.32205562677665683, "grad_norm": 0.5162766575813293, "learning_rate": 7.804308640980513e-05, "loss": 1.6692, "step": 5778 }, { "epoch": 0.32211136502981996, "grad_norm": 0.5526577234268188, "learning_rate": 7.803571406343293e-05, "loss": 1.631, "step": 5779 }, { "epoch": 0.3221671032829831, "grad_norm": 0.4954930245876312, "learning_rate": 7.802834082793131e-05, "loss": 1.4774, "step": 5780 }, { "epoch": 0.3222228415361463, "grad_norm": 0.5704354643821716, "learning_rate": 7.802096670353416e-05, "loss": 1.9247, "step": 5781 }, { "epoch": 0.3222785797893094, "grad_norm": 0.5746217966079712, "learning_rate": 7.80135916904753e-05, "loss": 1.9075, "step": 5782 }, { "epoch": 0.32233431804247253, "grad_norm": 0.5538354516029358, "learning_rate": 7.800621578898867e-05, "loss": 1.6338, "step": 5783 }, { "epoch": 0.3223900562956357, "grad_norm": 0.5441854596138, "learning_rate": 7.799883899930815e-05, "loss": 1.6214, "step": 5784 }, { "epoch": 0.32244579454879885, "grad_norm": 0.5677271485328674, "learning_rate": 7.79914613216677e-05, "loss": 1.7258, "step": 5785 }, { "epoch": 0.322501532801962, "grad_norm": 0.5610553026199341, "learning_rate": 7.798408275630129e-05, "loss": 1.6471, "step": 5786 }, { "epoch": 0.3225572710551251, "grad_norm": 0.5126567482948303, "learning_rate": 7.797670330344294e-05, "loss": 1.7154, "step": 5787 }, { "epoch": 0.3226130093082883, "grad_norm": 0.565370500087738, "learning_rate": 7.796932296332667e-05, "loss": 1.7534, "step": 5788 }, { "epoch": 0.3226687475614514, "grad_norm": 0.5113086104393005, "learning_rate": 7.796194173618654e-05, "loss": 1.5581, "step": 5789 }, { "epoch": 0.32272448581461455, "grad_norm": 0.543984591960907, "learning_rate": 7.795455962225669e-05, "loss": 1.7255, "step": 5790 }, { "epoch": 0.32278022406777773, "grad_norm": 0.5158193707466125, "learning_rate": 7.794717662177115e-05, "loss": 1.6029, "step": 5791 }, { "epoch": 0.32283596232094086, "grad_norm": 0.5405291318893433, "learning_rate": 7.793979273496414e-05, "loss": 1.6035, "step": 5792 }, { "epoch": 0.322891700574104, "grad_norm": 0.617701530456543, "learning_rate": 7.793240796206979e-05, "loss": 1.8577, "step": 5793 }, { "epoch": 0.3229474388272672, "grad_norm": 0.4910410940647125, "learning_rate": 7.79250223033223e-05, "loss": 1.4227, "step": 5794 }, { "epoch": 0.3230031770804303, "grad_norm": 0.5436237454414368, "learning_rate": 7.791763575895594e-05, "loss": 1.5865, "step": 5795 }, { "epoch": 0.32305891533359343, "grad_norm": 0.5777418613433838, "learning_rate": 7.791024832920496e-05, "loss": 1.8056, "step": 5796 }, { "epoch": 0.3231146535867566, "grad_norm": 0.5960043668746948, "learning_rate": 7.79028600143036e-05, "loss": 1.8124, "step": 5797 }, { "epoch": 0.32317039183991975, "grad_norm": 0.5568564534187317, "learning_rate": 7.789547081448622e-05, "loss": 1.614, "step": 5798 }, { "epoch": 0.3232261300930829, "grad_norm": 0.5896525979042053, "learning_rate": 7.788808072998715e-05, "loss": 1.784, "step": 5799 }, { "epoch": 0.323281868346246, "grad_norm": 0.5450705885887146, "learning_rate": 7.788068976104074e-05, "loss": 1.462, "step": 5800 }, { "epoch": 0.3233376065994092, "grad_norm": 0.4870886206626892, "learning_rate": 7.787329790788142e-05, "loss": 1.5523, "step": 5801 }, { "epoch": 0.3233933448525723, "grad_norm": 0.5481093525886536, "learning_rate": 7.78659051707436e-05, "loss": 1.6292, "step": 5802 }, { "epoch": 0.32344908310573545, "grad_norm": 0.5144929885864258, "learning_rate": 7.785851154986174e-05, "loss": 1.4811, "step": 5803 }, { "epoch": 0.32350482135889863, "grad_norm": 0.5884720683097839, "learning_rate": 7.785111704547032e-05, "loss": 1.8426, "step": 5804 }, { "epoch": 0.32356055961206176, "grad_norm": 0.5478202104568481, "learning_rate": 7.784372165780386e-05, "loss": 1.4918, "step": 5805 }, { "epoch": 0.3236162978652249, "grad_norm": 0.5706868767738342, "learning_rate": 7.783632538709688e-05, "loss": 1.6687, "step": 5806 }, { "epoch": 0.3236720361183881, "grad_norm": 0.569288432598114, "learning_rate": 7.782892823358394e-05, "loss": 1.7208, "step": 5807 }, { "epoch": 0.3237277743715512, "grad_norm": 0.6056145429611206, "learning_rate": 7.782153019749967e-05, "loss": 1.9566, "step": 5808 }, { "epoch": 0.32378351262471433, "grad_norm": 0.5828245878219604, "learning_rate": 7.781413127907868e-05, "loss": 1.7169, "step": 5809 }, { "epoch": 0.32383925087787746, "grad_norm": 0.5503557920455933, "learning_rate": 7.780673147855559e-05, "loss": 1.7084, "step": 5810 }, { "epoch": 0.32389498913104064, "grad_norm": 0.5861828327178955, "learning_rate": 7.779933079616512e-05, "loss": 1.6815, "step": 5811 }, { "epoch": 0.3239507273842038, "grad_norm": 0.5410308837890625, "learning_rate": 7.779192923214196e-05, "loss": 1.6899, "step": 5812 }, { "epoch": 0.3240064656373669, "grad_norm": 0.6349414587020874, "learning_rate": 7.778452678672084e-05, "loss": 2.0061, "step": 5813 }, { "epoch": 0.3240622038905301, "grad_norm": 0.6143296360969543, "learning_rate": 7.777712346013651e-05, "loss": 1.6939, "step": 5814 }, { "epoch": 0.3241179421436932, "grad_norm": 0.5646039247512817, "learning_rate": 7.776971925262379e-05, "loss": 1.4296, "step": 5815 }, { "epoch": 0.32417368039685635, "grad_norm": 0.570025622844696, "learning_rate": 7.776231416441748e-05, "loss": 1.8693, "step": 5816 }, { "epoch": 0.32422941865001953, "grad_norm": 0.4873752295970917, "learning_rate": 7.775490819575242e-05, "loss": 1.5215, "step": 5817 }, { "epoch": 0.32428515690318266, "grad_norm": 0.5546776652336121, "learning_rate": 7.774750134686352e-05, "loss": 1.6002, "step": 5818 }, { "epoch": 0.3243408951563458, "grad_norm": 0.5605872273445129, "learning_rate": 7.774009361798565e-05, "loss": 1.42, "step": 5819 }, { "epoch": 0.32439663340950897, "grad_norm": 0.5118110179901123, "learning_rate": 7.773268500935372e-05, "loss": 1.6076, "step": 5820 }, { "epoch": 0.3244523716626721, "grad_norm": 0.5516108274459839, "learning_rate": 7.772527552120273e-05, "loss": 1.6444, "step": 5821 }, { "epoch": 0.32450810991583523, "grad_norm": 0.5176465511322021, "learning_rate": 7.771786515376765e-05, "loss": 1.3809, "step": 5822 }, { "epoch": 0.32456384816899836, "grad_norm": 0.5901971459388733, "learning_rate": 7.77104539072835e-05, "loss": 1.8976, "step": 5823 }, { "epoch": 0.32461958642216154, "grad_norm": 0.5981687903404236, "learning_rate": 7.770304178198531e-05, "loss": 1.7352, "step": 5824 }, { "epoch": 0.3246753246753247, "grad_norm": 0.48600277304649353, "learning_rate": 7.769562877810816e-05, "loss": 1.5827, "step": 5825 }, { "epoch": 0.3247310629284878, "grad_norm": 0.47773730754852295, "learning_rate": 7.768821489588713e-05, "loss": 1.44, "step": 5826 }, { "epoch": 0.324786801181651, "grad_norm": 0.5615780353546143, "learning_rate": 7.768080013555737e-05, "loss": 1.6719, "step": 5827 }, { "epoch": 0.3248425394348141, "grad_norm": 0.5451145172119141, "learning_rate": 7.767338449735401e-05, "loss": 1.355, "step": 5828 }, { "epoch": 0.32489827768797724, "grad_norm": 0.5609704852104187, "learning_rate": 7.766596798151224e-05, "loss": 1.6764, "step": 5829 }, { "epoch": 0.32495401594114043, "grad_norm": 0.5926015973091125, "learning_rate": 7.765855058826727e-05, "loss": 1.8243, "step": 5830 }, { "epoch": 0.32500975419430356, "grad_norm": 0.5234283804893494, "learning_rate": 7.765113231785435e-05, "loss": 1.7313, "step": 5831 }, { "epoch": 0.3250654924474667, "grad_norm": 0.5433173179626465, "learning_rate": 7.764371317050873e-05, "loss": 1.7546, "step": 5832 }, { "epoch": 0.3251212307006298, "grad_norm": 0.6074669361114502, "learning_rate": 7.763629314646568e-05, "loss": 1.7879, "step": 5833 }, { "epoch": 0.325176968953793, "grad_norm": 0.6136168241500854, "learning_rate": 7.762887224596055e-05, "loss": 1.8066, "step": 5834 }, { "epoch": 0.32523270720695613, "grad_norm": 0.5498754978179932, "learning_rate": 7.76214504692287e-05, "loss": 1.6913, "step": 5835 }, { "epoch": 0.32528844546011926, "grad_norm": 0.5876418352127075, "learning_rate": 7.761402781650547e-05, "loss": 1.7581, "step": 5836 }, { "epoch": 0.32534418371328244, "grad_norm": 0.5235028862953186, "learning_rate": 7.760660428802628e-05, "loss": 1.5955, "step": 5837 }, { "epoch": 0.32539992196644557, "grad_norm": 0.54973304271698, "learning_rate": 7.759917988402657e-05, "loss": 1.6833, "step": 5838 }, { "epoch": 0.3254556602196087, "grad_norm": 0.6082160472869873, "learning_rate": 7.759175460474177e-05, "loss": 1.8303, "step": 5839 }, { "epoch": 0.3255113984727719, "grad_norm": 0.5204039812088013, "learning_rate": 7.758432845040737e-05, "loss": 1.7216, "step": 5840 }, { "epoch": 0.325567136725935, "grad_norm": 0.5268458724021912, "learning_rate": 7.757690142125893e-05, "loss": 1.6099, "step": 5841 }, { "epoch": 0.32562287497909814, "grad_norm": 0.5118129253387451, "learning_rate": 7.756947351753196e-05, "loss": 1.5388, "step": 5842 }, { "epoch": 0.3256786132322613, "grad_norm": 0.5349292159080505, "learning_rate": 7.756204473946203e-05, "loss": 1.6813, "step": 5843 }, { "epoch": 0.32573435148542446, "grad_norm": 0.5555446743965149, "learning_rate": 7.755461508728472e-05, "loss": 1.5549, "step": 5844 }, { "epoch": 0.3257900897385876, "grad_norm": 0.5379804372787476, "learning_rate": 7.75471845612357e-05, "loss": 1.5658, "step": 5845 }, { "epoch": 0.3258458279917507, "grad_norm": 0.618511974811554, "learning_rate": 7.753975316155057e-05, "loss": 1.8505, "step": 5846 }, { "epoch": 0.3259015662449139, "grad_norm": 0.6143367290496826, "learning_rate": 7.753232088846505e-05, "loss": 1.953, "step": 5847 }, { "epoch": 0.325957304498077, "grad_norm": 0.543201208114624, "learning_rate": 7.752488774221485e-05, "loss": 1.9068, "step": 5848 }, { "epoch": 0.32601304275124016, "grad_norm": 0.5580254197120667, "learning_rate": 7.751745372303567e-05, "loss": 1.6766, "step": 5849 }, { "epoch": 0.32606878100440334, "grad_norm": 0.5846728086471558, "learning_rate": 7.751001883116331e-05, "loss": 1.874, "step": 5850 }, { "epoch": 0.32612451925756647, "grad_norm": 0.5597751140594482, "learning_rate": 7.750258306683353e-05, "loss": 1.7491, "step": 5851 }, { "epoch": 0.3261802575107296, "grad_norm": 0.49921393394470215, "learning_rate": 7.749514643028218e-05, "loss": 1.3701, "step": 5852 }, { "epoch": 0.3262359957638928, "grad_norm": 0.5255808234214783, "learning_rate": 7.748770892174509e-05, "loss": 1.4772, "step": 5853 }, { "epoch": 0.3262917340170559, "grad_norm": 0.5470353960990906, "learning_rate": 7.748027054145814e-05, "loss": 1.7885, "step": 5854 }, { "epoch": 0.32634747227021904, "grad_norm": 0.575181782245636, "learning_rate": 7.747283128965723e-05, "loss": 1.8875, "step": 5855 }, { "epoch": 0.32640321052338217, "grad_norm": 0.6346047520637512, "learning_rate": 7.74653911665783e-05, "loss": 2.0948, "step": 5856 }, { "epoch": 0.32645894877654535, "grad_norm": 0.5814865231513977, "learning_rate": 7.745795017245729e-05, "loss": 1.572, "step": 5857 }, { "epoch": 0.3265146870297085, "grad_norm": 0.5990648865699768, "learning_rate": 7.745050830753018e-05, "loss": 1.7464, "step": 5858 }, { "epoch": 0.3265704252828716, "grad_norm": 0.5689359903335571, "learning_rate": 7.744306557203299e-05, "loss": 1.9168, "step": 5859 }, { "epoch": 0.3266261635360348, "grad_norm": 0.5398204326629639, "learning_rate": 7.743562196620177e-05, "loss": 1.6884, "step": 5860 }, { "epoch": 0.3266819017891979, "grad_norm": 0.5738016366958618, "learning_rate": 7.74281774902726e-05, "loss": 1.815, "step": 5861 }, { "epoch": 0.32673764004236105, "grad_norm": 0.5424049496650696, "learning_rate": 7.742073214448153e-05, "loss": 1.832, "step": 5862 }, { "epoch": 0.32679337829552424, "grad_norm": 0.5409512519836426, "learning_rate": 7.741328592906474e-05, "loss": 1.7179, "step": 5863 }, { "epoch": 0.32684911654868737, "grad_norm": 0.5621674656867981, "learning_rate": 7.740583884425833e-05, "loss": 1.8319, "step": 5864 }, { "epoch": 0.3269048548018505, "grad_norm": 0.5400972962379456, "learning_rate": 7.73983908902985e-05, "loss": 1.6868, "step": 5865 }, { "epoch": 0.3269605930550137, "grad_norm": 0.5927982926368713, "learning_rate": 7.739094206742146e-05, "loss": 1.6426, "step": 5866 }, { "epoch": 0.3270163313081768, "grad_norm": 0.510775089263916, "learning_rate": 7.738349237586343e-05, "loss": 1.6661, "step": 5867 }, { "epoch": 0.32707206956133994, "grad_norm": 0.5710152387619019, "learning_rate": 7.737604181586068e-05, "loss": 1.7263, "step": 5868 }, { "epoch": 0.32712780781450307, "grad_norm": 0.5645250082015991, "learning_rate": 7.736859038764952e-05, "loss": 1.7197, "step": 5869 }, { "epoch": 0.32718354606766625, "grad_norm": 0.5439823865890503, "learning_rate": 7.73611380914662e-05, "loss": 1.7229, "step": 5870 }, { "epoch": 0.3272392843208294, "grad_norm": 0.5163010358810425, "learning_rate": 7.735368492754715e-05, "loss": 1.5273, "step": 5871 }, { "epoch": 0.3272950225739925, "grad_norm": 0.5735363960266113, "learning_rate": 7.734623089612867e-05, "loss": 1.7926, "step": 5872 }, { "epoch": 0.3273507608271557, "grad_norm": 0.5508522391319275, "learning_rate": 7.73387759974472e-05, "loss": 1.492, "step": 5873 }, { "epoch": 0.3274064990803188, "grad_norm": 0.6105926632881165, "learning_rate": 7.733132023173915e-05, "loss": 1.6155, "step": 5874 }, { "epoch": 0.32746223733348195, "grad_norm": 0.5956704020500183, "learning_rate": 7.732386359924097e-05, "loss": 1.7757, "step": 5875 }, { "epoch": 0.32751797558664514, "grad_norm": 0.6001446843147278, "learning_rate": 7.731640610018914e-05, "loss": 1.6669, "step": 5876 }, { "epoch": 0.32757371383980827, "grad_norm": 0.6132667660713196, "learning_rate": 7.730894773482019e-05, "loss": 1.944, "step": 5877 }, { "epoch": 0.3276294520929714, "grad_norm": 0.5684986710548401, "learning_rate": 7.730148850337062e-05, "loss": 1.7491, "step": 5878 }, { "epoch": 0.3276851903461345, "grad_norm": 0.537605881690979, "learning_rate": 7.729402840607702e-05, "loss": 1.7473, "step": 5879 }, { "epoch": 0.3277409285992977, "grad_norm": 0.5186078548431396, "learning_rate": 7.728656744317598e-05, "loss": 1.7703, "step": 5880 }, { "epoch": 0.32779666685246084, "grad_norm": 0.5188151001930237, "learning_rate": 7.727910561490411e-05, "loss": 1.6632, "step": 5881 }, { "epoch": 0.32785240510562397, "grad_norm": 0.5799871683120728, "learning_rate": 7.727164292149806e-05, "loss": 1.7289, "step": 5882 }, { "epoch": 0.32790814335878715, "grad_norm": 0.5974400639533997, "learning_rate": 7.72641793631945e-05, "loss": 1.9396, "step": 5883 }, { "epoch": 0.3279638816119503, "grad_norm": 0.5383574366569519, "learning_rate": 7.725671494023014e-05, "loss": 1.6176, "step": 5884 }, { "epoch": 0.3280196198651134, "grad_norm": 0.5623538494110107, "learning_rate": 7.724924965284169e-05, "loss": 1.7997, "step": 5885 }, { "epoch": 0.3280753581182766, "grad_norm": 0.5270793437957764, "learning_rate": 7.72417835012659e-05, "loss": 1.762, "step": 5886 }, { "epoch": 0.3281310963714397, "grad_norm": 0.4922736585140228, "learning_rate": 7.72343164857396e-05, "loss": 1.29, "step": 5887 }, { "epoch": 0.32818683462460285, "grad_norm": 0.5568634867668152, "learning_rate": 7.722684860649953e-05, "loss": 1.8285, "step": 5888 }, { "epoch": 0.32824257287776604, "grad_norm": 0.5732812285423279, "learning_rate": 7.721937986378261e-05, "loss": 1.6134, "step": 5889 }, { "epoch": 0.32829831113092917, "grad_norm": 0.5091588497161865, "learning_rate": 7.721191025782563e-05, "loss": 1.5536, "step": 5890 }, { "epoch": 0.3283540493840923, "grad_norm": 0.5646446347236633, "learning_rate": 7.720443978886551e-05, "loss": 1.6102, "step": 5891 }, { "epoch": 0.3284097876372554, "grad_norm": 0.5230876207351685, "learning_rate": 7.71969684571392e-05, "loss": 1.7258, "step": 5892 }, { "epoch": 0.3284655258904186, "grad_norm": 0.5695227980613708, "learning_rate": 7.718949626288359e-05, "loss": 1.7538, "step": 5893 }, { "epoch": 0.32852126414358174, "grad_norm": 0.5724740028381348, "learning_rate": 7.718202320633572e-05, "loss": 1.5929, "step": 5894 }, { "epoch": 0.32857700239674487, "grad_norm": 0.5088779926300049, "learning_rate": 7.717454928773253e-05, "loss": 1.5781, "step": 5895 }, { "epoch": 0.32863274064990805, "grad_norm": 0.6324506402015686, "learning_rate": 7.716707450731109e-05, "loss": 1.97, "step": 5896 }, { "epoch": 0.3286884789030712, "grad_norm": 0.5300724506378174, "learning_rate": 7.715959886530843e-05, "loss": 1.6759, "step": 5897 }, { "epoch": 0.3287442171562343, "grad_norm": 0.5645179152488708, "learning_rate": 7.715212236196164e-05, "loss": 1.6515, "step": 5898 }, { "epoch": 0.3287999554093975, "grad_norm": 0.575449526309967, "learning_rate": 7.714464499750784e-05, "loss": 1.7267, "step": 5899 }, { "epoch": 0.3288556936625606, "grad_norm": 0.5279715657234192, "learning_rate": 7.713716677218416e-05, "loss": 1.6431, "step": 5900 }, { "epoch": 0.32891143191572375, "grad_norm": 0.5209466814994812, "learning_rate": 7.712968768622779e-05, "loss": 1.5909, "step": 5901 }, { "epoch": 0.3289671701688869, "grad_norm": 0.5469819903373718, "learning_rate": 7.712220773987589e-05, "loss": 1.6273, "step": 5902 }, { "epoch": 0.32902290842205006, "grad_norm": 0.5781688690185547, "learning_rate": 7.71147269333657e-05, "loss": 1.8497, "step": 5903 }, { "epoch": 0.3290786466752132, "grad_norm": 0.5549498200416565, "learning_rate": 7.710724526693445e-05, "loss": 1.6606, "step": 5904 }, { "epoch": 0.3291343849283763, "grad_norm": 0.5616956949234009, "learning_rate": 7.709976274081944e-05, "loss": 1.8094, "step": 5905 }, { "epoch": 0.3291901231815395, "grad_norm": 0.5189547538757324, "learning_rate": 7.709227935525796e-05, "loss": 1.7477, "step": 5906 }, { "epoch": 0.32924586143470264, "grad_norm": 0.5060945749282837, "learning_rate": 7.708479511048732e-05, "loss": 1.4591, "step": 5907 }, { "epoch": 0.32930159968786576, "grad_norm": 0.5463743209838867, "learning_rate": 7.707731000674492e-05, "loss": 1.6762, "step": 5908 }, { "epoch": 0.32935733794102895, "grad_norm": 0.5190552473068237, "learning_rate": 7.70698240442681e-05, "loss": 1.529, "step": 5909 }, { "epoch": 0.3294130761941921, "grad_norm": 0.5391181111335754, "learning_rate": 7.70623372232943e-05, "loss": 1.6953, "step": 5910 }, { "epoch": 0.3294688144473552, "grad_norm": 0.5780003070831299, "learning_rate": 7.705484954406092e-05, "loss": 1.6728, "step": 5911 }, { "epoch": 0.3295245527005184, "grad_norm": 0.554817795753479, "learning_rate": 7.704736100680547e-05, "loss": 1.6731, "step": 5912 }, { "epoch": 0.3295802909536815, "grad_norm": 0.590787410736084, "learning_rate": 7.703987161176545e-05, "loss": 1.9063, "step": 5913 }, { "epoch": 0.32963602920684465, "grad_norm": 0.5418079495429993, "learning_rate": 7.703238135917832e-05, "loss": 1.6984, "step": 5914 }, { "epoch": 0.3296917674600078, "grad_norm": 0.5568365454673767, "learning_rate": 7.702489024928168e-05, "loss": 1.7057, "step": 5915 }, { "epoch": 0.32974750571317096, "grad_norm": 0.5823662281036377, "learning_rate": 7.701739828231309e-05, "loss": 1.8851, "step": 5916 }, { "epoch": 0.3298032439663341, "grad_norm": 0.588046133518219, "learning_rate": 7.700990545851014e-05, "loss": 1.6514, "step": 5917 }, { "epoch": 0.3298589822194972, "grad_norm": 0.5833228826522827, "learning_rate": 7.700241177811048e-05, "loss": 1.7474, "step": 5918 }, { "epoch": 0.3299147204726604, "grad_norm": 0.5376124978065491, "learning_rate": 7.699491724135175e-05, "loss": 1.65, "step": 5919 }, { "epoch": 0.32997045872582353, "grad_norm": 0.579406201839447, "learning_rate": 7.698742184847163e-05, "loss": 1.7039, "step": 5920 }, { "epoch": 0.33002619697898666, "grad_norm": 0.5547471046447754, "learning_rate": 7.697992559970784e-05, "loss": 1.7428, "step": 5921 }, { "epoch": 0.33008193523214985, "grad_norm": 0.5924109816551208, "learning_rate": 7.697242849529812e-05, "loss": 1.7935, "step": 5922 }, { "epoch": 0.330137673485313, "grad_norm": 0.5609079003334045, "learning_rate": 7.69649305354802e-05, "loss": 1.7302, "step": 5923 }, { "epoch": 0.3301934117384761, "grad_norm": 0.5709410309791565, "learning_rate": 7.695743172049192e-05, "loss": 1.6529, "step": 5924 }, { "epoch": 0.33024914999163923, "grad_norm": 0.5341020822525024, "learning_rate": 7.694993205057108e-05, "loss": 1.696, "step": 5925 }, { "epoch": 0.3303048882448024, "grad_norm": 0.5852230787277222, "learning_rate": 7.694243152595552e-05, "loss": 1.6173, "step": 5926 }, { "epoch": 0.33036062649796555, "grad_norm": 0.5338337421417236, "learning_rate": 7.693493014688313e-05, "loss": 1.4818, "step": 5927 }, { "epoch": 0.3304163647511287, "grad_norm": 0.5398749113082886, "learning_rate": 7.69274279135918e-05, "loss": 1.631, "step": 5928 }, { "epoch": 0.33047210300429186, "grad_norm": 0.5520002245903015, "learning_rate": 7.691992482631944e-05, "loss": 1.8426, "step": 5929 }, { "epoch": 0.330527841257455, "grad_norm": 0.5498268008232117, "learning_rate": 7.691242088530401e-05, "loss": 1.8106, "step": 5930 }, { "epoch": 0.3305835795106181, "grad_norm": 0.5437809824943542, "learning_rate": 7.690491609078351e-05, "loss": 1.7523, "step": 5931 }, { "epoch": 0.3306393177637813, "grad_norm": 0.6089059114456177, "learning_rate": 7.689741044299595e-05, "loss": 1.7299, "step": 5932 }, { "epoch": 0.33069505601694443, "grad_norm": 0.5289489030838013, "learning_rate": 7.688990394217933e-05, "loss": 1.691, "step": 5933 }, { "epoch": 0.33075079427010756, "grad_norm": 0.555590033531189, "learning_rate": 7.688239658857174e-05, "loss": 1.45, "step": 5934 }, { "epoch": 0.33080653252327075, "grad_norm": 0.6252313256263733, "learning_rate": 7.687488838241128e-05, "loss": 1.8009, "step": 5935 }, { "epoch": 0.3308622707764339, "grad_norm": 0.5846867561340332, "learning_rate": 7.686737932393605e-05, "loss": 1.7873, "step": 5936 }, { "epoch": 0.330918009029597, "grad_norm": 0.5312223434448242, "learning_rate": 7.685986941338419e-05, "loss": 1.6196, "step": 5937 }, { "epoch": 0.33097374728276013, "grad_norm": 0.5511593222618103, "learning_rate": 7.685235865099387e-05, "loss": 1.7915, "step": 5938 }, { "epoch": 0.3310294855359233, "grad_norm": 0.5287107825279236, "learning_rate": 7.684484703700332e-05, "loss": 1.6648, "step": 5939 }, { "epoch": 0.33108522378908645, "grad_norm": 0.5697956681251526, "learning_rate": 7.683733457165071e-05, "loss": 2.0054, "step": 5940 }, { "epoch": 0.3311409620422496, "grad_norm": 0.5331019759178162, "learning_rate": 7.682982125517433e-05, "loss": 1.7598, "step": 5941 }, { "epoch": 0.33119670029541276, "grad_norm": 0.5488009452819824, "learning_rate": 7.682230708781244e-05, "loss": 1.4258, "step": 5942 }, { "epoch": 0.3312524385485759, "grad_norm": 0.5415595173835754, "learning_rate": 7.681479206980338e-05, "loss": 1.766, "step": 5943 }, { "epoch": 0.331308176801739, "grad_norm": 0.6208872199058533, "learning_rate": 7.680727620138542e-05, "loss": 1.879, "step": 5944 }, { "epoch": 0.3313639150549022, "grad_norm": 0.5650165677070618, "learning_rate": 7.679975948279699e-05, "loss": 1.4933, "step": 5945 }, { "epoch": 0.33141965330806533, "grad_norm": 0.5754852890968323, "learning_rate": 7.679224191427642e-05, "loss": 1.6821, "step": 5946 }, { "epoch": 0.33147539156122846, "grad_norm": 0.5749027132987976, "learning_rate": 7.678472349606215e-05, "loss": 1.8599, "step": 5947 }, { "epoch": 0.3315311298143916, "grad_norm": 0.5200157761573792, "learning_rate": 7.677720422839263e-05, "loss": 1.6659, "step": 5948 }, { "epoch": 0.3315868680675548, "grad_norm": 0.6056989431381226, "learning_rate": 7.676968411150629e-05, "loss": 1.9657, "step": 5949 }, { "epoch": 0.3316426063207179, "grad_norm": 0.5650584697723389, "learning_rate": 7.676216314564166e-05, "loss": 1.9396, "step": 5950 }, { "epoch": 0.33169834457388103, "grad_norm": 0.5425543785095215, "learning_rate": 7.675464133103726e-05, "loss": 1.6447, "step": 5951 }, { "epoch": 0.3317540828270442, "grad_norm": 0.5751011967658997, "learning_rate": 7.674711866793163e-05, "loss": 1.7975, "step": 5952 }, { "epoch": 0.33180982108020735, "grad_norm": 0.521195113658905, "learning_rate": 7.673959515656333e-05, "loss": 1.6343, "step": 5953 }, { "epoch": 0.3318655593333705, "grad_norm": 0.5193372964859009, "learning_rate": 7.673207079717098e-05, "loss": 1.7215, "step": 5954 }, { "epoch": 0.33192129758653366, "grad_norm": 0.4974719285964966, "learning_rate": 7.672454558999318e-05, "loss": 1.5058, "step": 5955 }, { "epoch": 0.3319770358396968, "grad_norm": 0.610576868057251, "learning_rate": 7.671701953526863e-05, "loss": 1.8826, "step": 5956 }, { "epoch": 0.3320327740928599, "grad_norm": 0.5185069441795349, "learning_rate": 7.670949263323599e-05, "loss": 1.3823, "step": 5957 }, { "epoch": 0.3320885123460231, "grad_norm": 0.5048871636390686, "learning_rate": 7.670196488413397e-05, "loss": 1.3208, "step": 5958 }, { "epoch": 0.33214425059918623, "grad_norm": 0.512177586555481, "learning_rate": 7.66944362882013e-05, "loss": 1.4293, "step": 5959 }, { "epoch": 0.33219998885234936, "grad_norm": 0.5636778473854065, "learning_rate": 7.668690684567676e-05, "loss": 1.5585, "step": 5960 }, { "epoch": 0.3322557271055125, "grad_norm": 0.5499832630157471, "learning_rate": 7.667937655679913e-05, "loss": 1.5834, "step": 5961 }, { "epoch": 0.3323114653586757, "grad_norm": 0.6139015555381775, "learning_rate": 7.667184542180723e-05, "loss": 2.0935, "step": 5962 }, { "epoch": 0.3323672036118388, "grad_norm": 0.5284989476203918, "learning_rate": 7.666431344093988e-05, "loss": 1.6838, "step": 5963 }, { "epoch": 0.33242294186500193, "grad_norm": 0.5448603630065918, "learning_rate": 7.665678061443599e-05, "loss": 1.6688, "step": 5964 }, { "epoch": 0.3324786801181651, "grad_norm": 0.5356377959251404, "learning_rate": 7.664924694253443e-05, "loss": 1.6131, "step": 5965 }, { "epoch": 0.33253441837132824, "grad_norm": 0.5786362886428833, "learning_rate": 7.664171242547414e-05, "loss": 1.859, "step": 5966 }, { "epoch": 0.3325901566244914, "grad_norm": 0.5811523199081421, "learning_rate": 7.663417706349407e-05, "loss": 1.6848, "step": 5967 }, { "epoch": 0.33264589487765456, "grad_norm": 0.5504920482635498, "learning_rate": 7.662664085683317e-05, "loss": 1.7, "step": 5968 }, { "epoch": 0.3327016331308177, "grad_norm": 0.6110926866531372, "learning_rate": 7.66191038057305e-05, "loss": 1.87, "step": 5969 }, { "epoch": 0.3327573713839808, "grad_norm": 0.5238990187644958, "learning_rate": 7.661156591042502e-05, "loss": 1.6083, "step": 5970 }, { "epoch": 0.33281310963714394, "grad_norm": 0.5919533371925354, "learning_rate": 7.660402717115584e-05, "loss": 1.6786, "step": 5971 }, { "epoch": 0.33286884789030713, "grad_norm": 0.565631091594696, "learning_rate": 7.659648758816205e-05, "loss": 1.595, "step": 5972 }, { "epoch": 0.33292458614347026, "grad_norm": 0.6189529299736023, "learning_rate": 7.658894716168271e-05, "loss": 2.0188, "step": 5973 }, { "epoch": 0.3329803243966334, "grad_norm": 0.5532551407814026, "learning_rate": 7.658140589195701e-05, "loss": 1.6095, "step": 5974 }, { "epoch": 0.33303606264979657, "grad_norm": 0.4914916157722473, "learning_rate": 7.657386377922409e-05, "loss": 1.6199, "step": 5975 }, { "epoch": 0.3330918009029597, "grad_norm": 0.5677047371864319, "learning_rate": 7.656632082372315e-05, "loss": 1.5635, "step": 5976 }, { "epoch": 0.33314753915612283, "grad_norm": 0.5638590455055237, "learning_rate": 7.65587770256934e-05, "loss": 1.7578, "step": 5977 }, { "epoch": 0.333203277409286, "grad_norm": 0.5115950107574463, "learning_rate": 7.655123238537409e-05, "loss": 1.4157, "step": 5978 }, { "epoch": 0.33325901566244914, "grad_norm": 0.6125264763832092, "learning_rate": 7.65436869030045e-05, "loss": 1.8876, "step": 5979 }, { "epoch": 0.33331475391561227, "grad_norm": 0.5354574918746948, "learning_rate": 7.653614057882393e-05, "loss": 1.7052, "step": 5980 }, { "epoch": 0.33337049216877546, "grad_norm": 0.5426600575447083, "learning_rate": 7.652859341307168e-05, "loss": 1.7011, "step": 5981 }, { "epoch": 0.3334262304219386, "grad_norm": 0.7442419528961182, "learning_rate": 7.652104540598712e-05, "loss": 1.7664, "step": 5982 }, { "epoch": 0.3334819686751017, "grad_norm": 0.5431948900222778, "learning_rate": 7.651349655780965e-05, "loss": 1.5627, "step": 5983 }, { "epoch": 0.33353770692826484, "grad_norm": 0.5939268469810486, "learning_rate": 7.650594686877863e-05, "loss": 1.8128, "step": 5984 }, { "epoch": 0.333593445181428, "grad_norm": 0.540123462677002, "learning_rate": 7.649839633913352e-05, "loss": 1.6395, "step": 5985 }, { "epoch": 0.33364918343459116, "grad_norm": 0.5777207016944885, "learning_rate": 7.649084496911378e-05, "loss": 1.7467, "step": 5986 }, { "epoch": 0.3337049216877543, "grad_norm": 0.5720601081848145, "learning_rate": 7.648329275895889e-05, "loss": 1.8314, "step": 5987 }, { "epoch": 0.33376065994091747, "grad_norm": 0.5010839104652405, "learning_rate": 7.647573970890837e-05, "loss": 1.5876, "step": 5988 }, { "epoch": 0.3338163981940806, "grad_norm": 0.5364264249801636, "learning_rate": 7.646818581920173e-05, "loss": 1.6042, "step": 5989 }, { "epoch": 0.33387213644724373, "grad_norm": 0.5355646014213562, "learning_rate": 7.646063109007858e-05, "loss": 1.5054, "step": 5990 }, { "epoch": 0.3339278747004069, "grad_norm": 0.5173195600509644, "learning_rate": 7.645307552177847e-05, "loss": 1.7355, "step": 5991 }, { "epoch": 0.33398361295357004, "grad_norm": 0.5141093134880066, "learning_rate": 7.644551911454103e-05, "loss": 1.5428, "step": 5992 }, { "epoch": 0.33403935120673317, "grad_norm": 0.5739405751228333, "learning_rate": 7.643796186860595e-05, "loss": 1.8064, "step": 5993 }, { "epoch": 0.3340950894598963, "grad_norm": 0.6502695083618164, "learning_rate": 7.643040378421282e-05, "loss": 1.9495, "step": 5994 }, { "epoch": 0.3341508277130595, "grad_norm": 0.5652748942375183, "learning_rate": 7.64228448616014e-05, "loss": 1.6926, "step": 5995 }, { "epoch": 0.3342065659662226, "grad_norm": 0.5500004291534424, "learning_rate": 7.64152851010114e-05, "loss": 1.6566, "step": 5996 }, { "epoch": 0.33426230421938574, "grad_norm": 0.6248365044593811, "learning_rate": 7.640772450268255e-05, "loss": 1.6196, "step": 5997 }, { "epoch": 0.3343180424725489, "grad_norm": 0.5509215593338013, "learning_rate": 7.640016306685467e-05, "loss": 1.6845, "step": 5998 }, { "epoch": 0.33437378072571206, "grad_norm": 0.6251245141029358, "learning_rate": 7.639260079376753e-05, "loss": 1.9948, "step": 5999 }, { "epoch": 0.3344295189788752, "grad_norm": 0.536384642124176, "learning_rate": 7.638503768366098e-05, "loss": 1.6778, "step": 6000 }, { "epoch": 0.33448525723203837, "grad_norm": 0.5998651385307312, "learning_rate": 7.637747373677486e-05, "loss": 1.6279, "step": 6001 }, { "epoch": 0.3345409954852015, "grad_norm": 0.5673259496688843, "learning_rate": 7.636990895334907e-05, "loss": 1.7001, "step": 6002 }, { "epoch": 0.3345967337383646, "grad_norm": 0.5465088486671448, "learning_rate": 7.63623433336235e-05, "loss": 1.7576, "step": 6003 }, { "epoch": 0.3346524719915278, "grad_norm": 0.5544756054878235, "learning_rate": 7.635477687783814e-05, "loss": 1.844, "step": 6004 }, { "epoch": 0.33470821024469094, "grad_norm": 0.5186877846717834, "learning_rate": 7.634720958623287e-05, "loss": 1.6125, "step": 6005 }, { "epoch": 0.33476394849785407, "grad_norm": 0.5501444935798645, "learning_rate": 7.633964145904777e-05, "loss": 1.7169, "step": 6006 }, { "epoch": 0.3348196867510172, "grad_norm": 0.5606530904769897, "learning_rate": 7.633207249652278e-05, "loss": 1.6944, "step": 6007 }, { "epoch": 0.3348754250041804, "grad_norm": 0.49215444922447205, "learning_rate": 7.6324502698898e-05, "loss": 1.4025, "step": 6008 }, { "epoch": 0.3349311632573435, "grad_norm": 0.555610716342926, "learning_rate": 7.631693206641346e-05, "loss": 1.7292, "step": 6009 }, { "epoch": 0.33498690151050664, "grad_norm": 0.5174264907836914, "learning_rate": 7.630936059930927e-05, "loss": 1.5525, "step": 6010 }, { "epoch": 0.3350426397636698, "grad_norm": 0.5901679992675781, "learning_rate": 7.630178829782558e-05, "loss": 1.7284, "step": 6011 }, { "epoch": 0.33509837801683295, "grad_norm": 0.5459769368171692, "learning_rate": 7.629421516220249e-05, "loss": 1.6727, "step": 6012 }, { "epoch": 0.3351541162699961, "grad_norm": 0.5339307188987732, "learning_rate": 7.628664119268023e-05, "loss": 1.7325, "step": 6013 }, { "epoch": 0.33520985452315927, "grad_norm": 0.533289909362793, "learning_rate": 7.627906638949895e-05, "loss": 1.5102, "step": 6014 }, { "epoch": 0.3352655927763224, "grad_norm": 0.5171735286712646, "learning_rate": 7.62714907528989e-05, "loss": 1.5725, "step": 6015 }, { "epoch": 0.3353213310294855, "grad_norm": 0.585667610168457, "learning_rate": 7.626391428312035e-05, "loss": 1.8119, "step": 6016 }, { "epoch": 0.33537706928264865, "grad_norm": 0.504396378993988, "learning_rate": 7.625633698040357e-05, "loss": 1.4209, "step": 6017 }, { "epoch": 0.33543280753581184, "grad_norm": 0.5608323216438293, "learning_rate": 7.624875884498886e-05, "loss": 1.8436, "step": 6018 }, { "epoch": 0.33548854578897497, "grad_norm": 0.5625400543212891, "learning_rate": 7.624117987711656e-05, "loss": 1.836, "step": 6019 }, { "epoch": 0.3355442840421381, "grad_norm": 0.6377468109130859, "learning_rate": 7.623360007702702e-05, "loss": 1.7539, "step": 6020 }, { "epoch": 0.3356000222953013, "grad_norm": 0.556115984916687, "learning_rate": 7.622601944496064e-05, "loss": 1.6686, "step": 6021 }, { "epoch": 0.3356557605484644, "grad_norm": 0.49739575386047363, "learning_rate": 7.621843798115785e-05, "loss": 1.5361, "step": 6022 }, { "epoch": 0.33571149880162754, "grad_norm": 0.5968783497810364, "learning_rate": 7.621085568585905e-05, "loss": 1.8225, "step": 6023 }, { "epoch": 0.3357672370547907, "grad_norm": 0.575768232345581, "learning_rate": 7.620327255930474e-05, "loss": 1.908, "step": 6024 }, { "epoch": 0.33582297530795385, "grad_norm": 0.5628235340118408, "learning_rate": 7.61956886017354e-05, "loss": 1.6388, "step": 6025 }, { "epoch": 0.335878713561117, "grad_norm": 0.5842387676239014, "learning_rate": 7.618810381339155e-05, "loss": 1.8774, "step": 6026 }, { "epoch": 0.33593445181428017, "grad_norm": 0.5307137370109558, "learning_rate": 7.618051819451373e-05, "loss": 1.6372, "step": 6027 }, { "epoch": 0.3359901900674433, "grad_norm": 0.5524066090583801, "learning_rate": 7.617293174534253e-05, "loss": 1.7415, "step": 6028 }, { "epoch": 0.3360459283206064, "grad_norm": 0.5315592885017395, "learning_rate": 7.616534446611851e-05, "loss": 1.6005, "step": 6029 }, { "epoch": 0.33610166657376955, "grad_norm": 0.5379803776741028, "learning_rate": 7.615775635708234e-05, "loss": 1.6998, "step": 6030 }, { "epoch": 0.33615740482693274, "grad_norm": 0.593471884727478, "learning_rate": 7.615016741847463e-05, "loss": 1.6948, "step": 6031 }, { "epoch": 0.33621314308009587, "grad_norm": 0.5759322643280029, "learning_rate": 7.614257765053609e-05, "loss": 1.5575, "step": 6032 }, { "epoch": 0.336268881333259, "grad_norm": 0.5627144575119019, "learning_rate": 7.61349870535074e-05, "loss": 1.7633, "step": 6033 }, { "epoch": 0.3363246195864222, "grad_norm": 0.5872805714607239, "learning_rate": 7.612739562762929e-05, "loss": 1.8196, "step": 6034 }, { "epoch": 0.3363803578395853, "grad_norm": 0.5651592016220093, "learning_rate": 7.611980337314254e-05, "loss": 1.7916, "step": 6035 }, { "epoch": 0.33643609609274844, "grad_norm": 0.5263227820396423, "learning_rate": 7.61122102902879e-05, "loss": 1.6909, "step": 6036 }, { "epoch": 0.3364918343459116, "grad_norm": 0.5474349856376648, "learning_rate": 7.610461637930621e-05, "loss": 1.7166, "step": 6037 }, { "epoch": 0.33654757259907475, "grad_norm": 0.5443328022956848, "learning_rate": 7.609702164043829e-05, "loss": 1.6479, "step": 6038 }, { "epoch": 0.3366033108522379, "grad_norm": 0.5788392424583435, "learning_rate": 7.6089426073925e-05, "loss": 1.7645, "step": 6039 }, { "epoch": 0.336659049105401, "grad_norm": 0.5407717823982239, "learning_rate": 7.608182968000721e-05, "loss": 1.7543, "step": 6040 }, { "epoch": 0.3367147873585642, "grad_norm": 0.5548073649406433, "learning_rate": 7.607423245892586e-05, "loss": 1.6023, "step": 6041 }, { "epoch": 0.3367705256117273, "grad_norm": 0.5452112555503845, "learning_rate": 7.606663441092188e-05, "loss": 1.7298, "step": 6042 }, { "epoch": 0.33682626386489045, "grad_norm": 0.5845810770988464, "learning_rate": 7.605903553623625e-05, "loss": 1.9093, "step": 6043 }, { "epoch": 0.33688200211805364, "grad_norm": 0.5392171740531921, "learning_rate": 7.605143583510991e-05, "loss": 1.7111, "step": 6044 }, { "epoch": 0.33693774037121677, "grad_norm": 0.51267009973526, "learning_rate": 7.604383530778396e-05, "loss": 1.5154, "step": 6045 }, { "epoch": 0.3369934786243799, "grad_norm": 0.5741301774978638, "learning_rate": 7.603623395449937e-05, "loss": 1.7287, "step": 6046 }, { "epoch": 0.3370492168775431, "grad_norm": 0.5356318354606628, "learning_rate": 7.602863177549724e-05, "loss": 1.7299, "step": 6047 }, { "epoch": 0.3371049551307062, "grad_norm": 0.5820077061653137, "learning_rate": 7.602102877101869e-05, "loss": 1.8304, "step": 6048 }, { "epoch": 0.33716069338386934, "grad_norm": 0.5404535531997681, "learning_rate": 7.60134249413048e-05, "loss": 1.5754, "step": 6049 }, { "epoch": 0.3372164316370325, "grad_norm": 0.5398672819137573, "learning_rate": 7.600582028659675e-05, "loss": 1.7943, "step": 6050 }, { "epoch": 0.33727216989019565, "grad_norm": 0.5376107692718506, "learning_rate": 7.59982148071357e-05, "loss": 1.4528, "step": 6051 }, { "epoch": 0.3373279081433588, "grad_norm": 0.5899469256401062, "learning_rate": 7.599060850316287e-05, "loss": 1.7503, "step": 6052 }, { "epoch": 0.3373836463965219, "grad_norm": 0.5668314695358276, "learning_rate": 7.598300137491946e-05, "loss": 1.7732, "step": 6053 }, { "epoch": 0.3374393846496851, "grad_norm": 0.6154149174690247, "learning_rate": 7.597539342264675e-05, "loss": 1.6534, "step": 6054 }, { "epoch": 0.3374951229028482, "grad_norm": 0.5487502813339233, "learning_rate": 7.596778464658599e-05, "loss": 1.6286, "step": 6055 }, { "epoch": 0.33755086115601135, "grad_norm": 0.5876896977424622, "learning_rate": 7.596017504697851e-05, "loss": 1.7787, "step": 6056 }, { "epoch": 0.33760659940917453, "grad_norm": 0.5587677359580994, "learning_rate": 7.595256462406564e-05, "loss": 1.7862, "step": 6057 }, { "epoch": 0.33766233766233766, "grad_norm": 0.5694131255149841, "learning_rate": 7.594495337808873e-05, "loss": 1.6926, "step": 6058 }, { "epoch": 0.3377180759155008, "grad_norm": 0.5591508150100708, "learning_rate": 7.593734130928918e-05, "loss": 1.6135, "step": 6059 }, { "epoch": 0.337773814168664, "grad_norm": 0.5355261564254761, "learning_rate": 7.592972841790837e-05, "loss": 1.5746, "step": 6060 }, { "epoch": 0.3378295524218271, "grad_norm": 0.5518434047698975, "learning_rate": 7.592211470418777e-05, "loss": 1.6457, "step": 6061 }, { "epoch": 0.33788529067499024, "grad_norm": 0.5891780257225037, "learning_rate": 7.59145001683688e-05, "loss": 1.7026, "step": 6062 }, { "epoch": 0.33794102892815336, "grad_norm": 0.5723276734352112, "learning_rate": 7.590688481069302e-05, "loss": 1.8168, "step": 6063 }, { "epoch": 0.33799676718131655, "grad_norm": 0.5468711853027344, "learning_rate": 7.589926863140187e-05, "loss": 1.607, "step": 6064 }, { "epoch": 0.3380525054344797, "grad_norm": 0.6062466502189636, "learning_rate": 7.589165163073695e-05, "loss": 1.9372, "step": 6065 }, { "epoch": 0.3381082436876428, "grad_norm": 0.5140287280082703, "learning_rate": 7.588403380893979e-05, "loss": 1.6545, "step": 6066 }, { "epoch": 0.338163981940806, "grad_norm": 0.5543786287307739, "learning_rate": 7.587641516625197e-05, "loss": 1.8205, "step": 6067 }, { "epoch": 0.3382197201939691, "grad_norm": 0.5844648480415344, "learning_rate": 7.586879570291514e-05, "loss": 1.8597, "step": 6068 }, { "epoch": 0.33827545844713225, "grad_norm": 0.5109902024269104, "learning_rate": 7.586117541917095e-05, "loss": 1.5266, "step": 6069 }, { "epoch": 0.33833119670029543, "grad_norm": 0.5208814740180969, "learning_rate": 7.585355431526104e-05, "loss": 1.721, "step": 6070 }, { "epoch": 0.33838693495345856, "grad_norm": 0.5144614577293396, "learning_rate": 7.584593239142712e-05, "loss": 1.624, "step": 6071 }, { "epoch": 0.3384426732066217, "grad_norm": 0.5855271220207214, "learning_rate": 7.583830964791094e-05, "loss": 1.8765, "step": 6072 }, { "epoch": 0.3384984114597849, "grad_norm": 0.5410987138748169, "learning_rate": 7.58306860849542e-05, "loss": 1.6027, "step": 6073 }, { "epoch": 0.338554149712948, "grad_norm": 0.6230753064155579, "learning_rate": 7.582306170279872e-05, "loss": 1.8485, "step": 6074 }, { "epoch": 0.33860988796611113, "grad_norm": 0.5517315864562988, "learning_rate": 7.581543650168628e-05, "loss": 1.7822, "step": 6075 }, { "epoch": 0.33866562621927426, "grad_norm": 0.5739060044288635, "learning_rate": 7.580781048185871e-05, "loss": 1.6443, "step": 6076 }, { "epoch": 0.33872136447243745, "grad_norm": 0.5618791580200195, "learning_rate": 7.580018364355785e-05, "loss": 1.5943, "step": 6077 }, { "epoch": 0.3387771027256006, "grad_norm": 0.5723870396614075, "learning_rate": 7.579255598702562e-05, "loss": 1.4501, "step": 6078 }, { "epoch": 0.3388328409787637, "grad_norm": 0.5427421927452087, "learning_rate": 7.578492751250386e-05, "loss": 1.7001, "step": 6079 }, { "epoch": 0.3388885792319269, "grad_norm": 0.5765356421470642, "learning_rate": 7.577729822023455e-05, "loss": 1.6652, "step": 6080 }, { "epoch": 0.33894431748509, "grad_norm": 0.5492302179336548, "learning_rate": 7.576966811045963e-05, "loss": 1.6988, "step": 6081 }, { "epoch": 0.33900005573825315, "grad_norm": 0.5814895033836365, "learning_rate": 7.576203718342108e-05, "loss": 1.9584, "step": 6082 }, { "epoch": 0.33905579399141633, "grad_norm": 0.6068232655525208, "learning_rate": 7.575440543936092e-05, "loss": 2.0357, "step": 6083 }, { "epoch": 0.33911153224457946, "grad_norm": 0.5426899790763855, "learning_rate": 7.574677287852117e-05, "loss": 1.6323, "step": 6084 }, { "epoch": 0.3391672704977426, "grad_norm": 0.5811708569526672, "learning_rate": 7.573913950114391e-05, "loss": 1.538, "step": 6085 }, { "epoch": 0.3392230087509057, "grad_norm": 0.5753393769264221, "learning_rate": 7.573150530747122e-05, "loss": 1.6013, "step": 6086 }, { "epoch": 0.3392787470040689, "grad_norm": 0.5427485108375549, "learning_rate": 7.572387029774519e-05, "loss": 1.6444, "step": 6087 }, { "epoch": 0.33933448525723203, "grad_norm": 0.5431930422782898, "learning_rate": 7.571623447220797e-05, "loss": 1.6733, "step": 6088 }, { "epoch": 0.33939022351039516, "grad_norm": 0.555357813835144, "learning_rate": 7.570859783110176e-05, "loss": 1.7219, "step": 6089 }, { "epoch": 0.33944596176355835, "grad_norm": 0.5578222274780273, "learning_rate": 7.570096037466869e-05, "loss": 1.407, "step": 6090 }, { "epoch": 0.3395017000167215, "grad_norm": 0.5213090777397156, "learning_rate": 7.5693322103151e-05, "loss": 1.4608, "step": 6091 }, { "epoch": 0.3395574382698846, "grad_norm": 0.5651876330375671, "learning_rate": 7.568568301679096e-05, "loss": 1.6756, "step": 6092 }, { "epoch": 0.3396131765230478, "grad_norm": 0.5914562940597534, "learning_rate": 7.56780431158308e-05, "loss": 1.7648, "step": 6093 }, { "epoch": 0.3396689147762109, "grad_norm": 0.5577222108840942, "learning_rate": 7.567040240051281e-05, "loss": 1.6954, "step": 6094 }, { "epoch": 0.33972465302937405, "grad_norm": 0.5938786268234253, "learning_rate": 7.566276087107935e-05, "loss": 1.8131, "step": 6095 }, { "epoch": 0.33978039128253723, "grad_norm": 0.5387003421783447, "learning_rate": 7.565511852777274e-05, "loss": 1.6522, "step": 6096 }, { "epoch": 0.33983612953570036, "grad_norm": 0.5465493202209473, "learning_rate": 7.564747537083534e-05, "loss": 1.6971, "step": 6097 }, { "epoch": 0.3398918677888635, "grad_norm": 0.5273247361183167, "learning_rate": 7.563983140050955e-05, "loss": 1.6759, "step": 6098 }, { "epoch": 0.3399476060420266, "grad_norm": 0.5733767151832581, "learning_rate": 7.563218661703782e-05, "loss": 1.7203, "step": 6099 }, { "epoch": 0.3400033442951898, "grad_norm": 0.6077031493186951, "learning_rate": 7.562454102066255e-05, "loss": 1.9364, "step": 6100 }, { "epoch": 0.34005908254835293, "grad_norm": 0.5688176155090332, "learning_rate": 7.561689461162625e-05, "loss": 1.6623, "step": 6101 }, { "epoch": 0.34011482080151606, "grad_norm": 0.5663187503814697, "learning_rate": 7.56092473901714e-05, "loss": 1.567, "step": 6102 }, { "epoch": 0.34017055905467924, "grad_norm": 0.6150177121162415, "learning_rate": 7.560159935654056e-05, "loss": 1.8714, "step": 6103 }, { "epoch": 0.3402262973078424, "grad_norm": 0.5515531301498413, "learning_rate": 7.559395051097624e-05, "loss": 1.6713, "step": 6104 }, { "epoch": 0.3402820355610055, "grad_norm": 0.687240481376648, "learning_rate": 7.558630085372105e-05, "loss": 1.6552, "step": 6105 }, { "epoch": 0.3403377738141687, "grad_norm": 0.5493181943893433, "learning_rate": 7.557865038501756e-05, "loss": 1.65, "step": 6106 }, { "epoch": 0.3403935120673318, "grad_norm": 0.5683436989784241, "learning_rate": 7.55709991051084e-05, "loss": 1.8507, "step": 6107 }, { "epoch": 0.34044925032049494, "grad_norm": 0.5895001292228699, "learning_rate": 7.556334701423627e-05, "loss": 2.0143, "step": 6108 }, { "epoch": 0.3405049885736581, "grad_norm": 0.5967059135437012, "learning_rate": 7.555569411264378e-05, "loss": 1.9006, "step": 6109 }, { "epoch": 0.34056072682682126, "grad_norm": 0.5140407085418701, "learning_rate": 7.554804040057369e-05, "loss": 1.4028, "step": 6110 }, { "epoch": 0.3406164650799844, "grad_norm": 0.5586955547332764, "learning_rate": 7.554038587826872e-05, "loss": 1.6835, "step": 6111 }, { "epoch": 0.3406722033331475, "grad_norm": 0.4853399395942688, "learning_rate": 7.553273054597163e-05, "loss": 1.5901, "step": 6112 }, { "epoch": 0.3407279415863107, "grad_norm": 0.5674946308135986, "learning_rate": 7.552507440392518e-05, "loss": 1.8776, "step": 6113 }, { "epoch": 0.34078367983947383, "grad_norm": 0.5115534663200378, "learning_rate": 7.551741745237218e-05, "loss": 1.4647, "step": 6114 }, { "epoch": 0.34083941809263696, "grad_norm": 0.6239203214645386, "learning_rate": 7.55097596915555e-05, "loss": 1.8638, "step": 6115 }, { "epoch": 0.34089515634580014, "grad_norm": 0.5367839336395264, "learning_rate": 7.550210112171796e-05, "loss": 1.7598, "step": 6116 }, { "epoch": 0.34095089459896327, "grad_norm": 0.5434908270835876, "learning_rate": 7.549444174310246e-05, "loss": 1.8239, "step": 6117 }, { "epoch": 0.3410066328521264, "grad_norm": 0.5503940582275391, "learning_rate": 7.548678155595192e-05, "loss": 1.7103, "step": 6118 }, { "epoch": 0.3410623711052896, "grad_norm": 0.5601882338523865, "learning_rate": 7.547912056050925e-05, "loss": 1.8269, "step": 6119 }, { "epoch": 0.3411181093584527, "grad_norm": 0.5472147464752197, "learning_rate": 7.547145875701744e-05, "loss": 1.7221, "step": 6120 }, { "epoch": 0.34117384761161584, "grad_norm": 0.5327697396278381, "learning_rate": 7.546379614571947e-05, "loss": 1.6879, "step": 6121 }, { "epoch": 0.341229585864779, "grad_norm": 0.5991697311401367, "learning_rate": 7.545613272685834e-05, "loss": 1.9402, "step": 6122 }, { "epoch": 0.34128532411794216, "grad_norm": 0.5222532749176025, "learning_rate": 7.544846850067711e-05, "loss": 1.6331, "step": 6123 }, { "epoch": 0.3413410623711053, "grad_norm": 0.5213292837142944, "learning_rate": 7.544080346741884e-05, "loss": 1.6547, "step": 6124 }, { "epoch": 0.3413968006242684, "grad_norm": 0.516547441482544, "learning_rate": 7.54331376273266e-05, "loss": 1.5988, "step": 6125 }, { "epoch": 0.3414525388774316, "grad_norm": 0.5505926609039307, "learning_rate": 7.542547098064351e-05, "loss": 1.8314, "step": 6126 }, { "epoch": 0.34150827713059473, "grad_norm": 0.5631290078163147, "learning_rate": 7.541780352761275e-05, "loss": 1.7797, "step": 6127 }, { "epoch": 0.34156401538375786, "grad_norm": 0.5578431487083435, "learning_rate": 7.541013526847745e-05, "loss": 1.7118, "step": 6128 }, { "epoch": 0.34161975363692104, "grad_norm": 0.6077129244804382, "learning_rate": 7.540246620348079e-05, "loss": 1.8582, "step": 6129 }, { "epoch": 0.34167549189008417, "grad_norm": 0.5378260612487793, "learning_rate": 7.539479633286604e-05, "loss": 1.5773, "step": 6130 }, { "epoch": 0.3417312301432473, "grad_norm": 0.5147218108177185, "learning_rate": 7.538712565687637e-05, "loss": 1.6079, "step": 6131 }, { "epoch": 0.34178696839641043, "grad_norm": 0.5637179017066956, "learning_rate": 7.537945417575513e-05, "loss": 1.7772, "step": 6132 }, { "epoch": 0.3418427066495736, "grad_norm": 0.5718836188316345, "learning_rate": 7.537178188974556e-05, "loss": 1.8646, "step": 6133 }, { "epoch": 0.34189844490273674, "grad_norm": 0.5593611001968384, "learning_rate": 7.5364108799091e-05, "loss": 1.7059, "step": 6134 }, { "epoch": 0.34195418315589987, "grad_norm": 0.5491702556610107, "learning_rate": 7.535643490403478e-05, "loss": 1.5904, "step": 6135 }, { "epoch": 0.34200992140906306, "grad_norm": 0.5673286318778992, "learning_rate": 7.534876020482032e-05, "loss": 1.6569, "step": 6136 }, { "epoch": 0.3420656596622262, "grad_norm": 0.555279552936554, "learning_rate": 7.534108470169094e-05, "loss": 1.947, "step": 6137 }, { "epoch": 0.3421213979153893, "grad_norm": 0.5502607226371765, "learning_rate": 7.533340839489011e-05, "loss": 1.6199, "step": 6138 }, { "epoch": 0.3421771361685525, "grad_norm": 0.5711556673049927, "learning_rate": 7.532573128466129e-05, "loss": 1.901, "step": 6139 }, { "epoch": 0.3422328744217156, "grad_norm": 0.5685670375823975, "learning_rate": 7.53180533712479e-05, "loss": 1.7284, "step": 6140 }, { "epoch": 0.34228861267487876, "grad_norm": 0.555075466632843, "learning_rate": 7.53103746548935e-05, "loss": 1.8184, "step": 6141 }, { "epoch": 0.34234435092804194, "grad_norm": 0.5404545664787292, "learning_rate": 7.530269513584158e-05, "loss": 1.6444, "step": 6142 }, { "epoch": 0.34240008918120507, "grad_norm": 0.5739527344703674, "learning_rate": 7.52950148143357e-05, "loss": 1.5748, "step": 6143 }, { "epoch": 0.3424558274343682, "grad_norm": 0.5569913983345032, "learning_rate": 7.528733369061942e-05, "loss": 1.8188, "step": 6144 }, { "epoch": 0.3425115656875313, "grad_norm": 0.5430577397346497, "learning_rate": 7.527965176493636e-05, "loss": 1.5839, "step": 6145 }, { "epoch": 0.3425673039406945, "grad_norm": 0.5321673154830933, "learning_rate": 7.527196903753011e-05, "loss": 1.3862, "step": 6146 }, { "epoch": 0.34262304219385764, "grad_norm": 0.5757884979248047, "learning_rate": 7.526428550864437e-05, "loss": 1.5308, "step": 6147 }, { "epoch": 0.34267878044702077, "grad_norm": 0.556651771068573, "learning_rate": 7.525660117852279e-05, "loss": 1.7377, "step": 6148 }, { "epoch": 0.34273451870018395, "grad_norm": 0.5236818790435791, "learning_rate": 7.524891604740908e-05, "loss": 1.7305, "step": 6149 }, { "epoch": 0.3427902569533471, "grad_norm": 0.5686874985694885, "learning_rate": 7.524123011554697e-05, "loss": 1.5379, "step": 6150 }, { "epoch": 0.3428459952065102, "grad_norm": 0.5817770957946777, "learning_rate": 7.52335433831802e-05, "loss": 1.7069, "step": 6151 }, { "epoch": 0.3429017334596734, "grad_norm": 0.5717275738716125, "learning_rate": 7.522585585055255e-05, "loss": 1.8944, "step": 6152 }, { "epoch": 0.3429574717128365, "grad_norm": 0.5469644665718079, "learning_rate": 7.521816751790783e-05, "loss": 1.622, "step": 6153 }, { "epoch": 0.34301320996599965, "grad_norm": 0.5735164880752563, "learning_rate": 7.521047838548988e-05, "loss": 1.8005, "step": 6154 }, { "epoch": 0.3430689482191628, "grad_norm": 0.5070759057998657, "learning_rate": 7.520278845354254e-05, "loss": 1.4795, "step": 6155 }, { "epoch": 0.34312468647232597, "grad_norm": 0.5179046392440796, "learning_rate": 7.519509772230968e-05, "loss": 1.5029, "step": 6156 }, { "epoch": 0.3431804247254891, "grad_norm": 0.5747403502464294, "learning_rate": 7.518740619203523e-05, "loss": 1.7075, "step": 6157 }, { "epoch": 0.3432361629786522, "grad_norm": 0.6233847141265869, "learning_rate": 7.517971386296309e-05, "loss": 1.9524, "step": 6158 }, { "epoch": 0.3432919012318154, "grad_norm": 0.5195590853691101, "learning_rate": 7.517202073533727e-05, "loss": 1.533, "step": 6159 }, { "epoch": 0.34334763948497854, "grad_norm": 0.6035041213035583, "learning_rate": 7.516432680940168e-05, "loss": 1.7298, "step": 6160 }, { "epoch": 0.34340337773814167, "grad_norm": 0.59979248046875, "learning_rate": 7.515663208540037e-05, "loss": 1.7295, "step": 6161 }, { "epoch": 0.34345911599130485, "grad_norm": 0.5844981074333191, "learning_rate": 7.514893656357738e-05, "loss": 1.756, "step": 6162 }, { "epoch": 0.343514854244468, "grad_norm": 0.5281308889389038, "learning_rate": 7.514124024417674e-05, "loss": 1.7149, "step": 6163 }, { "epoch": 0.3435705924976311, "grad_norm": 0.5352674126625061, "learning_rate": 7.513354312744256e-05, "loss": 1.7262, "step": 6164 }, { "epoch": 0.3436263307507943, "grad_norm": 0.562127411365509, "learning_rate": 7.512584521361891e-05, "loss": 1.6434, "step": 6165 }, { "epoch": 0.3436820690039574, "grad_norm": 0.5535931587219238, "learning_rate": 7.511814650294994e-05, "loss": 1.5353, "step": 6166 }, { "epoch": 0.34373780725712055, "grad_norm": 0.543641209602356, "learning_rate": 7.511044699567981e-05, "loss": 1.8312, "step": 6167 }, { "epoch": 0.3437935455102837, "grad_norm": 0.559559166431427, "learning_rate": 7.510274669205273e-05, "loss": 1.6326, "step": 6168 }, { "epoch": 0.34384928376344687, "grad_norm": 0.5449449419975281, "learning_rate": 7.509504559231287e-05, "loss": 1.7319, "step": 6169 }, { "epoch": 0.34390502201661, "grad_norm": 0.5315961837768555, "learning_rate": 7.508734369670447e-05, "loss": 1.69, "step": 6170 }, { "epoch": 0.3439607602697731, "grad_norm": 0.5506524443626404, "learning_rate": 7.507964100547181e-05, "loss": 1.6961, "step": 6171 }, { "epoch": 0.3440164985229363, "grad_norm": 0.5587935447692871, "learning_rate": 7.507193751885915e-05, "loss": 1.794, "step": 6172 }, { "epoch": 0.34407223677609944, "grad_norm": 0.5281456112861633, "learning_rate": 7.506423323711083e-05, "loss": 1.637, "step": 6173 }, { "epoch": 0.34412797502926257, "grad_norm": 0.5220721960067749, "learning_rate": 7.505652816047115e-05, "loss": 1.4696, "step": 6174 }, { "epoch": 0.34418371328242575, "grad_norm": 0.565938413143158, "learning_rate": 7.504882228918449e-05, "loss": 1.6329, "step": 6175 }, { "epoch": 0.3442394515355889, "grad_norm": 0.532490074634552, "learning_rate": 7.504111562349524e-05, "loss": 1.5929, "step": 6176 }, { "epoch": 0.344295189788752, "grad_norm": 0.5559155941009521, "learning_rate": 7.503340816364779e-05, "loss": 1.6935, "step": 6177 }, { "epoch": 0.3443509280419152, "grad_norm": 0.5494531989097595, "learning_rate": 7.502569990988659e-05, "loss": 1.5508, "step": 6178 }, { "epoch": 0.3444066662950783, "grad_norm": 0.48615095019340515, "learning_rate": 7.50179908624561e-05, "loss": 1.3464, "step": 6179 }, { "epoch": 0.34446240454824145, "grad_norm": 0.543402373790741, "learning_rate": 7.501028102160082e-05, "loss": 1.6306, "step": 6180 }, { "epoch": 0.3445181428014046, "grad_norm": 0.5688214898109436, "learning_rate": 7.500257038756522e-05, "loss": 1.9743, "step": 6181 }, { "epoch": 0.34457388105456777, "grad_norm": 0.5336653590202332, "learning_rate": 7.499485896059389e-05, "loss": 1.7876, "step": 6182 }, { "epoch": 0.3446296193077309, "grad_norm": 0.6009781360626221, "learning_rate": 7.498714674093134e-05, "loss": 1.599, "step": 6183 }, { "epoch": 0.344685357560894, "grad_norm": 0.5108974575996399, "learning_rate": 7.497943372882219e-05, "loss": 1.3671, "step": 6184 }, { "epoch": 0.3447410958140572, "grad_norm": 0.5875006914138794, "learning_rate": 7.497171992451104e-05, "loss": 1.8846, "step": 6185 }, { "epoch": 0.34479683406722034, "grad_norm": 0.5741475820541382, "learning_rate": 7.496400532824252e-05, "loss": 1.8147, "step": 6186 }, { "epoch": 0.34485257232038347, "grad_norm": 0.5426183938980103, "learning_rate": 7.495628994026131e-05, "loss": 1.8584, "step": 6187 }, { "epoch": 0.34490831057354665, "grad_norm": 0.5665351152420044, "learning_rate": 7.49485737608121e-05, "loss": 1.6254, "step": 6188 }, { "epoch": 0.3449640488267098, "grad_norm": 0.6417822241783142, "learning_rate": 7.494085679013959e-05, "loss": 1.5997, "step": 6189 }, { "epoch": 0.3450197870798729, "grad_norm": 0.580936849117279, "learning_rate": 7.49331390284885e-05, "loss": 1.7723, "step": 6190 }, { "epoch": 0.34507552533303604, "grad_norm": 0.5405949354171753, "learning_rate": 7.492542047610362e-05, "loss": 1.7536, "step": 6191 }, { "epoch": 0.3451312635861992, "grad_norm": 0.567459225654602, "learning_rate": 7.491770113322972e-05, "loss": 1.5518, "step": 6192 }, { "epoch": 0.34518700183936235, "grad_norm": 0.5930157899856567, "learning_rate": 7.490998100011164e-05, "loss": 1.8805, "step": 6193 }, { "epoch": 0.3452427400925255, "grad_norm": 0.5590851902961731, "learning_rate": 7.490226007699418e-05, "loss": 1.7369, "step": 6194 }, { "epoch": 0.34529847834568866, "grad_norm": 0.5540249943733215, "learning_rate": 7.489453836412224e-05, "loss": 1.7199, "step": 6195 }, { "epoch": 0.3453542165988518, "grad_norm": 0.6100202798843384, "learning_rate": 7.488681586174066e-05, "loss": 1.8962, "step": 6196 }, { "epoch": 0.3454099548520149, "grad_norm": 0.5453261137008667, "learning_rate": 7.48790925700944e-05, "loss": 1.6779, "step": 6197 }, { "epoch": 0.3454656931051781, "grad_norm": 0.6191526651382446, "learning_rate": 7.487136848942838e-05, "loss": 1.837, "step": 6198 }, { "epoch": 0.34552143135834124, "grad_norm": 0.5043689608573914, "learning_rate": 7.486364361998754e-05, "loss": 1.5438, "step": 6199 }, { "epoch": 0.34557716961150436, "grad_norm": 0.5927308797836304, "learning_rate": 7.485591796201692e-05, "loss": 1.8893, "step": 6200 }, { "epoch": 0.34563290786466755, "grad_norm": 0.5387723445892334, "learning_rate": 7.484819151576147e-05, "loss": 1.7063, "step": 6201 }, { "epoch": 0.3456886461178307, "grad_norm": 0.5273063778877258, "learning_rate": 7.48404642814663e-05, "loss": 1.6052, "step": 6202 }, { "epoch": 0.3457443843709938, "grad_norm": 0.5235535502433777, "learning_rate": 7.48327362593764e-05, "loss": 1.5859, "step": 6203 }, { "epoch": 0.34580012262415694, "grad_norm": 0.5952630043029785, "learning_rate": 7.48250074497369e-05, "loss": 1.9669, "step": 6204 }, { "epoch": 0.3458558608773201, "grad_norm": 0.5512803196907043, "learning_rate": 7.48172778527929e-05, "loss": 1.6103, "step": 6205 }, { "epoch": 0.34591159913048325, "grad_norm": 0.5485497117042542, "learning_rate": 7.480954746878955e-05, "loss": 1.4648, "step": 6206 }, { "epoch": 0.3459673373836464, "grad_norm": 0.5755242109298706, "learning_rate": 7.480181629797201e-05, "loss": 1.7882, "step": 6207 }, { "epoch": 0.34602307563680956, "grad_norm": 0.586279034614563, "learning_rate": 7.479408434058545e-05, "loss": 1.757, "step": 6208 }, { "epoch": 0.3460788138899727, "grad_norm": 0.6023716926574707, "learning_rate": 7.47863515968751e-05, "loss": 1.6573, "step": 6209 }, { "epoch": 0.3461345521431358, "grad_norm": 0.5629722476005554, "learning_rate": 7.477861806708618e-05, "loss": 1.8348, "step": 6210 }, { "epoch": 0.346190290396299, "grad_norm": 0.64363032579422, "learning_rate": 7.477088375146397e-05, "loss": 2.1581, "step": 6211 }, { "epoch": 0.34624602864946213, "grad_norm": 0.5952073335647583, "learning_rate": 7.476314865025376e-05, "loss": 1.7823, "step": 6212 }, { "epoch": 0.34630176690262526, "grad_norm": 0.5444992780685425, "learning_rate": 7.475541276370083e-05, "loss": 1.5717, "step": 6213 }, { "epoch": 0.3463575051557884, "grad_norm": 0.5698938965797424, "learning_rate": 7.474767609205057e-05, "loss": 1.8471, "step": 6214 }, { "epoch": 0.3464132434089516, "grad_norm": 0.521270751953125, "learning_rate": 7.473993863554832e-05, "loss": 1.5991, "step": 6215 }, { "epoch": 0.3464689816621147, "grad_norm": 0.5909140110015869, "learning_rate": 7.473220039443942e-05, "loss": 1.8795, "step": 6216 }, { "epoch": 0.34652471991527783, "grad_norm": 0.5595431923866272, "learning_rate": 7.472446136896935e-05, "loss": 1.5189, "step": 6217 }, { "epoch": 0.346580458168441, "grad_norm": 0.5549118518829346, "learning_rate": 7.471672155938351e-05, "loss": 1.5113, "step": 6218 }, { "epoch": 0.34663619642160415, "grad_norm": 0.5784697532653809, "learning_rate": 7.470898096592738e-05, "loss": 1.62, "step": 6219 }, { "epoch": 0.3466919346747673, "grad_norm": 0.582065224647522, "learning_rate": 7.470123958884643e-05, "loss": 1.7652, "step": 6220 }, { "epoch": 0.34674767292793046, "grad_norm": 0.5781643986701965, "learning_rate": 7.469349742838619e-05, "loss": 1.816, "step": 6221 }, { "epoch": 0.3468034111810936, "grad_norm": 0.5270411968231201, "learning_rate": 7.468575448479217e-05, "loss": 1.4521, "step": 6222 }, { "epoch": 0.3468591494342567, "grad_norm": 0.5568832159042358, "learning_rate": 7.467801075830995e-05, "loss": 1.6393, "step": 6223 }, { "epoch": 0.3469148876874199, "grad_norm": 0.6102818846702576, "learning_rate": 7.467026624918511e-05, "loss": 1.8486, "step": 6224 }, { "epoch": 0.34697062594058303, "grad_norm": 0.6040059328079224, "learning_rate": 7.466252095766326e-05, "loss": 1.9639, "step": 6225 }, { "epoch": 0.34702636419374616, "grad_norm": 0.5577713847160339, "learning_rate": 7.465477488399004e-05, "loss": 1.7672, "step": 6226 }, { "epoch": 0.3470821024469093, "grad_norm": 0.6022251844406128, "learning_rate": 7.464702802841111e-05, "loss": 1.8587, "step": 6227 }, { "epoch": 0.3471378407000725, "grad_norm": 0.6043629050254822, "learning_rate": 7.463928039117216e-05, "loss": 1.6798, "step": 6228 }, { "epoch": 0.3471935789532356, "grad_norm": 0.5550456643104553, "learning_rate": 7.463153197251889e-05, "loss": 1.6258, "step": 6229 }, { "epoch": 0.34724931720639873, "grad_norm": 0.5740575790405273, "learning_rate": 7.462378277269704e-05, "loss": 1.6253, "step": 6230 }, { "epoch": 0.3473050554595619, "grad_norm": 0.5348698496818542, "learning_rate": 7.461603279195235e-05, "loss": 1.7417, "step": 6231 }, { "epoch": 0.34736079371272505, "grad_norm": 0.5703982710838318, "learning_rate": 7.460828203053063e-05, "loss": 1.8448, "step": 6232 }, { "epoch": 0.3474165319658882, "grad_norm": 0.5818899869918823, "learning_rate": 7.460053048867768e-05, "loss": 1.783, "step": 6233 }, { "epoch": 0.34747227021905136, "grad_norm": 0.5640279054641724, "learning_rate": 7.459277816663934e-05, "loss": 1.8757, "step": 6234 }, { "epoch": 0.3475280084722145, "grad_norm": 0.519883394241333, "learning_rate": 7.458502506466147e-05, "loss": 1.622, "step": 6235 }, { "epoch": 0.3475837467253776, "grad_norm": 0.5207779407501221, "learning_rate": 7.457727118298991e-05, "loss": 1.4801, "step": 6236 }, { "epoch": 0.34763948497854075, "grad_norm": 0.5227778553962708, "learning_rate": 7.456951652187063e-05, "loss": 1.6797, "step": 6237 }, { "epoch": 0.34769522323170393, "grad_norm": 0.6305186748504639, "learning_rate": 7.456176108154956e-05, "loss": 2.0804, "step": 6238 }, { "epoch": 0.34775096148486706, "grad_norm": 0.6344568133354187, "learning_rate": 7.45540048622726e-05, "loss": 1.881, "step": 6239 }, { "epoch": 0.3478066997380302, "grad_norm": 0.5849176645278931, "learning_rate": 7.454624786428576e-05, "loss": 1.7058, "step": 6240 }, { "epoch": 0.3478624379911934, "grad_norm": 0.5511870980262756, "learning_rate": 7.453849008783507e-05, "loss": 1.7262, "step": 6241 }, { "epoch": 0.3479181762443565, "grad_norm": 0.590895414352417, "learning_rate": 7.453073153316654e-05, "loss": 1.7584, "step": 6242 }, { "epoch": 0.34797391449751963, "grad_norm": 0.5347367525100708, "learning_rate": 7.452297220052624e-05, "loss": 1.7057, "step": 6243 }, { "epoch": 0.3480296527506828, "grad_norm": 0.5574136972427368, "learning_rate": 7.451521209016021e-05, "loss": 1.8928, "step": 6244 }, { "epoch": 0.34808539100384595, "grad_norm": 0.5794700384140015, "learning_rate": 7.450745120231462e-05, "loss": 1.9479, "step": 6245 }, { "epoch": 0.3481411292570091, "grad_norm": 0.5384243726730347, "learning_rate": 7.449968953723554e-05, "loss": 1.678, "step": 6246 }, { "epoch": 0.34819686751017226, "grad_norm": 0.560627281665802, "learning_rate": 7.449192709516916e-05, "loss": 1.7936, "step": 6247 }, { "epoch": 0.3482526057633354, "grad_norm": 0.6408939957618713, "learning_rate": 7.448416387636166e-05, "loss": 1.8022, "step": 6248 }, { "epoch": 0.3483083440164985, "grad_norm": 0.5532012581825256, "learning_rate": 7.447639988105922e-05, "loss": 1.6318, "step": 6249 }, { "epoch": 0.34836408226966165, "grad_norm": 0.6528187990188599, "learning_rate": 7.44686351095081e-05, "loss": 2.0857, "step": 6250 }, { "epoch": 0.34841982052282483, "grad_norm": 0.5271794199943542, "learning_rate": 7.446086956195452e-05, "loss": 1.6236, "step": 6251 }, { "epoch": 0.34847555877598796, "grad_norm": 0.6053271293640137, "learning_rate": 7.445310323864478e-05, "loss": 1.895, "step": 6252 }, { "epoch": 0.3485312970291511, "grad_norm": 0.5544027090072632, "learning_rate": 7.444533613982519e-05, "loss": 1.6158, "step": 6253 }, { "epoch": 0.3485870352823143, "grad_norm": 0.5839915871620178, "learning_rate": 7.443756826574204e-05, "loss": 1.7887, "step": 6254 }, { "epoch": 0.3486427735354774, "grad_norm": 0.5946133732795715, "learning_rate": 7.442979961664171e-05, "loss": 1.7628, "step": 6255 }, { "epoch": 0.34869851178864053, "grad_norm": 0.5356269478797913, "learning_rate": 7.442203019277059e-05, "loss": 1.6563, "step": 6256 }, { "epoch": 0.3487542500418037, "grad_norm": 0.5791853666305542, "learning_rate": 7.441425999437505e-05, "loss": 1.7944, "step": 6257 }, { "epoch": 0.34880998829496684, "grad_norm": 0.514127254486084, "learning_rate": 7.440648902170153e-05, "loss": 1.6007, "step": 6258 }, { "epoch": 0.34886572654813, "grad_norm": 0.5857915878295898, "learning_rate": 7.439871727499648e-05, "loss": 1.6401, "step": 6259 }, { "epoch": 0.3489214648012931, "grad_norm": 0.5310158729553223, "learning_rate": 7.439094475450638e-05, "loss": 1.6605, "step": 6260 }, { "epoch": 0.3489772030544563, "grad_norm": 0.5631361603736877, "learning_rate": 7.43831714604777e-05, "loss": 1.7541, "step": 6261 }, { "epoch": 0.3490329413076194, "grad_norm": 0.5697758197784424, "learning_rate": 7.4375397393157e-05, "loss": 1.5488, "step": 6262 }, { "epoch": 0.34908867956078254, "grad_norm": 0.5197820663452148, "learning_rate": 7.43676225527908e-05, "loss": 1.7463, "step": 6263 }, { "epoch": 0.34914441781394573, "grad_norm": 0.6369295120239258, "learning_rate": 7.43598469396257e-05, "loss": 2.106, "step": 6264 }, { "epoch": 0.34920015606710886, "grad_norm": 0.5751513242721558, "learning_rate": 7.435207055390828e-05, "loss": 1.8146, "step": 6265 }, { "epoch": 0.349255894320272, "grad_norm": 0.5785645246505737, "learning_rate": 7.434429339588516e-05, "loss": 1.8598, "step": 6266 }, { "epoch": 0.34931163257343517, "grad_norm": 0.5536054968833923, "learning_rate": 7.4336515465803e-05, "loss": 1.7508, "step": 6267 }, { "epoch": 0.3493673708265983, "grad_norm": 0.5529542565345764, "learning_rate": 7.432873676390845e-05, "loss": 1.7749, "step": 6268 }, { "epoch": 0.34942310907976143, "grad_norm": 0.5571187734603882, "learning_rate": 7.432095729044823e-05, "loss": 1.6954, "step": 6269 }, { "epoch": 0.3494788473329246, "grad_norm": 0.5445393323898315, "learning_rate": 7.431317704566902e-05, "loss": 1.5363, "step": 6270 }, { "epoch": 0.34953458558608774, "grad_norm": 0.5723183155059814, "learning_rate": 7.430539602981761e-05, "loss": 1.7007, "step": 6271 }, { "epoch": 0.34959032383925087, "grad_norm": 0.5553802847862244, "learning_rate": 7.429761424314075e-05, "loss": 1.9324, "step": 6272 }, { "epoch": 0.349646062092414, "grad_norm": 0.5308825969696045, "learning_rate": 7.428983168588522e-05, "loss": 1.6236, "step": 6273 }, { "epoch": 0.3497018003455772, "grad_norm": 0.5892744064331055, "learning_rate": 7.428204835829787e-05, "loss": 1.8567, "step": 6274 }, { "epoch": 0.3497575385987403, "grad_norm": 0.5890315175056458, "learning_rate": 7.42742642606255e-05, "loss": 1.7612, "step": 6275 }, { "epoch": 0.34981327685190344, "grad_norm": 0.5714004635810852, "learning_rate": 7.426647939311499e-05, "loss": 1.8783, "step": 6276 }, { "epoch": 0.3498690151050666, "grad_norm": 0.5221744775772095, "learning_rate": 7.425869375601324e-05, "loss": 1.533, "step": 6277 }, { "epoch": 0.34992475335822976, "grad_norm": 0.5754460692405701, "learning_rate": 7.425090734956717e-05, "loss": 1.7922, "step": 6278 }, { "epoch": 0.3499804916113929, "grad_norm": 0.5325612425804138, "learning_rate": 7.424312017402371e-05, "loss": 1.5523, "step": 6279 }, { "epoch": 0.35003622986455607, "grad_norm": 0.5452947020530701, "learning_rate": 7.423533222962984e-05, "loss": 1.7528, "step": 6280 }, { "epoch": 0.3500919681177192, "grad_norm": 0.5132524371147156, "learning_rate": 7.422754351663252e-05, "loss": 1.6118, "step": 6281 }, { "epoch": 0.35014770637088233, "grad_norm": 0.5661509037017822, "learning_rate": 7.421975403527877e-05, "loss": 1.7999, "step": 6282 }, { "epoch": 0.35020344462404546, "grad_norm": 0.5532317161560059, "learning_rate": 7.421196378581563e-05, "loss": 1.8317, "step": 6283 }, { "epoch": 0.35025918287720864, "grad_norm": 0.5239238142967224, "learning_rate": 7.420417276849018e-05, "loss": 1.6949, "step": 6284 }, { "epoch": 0.35031492113037177, "grad_norm": 0.5444215536117554, "learning_rate": 7.419638098354948e-05, "loss": 1.666, "step": 6285 }, { "epoch": 0.3503706593835349, "grad_norm": 0.5257874131202698, "learning_rate": 7.418858843124065e-05, "loss": 1.7663, "step": 6286 }, { "epoch": 0.3504263976366981, "grad_norm": 0.5424786806106567, "learning_rate": 7.418079511181084e-05, "loss": 1.6048, "step": 6287 }, { "epoch": 0.3504821358898612, "grad_norm": 0.5822529196739197, "learning_rate": 7.417300102550718e-05, "loss": 1.7153, "step": 6288 }, { "epoch": 0.35053787414302434, "grad_norm": 0.6322096586227417, "learning_rate": 7.416520617257686e-05, "loss": 2.0466, "step": 6289 }, { "epoch": 0.3505936123961875, "grad_norm": 0.6034446358680725, "learning_rate": 7.41574105532671e-05, "loss": 1.7793, "step": 6290 }, { "epoch": 0.35064935064935066, "grad_norm": 0.5261698365211487, "learning_rate": 7.414961416782512e-05, "loss": 1.6958, "step": 6291 }, { "epoch": 0.3507050889025138, "grad_norm": 0.5508055090904236, "learning_rate": 7.414181701649818e-05, "loss": 1.7336, "step": 6292 }, { "epoch": 0.35076082715567697, "grad_norm": 0.5106075406074524, "learning_rate": 7.413401909953356e-05, "loss": 1.5585, "step": 6293 }, { "epoch": 0.3508165654088401, "grad_norm": 0.5312706232070923, "learning_rate": 7.412622041717858e-05, "loss": 1.5692, "step": 6294 }, { "epoch": 0.3508723036620032, "grad_norm": 0.5598204135894775, "learning_rate": 7.411842096968055e-05, "loss": 1.6424, "step": 6295 }, { "epoch": 0.35092804191516636, "grad_norm": 0.5455132126808167, "learning_rate": 7.411062075728681e-05, "loss": 1.7084, "step": 6296 }, { "epoch": 0.35098378016832954, "grad_norm": 0.5335630774497986, "learning_rate": 7.410281978024478e-05, "loss": 1.6269, "step": 6297 }, { "epoch": 0.35103951842149267, "grad_norm": 0.5936735272407532, "learning_rate": 7.409501803880182e-05, "loss": 1.6821, "step": 6298 }, { "epoch": 0.3510952566746558, "grad_norm": 0.626340389251709, "learning_rate": 7.408721553320536e-05, "loss": 1.8958, "step": 6299 }, { "epoch": 0.351150994927819, "grad_norm": 0.5382502675056458, "learning_rate": 7.407941226370289e-05, "loss": 1.6456, "step": 6300 }, { "epoch": 0.3512067331809821, "grad_norm": 0.5597545504570007, "learning_rate": 7.407160823054182e-05, "loss": 1.7168, "step": 6301 }, { "epoch": 0.35126247143414524, "grad_norm": 0.5945395231246948, "learning_rate": 7.406380343396973e-05, "loss": 2.0034, "step": 6302 }, { "epoch": 0.3513182096873084, "grad_norm": 0.5297150611877441, "learning_rate": 7.405599787423406e-05, "loss": 1.5787, "step": 6303 }, { "epoch": 0.35137394794047155, "grad_norm": 0.5702363848686218, "learning_rate": 7.40481915515824e-05, "loss": 1.8993, "step": 6304 }, { "epoch": 0.3514296861936347, "grad_norm": 0.6293717622756958, "learning_rate": 7.404038446626231e-05, "loss": 1.9086, "step": 6305 }, { "epoch": 0.3514854244467978, "grad_norm": 0.579983651638031, "learning_rate": 7.403257661852142e-05, "loss": 1.74, "step": 6306 }, { "epoch": 0.351541162699961, "grad_norm": 0.558723509311676, "learning_rate": 7.40247680086073e-05, "loss": 1.7519, "step": 6307 }, { "epoch": 0.3515969009531241, "grad_norm": 0.5575239062309265, "learning_rate": 7.401695863676761e-05, "loss": 1.8393, "step": 6308 }, { "epoch": 0.35165263920628725, "grad_norm": 0.5667286515235901, "learning_rate": 7.400914850325001e-05, "loss": 1.7958, "step": 6309 }, { "epoch": 0.35170837745945044, "grad_norm": 0.5829740762710571, "learning_rate": 7.400133760830221e-05, "loss": 1.7113, "step": 6310 }, { "epoch": 0.35176411571261357, "grad_norm": 0.5255504846572876, "learning_rate": 7.399352595217193e-05, "loss": 1.6819, "step": 6311 }, { "epoch": 0.3518198539657767, "grad_norm": 0.5315664410591125, "learning_rate": 7.39857135351069e-05, "loss": 1.5692, "step": 6312 }, { "epoch": 0.3518755922189399, "grad_norm": 0.5694820880889893, "learning_rate": 7.397790035735487e-05, "loss": 1.813, "step": 6313 }, { "epoch": 0.351931330472103, "grad_norm": 0.5584225058555603, "learning_rate": 7.397008641916364e-05, "loss": 1.6653, "step": 6314 }, { "epoch": 0.35198706872526614, "grad_norm": 0.5575059652328491, "learning_rate": 7.396227172078103e-05, "loss": 1.7948, "step": 6315 }, { "epoch": 0.3520428069784293, "grad_norm": 0.5385696887969971, "learning_rate": 7.395445626245486e-05, "loss": 1.6823, "step": 6316 }, { "epoch": 0.35209854523159245, "grad_norm": 0.5181571841239929, "learning_rate": 7.394664004443302e-05, "loss": 1.4832, "step": 6317 }, { "epoch": 0.3521542834847556, "grad_norm": 0.5436875224113464, "learning_rate": 7.393882306696338e-05, "loss": 1.5743, "step": 6318 }, { "epoch": 0.3522100217379187, "grad_norm": 0.5831631422042847, "learning_rate": 7.393100533029383e-05, "loss": 1.7726, "step": 6319 }, { "epoch": 0.3522657599910819, "grad_norm": 0.5740854144096375, "learning_rate": 7.392318683467232e-05, "loss": 1.5639, "step": 6320 }, { "epoch": 0.352321498244245, "grad_norm": 0.5731649994850159, "learning_rate": 7.391536758034682e-05, "loss": 1.9563, "step": 6321 }, { "epoch": 0.35237723649740815, "grad_norm": 0.6104768514633179, "learning_rate": 7.390754756756526e-05, "loss": 1.6392, "step": 6322 }, { "epoch": 0.35243297475057134, "grad_norm": 0.5218120813369751, "learning_rate": 7.389972679657571e-05, "loss": 1.6262, "step": 6323 }, { "epoch": 0.35248871300373447, "grad_norm": 0.5537388324737549, "learning_rate": 7.389190526762618e-05, "loss": 1.7317, "step": 6324 }, { "epoch": 0.3525444512568976, "grad_norm": 0.577392578125, "learning_rate": 7.38840829809647e-05, "loss": 1.7069, "step": 6325 }, { "epoch": 0.3526001895100608, "grad_norm": 0.5511906147003174, "learning_rate": 7.387625993683937e-05, "loss": 1.6009, "step": 6326 }, { "epoch": 0.3526559277632239, "grad_norm": 0.5822625756263733, "learning_rate": 7.386843613549827e-05, "loss": 1.7174, "step": 6327 }, { "epoch": 0.35271166601638704, "grad_norm": 0.5413920879364014, "learning_rate": 7.386061157718955e-05, "loss": 1.5927, "step": 6328 }, { "epoch": 0.35276740426955017, "grad_norm": 0.5867698192596436, "learning_rate": 7.385278626216133e-05, "loss": 1.7494, "step": 6329 }, { "epoch": 0.35282314252271335, "grad_norm": 0.6775004863739014, "learning_rate": 7.384496019066182e-05, "loss": 1.8777, "step": 6330 }, { "epoch": 0.3528788807758765, "grad_norm": 0.6009215116500854, "learning_rate": 7.383713336293919e-05, "loss": 1.7538, "step": 6331 }, { "epoch": 0.3529346190290396, "grad_norm": 0.5513560771942139, "learning_rate": 7.382930577924168e-05, "loss": 1.6307, "step": 6332 }, { "epoch": 0.3529903572822028, "grad_norm": 0.5479623079299927, "learning_rate": 7.382147743981751e-05, "loss": 1.6945, "step": 6333 }, { "epoch": 0.3530460955353659, "grad_norm": 0.603458046913147, "learning_rate": 7.381364834491499e-05, "loss": 1.7531, "step": 6334 }, { "epoch": 0.35310183378852905, "grad_norm": 0.951324999332428, "learning_rate": 7.380581849478236e-05, "loss": 1.8593, "step": 6335 }, { "epoch": 0.35315757204169224, "grad_norm": 0.5293959975242615, "learning_rate": 7.379798788966798e-05, "loss": 1.7638, "step": 6336 }, { "epoch": 0.35321331029485536, "grad_norm": 0.5229690670967102, "learning_rate": 7.379015652982016e-05, "loss": 1.7042, "step": 6337 }, { "epoch": 0.3532690485480185, "grad_norm": 0.5152291059494019, "learning_rate": 7.378232441548729e-05, "loss": 1.607, "step": 6338 }, { "epoch": 0.3533247868011817, "grad_norm": 0.5136567950248718, "learning_rate": 7.377449154691775e-05, "loss": 1.7222, "step": 6339 }, { "epoch": 0.3533805250543448, "grad_norm": 0.5531160235404968, "learning_rate": 7.376665792435996e-05, "loss": 1.6946, "step": 6340 }, { "epoch": 0.35343626330750794, "grad_norm": 0.554097592830658, "learning_rate": 7.375882354806235e-05, "loss": 1.6551, "step": 6341 }, { "epoch": 0.35349200156067107, "grad_norm": 0.5862346887588501, "learning_rate": 7.375098841827337e-05, "loss": 1.7594, "step": 6342 }, { "epoch": 0.35354773981383425, "grad_norm": 0.5202105641365051, "learning_rate": 7.374315253524152e-05, "loss": 1.6205, "step": 6343 }, { "epoch": 0.3536034780669974, "grad_norm": 0.5510536432266235, "learning_rate": 7.373531589921531e-05, "loss": 1.5776, "step": 6344 }, { "epoch": 0.3536592163201605, "grad_norm": 0.5484849214553833, "learning_rate": 7.372747851044326e-05, "loss": 1.5603, "step": 6345 }, { "epoch": 0.3537149545733237, "grad_norm": 0.55774986743927, "learning_rate": 7.371964036917394e-05, "loss": 1.7814, "step": 6346 }, { "epoch": 0.3537706928264868, "grad_norm": 0.5338320732116699, "learning_rate": 7.371180147565592e-05, "loss": 1.5941, "step": 6347 }, { "epoch": 0.35382643107964995, "grad_norm": 0.5263161659240723, "learning_rate": 7.370396183013779e-05, "loss": 1.2328, "step": 6348 }, { "epoch": 0.35388216933281313, "grad_norm": 0.533647894859314, "learning_rate": 7.369612143286822e-05, "loss": 1.7327, "step": 6349 }, { "epoch": 0.35393790758597626, "grad_norm": 0.5682227611541748, "learning_rate": 7.368828028409581e-05, "loss": 1.8406, "step": 6350 }, { "epoch": 0.3539936458391394, "grad_norm": 0.5832127332687378, "learning_rate": 7.368043838406927e-05, "loss": 1.7841, "step": 6351 }, { "epoch": 0.3540493840923025, "grad_norm": 0.5741327404975891, "learning_rate": 7.36725957330373e-05, "loss": 1.787, "step": 6352 }, { "epoch": 0.3541051223454657, "grad_norm": 0.5750821828842163, "learning_rate": 7.366475233124861e-05, "loss": 1.7946, "step": 6353 }, { "epoch": 0.35416086059862883, "grad_norm": 0.5595529079437256, "learning_rate": 7.365690817895195e-05, "loss": 1.6904, "step": 6354 }, { "epoch": 0.35421659885179196, "grad_norm": 0.5768024921417236, "learning_rate": 7.364906327639608e-05, "loss": 1.7634, "step": 6355 }, { "epoch": 0.35427233710495515, "grad_norm": 0.5867105722427368, "learning_rate": 7.364121762382983e-05, "loss": 1.7406, "step": 6356 }, { "epoch": 0.3543280753581183, "grad_norm": 0.5967558026313782, "learning_rate": 7.363337122150197e-05, "loss": 1.5078, "step": 6357 }, { "epoch": 0.3543838136112814, "grad_norm": 0.5712282061576843, "learning_rate": 7.36255240696614e-05, "loss": 1.767, "step": 6358 }, { "epoch": 0.3544395518644446, "grad_norm": 0.5473513603210449, "learning_rate": 7.361767616855692e-05, "loss": 1.6409, "step": 6359 }, { "epoch": 0.3544952901176077, "grad_norm": 0.5412675738334656, "learning_rate": 7.360982751843747e-05, "loss": 1.6319, "step": 6360 }, { "epoch": 0.35455102837077085, "grad_norm": 0.5327848792076111, "learning_rate": 7.360197811955194e-05, "loss": 1.511, "step": 6361 }, { "epoch": 0.35460676662393403, "grad_norm": 0.5604977607727051, "learning_rate": 7.359412797214929e-05, "loss": 1.7604, "step": 6362 }, { "epoch": 0.35466250487709716, "grad_norm": 0.5807721018791199, "learning_rate": 7.358627707647844e-05, "loss": 1.5816, "step": 6363 }, { "epoch": 0.3547182431302603, "grad_norm": 0.5296190977096558, "learning_rate": 7.357842543278841e-05, "loss": 1.2601, "step": 6364 }, { "epoch": 0.3547739813834234, "grad_norm": 0.5498451590538025, "learning_rate": 7.357057304132819e-05, "loss": 1.8474, "step": 6365 }, { "epoch": 0.3548297196365866, "grad_norm": 0.5772817134857178, "learning_rate": 7.356271990234683e-05, "loss": 1.7508, "step": 6366 }, { "epoch": 0.35488545788974973, "grad_norm": 0.520463764667511, "learning_rate": 7.355486601609339e-05, "loss": 1.5589, "step": 6367 }, { "epoch": 0.35494119614291286, "grad_norm": 0.5433523058891296, "learning_rate": 7.354701138281688e-05, "loss": 1.7982, "step": 6368 }, { "epoch": 0.35499693439607605, "grad_norm": 0.587772011756897, "learning_rate": 7.35391560027665e-05, "loss": 1.7944, "step": 6369 }, { "epoch": 0.3550526726492392, "grad_norm": 0.562419056892395, "learning_rate": 7.353129987619133e-05, "loss": 1.8376, "step": 6370 }, { "epoch": 0.3551084109024023, "grad_norm": 0.524745523929596, "learning_rate": 7.352344300334053e-05, "loss": 1.575, "step": 6371 }, { "epoch": 0.3551641491555655, "grad_norm": 0.5049068927764893, "learning_rate": 7.351558538446326e-05, "loss": 1.3716, "step": 6372 }, { "epoch": 0.3552198874087286, "grad_norm": 0.6006641387939453, "learning_rate": 7.350772701980872e-05, "loss": 1.9018, "step": 6373 }, { "epoch": 0.35527562566189175, "grad_norm": 0.5516168475151062, "learning_rate": 7.349986790962613e-05, "loss": 1.6401, "step": 6374 }, { "epoch": 0.3553313639150549, "grad_norm": 0.5250164270401001, "learning_rate": 7.349200805416478e-05, "loss": 1.5694, "step": 6375 }, { "epoch": 0.35538710216821806, "grad_norm": 0.5079348087310791, "learning_rate": 7.348414745367387e-05, "loss": 1.6291, "step": 6376 }, { "epoch": 0.3554428404213812, "grad_norm": 0.5634783506393433, "learning_rate": 7.347628610840274e-05, "loss": 1.6777, "step": 6377 }, { "epoch": 0.3554985786745443, "grad_norm": 0.5921057462692261, "learning_rate": 7.346842401860069e-05, "loss": 1.922, "step": 6378 }, { "epoch": 0.3555543169277075, "grad_norm": 0.5826466679573059, "learning_rate": 7.346056118451705e-05, "loss": 1.7305, "step": 6379 }, { "epoch": 0.35561005518087063, "grad_norm": 0.5478690266609192, "learning_rate": 7.345269760640121e-05, "loss": 1.7387, "step": 6380 }, { "epoch": 0.35566579343403376, "grad_norm": 0.5795879364013672, "learning_rate": 7.344483328450253e-05, "loss": 1.6662, "step": 6381 }, { "epoch": 0.35572153168719695, "grad_norm": 0.5886217355728149, "learning_rate": 7.343696821907042e-05, "loss": 1.8065, "step": 6382 }, { "epoch": 0.3557772699403601, "grad_norm": 0.6385563611984253, "learning_rate": 7.342910241035434e-05, "loss": 1.7933, "step": 6383 }, { "epoch": 0.3558330081935232, "grad_norm": 0.5828480124473572, "learning_rate": 7.342123585860374e-05, "loss": 1.6203, "step": 6384 }, { "epoch": 0.3558887464466864, "grad_norm": 0.5478693842887878, "learning_rate": 7.341336856406808e-05, "loss": 1.6706, "step": 6385 }, { "epoch": 0.3559444846998495, "grad_norm": 0.5751214027404785, "learning_rate": 7.340550052699689e-05, "loss": 1.8427, "step": 6386 }, { "epoch": 0.35600022295301265, "grad_norm": 0.5512586236000061, "learning_rate": 7.339763174763968e-05, "loss": 1.7332, "step": 6387 }, { "epoch": 0.3560559612061758, "grad_norm": 0.5546371340751648, "learning_rate": 7.3389762226246e-05, "loss": 1.5966, "step": 6388 }, { "epoch": 0.35611169945933896, "grad_norm": 0.5267236232757568, "learning_rate": 7.338189196306544e-05, "loss": 1.8137, "step": 6389 }, { "epoch": 0.3561674377125021, "grad_norm": 0.5219095945358276, "learning_rate": 7.33740209583476e-05, "loss": 1.6799, "step": 6390 }, { "epoch": 0.3562231759656652, "grad_norm": 0.5330881476402283, "learning_rate": 7.33661492123421e-05, "loss": 1.6959, "step": 6391 }, { "epoch": 0.3562789142188284, "grad_norm": 0.5660157203674316, "learning_rate": 7.335827672529856e-05, "loss": 1.7565, "step": 6392 }, { "epoch": 0.35633465247199153, "grad_norm": 0.5627869963645935, "learning_rate": 7.335040349746669e-05, "loss": 1.7526, "step": 6393 }, { "epoch": 0.35639039072515466, "grad_norm": 0.588152289390564, "learning_rate": 7.334252952909615e-05, "loss": 1.64, "step": 6394 }, { "epoch": 0.35644612897831784, "grad_norm": 0.5885617733001709, "learning_rate": 7.333465482043667e-05, "loss": 1.7358, "step": 6395 }, { "epoch": 0.356501867231481, "grad_norm": 0.6158447265625, "learning_rate": 7.3326779371738e-05, "loss": 1.854, "step": 6396 }, { "epoch": 0.3565576054846441, "grad_norm": 0.5353176593780518, "learning_rate": 7.33189031832499e-05, "loss": 1.6502, "step": 6397 }, { "epoch": 0.35661334373780723, "grad_norm": 0.5986976027488708, "learning_rate": 7.331102625522212e-05, "loss": 1.6757, "step": 6398 }, { "epoch": 0.3566690819909704, "grad_norm": 0.5034981966018677, "learning_rate": 7.330314858790453e-05, "loss": 1.5362, "step": 6399 }, { "epoch": 0.35672482024413354, "grad_norm": 0.5768936276435852, "learning_rate": 7.32952701815469e-05, "loss": 1.7302, "step": 6400 }, { "epoch": 0.3567805584972967, "grad_norm": 0.5493230819702148, "learning_rate": 7.328739103639916e-05, "loss": 1.7755, "step": 6401 }, { "epoch": 0.35683629675045986, "grad_norm": 0.5121830105781555, "learning_rate": 7.327951115271113e-05, "loss": 1.5803, "step": 6402 }, { "epoch": 0.356892035003623, "grad_norm": 0.546416699886322, "learning_rate": 7.327163053073273e-05, "loss": 1.5991, "step": 6403 }, { "epoch": 0.3569477732567861, "grad_norm": 0.5108504891395569, "learning_rate": 7.32637491707139e-05, "loss": 1.6789, "step": 6404 }, { "epoch": 0.3570035115099493, "grad_norm": 0.5747851729393005, "learning_rate": 7.32558670729046e-05, "loss": 1.8266, "step": 6405 }, { "epoch": 0.35705924976311243, "grad_norm": 0.587032675743103, "learning_rate": 7.324798423755476e-05, "loss": 1.6093, "step": 6406 }, { "epoch": 0.35711498801627556, "grad_norm": 0.5485719442367554, "learning_rate": 7.324010066491442e-05, "loss": 1.6672, "step": 6407 }, { "epoch": 0.35717072626943874, "grad_norm": 0.5325014591217041, "learning_rate": 7.323221635523358e-05, "loss": 1.7776, "step": 6408 }, { "epoch": 0.35722646452260187, "grad_norm": 0.5524224638938904, "learning_rate": 7.32243313087623e-05, "loss": 1.9326, "step": 6409 }, { "epoch": 0.357282202775765, "grad_norm": 0.5688652396202087, "learning_rate": 7.321644552575062e-05, "loss": 1.8942, "step": 6410 }, { "epoch": 0.35733794102892813, "grad_norm": 0.5133098363876343, "learning_rate": 7.320855900644867e-05, "loss": 1.6339, "step": 6411 }, { "epoch": 0.3573936792820913, "grad_norm": 0.5422292947769165, "learning_rate": 7.320067175110653e-05, "loss": 1.681, "step": 6412 }, { "epoch": 0.35744941753525444, "grad_norm": 0.5691182613372803, "learning_rate": 7.319278375997436e-05, "loss": 1.847, "step": 6413 }, { "epoch": 0.3575051557884176, "grad_norm": 0.5584883689880371, "learning_rate": 7.31848950333023e-05, "loss": 1.7616, "step": 6414 }, { "epoch": 0.35756089404158076, "grad_norm": 0.5878840088844299, "learning_rate": 7.317700557134056e-05, "loss": 1.7561, "step": 6415 }, { "epoch": 0.3576166322947439, "grad_norm": 0.5363910794258118, "learning_rate": 7.316911537433933e-05, "loss": 1.6086, "step": 6416 }, { "epoch": 0.357672370547907, "grad_norm": 0.5783511996269226, "learning_rate": 7.316122444254884e-05, "loss": 1.7853, "step": 6417 }, { "epoch": 0.3577281088010702, "grad_norm": 0.5695887804031372, "learning_rate": 7.315333277621935e-05, "loss": 1.5816, "step": 6418 }, { "epoch": 0.35778384705423333, "grad_norm": 0.5631670355796814, "learning_rate": 7.314544037560114e-05, "loss": 1.5703, "step": 6419 }, { "epoch": 0.35783958530739646, "grad_norm": 0.5459564328193665, "learning_rate": 7.313754724094451e-05, "loss": 1.6222, "step": 6420 }, { "epoch": 0.3578953235605596, "grad_norm": 0.5215150117874146, "learning_rate": 7.312965337249979e-05, "loss": 1.7888, "step": 6421 }, { "epoch": 0.35795106181372277, "grad_norm": 0.5654617547988892, "learning_rate": 7.312175877051732e-05, "loss": 1.7508, "step": 6422 }, { "epoch": 0.3580068000668859, "grad_norm": 0.5510186553001404, "learning_rate": 7.311386343524747e-05, "loss": 1.8401, "step": 6423 }, { "epoch": 0.35806253832004903, "grad_norm": 0.521782398223877, "learning_rate": 7.310596736694062e-05, "loss": 1.5428, "step": 6424 }, { "epoch": 0.3581182765732122, "grad_norm": 0.5308924317359924, "learning_rate": 7.309807056584722e-05, "loss": 1.464, "step": 6425 }, { "epoch": 0.35817401482637534, "grad_norm": 0.5567795634269714, "learning_rate": 7.309017303221768e-05, "loss": 1.7063, "step": 6426 }, { "epoch": 0.35822975307953847, "grad_norm": 0.5558245778083801, "learning_rate": 7.308227476630249e-05, "loss": 1.6636, "step": 6427 }, { "epoch": 0.35828549133270166, "grad_norm": 0.5258497595787048, "learning_rate": 7.30743757683521e-05, "loss": 1.5777, "step": 6428 }, { "epoch": 0.3583412295858648, "grad_norm": 0.5101563930511475, "learning_rate": 7.306647603861706e-05, "loss": 1.5602, "step": 6429 }, { "epoch": 0.3583969678390279, "grad_norm": 0.5508061647415161, "learning_rate": 7.305857557734789e-05, "loss": 1.659, "step": 6430 }, { "epoch": 0.3584527060921911, "grad_norm": 0.6159545183181763, "learning_rate": 7.305067438479513e-05, "loss": 1.9413, "step": 6431 }, { "epoch": 0.3585084443453542, "grad_norm": 0.5804408192634583, "learning_rate": 7.30427724612094e-05, "loss": 1.7138, "step": 6432 }, { "epoch": 0.35856418259851736, "grad_norm": 0.5316668748855591, "learning_rate": 7.303486980684125e-05, "loss": 1.7588, "step": 6433 }, { "epoch": 0.3586199208516805, "grad_norm": 0.6093178391456604, "learning_rate": 7.302696642194134e-05, "loss": 1.8426, "step": 6434 }, { "epoch": 0.35867565910484367, "grad_norm": 0.5371636152267456, "learning_rate": 7.30190623067603e-05, "loss": 1.5852, "step": 6435 }, { "epoch": 0.3587313973580068, "grad_norm": 0.5050824284553528, "learning_rate": 7.301115746154884e-05, "loss": 1.5495, "step": 6436 }, { "epoch": 0.3587871356111699, "grad_norm": 0.5830590724945068, "learning_rate": 7.300325188655761e-05, "loss": 1.8611, "step": 6437 }, { "epoch": 0.3588428738643331, "grad_norm": 0.5415953397750854, "learning_rate": 7.299534558203735e-05, "loss": 1.6437, "step": 6438 }, { "epoch": 0.35889861211749624, "grad_norm": 0.5701804757118225, "learning_rate": 7.298743854823882e-05, "loss": 1.8723, "step": 6439 }, { "epoch": 0.35895435037065937, "grad_norm": 0.5361306667327881, "learning_rate": 7.297953078541274e-05, "loss": 1.518, "step": 6440 }, { "epoch": 0.35901008862382255, "grad_norm": 0.5895618796348572, "learning_rate": 7.297162229380994e-05, "loss": 1.8528, "step": 6441 }, { "epoch": 0.3590658268769857, "grad_norm": 0.5555623173713684, "learning_rate": 7.29637130736812e-05, "loss": 1.6619, "step": 6442 }, { "epoch": 0.3591215651301488, "grad_norm": 0.5527105331420898, "learning_rate": 7.295580312527739e-05, "loss": 1.8209, "step": 6443 }, { "epoch": 0.35917730338331194, "grad_norm": 0.5717308521270752, "learning_rate": 7.294789244884932e-05, "loss": 1.6109, "step": 6444 }, { "epoch": 0.3592330416364751, "grad_norm": 0.5484607815742493, "learning_rate": 7.293998104464792e-05, "loss": 1.7449, "step": 6445 }, { "epoch": 0.35928877988963825, "grad_norm": 0.5548183917999268, "learning_rate": 7.293206891292405e-05, "loss": 1.7952, "step": 6446 }, { "epoch": 0.3593445181428014, "grad_norm": 0.5666037201881409, "learning_rate": 7.292415605392867e-05, "loss": 1.8784, "step": 6447 }, { "epoch": 0.35940025639596457, "grad_norm": 0.5922662615776062, "learning_rate": 7.291624246791272e-05, "loss": 1.8764, "step": 6448 }, { "epoch": 0.3594559946491277, "grad_norm": 0.5456053018569946, "learning_rate": 7.290832815512716e-05, "loss": 1.7389, "step": 6449 }, { "epoch": 0.3595117329022908, "grad_norm": 0.5417848229408264, "learning_rate": 7.290041311582301e-05, "loss": 1.591, "step": 6450 }, { "epoch": 0.359567471155454, "grad_norm": 0.5787496566772461, "learning_rate": 7.289249735025127e-05, "loss": 1.765, "step": 6451 }, { "epoch": 0.35962320940861714, "grad_norm": 0.5513389110565186, "learning_rate": 7.288458085866298e-05, "loss": 1.6685, "step": 6452 }, { "epoch": 0.35967894766178027, "grad_norm": 0.5737441182136536, "learning_rate": 7.287666364130921e-05, "loss": 1.6956, "step": 6453 }, { "epoch": 0.35973468591494345, "grad_norm": 0.6044551134109497, "learning_rate": 7.286874569844106e-05, "loss": 1.7829, "step": 6454 }, { "epoch": 0.3597904241681066, "grad_norm": 0.5688374638557434, "learning_rate": 7.286082703030961e-05, "loss": 1.8747, "step": 6455 }, { "epoch": 0.3598461624212697, "grad_norm": 0.5276156067848206, "learning_rate": 7.285290763716604e-05, "loss": 1.5944, "step": 6456 }, { "epoch": 0.35990190067443284, "grad_norm": 0.5913518667221069, "learning_rate": 7.284498751926147e-05, "loss": 1.6307, "step": 6457 }, { "epoch": 0.359957638927596, "grad_norm": 0.5470561981201172, "learning_rate": 7.283706667684709e-05, "loss": 1.6096, "step": 6458 }, { "epoch": 0.36001337718075915, "grad_norm": 0.5165275931358337, "learning_rate": 7.28291451101741e-05, "loss": 1.6963, "step": 6459 }, { "epoch": 0.3600691154339223, "grad_norm": 0.552894651889801, "learning_rate": 7.282122281949374e-05, "loss": 1.7304, "step": 6460 }, { "epoch": 0.36012485368708547, "grad_norm": 0.573884129524231, "learning_rate": 7.281329980505724e-05, "loss": 1.8304, "step": 6461 }, { "epoch": 0.3601805919402486, "grad_norm": 0.5113431811332703, "learning_rate": 7.280537606711589e-05, "loss": 1.509, "step": 6462 }, { "epoch": 0.3602363301934117, "grad_norm": 0.54507976770401, "learning_rate": 7.279745160592097e-05, "loss": 1.765, "step": 6463 }, { "epoch": 0.3602920684465749, "grad_norm": 0.5524507761001587, "learning_rate": 7.278952642172381e-05, "loss": 1.6604, "step": 6464 }, { "epoch": 0.36034780669973804, "grad_norm": 0.5713779926300049, "learning_rate": 7.278160051477574e-05, "loss": 1.6273, "step": 6465 }, { "epoch": 0.36040354495290117, "grad_norm": 0.5713092684745789, "learning_rate": 7.277367388532812e-05, "loss": 1.7693, "step": 6466 }, { "epoch": 0.3604592832060643, "grad_norm": 0.5316145420074463, "learning_rate": 7.276574653363236e-05, "loss": 1.6402, "step": 6467 }, { "epoch": 0.3605150214592275, "grad_norm": 0.5453936457633972, "learning_rate": 7.275781845993983e-05, "loss": 1.9642, "step": 6468 }, { "epoch": 0.3605707597123906, "grad_norm": 0.5773400068283081, "learning_rate": 7.274988966450201e-05, "loss": 1.8417, "step": 6469 }, { "epoch": 0.36062649796555374, "grad_norm": 0.5517837405204773, "learning_rate": 7.274196014757032e-05, "loss": 1.6307, "step": 6470 }, { "epoch": 0.3606822362187169, "grad_norm": 0.5454963445663452, "learning_rate": 7.273402990939626e-05, "loss": 1.7725, "step": 6471 }, { "epoch": 0.36073797447188005, "grad_norm": 0.5993366837501526, "learning_rate": 7.272609895023129e-05, "loss": 1.831, "step": 6472 }, { "epoch": 0.3607937127250432, "grad_norm": 0.5621082186698914, "learning_rate": 7.2718167270327e-05, "loss": 1.4942, "step": 6473 }, { "epoch": 0.36084945097820637, "grad_norm": 0.5455790758132935, "learning_rate": 7.271023486993488e-05, "loss": 1.722, "step": 6474 }, { "epoch": 0.3609051892313695, "grad_norm": 0.5093836784362793, "learning_rate": 7.270230174930653e-05, "loss": 1.5921, "step": 6475 }, { "epoch": 0.3609609274845326, "grad_norm": 0.5746651887893677, "learning_rate": 7.269436790869352e-05, "loss": 1.7303, "step": 6476 }, { "epoch": 0.3610166657376958, "grad_norm": 0.5042871832847595, "learning_rate": 7.268643334834748e-05, "loss": 1.4386, "step": 6477 }, { "epoch": 0.36107240399085894, "grad_norm": 0.6014384627342224, "learning_rate": 7.267849806852005e-05, "loss": 1.7803, "step": 6478 }, { "epoch": 0.36112814224402207, "grad_norm": 0.49684464931488037, "learning_rate": 7.267056206946289e-05, "loss": 1.6513, "step": 6479 }, { "epoch": 0.3611838804971852, "grad_norm": 0.6013120412826538, "learning_rate": 7.266262535142767e-05, "loss": 1.718, "step": 6480 }, { "epoch": 0.3612396187503484, "grad_norm": 0.5482946038246155, "learning_rate": 7.26546879146661e-05, "loss": 1.8295, "step": 6481 }, { "epoch": 0.3612953570035115, "grad_norm": 0.5593370199203491, "learning_rate": 7.264674975942994e-05, "loss": 1.8042, "step": 6482 }, { "epoch": 0.36135109525667464, "grad_norm": 0.5430756211280823, "learning_rate": 7.26388108859709e-05, "loss": 1.6976, "step": 6483 }, { "epoch": 0.3614068335098378, "grad_norm": 0.5408653020858765, "learning_rate": 7.263087129454078e-05, "loss": 1.5425, "step": 6484 }, { "epoch": 0.36146257176300095, "grad_norm": 0.5399406552314758, "learning_rate": 7.262293098539134e-05, "loss": 1.7552, "step": 6485 }, { "epoch": 0.3615183100161641, "grad_norm": 0.5077804923057556, "learning_rate": 7.261498995877447e-05, "loss": 1.5728, "step": 6486 }, { "epoch": 0.36157404826932726, "grad_norm": 0.5409159660339355, "learning_rate": 7.260704821494196e-05, "loss": 1.7926, "step": 6487 }, { "epoch": 0.3616297865224904, "grad_norm": 0.4922293424606323, "learning_rate": 7.259910575414569e-05, "loss": 1.46, "step": 6488 }, { "epoch": 0.3616855247756535, "grad_norm": 0.530104398727417, "learning_rate": 7.259116257663753e-05, "loss": 1.4995, "step": 6489 }, { "epoch": 0.36174126302881665, "grad_norm": 0.5683631896972656, "learning_rate": 7.258321868266943e-05, "loss": 1.6736, "step": 6490 }, { "epoch": 0.36179700128197984, "grad_norm": 0.5562074184417725, "learning_rate": 7.25752740724933e-05, "loss": 1.6224, "step": 6491 }, { "epoch": 0.36185273953514296, "grad_norm": 0.6077651381492615, "learning_rate": 7.256732874636109e-05, "loss": 1.7814, "step": 6492 }, { "epoch": 0.3619084777883061, "grad_norm": 0.5739646553993225, "learning_rate": 7.255938270452479e-05, "loss": 1.7024, "step": 6493 }, { "epoch": 0.3619642160414693, "grad_norm": 0.5540484189987183, "learning_rate": 7.25514359472364e-05, "loss": 1.5576, "step": 6494 }, { "epoch": 0.3620199542946324, "grad_norm": 0.5674034953117371, "learning_rate": 7.254348847474797e-05, "loss": 1.8389, "step": 6495 }, { "epoch": 0.36207569254779554, "grad_norm": 0.5664230585098267, "learning_rate": 7.253554028731148e-05, "loss": 1.7194, "step": 6496 }, { "epoch": 0.3621314308009587, "grad_norm": 0.5525626540184021, "learning_rate": 7.252759138517909e-05, "loss": 1.3394, "step": 6497 }, { "epoch": 0.36218716905412185, "grad_norm": 0.5549319982528687, "learning_rate": 7.251964176860281e-05, "loss": 1.6234, "step": 6498 }, { "epoch": 0.362242907307285, "grad_norm": 0.5454506874084473, "learning_rate": 7.25116914378348e-05, "loss": 1.8937, "step": 6499 }, { "epoch": 0.36229864556044816, "grad_norm": 0.5178475379943848, "learning_rate": 7.25037403931272e-05, "loss": 1.5599, "step": 6500 }, { "epoch": 0.3623543838136113, "grad_norm": 0.5836609601974487, "learning_rate": 7.249578863473216e-05, "loss": 1.8547, "step": 6501 }, { "epoch": 0.3624101220667744, "grad_norm": 0.5162068605422974, "learning_rate": 7.248783616290186e-05, "loss": 1.4538, "step": 6502 }, { "epoch": 0.36246586031993755, "grad_norm": 0.5959255695343018, "learning_rate": 7.24798829778885e-05, "loss": 1.8237, "step": 6503 }, { "epoch": 0.36252159857310073, "grad_norm": 0.5471253395080566, "learning_rate": 7.247192907994433e-05, "loss": 1.5705, "step": 6504 }, { "epoch": 0.36257733682626386, "grad_norm": 0.5264948010444641, "learning_rate": 7.246397446932159e-05, "loss": 1.6597, "step": 6505 }, { "epoch": 0.362633075079427, "grad_norm": 0.5829636454582214, "learning_rate": 7.245601914627255e-05, "loss": 1.9137, "step": 6506 }, { "epoch": 0.3626888133325902, "grad_norm": 0.5371459722518921, "learning_rate": 7.244806311104952e-05, "loss": 1.5883, "step": 6507 }, { "epoch": 0.3627445515857533, "grad_norm": 0.6225298643112183, "learning_rate": 7.24401063639048e-05, "loss": 1.9112, "step": 6508 }, { "epoch": 0.36280028983891643, "grad_norm": 0.5452820062637329, "learning_rate": 7.243214890509073e-05, "loss": 1.6557, "step": 6509 }, { "epoch": 0.3628560280920796, "grad_norm": 0.5052100419998169, "learning_rate": 7.24241907348597e-05, "loss": 1.4815, "step": 6510 }, { "epoch": 0.36291176634524275, "grad_norm": 0.5527931451797485, "learning_rate": 7.241623185346409e-05, "loss": 1.6867, "step": 6511 }, { "epoch": 0.3629675045984059, "grad_norm": 0.5412555932998657, "learning_rate": 7.240827226115629e-05, "loss": 1.5461, "step": 6512 }, { "epoch": 0.363023242851569, "grad_norm": 0.5910593271255493, "learning_rate": 7.240031195818874e-05, "loss": 1.7713, "step": 6513 }, { "epoch": 0.3630789811047322, "grad_norm": 0.5672844052314758, "learning_rate": 7.239235094481391e-05, "loss": 1.3757, "step": 6514 }, { "epoch": 0.3631347193578953, "grad_norm": 0.580847442150116, "learning_rate": 7.238438922128425e-05, "loss": 1.9571, "step": 6515 }, { "epoch": 0.36319045761105845, "grad_norm": 0.642082691192627, "learning_rate": 7.237642678785228e-05, "loss": 1.9311, "step": 6516 }, { "epoch": 0.36324619586422163, "grad_norm": 0.49659648537635803, "learning_rate": 7.236846364477052e-05, "loss": 1.6393, "step": 6517 }, { "epoch": 0.36330193411738476, "grad_norm": 0.5082789063453674, "learning_rate": 7.23604997922915e-05, "loss": 1.5183, "step": 6518 }, { "epoch": 0.3633576723705479, "grad_norm": 0.5978274941444397, "learning_rate": 7.235253523066781e-05, "loss": 1.8529, "step": 6519 }, { "epoch": 0.3634134106237111, "grad_norm": 0.5323169231414795, "learning_rate": 7.234456996015202e-05, "loss": 1.6463, "step": 6520 }, { "epoch": 0.3634691488768742, "grad_norm": 0.5250840187072754, "learning_rate": 7.233660398099675e-05, "loss": 1.4439, "step": 6521 }, { "epoch": 0.36352488713003733, "grad_norm": 0.566667914390564, "learning_rate": 7.232863729345464e-05, "loss": 1.5871, "step": 6522 }, { "epoch": 0.3635806253832005, "grad_norm": 0.5944371223449707, "learning_rate": 7.232066989777833e-05, "loss": 1.978, "step": 6523 }, { "epoch": 0.36363636363636365, "grad_norm": 0.556692361831665, "learning_rate": 7.231270179422051e-05, "loss": 1.5579, "step": 6524 }, { "epoch": 0.3636921018895268, "grad_norm": 0.5578793883323669, "learning_rate": 7.230473298303388e-05, "loss": 1.6899, "step": 6525 }, { "epoch": 0.3637478401426899, "grad_norm": 0.672099232673645, "learning_rate": 7.229676346447117e-05, "loss": 2.1324, "step": 6526 }, { "epoch": 0.3638035783958531, "grad_norm": 0.5312888622283936, "learning_rate": 7.228879323878512e-05, "loss": 1.453, "step": 6527 }, { "epoch": 0.3638593166490162, "grad_norm": 0.5675061345100403, "learning_rate": 7.22808223062285e-05, "loss": 1.8623, "step": 6528 }, { "epoch": 0.36391505490217935, "grad_norm": 0.49803319573402405, "learning_rate": 7.227285066705412e-05, "loss": 1.41, "step": 6529 }, { "epoch": 0.36397079315534253, "grad_norm": 0.5489189028739929, "learning_rate": 7.226487832151476e-05, "loss": 1.6551, "step": 6530 }, { "epoch": 0.36402653140850566, "grad_norm": 0.5769960284233093, "learning_rate": 7.225690526986326e-05, "loss": 1.7853, "step": 6531 }, { "epoch": 0.3640822696616688, "grad_norm": 0.5400393605232239, "learning_rate": 7.224893151235251e-05, "loss": 1.5544, "step": 6532 }, { "epoch": 0.364138007914832, "grad_norm": 0.5720942616462708, "learning_rate": 7.224095704923537e-05, "loss": 1.9824, "step": 6533 }, { "epoch": 0.3641937461679951, "grad_norm": 0.5403176546096802, "learning_rate": 7.223298188076475e-05, "loss": 1.5978, "step": 6534 }, { "epoch": 0.36424948442115823, "grad_norm": 0.5350765585899353, "learning_rate": 7.222500600719356e-05, "loss": 1.5945, "step": 6535 }, { "epoch": 0.36430522267432136, "grad_norm": 0.542413592338562, "learning_rate": 7.221702942877477e-05, "loss": 1.717, "step": 6536 }, { "epoch": 0.36436096092748455, "grad_norm": 0.5472922921180725, "learning_rate": 7.220905214576134e-05, "loss": 1.6535, "step": 6537 }, { "epoch": 0.3644166991806477, "grad_norm": 0.5479559302330017, "learning_rate": 7.220107415840626e-05, "loss": 1.5444, "step": 6538 }, { "epoch": 0.3644724374338108, "grad_norm": 0.5131190419197083, "learning_rate": 7.219309546696255e-05, "loss": 1.3543, "step": 6539 }, { "epoch": 0.364528175686974, "grad_norm": 0.5852962732315063, "learning_rate": 7.218511607168326e-05, "loss": 1.7422, "step": 6540 }, { "epoch": 0.3645839139401371, "grad_norm": 0.5998173356056213, "learning_rate": 7.21771359728214e-05, "loss": 1.9125, "step": 6541 }, { "epoch": 0.36463965219330025, "grad_norm": 0.5412231683731079, "learning_rate": 7.216915517063012e-05, "loss": 1.8743, "step": 6542 }, { "epoch": 0.36469539044646343, "grad_norm": 0.5305824279785156, "learning_rate": 7.216117366536249e-05, "loss": 1.476, "step": 6543 }, { "epoch": 0.36475112869962656, "grad_norm": 0.586646556854248, "learning_rate": 7.215319145727161e-05, "loss": 1.7591, "step": 6544 }, { "epoch": 0.3648068669527897, "grad_norm": 0.5975636839866638, "learning_rate": 7.214520854661071e-05, "loss": 1.9996, "step": 6545 }, { "epoch": 0.3648626052059529, "grad_norm": 0.543255627155304, "learning_rate": 7.213722493363288e-05, "loss": 1.6909, "step": 6546 }, { "epoch": 0.364918343459116, "grad_norm": 0.5423970222473145, "learning_rate": 7.212924061859135e-05, "loss": 1.6429, "step": 6547 }, { "epoch": 0.36497408171227913, "grad_norm": 0.5859336256980896, "learning_rate": 7.212125560173933e-05, "loss": 1.9055, "step": 6548 }, { "epoch": 0.36502981996544226, "grad_norm": 0.5425530672073364, "learning_rate": 7.211326988333006e-05, "loss": 1.7073, "step": 6549 }, { "epoch": 0.36508555821860544, "grad_norm": 0.580575168132782, "learning_rate": 7.210528346361678e-05, "loss": 1.6739, "step": 6550 }, { "epoch": 0.3651412964717686, "grad_norm": 0.599686324596405, "learning_rate": 7.209729634285282e-05, "loss": 1.7179, "step": 6551 }, { "epoch": 0.3651970347249317, "grad_norm": 0.5199704170227051, "learning_rate": 7.208930852129143e-05, "loss": 1.7484, "step": 6552 }, { "epoch": 0.3652527729780949, "grad_norm": 0.5557204484939575, "learning_rate": 7.208131999918599e-05, "loss": 1.6169, "step": 6553 }, { "epoch": 0.365308511231258, "grad_norm": 0.5307885408401489, "learning_rate": 7.20733307767898e-05, "loss": 1.4029, "step": 6554 }, { "epoch": 0.36536424948442114, "grad_norm": 0.5462751388549805, "learning_rate": 7.206534085435625e-05, "loss": 1.6399, "step": 6555 }, { "epoch": 0.36541998773758433, "grad_norm": 0.5815526247024536, "learning_rate": 7.205735023213877e-05, "loss": 1.7943, "step": 6556 }, { "epoch": 0.36547572599074746, "grad_norm": 0.5783229470252991, "learning_rate": 7.204935891039071e-05, "loss": 1.7919, "step": 6557 }, { "epoch": 0.3655314642439106, "grad_norm": 0.5671087503433228, "learning_rate": 7.204136688936556e-05, "loss": 1.8125, "step": 6558 }, { "epoch": 0.3655872024970737, "grad_norm": 0.5661280751228333, "learning_rate": 7.203337416931675e-05, "loss": 1.6377, "step": 6559 }, { "epoch": 0.3656429407502369, "grad_norm": 0.5448043942451477, "learning_rate": 7.202538075049781e-05, "loss": 1.6929, "step": 6560 }, { "epoch": 0.36569867900340003, "grad_norm": 0.5517578125, "learning_rate": 7.201738663316217e-05, "loss": 1.7849, "step": 6561 }, { "epoch": 0.36575441725656316, "grad_norm": 0.5554426312446594, "learning_rate": 7.200939181756341e-05, "loss": 1.3314, "step": 6562 }, { "epoch": 0.36581015550972634, "grad_norm": 0.5693673491477966, "learning_rate": 7.200139630395507e-05, "loss": 1.8517, "step": 6563 }, { "epoch": 0.36586589376288947, "grad_norm": 0.5405746698379517, "learning_rate": 7.199340009259072e-05, "loss": 1.58, "step": 6564 }, { "epoch": 0.3659216320160526, "grad_norm": 0.633660078048706, "learning_rate": 7.198540318372394e-05, "loss": 1.9478, "step": 6565 }, { "epoch": 0.3659773702692158, "grad_norm": 0.5665812492370605, "learning_rate": 7.197740557760834e-05, "loss": 1.6334, "step": 6566 }, { "epoch": 0.3660331085223789, "grad_norm": 0.549199104309082, "learning_rate": 7.196940727449759e-05, "loss": 1.4779, "step": 6567 }, { "epoch": 0.36608884677554204, "grad_norm": 0.49754953384399414, "learning_rate": 7.196140827464533e-05, "loss": 1.5101, "step": 6568 }, { "epoch": 0.3661445850287052, "grad_norm": 0.5829338431358337, "learning_rate": 7.195340857830524e-05, "loss": 1.7219, "step": 6569 }, { "epoch": 0.36620032328186836, "grad_norm": 0.5498637557029724, "learning_rate": 7.194540818573103e-05, "loss": 1.6491, "step": 6570 }, { "epoch": 0.3662560615350315, "grad_norm": 0.5562663674354553, "learning_rate": 7.193740709717643e-05, "loss": 1.7846, "step": 6571 }, { "epoch": 0.3663117997881946, "grad_norm": 0.5268211364746094, "learning_rate": 7.192940531289517e-05, "loss": 1.5521, "step": 6572 }, { "epoch": 0.3663675380413578, "grad_norm": 0.5425642132759094, "learning_rate": 7.192140283314104e-05, "loss": 1.7968, "step": 6573 }, { "epoch": 0.3664232762945209, "grad_norm": 0.5653149485588074, "learning_rate": 7.191339965816781e-05, "loss": 1.6086, "step": 6574 }, { "epoch": 0.36647901454768406, "grad_norm": 0.5728870630264282, "learning_rate": 7.190539578822932e-05, "loss": 1.8264, "step": 6575 }, { "epoch": 0.36653475280084724, "grad_norm": 0.5501007437705994, "learning_rate": 7.189739122357939e-05, "loss": 1.8426, "step": 6576 }, { "epoch": 0.36659049105401037, "grad_norm": 0.5318872332572937, "learning_rate": 7.188938596447188e-05, "loss": 1.7968, "step": 6577 }, { "epoch": 0.3666462293071735, "grad_norm": 0.5750231146812439, "learning_rate": 7.188138001116065e-05, "loss": 1.6745, "step": 6578 }, { "epoch": 0.3667019675603367, "grad_norm": 0.6171157956123352, "learning_rate": 7.187337336389966e-05, "loss": 2.0264, "step": 6579 }, { "epoch": 0.3667577058134998, "grad_norm": 0.5361387133598328, "learning_rate": 7.186536602294278e-05, "loss": 1.5105, "step": 6580 }, { "epoch": 0.36681344406666294, "grad_norm": 0.5726244449615479, "learning_rate": 7.185735798854396e-05, "loss": 1.6055, "step": 6581 }, { "epoch": 0.36686918231982607, "grad_norm": 0.5350404381752014, "learning_rate": 7.184934926095721e-05, "loss": 1.7493, "step": 6582 }, { "epoch": 0.36692492057298925, "grad_norm": 0.5755828022956848, "learning_rate": 7.184133984043646e-05, "loss": 1.6443, "step": 6583 }, { "epoch": 0.3669806588261524, "grad_norm": 0.5558964610099792, "learning_rate": 7.183332972723578e-05, "loss": 1.816, "step": 6584 }, { "epoch": 0.3670363970793155, "grad_norm": 0.5483201146125793, "learning_rate": 7.182531892160917e-05, "loss": 1.6545, "step": 6585 }, { "epoch": 0.3670921353324787, "grad_norm": 0.5599815249443054, "learning_rate": 7.18173074238107e-05, "loss": 1.634, "step": 6586 }, { "epoch": 0.3671478735856418, "grad_norm": 0.5529213547706604, "learning_rate": 7.180929523409443e-05, "loss": 1.7378, "step": 6587 }, { "epoch": 0.36720361183880496, "grad_norm": 0.5131180286407471, "learning_rate": 7.180128235271449e-05, "loss": 1.5528, "step": 6588 }, { "epoch": 0.36725935009196814, "grad_norm": 0.591602623462677, "learning_rate": 7.179326877992497e-05, "loss": 1.7482, "step": 6589 }, { "epoch": 0.36731508834513127, "grad_norm": 0.4902382791042328, "learning_rate": 7.178525451598003e-05, "loss": 1.4865, "step": 6590 }, { "epoch": 0.3673708265982944, "grad_norm": 0.5887609720230103, "learning_rate": 7.177723956113383e-05, "loss": 1.9031, "step": 6591 }, { "epoch": 0.3674265648514576, "grad_norm": 0.5403375625610352, "learning_rate": 7.176922391564056e-05, "loss": 1.6702, "step": 6592 }, { "epoch": 0.3674823031046207, "grad_norm": 0.5793707370758057, "learning_rate": 7.176120757975444e-05, "loss": 1.6571, "step": 6593 }, { "epoch": 0.36753804135778384, "grad_norm": 0.5770851373672485, "learning_rate": 7.175319055372969e-05, "loss": 1.7841, "step": 6594 }, { "epoch": 0.36759377961094697, "grad_norm": 0.5472514629364014, "learning_rate": 7.174517283782058e-05, "loss": 1.6785, "step": 6595 }, { "epoch": 0.36764951786411015, "grad_norm": 0.5961628556251526, "learning_rate": 7.173715443228133e-05, "loss": 1.6604, "step": 6596 }, { "epoch": 0.3677052561172733, "grad_norm": 0.5890954732894897, "learning_rate": 7.172913533736632e-05, "loss": 1.7003, "step": 6597 }, { "epoch": 0.3677609943704364, "grad_norm": 0.6537253260612488, "learning_rate": 7.17211155533298e-05, "loss": 1.9955, "step": 6598 }, { "epoch": 0.3678167326235996, "grad_norm": 0.5514366030693054, "learning_rate": 7.171309508042615e-05, "loss": 1.5601, "step": 6599 }, { "epoch": 0.3678724708767627, "grad_norm": 0.6790293455123901, "learning_rate": 7.170507391890972e-05, "loss": 2.1675, "step": 6600 }, { "epoch": 0.36792820912992585, "grad_norm": 0.5294934511184692, "learning_rate": 7.16970520690349e-05, "loss": 1.6509, "step": 6601 }, { "epoch": 0.36798394738308904, "grad_norm": 0.5617215037345886, "learning_rate": 7.168902953105608e-05, "loss": 1.7301, "step": 6602 }, { "epoch": 0.36803968563625217, "grad_norm": 0.5187042355537415, "learning_rate": 7.16810063052277e-05, "loss": 1.4945, "step": 6603 }, { "epoch": 0.3680954238894153, "grad_norm": 0.5646756291389465, "learning_rate": 7.16729823918042e-05, "loss": 1.8281, "step": 6604 }, { "epoch": 0.3681511621425784, "grad_norm": 0.5496782064437866, "learning_rate": 7.166495779104007e-05, "loss": 1.6996, "step": 6605 }, { "epoch": 0.3682069003957416, "grad_norm": 0.6056029796600342, "learning_rate": 7.16569325031898e-05, "loss": 1.9787, "step": 6606 }, { "epoch": 0.36826263864890474, "grad_norm": 0.5624659061431885, "learning_rate": 7.164890652850789e-05, "loss": 1.7931, "step": 6607 }, { "epoch": 0.36831837690206787, "grad_norm": 0.5342402458190918, "learning_rate": 7.16408798672489e-05, "loss": 1.664, "step": 6608 }, { "epoch": 0.36837411515523105, "grad_norm": 0.5402200818061829, "learning_rate": 7.163285251966736e-05, "loss": 1.6754, "step": 6609 }, { "epoch": 0.3684298534083942, "grad_norm": 0.5262821316719055, "learning_rate": 7.162482448601789e-05, "loss": 1.5501, "step": 6610 }, { "epoch": 0.3684855916615573, "grad_norm": 0.5371507406234741, "learning_rate": 7.161679576655503e-05, "loss": 1.6168, "step": 6611 }, { "epoch": 0.3685413299147205, "grad_norm": 0.5895312428474426, "learning_rate": 7.160876636153349e-05, "loss": 1.8576, "step": 6612 }, { "epoch": 0.3685970681678836, "grad_norm": 0.5309399962425232, "learning_rate": 7.160073627120784e-05, "loss": 1.5803, "step": 6613 }, { "epoch": 0.36865280642104675, "grad_norm": 0.564697265625, "learning_rate": 7.159270549583278e-05, "loss": 1.2999, "step": 6614 }, { "epoch": 0.36870854467420994, "grad_norm": 0.5483527183532715, "learning_rate": 7.158467403566299e-05, "loss": 1.559, "step": 6615 }, { "epoch": 0.36876428292737307, "grad_norm": 0.47662925720214844, "learning_rate": 7.15766418909532e-05, "loss": 1.2871, "step": 6616 }, { "epoch": 0.3688200211805362, "grad_norm": 0.5505543947219849, "learning_rate": 7.156860906195811e-05, "loss": 1.717, "step": 6617 }, { "epoch": 0.3688757594336993, "grad_norm": 0.5837799310684204, "learning_rate": 7.156057554893251e-05, "loss": 1.8828, "step": 6618 }, { "epoch": 0.3689314976868625, "grad_norm": 0.6020135283470154, "learning_rate": 7.155254135213117e-05, "loss": 1.6727, "step": 6619 }, { "epoch": 0.36898723594002564, "grad_norm": 0.5805865526199341, "learning_rate": 7.154450647180886e-05, "loss": 1.7273, "step": 6620 }, { "epoch": 0.36904297419318877, "grad_norm": 0.5338916182518005, "learning_rate": 7.153647090822043e-05, "loss": 1.5732, "step": 6621 }, { "epoch": 0.36909871244635195, "grad_norm": 0.5388802886009216, "learning_rate": 7.152843466162069e-05, "loss": 1.5612, "step": 6622 }, { "epoch": 0.3691544506995151, "grad_norm": 0.5497878789901733, "learning_rate": 7.152039773226456e-05, "loss": 1.6601, "step": 6623 }, { "epoch": 0.3692101889526782, "grad_norm": 0.5147888660430908, "learning_rate": 7.151236012040685e-05, "loss": 1.6467, "step": 6624 }, { "epoch": 0.3692659272058414, "grad_norm": 0.5906471014022827, "learning_rate": 7.150432182630252e-05, "loss": 1.6429, "step": 6625 }, { "epoch": 0.3693216654590045, "grad_norm": 0.5193469524383545, "learning_rate": 7.149628285020648e-05, "loss": 1.7369, "step": 6626 }, { "epoch": 0.36937740371216765, "grad_norm": 0.5903412699699402, "learning_rate": 7.148824319237367e-05, "loss": 1.7329, "step": 6627 }, { "epoch": 0.3694331419653308, "grad_norm": 0.5230131149291992, "learning_rate": 7.148020285305907e-05, "loss": 1.5495, "step": 6628 }, { "epoch": 0.36948888021849396, "grad_norm": 0.5554400086402893, "learning_rate": 7.147216183251768e-05, "loss": 1.7592, "step": 6629 }, { "epoch": 0.3695446184716571, "grad_norm": 0.4992237985134125, "learning_rate": 7.146412013100451e-05, "loss": 1.5094, "step": 6630 }, { "epoch": 0.3696003567248202, "grad_norm": 0.6239908933639526, "learning_rate": 7.14560777487746e-05, "loss": 1.9804, "step": 6631 }, { "epoch": 0.3696560949779834, "grad_norm": 0.49736112356185913, "learning_rate": 7.144803468608298e-05, "loss": 1.4165, "step": 6632 }, { "epoch": 0.36971183323114654, "grad_norm": 0.5291538834571838, "learning_rate": 7.143999094318477e-05, "loss": 1.6362, "step": 6633 }, { "epoch": 0.36976757148430967, "grad_norm": 0.5881434679031372, "learning_rate": 7.143194652033505e-05, "loss": 1.8459, "step": 6634 }, { "epoch": 0.36982330973747285, "grad_norm": 0.5663610100746155, "learning_rate": 7.142390141778895e-05, "loss": 1.655, "step": 6635 }, { "epoch": 0.369879047990636, "grad_norm": 0.6780499219894409, "learning_rate": 7.141585563580158e-05, "loss": 1.8284, "step": 6636 }, { "epoch": 0.3699347862437991, "grad_norm": 0.544389009475708, "learning_rate": 7.140780917462814e-05, "loss": 1.6024, "step": 6637 }, { "epoch": 0.3699905244969623, "grad_norm": 0.5259643197059631, "learning_rate": 7.139976203452383e-05, "loss": 1.6143, "step": 6638 }, { "epoch": 0.3700462627501254, "grad_norm": 0.5904932022094727, "learning_rate": 7.139171421574383e-05, "loss": 1.7714, "step": 6639 }, { "epoch": 0.37010200100328855, "grad_norm": 0.5398536920547485, "learning_rate": 7.138366571854338e-05, "loss": 1.5943, "step": 6640 }, { "epoch": 0.3701577392564517, "grad_norm": 0.5698688626289368, "learning_rate": 7.137561654317772e-05, "loss": 1.7892, "step": 6641 }, { "epoch": 0.37021347750961486, "grad_norm": 0.5498561859130859, "learning_rate": 7.136756668990213e-05, "loss": 1.7051, "step": 6642 }, { "epoch": 0.370269215762778, "grad_norm": 0.5418841242790222, "learning_rate": 7.13595161589719e-05, "loss": 1.6284, "step": 6643 }, { "epoch": 0.3703249540159411, "grad_norm": 0.5735422968864441, "learning_rate": 7.135146495064236e-05, "loss": 1.5837, "step": 6644 }, { "epoch": 0.3703806922691043, "grad_norm": 0.593471348285675, "learning_rate": 7.134341306516885e-05, "loss": 1.891, "step": 6645 }, { "epoch": 0.37043643052226743, "grad_norm": 0.519626796245575, "learning_rate": 7.13353605028067e-05, "loss": 1.676, "step": 6646 }, { "epoch": 0.37049216877543056, "grad_norm": 0.59029620885849, "learning_rate": 7.132730726381134e-05, "loss": 1.8638, "step": 6647 }, { "epoch": 0.37054790702859375, "grad_norm": 0.6374014019966125, "learning_rate": 7.13192533484381e-05, "loss": 2.0887, "step": 6648 }, { "epoch": 0.3706036452817569, "grad_norm": 0.5250412821769714, "learning_rate": 7.131119875694246e-05, "loss": 1.5408, "step": 6649 }, { "epoch": 0.37065938353492, "grad_norm": 0.5467897653579712, "learning_rate": 7.130314348957986e-05, "loss": 1.4246, "step": 6650 }, { "epoch": 0.37071512178808314, "grad_norm": 0.5109268426895142, "learning_rate": 7.129508754660575e-05, "loss": 1.4972, "step": 6651 }, { "epoch": 0.3707708600412463, "grad_norm": 0.5759547352790833, "learning_rate": 7.128703092827562e-05, "loss": 1.9089, "step": 6652 }, { "epoch": 0.37082659829440945, "grad_norm": 0.6243898272514343, "learning_rate": 7.127897363484497e-05, "loss": 1.9196, "step": 6653 }, { "epoch": 0.3708823365475726, "grad_norm": 0.5852481722831726, "learning_rate": 7.127091566656936e-05, "loss": 1.7842, "step": 6654 }, { "epoch": 0.37093807480073576, "grad_norm": 0.5579434037208557, "learning_rate": 7.12628570237043e-05, "loss": 1.6261, "step": 6655 }, { "epoch": 0.3709938130538989, "grad_norm": 0.5315961837768555, "learning_rate": 7.125479770650539e-05, "loss": 1.6085, "step": 6656 }, { "epoch": 0.371049551307062, "grad_norm": 0.5678053498268127, "learning_rate": 7.124673771522824e-05, "loss": 1.905, "step": 6657 }, { "epoch": 0.3711052895602252, "grad_norm": 0.5308210849761963, "learning_rate": 7.123867705012843e-05, "loss": 1.5081, "step": 6658 }, { "epoch": 0.37116102781338833, "grad_norm": 0.5750522017478943, "learning_rate": 7.123061571146161e-05, "loss": 1.6793, "step": 6659 }, { "epoch": 0.37121676606655146, "grad_norm": 0.5785144567489624, "learning_rate": 7.122255369948346e-05, "loss": 1.6402, "step": 6660 }, { "epoch": 0.37127250431971465, "grad_norm": 0.5107117891311646, "learning_rate": 7.121449101444964e-05, "loss": 1.6232, "step": 6661 }, { "epoch": 0.3713282425728778, "grad_norm": 0.5365573763847351, "learning_rate": 7.120642765661584e-05, "loss": 1.7163, "step": 6662 }, { "epoch": 0.3713839808260409, "grad_norm": 0.5924217104911804, "learning_rate": 7.119836362623781e-05, "loss": 1.9706, "step": 6663 }, { "epoch": 0.37143971907920403, "grad_norm": 0.5683318972587585, "learning_rate": 7.119029892357128e-05, "loss": 1.9116, "step": 6664 }, { "epoch": 0.3714954573323672, "grad_norm": 0.524502694606781, "learning_rate": 7.118223354887201e-05, "loss": 1.5862, "step": 6665 }, { "epoch": 0.37155119558553035, "grad_norm": 0.5245027542114258, "learning_rate": 7.11741675023958e-05, "loss": 1.5945, "step": 6666 }, { "epoch": 0.3716069338386935, "grad_norm": 0.5658608675003052, "learning_rate": 7.116610078439845e-05, "loss": 1.5802, "step": 6667 }, { "epoch": 0.37166267209185666, "grad_norm": 0.5938420295715332, "learning_rate": 7.115803339513578e-05, "loss": 2.005, "step": 6668 }, { "epoch": 0.3717184103450198, "grad_norm": 0.5456317663192749, "learning_rate": 7.114996533486366e-05, "loss": 1.5013, "step": 6669 }, { "epoch": 0.3717741485981829, "grad_norm": 0.5922924280166626, "learning_rate": 7.114189660383794e-05, "loss": 2.0418, "step": 6670 }, { "epoch": 0.3718298868513461, "grad_norm": 0.5821951627731323, "learning_rate": 7.113382720231454e-05, "loss": 1.7955, "step": 6671 }, { "epoch": 0.37188562510450923, "grad_norm": 0.5134814381599426, "learning_rate": 7.112575713054936e-05, "loss": 1.4315, "step": 6672 }, { "epoch": 0.37194136335767236, "grad_norm": 0.5751433968544006, "learning_rate": 7.111768638879833e-05, "loss": 1.566, "step": 6673 }, { "epoch": 0.3719971016108355, "grad_norm": 0.5614348649978638, "learning_rate": 7.110961497731742e-05, "loss": 1.8572, "step": 6674 }, { "epoch": 0.3720528398639987, "grad_norm": 0.5680375099182129, "learning_rate": 7.110154289636259e-05, "loss": 2.0372, "step": 6675 }, { "epoch": 0.3721085781171618, "grad_norm": 0.5367892980575562, "learning_rate": 7.109347014618985e-05, "loss": 1.6665, "step": 6676 }, { "epoch": 0.37216431637032493, "grad_norm": 0.563017725944519, "learning_rate": 7.108539672705523e-05, "loss": 1.747, "step": 6677 }, { "epoch": 0.3722200546234881, "grad_norm": 0.5716055631637573, "learning_rate": 7.107732263921475e-05, "loss": 1.4182, "step": 6678 }, { "epoch": 0.37227579287665125, "grad_norm": 0.514310896396637, "learning_rate": 7.106924788292448e-05, "loss": 1.6223, "step": 6679 }, { "epoch": 0.3723315311298144, "grad_norm": 0.5039160251617432, "learning_rate": 7.106117245844054e-05, "loss": 1.5979, "step": 6680 }, { "epoch": 0.37238726938297756, "grad_norm": 0.5815281867980957, "learning_rate": 7.105309636601898e-05, "loss": 1.9983, "step": 6681 }, { "epoch": 0.3724430076361407, "grad_norm": 0.5450384616851807, "learning_rate": 7.104501960591595e-05, "loss": 1.5488, "step": 6682 }, { "epoch": 0.3724987458893038, "grad_norm": 0.5386560559272766, "learning_rate": 7.103694217838761e-05, "loss": 1.6376, "step": 6683 }, { "epoch": 0.372554484142467, "grad_norm": 0.5220578908920288, "learning_rate": 7.102886408369012e-05, "loss": 1.4654, "step": 6684 }, { "epoch": 0.37261022239563013, "grad_norm": 0.5630038976669312, "learning_rate": 7.102078532207966e-05, "loss": 1.7554, "step": 6685 }, { "epoch": 0.37266596064879326, "grad_norm": 0.5405006408691406, "learning_rate": 7.101270589381245e-05, "loss": 1.8247, "step": 6686 }, { "epoch": 0.3727216989019564, "grad_norm": 0.5460960865020752, "learning_rate": 7.100462579914474e-05, "loss": 1.7902, "step": 6687 }, { "epoch": 0.3727774371551196, "grad_norm": 0.5519078969955444, "learning_rate": 7.099654503833273e-05, "loss": 1.7138, "step": 6688 }, { "epoch": 0.3728331754082827, "grad_norm": 0.5574856400489807, "learning_rate": 7.098846361163273e-05, "loss": 1.6607, "step": 6689 }, { "epoch": 0.37288891366144583, "grad_norm": 0.5525651574134827, "learning_rate": 7.098038151930107e-05, "loss": 1.8834, "step": 6690 }, { "epoch": 0.372944651914609, "grad_norm": 0.5278156399726868, "learning_rate": 7.097229876159401e-05, "loss": 1.67, "step": 6691 }, { "epoch": 0.37300039016777214, "grad_norm": 0.5362699627876282, "learning_rate": 7.096421533876792e-05, "loss": 1.6881, "step": 6692 }, { "epoch": 0.3730561284209353, "grad_norm": 0.522748589515686, "learning_rate": 7.095613125107915e-05, "loss": 1.6077, "step": 6693 }, { "epoch": 0.37311186667409846, "grad_norm": 0.5335802435874939, "learning_rate": 7.094804649878407e-05, "loss": 1.6124, "step": 6694 }, { "epoch": 0.3731676049272616, "grad_norm": 0.5322664976119995, "learning_rate": 7.093996108213909e-05, "loss": 1.735, "step": 6695 }, { "epoch": 0.3732233431804247, "grad_norm": 0.5863260626792908, "learning_rate": 7.093187500140064e-05, "loss": 1.9465, "step": 6696 }, { "epoch": 0.37327908143358784, "grad_norm": 0.5546720623970032, "learning_rate": 7.092378825682517e-05, "loss": 1.6817, "step": 6697 }, { "epoch": 0.37333481968675103, "grad_norm": 0.5397077798843384, "learning_rate": 7.091570084866909e-05, "loss": 1.7072, "step": 6698 }, { "epoch": 0.37339055793991416, "grad_norm": 0.5567345023155212, "learning_rate": 7.090761277718897e-05, "loss": 1.7315, "step": 6699 }, { "epoch": 0.3734462961930773, "grad_norm": 0.5560916662216187, "learning_rate": 7.089952404264126e-05, "loss": 1.5599, "step": 6700 }, { "epoch": 0.37350203444624047, "grad_norm": 0.5497678518295288, "learning_rate": 7.089143464528249e-05, "loss": 1.6328, "step": 6701 }, { "epoch": 0.3735577726994036, "grad_norm": 0.5806947946548462, "learning_rate": 7.088334458536921e-05, "loss": 1.8025, "step": 6702 }, { "epoch": 0.37361351095256673, "grad_norm": 0.6178561449050903, "learning_rate": 7.087525386315802e-05, "loss": 1.6715, "step": 6703 }, { "epoch": 0.3736692492057299, "grad_norm": 0.5702304244041443, "learning_rate": 7.086716247890548e-05, "loss": 1.7321, "step": 6704 }, { "epoch": 0.37372498745889304, "grad_norm": 0.5194035172462463, "learning_rate": 7.08590704328682e-05, "loss": 1.5648, "step": 6705 }, { "epoch": 0.37378072571205617, "grad_norm": 0.5901757478713989, "learning_rate": 7.085097772530283e-05, "loss": 1.9348, "step": 6706 }, { "epoch": 0.37383646396521936, "grad_norm": 0.7031030654907227, "learning_rate": 7.084288435646603e-05, "loss": 1.5634, "step": 6707 }, { "epoch": 0.3738922022183825, "grad_norm": 0.5556403398513794, "learning_rate": 7.083479032661445e-05, "loss": 1.6525, "step": 6708 }, { "epoch": 0.3739479404715456, "grad_norm": 0.5691899061203003, "learning_rate": 7.082669563600478e-05, "loss": 1.885, "step": 6709 }, { "epoch": 0.37400367872470874, "grad_norm": 0.5547059774398804, "learning_rate": 7.081860028489377e-05, "loss": 1.8645, "step": 6710 }, { "epoch": 0.37405941697787193, "grad_norm": 0.5635570287704468, "learning_rate": 7.081050427353814e-05, "loss": 1.8752, "step": 6711 }, { "epoch": 0.37411515523103506, "grad_norm": 0.5423487424850464, "learning_rate": 7.080240760219465e-05, "loss": 1.5953, "step": 6712 }, { "epoch": 0.3741708934841982, "grad_norm": 0.5141568183898926, "learning_rate": 7.079431027112006e-05, "loss": 1.4812, "step": 6713 }, { "epoch": 0.37422663173736137, "grad_norm": 0.5988462567329407, "learning_rate": 7.078621228057121e-05, "loss": 1.8588, "step": 6714 }, { "epoch": 0.3742823699905245, "grad_norm": 0.5320055484771729, "learning_rate": 7.077811363080489e-05, "loss": 1.745, "step": 6715 }, { "epoch": 0.37433810824368763, "grad_norm": 0.5388814806938171, "learning_rate": 7.077001432207795e-05, "loss": 1.511, "step": 6716 }, { "epoch": 0.3743938464968508, "grad_norm": 0.537324070930481, "learning_rate": 7.076191435464725e-05, "loss": 1.6644, "step": 6717 }, { "epoch": 0.37444958475001394, "grad_norm": 0.533687174320221, "learning_rate": 7.075381372876967e-05, "loss": 1.73, "step": 6718 }, { "epoch": 0.37450532300317707, "grad_norm": 0.5057275295257568, "learning_rate": 7.074571244470214e-05, "loss": 1.6284, "step": 6719 }, { "epoch": 0.3745610612563402, "grad_norm": 0.6067156195640564, "learning_rate": 7.073761050270156e-05, "loss": 1.84, "step": 6720 }, { "epoch": 0.3746167995095034, "grad_norm": 0.5253334641456604, "learning_rate": 7.072950790302487e-05, "loss": 1.4598, "step": 6721 }, { "epoch": 0.3746725377626665, "grad_norm": 0.521193265914917, "learning_rate": 7.072140464592907e-05, "loss": 1.5442, "step": 6722 }, { "epoch": 0.37472827601582964, "grad_norm": 0.5262565612792969, "learning_rate": 7.071330073167112e-05, "loss": 1.6898, "step": 6723 }, { "epoch": 0.3747840142689928, "grad_norm": 0.6259338855743408, "learning_rate": 7.070519616050804e-05, "loss": 1.731, "step": 6724 }, { "epoch": 0.37483975252215596, "grad_norm": 0.5520288348197937, "learning_rate": 7.069709093269687e-05, "loss": 1.796, "step": 6725 }, { "epoch": 0.3748954907753191, "grad_norm": 0.5660863518714905, "learning_rate": 7.068898504849462e-05, "loss": 1.656, "step": 6726 }, { "epoch": 0.37495122902848227, "grad_norm": 0.5522897839546204, "learning_rate": 7.06808785081584e-05, "loss": 1.6656, "step": 6727 }, { "epoch": 0.3750069672816454, "grad_norm": 0.6100639700889587, "learning_rate": 7.067277131194529e-05, "loss": 1.7658, "step": 6728 }, { "epoch": 0.3750627055348085, "grad_norm": 0.5829086899757385, "learning_rate": 7.066466346011242e-05, "loss": 1.6342, "step": 6729 }, { "epoch": 0.3751184437879717, "grad_norm": 0.6315231323242188, "learning_rate": 7.06565549529169e-05, "loss": 1.7829, "step": 6730 }, { "epoch": 0.37517418204113484, "grad_norm": 0.6006489992141724, "learning_rate": 7.064844579061588e-05, "loss": 1.8819, "step": 6731 }, { "epoch": 0.37522992029429797, "grad_norm": 0.5952304005622864, "learning_rate": 7.064033597346658e-05, "loss": 1.6654, "step": 6732 }, { "epoch": 0.3752856585474611, "grad_norm": 0.5768652558326721, "learning_rate": 7.063222550172612e-05, "loss": 1.6577, "step": 6733 }, { "epoch": 0.3753413968006243, "grad_norm": 0.5706788301467896, "learning_rate": 7.062411437565179e-05, "loss": 1.7532, "step": 6734 }, { "epoch": 0.3753971350537874, "grad_norm": 0.6298890113830566, "learning_rate": 7.06160025955008e-05, "loss": 1.7744, "step": 6735 }, { "epoch": 0.37545287330695054, "grad_norm": 0.5873239636421204, "learning_rate": 7.06078901615304e-05, "loss": 1.9847, "step": 6736 }, { "epoch": 0.3755086115601137, "grad_norm": 0.5103023648262024, "learning_rate": 7.059977707399787e-05, "loss": 1.4559, "step": 6737 }, { "epoch": 0.37556434981327685, "grad_norm": 0.521653950214386, "learning_rate": 7.059166333316054e-05, "loss": 1.6796, "step": 6738 }, { "epoch": 0.37562008806644, "grad_norm": 0.5209727883338928, "learning_rate": 7.058354893927568e-05, "loss": 1.5015, "step": 6739 }, { "epoch": 0.37567582631960317, "grad_norm": 0.6425443887710571, "learning_rate": 7.057543389260068e-05, "loss": 1.8178, "step": 6740 }, { "epoch": 0.3757315645727663, "grad_norm": 0.5647505521774292, "learning_rate": 7.056731819339286e-05, "loss": 1.7513, "step": 6741 }, { "epoch": 0.3757873028259294, "grad_norm": 0.5992183089256287, "learning_rate": 7.055920184190964e-05, "loss": 1.6351, "step": 6742 }, { "epoch": 0.37584304107909255, "grad_norm": 0.5495748519897461, "learning_rate": 7.055108483840839e-05, "loss": 1.6854, "step": 6743 }, { "epoch": 0.37589877933225574, "grad_norm": 0.5780972242355347, "learning_rate": 7.054296718314656e-05, "loss": 1.7937, "step": 6744 }, { "epoch": 0.37595451758541887, "grad_norm": 0.5518954992294312, "learning_rate": 7.053484887638158e-05, "loss": 1.6708, "step": 6745 }, { "epoch": 0.376010255838582, "grad_norm": 0.5211352109909058, "learning_rate": 7.052672991837093e-05, "loss": 1.6565, "step": 6746 }, { "epoch": 0.3760659940917452, "grad_norm": 0.5192275643348694, "learning_rate": 7.051861030937207e-05, "loss": 1.5376, "step": 6747 }, { "epoch": 0.3761217323449083, "grad_norm": 0.5492019057273865, "learning_rate": 7.051049004964254e-05, "loss": 1.7518, "step": 6748 }, { "epoch": 0.37617747059807144, "grad_norm": 0.5412474274635315, "learning_rate": 7.050236913943984e-05, "loss": 1.5384, "step": 6749 }, { "epoch": 0.3762332088512346, "grad_norm": 0.5172974467277527, "learning_rate": 7.049424757902153e-05, "loss": 1.6072, "step": 6750 }, { "epoch": 0.37628894710439775, "grad_norm": 0.5415205955505371, "learning_rate": 7.048612536864517e-05, "loss": 1.8342, "step": 6751 }, { "epoch": 0.3763446853575609, "grad_norm": 0.5428817868232727, "learning_rate": 7.047800250856837e-05, "loss": 1.6988, "step": 6752 }, { "epoch": 0.37640042361072407, "grad_norm": 0.5195114612579346, "learning_rate": 7.046987899904871e-05, "loss": 1.4647, "step": 6753 }, { "epoch": 0.3764561618638872, "grad_norm": 0.5440792441368103, "learning_rate": 7.046175484034384e-05, "loss": 1.5224, "step": 6754 }, { "epoch": 0.3765119001170503, "grad_norm": 0.5353301763534546, "learning_rate": 7.045363003271141e-05, "loss": 1.664, "step": 6755 }, { "epoch": 0.37656763837021345, "grad_norm": 0.5722842812538147, "learning_rate": 7.044550457640909e-05, "loss": 1.6422, "step": 6756 }, { "epoch": 0.37662337662337664, "grad_norm": 0.5732778906822205, "learning_rate": 7.043737847169455e-05, "loss": 2.0161, "step": 6757 }, { "epoch": 0.37667911487653977, "grad_norm": 0.5180158019065857, "learning_rate": 7.042925171882557e-05, "loss": 1.6388, "step": 6758 }, { "epoch": 0.3767348531297029, "grad_norm": 0.530694305896759, "learning_rate": 7.042112431805979e-05, "loss": 1.546, "step": 6759 }, { "epoch": 0.3767905913828661, "grad_norm": 0.5620813965797424, "learning_rate": 7.041299626965503e-05, "loss": 1.6727, "step": 6760 }, { "epoch": 0.3768463296360292, "grad_norm": 0.5627542734146118, "learning_rate": 7.040486757386904e-05, "loss": 1.7527, "step": 6761 }, { "epoch": 0.37690206788919234, "grad_norm": 0.588291347026825, "learning_rate": 7.039673823095963e-05, "loss": 1.9415, "step": 6762 }, { "epoch": 0.3769578061423555, "grad_norm": 0.5307551026344299, "learning_rate": 7.03886082411846e-05, "loss": 1.6316, "step": 6763 }, { "epoch": 0.37701354439551865, "grad_norm": 0.5484150648117065, "learning_rate": 7.038047760480179e-05, "loss": 1.6363, "step": 6764 }, { "epoch": 0.3770692826486818, "grad_norm": 0.5301684737205505, "learning_rate": 7.037234632206905e-05, "loss": 1.7243, "step": 6765 }, { "epoch": 0.3771250209018449, "grad_norm": 0.5907619595527649, "learning_rate": 7.036421439324427e-05, "loss": 1.6807, "step": 6766 }, { "epoch": 0.3771807591550081, "grad_norm": 0.5170425772666931, "learning_rate": 7.035608181858533e-05, "loss": 1.6641, "step": 6767 }, { "epoch": 0.3772364974081712, "grad_norm": 0.5344756245613098, "learning_rate": 7.034794859835016e-05, "loss": 1.8226, "step": 6768 }, { "epoch": 0.37729223566133435, "grad_norm": 0.5386238098144531, "learning_rate": 7.033981473279672e-05, "loss": 1.6291, "step": 6769 }, { "epoch": 0.37734797391449754, "grad_norm": 0.5417985916137695, "learning_rate": 7.033168022218292e-05, "loss": 1.5797, "step": 6770 }, { "epoch": 0.37740371216766067, "grad_norm": 0.5583431124687195, "learning_rate": 7.032354506676678e-05, "loss": 1.7165, "step": 6771 }, { "epoch": 0.3774594504208238, "grad_norm": 0.5974751114845276, "learning_rate": 7.031540926680627e-05, "loss": 1.9454, "step": 6772 }, { "epoch": 0.377515188673987, "grad_norm": 0.5629299283027649, "learning_rate": 7.030727282255944e-05, "loss": 1.9527, "step": 6773 }, { "epoch": 0.3775709269271501, "grad_norm": 0.49648937582969666, "learning_rate": 7.02991357342843e-05, "loss": 1.4055, "step": 6774 }, { "epoch": 0.37762666518031324, "grad_norm": 0.5776923298835754, "learning_rate": 7.029099800223895e-05, "loss": 1.5683, "step": 6775 }, { "epoch": 0.3776824034334764, "grad_norm": 0.5667086839675903, "learning_rate": 7.028285962668144e-05, "loss": 1.6576, "step": 6776 }, { "epoch": 0.37773814168663955, "grad_norm": 0.51173996925354, "learning_rate": 7.027472060786988e-05, "loss": 1.6046, "step": 6777 }, { "epoch": 0.3777938799398027, "grad_norm": 0.6762179732322693, "learning_rate": 7.026658094606238e-05, "loss": 1.8251, "step": 6778 }, { "epoch": 0.3778496181929658, "grad_norm": 0.6333464980125427, "learning_rate": 7.02584406415171e-05, "loss": 1.9974, "step": 6779 }, { "epoch": 0.377905356446129, "grad_norm": 0.5379152297973633, "learning_rate": 7.02502996944922e-05, "loss": 1.5211, "step": 6780 }, { "epoch": 0.3779610946992921, "grad_norm": 0.5208351016044617, "learning_rate": 7.024215810524586e-05, "loss": 1.7317, "step": 6781 }, { "epoch": 0.37801683295245525, "grad_norm": 0.5434418320655823, "learning_rate": 7.023401587403629e-05, "loss": 1.6749, "step": 6782 }, { "epoch": 0.37807257120561844, "grad_norm": 0.5639735460281372, "learning_rate": 7.022587300112171e-05, "loss": 1.7105, "step": 6783 }, { "epoch": 0.37812830945878156, "grad_norm": 0.600032389163971, "learning_rate": 7.021772948676037e-05, "loss": 1.8057, "step": 6784 }, { "epoch": 0.3781840477119447, "grad_norm": 0.5152847766876221, "learning_rate": 7.020958533121051e-05, "loss": 1.6275, "step": 6785 }, { "epoch": 0.3782397859651079, "grad_norm": 0.5553915500640869, "learning_rate": 7.020144053473044e-05, "loss": 1.786, "step": 6786 }, { "epoch": 0.378295524218271, "grad_norm": 0.5452811121940613, "learning_rate": 7.019329509757845e-05, "loss": 1.6452, "step": 6787 }, { "epoch": 0.37835126247143414, "grad_norm": 0.5100104212760925, "learning_rate": 7.01851490200129e-05, "loss": 1.5128, "step": 6788 }, { "epoch": 0.37840700072459726, "grad_norm": 0.6309191584587097, "learning_rate": 7.017700230229208e-05, "loss": 1.4683, "step": 6789 }, { "epoch": 0.37846273897776045, "grad_norm": 0.5344750881195068, "learning_rate": 7.01688549446744e-05, "loss": 1.6131, "step": 6790 }, { "epoch": 0.3785184772309236, "grad_norm": 0.5286291837692261, "learning_rate": 7.016070694741824e-05, "loss": 1.6499, "step": 6791 }, { "epoch": 0.3785742154840867, "grad_norm": 0.5597365498542786, "learning_rate": 7.015255831078201e-05, "loss": 1.6677, "step": 6792 }, { "epoch": 0.3786299537372499, "grad_norm": 0.5482022166252136, "learning_rate": 7.01444090350241e-05, "loss": 1.6498, "step": 6793 }, { "epoch": 0.378685691990413, "grad_norm": 0.6198036670684814, "learning_rate": 7.0136259120403e-05, "loss": 1.8393, "step": 6794 }, { "epoch": 0.37874143024357615, "grad_norm": 0.555736243724823, "learning_rate": 7.012810856717717e-05, "loss": 1.5817, "step": 6795 }, { "epoch": 0.37879716849673933, "grad_norm": 0.5894885659217834, "learning_rate": 7.011995737560507e-05, "loss": 1.736, "step": 6796 }, { "epoch": 0.37885290674990246, "grad_norm": 0.5784539580345154, "learning_rate": 7.011180554594525e-05, "loss": 1.7195, "step": 6797 }, { "epoch": 0.3789086450030656, "grad_norm": 0.5761838555335999, "learning_rate": 7.010365307845621e-05, "loss": 1.5784, "step": 6798 }, { "epoch": 0.3789643832562288, "grad_norm": 0.5359389185905457, "learning_rate": 7.00954999733965e-05, "loss": 1.4703, "step": 6799 }, { "epoch": 0.3790201215093919, "grad_norm": 0.5606504678726196, "learning_rate": 7.008734623102471e-05, "loss": 1.7026, "step": 6800 }, { "epoch": 0.37907585976255503, "grad_norm": 0.5452861785888672, "learning_rate": 7.007919185159942e-05, "loss": 1.6358, "step": 6801 }, { "epoch": 0.37913159801571816, "grad_norm": 0.533334493637085, "learning_rate": 7.007103683537922e-05, "loss": 1.5224, "step": 6802 }, { "epoch": 0.37918733626888135, "grad_norm": 0.5216323137283325, "learning_rate": 7.006288118262277e-05, "loss": 1.5611, "step": 6803 }, { "epoch": 0.3792430745220445, "grad_norm": 0.6083248853683472, "learning_rate": 7.005472489358868e-05, "loss": 1.9112, "step": 6804 }, { "epoch": 0.3792988127752076, "grad_norm": 0.5337701439857483, "learning_rate": 7.004656796853565e-05, "loss": 1.678, "step": 6805 }, { "epoch": 0.3793545510283708, "grad_norm": 0.5296239256858826, "learning_rate": 7.003841040772237e-05, "loss": 1.6372, "step": 6806 }, { "epoch": 0.3794102892815339, "grad_norm": 0.5512758493423462, "learning_rate": 7.003025221140754e-05, "loss": 1.7838, "step": 6807 }, { "epoch": 0.37946602753469705, "grad_norm": 0.5666672587394714, "learning_rate": 7.00220933798499e-05, "loss": 1.8518, "step": 6808 }, { "epoch": 0.37952176578786023, "grad_norm": 0.5516249537467957, "learning_rate": 7.001393391330819e-05, "loss": 1.4928, "step": 6809 }, { "epoch": 0.37957750404102336, "grad_norm": 0.5139819979667664, "learning_rate": 7.000577381204118e-05, "loss": 1.5464, "step": 6810 }, { "epoch": 0.3796332422941865, "grad_norm": 0.5297854542732239, "learning_rate": 6.999761307630767e-05, "loss": 1.4929, "step": 6811 }, { "epoch": 0.3796889805473496, "grad_norm": 0.5862724184989929, "learning_rate": 6.998945170636647e-05, "loss": 1.8435, "step": 6812 }, { "epoch": 0.3797447188005128, "grad_norm": 0.5517110228538513, "learning_rate": 6.998128970247641e-05, "loss": 1.5962, "step": 6813 }, { "epoch": 0.37980045705367593, "grad_norm": 0.5306249260902405, "learning_rate": 6.997312706489634e-05, "loss": 1.4978, "step": 6814 }, { "epoch": 0.37985619530683906, "grad_norm": 0.5715779662132263, "learning_rate": 6.996496379388512e-05, "loss": 1.7663, "step": 6815 }, { "epoch": 0.37991193356000225, "grad_norm": 0.5692317485809326, "learning_rate": 6.995679988970167e-05, "loss": 1.9011, "step": 6816 }, { "epoch": 0.3799676718131654, "grad_norm": 0.5604211091995239, "learning_rate": 6.994863535260488e-05, "loss": 1.5928, "step": 6817 }, { "epoch": 0.3800234100663285, "grad_norm": 0.5591232776641846, "learning_rate": 6.994047018285368e-05, "loss": 1.6347, "step": 6818 }, { "epoch": 0.3800791483194917, "grad_norm": 0.515835702419281, "learning_rate": 6.993230438070702e-05, "loss": 1.4441, "step": 6819 }, { "epoch": 0.3801348865726548, "grad_norm": 0.5194911360740662, "learning_rate": 6.99241379464239e-05, "loss": 1.6603, "step": 6820 }, { "epoch": 0.38019062482581795, "grad_norm": 0.49259036779403687, "learning_rate": 6.991597088026327e-05, "loss": 1.5785, "step": 6821 }, { "epoch": 0.38024636307898113, "grad_norm": 0.5865880846977234, "learning_rate": 6.990780318248416e-05, "loss": 1.7017, "step": 6822 }, { "epoch": 0.38030210133214426, "grad_norm": 0.532753050327301, "learning_rate": 6.989963485334562e-05, "loss": 1.7205, "step": 6823 }, { "epoch": 0.3803578395853074, "grad_norm": 0.6024113297462463, "learning_rate": 6.989146589310667e-05, "loss": 1.8499, "step": 6824 }, { "epoch": 0.3804135778384705, "grad_norm": 0.5912168622016907, "learning_rate": 6.988329630202641e-05, "loss": 1.7783, "step": 6825 }, { "epoch": 0.3804693160916337, "grad_norm": 0.5647505521774292, "learning_rate": 6.98751260803639e-05, "loss": 1.6106, "step": 6826 }, { "epoch": 0.38052505434479683, "grad_norm": 0.5149972438812256, "learning_rate": 6.98669552283783e-05, "loss": 1.652, "step": 6827 }, { "epoch": 0.38058079259795996, "grad_norm": 0.5642407536506653, "learning_rate": 6.98587837463287e-05, "loss": 1.6075, "step": 6828 }, { "epoch": 0.38063653085112314, "grad_norm": 0.6054338812828064, "learning_rate": 6.985061163447426e-05, "loss": 1.7205, "step": 6829 }, { "epoch": 0.3806922691042863, "grad_norm": 0.5490162372589111, "learning_rate": 6.984243889307415e-05, "loss": 1.605, "step": 6830 }, { "epoch": 0.3807480073574494, "grad_norm": 0.5481693744659424, "learning_rate": 6.983426552238756e-05, "loss": 1.6532, "step": 6831 }, { "epoch": 0.3808037456106126, "grad_norm": 0.5470540523529053, "learning_rate": 6.982609152267374e-05, "loss": 1.856, "step": 6832 }, { "epoch": 0.3808594838637757, "grad_norm": 0.5047014355659485, "learning_rate": 6.981791689419186e-05, "loss": 1.5632, "step": 6833 }, { "epoch": 0.38091522211693885, "grad_norm": 0.5213363766670227, "learning_rate": 6.980974163720123e-05, "loss": 1.648, "step": 6834 }, { "epoch": 0.380970960370102, "grad_norm": 0.5108797550201416, "learning_rate": 6.980156575196107e-05, "loss": 1.7048, "step": 6835 }, { "epoch": 0.38102669862326516, "grad_norm": 0.5571927428245544, "learning_rate": 6.979338923873073e-05, "loss": 1.7984, "step": 6836 }, { "epoch": 0.3810824368764283, "grad_norm": 0.5656031966209412, "learning_rate": 6.978521209776945e-05, "loss": 1.6214, "step": 6837 }, { "epoch": 0.3811381751295914, "grad_norm": 0.5520498752593994, "learning_rate": 6.977703432933661e-05, "loss": 1.5048, "step": 6838 }, { "epoch": 0.3811939133827546, "grad_norm": 0.5377273559570312, "learning_rate": 6.976885593369155e-05, "loss": 1.4111, "step": 6839 }, { "epoch": 0.38124965163591773, "grad_norm": 0.5396257042884827, "learning_rate": 6.976067691109365e-05, "loss": 1.6715, "step": 6840 }, { "epoch": 0.38130538988908086, "grad_norm": 0.5259842872619629, "learning_rate": 6.975249726180227e-05, "loss": 1.586, "step": 6841 }, { "epoch": 0.38136112814224404, "grad_norm": 0.5793870091438293, "learning_rate": 6.974431698607686e-05, "loss": 1.8532, "step": 6842 }, { "epoch": 0.3814168663954072, "grad_norm": 0.6075243353843689, "learning_rate": 6.973613608417683e-05, "loss": 1.8658, "step": 6843 }, { "epoch": 0.3814726046485703, "grad_norm": 0.5244048833847046, "learning_rate": 6.972795455636163e-05, "loss": 1.5298, "step": 6844 }, { "epoch": 0.3815283429017335, "grad_norm": 0.5625903010368347, "learning_rate": 6.971977240289073e-05, "loss": 1.7494, "step": 6845 }, { "epoch": 0.3815840811548966, "grad_norm": 0.5776612758636475, "learning_rate": 6.971158962402362e-05, "loss": 1.9495, "step": 6846 }, { "epoch": 0.38163981940805974, "grad_norm": 0.5811514258384705, "learning_rate": 6.970340622001983e-05, "loss": 1.6167, "step": 6847 }, { "epoch": 0.3816955576612229, "grad_norm": 0.5879440307617188, "learning_rate": 6.969522219113886e-05, "loss": 1.7636, "step": 6848 }, { "epoch": 0.38175129591438606, "grad_norm": 0.6386079788208008, "learning_rate": 6.968703753764027e-05, "loss": 1.779, "step": 6849 }, { "epoch": 0.3818070341675492, "grad_norm": 0.5324746966362, "learning_rate": 6.967885225978365e-05, "loss": 1.5693, "step": 6850 }, { "epoch": 0.3818627724207123, "grad_norm": 0.6155705451965332, "learning_rate": 6.967066635782855e-05, "loss": 1.8075, "step": 6851 }, { "epoch": 0.3819185106738755, "grad_norm": 0.5880451202392578, "learning_rate": 6.966247983203462e-05, "loss": 1.8192, "step": 6852 }, { "epoch": 0.38197424892703863, "grad_norm": 0.5279741287231445, "learning_rate": 6.965429268266147e-05, "loss": 1.5787, "step": 6853 }, { "epoch": 0.38202998718020176, "grad_norm": 0.5816035270690918, "learning_rate": 6.964610490996874e-05, "loss": 1.7935, "step": 6854 }, { "epoch": 0.38208572543336494, "grad_norm": 0.5708805918693542, "learning_rate": 6.963791651421612e-05, "loss": 1.6204, "step": 6855 }, { "epoch": 0.38214146368652807, "grad_norm": 0.5362871885299683, "learning_rate": 6.962972749566326e-05, "loss": 1.6198, "step": 6856 }, { "epoch": 0.3821972019396912, "grad_norm": 0.5008870363235474, "learning_rate": 6.962153785456991e-05, "loss": 1.3949, "step": 6857 }, { "epoch": 0.38225294019285433, "grad_norm": 0.5772041082382202, "learning_rate": 6.961334759119577e-05, "loss": 1.7137, "step": 6858 }, { "epoch": 0.3823086784460175, "grad_norm": 0.5443426966667175, "learning_rate": 6.960515670580061e-05, "loss": 1.809, "step": 6859 }, { "epoch": 0.38236441669918064, "grad_norm": 0.6082087755203247, "learning_rate": 6.959696519864418e-05, "loss": 1.8777, "step": 6860 }, { "epoch": 0.38242015495234377, "grad_norm": 0.5430213809013367, "learning_rate": 6.958877306998627e-05, "loss": 1.7168, "step": 6861 }, { "epoch": 0.38247589320550696, "grad_norm": 0.5611394047737122, "learning_rate": 6.95805803200867e-05, "loss": 1.7136, "step": 6862 }, { "epoch": 0.3825316314586701, "grad_norm": 0.5467121005058289, "learning_rate": 6.957238694920527e-05, "loss": 1.7348, "step": 6863 }, { "epoch": 0.3825873697118332, "grad_norm": 0.5907519459724426, "learning_rate": 6.956419295760184e-05, "loss": 1.8087, "step": 6864 }, { "epoch": 0.3826431079649964, "grad_norm": 0.4940342307090759, "learning_rate": 6.95559983455363e-05, "loss": 1.226, "step": 6865 }, { "epoch": 0.3826988462181595, "grad_norm": 0.525205135345459, "learning_rate": 6.954780311326849e-05, "loss": 1.6166, "step": 6866 }, { "epoch": 0.38275458447132266, "grad_norm": 0.5510271191596985, "learning_rate": 6.953960726105835e-05, "loss": 1.6143, "step": 6867 }, { "epoch": 0.38281032272448584, "grad_norm": 0.5778586268424988, "learning_rate": 6.953141078916578e-05, "loss": 1.8417, "step": 6868 }, { "epoch": 0.38286606097764897, "grad_norm": 0.5931724309921265, "learning_rate": 6.952321369785075e-05, "loss": 1.6908, "step": 6869 }, { "epoch": 0.3829217992308121, "grad_norm": 0.5995519161224365, "learning_rate": 6.951501598737318e-05, "loss": 1.9328, "step": 6870 }, { "epoch": 0.38297753748397523, "grad_norm": 0.5441159009933472, "learning_rate": 6.95068176579931e-05, "loss": 1.7226, "step": 6871 }, { "epoch": 0.3830332757371384, "grad_norm": 0.5795645117759705, "learning_rate": 6.94986187099705e-05, "loss": 1.8162, "step": 6872 }, { "epoch": 0.38308901399030154, "grad_norm": 0.5668213367462158, "learning_rate": 6.949041914356541e-05, "loss": 1.5981, "step": 6873 }, { "epoch": 0.38314475224346467, "grad_norm": 0.6034721732139587, "learning_rate": 6.948221895903784e-05, "loss": 1.688, "step": 6874 }, { "epoch": 0.38320049049662785, "grad_norm": 0.5386607050895691, "learning_rate": 6.94740181566479e-05, "loss": 1.6411, "step": 6875 }, { "epoch": 0.383256228749791, "grad_norm": 0.5482555627822876, "learning_rate": 6.946581673665561e-05, "loss": 1.3411, "step": 6876 }, { "epoch": 0.3833119670029541, "grad_norm": 0.5288286805152893, "learning_rate": 6.945761469932114e-05, "loss": 1.5896, "step": 6877 }, { "epoch": 0.3833677052561173, "grad_norm": 0.5721820592880249, "learning_rate": 6.944941204490456e-05, "loss": 1.7555, "step": 6878 }, { "epoch": 0.3834234435092804, "grad_norm": 0.5338029861450195, "learning_rate": 6.944120877366604e-05, "loss": 1.8117, "step": 6879 }, { "epoch": 0.38347918176244356, "grad_norm": 0.5430106520652771, "learning_rate": 6.943300488586572e-05, "loss": 1.5363, "step": 6880 }, { "epoch": 0.3835349200156067, "grad_norm": 0.5485236644744873, "learning_rate": 6.942480038176379e-05, "loss": 1.4549, "step": 6881 }, { "epoch": 0.38359065826876987, "grad_norm": 0.5767553448677063, "learning_rate": 6.941659526162045e-05, "loss": 1.5041, "step": 6882 }, { "epoch": 0.383646396521933, "grad_norm": 0.5788490176200867, "learning_rate": 6.940838952569589e-05, "loss": 1.8509, "step": 6883 }, { "epoch": 0.3837021347750961, "grad_norm": 0.5562904477119446, "learning_rate": 6.94001831742504e-05, "loss": 1.6337, "step": 6884 }, { "epoch": 0.3837578730282593, "grad_norm": 0.5514802932739258, "learning_rate": 6.939197620754419e-05, "loss": 1.6887, "step": 6885 }, { "epoch": 0.38381361128142244, "grad_norm": 0.6278872489929199, "learning_rate": 6.938376862583757e-05, "loss": 1.6762, "step": 6886 }, { "epoch": 0.38386934953458557, "grad_norm": 0.5348507761955261, "learning_rate": 6.937556042939083e-05, "loss": 1.5778, "step": 6887 }, { "epoch": 0.38392508778774875, "grad_norm": 0.555674135684967, "learning_rate": 6.936735161846429e-05, "loss": 1.6806, "step": 6888 }, { "epoch": 0.3839808260409119, "grad_norm": 0.5161069631576538, "learning_rate": 6.935914219331825e-05, "loss": 1.5607, "step": 6889 }, { "epoch": 0.384036564294075, "grad_norm": 0.5375397205352783, "learning_rate": 6.93509321542131e-05, "loss": 1.6835, "step": 6890 }, { "epoch": 0.3840923025472382, "grad_norm": 0.4695841073989868, "learning_rate": 6.934272150140921e-05, "loss": 1.3228, "step": 6891 }, { "epoch": 0.3841480408004013, "grad_norm": 0.5479111075401306, "learning_rate": 6.933451023516697e-05, "loss": 1.6331, "step": 6892 }, { "epoch": 0.38420377905356445, "grad_norm": 0.5705395936965942, "learning_rate": 6.932629835574679e-05, "loss": 1.7666, "step": 6893 }, { "epoch": 0.3842595173067276, "grad_norm": 0.5568275451660156, "learning_rate": 6.93180858634091e-05, "loss": 1.5809, "step": 6894 }, { "epoch": 0.38431525555989077, "grad_norm": 0.6088882088661194, "learning_rate": 6.930987275841439e-05, "loss": 1.7695, "step": 6895 }, { "epoch": 0.3843709938130539, "grad_norm": 0.5949798822402954, "learning_rate": 6.930165904102305e-05, "loss": 1.8917, "step": 6896 }, { "epoch": 0.384426732066217, "grad_norm": 0.557823657989502, "learning_rate": 6.929344471149566e-05, "loss": 1.8922, "step": 6897 }, { "epoch": 0.3844824703193802, "grad_norm": 0.5406614542007446, "learning_rate": 6.928522977009268e-05, "loss": 1.6488, "step": 6898 }, { "epoch": 0.38453820857254334, "grad_norm": 0.5692750811576843, "learning_rate": 6.927701421707466e-05, "loss": 1.6886, "step": 6899 }, { "epoch": 0.38459394682570647, "grad_norm": 0.5827295780181885, "learning_rate": 6.926879805270212e-05, "loss": 1.6532, "step": 6900 }, { "epoch": 0.38464968507886965, "grad_norm": 0.5955531001091003, "learning_rate": 6.926058127723568e-05, "loss": 1.6202, "step": 6901 }, { "epoch": 0.3847054233320328, "grad_norm": 0.5544630885124207, "learning_rate": 6.925236389093588e-05, "loss": 1.1835, "step": 6902 }, { "epoch": 0.3847611615851959, "grad_norm": 0.6354855298995972, "learning_rate": 6.924414589406335e-05, "loss": 1.9214, "step": 6903 }, { "epoch": 0.38481689983835904, "grad_norm": 0.6088757514953613, "learning_rate": 6.923592728687871e-05, "loss": 1.8236, "step": 6904 }, { "epoch": 0.3848726380915222, "grad_norm": 0.5689512491226196, "learning_rate": 6.922770806964263e-05, "loss": 1.5128, "step": 6905 }, { "epoch": 0.38492837634468535, "grad_norm": 0.5286409854888916, "learning_rate": 6.921948824261573e-05, "loss": 1.5956, "step": 6906 }, { "epoch": 0.3849841145978485, "grad_norm": 0.5316895842552185, "learning_rate": 6.921126780605873e-05, "loss": 1.5846, "step": 6907 }, { "epoch": 0.38503985285101167, "grad_norm": 0.5461425185203552, "learning_rate": 6.920304676023233e-05, "loss": 1.6645, "step": 6908 }, { "epoch": 0.3850955911041748, "grad_norm": 0.5628203749656677, "learning_rate": 6.919482510539723e-05, "loss": 1.6028, "step": 6909 }, { "epoch": 0.3851513293573379, "grad_norm": 0.5715482234954834, "learning_rate": 6.918660284181421e-05, "loss": 1.7378, "step": 6910 }, { "epoch": 0.3852070676105011, "grad_norm": 0.6020052433013916, "learning_rate": 6.9178379969744e-05, "loss": 1.8591, "step": 6911 }, { "epoch": 0.38526280586366424, "grad_norm": 0.5738694071769714, "learning_rate": 6.917015648944741e-05, "loss": 1.527, "step": 6912 }, { "epoch": 0.38531854411682737, "grad_norm": 0.5757240653038025, "learning_rate": 6.916193240118522e-05, "loss": 1.6982, "step": 6913 }, { "epoch": 0.38537428236999055, "grad_norm": 0.5647144913673401, "learning_rate": 6.915370770521825e-05, "loss": 1.6709, "step": 6914 }, { "epoch": 0.3854300206231537, "grad_norm": 0.5539698004722595, "learning_rate": 6.914548240180736e-05, "loss": 1.8178, "step": 6915 }, { "epoch": 0.3854857588763168, "grad_norm": 0.5621739625930786, "learning_rate": 6.913725649121337e-05, "loss": 1.8038, "step": 6916 }, { "epoch": 0.38554149712947994, "grad_norm": 0.5707613229751587, "learning_rate": 6.91290299736972e-05, "loss": 1.7155, "step": 6917 }, { "epoch": 0.3855972353826431, "grad_norm": 0.5707844495773315, "learning_rate": 6.912080284951972e-05, "loss": 1.7316, "step": 6918 }, { "epoch": 0.38565297363580625, "grad_norm": 0.5531010627746582, "learning_rate": 6.911257511894188e-05, "loss": 1.7607, "step": 6919 }, { "epoch": 0.3857087118889694, "grad_norm": 0.6005899906158447, "learning_rate": 6.910434678222457e-05, "loss": 1.8731, "step": 6920 }, { "epoch": 0.38576445014213256, "grad_norm": 0.5527727603912354, "learning_rate": 6.909611783962877e-05, "loss": 1.3704, "step": 6921 }, { "epoch": 0.3858201883952957, "grad_norm": 0.5586572885513306, "learning_rate": 6.908788829141544e-05, "loss": 1.6253, "step": 6922 }, { "epoch": 0.3858759266484588, "grad_norm": 0.6035952568054199, "learning_rate": 6.907965813784558e-05, "loss": 1.9226, "step": 6923 }, { "epoch": 0.385931664901622, "grad_norm": 0.5370834469795227, "learning_rate": 6.907142737918023e-05, "loss": 1.5934, "step": 6924 }, { "epoch": 0.38598740315478514, "grad_norm": 0.5954363346099854, "learning_rate": 6.906319601568038e-05, "loss": 1.8197, "step": 6925 }, { "epoch": 0.38604314140794826, "grad_norm": 0.5880860686302185, "learning_rate": 6.90549640476071e-05, "loss": 1.9775, "step": 6926 }, { "epoch": 0.3860988796611114, "grad_norm": 0.6047815084457397, "learning_rate": 6.904673147522147e-05, "loss": 1.9008, "step": 6927 }, { "epoch": 0.3861546179142746, "grad_norm": 0.6101181507110596, "learning_rate": 6.903849829878457e-05, "loss": 1.9632, "step": 6928 }, { "epoch": 0.3862103561674377, "grad_norm": 0.5670501589775085, "learning_rate": 6.903026451855748e-05, "loss": 1.7489, "step": 6929 }, { "epoch": 0.38626609442060084, "grad_norm": 0.6123764514923096, "learning_rate": 6.902203013480137e-05, "loss": 1.7719, "step": 6930 }, { "epoch": 0.386321832673764, "grad_norm": 0.53583824634552, "learning_rate": 6.901379514777739e-05, "loss": 1.5504, "step": 6931 }, { "epoch": 0.38637757092692715, "grad_norm": 0.5257768630981445, "learning_rate": 6.900555955774666e-05, "loss": 1.6045, "step": 6932 }, { "epoch": 0.3864333091800903, "grad_norm": 0.5276762843132019, "learning_rate": 6.899732336497038e-05, "loss": 1.7366, "step": 6933 }, { "epoch": 0.38648904743325346, "grad_norm": 0.555980384349823, "learning_rate": 6.898908656970979e-05, "loss": 1.3954, "step": 6934 }, { "epoch": 0.3865447856864166, "grad_norm": 0.5937703847885132, "learning_rate": 6.898084917222609e-05, "loss": 1.791, "step": 6935 }, { "epoch": 0.3866005239395797, "grad_norm": 0.5324926376342773, "learning_rate": 6.89726111727805e-05, "loss": 1.7835, "step": 6936 }, { "epoch": 0.3866562621927429, "grad_norm": 0.569644033908844, "learning_rate": 6.896437257163432e-05, "loss": 1.651, "step": 6937 }, { "epoch": 0.38671200044590603, "grad_norm": 0.5893319249153137, "learning_rate": 6.89561333690488e-05, "loss": 1.8836, "step": 6938 }, { "epoch": 0.38676773869906916, "grad_norm": 0.5247541666030884, "learning_rate": 6.894789356528526e-05, "loss": 1.5643, "step": 6939 }, { "epoch": 0.3868234769522323, "grad_norm": 0.5343844890594482, "learning_rate": 6.893965316060501e-05, "loss": 1.6483, "step": 6940 }, { "epoch": 0.3868792152053955, "grad_norm": 0.5714672803878784, "learning_rate": 6.893141215526938e-05, "loss": 1.5949, "step": 6941 }, { "epoch": 0.3869349534585586, "grad_norm": 0.5850149989128113, "learning_rate": 6.892317054953975e-05, "loss": 1.7971, "step": 6942 }, { "epoch": 0.38699069171172173, "grad_norm": 0.570669412612915, "learning_rate": 6.891492834367746e-05, "loss": 1.8339, "step": 6943 }, { "epoch": 0.3870464299648849, "grad_norm": 0.5296490788459778, "learning_rate": 6.890668553794392e-05, "loss": 1.6175, "step": 6944 }, { "epoch": 0.38710216821804805, "grad_norm": 0.5491392612457275, "learning_rate": 6.889844213260057e-05, "loss": 1.7679, "step": 6945 }, { "epoch": 0.3871579064712112, "grad_norm": 0.5886465907096863, "learning_rate": 6.88901981279088e-05, "loss": 1.5769, "step": 6946 }, { "epoch": 0.38721364472437436, "grad_norm": 0.5220004916191101, "learning_rate": 6.88819535241301e-05, "loss": 1.4678, "step": 6947 }, { "epoch": 0.3872693829775375, "grad_norm": 0.5555586814880371, "learning_rate": 6.887370832152592e-05, "loss": 1.6784, "step": 6948 }, { "epoch": 0.3873251212307006, "grad_norm": 0.5332651138305664, "learning_rate": 6.886546252035775e-05, "loss": 1.6139, "step": 6949 }, { "epoch": 0.38738085948386375, "grad_norm": 0.5473794341087341, "learning_rate": 6.88572161208871e-05, "loss": 1.8137, "step": 6950 }, { "epoch": 0.38743659773702693, "grad_norm": 0.5803813934326172, "learning_rate": 6.88489691233755e-05, "loss": 1.5237, "step": 6951 }, { "epoch": 0.38749233599019006, "grad_norm": 0.5329601168632507, "learning_rate": 6.884072152808451e-05, "loss": 1.686, "step": 6952 }, { "epoch": 0.3875480742433532, "grad_norm": 0.5633809566497803, "learning_rate": 6.883247333527567e-05, "loss": 1.9771, "step": 6953 }, { "epoch": 0.3876038124965164, "grad_norm": 0.6174986958503723, "learning_rate": 6.882422454521058e-05, "loss": 1.7549, "step": 6954 }, { "epoch": 0.3876595507496795, "grad_norm": 0.5496551394462585, "learning_rate": 6.881597515815084e-05, "loss": 1.7045, "step": 6955 }, { "epoch": 0.38771528900284263, "grad_norm": 0.5577127933502197, "learning_rate": 6.880772517435807e-05, "loss": 1.5901, "step": 6956 }, { "epoch": 0.3877710272560058, "grad_norm": 0.5230315327644348, "learning_rate": 6.879947459409393e-05, "loss": 1.5849, "step": 6957 }, { "epoch": 0.38782676550916895, "grad_norm": 0.5241686105728149, "learning_rate": 6.879122341762003e-05, "loss": 1.8152, "step": 6958 }, { "epoch": 0.3878825037623321, "grad_norm": 0.5810775756835938, "learning_rate": 6.878297164519812e-05, "loss": 1.7573, "step": 6959 }, { "epoch": 0.38793824201549526, "grad_norm": 0.5543670058250427, "learning_rate": 6.877471927708985e-05, "loss": 1.7487, "step": 6960 }, { "epoch": 0.3879939802686584, "grad_norm": 0.5780448317527771, "learning_rate": 6.876646631355693e-05, "loss": 1.8512, "step": 6961 }, { "epoch": 0.3880497185218215, "grad_norm": 0.6595468521118164, "learning_rate": 6.875821275486113e-05, "loss": 2.1185, "step": 6962 }, { "epoch": 0.38810545677498465, "grad_norm": 0.5663919448852539, "learning_rate": 6.874995860126419e-05, "loss": 1.6607, "step": 6963 }, { "epoch": 0.38816119502814783, "grad_norm": 0.6084817051887512, "learning_rate": 6.874170385302789e-05, "loss": 1.4841, "step": 6964 }, { "epoch": 0.38821693328131096, "grad_norm": 0.5507417321205139, "learning_rate": 6.8733448510414e-05, "loss": 1.7557, "step": 6965 }, { "epoch": 0.3882726715344741, "grad_norm": 0.5766531825065613, "learning_rate": 6.872519257368437e-05, "loss": 1.7722, "step": 6966 }, { "epoch": 0.3883284097876373, "grad_norm": 0.5653195381164551, "learning_rate": 6.871693604310077e-05, "loss": 1.8058, "step": 6967 }, { "epoch": 0.3883841480408004, "grad_norm": 0.6037474274635315, "learning_rate": 6.87086789189251e-05, "loss": 1.8542, "step": 6968 }, { "epoch": 0.38843988629396353, "grad_norm": 0.5463787317276001, "learning_rate": 6.870042120141923e-05, "loss": 1.7221, "step": 6969 }, { "epoch": 0.3884956245471267, "grad_norm": 0.5135644674301147, "learning_rate": 6.869216289084503e-05, "loss": 1.5492, "step": 6970 }, { "epoch": 0.38855136280028985, "grad_norm": 0.5640287399291992, "learning_rate": 6.86839039874644e-05, "loss": 1.4507, "step": 6971 }, { "epoch": 0.388607101053453, "grad_norm": 0.5661764144897461, "learning_rate": 6.867564449153925e-05, "loss": 1.7683, "step": 6972 }, { "epoch": 0.3886628393066161, "grad_norm": 0.5671542882919312, "learning_rate": 6.866738440333157e-05, "loss": 1.7076, "step": 6973 }, { "epoch": 0.3887185775597793, "grad_norm": 0.5259964466094971, "learning_rate": 6.865912372310328e-05, "loss": 1.542, "step": 6974 }, { "epoch": 0.3887743158129424, "grad_norm": 0.5321882963180542, "learning_rate": 6.865086245111638e-05, "loss": 1.6909, "step": 6975 }, { "epoch": 0.38883005406610555, "grad_norm": 0.5812041759490967, "learning_rate": 6.864260058763286e-05, "loss": 1.8409, "step": 6976 }, { "epoch": 0.38888579231926873, "grad_norm": 0.5516645312309265, "learning_rate": 6.863433813291477e-05, "loss": 1.5931, "step": 6977 }, { "epoch": 0.38894153057243186, "grad_norm": 0.612776517868042, "learning_rate": 6.86260750872241e-05, "loss": 1.7741, "step": 6978 }, { "epoch": 0.388997268825595, "grad_norm": 0.5400133728981018, "learning_rate": 6.861781145082293e-05, "loss": 1.6731, "step": 6979 }, { "epoch": 0.3890530070787582, "grad_norm": 0.5253887176513672, "learning_rate": 6.860954722397332e-05, "loss": 1.6809, "step": 6980 }, { "epoch": 0.3891087453319213, "grad_norm": 0.5338975191116333, "learning_rate": 6.860128240693737e-05, "loss": 1.7078, "step": 6981 }, { "epoch": 0.38916448358508443, "grad_norm": 0.6083932518959045, "learning_rate": 6.85930169999772e-05, "loss": 1.7694, "step": 6982 }, { "epoch": 0.3892202218382476, "grad_norm": 0.5741243958473206, "learning_rate": 6.858475100335496e-05, "loss": 1.7516, "step": 6983 }, { "epoch": 0.38927596009141074, "grad_norm": 0.5835102200508118, "learning_rate": 6.857648441733275e-05, "loss": 1.7409, "step": 6984 }, { "epoch": 0.3893316983445739, "grad_norm": 0.5485714673995972, "learning_rate": 6.856821724217276e-05, "loss": 1.7237, "step": 6985 }, { "epoch": 0.389387436597737, "grad_norm": 0.5908092856407166, "learning_rate": 6.855994947813719e-05, "loss": 1.8842, "step": 6986 }, { "epoch": 0.3894431748509002, "grad_norm": 0.5635112524032593, "learning_rate": 6.855168112548823e-05, "loss": 1.8356, "step": 6987 }, { "epoch": 0.3894989131040633, "grad_norm": 0.6175239086151123, "learning_rate": 6.85434121844881e-05, "loss": 2.1173, "step": 6988 }, { "epoch": 0.38955465135722644, "grad_norm": 0.5377556085586548, "learning_rate": 6.853514265539907e-05, "loss": 1.6531, "step": 6989 }, { "epoch": 0.38961038961038963, "grad_norm": 0.5529573559761047, "learning_rate": 6.852687253848337e-05, "loss": 1.7125, "step": 6990 }, { "epoch": 0.38966612786355276, "grad_norm": 0.5733687877655029, "learning_rate": 6.85186018340033e-05, "loss": 1.8723, "step": 6991 }, { "epoch": 0.3897218661167159, "grad_norm": 0.5605233311653137, "learning_rate": 6.851033054222115e-05, "loss": 1.9066, "step": 6992 }, { "epoch": 0.38977760436987907, "grad_norm": 0.5196309089660645, "learning_rate": 6.850205866339923e-05, "loss": 1.6027, "step": 6993 }, { "epoch": 0.3898333426230422, "grad_norm": 0.5691904425621033, "learning_rate": 6.849378619779989e-05, "loss": 1.7806, "step": 6994 }, { "epoch": 0.38988908087620533, "grad_norm": 0.5791077017784119, "learning_rate": 6.848551314568548e-05, "loss": 1.8153, "step": 6995 }, { "epoch": 0.38994481912936846, "grad_norm": 0.5611302256584167, "learning_rate": 6.847723950731837e-05, "loss": 1.7705, "step": 6996 }, { "epoch": 0.39000055738253164, "grad_norm": 0.6004642248153687, "learning_rate": 6.846896528296094e-05, "loss": 1.6717, "step": 6997 }, { "epoch": 0.39005629563569477, "grad_norm": 0.5229793787002563, "learning_rate": 6.846069047287562e-05, "loss": 1.6567, "step": 6998 }, { "epoch": 0.3901120338888579, "grad_norm": 0.5206711888313293, "learning_rate": 6.845241507732483e-05, "loss": 1.3903, "step": 6999 }, { "epoch": 0.3901677721420211, "grad_norm": 0.6022440791130066, "learning_rate": 6.844413909657104e-05, "loss": 1.8607, "step": 7000 }, { "epoch": 0.3902235103951842, "grad_norm": 0.5634634494781494, "learning_rate": 6.843586253087666e-05, "loss": 1.6199, "step": 7001 }, { "epoch": 0.39027924864834734, "grad_norm": 0.5622709393501282, "learning_rate": 6.842758538050422e-05, "loss": 1.5923, "step": 7002 }, { "epoch": 0.39033498690151053, "grad_norm": 0.5336858034133911, "learning_rate": 6.841930764571623e-05, "loss": 1.6086, "step": 7003 }, { "epoch": 0.39039072515467366, "grad_norm": 0.6216438412666321, "learning_rate": 6.841102932677517e-05, "loss": 1.8973, "step": 7004 }, { "epoch": 0.3904464634078368, "grad_norm": 0.5596641898155212, "learning_rate": 6.840275042394363e-05, "loss": 1.4897, "step": 7005 }, { "epoch": 0.39050220166099997, "grad_norm": 0.5638755559921265, "learning_rate": 6.839447093748413e-05, "loss": 1.7267, "step": 7006 }, { "epoch": 0.3905579399141631, "grad_norm": 0.5759851932525635, "learning_rate": 6.838619086765925e-05, "loss": 1.9025, "step": 7007 }, { "epoch": 0.39061367816732623, "grad_norm": 0.5657535791397095, "learning_rate": 6.83779102147316e-05, "loss": 1.6509, "step": 7008 }, { "epoch": 0.39066941642048936, "grad_norm": 0.5276607275009155, "learning_rate": 6.83696289789638e-05, "loss": 1.6244, "step": 7009 }, { "epoch": 0.39072515467365254, "grad_norm": 0.6091243624687195, "learning_rate": 6.836134716061845e-05, "loss": 1.7403, "step": 7010 }, { "epoch": 0.39078089292681567, "grad_norm": 0.5518734455108643, "learning_rate": 6.835306475995823e-05, "loss": 1.6201, "step": 7011 }, { "epoch": 0.3908366311799788, "grad_norm": 0.5169443488121033, "learning_rate": 6.834478177724581e-05, "loss": 1.5593, "step": 7012 }, { "epoch": 0.390892369433142, "grad_norm": 0.5405734181404114, "learning_rate": 6.833649821274386e-05, "loss": 1.6275, "step": 7013 }, { "epoch": 0.3909481076863051, "grad_norm": 0.639498233795166, "learning_rate": 6.83282140667151e-05, "loss": 1.9288, "step": 7014 }, { "epoch": 0.39100384593946824, "grad_norm": 0.5509902238845825, "learning_rate": 6.831992933942225e-05, "loss": 1.6756, "step": 7015 }, { "epoch": 0.3910595841926314, "grad_norm": 0.6026686429977417, "learning_rate": 6.831164403112806e-05, "loss": 1.8422, "step": 7016 }, { "epoch": 0.39111532244579456, "grad_norm": 0.4942910969257355, "learning_rate": 6.830335814209527e-05, "loss": 1.407, "step": 7017 }, { "epoch": 0.3911710606989577, "grad_norm": 0.5921064615249634, "learning_rate": 6.829507167258671e-05, "loss": 1.7507, "step": 7018 }, { "epoch": 0.3912267989521208, "grad_norm": 0.5901893377304077, "learning_rate": 6.828678462286511e-05, "loss": 1.9612, "step": 7019 }, { "epoch": 0.391282537205284, "grad_norm": 0.5834552049636841, "learning_rate": 6.827849699319333e-05, "loss": 1.8656, "step": 7020 }, { "epoch": 0.3913382754584471, "grad_norm": 0.5791158080101013, "learning_rate": 6.827020878383418e-05, "loss": 1.6849, "step": 7021 }, { "epoch": 0.39139401371161026, "grad_norm": 0.6698895692825317, "learning_rate": 6.826191999505056e-05, "loss": 1.9619, "step": 7022 }, { "epoch": 0.39144975196477344, "grad_norm": 0.5854638814926147, "learning_rate": 6.82536306271053e-05, "loss": 1.6066, "step": 7023 }, { "epoch": 0.39150549021793657, "grad_norm": 0.5511733293533325, "learning_rate": 6.82453406802613e-05, "loss": 1.8761, "step": 7024 }, { "epoch": 0.3915612284710997, "grad_norm": 0.5574920177459717, "learning_rate": 6.823705015478148e-05, "loss": 1.494, "step": 7025 }, { "epoch": 0.3916169667242629, "grad_norm": 0.5293987989425659, "learning_rate": 6.822875905092876e-05, "loss": 1.4918, "step": 7026 }, { "epoch": 0.391672704977426, "grad_norm": 0.5626353621482849, "learning_rate": 6.822046736896607e-05, "loss": 1.7521, "step": 7027 }, { "epoch": 0.39172844323058914, "grad_norm": 0.5664160847663879, "learning_rate": 6.821217510915639e-05, "loss": 1.5782, "step": 7028 }, { "epoch": 0.3917841814837523, "grad_norm": 0.5288576483726501, "learning_rate": 6.820388227176271e-05, "loss": 1.4754, "step": 7029 }, { "epoch": 0.39183991973691545, "grad_norm": 0.5488860607147217, "learning_rate": 6.819558885704801e-05, "loss": 1.6245, "step": 7030 }, { "epoch": 0.3918956579900786, "grad_norm": 0.5747123956680298, "learning_rate": 6.818729486527533e-05, "loss": 1.7134, "step": 7031 }, { "epoch": 0.3919513962432417, "grad_norm": 0.5334782600402832, "learning_rate": 6.817900029670769e-05, "loss": 1.6473, "step": 7032 }, { "epoch": 0.3920071344964049, "grad_norm": 0.5332539081573486, "learning_rate": 6.817070515160815e-05, "loss": 1.4961, "step": 7033 }, { "epoch": 0.392062872749568, "grad_norm": 0.5700680017471313, "learning_rate": 6.816240943023977e-05, "loss": 1.8336, "step": 7034 }, { "epoch": 0.39211861100273115, "grad_norm": 0.5893431901931763, "learning_rate": 6.815411313286568e-05, "loss": 1.8517, "step": 7035 }, { "epoch": 0.39217434925589434, "grad_norm": 0.5954105854034424, "learning_rate": 6.814581625974897e-05, "loss": 1.8405, "step": 7036 }, { "epoch": 0.39223008750905747, "grad_norm": 0.5694375038146973, "learning_rate": 6.813751881115275e-05, "loss": 1.7636, "step": 7037 }, { "epoch": 0.3922858257622206, "grad_norm": 0.6035060286521912, "learning_rate": 6.812922078734019e-05, "loss": 1.8142, "step": 7038 }, { "epoch": 0.3923415640153838, "grad_norm": 0.6111207008361816, "learning_rate": 6.812092218857444e-05, "loss": 1.7048, "step": 7039 }, { "epoch": 0.3923973022685469, "grad_norm": 0.5596774220466614, "learning_rate": 6.811262301511869e-05, "loss": 1.652, "step": 7040 }, { "epoch": 0.39245304052171004, "grad_norm": 0.5244095921516418, "learning_rate": 6.810432326723615e-05, "loss": 1.325, "step": 7041 }, { "epoch": 0.39250877877487317, "grad_norm": 0.5797486305236816, "learning_rate": 6.809602294519004e-05, "loss": 1.7832, "step": 7042 }, { "epoch": 0.39256451702803635, "grad_norm": 0.5226321816444397, "learning_rate": 6.808772204924357e-05, "loss": 1.6449, "step": 7043 }, { "epoch": 0.3926202552811995, "grad_norm": 0.5220246911048889, "learning_rate": 6.807942057966003e-05, "loss": 1.6308, "step": 7044 }, { "epoch": 0.3926759935343626, "grad_norm": 0.7185441255569458, "learning_rate": 6.807111853670268e-05, "loss": 1.6675, "step": 7045 }, { "epoch": 0.3927317317875258, "grad_norm": 0.6072642803192139, "learning_rate": 6.806281592063481e-05, "loss": 1.8951, "step": 7046 }, { "epoch": 0.3927874700406889, "grad_norm": 0.5583004355430603, "learning_rate": 6.805451273171972e-05, "loss": 1.686, "step": 7047 }, { "epoch": 0.39284320829385205, "grad_norm": 0.5066385865211487, "learning_rate": 6.804620897022076e-05, "loss": 1.407, "step": 7048 }, { "epoch": 0.39289894654701524, "grad_norm": 0.5519012212753296, "learning_rate": 6.803790463640127e-05, "loss": 1.8137, "step": 7049 }, { "epoch": 0.39295468480017837, "grad_norm": 0.5573792457580566, "learning_rate": 6.802959973052461e-05, "loss": 1.7861, "step": 7050 }, { "epoch": 0.3930104230533415, "grad_norm": 0.5672924518585205, "learning_rate": 6.802129425285417e-05, "loss": 1.6572, "step": 7051 }, { "epoch": 0.3930661613065047, "grad_norm": 0.5737549066543579, "learning_rate": 6.801298820365333e-05, "loss": 1.7467, "step": 7052 }, { "epoch": 0.3931218995596678, "grad_norm": 0.5474954843521118, "learning_rate": 6.800468158318554e-05, "loss": 1.7429, "step": 7053 }, { "epoch": 0.39317763781283094, "grad_norm": 0.549497127532959, "learning_rate": 6.799637439171424e-05, "loss": 1.764, "step": 7054 }, { "epoch": 0.39323337606599407, "grad_norm": 0.5415019392967224, "learning_rate": 6.798806662950286e-05, "loss": 1.4691, "step": 7055 }, { "epoch": 0.39328911431915725, "grad_norm": 0.5431099534034729, "learning_rate": 6.797975829681487e-05, "loss": 1.5577, "step": 7056 }, { "epoch": 0.3933448525723204, "grad_norm": 0.549314558506012, "learning_rate": 6.79714493939138e-05, "loss": 1.7471, "step": 7057 }, { "epoch": 0.3934005908254835, "grad_norm": 0.5444470047950745, "learning_rate": 6.796313992106313e-05, "loss": 1.765, "step": 7058 }, { "epoch": 0.3934563290786467, "grad_norm": 0.57083660364151, "learning_rate": 6.795482987852638e-05, "loss": 1.9101, "step": 7059 }, { "epoch": 0.3935120673318098, "grad_norm": 0.5475842952728271, "learning_rate": 6.794651926656711e-05, "loss": 1.8193, "step": 7060 }, { "epoch": 0.39356780558497295, "grad_norm": 0.5259652733802795, "learning_rate": 6.793820808544891e-05, "loss": 1.3794, "step": 7061 }, { "epoch": 0.39362354383813614, "grad_norm": 0.5105850100517273, "learning_rate": 6.792989633543531e-05, "loss": 1.5634, "step": 7062 }, { "epoch": 0.39367928209129927, "grad_norm": 0.5771433711051941, "learning_rate": 6.792158401678994e-05, "loss": 1.6858, "step": 7063 }, { "epoch": 0.3937350203444624, "grad_norm": 0.5675138235092163, "learning_rate": 6.791327112977644e-05, "loss": 1.8272, "step": 7064 }, { "epoch": 0.3937907585976255, "grad_norm": 0.5633112788200378, "learning_rate": 6.790495767465839e-05, "loss": 1.7226, "step": 7065 }, { "epoch": 0.3938464968507887, "grad_norm": 0.5350648760795593, "learning_rate": 6.789664365169947e-05, "loss": 1.5082, "step": 7066 }, { "epoch": 0.39390223510395184, "grad_norm": 0.5656428337097168, "learning_rate": 6.788832906116338e-05, "loss": 1.4914, "step": 7067 }, { "epoch": 0.39395797335711497, "grad_norm": 0.5312878489494324, "learning_rate": 6.78800139033138e-05, "loss": 1.5864, "step": 7068 }, { "epoch": 0.39401371161027815, "grad_norm": 0.6321331262588501, "learning_rate": 6.787169817841442e-05, "loss": 1.9452, "step": 7069 }, { "epoch": 0.3940694498634413, "grad_norm": 0.5593883991241455, "learning_rate": 6.786338188672896e-05, "loss": 1.7637, "step": 7070 }, { "epoch": 0.3941251881166044, "grad_norm": 0.5405465960502625, "learning_rate": 6.785506502852118e-05, "loss": 1.6875, "step": 7071 }, { "epoch": 0.3941809263697676, "grad_norm": 0.5527162551879883, "learning_rate": 6.784674760405482e-05, "loss": 1.6496, "step": 7072 }, { "epoch": 0.3942366646229307, "grad_norm": 0.5357568264007568, "learning_rate": 6.78384296135937e-05, "loss": 1.7234, "step": 7073 }, { "epoch": 0.39429240287609385, "grad_norm": 0.5588380694389343, "learning_rate": 6.783011105740162e-05, "loss": 1.9166, "step": 7074 }, { "epoch": 0.39434814112925703, "grad_norm": 0.7392244338989258, "learning_rate": 6.782179193574234e-05, "loss": 1.6746, "step": 7075 }, { "epoch": 0.39440387938242016, "grad_norm": 0.5365987420082092, "learning_rate": 6.781347224887974e-05, "loss": 1.6615, "step": 7076 }, { "epoch": 0.3944596176355833, "grad_norm": 0.5493837594985962, "learning_rate": 6.780515199707766e-05, "loss": 1.7271, "step": 7077 }, { "epoch": 0.3945153558887464, "grad_norm": 0.5309239029884338, "learning_rate": 6.779683118059997e-05, "loss": 1.5172, "step": 7078 }, { "epoch": 0.3945710941419096, "grad_norm": 0.5167561769485474, "learning_rate": 6.778850979971057e-05, "loss": 1.5777, "step": 7079 }, { "epoch": 0.39462683239507274, "grad_norm": 0.5119823217391968, "learning_rate": 6.778018785467332e-05, "loss": 1.5685, "step": 7080 }, { "epoch": 0.39468257064823586, "grad_norm": 0.5578561425209045, "learning_rate": 6.777186534575222e-05, "loss": 1.6626, "step": 7081 }, { "epoch": 0.39473830890139905, "grad_norm": 0.535065233707428, "learning_rate": 6.776354227321114e-05, "loss": 1.5554, "step": 7082 }, { "epoch": 0.3947940471545622, "grad_norm": 0.5996119976043701, "learning_rate": 6.775521863731408e-05, "loss": 1.613, "step": 7083 }, { "epoch": 0.3948497854077253, "grad_norm": 0.5490982532501221, "learning_rate": 6.7746894438325e-05, "loss": 1.6554, "step": 7084 }, { "epoch": 0.3949055236608885, "grad_norm": 0.5607420802116394, "learning_rate": 6.773856967650789e-05, "loss": 1.7542, "step": 7085 }, { "epoch": 0.3949612619140516, "grad_norm": 0.594559907913208, "learning_rate": 6.773024435212678e-05, "loss": 1.8008, "step": 7086 }, { "epoch": 0.39501700016721475, "grad_norm": 0.5436771512031555, "learning_rate": 6.77219184654457e-05, "loss": 1.6853, "step": 7087 }, { "epoch": 0.3950727384203779, "grad_norm": 0.6430955529212952, "learning_rate": 6.771359201672868e-05, "loss": 1.877, "step": 7088 }, { "epoch": 0.39512847667354106, "grad_norm": 0.5667055249214172, "learning_rate": 6.770526500623982e-05, "loss": 1.5347, "step": 7089 }, { "epoch": 0.3951842149267042, "grad_norm": 0.5299628376960754, "learning_rate": 6.769693743424317e-05, "loss": 1.6611, "step": 7090 }, { "epoch": 0.3952399531798673, "grad_norm": 0.6088326573371887, "learning_rate": 6.768860930100285e-05, "loss": 1.991, "step": 7091 }, { "epoch": 0.3952956914330305, "grad_norm": 0.5899388790130615, "learning_rate": 6.768028060678296e-05, "loss": 1.8402, "step": 7092 }, { "epoch": 0.39535142968619363, "grad_norm": 0.5693525075912476, "learning_rate": 6.767195135184765e-05, "loss": 1.6969, "step": 7093 }, { "epoch": 0.39540716793935676, "grad_norm": 0.5347588658332825, "learning_rate": 6.766362153646111e-05, "loss": 1.6525, "step": 7094 }, { "epoch": 0.39546290619251995, "grad_norm": 0.5795377492904663, "learning_rate": 6.765529116088745e-05, "loss": 1.7744, "step": 7095 }, { "epoch": 0.3955186444456831, "grad_norm": 0.5230005979537964, "learning_rate": 6.764696022539091e-05, "loss": 1.6068, "step": 7096 }, { "epoch": 0.3955743826988462, "grad_norm": 0.5676483511924744, "learning_rate": 6.763862873023567e-05, "loss": 1.6501, "step": 7097 }, { "epoch": 0.3956301209520094, "grad_norm": 0.5104279518127441, "learning_rate": 6.763029667568597e-05, "loss": 1.5805, "step": 7098 }, { "epoch": 0.3956858592051725, "grad_norm": 0.575018048286438, "learning_rate": 6.762196406200604e-05, "loss": 1.7185, "step": 7099 }, { "epoch": 0.39574159745833565, "grad_norm": 0.5459030270576477, "learning_rate": 6.761363088946017e-05, "loss": 1.7264, "step": 7100 }, { "epoch": 0.3957973357114988, "grad_norm": 0.5303768515586853, "learning_rate": 6.760529715831262e-05, "loss": 1.6626, "step": 7101 }, { "epoch": 0.39585307396466196, "grad_norm": 0.5729551911354065, "learning_rate": 6.759696286882769e-05, "loss": 1.827, "step": 7102 }, { "epoch": 0.3959088122178251, "grad_norm": 0.578536331653595, "learning_rate": 6.758862802126969e-05, "loss": 1.8003, "step": 7103 }, { "epoch": 0.3959645504709882, "grad_norm": 0.5476341247558594, "learning_rate": 6.758029261590296e-05, "loss": 1.7641, "step": 7104 }, { "epoch": 0.3960202887241514, "grad_norm": 0.5585542917251587, "learning_rate": 6.757195665299186e-05, "loss": 1.6907, "step": 7105 }, { "epoch": 0.39607602697731453, "grad_norm": 0.5314999222755432, "learning_rate": 6.756362013280072e-05, "loss": 1.5457, "step": 7106 }, { "epoch": 0.39613176523047766, "grad_norm": 0.5275375247001648, "learning_rate": 6.755528305559398e-05, "loss": 1.6021, "step": 7107 }, { "epoch": 0.39618750348364085, "grad_norm": 0.5544595122337341, "learning_rate": 6.7546945421636e-05, "loss": 1.5837, "step": 7108 }, { "epoch": 0.396243241736804, "grad_norm": 0.6334085464477539, "learning_rate": 6.753860723119122e-05, "loss": 2.096, "step": 7109 }, { "epoch": 0.3962989799899671, "grad_norm": 0.5980644822120667, "learning_rate": 6.753026848452407e-05, "loss": 1.9298, "step": 7110 }, { "epoch": 0.39635471824313023, "grad_norm": 0.5179347991943359, "learning_rate": 6.752192918189902e-05, "loss": 1.702, "step": 7111 }, { "epoch": 0.3964104564962934, "grad_norm": 0.5576172471046448, "learning_rate": 6.751358932358052e-05, "loss": 1.6217, "step": 7112 }, { "epoch": 0.39646619474945655, "grad_norm": 0.5886361002922058, "learning_rate": 6.750524890983309e-05, "loss": 1.9734, "step": 7113 }, { "epoch": 0.3965219330026197, "grad_norm": 0.573229193687439, "learning_rate": 6.749690794092125e-05, "loss": 1.9415, "step": 7114 }, { "epoch": 0.39657767125578286, "grad_norm": 1.0474965572357178, "learning_rate": 6.748856641710948e-05, "loss": 2.0009, "step": 7115 }, { "epoch": 0.396633409508946, "grad_norm": 0.5304273366928101, "learning_rate": 6.748022433866236e-05, "loss": 1.7601, "step": 7116 }, { "epoch": 0.3966891477621091, "grad_norm": 0.5350653529167175, "learning_rate": 6.747188170584444e-05, "loss": 1.7173, "step": 7117 }, { "epoch": 0.3967448860152723, "grad_norm": 0.5216551423072815, "learning_rate": 6.746353851892028e-05, "loss": 1.7054, "step": 7118 }, { "epoch": 0.39680062426843543, "grad_norm": 0.5482343435287476, "learning_rate": 6.745519477815451e-05, "loss": 1.6456, "step": 7119 }, { "epoch": 0.39685636252159856, "grad_norm": 0.5794587135314941, "learning_rate": 6.744685048381174e-05, "loss": 1.7264, "step": 7120 }, { "epoch": 0.39691210077476174, "grad_norm": 0.5834348797798157, "learning_rate": 6.743850563615659e-05, "loss": 1.7025, "step": 7121 }, { "epoch": 0.3969678390279249, "grad_norm": 0.5380405187606812, "learning_rate": 6.743016023545373e-05, "loss": 1.5742, "step": 7122 }, { "epoch": 0.397023577281088, "grad_norm": 0.5725619792938232, "learning_rate": 6.742181428196777e-05, "loss": 1.8845, "step": 7123 }, { "epoch": 0.39707931553425113, "grad_norm": 0.5491376519203186, "learning_rate": 6.741346777596347e-05, "loss": 1.6998, "step": 7124 }, { "epoch": 0.3971350537874143, "grad_norm": 0.5111629962921143, "learning_rate": 6.74051207177055e-05, "loss": 1.4712, "step": 7125 }, { "epoch": 0.39719079204057745, "grad_norm": 0.5327715277671814, "learning_rate": 6.739677310745856e-05, "loss": 1.4259, "step": 7126 }, { "epoch": 0.3972465302937406, "grad_norm": 0.585437536239624, "learning_rate": 6.738842494548742e-05, "loss": 1.6437, "step": 7127 }, { "epoch": 0.39730226854690376, "grad_norm": 0.4905366599559784, "learning_rate": 6.738007623205682e-05, "loss": 1.537, "step": 7128 }, { "epoch": 0.3973580068000669, "grad_norm": 0.578807532787323, "learning_rate": 6.737172696743155e-05, "loss": 1.7359, "step": 7129 }, { "epoch": 0.39741374505323, "grad_norm": 0.5269452333450317, "learning_rate": 6.736337715187638e-05, "loss": 1.632, "step": 7130 }, { "epoch": 0.3974694833063932, "grad_norm": 0.6212645769119263, "learning_rate": 6.735502678565611e-05, "loss": 1.6633, "step": 7131 }, { "epoch": 0.39752522155955633, "grad_norm": 0.5281040668487549, "learning_rate": 6.734667586903557e-05, "loss": 1.6349, "step": 7132 }, { "epoch": 0.39758095981271946, "grad_norm": 0.6241141557693481, "learning_rate": 6.733832440227963e-05, "loss": 1.8522, "step": 7133 }, { "epoch": 0.3976366980658826, "grad_norm": 0.5351576805114746, "learning_rate": 6.732997238565311e-05, "loss": 1.8608, "step": 7134 }, { "epoch": 0.3976924363190458, "grad_norm": 0.6173853278160095, "learning_rate": 6.732161981942093e-05, "loss": 1.7628, "step": 7135 }, { "epoch": 0.3977481745722089, "grad_norm": 0.5938517451286316, "learning_rate": 6.731326670384794e-05, "loss": 1.7216, "step": 7136 }, { "epoch": 0.39780391282537203, "grad_norm": 0.5863813161849976, "learning_rate": 6.730491303919907e-05, "loss": 1.6816, "step": 7137 }, { "epoch": 0.3978596510785352, "grad_norm": 0.6825369596481323, "learning_rate": 6.729655882573928e-05, "loss": 1.9808, "step": 7138 }, { "epoch": 0.39791538933169834, "grad_norm": 0.5284822583198547, "learning_rate": 6.728820406373346e-05, "loss": 1.8237, "step": 7139 }, { "epoch": 0.3979711275848615, "grad_norm": 0.554270327091217, "learning_rate": 6.727984875344663e-05, "loss": 1.61, "step": 7140 }, { "epoch": 0.39802686583802466, "grad_norm": 0.6326965093612671, "learning_rate": 6.727149289514373e-05, "loss": 2.1011, "step": 7141 }, { "epoch": 0.3980826040911878, "grad_norm": 0.5701342225074768, "learning_rate": 6.72631364890898e-05, "loss": 1.6724, "step": 7142 }, { "epoch": 0.3981383423443509, "grad_norm": 0.5414735078811646, "learning_rate": 6.725477953554979e-05, "loss": 1.5425, "step": 7143 }, { "epoch": 0.3981940805975141, "grad_norm": 0.5954646468162537, "learning_rate": 6.72464220347888e-05, "loss": 1.6308, "step": 7144 }, { "epoch": 0.39824981885067723, "grad_norm": 0.6013423204421997, "learning_rate": 6.723806398707185e-05, "loss": 1.8022, "step": 7145 }, { "epoch": 0.39830555710384036, "grad_norm": 0.5645208954811096, "learning_rate": 6.722970539266403e-05, "loss": 1.4448, "step": 7146 }, { "epoch": 0.3983612953570035, "grad_norm": 0.6153306365013123, "learning_rate": 6.72213462518304e-05, "loss": 1.7358, "step": 7147 }, { "epoch": 0.39841703361016667, "grad_norm": 0.5638027191162109, "learning_rate": 6.721298656483608e-05, "loss": 1.4709, "step": 7148 }, { "epoch": 0.3984727718633298, "grad_norm": 0.5619633197784424, "learning_rate": 6.720462633194618e-05, "loss": 1.6085, "step": 7149 }, { "epoch": 0.39852851011649293, "grad_norm": 0.5597891211509705, "learning_rate": 6.719626555342585e-05, "loss": 1.8059, "step": 7150 }, { "epoch": 0.3985842483696561, "grad_norm": 0.5170794725418091, "learning_rate": 6.718790422954021e-05, "loss": 1.7492, "step": 7151 }, { "epoch": 0.39863998662281924, "grad_norm": 0.5071738362312317, "learning_rate": 6.717954236055449e-05, "loss": 1.6074, "step": 7152 }, { "epoch": 0.39869572487598237, "grad_norm": 0.5328095555305481, "learning_rate": 6.717117994673384e-05, "loss": 1.3657, "step": 7153 }, { "epoch": 0.39875146312914556, "grad_norm": 0.5484116673469543, "learning_rate": 6.716281698834346e-05, "loss": 1.6112, "step": 7154 }, { "epoch": 0.3988072013823087, "grad_norm": 0.5871725678443909, "learning_rate": 6.715445348564862e-05, "loss": 1.9087, "step": 7155 }, { "epoch": 0.3988629396354718, "grad_norm": 0.5913428068161011, "learning_rate": 6.714608943891452e-05, "loss": 2.0278, "step": 7156 }, { "epoch": 0.39891867788863494, "grad_norm": 0.5644116997718811, "learning_rate": 6.713772484840645e-05, "loss": 1.63, "step": 7157 }, { "epoch": 0.3989744161417981, "grad_norm": 0.5353809595108032, "learning_rate": 6.712935971438962e-05, "loss": 1.6313, "step": 7158 }, { "epoch": 0.39903015439496126, "grad_norm": 0.5755419731140137, "learning_rate": 6.712099403712942e-05, "loss": 1.7367, "step": 7159 }, { "epoch": 0.3990858926481244, "grad_norm": 0.5571795105934143, "learning_rate": 6.711262781689109e-05, "loss": 1.8337, "step": 7160 }, { "epoch": 0.39914163090128757, "grad_norm": 0.5910276174545288, "learning_rate": 6.710426105394e-05, "loss": 1.8474, "step": 7161 }, { "epoch": 0.3991973691544507, "grad_norm": 0.5713383555412292, "learning_rate": 6.709589374854144e-05, "loss": 1.4712, "step": 7162 }, { "epoch": 0.3992531074076138, "grad_norm": 0.6179262399673462, "learning_rate": 6.708752590096082e-05, "loss": 1.6399, "step": 7163 }, { "epoch": 0.399308845660777, "grad_norm": 0.5618530511856079, "learning_rate": 6.707915751146351e-05, "loss": 1.6822, "step": 7164 }, { "epoch": 0.39936458391394014, "grad_norm": 0.5299525260925293, "learning_rate": 6.70707885803149e-05, "loss": 1.4796, "step": 7165 }, { "epoch": 0.39942032216710327, "grad_norm": 0.5534185767173767, "learning_rate": 6.706241910778041e-05, "loss": 1.844, "step": 7166 }, { "epoch": 0.39947606042026645, "grad_norm": 0.5665568709373474, "learning_rate": 6.705404909412547e-05, "loss": 1.787, "step": 7167 }, { "epoch": 0.3995317986734296, "grad_norm": 0.6122377514839172, "learning_rate": 6.704567853961552e-05, "loss": 1.7695, "step": 7168 }, { "epoch": 0.3995875369265927, "grad_norm": 0.5161054730415344, "learning_rate": 6.703730744451601e-05, "loss": 1.5939, "step": 7169 }, { "epoch": 0.39964327517975584, "grad_norm": 0.569864809513092, "learning_rate": 6.702893580909247e-05, "loss": 1.7385, "step": 7170 }, { "epoch": 0.399699013432919, "grad_norm": 0.5484759211540222, "learning_rate": 6.702056363361036e-05, "loss": 1.6495, "step": 7171 }, { "epoch": 0.39975475168608215, "grad_norm": 0.5385055541992188, "learning_rate": 6.701219091833522e-05, "loss": 1.8867, "step": 7172 }, { "epoch": 0.3998104899392453, "grad_norm": 0.5519033074378967, "learning_rate": 6.700381766353255e-05, "loss": 1.7746, "step": 7173 }, { "epoch": 0.39986622819240847, "grad_norm": 0.6148980259895325, "learning_rate": 6.699544386946795e-05, "loss": 1.8656, "step": 7174 }, { "epoch": 0.3999219664455716, "grad_norm": 0.569527268409729, "learning_rate": 6.698706953640693e-05, "loss": 1.6071, "step": 7175 }, { "epoch": 0.3999777046987347, "grad_norm": 0.5626715421676636, "learning_rate": 6.697869466461513e-05, "loss": 1.8849, "step": 7176 }, { "epoch": 0.4000334429518979, "grad_norm": 0.5838245153427124, "learning_rate": 6.69703192543581e-05, "loss": 1.7764, "step": 7177 }, { "epoch": 0.40008918120506104, "grad_norm": 0.552139937877655, "learning_rate": 6.696194330590151e-05, "loss": 1.6598, "step": 7178 }, { "epoch": 0.40014491945822417, "grad_norm": 0.5443406105041504, "learning_rate": 6.695356681951099e-05, "loss": 1.6139, "step": 7179 }, { "epoch": 0.4002006577113873, "grad_norm": 0.5214937329292297, "learning_rate": 6.694518979545214e-05, "loss": 1.6783, "step": 7180 }, { "epoch": 0.4002563959645505, "grad_norm": 0.5553892254829407, "learning_rate": 6.69368122339907e-05, "loss": 1.6699, "step": 7181 }, { "epoch": 0.4003121342177136, "grad_norm": 0.5150647163391113, "learning_rate": 6.692843413539229e-05, "loss": 1.532, "step": 7182 }, { "epoch": 0.40036787247087674, "grad_norm": 0.5763303637504578, "learning_rate": 6.692005549992268e-05, "loss": 1.9554, "step": 7183 }, { "epoch": 0.4004236107240399, "grad_norm": 0.5533180832862854, "learning_rate": 6.691167632784754e-05, "loss": 1.4465, "step": 7184 }, { "epoch": 0.40047934897720305, "grad_norm": 0.5495351552963257, "learning_rate": 6.690329661943265e-05, "loss": 1.6263, "step": 7185 }, { "epoch": 0.4005350872303662, "grad_norm": 0.5440528988838196, "learning_rate": 6.689491637494371e-05, "loss": 1.8053, "step": 7186 }, { "epoch": 0.40059082548352937, "grad_norm": 0.5240649580955505, "learning_rate": 6.688653559464655e-05, "loss": 1.6647, "step": 7187 }, { "epoch": 0.4006465637366925, "grad_norm": 0.5496859550476074, "learning_rate": 6.687815427880694e-05, "loss": 1.7904, "step": 7188 }, { "epoch": 0.4007023019898556, "grad_norm": 0.5740963816642761, "learning_rate": 6.686977242769067e-05, "loss": 1.8628, "step": 7189 }, { "epoch": 0.4007580402430188, "grad_norm": 0.5899214148521423, "learning_rate": 6.686139004156358e-05, "loss": 1.6146, "step": 7190 }, { "epoch": 0.40081377849618194, "grad_norm": 0.5265205502510071, "learning_rate": 6.68530071206915e-05, "loss": 1.683, "step": 7191 }, { "epoch": 0.40086951674934507, "grad_norm": 0.560076892375946, "learning_rate": 6.684462366534032e-05, "loss": 1.6757, "step": 7192 }, { "epoch": 0.4009252550025082, "grad_norm": 0.5472216010093689, "learning_rate": 6.683623967577586e-05, "loss": 1.7725, "step": 7193 }, { "epoch": 0.4009809932556714, "grad_norm": 0.5014883875846863, "learning_rate": 6.682785515226407e-05, "loss": 1.4681, "step": 7194 }, { "epoch": 0.4010367315088345, "grad_norm": 0.5076844692230225, "learning_rate": 6.681947009507079e-05, "loss": 1.4126, "step": 7195 }, { "epoch": 0.40109246976199764, "grad_norm": 0.5327789187431335, "learning_rate": 6.681108450446202e-05, "loss": 1.6593, "step": 7196 }, { "epoch": 0.4011482080151608, "grad_norm": 0.6164959073066711, "learning_rate": 6.680269838070364e-05, "loss": 1.9668, "step": 7197 }, { "epoch": 0.40120394626832395, "grad_norm": 0.5150039792060852, "learning_rate": 6.679431172406163e-05, "loss": 1.4285, "step": 7198 }, { "epoch": 0.4012596845214871, "grad_norm": 0.5839514136314392, "learning_rate": 6.678592453480198e-05, "loss": 1.8469, "step": 7199 }, { "epoch": 0.40131542277465027, "grad_norm": 0.6449024677276611, "learning_rate": 6.677753681319066e-05, "loss": 2.1511, "step": 7200 }, { "epoch": 0.4013711610278134, "grad_norm": 0.5425246357917786, "learning_rate": 6.676914855949372e-05, "loss": 1.8045, "step": 7201 }, { "epoch": 0.4014268992809765, "grad_norm": 0.5886958241462708, "learning_rate": 6.676075977397715e-05, "loss": 1.7844, "step": 7202 }, { "epoch": 0.40148263753413965, "grad_norm": 0.5560657382011414, "learning_rate": 6.675237045690699e-05, "loss": 1.7289, "step": 7203 }, { "epoch": 0.40153837578730284, "grad_norm": 0.5133156776428223, "learning_rate": 6.674398060854931e-05, "loss": 1.4584, "step": 7204 }, { "epoch": 0.40159411404046597, "grad_norm": 0.5923200845718384, "learning_rate": 6.67355902291702e-05, "loss": 1.8035, "step": 7205 }, { "epoch": 0.4016498522936291, "grad_norm": 0.5706618428230286, "learning_rate": 6.672719931903574e-05, "loss": 1.781, "step": 7206 }, { "epoch": 0.4017055905467923, "grad_norm": 0.548729419708252, "learning_rate": 6.671880787841204e-05, "loss": 1.7033, "step": 7207 }, { "epoch": 0.4017613287999554, "grad_norm": 0.5980433225631714, "learning_rate": 6.671041590756524e-05, "loss": 1.7048, "step": 7208 }, { "epoch": 0.40181706705311854, "grad_norm": 0.5054447054862976, "learning_rate": 6.670202340676149e-05, "loss": 1.6601, "step": 7209 }, { "epoch": 0.4018728053062817, "grad_norm": 0.5414553880691528, "learning_rate": 6.669363037626689e-05, "loss": 1.619, "step": 7210 }, { "epoch": 0.40192854355944485, "grad_norm": 0.5375347137451172, "learning_rate": 6.66852368163477e-05, "loss": 1.6898, "step": 7211 }, { "epoch": 0.401984281812608, "grad_norm": 0.5620880722999573, "learning_rate": 6.667684272727007e-05, "loss": 1.4842, "step": 7212 }, { "epoch": 0.40204002006577116, "grad_norm": 0.5257782936096191, "learning_rate": 6.666844810930021e-05, "loss": 1.5747, "step": 7213 }, { "epoch": 0.4020957583189343, "grad_norm": 0.586007297039032, "learning_rate": 6.666005296270439e-05, "loss": 1.9183, "step": 7214 }, { "epoch": 0.4021514965720974, "grad_norm": 0.5531460642814636, "learning_rate": 6.66516572877488e-05, "loss": 1.708, "step": 7215 }, { "epoch": 0.40220723482526055, "grad_norm": 0.544386625289917, "learning_rate": 6.664326108469974e-05, "loss": 1.5666, "step": 7216 }, { "epoch": 0.40226297307842374, "grad_norm": 0.5806384682655334, "learning_rate": 6.663486435382347e-05, "loss": 1.8389, "step": 7217 }, { "epoch": 0.40231871133158686, "grad_norm": 0.6060808300971985, "learning_rate": 6.66264670953863e-05, "loss": 1.91, "step": 7218 }, { "epoch": 0.40237444958475, "grad_norm": 0.5704980492591858, "learning_rate": 6.661806930965452e-05, "loss": 1.6892, "step": 7219 }, { "epoch": 0.4024301878379132, "grad_norm": 0.5570072531700134, "learning_rate": 6.660967099689448e-05, "loss": 1.6718, "step": 7220 }, { "epoch": 0.4024859260910763, "grad_norm": 0.5326122641563416, "learning_rate": 6.66012721573725e-05, "loss": 1.7055, "step": 7221 }, { "epoch": 0.40254166434423944, "grad_norm": 0.5099365711212158, "learning_rate": 6.659287279135499e-05, "loss": 1.6732, "step": 7222 }, { "epoch": 0.4025974025974026, "grad_norm": 0.5786659717559814, "learning_rate": 6.658447289910827e-05, "loss": 1.4223, "step": 7223 }, { "epoch": 0.40265314085056575, "grad_norm": 0.5925951600074768, "learning_rate": 6.657607248089879e-05, "loss": 1.8696, "step": 7224 }, { "epoch": 0.4027088791037289, "grad_norm": 0.5589519739151001, "learning_rate": 6.65676715369929e-05, "loss": 1.5046, "step": 7225 }, { "epoch": 0.402764617356892, "grad_norm": 0.5450175404548645, "learning_rate": 6.655927006765709e-05, "loss": 1.6517, "step": 7226 }, { "epoch": 0.4028203556100552, "grad_norm": 0.563928484916687, "learning_rate": 6.655086807315778e-05, "loss": 1.8544, "step": 7227 }, { "epoch": 0.4028760938632183, "grad_norm": 0.5899096131324768, "learning_rate": 6.654246555376144e-05, "loss": 1.7556, "step": 7228 }, { "epoch": 0.40293183211638145, "grad_norm": 0.5601338744163513, "learning_rate": 6.653406250973451e-05, "loss": 1.7469, "step": 7229 }, { "epoch": 0.40298757036954463, "grad_norm": 0.5789577960968018, "learning_rate": 6.652565894134355e-05, "loss": 1.6428, "step": 7230 }, { "epoch": 0.40304330862270776, "grad_norm": 0.5229625701904297, "learning_rate": 6.651725484885503e-05, "loss": 1.4699, "step": 7231 }, { "epoch": 0.4030990468758709, "grad_norm": 0.5528407096862793, "learning_rate": 6.650885023253548e-05, "loss": 1.8881, "step": 7232 }, { "epoch": 0.4031547851290341, "grad_norm": 0.5682995319366455, "learning_rate": 6.650044509265147e-05, "loss": 1.8263, "step": 7233 }, { "epoch": 0.4032105233821972, "grad_norm": 0.5219863057136536, "learning_rate": 6.649203942946954e-05, "loss": 1.5232, "step": 7234 }, { "epoch": 0.40326626163536033, "grad_norm": 0.5359931588172913, "learning_rate": 6.648363324325627e-05, "loss": 1.5617, "step": 7235 }, { "epoch": 0.4033219998885235, "grad_norm": 0.5631711483001709, "learning_rate": 6.647522653427825e-05, "loss": 1.7428, "step": 7236 }, { "epoch": 0.40337773814168665, "grad_norm": 0.5994919538497925, "learning_rate": 6.646681930280211e-05, "loss": 1.5538, "step": 7237 }, { "epoch": 0.4034334763948498, "grad_norm": 0.5310835242271423, "learning_rate": 6.645841154909448e-05, "loss": 1.5501, "step": 7238 }, { "epoch": 0.4034892146480129, "grad_norm": 0.7443162798881531, "learning_rate": 6.6450003273422e-05, "loss": 1.7322, "step": 7239 }, { "epoch": 0.4035449529011761, "grad_norm": 0.5354825258255005, "learning_rate": 6.644159447605131e-05, "loss": 1.6913, "step": 7240 }, { "epoch": 0.4036006911543392, "grad_norm": 0.5255858898162842, "learning_rate": 6.64331851572491e-05, "loss": 1.6574, "step": 7241 }, { "epoch": 0.40365642940750235, "grad_norm": 0.531148374080658, "learning_rate": 6.642477531728207e-05, "loss": 1.5934, "step": 7242 }, { "epoch": 0.40371216766066553, "grad_norm": 0.5981380939483643, "learning_rate": 6.641636495641694e-05, "loss": 1.8274, "step": 7243 }, { "epoch": 0.40376790591382866, "grad_norm": 0.5403674840927124, "learning_rate": 6.640795407492043e-05, "loss": 1.4047, "step": 7244 }, { "epoch": 0.4038236441669918, "grad_norm": 0.5610218048095703, "learning_rate": 6.639954267305928e-05, "loss": 1.8228, "step": 7245 }, { "epoch": 0.403879382420155, "grad_norm": 0.5543003678321838, "learning_rate": 6.639113075110025e-05, "loss": 1.8899, "step": 7246 }, { "epoch": 0.4039351206733181, "grad_norm": 0.5696173906326294, "learning_rate": 6.63827183093101e-05, "loss": 1.6491, "step": 7247 }, { "epoch": 0.40399085892648123, "grad_norm": 0.5595298409461975, "learning_rate": 6.637430534795567e-05, "loss": 1.7502, "step": 7248 }, { "epoch": 0.40404659717964436, "grad_norm": 0.5707483291625977, "learning_rate": 6.636589186730373e-05, "loss": 1.6643, "step": 7249 }, { "epoch": 0.40410233543280755, "grad_norm": 0.5698502063751221, "learning_rate": 6.635747786762113e-05, "loss": 1.5516, "step": 7250 }, { "epoch": 0.4041580736859707, "grad_norm": 0.5298511385917664, "learning_rate": 6.63490633491747e-05, "loss": 1.5581, "step": 7251 }, { "epoch": 0.4042138119391338, "grad_norm": 0.5572474598884583, "learning_rate": 6.63406483122313e-05, "loss": 1.7449, "step": 7252 }, { "epoch": 0.404269550192297, "grad_norm": 0.5807195901870728, "learning_rate": 6.633223275705781e-05, "loss": 1.6806, "step": 7253 }, { "epoch": 0.4043252884454601, "grad_norm": 0.5467732548713684, "learning_rate": 6.632381668392111e-05, "loss": 1.742, "step": 7254 }, { "epoch": 0.40438102669862325, "grad_norm": 0.5687143206596375, "learning_rate": 6.631540009308813e-05, "loss": 1.7586, "step": 7255 }, { "epoch": 0.40443676495178643, "grad_norm": 0.5853325128555298, "learning_rate": 6.630698298482578e-05, "loss": 1.8601, "step": 7256 }, { "epoch": 0.40449250320494956, "grad_norm": 0.5176242589950562, "learning_rate": 6.629856535940101e-05, "loss": 1.5131, "step": 7257 }, { "epoch": 0.4045482414581127, "grad_norm": 0.5749338865280151, "learning_rate": 6.629014721708076e-05, "loss": 1.6167, "step": 7258 }, { "epoch": 0.4046039797112759, "grad_norm": 0.6350910663604736, "learning_rate": 6.628172855813203e-05, "loss": 1.6698, "step": 7259 }, { "epoch": 0.404659717964439, "grad_norm": 0.538773238658905, "learning_rate": 6.627330938282182e-05, "loss": 1.7449, "step": 7260 }, { "epoch": 0.40471545621760213, "grad_norm": 0.5643429160118103, "learning_rate": 6.62648896914171e-05, "loss": 1.6906, "step": 7261 }, { "epoch": 0.40477119447076526, "grad_norm": 0.5482378005981445, "learning_rate": 6.62564694841849e-05, "loss": 1.651, "step": 7262 }, { "epoch": 0.40482693272392845, "grad_norm": 0.556492805480957, "learning_rate": 6.624804876139227e-05, "loss": 1.6232, "step": 7263 }, { "epoch": 0.4048826709770916, "grad_norm": 0.5243347883224487, "learning_rate": 6.623962752330627e-05, "loss": 1.5745, "step": 7264 }, { "epoch": 0.4049384092302547, "grad_norm": 0.5533580780029297, "learning_rate": 6.623120577019396e-05, "loss": 1.621, "step": 7265 }, { "epoch": 0.4049941474834179, "grad_norm": 0.6168079376220703, "learning_rate": 6.622278350232246e-05, "loss": 1.8571, "step": 7266 }, { "epoch": 0.405049885736581, "grad_norm": 0.5359664559364319, "learning_rate": 6.621436071995884e-05, "loss": 1.5815, "step": 7267 }, { "epoch": 0.40510562398974415, "grad_norm": 0.6080171465873718, "learning_rate": 6.620593742337022e-05, "loss": 1.7069, "step": 7268 }, { "epoch": 0.40516136224290733, "grad_norm": 0.5019293427467346, "learning_rate": 6.619751361282377e-05, "loss": 1.5408, "step": 7269 }, { "epoch": 0.40521710049607046, "grad_norm": 0.5557806491851807, "learning_rate": 6.618908928858663e-05, "loss": 1.7405, "step": 7270 }, { "epoch": 0.4052728387492336, "grad_norm": 0.5392197370529175, "learning_rate": 6.618066445092595e-05, "loss": 1.5968, "step": 7271 }, { "epoch": 0.4053285770023967, "grad_norm": 0.621353030204773, "learning_rate": 6.617223910010896e-05, "loss": 1.8194, "step": 7272 }, { "epoch": 0.4053843152555599, "grad_norm": 0.5642111301422119, "learning_rate": 6.61638132364028e-05, "loss": 1.4983, "step": 7273 }, { "epoch": 0.40544005350872303, "grad_norm": 0.5767485499382019, "learning_rate": 6.615538686007476e-05, "loss": 1.6838, "step": 7274 }, { "epoch": 0.40549579176188616, "grad_norm": 0.5635485649108887, "learning_rate": 6.614695997139202e-05, "loss": 1.87, "step": 7275 }, { "epoch": 0.40555153001504934, "grad_norm": 0.617825448513031, "learning_rate": 6.613853257062186e-05, "loss": 1.839, "step": 7276 }, { "epoch": 0.4056072682682125, "grad_norm": 0.5892661213874817, "learning_rate": 6.613010465803153e-05, "loss": 1.7833, "step": 7277 }, { "epoch": 0.4056630065213756, "grad_norm": 0.6038499474525452, "learning_rate": 6.612167623388834e-05, "loss": 1.8361, "step": 7278 }, { "epoch": 0.4057187447745388, "grad_norm": 0.5470013618469238, "learning_rate": 6.611324729845958e-05, "loss": 1.8218, "step": 7279 }, { "epoch": 0.4057744830277019, "grad_norm": 0.5531765818595886, "learning_rate": 6.610481785201254e-05, "loss": 1.6214, "step": 7280 }, { "epoch": 0.40583022128086504, "grad_norm": 0.5488517880439758, "learning_rate": 6.60963878948146e-05, "loss": 1.5644, "step": 7281 }, { "epoch": 0.40588595953402823, "grad_norm": 0.5389445424079895, "learning_rate": 6.608795742713306e-05, "loss": 1.6407, "step": 7282 }, { "epoch": 0.40594169778719136, "grad_norm": 0.5432456731796265, "learning_rate": 6.607952644923534e-05, "loss": 1.6906, "step": 7283 }, { "epoch": 0.4059974360403545, "grad_norm": 0.5381740927696228, "learning_rate": 6.607109496138877e-05, "loss": 1.5545, "step": 7284 }, { "epoch": 0.4060531742935176, "grad_norm": 0.5759360194206238, "learning_rate": 6.606266296386078e-05, "loss": 1.3279, "step": 7285 }, { "epoch": 0.4061089125466808, "grad_norm": 0.5859653949737549, "learning_rate": 6.605423045691875e-05, "loss": 1.6515, "step": 7286 }, { "epoch": 0.40616465079984393, "grad_norm": 0.5650625228881836, "learning_rate": 6.604579744083015e-05, "loss": 1.7375, "step": 7287 }, { "epoch": 0.40622038905300706, "grad_norm": 0.5053606629371643, "learning_rate": 6.60373639158624e-05, "loss": 1.3345, "step": 7288 }, { "epoch": 0.40627612730617024, "grad_norm": 0.559548020362854, "learning_rate": 6.602892988228299e-05, "loss": 1.5881, "step": 7289 }, { "epoch": 0.40633186555933337, "grad_norm": 0.5711749196052551, "learning_rate": 6.602049534035937e-05, "loss": 1.6593, "step": 7290 }, { "epoch": 0.4063876038124965, "grad_norm": 0.5415685176849365, "learning_rate": 6.601206029035904e-05, "loss": 1.7801, "step": 7291 }, { "epoch": 0.4064433420656597, "grad_norm": 0.5906074643135071, "learning_rate": 6.60036247325495e-05, "loss": 1.8566, "step": 7292 }, { "epoch": 0.4064990803188228, "grad_norm": 0.5831937789916992, "learning_rate": 6.599518866719831e-05, "loss": 1.6081, "step": 7293 }, { "epoch": 0.40655481857198594, "grad_norm": 0.5068337917327881, "learning_rate": 6.5986752094573e-05, "loss": 1.5883, "step": 7294 }, { "epoch": 0.4066105568251491, "grad_norm": 0.5402857065200806, "learning_rate": 6.59783150149411e-05, "loss": 1.7286, "step": 7295 }, { "epoch": 0.40666629507831226, "grad_norm": 0.5793524980545044, "learning_rate": 6.596987742857024e-05, "loss": 1.782, "step": 7296 }, { "epoch": 0.4067220333314754, "grad_norm": 0.5685024261474609, "learning_rate": 6.596143933572795e-05, "loss": 1.6989, "step": 7297 }, { "epoch": 0.4067777715846385, "grad_norm": 0.5885668396949768, "learning_rate": 6.595300073668188e-05, "loss": 1.7724, "step": 7298 }, { "epoch": 0.4068335098378017, "grad_norm": 0.5693629384040833, "learning_rate": 6.594456163169963e-05, "loss": 1.7927, "step": 7299 }, { "epoch": 0.40688924809096483, "grad_norm": 0.6024751663208008, "learning_rate": 6.593612202104885e-05, "loss": 1.9269, "step": 7300 }, { "epoch": 0.40694498634412796, "grad_norm": 0.5218265652656555, "learning_rate": 6.59276819049972e-05, "loss": 1.6254, "step": 7301 }, { "epoch": 0.40700072459729114, "grad_norm": 0.6775539517402649, "learning_rate": 6.591924128381234e-05, "loss": 2.2446, "step": 7302 }, { "epoch": 0.40705646285045427, "grad_norm": 0.5457693338394165, "learning_rate": 6.591080015776196e-05, "loss": 1.7268, "step": 7303 }, { "epoch": 0.4071122011036174, "grad_norm": 0.5545173287391663, "learning_rate": 6.590235852711377e-05, "loss": 1.5403, "step": 7304 }, { "epoch": 0.4071679393567806, "grad_norm": 0.5415998697280884, "learning_rate": 6.589391639213549e-05, "loss": 1.7487, "step": 7305 }, { "epoch": 0.4072236776099437, "grad_norm": 0.535123884677887, "learning_rate": 6.588547375309484e-05, "loss": 1.8118, "step": 7306 }, { "epoch": 0.40727941586310684, "grad_norm": 0.5559954643249512, "learning_rate": 6.587703061025959e-05, "loss": 1.7792, "step": 7307 }, { "epoch": 0.40733515411626997, "grad_norm": 0.5952346920967102, "learning_rate": 6.586858696389748e-05, "loss": 1.8367, "step": 7308 }, { "epoch": 0.40739089236943316, "grad_norm": 0.5658838152885437, "learning_rate": 6.586014281427632e-05, "loss": 1.8874, "step": 7309 }, { "epoch": 0.4074466306225963, "grad_norm": 0.5443295240402222, "learning_rate": 6.585169816166392e-05, "loss": 1.6405, "step": 7310 }, { "epoch": 0.4075023688757594, "grad_norm": 0.5414347648620605, "learning_rate": 6.584325300632806e-05, "loss": 1.7544, "step": 7311 }, { "epoch": 0.4075581071289226, "grad_norm": 0.5387737154960632, "learning_rate": 6.583480734853658e-05, "loss": 1.6416, "step": 7312 }, { "epoch": 0.4076138453820857, "grad_norm": 0.5518178343772888, "learning_rate": 6.582636118855735e-05, "loss": 1.7322, "step": 7313 }, { "epoch": 0.40766958363524886, "grad_norm": 0.5452878475189209, "learning_rate": 6.58179145266582e-05, "loss": 1.7432, "step": 7314 }, { "epoch": 0.40772532188841204, "grad_norm": 0.5074037313461304, "learning_rate": 6.580946736310704e-05, "loss": 1.6643, "step": 7315 }, { "epoch": 0.40778106014157517, "grad_norm": 0.5745427012443542, "learning_rate": 6.580101969817175e-05, "loss": 1.8664, "step": 7316 }, { "epoch": 0.4078367983947383, "grad_norm": 0.5891657471656799, "learning_rate": 6.579257153212024e-05, "loss": 1.8217, "step": 7317 }, { "epoch": 0.4078925366479015, "grad_norm": 0.5395662188529968, "learning_rate": 6.578412286522044e-05, "loss": 1.5422, "step": 7318 }, { "epoch": 0.4079482749010646, "grad_norm": 0.5738537907600403, "learning_rate": 6.57756736977403e-05, "loss": 1.753, "step": 7319 }, { "epoch": 0.40800401315422774, "grad_norm": 0.5593982338905334, "learning_rate": 6.576722402994775e-05, "loss": 1.5805, "step": 7320 }, { "epoch": 0.40805975140739087, "grad_norm": 0.6101201772689819, "learning_rate": 6.575877386211077e-05, "loss": 1.742, "step": 7321 }, { "epoch": 0.40811548966055405, "grad_norm": 0.5429602265357971, "learning_rate": 6.57503231944974e-05, "loss": 1.7166, "step": 7322 }, { "epoch": 0.4081712279137172, "grad_norm": 0.5799590349197388, "learning_rate": 6.574187202737558e-05, "loss": 1.8698, "step": 7323 }, { "epoch": 0.4082269661668803, "grad_norm": 0.5671953558921814, "learning_rate": 6.573342036101339e-05, "loss": 1.5871, "step": 7324 }, { "epoch": 0.4082827044200435, "grad_norm": 0.5521631836891174, "learning_rate": 6.572496819567882e-05, "loss": 1.6091, "step": 7325 }, { "epoch": 0.4083384426732066, "grad_norm": 0.6058674454689026, "learning_rate": 6.571651553163994e-05, "loss": 1.9233, "step": 7326 }, { "epoch": 0.40839418092636975, "grad_norm": 0.5595351457595825, "learning_rate": 6.570806236916481e-05, "loss": 1.681, "step": 7327 }, { "epoch": 0.40844991917953294, "grad_norm": 0.5565963983535767, "learning_rate": 6.569960870852156e-05, "loss": 1.8081, "step": 7328 }, { "epoch": 0.40850565743269607, "grad_norm": 0.5626837015151978, "learning_rate": 6.569115454997823e-05, "loss": 1.7268, "step": 7329 }, { "epoch": 0.4085613956858592, "grad_norm": 0.5642188787460327, "learning_rate": 6.568269989380296e-05, "loss": 1.9007, "step": 7330 }, { "epoch": 0.4086171339390223, "grad_norm": 0.5992141962051392, "learning_rate": 6.56742447402639e-05, "loss": 1.8163, "step": 7331 }, { "epoch": 0.4086728721921855, "grad_norm": 0.5469499826431274, "learning_rate": 6.566578908962918e-05, "loss": 1.6564, "step": 7332 }, { "epoch": 0.40872861044534864, "grad_norm": 0.5719706416130066, "learning_rate": 6.565733294216697e-05, "loss": 1.3752, "step": 7333 }, { "epoch": 0.40878434869851177, "grad_norm": 0.5726919174194336, "learning_rate": 6.564887629814543e-05, "loss": 1.629, "step": 7334 }, { "epoch": 0.40884008695167495, "grad_norm": 0.6024767160415649, "learning_rate": 6.56404191578328e-05, "loss": 1.6818, "step": 7335 }, { "epoch": 0.4088958252048381, "grad_norm": 0.5598945021629333, "learning_rate": 6.563196152149725e-05, "loss": 1.6562, "step": 7336 }, { "epoch": 0.4089515634580012, "grad_norm": 0.6022909283638, "learning_rate": 6.562350338940704e-05, "loss": 1.6497, "step": 7337 }, { "epoch": 0.4090073017111644, "grad_norm": 0.5557130575180054, "learning_rate": 6.561504476183037e-05, "loss": 1.5777, "step": 7338 }, { "epoch": 0.4090630399643275, "grad_norm": 0.556742787361145, "learning_rate": 6.560658563903553e-05, "loss": 1.6048, "step": 7339 }, { "epoch": 0.40911877821749065, "grad_norm": 0.6215361952781677, "learning_rate": 6.559812602129078e-05, "loss": 1.85, "step": 7340 }, { "epoch": 0.40917451647065384, "grad_norm": 0.5431729555130005, "learning_rate": 6.558966590886443e-05, "loss": 1.7366, "step": 7341 }, { "epoch": 0.40923025472381697, "grad_norm": 0.5173145532608032, "learning_rate": 6.558120530202476e-05, "loss": 1.5962, "step": 7342 }, { "epoch": 0.4092859929769801, "grad_norm": 0.558746874332428, "learning_rate": 6.55727442010401e-05, "loss": 1.6842, "step": 7343 }, { "epoch": 0.4093417312301432, "grad_norm": 0.5484337210655212, "learning_rate": 6.55642826061788e-05, "loss": 1.8824, "step": 7344 }, { "epoch": 0.4093974694833064, "grad_norm": 0.5415590405464172, "learning_rate": 6.55558205177092e-05, "loss": 1.7393, "step": 7345 }, { "epoch": 0.40945320773646954, "grad_norm": 0.5736859440803528, "learning_rate": 6.554735793589967e-05, "loss": 1.6012, "step": 7346 }, { "epoch": 0.40950894598963267, "grad_norm": 0.5511910319328308, "learning_rate": 6.553889486101857e-05, "loss": 1.6051, "step": 7347 }, { "epoch": 0.40956468424279585, "grad_norm": 0.5481744408607483, "learning_rate": 6.553043129333436e-05, "loss": 1.6571, "step": 7348 }, { "epoch": 0.409620422495959, "grad_norm": 0.7418869733810425, "learning_rate": 6.55219672331154e-05, "loss": 1.6247, "step": 7349 }, { "epoch": 0.4096761607491221, "grad_norm": 0.5882282257080078, "learning_rate": 6.551350268063015e-05, "loss": 1.7125, "step": 7350 }, { "epoch": 0.4097318990022853, "grad_norm": 0.6087817549705505, "learning_rate": 6.550503763614702e-05, "loss": 1.9143, "step": 7351 }, { "epoch": 0.4097876372554484, "grad_norm": 0.5106980800628662, "learning_rate": 6.549657209993452e-05, "loss": 1.4884, "step": 7352 }, { "epoch": 0.40984337550861155, "grad_norm": 0.5542812347412109, "learning_rate": 6.548810607226109e-05, "loss": 1.6739, "step": 7353 }, { "epoch": 0.4098991137617747, "grad_norm": 0.6260994672775269, "learning_rate": 6.547963955339526e-05, "loss": 1.8902, "step": 7354 }, { "epoch": 0.40995485201493786, "grad_norm": 0.5681547522544861, "learning_rate": 6.547117254360549e-05, "loss": 1.8688, "step": 7355 }, { "epoch": 0.410010590268101, "grad_norm": 0.5453806519508362, "learning_rate": 6.546270504316033e-05, "loss": 1.7046, "step": 7356 }, { "epoch": 0.4100663285212641, "grad_norm": 0.5230925679206848, "learning_rate": 6.545423705232834e-05, "loss": 1.6008, "step": 7357 }, { "epoch": 0.4101220667744273, "grad_norm": 0.5534452795982361, "learning_rate": 6.544576857137804e-05, "loss": 1.806, "step": 7358 }, { "epoch": 0.41017780502759044, "grad_norm": 0.586466908454895, "learning_rate": 6.543729960057803e-05, "loss": 1.8252, "step": 7359 }, { "epoch": 0.41023354328075357, "grad_norm": 0.5712817311286926, "learning_rate": 6.542883014019686e-05, "loss": 1.6653, "step": 7360 }, { "epoch": 0.41028928153391675, "grad_norm": 0.5666759014129639, "learning_rate": 6.542036019050318e-05, "loss": 1.7503, "step": 7361 }, { "epoch": 0.4103450197870799, "grad_norm": 0.6092966198921204, "learning_rate": 6.541188975176557e-05, "loss": 2.0138, "step": 7362 }, { "epoch": 0.410400758040243, "grad_norm": 0.5910922884941101, "learning_rate": 6.540341882425267e-05, "loss": 1.8193, "step": 7363 }, { "epoch": 0.4104564962934062, "grad_norm": 0.5653868317604065, "learning_rate": 6.539494740823313e-05, "loss": 1.6905, "step": 7364 }, { "epoch": 0.4105122345465693, "grad_norm": 0.5556957721710205, "learning_rate": 6.538647550397563e-05, "loss": 1.5966, "step": 7365 }, { "epoch": 0.41056797279973245, "grad_norm": 0.6585522294044495, "learning_rate": 6.537800311174882e-05, "loss": 1.9665, "step": 7366 }, { "epoch": 0.4106237110528956, "grad_norm": 0.5647701621055603, "learning_rate": 6.536953023182143e-05, "loss": 1.7119, "step": 7367 }, { "epoch": 0.41067944930605876, "grad_norm": 0.5993644595146179, "learning_rate": 6.536105686446214e-05, "loss": 1.8307, "step": 7368 }, { "epoch": 0.4107351875592219, "grad_norm": 0.5878274440765381, "learning_rate": 6.535258300993969e-05, "loss": 1.6834, "step": 7369 }, { "epoch": 0.410790925812385, "grad_norm": 0.5731014609336853, "learning_rate": 6.534410866852283e-05, "loss": 1.7639, "step": 7370 }, { "epoch": 0.4108466640655482, "grad_norm": 0.558718204498291, "learning_rate": 6.533563384048029e-05, "loss": 1.68, "step": 7371 }, { "epoch": 0.41090240231871134, "grad_norm": 0.5906892418861389, "learning_rate": 6.532715852608087e-05, "loss": 1.6856, "step": 7372 }, { "epoch": 0.41095814057187446, "grad_norm": 0.5575792193412781, "learning_rate": 6.531868272559333e-05, "loss": 1.6829, "step": 7373 }, { "epoch": 0.41101387882503765, "grad_norm": 0.5349531769752502, "learning_rate": 6.531020643928649e-05, "loss": 1.666, "step": 7374 }, { "epoch": 0.4110696170782008, "grad_norm": 0.5200047492980957, "learning_rate": 6.530172966742918e-05, "loss": 1.5504, "step": 7375 }, { "epoch": 0.4111253553313639, "grad_norm": 0.599875271320343, "learning_rate": 6.529325241029022e-05, "loss": 1.8604, "step": 7376 }, { "epoch": 0.41118109358452704, "grad_norm": 0.5267208814620972, "learning_rate": 6.528477466813845e-05, "loss": 1.5969, "step": 7377 }, { "epoch": 0.4112368318376902, "grad_norm": 0.5209345817565918, "learning_rate": 6.527629644124273e-05, "loss": 1.5824, "step": 7378 }, { "epoch": 0.41129257009085335, "grad_norm": 0.5929481983184814, "learning_rate": 6.526781772987197e-05, "loss": 1.9316, "step": 7379 }, { "epoch": 0.4113483083440165, "grad_norm": 0.5629690885543823, "learning_rate": 6.525933853429505e-05, "loss": 1.6927, "step": 7380 }, { "epoch": 0.41140404659717966, "grad_norm": 0.5802732110023499, "learning_rate": 6.525085885478089e-05, "loss": 1.7149, "step": 7381 }, { "epoch": 0.4114597848503428, "grad_norm": 0.5767194032669067, "learning_rate": 6.524237869159838e-05, "loss": 1.6511, "step": 7382 }, { "epoch": 0.4115155231035059, "grad_norm": 0.5414605140686035, "learning_rate": 6.523389804501651e-05, "loss": 1.5401, "step": 7383 }, { "epoch": 0.4115712613566691, "grad_norm": 0.5376063585281372, "learning_rate": 6.52254169153042e-05, "loss": 1.6796, "step": 7384 }, { "epoch": 0.41162699960983223, "grad_norm": 0.5899385809898376, "learning_rate": 6.521693530273045e-05, "loss": 1.7729, "step": 7385 }, { "epoch": 0.41168273786299536, "grad_norm": 0.5602531433105469, "learning_rate": 6.520845320756421e-05, "loss": 1.6136, "step": 7386 }, { "epoch": 0.41173847611615855, "grad_norm": 0.5425115823745728, "learning_rate": 6.519997063007452e-05, "loss": 1.5817, "step": 7387 }, { "epoch": 0.4117942143693217, "grad_norm": 0.5449849963188171, "learning_rate": 6.51914875705304e-05, "loss": 1.6962, "step": 7388 }, { "epoch": 0.4118499526224848, "grad_norm": 0.5851723551750183, "learning_rate": 6.518300402920084e-05, "loss": 2.035, "step": 7389 }, { "epoch": 0.41190569087564793, "grad_norm": 0.5257713794708252, "learning_rate": 6.517452000635493e-05, "loss": 1.1806, "step": 7390 }, { "epoch": 0.4119614291288111, "grad_norm": 0.5605010390281677, "learning_rate": 6.516603550226171e-05, "loss": 1.7513, "step": 7391 }, { "epoch": 0.41201716738197425, "grad_norm": 0.6154865026473999, "learning_rate": 6.515755051719026e-05, "loss": 1.8616, "step": 7392 }, { "epoch": 0.4120729056351374, "grad_norm": 0.5920423269271851, "learning_rate": 6.51490650514097e-05, "loss": 1.7594, "step": 7393 }, { "epoch": 0.41212864388830056, "grad_norm": 0.545600414276123, "learning_rate": 6.514057910518913e-05, "loss": 1.5641, "step": 7394 }, { "epoch": 0.4121843821414637, "grad_norm": 0.5568488836288452, "learning_rate": 6.513209267879765e-05, "loss": 1.6398, "step": 7395 }, { "epoch": 0.4122401203946268, "grad_norm": 0.5209145545959473, "learning_rate": 6.512360577250443e-05, "loss": 1.4485, "step": 7396 }, { "epoch": 0.41229585864779, "grad_norm": 0.5175876021385193, "learning_rate": 6.511511838657859e-05, "loss": 1.6851, "step": 7397 }, { "epoch": 0.41235159690095313, "grad_norm": 0.5393850803375244, "learning_rate": 6.510663052128934e-05, "loss": 1.6724, "step": 7398 }, { "epoch": 0.41240733515411626, "grad_norm": 0.5579698085784912, "learning_rate": 6.509814217690582e-05, "loss": 1.7999, "step": 7399 }, { "epoch": 0.4124630734072794, "grad_norm": 0.5217966437339783, "learning_rate": 6.508965335369729e-05, "loss": 1.5216, "step": 7400 }, { "epoch": 0.4125188116604426, "grad_norm": 0.5507352352142334, "learning_rate": 6.508116405193292e-05, "loss": 1.5396, "step": 7401 }, { "epoch": 0.4125745499136057, "grad_norm": 0.5592759847640991, "learning_rate": 6.507267427188197e-05, "loss": 1.7238, "step": 7402 }, { "epoch": 0.41263028816676883, "grad_norm": 0.5734774470329285, "learning_rate": 6.506418401381365e-05, "loss": 1.7004, "step": 7403 }, { "epoch": 0.412686026419932, "grad_norm": 0.5572485327720642, "learning_rate": 6.505569327799726e-05, "loss": 1.5875, "step": 7404 }, { "epoch": 0.41274176467309515, "grad_norm": 0.5783054232597351, "learning_rate": 6.504720206470205e-05, "loss": 1.806, "step": 7405 }, { "epoch": 0.4127975029262583, "grad_norm": 0.5762080550193787, "learning_rate": 6.503871037419731e-05, "loss": 1.6241, "step": 7406 }, { "epoch": 0.41285324117942146, "grad_norm": 0.5752031207084656, "learning_rate": 6.50302182067524e-05, "loss": 1.5105, "step": 7407 }, { "epoch": 0.4129089794325846, "grad_norm": 0.5618080496788025, "learning_rate": 6.502172556263656e-05, "loss": 1.6661, "step": 7408 }, { "epoch": 0.4129647176857477, "grad_norm": 0.5460039377212524, "learning_rate": 6.501323244211919e-05, "loss": 1.5414, "step": 7409 }, { "epoch": 0.4130204559389109, "grad_norm": 0.5536362528800964, "learning_rate": 6.500473884546962e-05, "loss": 1.7312, "step": 7410 }, { "epoch": 0.41307619419207403, "grad_norm": 0.5220944285392761, "learning_rate": 6.499624477295722e-05, "loss": 1.4628, "step": 7411 }, { "epoch": 0.41313193244523716, "grad_norm": 0.5702623128890991, "learning_rate": 6.498775022485134e-05, "loss": 1.7568, "step": 7412 }, { "epoch": 0.4131876706984003, "grad_norm": 0.5831007361412048, "learning_rate": 6.497925520142143e-05, "loss": 1.8805, "step": 7413 }, { "epoch": 0.4132434089515635, "grad_norm": 0.5719270706176758, "learning_rate": 6.497075970293688e-05, "loss": 1.8406, "step": 7414 }, { "epoch": 0.4132991472047266, "grad_norm": 0.5721832513809204, "learning_rate": 6.496226372966711e-05, "loss": 1.8577, "step": 7415 }, { "epoch": 0.41335488545788973, "grad_norm": 0.5381945967674255, "learning_rate": 6.495376728188159e-05, "loss": 1.5441, "step": 7416 }, { "epoch": 0.4134106237110529, "grad_norm": 0.5105479955673218, "learning_rate": 6.494527035984974e-05, "loss": 1.7383, "step": 7417 }, { "epoch": 0.41346636196421604, "grad_norm": 0.5516504049301147, "learning_rate": 6.493677296384106e-05, "loss": 1.7542, "step": 7418 }, { "epoch": 0.4135221002173792, "grad_norm": 0.5726693868637085, "learning_rate": 6.492827509412501e-05, "loss": 1.887, "step": 7419 }, { "epoch": 0.41357783847054236, "grad_norm": 0.5425702333450317, "learning_rate": 6.491977675097114e-05, "loss": 1.6247, "step": 7420 }, { "epoch": 0.4136335767237055, "grad_norm": 0.7511564493179321, "learning_rate": 6.491127793464893e-05, "loss": 1.7428, "step": 7421 }, { "epoch": 0.4136893149768686, "grad_norm": 0.5151875019073486, "learning_rate": 6.490277864542792e-05, "loss": 1.6937, "step": 7422 }, { "epoch": 0.41374505323003175, "grad_norm": 0.5558873414993286, "learning_rate": 6.489427888357765e-05, "loss": 1.7254, "step": 7423 }, { "epoch": 0.41380079148319493, "grad_norm": 0.5704571008682251, "learning_rate": 6.488577864936771e-05, "loss": 1.6893, "step": 7424 }, { "epoch": 0.41385652973635806, "grad_norm": 0.5515883564949036, "learning_rate": 6.487727794306765e-05, "loss": 1.5928, "step": 7425 }, { "epoch": 0.4139122679895212, "grad_norm": 0.5346539616584778, "learning_rate": 6.48687767649471e-05, "loss": 1.5923, "step": 7426 }, { "epoch": 0.41396800624268437, "grad_norm": 0.48073434829711914, "learning_rate": 6.48602751152756e-05, "loss": 1.5783, "step": 7427 }, { "epoch": 0.4140237444958475, "grad_norm": 0.5613585114479065, "learning_rate": 6.485177299432284e-05, "loss": 1.7081, "step": 7428 }, { "epoch": 0.41407948274901063, "grad_norm": 0.5521184206008911, "learning_rate": 6.484327040235844e-05, "loss": 1.8141, "step": 7429 }, { "epoch": 0.4141352210021738, "grad_norm": 0.5570716857910156, "learning_rate": 6.483476733965202e-05, "loss": 1.8114, "step": 7430 }, { "epoch": 0.41419095925533694, "grad_norm": 0.5927569270133972, "learning_rate": 6.48262638064733e-05, "loss": 1.8538, "step": 7431 }, { "epoch": 0.4142466975085001, "grad_norm": 0.6198796629905701, "learning_rate": 6.48177598030919e-05, "loss": 1.8671, "step": 7432 }, { "epoch": 0.41430243576166326, "grad_norm": 0.562487781047821, "learning_rate": 6.480925532977758e-05, "loss": 1.6247, "step": 7433 }, { "epoch": 0.4143581740148264, "grad_norm": 0.5455536246299744, "learning_rate": 6.480075038680002e-05, "loss": 1.6946, "step": 7434 }, { "epoch": 0.4144139122679895, "grad_norm": 0.6041662096977234, "learning_rate": 6.479224497442897e-05, "loss": 1.9345, "step": 7435 }, { "epoch": 0.41446965052115264, "grad_norm": 0.5616452693939209, "learning_rate": 6.478373909293412e-05, "loss": 1.8108, "step": 7436 }, { "epoch": 0.41452538877431583, "grad_norm": 0.5593286752700806, "learning_rate": 6.477523274258528e-05, "loss": 1.6404, "step": 7437 }, { "epoch": 0.41458112702747896, "grad_norm": 0.5919610261917114, "learning_rate": 6.47667259236522e-05, "loss": 1.8287, "step": 7438 }, { "epoch": 0.4146368652806421, "grad_norm": 0.6362894177436829, "learning_rate": 6.475821863640467e-05, "loss": 1.8535, "step": 7439 }, { "epoch": 0.41469260353380527, "grad_norm": 0.4930521547794342, "learning_rate": 6.474971088111248e-05, "loss": 1.3973, "step": 7440 }, { "epoch": 0.4147483417869684, "grad_norm": 0.5308540463447571, "learning_rate": 6.474120265804549e-05, "loss": 1.5271, "step": 7441 }, { "epoch": 0.41480408004013153, "grad_norm": 0.5587360262870789, "learning_rate": 6.473269396747346e-05, "loss": 1.6953, "step": 7442 }, { "epoch": 0.4148598182932947, "grad_norm": 0.5565241575241089, "learning_rate": 6.47241848096663e-05, "loss": 1.7807, "step": 7443 }, { "epoch": 0.41491555654645784, "grad_norm": 0.6130486130714417, "learning_rate": 6.471567518489383e-05, "loss": 2.0551, "step": 7444 }, { "epoch": 0.41497129479962097, "grad_norm": 0.5374565720558167, "learning_rate": 6.470716509342594e-05, "loss": 1.6525, "step": 7445 }, { "epoch": 0.4150270330527841, "grad_norm": 0.5470364093780518, "learning_rate": 6.469865453553254e-05, "loss": 1.7753, "step": 7446 }, { "epoch": 0.4150827713059473, "grad_norm": 0.5423111319541931, "learning_rate": 6.46901435114835e-05, "loss": 1.6718, "step": 7447 }, { "epoch": 0.4151385095591104, "grad_norm": 0.630453884601593, "learning_rate": 6.468163202154877e-05, "loss": 1.7607, "step": 7448 }, { "epoch": 0.41519424781227354, "grad_norm": 0.5870693325996399, "learning_rate": 6.467312006599828e-05, "loss": 1.8854, "step": 7449 }, { "epoch": 0.4152499860654367, "grad_norm": 0.6026604771614075, "learning_rate": 6.466460764510196e-05, "loss": 1.6298, "step": 7450 }, { "epoch": 0.41530572431859986, "grad_norm": 0.5341464281082153, "learning_rate": 6.465609475912977e-05, "loss": 1.5961, "step": 7451 }, { "epoch": 0.415361462571763, "grad_norm": 0.5364176630973816, "learning_rate": 6.464758140835173e-05, "loss": 1.6091, "step": 7452 }, { "epoch": 0.41541720082492617, "grad_norm": 0.5682061910629272, "learning_rate": 6.463906759303779e-05, "loss": 1.6807, "step": 7453 }, { "epoch": 0.4154729390780893, "grad_norm": 0.5520201325416565, "learning_rate": 6.463055331345798e-05, "loss": 1.8693, "step": 7454 }, { "epoch": 0.4155286773312524, "grad_norm": 0.5386977195739746, "learning_rate": 6.462203856988233e-05, "loss": 1.5473, "step": 7455 }, { "epoch": 0.4155844155844156, "grad_norm": 0.5517452955245972, "learning_rate": 6.461352336258088e-05, "loss": 1.5523, "step": 7456 }, { "epoch": 0.41564015383757874, "grad_norm": 0.6362208127975464, "learning_rate": 6.460500769182365e-05, "loss": 1.6515, "step": 7457 }, { "epoch": 0.41569589209074187, "grad_norm": 0.5483435392379761, "learning_rate": 6.459649155788075e-05, "loss": 1.6962, "step": 7458 }, { "epoch": 0.415751630343905, "grad_norm": 0.5627394914627075, "learning_rate": 6.458797496102222e-05, "loss": 1.5808, "step": 7459 }, { "epoch": 0.4158073685970682, "grad_norm": 0.5749256610870361, "learning_rate": 6.45794579015182e-05, "loss": 1.6652, "step": 7460 }, { "epoch": 0.4158631068502313, "grad_norm": 0.561033308506012, "learning_rate": 6.457094037963877e-05, "loss": 1.5447, "step": 7461 }, { "epoch": 0.41591884510339444, "grad_norm": 0.6188123822212219, "learning_rate": 6.456242239565405e-05, "loss": 1.8373, "step": 7462 }, { "epoch": 0.4159745833565576, "grad_norm": 0.5495220422744751, "learning_rate": 6.455390394983422e-05, "loss": 1.7338, "step": 7463 }, { "epoch": 0.41603032160972075, "grad_norm": 0.5390871167182922, "learning_rate": 6.454538504244938e-05, "loss": 1.5552, "step": 7464 }, { "epoch": 0.4160860598628839, "grad_norm": 0.5653820633888245, "learning_rate": 6.453686567376976e-05, "loss": 1.692, "step": 7465 }, { "epoch": 0.41614179811604707, "grad_norm": 0.5153915286064148, "learning_rate": 6.45283458440655e-05, "loss": 1.6676, "step": 7466 }, { "epoch": 0.4161975363692102, "grad_norm": 0.5695963501930237, "learning_rate": 6.451982555360682e-05, "loss": 1.6982, "step": 7467 }, { "epoch": 0.4162532746223733, "grad_norm": 0.6078826785087585, "learning_rate": 6.451130480266395e-05, "loss": 1.762, "step": 7468 }, { "epoch": 0.41630901287553645, "grad_norm": 0.5621688961982727, "learning_rate": 6.450278359150708e-05, "loss": 1.5914, "step": 7469 }, { "epoch": 0.41636475112869964, "grad_norm": 0.5914077162742615, "learning_rate": 6.449426192040649e-05, "loss": 1.909, "step": 7470 }, { "epoch": 0.41642048938186277, "grad_norm": 0.5638688802719116, "learning_rate": 6.448573978963239e-05, "loss": 1.8037, "step": 7471 }, { "epoch": 0.4164762276350259, "grad_norm": 0.569990336894989, "learning_rate": 6.44772171994551e-05, "loss": 1.5707, "step": 7472 }, { "epoch": 0.4165319658881891, "grad_norm": 0.5680502653121948, "learning_rate": 6.446869415014488e-05, "loss": 1.6062, "step": 7473 }, { "epoch": 0.4165877041413522, "grad_norm": 0.5565951466560364, "learning_rate": 6.446017064197205e-05, "loss": 1.7973, "step": 7474 }, { "epoch": 0.41664344239451534, "grad_norm": 0.5711973905563354, "learning_rate": 6.445164667520691e-05, "loss": 1.751, "step": 7475 }, { "epoch": 0.4166991806476785, "grad_norm": 0.5332829356193542, "learning_rate": 6.44431222501198e-05, "loss": 1.5348, "step": 7476 }, { "epoch": 0.41675491890084165, "grad_norm": 0.5311811566352844, "learning_rate": 6.443459736698105e-05, "loss": 1.7648, "step": 7477 }, { "epoch": 0.4168106571540048, "grad_norm": 0.5389667749404907, "learning_rate": 6.442607202606104e-05, "loss": 1.5702, "step": 7478 }, { "epoch": 0.41686639540716797, "grad_norm": 0.5450131297111511, "learning_rate": 6.441754622763015e-05, "loss": 1.5624, "step": 7479 }, { "epoch": 0.4169221336603311, "grad_norm": 0.6195186376571655, "learning_rate": 6.440901997195871e-05, "loss": 1.8265, "step": 7480 }, { "epoch": 0.4169778719134942, "grad_norm": 0.5652611255645752, "learning_rate": 6.440049325931721e-05, "loss": 1.6908, "step": 7481 }, { "epoch": 0.41703361016665735, "grad_norm": 0.5675498843193054, "learning_rate": 6.4391966089976e-05, "loss": 1.8279, "step": 7482 }, { "epoch": 0.41708934841982054, "grad_norm": 0.5133779048919678, "learning_rate": 6.438343846420556e-05, "loss": 1.4909, "step": 7483 }, { "epoch": 0.41714508667298367, "grad_norm": 0.5815598964691162, "learning_rate": 6.437491038227628e-05, "loss": 1.6886, "step": 7484 }, { "epoch": 0.4172008249261468, "grad_norm": 0.5756742358207703, "learning_rate": 6.43663818444587e-05, "loss": 1.5501, "step": 7485 }, { "epoch": 0.41725656317931, "grad_norm": 0.5238984227180481, "learning_rate": 6.435785285102321e-05, "loss": 1.5227, "step": 7486 }, { "epoch": 0.4173123014324731, "grad_norm": 0.6538522839546204, "learning_rate": 6.434932340224036e-05, "loss": 1.8644, "step": 7487 }, { "epoch": 0.41736803968563624, "grad_norm": 0.5802149772644043, "learning_rate": 6.434079349838062e-05, "loss": 1.823, "step": 7488 }, { "epoch": 0.4174237779387994, "grad_norm": 0.5617754459381104, "learning_rate": 6.433226313971455e-05, "loss": 1.6917, "step": 7489 }, { "epoch": 0.41747951619196255, "grad_norm": 0.5967627763748169, "learning_rate": 6.432373232651261e-05, "loss": 1.8103, "step": 7490 }, { "epoch": 0.4175352544451257, "grad_norm": 0.5762447714805603, "learning_rate": 6.431520105904543e-05, "loss": 1.6457, "step": 7491 }, { "epoch": 0.4175909926982888, "grad_norm": 0.5717265009880066, "learning_rate": 6.430666933758353e-05, "loss": 1.7308, "step": 7492 }, { "epoch": 0.417646730951452, "grad_norm": 0.5314132571220398, "learning_rate": 6.429813716239747e-05, "loss": 1.5346, "step": 7493 }, { "epoch": 0.4177024692046151, "grad_norm": 0.5187550187110901, "learning_rate": 6.42896045337579e-05, "loss": 1.541, "step": 7494 }, { "epoch": 0.41775820745777825, "grad_norm": 0.524467945098877, "learning_rate": 6.428107145193535e-05, "loss": 1.6209, "step": 7495 }, { "epoch": 0.41781394571094144, "grad_norm": 0.5283476710319519, "learning_rate": 6.427253791720051e-05, "loss": 1.6333, "step": 7496 }, { "epoch": 0.41786968396410457, "grad_norm": 0.5059264302253723, "learning_rate": 6.426400392982396e-05, "loss": 1.4312, "step": 7497 }, { "epoch": 0.4179254222172677, "grad_norm": 0.5070070028305054, "learning_rate": 6.425546949007639e-05, "loss": 1.4918, "step": 7498 }, { "epoch": 0.4179811604704309, "grad_norm": 0.5226110219955444, "learning_rate": 6.424693459822842e-05, "loss": 1.6224, "step": 7499 }, { "epoch": 0.418036898723594, "grad_norm": 0.5620803833007812, "learning_rate": 6.423839925455077e-05, "loss": 1.815, "step": 7500 }, { "epoch": 0.41809263697675714, "grad_norm": 0.5102522969245911, "learning_rate": 6.422986345931411e-05, "loss": 1.6608, "step": 7501 }, { "epoch": 0.4181483752299203, "grad_norm": 0.5353087782859802, "learning_rate": 6.422132721278915e-05, "loss": 1.5651, "step": 7502 }, { "epoch": 0.41820411348308345, "grad_norm": 0.6161815524101257, "learning_rate": 6.421279051524658e-05, "loss": 1.6941, "step": 7503 }, { "epoch": 0.4182598517362466, "grad_norm": 0.6280367970466614, "learning_rate": 6.420425336695719e-05, "loss": 1.8122, "step": 7504 }, { "epoch": 0.4183155899894097, "grad_norm": 0.5285361409187317, "learning_rate": 6.419571576819168e-05, "loss": 1.59, "step": 7505 }, { "epoch": 0.4183713282425729, "grad_norm": 0.5601312518119812, "learning_rate": 6.418717771922084e-05, "loss": 1.6675, "step": 7506 }, { "epoch": 0.418427066495736, "grad_norm": 0.6108425855636597, "learning_rate": 6.417863922031544e-05, "loss": 1.9184, "step": 7507 }, { "epoch": 0.41848280474889915, "grad_norm": 0.5752027034759521, "learning_rate": 6.417010027174627e-05, "loss": 1.7789, "step": 7508 }, { "epoch": 0.41853854300206234, "grad_norm": 0.5731359720230103, "learning_rate": 6.416156087378415e-05, "loss": 1.6246, "step": 7509 }, { "epoch": 0.41859428125522546, "grad_norm": 0.5547140836715698, "learning_rate": 6.415302102669987e-05, "loss": 1.5967, "step": 7510 }, { "epoch": 0.4186500195083886, "grad_norm": 0.5709370970726013, "learning_rate": 6.414448073076429e-05, "loss": 1.6613, "step": 7511 }, { "epoch": 0.4187057577615518, "grad_norm": 0.5591392517089844, "learning_rate": 6.413593998624824e-05, "loss": 1.709, "step": 7512 }, { "epoch": 0.4187614960147149, "grad_norm": 0.5560973286628723, "learning_rate": 6.41273987934226e-05, "loss": 1.6281, "step": 7513 }, { "epoch": 0.41881723426787804, "grad_norm": 0.5822799205780029, "learning_rate": 6.411885715255823e-05, "loss": 1.7274, "step": 7514 }, { "epoch": 0.41887297252104116, "grad_norm": 0.5955770611763, "learning_rate": 6.411031506392605e-05, "loss": 1.6704, "step": 7515 }, { "epoch": 0.41892871077420435, "grad_norm": 0.5852923393249512, "learning_rate": 6.410177252779692e-05, "loss": 1.7526, "step": 7516 }, { "epoch": 0.4189844490273675, "grad_norm": 0.5543795228004456, "learning_rate": 6.409322954444179e-05, "loss": 1.5793, "step": 7517 }, { "epoch": 0.4190401872805306, "grad_norm": 0.5983227491378784, "learning_rate": 6.408468611413159e-05, "loss": 1.8319, "step": 7518 }, { "epoch": 0.4190959255336938, "grad_norm": 0.5510286688804626, "learning_rate": 6.407614223713727e-05, "loss": 1.6506, "step": 7519 }, { "epoch": 0.4191516637868569, "grad_norm": 0.5010602474212646, "learning_rate": 6.40675979137298e-05, "loss": 1.5807, "step": 7520 }, { "epoch": 0.41920740204002005, "grad_norm": 0.5825363397598267, "learning_rate": 6.405905314418013e-05, "loss": 1.6839, "step": 7521 }, { "epoch": 0.41926314029318323, "grad_norm": 0.5282953977584839, "learning_rate": 6.405050792875926e-05, "loss": 1.5602, "step": 7522 }, { "epoch": 0.41931887854634636, "grad_norm": 0.5378554463386536, "learning_rate": 6.40419622677382e-05, "loss": 1.5204, "step": 7523 }, { "epoch": 0.4193746167995095, "grad_norm": 0.548743486404419, "learning_rate": 6.403341616138797e-05, "loss": 1.7654, "step": 7524 }, { "epoch": 0.4194303550526727, "grad_norm": 0.5437180399894714, "learning_rate": 6.40248696099796e-05, "loss": 1.7341, "step": 7525 }, { "epoch": 0.4194860933058358, "grad_norm": 0.7081752419471741, "learning_rate": 6.401632261378414e-05, "loss": 1.3932, "step": 7526 }, { "epoch": 0.41954183155899893, "grad_norm": 0.6215348243713379, "learning_rate": 6.400777517307265e-05, "loss": 1.9211, "step": 7527 }, { "epoch": 0.41959756981216206, "grad_norm": 0.5972661375999451, "learning_rate": 6.39992272881162e-05, "loss": 1.848, "step": 7528 }, { "epoch": 0.41965330806532525, "grad_norm": 0.5357066988945007, "learning_rate": 6.399067895918587e-05, "loss": 1.6233, "step": 7529 }, { "epoch": 0.4197090463184884, "grad_norm": 0.5154542922973633, "learning_rate": 6.39821301865528e-05, "loss": 1.578, "step": 7530 }, { "epoch": 0.4197647845716515, "grad_norm": 0.524694561958313, "learning_rate": 6.397358097048806e-05, "loss": 1.6923, "step": 7531 }, { "epoch": 0.4198205228248147, "grad_norm": 0.5902459025382996, "learning_rate": 6.39650313112628e-05, "loss": 1.7314, "step": 7532 }, { "epoch": 0.4198762610779778, "grad_norm": 0.5320487022399902, "learning_rate": 6.39564812091482e-05, "loss": 1.6396, "step": 7533 }, { "epoch": 0.41993199933114095, "grad_norm": 0.5881032943725586, "learning_rate": 6.394793066441534e-05, "loss": 1.8865, "step": 7534 }, { "epoch": 0.41998773758430413, "grad_norm": 0.5616896748542786, "learning_rate": 6.393937967733548e-05, "loss": 1.8735, "step": 7535 }, { "epoch": 0.42004347583746726, "grad_norm": 0.5341779589653015, "learning_rate": 6.393082824817974e-05, "loss": 1.635, "step": 7536 }, { "epoch": 0.4200992140906304, "grad_norm": 0.5636286735534668, "learning_rate": 6.392227637721937e-05, "loss": 1.797, "step": 7537 }, { "epoch": 0.4201549523437935, "grad_norm": 0.5334611535072327, "learning_rate": 6.391372406472557e-05, "loss": 1.6705, "step": 7538 }, { "epoch": 0.4202106905969567, "grad_norm": 0.588848888874054, "learning_rate": 6.390517131096955e-05, "loss": 1.7877, "step": 7539 }, { "epoch": 0.42026642885011983, "grad_norm": 0.5427910685539246, "learning_rate": 6.389661811622258e-05, "loss": 1.5672, "step": 7540 }, { "epoch": 0.42032216710328296, "grad_norm": 0.6046989560127258, "learning_rate": 6.388806448075591e-05, "loss": 1.8186, "step": 7541 }, { "epoch": 0.42037790535644615, "grad_norm": 0.5373850464820862, "learning_rate": 6.38795104048408e-05, "loss": 1.5539, "step": 7542 }, { "epoch": 0.4204336436096093, "grad_norm": 0.5726231336593628, "learning_rate": 6.387095588874854e-05, "loss": 1.6383, "step": 7543 }, { "epoch": 0.4204893818627724, "grad_norm": 0.5964796543121338, "learning_rate": 6.386240093275044e-05, "loss": 1.9338, "step": 7544 }, { "epoch": 0.4205451201159356, "grad_norm": 0.5379793047904968, "learning_rate": 6.385384553711779e-05, "loss": 1.5479, "step": 7545 }, { "epoch": 0.4206008583690987, "grad_norm": 0.5321194529533386, "learning_rate": 6.384528970212196e-05, "loss": 1.6119, "step": 7546 }, { "epoch": 0.42065659662226185, "grad_norm": 0.6583168506622314, "learning_rate": 6.383673342803424e-05, "loss": 1.7555, "step": 7547 }, { "epoch": 0.42071233487542503, "grad_norm": 0.5755535364151001, "learning_rate": 6.382817671512603e-05, "loss": 1.629, "step": 7548 }, { "epoch": 0.42076807312858816, "grad_norm": 0.614747941493988, "learning_rate": 6.381961956366865e-05, "loss": 2.0066, "step": 7549 }, { "epoch": 0.4208238113817513, "grad_norm": 0.5643095374107361, "learning_rate": 6.381106197393353e-05, "loss": 1.7497, "step": 7550 }, { "epoch": 0.4208795496349144, "grad_norm": 0.5332757234573364, "learning_rate": 6.380250394619205e-05, "loss": 1.4505, "step": 7551 }, { "epoch": 0.4209352878880776, "grad_norm": 0.5462849736213684, "learning_rate": 6.379394548071563e-05, "loss": 1.7164, "step": 7552 }, { "epoch": 0.42099102614124073, "grad_norm": 0.5277321338653564, "learning_rate": 6.378538657777565e-05, "loss": 1.4521, "step": 7553 }, { "epoch": 0.42104676439440386, "grad_norm": 0.5687193274497986, "learning_rate": 6.37768272376436e-05, "loss": 1.6832, "step": 7554 }, { "epoch": 0.42110250264756705, "grad_norm": 0.5538173913955688, "learning_rate": 6.376826746059092e-05, "loss": 1.5916, "step": 7555 }, { "epoch": 0.4211582409007302, "grad_norm": 0.5794023871421814, "learning_rate": 6.375970724688906e-05, "loss": 1.5985, "step": 7556 }, { "epoch": 0.4212139791538933, "grad_norm": 0.534807026386261, "learning_rate": 6.375114659680951e-05, "loss": 1.5822, "step": 7557 }, { "epoch": 0.4212697174070565, "grad_norm": 0.5474613308906555, "learning_rate": 6.374258551062378e-05, "loss": 1.7155, "step": 7558 }, { "epoch": 0.4213254556602196, "grad_norm": 0.558594286441803, "learning_rate": 6.373402398860336e-05, "loss": 1.7239, "step": 7559 }, { "epoch": 0.42138119391338275, "grad_norm": 0.6263135671615601, "learning_rate": 6.372546203101977e-05, "loss": 1.8782, "step": 7560 }, { "epoch": 0.4214369321665459, "grad_norm": 0.5759534239768982, "learning_rate": 6.371689963814455e-05, "loss": 1.798, "step": 7561 }, { "epoch": 0.42149267041970906, "grad_norm": 0.582333505153656, "learning_rate": 6.370833681024924e-05, "loss": 1.679, "step": 7562 }, { "epoch": 0.4215484086728722, "grad_norm": 0.5175591707229614, "learning_rate": 6.369977354760541e-05, "loss": 1.6172, "step": 7563 }, { "epoch": 0.4216041469260353, "grad_norm": 0.6253464818000793, "learning_rate": 6.369120985048464e-05, "loss": 1.8897, "step": 7564 }, { "epoch": 0.4216598851791985, "grad_norm": 0.6171419024467468, "learning_rate": 6.368264571915854e-05, "loss": 1.9296, "step": 7565 }, { "epoch": 0.42171562343236163, "grad_norm": 0.5854969620704651, "learning_rate": 6.367408115389868e-05, "loss": 1.8127, "step": 7566 }, { "epoch": 0.42177136168552476, "grad_norm": 0.5167074203491211, "learning_rate": 6.366551615497669e-05, "loss": 1.4419, "step": 7567 }, { "epoch": 0.42182709993868794, "grad_norm": 0.5605902075767517, "learning_rate": 6.36569507226642e-05, "loss": 1.5106, "step": 7568 }, { "epoch": 0.4218828381918511, "grad_norm": 0.5542864799499512, "learning_rate": 6.364838485723286e-05, "loss": 1.6104, "step": 7569 }, { "epoch": 0.4219385764450142, "grad_norm": 0.5589380860328674, "learning_rate": 6.363981855895433e-05, "loss": 1.8112, "step": 7570 }, { "epoch": 0.4219943146981774, "grad_norm": 0.5342586040496826, "learning_rate": 6.363125182810028e-05, "loss": 1.668, "step": 7571 }, { "epoch": 0.4220500529513405, "grad_norm": 0.5474408268928528, "learning_rate": 6.36226846649424e-05, "loss": 1.477, "step": 7572 }, { "epoch": 0.42210579120450364, "grad_norm": 0.549768328666687, "learning_rate": 6.361411706975237e-05, "loss": 1.6127, "step": 7573 }, { "epoch": 0.4221615294576668, "grad_norm": 0.5820984244346619, "learning_rate": 6.360554904280196e-05, "loss": 1.7687, "step": 7574 }, { "epoch": 0.42221726771082996, "grad_norm": 0.5574761033058167, "learning_rate": 6.359698058436282e-05, "loss": 1.7282, "step": 7575 }, { "epoch": 0.4222730059639931, "grad_norm": 0.5506951808929443, "learning_rate": 6.358841169470676e-05, "loss": 1.6214, "step": 7576 }, { "epoch": 0.4223287442171562, "grad_norm": 0.5659124851226807, "learning_rate": 6.35798423741055e-05, "loss": 1.6966, "step": 7577 }, { "epoch": 0.4223844824703194, "grad_norm": 0.5484572052955627, "learning_rate": 6.357127262283081e-05, "loss": 1.6683, "step": 7578 }, { "epoch": 0.42244022072348253, "grad_norm": 0.4761580526828766, "learning_rate": 6.356270244115448e-05, "loss": 1.3579, "step": 7579 }, { "epoch": 0.42249595897664566, "grad_norm": 0.5656337738037109, "learning_rate": 6.355413182934831e-05, "loss": 1.7506, "step": 7580 }, { "epoch": 0.42255169722980884, "grad_norm": 0.6253755688667297, "learning_rate": 6.35455607876841e-05, "loss": 1.5443, "step": 7581 }, { "epoch": 0.42260743548297197, "grad_norm": 0.5522517561912537, "learning_rate": 6.353698931643368e-05, "loss": 1.7318, "step": 7582 }, { "epoch": 0.4226631737361351, "grad_norm": 0.5824682712554932, "learning_rate": 6.352841741586888e-05, "loss": 1.9499, "step": 7583 }, { "epoch": 0.42271891198929823, "grad_norm": 0.6166448593139648, "learning_rate": 6.351984508626155e-05, "loss": 1.6598, "step": 7584 }, { "epoch": 0.4227746502424614, "grad_norm": 0.6640730500221252, "learning_rate": 6.351127232788357e-05, "loss": 1.9022, "step": 7585 }, { "epoch": 0.42283038849562454, "grad_norm": 0.5395544171333313, "learning_rate": 6.350269914100681e-05, "loss": 1.8523, "step": 7586 }, { "epoch": 0.42288612674878767, "grad_norm": 0.597951352596283, "learning_rate": 6.349412552590317e-05, "loss": 1.7423, "step": 7587 }, { "epoch": 0.42294186500195086, "grad_norm": 0.5310340523719788, "learning_rate": 6.348555148284452e-05, "loss": 1.6669, "step": 7588 }, { "epoch": 0.422997603255114, "grad_norm": 0.563275933265686, "learning_rate": 6.347697701210281e-05, "loss": 1.8138, "step": 7589 }, { "epoch": 0.4230533415082771, "grad_norm": 0.5225051641464233, "learning_rate": 6.346840211394998e-05, "loss": 1.5228, "step": 7590 }, { "epoch": 0.4231090797614403, "grad_norm": 0.5949013233184814, "learning_rate": 6.345982678865795e-05, "loss": 1.8378, "step": 7591 }, { "epoch": 0.4231648180146034, "grad_norm": 0.6444050073623657, "learning_rate": 6.345125103649869e-05, "loss": 1.9561, "step": 7592 }, { "epoch": 0.42322055626776656, "grad_norm": 0.538077712059021, "learning_rate": 6.344267485774417e-05, "loss": 1.6172, "step": 7593 }, { "epoch": 0.42327629452092974, "grad_norm": 0.5770418047904968, "learning_rate": 6.34340982526664e-05, "loss": 1.7064, "step": 7594 }, { "epoch": 0.42333203277409287, "grad_norm": 0.5491243600845337, "learning_rate": 6.342552122153734e-05, "loss": 1.5869, "step": 7595 }, { "epoch": 0.423387771027256, "grad_norm": 0.5911741852760315, "learning_rate": 6.3416943764629e-05, "loss": 1.4539, "step": 7596 }, { "epoch": 0.42344350928041913, "grad_norm": 0.5493375062942505, "learning_rate": 6.340836588221347e-05, "loss": 1.2324, "step": 7597 }, { "epoch": 0.4234992475335823, "grad_norm": 0.5272154808044434, "learning_rate": 6.339978757456274e-05, "loss": 1.7336, "step": 7598 }, { "epoch": 0.42355498578674544, "grad_norm": 0.6132648587226868, "learning_rate": 6.339120884194886e-05, "loss": 1.8399, "step": 7599 }, { "epoch": 0.42361072403990857, "grad_norm": 0.6002299189567566, "learning_rate": 6.338262968464394e-05, "loss": 1.7355, "step": 7600 }, { "epoch": 0.42366646229307175, "grad_norm": 0.5747309327125549, "learning_rate": 6.337405010292e-05, "loss": 1.5466, "step": 7601 }, { "epoch": 0.4237222005462349, "grad_norm": 0.6044133901596069, "learning_rate": 6.336547009704919e-05, "loss": 1.894, "step": 7602 }, { "epoch": 0.423777938799398, "grad_norm": 0.6029581427574158, "learning_rate": 6.335688966730358e-05, "loss": 1.7874, "step": 7603 }, { "epoch": 0.4238336770525612, "grad_norm": 0.5374162197113037, "learning_rate": 6.334830881395533e-05, "loss": 1.4537, "step": 7604 }, { "epoch": 0.4238894153057243, "grad_norm": 0.5794885158538818, "learning_rate": 6.333972753727653e-05, "loss": 1.6731, "step": 7605 }, { "epoch": 0.42394515355888746, "grad_norm": 0.6136147379875183, "learning_rate": 6.333114583753936e-05, "loss": 2.0005, "step": 7606 }, { "epoch": 0.4240008918120506, "grad_norm": 0.6465775370597839, "learning_rate": 6.332256371501597e-05, "loss": 1.7024, "step": 7607 }, { "epoch": 0.42405663006521377, "grad_norm": 0.4953748285770416, "learning_rate": 6.331398116997851e-05, "loss": 1.4046, "step": 7608 }, { "epoch": 0.4241123683183769, "grad_norm": 0.5147947669029236, "learning_rate": 6.330539820269921e-05, "loss": 1.7066, "step": 7609 }, { "epoch": 0.42416810657154, "grad_norm": 0.5854727029800415, "learning_rate": 6.329681481345026e-05, "loss": 1.7871, "step": 7610 }, { "epoch": 0.4242238448247032, "grad_norm": 0.5421152710914612, "learning_rate": 6.328823100250386e-05, "loss": 1.6782, "step": 7611 }, { "epoch": 0.42427958307786634, "grad_norm": 0.5201201438903809, "learning_rate": 6.327964677013224e-05, "loss": 1.6405, "step": 7612 }, { "epoch": 0.42433532133102947, "grad_norm": 0.5656992197036743, "learning_rate": 6.327106211660769e-05, "loss": 1.798, "step": 7613 }, { "epoch": 0.42439105958419265, "grad_norm": 0.5751951336860657, "learning_rate": 6.326247704220239e-05, "loss": 1.6055, "step": 7614 }, { "epoch": 0.4244467978373558, "grad_norm": 0.546371579170227, "learning_rate": 6.325389154718865e-05, "loss": 1.7596, "step": 7615 }, { "epoch": 0.4245025360905189, "grad_norm": 0.5406731367111206, "learning_rate": 6.324530563183875e-05, "loss": 1.6401, "step": 7616 }, { "epoch": 0.4245582743436821, "grad_norm": 0.5809882879257202, "learning_rate": 6.323671929642498e-05, "loss": 1.868, "step": 7617 }, { "epoch": 0.4246140125968452, "grad_norm": 0.540643572807312, "learning_rate": 6.322813254121964e-05, "loss": 1.715, "step": 7618 }, { "epoch": 0.42466975085000835, "grad_norm": 0.5267550945281982, "learning_rate": 6.321954536649508e-05, "loss": 1.5837, "step": 7619 }, { "epoch": 0.4247254891031715, "grad_norm": 0.5602602958679199, "learning_rate": 6.32109577725236e-05, "loss": 1.7406, "step": 7620 }, { "epoch": 0.42478122735633467, "grad_norm": 0.5607280731201172, "learning_rate": 6.320236975957757e-05, "loss": 1.6099, "step": 7621 }, { "epoch": 0.4248369656094978, "grad_norm": 0.5364249348640442, "learning_rate": 6.319378132792935e-05, "loss": 1.5277, "step": 7622 }, { "epoch": 0.4248927038626609, "grad_norm": 0.5527327656745911, "learning_rate": 6.318519247785131e-05, "loss": 1.7702, "step": 7623 }, { "epoch": 0.4249484421158241, "grad_norm": 0.5770801901817322, "learning_rate": 6.317660320961585e-05, "loss": 1.6098, "step": 7624 }, { "epoch": 0.42500418036898724, "grad_norm": 0.5606113076210022, "learning_rate": 6.316801352349534e-05, "loss": 1.6451, "step": 7625 }, { "epoch": 0.42505991862215037, "grad_norm": 0.6124593615531921, "learning_rate": 6.315942341976223e-05, "loss": 1.9987, "step": 7626 }, { "epoch": 0.42511565687531355, "grad_norm": 0.5524605512619019, "learning_rate": 6.315083289868892e-05, "loss": 1.6352, "step": 7627 }, { "epoch": 0.4251713951284767, "grad_norm": 0.5734837651252747, "learning_rate": 6.314224196054787e-05, "loss": 1.8757, "step": 7628 }, { "epoch": 0.4252271333816398, "grad_norm": 0.64513099193573, "learning_rate": 6.313365060561153e-05, "loss": 2.0665, "step": 7629 }, { "epoch": 0.42528287163480294, "grad_norm": 0.5457690954208374, "learning_rate": 6.312505883415238e-05, "loss": 1.6602, "step": 7630 }, { "epoch": 0.4253386098879661, "grad_norm": 0.6007886528968811, "learning_rate": 6.311646664644288e-05, "loss": 1.7241, "step": 7631 }, { "epoch": 0.42539434814112925, "grad_norm": 0.5715931057929993, "learning_rate": 6.310787404275553e-05, "loss": 1.7581, "step": 7632 }, { "epoch": 0.4254500863942924, "grad_norm": 0.5710930228233337, "learning_rate": 6.309928102336284e-05, "loss": 1.7147, "step": 7633 }, { "epoch": 0.42550582464745557, "grad_norm": 0.5583118796348572, "learning_rate": 6.309068758853732e-05, "loss": 1.6103, "step": 7634 }, { "epoch": 0.4255615629006187, "grad_norm": 0.5537952184677124, "learning_rate": 6.308209373855154e-05, "loss": 1.6947, "step": 7635 }, { "epoch": 0.4256173011537818, "grad_norm": 0.5451967716217041, "learning_rate": 6.3073499473678e-05, "loss": 1.6384, "step": 7636 }, { "epoch": 0.425673039406945, "grad_norm": 0.5317254066467285, "learning_rate": 6.30649047941893e-05, "loss": 1.5643, "step": 7637 }, { "epoch": 0.42572877766010814, "grad_norm": 0.5423393845558167, "learning_rate": 6.305630970035796e-05, "loss": 1.5257, "step": 7638 }, { "epoch": 0.42578451591327127, "grad_norm": 0.5897427797317505, "learning_rate": 6.304771419245663e-05, "loss": 1.8738, "step": 7639 }, { "epoch": 0.42584025416643445, "grad_norm": 0.5559675097465515, "learning_rate": 6.303911827075786e-05, "loss": 1.8562, "step": 7640 }, { "epoch": 0.4258959924195976, "grad_norm": 0.5857858061790466, "learning_rate": 6.303052193553429e-05, "loss": 1.7146, "step": 7641 }, { "epoch": 0.4259517306727607, "grad_norm": 0.6495271325111389, "learning_rate": 6.302192518705853e-05, "loss": 1.7639, "step": 7642 }, { "epoch": 0.42600746892592384, "grad_norm": 0.5638108253479004, "learning_rate": 6.301332802560325e-05, "loss": 1.5804, "step": 7643 }, { "epoch": 0.426063207179087, "grad_norm": 0.5066633224487305, "learning_rate": 6.300473045144107e-05, "loss": 1.4344, "step": 7644 }, { "epoch": 0.42611894543225015, "grad_norm": 0.5637665390968323, "learning_rate": 6.299613246484464e-05, "loss": 1.6573, "step": 7645 }, { "epoch": 0.4261746836854133, "grad_norm": 0.5206940174102783, "learning_rate": 6.298753406608668e-05, "loss": 1.5995, "step": 7646 }, { "epoch": 0.42623042193857646, "grad_norm": 0.5374553799629211, "learning_rate": 6.297893525543986e-05, "loss": 1.7107, "step": 7647 }, { "epoch": 0.4262861601917396, "grad_norm": 0.5552041530609131, "learning_rate": 6.297033603317689e-05, "loss": 1.6734, "step": 7648 }, { "epoch": 0.4263418984449027, "grad_norm": 0.5269225239753723, "learning_rate": 6.296173639957045e-05, "loss": 1.64, "step": 7649 }, { "epoch": 0.4263976366980659, "grad_norm": 0.5553382635116577, "learning_rate": 6.295313635489335e-05, "loss": 1.3837, "step": 7650 }, { "epoch": 0.42645337495122904, "grad_norm": 0.5205674171447754, "learning_rate": 6.294453589941826e-05, "loss": 1.6142, "step": 7651 }, { "epoch": 0.42650911320439217, "grad_norm": 0.6198689937591553, "learning_rate": 6.2935935033418e-05, "loss": 1.7297, "step": 7652 }, { "epoch": 0.4265648514575553, "grad_norm": 0.556909441947937, "learning_rate": 6.292733375716526e-05, "loss": 1.7119, "step": 7653 }, { "epoch": 0.4266205897107185, "grad_norm": 0.5496246218681335, "learning_rate": 6.291873207093287e-05, "loss": 1.6478, "step": 7654 }, { "epoch": 0.4266763279638816, "grad_norm": 0.5758047103881836, "learning_rate": 6.291012997499362e-05, "loss": 1.8439, "step": 7655 }, { "epoch": 0.42673206621704474, "grad_norm": 0.5833730697631836, "learning_rate": 6.290152746962034e-05, "loss": 1.6251, "step": 7656 }, { "epoch": 0.4267878044702079, "grad_norm": 0.509559690952301, "learning_rate": 6.289292455508582e-05, "loss": 1.6364, "step": 7657 }, { "epoch": 0.42684354272337105, "grad_norm": 0.5244433879852295, "learning_rate": 6.28843212316629e-05, "loss": 1.4855, "step": 7658 }, { "epoch": 0.4268992809765342, "grad_norm": 0.5262942314147949, "learning_rate": 6.287571749962444e-05, "loss": 1.6034, "step": 7659 }, { "epoch": 0.42695501922969736, "grad_norm": 0.592850923538208, "learning_rate": 6.286711335924326e-05, "loss": 2.0333, "step": 7660 }, { "epoch": 0.4270107574828605, "grad_norm": 0.5585233569145203, "learning_rate": 6.28585088107923e-05, "loss": 1.7037, "step": 7661 }, { "epoch": 0.4270664957360236, "grad_norm": 0.5201496481895447, "learning_rate": 6.284990385454439e-05, "loss": 1.5226, "step": 7662 }, { "epoch": 0.4271222339891868, "grad_norm": 0.5410779714584351, "learning_rate": 6.284129849077247e-05, "loss": 1.6186, "step": 7663 }, { "epoch": 0.42717797224234993, "grad_norm": 0.5643417835235596, "learning_rate": 6.283269271974941e-05, "loss": 1.7211, "step": 7664 }, { "epoch": 0.42723371049551306, "grad_norm": 0.5603637099266052, "learning_rate": 6.282408654174818e-05, "loss": 1.6978, "step": 7665 }, { "epoch": 0.4272894487486762, "grad_norm": 0.5303884744644165, "learning_rate": 6.281547995704168e-05, "loss": 1.5544, "step": 7666 }, { "epoch": 0.4273451870018394, "grad_norm": 0.5895907282829285, "learning_rate": 6.280687296590287e-05, "loss": 1.697, "step": 7667 }, { "epoch": 0.4274009252550025, "grad_norm": 0.566055953502655, "learning_rate": 6.279826556860472e-05, "loss": 1.6596, "step": 7668 }, { "epoch": 0.42745666350816564, "grad_norm": 0.5401179790496826, "learning_rate": 6.278965776542021e-05, "loss": 1.7029, "step": 7669 }, { "epoch": 0.4275124017613288, "grad_norm": 0.6178464889526367, "learning_rate": 6.278104955662234e-05, "loss": 1.7344, "step": 7670 }, { "epoch": 0.42756814001449195, "grad_norm": 0.5440572500228882, "learning_rate": 6.277244094248407e-05, "loss": 1.7182, "step": 7671 }, { "epoch": 0.4276238782676551, "grad_norm": 0.5953531265258789, "learning_rate": 6.276383192327846e-05, "loss": 1.7045, "step": 7672 }, { "epoch": 0.42767961652081826, "grad_norm": 0.5182901620864868, "learning_rate": 6.27552224992785e-05, "loss": 1.5657, "step": 7673 }, { "epoch": 0.4277353547739814, "grad_norm": 0.5608685612678528, "learning_rate": 6.274661267075728e-05, "loss": 1.701, "step": 7674 }, { "epoch": 0.4277910930271445, "grad_norm": 0.5933842658996582, "learning_rate": 6.27380024379878e-05, "loss": 1.804, "step": 7675 }, { "epoch": 0.42784683128030765, "grad_norm": 0.586521327495575, "learning_rate": 6.272939180124317e-05, "loss": 1.7744, "step": 7676 }, { "epoch": 0.42790256953347083, "grad_norm": 0.6096509695053101, "learning_rate": 6.272078076079644e-05, "loss": 1.9837, "step": 7677 }, { "epoch": 0.42795830778663396, "grad_norm": 0.6212565302848816, "learning_rate": 6.27121693169207e-05, "loss": 1.8042, "step": 7678 }, { "epoch": 0.4280140460397971, "grad_norm": 0.5542432069778442, "learning_rate": 6.270355746988908e-05, "loss": 1.6222, "step": 7679 }, { "epoch": 0.4280697842929603, "grad_norm": 0.5913196802139282, "learning_rate": 6.269494521997467e-05, "loss": 1.6313, "step": 7680 }, { "epoch": 0.4281255225461234, "grad_norm": 0.5573778748512268, "learning_rate": 6.268633256745063e-05, "loss": 1.7364, "step": 7681 }, { "epoch": 0.42818126079928653, "grad_norm": 0.5151004195213318, "learning_rate": 6.267771951259009e-05, "loss": 1.8938, "step": 7682 }, { "epoch": 0.4282369990524497, "grad_norm": 0.5424497127532959, "learning_rate": 6.26691060556662e-05, "loss": 1.706, "step": 7683 }, { "epoch": 0.42829273730561285, "grad_norm": 0.5353766083717346, "learning_rate": 6.266049219695211e-05, "loss": 1.6015, "step": 7684 }, { "epoch": 0.428348475558776, "grad_norm": 0.5848101377487183, "learning_rate": 6.265187793672105e-05, "loss": 1.9252, "step": 7685 }, { "epoch": 0.42840421381193916, "grad_norm": 0.5816083550453186, "learning_rate": 6.264326327524617e-05, "loss": 1.7076, "step": 7686 }, { "epoch": 0.4284599520651023, "grad_norm": 0.595378577709198, "learning_rate": 6.263464821280071e-05, "loss": 1.8343, "step": 7687 }, { "epoch": 0.4285156903182654, "grad_norm": 0.5391969084739685, "learning_rate": 6.262603274965786e-05, "loss": 1.5771, "step": 7688 }, { "epoch": 0.42857142857142855, "grad_norm": 0.5316036939620972, "learning_rate": 6.261741688609087e-05, "loss": 1.6646, "step": 7689 }, { "epoch": 0.42862716682459173, "grad_norm": 0.5671446323394775, "learning_rate": 6.260880062237299e-05, "loss": 1.8235, "step": 7690 }, { "epoch": 0.42868290507775486, "grad_norm": 0.5752628445625305, "learning_rate": 6.260018395877747e-05, "loss": 1.7776, "step": 7691 }, { "epoch": 0.428738643330918, "grad_norm": 0.5416520833969116, "learning_rate": 6.259156689557757e-05, "loss": 1.5817, "step": 7692 }, { "epoch": 0.4287943815840812, "grad_norm": 0.5795433521270752, "learning_rate": 6.258294943304656e-05, "loss": 1.6236, "step": 7693 }, { "epoch": 0.4288501198372443, "grad_norm": 0.5906192064285278, "learning_rate": 6.257433157145779e-05, "loss": 1.8114, "step": 7694 }, { "epoch": 0.42890585809040743, "grad_norm": 0.589847207069397, "learning_rate": 6.256571331108454e-05, "loss": 1.7796, "step": 7695 }, { "epoch": 0.4289615963435706, "grad_norm": 0.5236275792121887, "learning_rate": 6.25570946522001e-05, "loss": 1.4089, "step": 7696 }, { "epoch": 0.42901733459673375, "grad_norm": 0.5735291838645935, "learning_rate": 6.254847559507783e-05, "loss": 1.8332, "step": 7697 }, { "epoch": 0.4290730728498969, "grad_norm": 0.5835584998130798, "learning_rate": 6.253985613999111e-05, "loss": 1.7905, "step": 7698 }, { "epoch": 0.42912881110306, "grad_norm": 0.5706406831741333, "learning_rate": 6.253123628721324e-05, "loss": 1.7185, "step": 7699 }, { "epoch": 0.4291845493562232, "grad_norm": 0.6053869724273682, "learning_rate": 6.252261603701762e-05, "loss": 1.6092, "step": 7700 }, { "epoch": 0.4292402876093863, "grad_norm": 0.559517502784729, "learning_rate": 6.251399538967764e-05, "loss": 1.6353, "step": 7701 }, { "epoch": 0.42929602586254945, "grad_norm": 0.5170453190803528, "learning_rate": 6.250537434546668e-05, "loss": 1.5933, "step": 7702 }, { "epoch": 0.42935176411571263, "grad_norm": 0.5452066659927368, "learning_rate": 6.249675290465817e-05, "loss": 1.5875, "step": 7703 }, { "epoch": 0.42940750236887576, "grad_norm": 0.5306586623191833, "learning_rate": 6.248813106752551e-05, "loss": 1.4277, "step": 7704 }, { "epoch": 0.4294632406220389, "grad_norm": 0.601926863193512, "learning_rate": 6.247950883434214e-05, "loss": 1.667, "step": 7705 }, { "epoch": 0.4295189788752021, "grad_norm": 0.6103541254997253, "learning_rate": 6.24708862053815e-05, "loss": 1.6387, "step": 7706 }, { "epoch": 0.4295747171283652, "grad_norm": 0.5850464701652527, "learning_rate": 6.246226318091708e-05, "loss": 1.5703, "step": 7707 }, { "epoch": 0.42963045538152833, "grad_norm": 0.564311683177948, "learning_rate": 6.245363976122232e-05, "loss": 1.4084, "step": 7708 }, { "epoch": 0.4296861936346915, "grad_norm": 0.5692956447601318, "learning_rate": 6.244501594657073e-05, "loss": 1.6056, "step": 7709 }, { "epoch": 0.42974193188785464, "grad_norm": 0.48438626527786255, "learning_rate": 6.243639173723577e-05, "loss": 1.3122, "step": 7710 }, { "epoch": 0.4297976701410178, "grad_norm": 0.5293724536895752, "learning_rate": 6.2427767133491e-05, "loss": 1.5922, "step": 7711 }, { "epoch": 0.4298534083941809, "grad_norm": 0.5632352232933044, "learning_rate": 6.241914213560988e-05, "loss": 1.7423, "step": 7712 }, { "epoch": 0.4299091466473441, "grad_norm": 0.5172026753425598, "learning_rate": 6.241051674386602e-05, "loss": 1.4298, "step": 7713 }, { "epoch": 0.4299648849005072, "grad_norm": 0.5803625583648682, "learning_rate": 6.24018909585329e-05, "loss": 1.6772, "step": 7714 }, { "epoch": 0.43002062315367034, "grad_norm": 0.530988335609436, "learning_rate": 6.239326477988413e-05, "loss": 1.7007, "step": 7715 }, { "epoch": 0.43007636140683353, "grad_norm": 0.5132483243942261, "learning_rate": 6.238463820819325e-05, "loss": 1.5829, "step": 7716 }, { "epoch": 0.43013209965999666, "grad_norm": 0.6094499230384827, "learning_rate": 6.237601124373385e-05, "loss": 1.7885, "step": 7717 }, { "epoch": 0.4301878379131598, "grad_norm": 0.5744908452033997, "learning_rate": 6.236738388677952e-05, "loss": 1.7993, "step": 7718 }, { "epoch": 0.43024357616632297, "grad_norm": 0.6198621392250061, "learning_rate": 6.23587561376039e-05, "loss": 1.8437, "step": 7719 }, { "epoch": 0.4302993144194861, "grad_norm": 0.5478682518005371, "learning_rate": 6.235012799648057e-05, "loss": 1.7246, "step": 7720 }, { "epoch": 0.43035505267264923, "grad_norm": 0.5738255381584167, "learning_rate": 6.23414994636832e-05, "loss": 1.7322, "step": 7721 }, { "epoch": 0.43041079092581236, "grad_norm": 0.6019119024276733, "learning_rate": 6.233287053948543e-05, "loss": 1.6743, "step": 7722 }, { "epoch": 0.43046652917897554, "grad_norm": 0.5403818488121033, "learning_rate": 6.23242412241609e-05, "loss": 1.5439, "step": 7723 }, { "epoch": 0.43052226743213867, "grad_norm": 0.5892661213874817, "learning_rate": 6.23156115179833e-05, "loss": 2.0254, "step": 7724 }, { "epoch": 0.4305780056853018, "grad_norm": 0.6273830533027649, "learning_rate": 6.230698142122629e-05, "loss": 1.6787, "step": 7725 }, { "epoch": 0.430633743938465, "grad_norm": 0.5560447573661804, "learning_rate": 6.229835093416361e-05, "loss": 1.711, "step": 7726 }, { "epoch": 0.4306894821916281, "grad_norm": 0.5284225344657898, "learning_rate": 6.228972005706893e-05, "loss": 1.5921, "step": 7727 }, { "epoch": 0.43074522044479124, "grad_norm": 0.5550575852394104, "learning_rate": 6.228108879021599e-05, "loss": 1.5798, "step": 7728 }, { "epoch": 0.43080095869795443, "grad_norm": 0.5931698083877563, "learning_rate": 6.22724571338785e-05, "loss": 2.0899, "step": 7729 }, { "epoch": 0.43085669695111756, "grad_norm": 0.5341006517410278, "learning_rate": 6.226382508833026e-05, "loss": 1.6937, "step": 7730 }, { "epoch": 0.4309124352042807, "grad_norm": 0.5837813019752502, "learning_rate": 6.225519265384495e-05, "loss": 1.7363, "step": 7731 }, { "epoch": 0.43096817345744387, "grad_norm": 0.5665456056594849, "learning_rate": 6.22465598306964e-05, "loss": 1.6438, "step": 7732 }, { "epoch": 0.431023911710607, "grad_norm": 0.7508494257926941, "learning_rate": 6.223792661915838e-05, "loss": 1.6701, "step": 7733 }, { "epoch": 0.43107964996377013, "grad_norm": 0.5742450952529907, "learning_rate": 6.222929301950466e-05, "loss": 1.6195, "step": 7734 }, { "epoch": 0.43113538821693326, "grad_norm": 0.5885428190231323, "learning_rate": 6.222065903200908e-05, "loss": 1.852, "step": 7735 }, { "epoch": 0.43119112647009644, "grad_norm": 0.6054401993751526, "learning_rate": 6.221202465694545e-05, "loss": 1.9739, "step": 7736 }, { "epoch": 0.43124686472325957, "grad_norm": 0.5252482891082764, "learning_rate": 6.22033898945876e-05, "loss": 1.5755, "step": 7737 }, { "epoch": 0.4313026029764227, "grad_norm": 0.5708329677581787, "learning_rate": 6.219475474520936e-05, "loss": 1.7666, "step": 7738 }, { "epoch": 0.4313583412295859, "grad_norm": 0.5406473278999329, "learning_rate": 6.218611920908461e-05, "loss": 1.6721, "step": 7739 }, { "epoch": 0.431414079482749, "grad_norm": 0.5870915055274963, "learning_rate": 6.21774832864872e-05, "loss": 1.635, "step": 7740 }, { "epoch": 0.43146981773591214, "grad_norm": 0.5580663681030273, "learning_rate": 6.216884697769104e-05, "loss": 1.7878, "step": 7741 }, { "epoch": 0.4315255559890753, "grad_norm": 0.6071598529815674, "learning_rate": 6.216021028296999e-05, "loss": 1.817, "step": 7742 }, { "epoch": 0.43158129424223846, "grad_norm": 0.5742529630661011, "learning_rate": 6.215157320259798e-05, "loss": 1.6086, "step": 7743 }, { "epoch": 0.4316370324954016, "grad_norm": 0.5802901387214661, "learning_rate": 6.214293573684889e-05, "loss": 1.7647, "step": 7744 }, { "epoch": 0.4316927707485647, "grad_norm": 0.6176155209541321, "learning_rate": 6.21342978859967e-05, "loss": 2.0043, "step": 7745 }, { "epoch": 0.4317485090017279, "grad_norm": 0.6097760200500488, "learning_rate": 6.212565965031532e-05, "loss": 1.7955, "step": 7746 }, { "epoch": 0.431804247254891, "grad_norm": 0.5612444877624512, "learning_rate": 6.211702103007871e-05, "loss": 1.6242, "step": 7747 }, { "epoch": 0.43185998550805416, "grad_norm": 0.6074878573417664, "learning_rate": 6.210838202556085e-05, "loss": 1.5951, "step": 7748 }, { "epoch": 0.43191572376121734, "grad_norm": 0.5827562808990479, "learning_rate": 6.209974263703569e-05, "loss": 1.849, "step": 7749 }, { "epoch": 0.43197146201438047, "grad_norm": 0.5888208746910095, "learning_rate": 6.209110286477727e-05, "loss": 1.7899, "step": 7750 }, { "epoch": 0.4320272002675436, "grad_norm": 0.5709846019744873, "learning_rate": 6.208246270905952e-05, "loss": 1.8588, "step": 7751 }, { "epoch": 0.4320829385207068, "grad_norm": 0.5687053203582764, "learning_rate": 6.207382217015655e-05, "loss": 1.7115, "step": 7752 }, { "epoch": 0.4321386767738699, "grad_norm": 0.5730668306350708, "learning_rate": 6.206518124834231e-05, "loss": 1.7556, "step": 7753 }, { "epoch": 0.43219441502703304, "grad_norm": 0.48593658208847046, "learning_rate": 6.205653994389087e-05, "loss": 1.4447, "step": 7754 }, { "epoch": 0.4322501532801962, "grad_norm": 0.5364407896995544, "learning_rate": 6.204789825707626e-05, "loss": 1.7097, "step": 7755 }, { "epoch": 0.43230589153335935, "grad_norm": 0.5474497079849243, "learning_rate": 6.203925618817258e-05, "loss": 1.6242, "step": 7756 }, { "epoch": 0.4323616297865225, "grad_norm": 0.5366718173027039, "learning_rate": 6.203061373745388e-05, "loss": 1.6055, "step": 7757 }, { "epoch": 0.4324173680396856, "grad_norm": 0.6138222813606262, "learning_rate": 6.202197090519428e-05, "loss": 1.6537, "step": 7758 }, { "epoch": 0.4324731062928488, "grad_norm": 0.5678575038909912, "learning_rate": 6.201332769166782e-05, "loss": 1.5895, "step": 7759 }, { "epoch": 0.4325288445460119, "grad_norm": 0.5866283178329468, "learning_rate": 6.200468409714866e-05, "loss": 1.6663, "step": 7760 }, { "epoch": 0.43258458279917505, "grad_norm": 0.5652245879173279, "learning_rate": 6.199604012191093e-05, "loss": 1.6446, "step": 7761 }, { "epoch": 0.43264032105233824, "grad_norm": 0.5838261842727661, "learning_rate": 6.198739576622872e-05, "loss": 1.8155, "step": 7762 }, { "epoch": 0.43269605930550137, "grad_norm": 0.537699818611145, "learning_rate": 6.197875103037623e-05, "loss": 1.6124, "step": 7763 }, { "epoch": 0.4327517975586645, "grad_norm": 0.6197475790977478, "learning_rate": 6.197010591462758e-05, "loss": 1.72, "step": 7764 }, { "epoch": 0.4328075358118277, "grad_norm": 0.5581753253936768, "learning_rate": 6.196146041925697e-05, "loss": 1.6948, "step": 7765 }, { "epoch": 0.4328632740649908, "grad_norm": 0.5555060505867004, "learning_rate": 6.195281454453858e-05, "loss": 1.5966, "step": 7766 }, { "epoch": 0.43291901231815394, "grad_norm": 0.5592203140258789, "learning_rate": 6.19441682907466e-05, "loss": 1.8594, "step": 7767 }, { "epoch": 0.43297475057131707, "grad_norm": 0.5492338538169861, "learning_rate": 6.193552165815525e-05, "loss": 1.707, "step": 7768 }, { "epoch": 0.43303048882448025, "grad_norm": 0.5119403600692749, "learning_rate": 6.192687464703873e-05, "loss": 1.3713, "step": 7769 }, { "epoch": 0.4330862270776434, "grad_norm": 0.6076398491859436, "learning_rate": 6.191822725767129e-05, "loss": 1.7667, "step": 7770 }, { "epoch": 0.4331419653308065, "grad_norm": 0.5796701312065125, "learning_rate": 6.190957949032716e-05, "loss": 1.688, "step": 7771 }, { "epoch": 0.4331977035839697, "grad_norm": 0.5363877415657043, "learning_rate": 6.190093134528061e-05, "loss": 1.6081, "step": 7772 }, { "epoch": 0.4332534418371328, "grad_norm": 0.5938536524772644, "learning_rate": 6.189228282280592e-05, "loss": 1.7503, "step": 7773 }, { "epoch": 0.43330918009029595, "grad_norm": 0.5643225312232971, "learning_rate": 6.188363392317734e-05, "loss": 1.7848, "step": 7774 }, { "epoch": 0.43336491834345914, "grad_norm": 0.5852196216583252, "learning_rate": 6.187498464666917e-05, "loss": 1.8112, "step": 7775 }, { "epoch": 0.43342065659662227, "grad_norm": 0.5774117112159729, "learning_rate": 6.186633499355576e-05, "loss": 1.5268, "step": 7776 }, { "epoch": 0.4334763948497854, "grad_norm": 0.5480836033821106, "learning_rate": 6.185768496411135e-05, "loss": 1.6839, "step": 7777 }, { "epoch": 0.4335321331029486, "grad_norm": 0.5210850834846497, "learning_rate": 6.184903455861032e-05, "loss": 1.592, "step": 7778 }, { "epoch": 0.4335878713561117, "grad_norm": 0.532539427280426, "learning_rate": 6.1840383777327e-05, "loss": 1.7992, "step": 7779 }, { "epoch": 0.43364360960927484, "grad_norm": 0.5546075105667114, "learning_rate": 6.183173262053575e-05, "loss": 1.76, "step": 7780 }, { "epoch": 0.43369934786243797, "grad_norm": 0.5634498000144958, "learning_rate": 6.182308108851091e-05, "loss": 1.5548, "step": 7781 }, { "epoch": 0.43375508611560115, "grad_norm": 0.5091983079910278, "learning_rate": 6.18144291815269e-05, "loss": 1.4981, "step": 7782 }, { "epoch": 0.4338108243687643, "grad_norm": 0.550807535648346, "learning_rate": 6.180577689985805e-05, "loss": 1.6661, "step": 7783 }, { "epoch": 0.4338665626219274, "grad_norm": 0.5441664457321167, "learning_rate": 6.179712424377879e-05, "loss": 1.6262, "step": 7784 }, { "epoch": 0.4339223008750906, "grad_norm": 0.620506227016449, "learning_rate": 6.178847121356353e-05, "loss": 1.9091, "step": 7785 }, { "epoch": 0.4339780391282537, "grad_norm": 0.6028100252151489, "learning_rate": 6.17798178094867e-05, "loss": 1.7357, "step": 7786 }, { "epoch": 0.43403377738141685, "grad_norm": 0.549159049987793, "learning_rate": 6.177116403182274e-05, "loss": 1.6313, "step": 7787 }, { "epoch": 0.43408951563458004, "grad_norm": 0.5400141477584839, "learning_rate": 6.176250988084608e-05, "loss": 1.605, "step": 7788 }, { "epoch": 0.43414525388774317, "grad_norm": 0.5363699793815613, "learning_rate": 6.17538553568312e-05, "loss": 1.5072, "step": 7789 }, { "epoch": 0.4342009921409063, "grad_norm": 0.5816105604171753, "learning_rate": 6.174520046005253e-05, "loss": 1.769, "step": 7790 }, { "epoch": 0.4342567303940694, "grad_norm": 0.5653383731842041, "learning_rate": 6.17365451907846e-05, "loss": 1.6427, "step": 7791 }, { "epoch": 0.4343124686472326, "grad_norm": 0.5933492183685303, "learning_rate": 6.172788954930188e-05, "loss": 1.7614, "step": 7792 }, { "epoch": 0.43436820690039574, "grad_norm": 0.5355760455131531, "learning_rate": 6.171923353587888e-05, "loss": 1.7932, "step": 7793 }, { "epoch": 0.43442394515355887, "grad_norm": 0.5630636811256409, "learning_rate": 6.171057715079012e-05, "loss": 1.5032, "step": 7794 }, { "epoch": 0.43447968340672205, "grad_norm": 0.5832585692405701, "learning_rate": 6.170192039431013e-05, "loss": 1.7822, "step": 7795 }, { "epoch": 0.4345354216598852, "grad_norm": 0.4809796214103699, "learning_rate": 6.169326326671346e-05, "loss": 1.4389, "step": 7796 }, { "epoch": 0.4345911599130483, "grad_norm": 0.5459611415863037, "learning_rate": 6.168460576827465e-05, "loss": 1.6287, "step": 7797 }, { "epoch": 0.4346468981662115, "grad_norm": 0.5732072591781616, "learning_rate": 6.167594789926827e-05, "loss": 1.9769, "step": 7798 }, { "epoch": 0.4347026364193746, "grad_norm": 0.5578893423080444, "learning_rate": 6.16672896599689e-05, "loss": 1.8077, "step": 7799 }, { "epoch": 0.43475837467253775, "grad_norm": 0.5882522463798523, "learning_rate": 6.165863105065113e-05, "loss": 1.7451, "step": 7800 }, { "epoch": 0.43481411292570094, "grad_norm": 0.6155940890312195, "learning_rate": 6.164997207158954e-05, "loss": 1.809, "step": 7801 }, { "epoch": 0.43486985117886406, "grad_norm": 0.5675914883613586, "learning_rate": 6.164131272305878e-05, "loss": 1.7839, "step": 7802 }, { "epoch": 0.4349255894320272, "grad_norm": 0.5673891305923462, "learning_rate": 6.163265300533345e-05, "loss": 1.6121, "step": 7803 }, { "epoch": 0.4349813276851903, "grad_norm": 0.5579030513763428, "learning_rate": 6.162399291868819e-05, "loss": 1.7024, "step": 7804 }, { "epoch": 0.4350370659383535, "grad_norm": 0.5674803256988525, "learning_rate": 6.161533246339764e-05, "loss": 1.702, "step": 7805 }, { "epoch": 0.43509280419151664, "grad_norm": 0.5546411275863647, "learning_rate": 6.160667163973648e-05, "loss": 1.7928, "step": 7806 }, { "epoch": 0.43514854244467976, "grad_norm": 0.6025899648666382, "learning_rate": 6.159801044797936e-05, "loss": 1.7094, "step": 7807 }, { "epoch": 0.43520428069784295, "grad_norm": 0.5264720916748047, "learning_rate": 6.158934888840095e-05, "loss": 1.4788, "step": 7808 }, { "epoch": 0.4352600189510061, "grad_norm": 0.6098587512969971, "learning_rate": 6.158068696127601e-05, "loss": 1.789, "step": 7809 }, { "epoch": 0.4353157572041692, "grad_norm": 0.5427471995353699, "learning_rate": 6.157202466687916e-05, "loss": 1.7309, "step": 7810 }, { "epoch": 0.4353714954573324, "grad_norm": 0.5572206974029541, "learning_rate": 6.156336200548517e-05, "loss": 1.7018, "step": 7811 }, { "epoch": 0.4354272337104955, "grad_norm": 0.5554936528205872, "learning_rate": 6.155469897736874e-05, "loss": 1.6621, "step": 7812 }, { "epoch": 0.43548297196365865, "grad_norm": 0.5617427229881287, "learning_rate": 6.154603558280466e-05, "loss": 1.7123, "step": 7813 }, { "epoch": 0.4355387102168218, "grad_norm": 0.572582483291626, "learning_rate": 6.153737182206762e-05, "loss": 1.7392, "step": 7814 }, { "epoch": 0.43559444846998496, "grad_norm": 0.5278533101081848, "learning_rate": 6.152870769543245e-05, "loss": 1.5766, "step": 7815 }, { "epoch": 0.4356501867231481, "grad_norm": 0.5663198232650757, "learning_rate": 6.152004320317385e-05, "loss": 1.6999, "step": 7816 }, { "epoch": 0.4357059249763112, "grad_norm": 0.5262326598167419, "learning_rate": 6.151137834556666e-05, "loss": 1.569, "step": 7817 }, { "epoch": 0.4357616632294744, "grad_norm": 0.6140465140342712, "learning_rate": 6.150271312288566e-05, "loss": 1.9939, "step": 7818 }, { "epoch": 0.43581740148263753, "grad_norm": 0.5997401475906372, "learning_rate": 6.149404753540567e-05, "loss": 1.7254, "step": 7819 }, { "epoch": 0.43587313973580066, "grad_norm": 0.6437683701515198, "learning_rate": 6.14853815834015e-05, "loss": 2.0098, "step": 7820 }, { "epoch": 0.43592887798896385, "grad_norm": 0.6912010312080383, "learning_rate": 6.1476715267148e-05, "loss": 2.1957, "step": 7821 }, { "epoch": 0.435984616242127, "grad_norm": 0.5197498202323914, "learning_rate": 6.146804858692001e-05, "loss": 1.614, "step": 7822 }, { "epoch": 0.4360403544952901, "grad_norm": 0.5308524370193481, "learning_rate": 6.145938154299237e-05, "loss": 1.5681, "step": 7823 }, { "epoch": 0.4360960927484533, "grad_norm": 0.5914180278778076, "learning_rate": 6.145071413563996e-05, "loss": 1.8961, "step": 7824 }, { "epoch": 0.4361518310016164, "grad_norm": 0.583292543888092, "learning_rate": 6.144204636513767e-05, "loss": 1.7469, "step": 7825 }, { "epoch": 0.43620756925477955, "grad_norm": 0.6572228074073792, "learning_rate": 6.143337823176038e-05, "loss": 1.8796, "step": 7826 }, { "epoch": 0.4362633075079427, "grad_norm": 0.5719166994094849, "learning_rate": 6.142470973578299e-05, "loss": 1.8995, "step": 7827 }, { "epoch": 0.43631904576110586, "grad_norm": 0.561431348323822, "learning_rate": 6.141604087748043e-05, "loss": 1.544, "step": 7828 }, { "epoch": 0.436374784014269, "grad_norm": 0.5519416928291321, "learning_rate": 6.14073716571276e-05, "loss": 1.7948, "step": 7829 }, { "epoch": 0.4364305222674321, "grad_norm": 0.5517488718032837, "learning_rate": 6.139870207499945e-05, "loss": 1.6391, "step": 7830 }, { "epoch": 0.4364862605205953, "grad_norm": 0.5172828435897827, "learning_rate": 6.139003213137092e-05, "loss": 1.7099, "step": 7831 }, { "epoch": 0.43654199877375843, "grad_norm": 0.5379384756088257, "learning_rate": 6.1381361826517e-05, "loss": 1.5748, "step": 7832 }, { "epoch": 0.43659773702692156, "grad_norm": 0.5668090581893921, "learning_rate": 6.137269116071263e-05, "loss": 1.6389, "step": 7833 }, { "epoch": 0.43665347528008475, "grad_norm": 0.5936790704727173, "learning_rate": 6.13640201342328e-05, "loss": 1.7916, "step": 7834 }, { "epoch": 0.4367092135332479, "grad_norm": 0.5564102530479431, "learning_rate": 6.135534874735253e-05, "loss": 1.6772, "step": 7835 }, { "epoch": 0.436764951786411, "grad_norm": 0.6297538876533508, "learning_rate": 6.134667700034678e-05, "loss": 1.6905, "step": 7836 }, { "epoch": 0.43682069003957413, "grad_norm": 0.5488330125808716, "learning_rate": 6.13380048934906e-05, "loss": 1.4808, "step": 7837 }, { "epoch": 0.4368764282927373, "grad_norm": 0.5490309000015259, "learning_rate": 6.132933242705899e-05, "loss": 1.4744, "step": 7838 }, { "epoch": 0.43693216654590045, "grad_norm": 0.5560508370399475, "learning_rate": 6.132065960132705e-05, "loss": 1.5957, "step": 7839 }, { "epoch": 0.4369879047990636, "grad_norm": 0.6161486506462097, "learning_rate": 6.131198641656976e-05, "loss": 1.7756, "step": 7840 }, { "epoch": 0.43704364305222676, "grad_norm": 0.5948550701141357, "learning_rate": 6.130331287306224e-05, "loss": 1.8239, "step": 7841 }, { "epoch": 0.4370993813053899, "grad_norm": 0.5820697546005249, "learning_rate": 6.129463897107951e-05, "loss": 1.5446, "step": 7842 }, { "epoch": 0.437155119558553, "grad_norm": 0.5708462595939636, "learning_rate": 6.128596471089669e-05, "loss": 1.7479, "step": 7843 }, { "epoch": 0.4372108578117162, "grad_norm": 0.543056309223175, "learning_rate": 6.127729009278889e-05, "loss": 1.5951, "step": 7844 }, { "epoch": 0.43726659606487933, "grad_norm": 0.5421169400215149, "learning_rate": 6.126861511703119e-05, "loss": 1.7609, "step": 7845 }, { "epoch": 0.43732233431804246, "grad_norm": 0.5461887121200562, "learning_rate": 6.125993978389871e-05, "loss": 1.559, "step": 7846 }, { "epoch": 0.43737807257120564, "grad_norm": 0.5687921643257141, "learning_rate": 6.12512640936666e-05, "loss": 1.8498, "step": 7847 }, { "epoch": 0.4374338108243688, "grad_norm": 0.571535050868988, "learning_rate": 6.124258804660999e-05, "loss": 1.6316, "step": 7848 }, { "epoch": 0.4374895490775319, "grad_norm": 0.5363306999206543, "learning_rate": 6.123391164300404e-05, "loss": 1.5648, "step": 7849 }, { "epoch": 0.43754528733069503, "grad_norm": 0.5810931324958801, "learning_rate": 6.12252348831239e-05, "loss": 1.6624, "step": 7850 }, { "epoch": 0.4376010255838582, "grad_norm": 0.54121994972229, "learning_rate": 6.121655776724475e-05, "loss": 1.617, "step": 7851 }, { "epoch": 0.43765676383702135, "grad_norm": 0.54410719871521, "learning_rate": 6.120788029564181e-05, "loss": 1.6805, "step": 7852 }, { "epoch": 0.4377125020901845, "grad_norm": 0.5891941785812378, "learning_rate": 6.119920246859024e-05, "loss": 1.51, "step": 7853 }, { "epoch": 0.43776824034334766, "grad_norm": 0.625268280506134, "learning_rate": 6.119052428636529e-05, "loss": 1.9405, "step": 7854 }, { "epoch": 0.4378239785965108, "grad_norm": 0.5463603138923645, "learning_rate": 6.118184574924212e-05, "loss": 1.6922, "step": 7855 }, { "epoch": 0.4378797168496739, "grad_norm": 0.6116244196891785, "learning_rate": 6.1173166857496e-05, "loss": 1.7829, "step": 7856 }, { "epoch": 0.4379354551028371, "grad_norm": 0.60081547498703, "learning_rate": 6.116448761140218e-05, "loss": 1.9078, "step": 7857 }, { "epoch": 0.43799119335600023, "grad_norm": 0.5881320238113403, "learning_rate": 6.11558080112359e-05, "loss": 1.4085, "step": 7858 }, { "epoch": 0.43804693160916336, "grad_norm": 0.5768188238143921, "learning_rate": 6.114712805727244e-05, "loss": 1.8526, "step": 7859 }, { "epoch": 0.4381026698623265, "grad_norm": 0.530643105506897, "learning_rate": 6.113844774978706e-05, "loss": 1.6052, "step": 7860 }, { "epoch": 0.4381584081154897, "grad_norm": 0.5398595929145813, "learning_rate": 6.112976708905508e-05, "loss": 1.7706, "step": 7861 }, { "epoch": 0.4382141463686528, "grad_norm": 0.5204975008964539, "learning_rate": 6.112108607535176e-05, "loss": 1.6883, "step": 7862 }, { "epoch": 0.43826988462181593, "grad_norm": 0.7956941723823547, "learning_rate": 6.111240470895245e-05, "loss": 1.4164, "step": 7863 }, { "epoch": 0.4383256228749791, "grad_norm": 0.5599929094314575, "learning_rate": 6.110372299013243e-05, "loss": 1.7575, "step": 7864 }, { "epoch": 0.43838136112814224, "grad_norm": 0.5534434914588928, "learning_rate": 6.109504091916707e-05, "loss": 1.825, "step": 7865 }, { "epoch": 0.4384370993813054, "grad_norm": 0.5528411269187927, "learning_rate": 6.108635849633169e-05, "loss": 1.5657, "step": 7866 }, { "epoch": 0.43849283763446856, "grad_norm": 0.5750871300697327, "learning_rate": 6.107767572190168e-05, "loss": 2.019, "step": 7867 }, { "epoch": 0.4385485758876317, "grad_norm": 0.5783527493476868, "learning_rate": 6.106899259615236e-05, "loss": 1.5383, "step": 7868 }, { "epoch": 0.4386043141407948, "grad_norm": 0.5577226877212524, "learning_rate": 6.106030911935913e-05, "loss": 1.8226, "step": 7869 }, { "epoch": 0.438660052393958, "grad_norm": 0.5514130592346191, "learning_rate": 6.105162529179738e-05, "loss": 1.8757, "step": 7870 }, { "epoch": 0.43871579064712113, "grad_norm": 0.5459834337234497, "learning_rate": 6.104294111374252e-05, "loss": 1.6836, "step": 7871 }, { "epoch": 0.43877152890028426, "grad_norm": 0.5836615562438965, "learning_rate": 6.103425658546995e-05, "loss": 1.7928, "step": 7872 }, { "epoch": 0.4388272671534474, "grad_norm": 0.552156925201416, "learning_rate": 6.1025571707255104e-05, "loss": 1.7313, "step": 7873 }, { "epoch": 0.43888300540661057, "grad_norm": 0.5519532561302185, "learning_rate": 6.10168864793734e-05, "loss": 1.7947, "step": 7874 }, { "epoch": 0.4389387436597737, "grad_norm": 0.5163867473602295, "learning_rate": 6.100820090210028e-05, "loss": 1.5192, "step": 7875 }, { "epoch": 0.43899448191293683, "grad_norm": 0.5566312074661255, "learning_rate": 6.099951497571123e-05, "loss": 1.5993, "step": 7876 }, { "epoch": 0.4390502201661, "grad_norm": 0.5464503765106201, "learning_rate": 6.099082870048168e-05, "loss": 1.8421, "step": 7877 }, { "epoch": 0.43910595841926314, "grad_norm": 0.5337437987327576, "learning_rate": 6.098214207668713e-05, "loss": 1.5466, "step": 7878 }, { "epoch": 0.43916169667242627, "grad_norm": 0.6034952402114868, "learning_rate": 6.097345510460307e-05, "loss": 1.8151, "step": 7879 }, { "epoch": 0.43921743492558946, "grad_norm": 0.5526003241539001, "learning_rate": 6.0964767784504995e-05, "loss": 1.6425, "step": 7880 }, { "epoch": 0.4392731731787526, "grad_norm": 0.575605571269989, "learning_rate": 6.09560801166684e-05, "loss": 1.7276, "step": 7881 }, { "epoch": 0.4393289114319157, "grad_norm": 0.6006867289543152, "learning_rate": 6.094739210136883e-05, "loss": 1.7726, "step": 7882 }, { "epoch": 0.43938464968507884, "grad_norm": 0.5347257852554321, "learning_rate": 6.093870373888181e-05, "loss": 1.6228, "step": 7883 }, { "epoch": 0.439440387938242, "grad_norm": 0.5642088651657104, "learning_rate": 6.093001502948289e-05, "loss": 1.7197, "step": 7884 }, { "epoch": 0.43949612619140516, "grad_norm": 0.5518479943275452, "learning_rate": 6.0921325973447604e-05, "loss": 1.5778, "step": 7885 }, { "epoch": 0.4395518644445683, "grad_norm": 0.6168820261955261, "learning_rate": 6.091263657105155e-05, "loss": 1.7891, "step": 7886 }, { "epoch": 0.43960760269773147, "grad_norm": 0.5440758466720581, "learning_rate": 6.090394682257029e-05, "loss": 1.5781, "step": 7887 }, { "epoch": 0.4396633409508946, "grad_norm": 0.5412326455116272, "learning_rate": 6.08952567282794e-05, "loss": 1.683, "step": 7888 }, { "epoch": 0.43971907920405773, "grad_norm": 0.563556969165802, "learning_rate": 6.0886566288454496e-05, "loss": 1.5673, "step": 7889 }, { "epoch": 0.4397748174572209, "grad_norm": 0.5224372148513794, "learning_rate": 6.0877875503371176e-05, "loss": 1.7352, "step": 7890 }, { "epoch": 0.43983055571038404, "grad_norm": 0.5953571796417236, "learning_rate": 6.086918437330508e-05, "loss": 1.7736, "step": 7891 }, { "epoch": 0.43988629396354717, "grad_norm": 0.5646018385887146, "learning_rate": 6.086049289853182e-05, "loss": 1.7542, "step": 7892 }, { "epoch": 0.43994203221671035, "grad_norm": 0.6011926531791687, "learning_rate": 6.0851801079327056e-05, "loss": 1.7245, "step": 7893 }, { "epoch": 0.4399977704698735, "grad_norm": 0.4823513627052307, "learning_rate": 6.0843108915966415e-05, "loss": 1.4047, "step": 7894 }, { "epoch": 0.4400535087230366, "grad_norm": 0.6140894889831543, "learning_rate": 6.083441640872558e-05, "loss": 2.0188, "step": 7895 }, { "epoch": 0.44010924697619974, "grad_norm": 0.5411475896835327, "learning_rate": 6.082572355788023e-05, "loss": 1.5408, "step": 7896 }, { "epoch": 0.4401649852293629, "grad_norm": 0.6488401293754578, "learning_rate": 6.081703036370606e-05, "loss": 2.0136, "step": 7897 }, { "epoch": 0.44022072348252606, "grad_norm": 0.7427087426185608, "learning_rate": 6.080833682647874e-05, "loss": 1.6615, "step": 7898 }, { "epoch": 0.4402764617356892, "grad_norm": 0.6195456385612488, "learning_rate": 6.0799642946473986e-05, "loss": 1.5859, "step": 7899 }, { "epoch": 0.44033219998885237, "grad_norm": 0.5988082885742188, "learning_rate": 6.079094872396754e-05, "loss": 1.7462, "step": 7900 }, { "epoch": 0.4403879382420155, "grad_norm": 0.6001728177070618, "learning_rate": 6.0782254159235116e-05, "loss": 1.736, "step": 7901 }, { "epoch": 0.4404436764951786, "grad_norm": 0.5472791790962219, "learning_rate": 6.0773559252552446e-05, "loss": 1.372, "step": 7902 }, { "epoch": 0.4404994147483418, "grad_norm": 0.5791669487953186, "learning_rate": 6.0764864004195286e-05, "loss": 1.7732, "step": 7903 }, { "epoch": 0.44055515300150494, "grad_norm": 0.5353814363479614, "learning_rate": 6.075616841443943e-05, "loss": 1.8002, "step": 7904 }, { "epoch": 0.44061089125466807, "grad_norm": 0.5734871029853821, "learning_rate": 6.07474724835606e-05, "loss": 1.7832, "step": 7905 }, { "epoch": 0.4406666295078312, "grad_norm": 0.6158138513565063, "learning_rate": 6.0738776211834615e-05, "loss": 1.9006, "step": 7906 }, { "epoch": 0.4407223677609944, "grad_norm": 0.5585591793060303, "learning_rate": 6.073007959953726e-05, "loss": 1.8046, "step": 7907 }, { "epoch": 0.4407781060141575, "grad_norm": 0.5921459794044495, "learning_rate": 6.0721382646944326e-05, "loss": 1.8318, "step": 7908 }, { "epoch": 0.44083384426732064, "grad_norm": 0.5314304828643799, "learning_rate": 6.0712685354331654e-05, "loss": 1.4663, "step": 7909 }, { "epoch": 0.4408895825204838, "grad_norm": 0.5642038583755493, "learning_rate": 6.0703987721975076e-05, "loss": 1.6231, "step": 7910 }, { "epoch": 0.44094532077364695, "grad_norm": 0.598506510257721, "learning_rate": 6.0695289750150394e-05, "loss": 1.6668, "step": 7911 }, { "epoch": 0.4410010590268101, "grad_norm": 0.5824127197265625, "learning_rate": 6.068659143913349e-05, "loss": 1.7711, "step": 7912 }, { "epoch": 0.44105679727997327, "grad_norm": 0.5553746223449707, "learning_rate": 6.0677892789200216e-05, "loss": 1.7025, "step": 7913 }, { "epoch": 0.4411125355331364, "grad_norm": 0.5868836641311646, "learning_rate": 6.066919380062643e-05, "loss": 1.7495, "step": 7914 }, { "epoch": 0.4411682737862995, "grad_norm": 0.5977121591567993, "learning_rate": 6.066049447368802e-05, "loss": 1.5988, "step": 7915 }, { "epoch": 0.4412240120394627, "grad_norm": 0.6062576770782471, "learning_rate": 6.065179480866089e-05, "loss": 1.7006, "step": 7916 }, { "epoch": 0.44127975029262584, "grad_norm": 0.5636418461799622, "learning_rate": 6.064309480582093e-05, "loss": 1.6275, "step": 7917 }, { "epoch": 0.44133548854578897, "grad_norm": 0.5832415223121643, "learning_rate": 6.0634394465444056e-05, "loss": 1.8278, "step": 7918 }, { "epoch": 0.4413912267989521, "grad_norm": 0.5471083521842957, "learning_rate": 6.062569378780621e-05, "loss": 1.724, "step": 7919 }, { "epoch": 0.4414469650521153, "grad_norm": 0.5676271915435791, "learning_rate": 6.061699277318328e-05, "loss": 1.706, "step": 7920 }, { "epoch": 0.4415027033052784, "grad_norm": 0.5920431613922119, "learning_rate": 6.060829142185125e-05, "loss": 1.7118, "step": 7921 }, { "epoch": 0.44155844155844154, "grad_norm": 0.6104030609130859, "learning_rate": 6.059958973408607e-05, "loss": 1.908, "step": 7922 }, { "epoch": 0.4416141798116047, "grad_norm": 0.5903329849243164, "learning_rate": 6.05908877101637e-05, "loss": 1.7077, "step": 7923 }, { "epoch": 0.44166991806476785, "grad_norm": 0.5489821434020996, "learning_rate": 6.058218535036013e-05, "loss": 1.6519, "step": 7924 }, { "epoch": 0.441725656317931, "grad_norm": 0.5121790170669556, "learning_rate": 6.057348265495133e-05, "loss": 1.4665, "step": 7925 }, { "epoch": 0.44178139457109417, "grad_norm": 0.5221953392028809, "learning_rate": 6.0564779624213316e-05, "loss": 1.6157, "step": 7926 }, { "epoch": 0.4418371328242573, "grad_norm": 0.5600380897521973, "learning_rate": 6.055607625842208e-05, "loss": 1.5828, "step": 7927 }, { "epoch": 0.4418928710774204, "grad_norm": 0.5320744514465332, "learning_rate": 6.0547372557853655e-05, "loss": 1.6772, "step": 7928 }, { "epoch": 0.44194860933058355, "grad_norm": 0.5403137803077698, "learning_rate": 6.053866852278406e-05, "loss": 1.7394, "step": 7929 }, { "epoch": 0.44200434758374674, "grad_norm": 0.591922402381897, "learning_rate": 6.052996415348936e-05, "loss": 1.8231, "step": 7930 }, { "epoch": 0.44206008583690987, "grad_norm": 0.5516440868377686, "learning_rate": 6.052125945024558e-05, "loss": 1.6415, "step": 7931 }, { "epoch": 0.442115824090073, "grad_norm": 0.5129381418228149, "learning_rate": 6.05125544133288e-05, "loss": 1.5515, "step": 7932 }, { "epoch": 0.4421715623432362, "grad_norm": 0.5778689980506897, "learning_rate": 6.050384904301508e-05, "loss": 1.7348, "step": 7933 }, { "epoch": 0.4422273005963993, "grad_norm": 0.5508379340171814, "learning_rate": 6.049514333958052e-05, "loss": 1.6601, "step": 7934 }, { "epoch": 0.44228303884956244, "grad_norm": 0.5481617450714111, "learning_rate": 6.048643730330119e-05, "loss": 1.5493, "step": 7935 }, { "epoch": 0.4423387771027256, "grad_norm": 0.5237631797790527, "learning_rate": 6.0477730934453226e-05, "loss": 1.5092, "step": 7936 }, { "epoch": 0.44239451535588875, "grad_norm": 0.5657276511192322, "learning_rate": 6.046902423331271e-05, "loss": 1.4483, "step": 7937 }, { "epoch": 0.4424502536090519, "grad_norm": 0.5502325892448425, "learning_rate": 6.046031720015579e-05, "loss": 1.6987, "step": 7938 }, { "epoch": 0.44250599186221506, "grad_norm": 0.6082862615585327, "learning_rate": 6.045160983525859e-05, "loss": 1.8988, "step": 7939 }, { "epoch": 0.4425617301153782, "grad_norm": 0.5569537878036499, "learning_rate": 6.044290213889727e-05, "loss": 1.696, "step": 7940 }, { "epoch": 0.4426174683685413, "grad_norm": 0.518162190914154, "learning_rate": 6.0434194111347985e-05, "loss": 1.5279, "step": 7941 }, { "epoch": 0.44267320662170445, "grad_norm": 0.5695126056671143, "learning_rate": 6.042548575288689e-05, "loss": 1.7109, "step": 7942 }, { "epoch": 0.44272894487486764, "grad_norm": 0.49009808897972107, "learning_rate": 6.0416777063790184e-05, "loss": 1.4709, "step": 7943 }, { "epoch": 0.44278468312803076, "grad_norm": 0.5802407264709473, "learning_rate": 6.040806804433403e-05, "loss": 1.6943, "step": 7944 }, { "epoch": 0.4428404213811939, "grad_norm": 0.5507357716560364, "learning_rate": 6.0399358694794647e-05, "loss": 1.3918, "step": 7945 }, { "epoch": 0.4428961596343571, "grad_norm": 0.5855342745780945, "learning_rate": 6.039064901544824e-05, "loss": 1.8103, "step": 7946 }, { "epoch": 0.4429518978875202, "grad_norm": 0.5658082365989685, "learning_rate": 6.038193900657102e-05, "loss": 1.7597, "step": 7947 }, { "epoch": 0.44300763614068334, "grad_norm": 0.5863122344017029, "learning_rate": 6.037322866843923e-05, "loss": 1.7671, "step": 7948 }, { "epoch": 0.4430633743938465, "grad_norm": 0.5610207915306091, "learning_rate": 6.036451800132912e-05, "loss": 1.7487, "step": 7949 }, { "epoch": 0.44311911264700965, "grad_norm": 0.5848312377929688, "learning_rate": 6.03558070055169e-05, "loss": 1.7112, "step": 7950 }, { "epoch": 0.4431748509001728, "grad_norm": 0.5728501081466675, "learning_rate": 6.0347095681278876e-05, "loss": 1.7736, "step": 7951 }, { "epoch": 0.4432305891533359, "grad_norm": 0.5987431406974792, "learning_rate": 6.033838402889131e-05, "loss": 1.7693, "step": 7952 }, { "epoch": 0.4432863274064991, "grad_norm": 0.5747002959251404, "learning_rate": 6.032967204863048e-05, "loss": 1.6216, "step": 7953 }, { "epoch": 0.4433420656596622, "grad_norm": 0.5476230382919312, "learning_rate": 6.0320959740772666e-05, "loss": 1.7631, "step": 7954 }, { "epoch": 0.44339780391282535, "grad_norm": 0.5305277109146118, "learning_rate": 6.031224710559419e-05, "loss": 1.6809, "step": 7955 }, { "epoch": 0.44345354216598853, "grad_norm": 0.5442744493484497, "learning_rate": 6.0303534143371374e-05, "loss": 1.5357, "step": 7956 }, { "epoch": 0.44350928041915166, "grad_norm": 0.5553621053695679, "learning_rate": 6.029482085438051e-05, "loss": 1.6955, "step": 7957 }, { "epoch": 0.4435650186723148, "grad_norm": 0.5430163741111755, "learning_rate": 6.028610723889797e-05, "loss": 1.762, "step": 7958 }, { "epoch": 0.443620756925478, "grad_norm": 0.5217944979667664, "learning_rate": 6.027739329720006e-05, "loss": 1.4594, "step": 7959 }, { "epoch": 0.4436764951786411, "grad_norm": 0.5763014554977417, "learning_rate": 6.026867902956317e-05, "loss": 1.7942, "step": 7960 }, { "epoch": 0.44373223343180423, "grad_norm": 0.533718466758728, "learning_rate": 6.025996443626364e-05, "loss": 1.6659, "step": 7961 }, { "epoch": 0.4437879716849674, "grad_norm": 0.5921129584312439, "learning_rate": 6.0251249517577854e-05, "loss": 1.9042, "step": 7962 }, { "epoch": 0.44384370993813055, "grad_norm": 0.5379483103752136, "learning_rate": 6.024253427378222e-05, "loss": 1.6772, "step": 7963 }, { "epoch": 0.4438994481912937, "grad_norm": 0.5350393652915955, "learning_rate": 6.0233818705153114e-05, "loss": 1.5868, "step": 7964 }, { "epoch": 0.4439551864444568, "grad_norm": 0.5462901592254639, "learning_rate": 6.022510281196695e-05, "loss": 1.6118, "step": 7965 }, { "epoch": 0.44401092469762, "grad_norm": 0.5518479943275452, "learning_rate": 6.021638659450013e-05, "loss": 1.4902, "step": 7966 }, { "epoch": 0.4440666629507831, "grad_norm": 0.5284306406974792, "learning_rate": 6.020767005302909e-05, "loss": 1.5573, "step": 7967 }, { "epoch": 0.44412240120394625, "grad_norm": 0.6189160346984863, "learning_rate": 6.0198953187830277e-05, "loss": 1.9599, "step": 7968 }, { "epoch": 0.44417813945710943, "grad_norm": 0.5723422765731812, "learning_rate": 6.019023599918014e-05, "loss": 1.7111, "step": 7969 }, { "epoch": 0.44423387771027256, "grad_norm": 0.5545480251312256, "learning_rate": 6.018151848735511e-05, "loss": 1.6214, "step": 7970 }, { "epoch": 0.4442896159634357, "grad_norm": 0.5693395733833313, "learning_rate": 6.01728006526317e-05, "loss": 1.8074, "step": 7971 }, { "epoch": 0.4443453542165989, "grad_norm": 0.5313411951065063, "learning_rate": 6.0164082495286354e-05, "loss": 1.6405, "step": 7972 }, { "epoch": 0.444401092469762, "grad_norm": 0.5680732727050781, "learning_rate": 6.015536401559556e-05, "loss": 1.4973, "step": 7973 }, { "epoch": 0.44445683072292513, "grad_norm": 0.6219733357429504, "learning_rate": 6.014664521383584e-05, "loss": 1.8733, "step": 7974 }, { "epoch": 0.44451256897608826, "grad_norm": 0.5903530716896057, "learning_rate": 6.0137926090283694e-05, "loss": 1.6334, "step": 7975 }, { "epoch": 0.44456830722925145, "grad_norm": 0.6504166722297668, "learning_rate": 6.0129206645215655e-05, "loss": 1.7995, "step": 7976 }, { "epoch": 0.4446240454824146, "grad_norm": 0.6121776103973389, "learning_rate": 6.012048687890821e-05, "loss": 1.8132, "step": 7977 }, { "epoch": 0.4446797837355777, "grad_norm": 0.6290067434310913, "learning_rate": 6.011176679163796e-05, "loss": 1.9482, "step": 7978 }, { "epoch": 0.4447355219887409, "grad_norm": 0.6563844084739685, "learning_rate": 6.010304638368139e-05, "loss": 1.8485, "step": 7979 }, { "epoch": 0.444791260241904, "grad_norm": 0.5514439940452576, "learning_rate": 6.009432565531511e-05, "loss": 1.6343, "step": 7980 }, { "epoch": 0.44484699849506715, "grad_norm": 0.5945736765861511, "learning_rate": 6.008560460681567e-05, "loss": 1.721, "step": 7981 }, { "epoch": 0.44490273674823033, "grad_norm": 0.5428782105445862, "learning_rate": 6.007688323845966e-05, "loss": 1.5152, "step": 7982 }, { "epoch": 0.44495847500139346, "grad_norm": 0.5975694060325623, "learning_rate": 6.006816155052366e-05, "loss": 1.7975, "step": 7983 }, { "epoch": 0.4450142132545566, "grad_norm": 0.5683627724647522, "learning_rate": 6.005943954328429e-05, "loss": 1.7401, "step": 7984 }, { "epoch": 0.4450699515077198, "grad_norm": 0.552085280418396, "learning_rate": 6.005071721701814e-05, "loss": 1.5525, "step": 7985 }, { "epoch": 0.4451256897608829, "grad_norm": 0.5957344770431519, "learning_rate": 6.004199457200184e-05, "loss": 1.8248, "step": 7986 }, { "epoch": 0.44518142801404603, "grad_norm": 0.5816213488578796, "learning_rate": 6.003327160851201e-05, "loss": 1.5985, "step": 7987 }, { "epoch": 0.44523716626720916, "grad_norm": 0.5090708136558533, "learning_rate": 6.002454832682532e-05, "loss": 1.4312, "step": 7988 }, { "epoch": 0.44529290452037235, "grad_norm": 0.5570594668388367, "learning_rate": 6.00158247272184e-05, "loss": 1.6288, "step": 7989 }, { "epoch": 0.4453486427735355, "grad_norm": 0.4970921576023102, "learning_rate": 6.00071008099679e-05, "loss": 1.2663, "step": 7990 }, { "epoch": 0.4454043810266986, "grad_norm": 0.5791414976119995, "learning_rate": 5.999837657535052e-05, "loss": 1.8037, "step": 7991 }, { "epoch": 0.4454601192798618, "grad_norm": 0.5636151432991028, "learning_rate": 5.998965202364294e-05, "loss": 1.6298, "step": 7992 }, { "epoch": 0.4455158575330249, "grad_norm": 0.5829344987869263, "learning_rate": 5.998092715512183e-05, "loss": 1.6349, "step": 7993 }, { "epoch": 0.44557159578618805, "grad_norm": 0.556348979473114, "learning_rate": 5.9972201970063904e-05, "loss": 1.5642, "step": 7994 }, { "epoch": 0.44562733403935123, "grad_norm": 0.5365790724754333, "learning_rate": 5.996347646874587e-05, "loss": 1.6421, "step": 7995 }, { "epoch": 0.44568307229251436, "grad_norm": 0.576501190662384, "learning_rate": 5.9954750651444455e-05, "loss": 1.6171, "step": 7996 }, { "epoch": 0.4457388105456775, "grad_norm": 0.5861379504203796, "learning_rate": 5.9946024518436406e-05, "loss": 1.6702, "step": 7997 }, { "epoch": 0.4457945487988406, "grad_norm": 0.5348252058029175, "learning_rate": 5.9937298069998424e-05, "loss": 1.4339, "step": 7998 }, { "epoch": 0.4458502870520038, "grad_norm": 0.5516197085380554, "learning_rate": 5.99285713064073e-05, "loss": 1.738, "step": 7999 }, { "epoch": 0.44590602530516693, "grad_norm": 0.58391934633255, "learning_rate": 5.991984422793977e-05, "loss": 1.4481, "step": 8000 }, { "epoch": 0.44596176355833006, "grad_norm": 0.5707566738128662, "learning_rate": 5.9911116834872624e-05, "loss": 1.7051, "step": 8001 }, { "epoch": 0.44601750181149324, "grad_norm": 0.5384584069252014, "learning_rate": 5.990238912748265e-05, "loss": 1.7542, "step": 8002 }, { "epoch": 0.4460732400646564, "grad_norm": 0.5866785645484924, "learning_rate": 5.989366110604662e-05, "loss": 1.8245, "step": 8003 }, { "epoch": 0.4461289783178195, "grad_norm": 0.5644246935844421, "learning_rate": 5.988493277084134e-05, "loss": 1.7637, "step": 8004 }, { "epoch": 0.4461847165709827, "grad_norm": 0.5331970453262329, "learning_rate": 5.9876204122143634e-05, "loss": 1.6303, "step": 8005 }, { "epoch": 0.4462404548241458, "grad_norm": 0.5923652648925781, "learning_rate": 5.98674751602303e-05, "loss": 1.8505, "step": 8006 }, { "epoch": 0.44629619307730894, "grad_norm": 0.5415480136871338, "learning_rate": 5.985874588537819e-05, "loss": 1.6483, "step": 8007 }, { "epoch": 0.44635193133047213, "grad_norm": 0.5634106397628784, "learning_rate": 5.985001629786415e-05, "loss": 1.5566, "step": 8008 }, { "epoch": 0.44640766958363526, "grad_norm": 0.5723522901535034, "learning_rate": 5.9841286397965014e-05, "loss": 1.7409, "step": 8009 }, { "epoch": 0.4464634078367984, "grad_norm": 0.5537884831428528, "learning_rate": 5.983255618595767e-05, "loss": 1.712, "step": 8010 }, { "epoch": 0.4465191460899615, "grad_norm": 0.5915796160697937, "learning_rate": 5.982382566211895e-05, "loss": 1.7699, "step": 8011 }, { "epoch": 0.4465748843431247, "grad_norm": 0.6134962439537048, "learning_rate": 5.981509482672576e-05, "loss": 1.862, "step": 8012 }, { "epoch": 0.44663062259628783, "grad_norm": 0.4997968077659607, "learning_rate": 5.980636368005499e-05, "loss": 1.5174, "step": 8013 }, { "epoch": 0.44668636084945096, "grad_norm": 0.5801420211791992, "learning_rate": 5.979763222238354e-05, "loss": 1.8425, "step": 8014 }, { "epoch": 0.44674209910261414, "grad_norm": 0.5159302949905396, "learning_rate": 5.978890045398833e-05, "loss": 1.7243, "step": 8015 }, { "epoch": 0.44679783735577727, "grad_norm": 0.59089195728302, "learning_rate": 5.978016837514625e-05, "loss": 1.8003, "step": 8016 }, { "epoch": 0.4468535756089404, "grad_norm": 0.5666080713272095, "learning_rate": 5.9771435986134274e-05, "loss": 1.648, "step": 8017 }, { "epoch": 0.4469093138621036, "grad_norm": 0.5891024470329285, "learning_rate": 5.9762703287229304e-05, "loss": 1.5867, "step": 8018 }, { "epoch": 0.4469650521152667, "grad_norm": 0.5871114730834961, "learning_rate": 5.975397027870831e-05, "loss": 1.656, "step": 8019 }, { "epoch": 0.44702079036842984, "grad_norm": 0.6023023724555969, "learning_rate": 5.974523696084825e-05, "loss": 1.6628, "step": 8020 }, { "epoch": 0.447076528621593, "grad_norm": 0.5608631372451782, "learning_rate": 5.97365033339261e-05, "loss": 1.4316, "step": 8021 }, { "epoch": 0.44713226687475616, "grad_norm": 0.5549430251121521, "learning_rate": 5.972776939821883e-05, "loss": 1.4696, "step": 8022 }, { "epoch": 0.4471880051279193, "grad_norm": 0.5799054503440857, "learning_rate": 5.971903515400342e-05, "loss": 1.7885, "step": 8023 }, { "epoch": 0.4472437433810824, "grad_norm": 0.5215498208999634, "learning_rate": 5.971030060155689e-05, "loss": 1.6956, "step": 8024 }, { "epoch": 0.4472994816342456, "grad_norm": 0.5385097861289978, "learning_rate": 5.970156574115623e-05, "loss": 1.5434, "step": 8025 }, { "epoch": 0.44735521988740873, "grad_norm": 0.5320507287979126, "learning_rate": 5.969283057307847e-05, "loss": 1.5207, "step": 8026 }, { "epoch": 0.44741095814057186, "grad_norm": 0.53661048412323, "learning_rate": 5.9684095097600645e-05, "loss": 1.6211, "step": 8027 }, { "epoch": 0.44746669639373504, "grad_norm": 0.5779610872268677, "learning_rate": 5.967535931499979e-05, "loss": 1.7282, "step": 8028 }, { "epoch": 0.44752243464689817, "grad_norm": 0.5973451137542725, "learning_rate": 5.966662322555294e-05, "loss": 1.822, "step": 8029 }, { "epoch": 0.4475781729000613, "grad_norm": 0.6070274710655212, "learning_rate": 5.965788682953717e-05, "loss": 1.6235, "step": 8030 }, { "epoch": 0.4476339111532245, "grad_norm": 0.5565271377563477, "learning_rate": 5.9649150127229534e-05, "loss": 1.8248, "step": 8031 }, { "epoch": 0.4476896494063876, "grad_norm": 0.5610112547874451, "learning_rate": 5.964041311890711e-05, "loss": 1.5738, "step": 8032 }, { "epoch": 0.44774538765955074, "grad_norm": 0.5636839270591736, "learning_rate": 5.9631675804846985e-05, "loss": 1.5644, "step": 8033 }, { "epoch": 0.44780112591271387, "grad_norm": 0.5381824970245361, "learning_rate": 5.962293818532628e-05, "loss": 1.6785, "step": 8034 }, { "epoch": 0.44785686416587706, "grad_norm": 0.5614325404167175, "learning_rate": 5.9614200260622066e-05, "loss": 1.7991, "step": 8035 }, { "epoch": 0.4479126024190402, "grad_norm": 0.527214527130127, "learning_rate": 5.960546203101148e-05, "loss": 1.6311, "step": 8036 }, { "epoch": 0.4479683406722033, "grad_norm": 0.5667834877967834, "learning_rate": 5.959672349677163e-05, "loss": 1.4416, "step": 8037 }, { "epoch": 0.4480240789253665, "grad_norm": 0.5953390002250671, "learning_rate": 5.9587984658179676e-05, "loss": 1.8168, "step": 8038 }, { "epoch": 0.4480798171785296, "grad_norm": 0.5339275598526001, "learning_rate": 5.957924551551275e-05, "loss": 1.6999, "step": 8039 }, { "epoch": 0.44813555543169276, "grad_norm": 0.5568943619728088, "learning_rate": 5.9570506069048e-05, "loss": 1.7066, "step": 8040 }, { "epoch": 0.44819129368485594, "grad_norm": 0.5787097215652466, "learning_rate": 5.95617663190626e-05, "loss": 1.6468, "step": 8041 }, { "epoch": 0.44824703193801907, "grad_norm": 0.5685398578643799, "learning_rate": 5.955302626583374e-05, "loss": 1.8804, "step": 8042 }, { "epoch": 0.4483027701911822, "grad_norm": 0.5303986668586731, "learning_rate": 5.9544285909638566e-05, "loss": 1.4389, "step": 8043 }, { "epoch": 0.4483585084443453, "grad_norm": 0.5936418771743774, "learning_rate": 5.953554525075429e-05, "loss": 1.9128, "step": 8044 }, { "epoch": 0.4484142466975085, "grad_norm": 0.5271584391593933, "learning_rate": 5.952680428945812e-05, "loss": 1.5926, "step": 8045 }, { "epoch": 0.44846998495067164, "grad_norm": 0.5615208148956299, "learning_rate": 5.951806302602725e-05, "loss": 1.6805, "step": 8046 }, { "epoch": 0.44852572320383477, "grad_norm": 0.5467960834503174, "learning_rate": 5.950932146073893e-05, "loss": 1.6863, "step": 8047 }, { "epoch": 0.44858146145699795, "grad_norm": 0.5716736912727356, "learning_rate": 5.950057959387038e-05, "loss": 1.695, "step": 8048 }, { "epoch": 0.4486371997101611, "grad_norm": 0.5174785852432251, "learning_rate": 5.9491837425698816e-05, "loss": 1.3978, "step": 8049 }, { "epoch": 0.4486929379633242, "grad_norm": 0.5112467408180237, "learning_rate": 5.948309495650153e-05, "loss": 1.3862, "step": 8050 }, { "epoch": 0.4487486762164874, "grad_norm": 0.6070237755775452, "learning_rate": 5.947435218655576e-05, "loss": 1.744, "step": 8051 }, { "epoch": 0.4488044144696505, "grad_norm": 0.5886159539222717, "learning_rate": 5.946560911613877e-05, "loss": 1.9782, "step": 8052 }, { "epoch": 0.44886015272281365, "grad_norm": 0.6077089309692383, "learning_rate": 5.945686574552785e-05, "loss": 1.6861, "step": 8053 }, { "epoch": 0.44891589097597684, "grad_norm": 0.5767019391059875, "learning_rate": 5.944812207500029e-05, "loss": 1.8577, "step": 8054 }, { "epoch": 0.44897162922913997, "grad_norm": 0.5735483765602112, "learning_rate": 5.943937810483338e-05, "loss": 1.8143, "step": 8055 }, { "epoch": 0.4490273674823031, "grad_norm": 0.5384686589241028, "learning_rate": 5.943063383530444e-05, "loss": 1.7183, "step": 8056 }, { "epoch": 0.4490831057354662, "grad_norm": 0.5415961146354675, "learning_rate": 5.942188926669077e-05, "loss": 1.5619, "step": 8057 }, { "epoch": 0.4491388439886294, "grad_norm": 0.5548281669616699, "learning_rate": 5.941314439926969e-05, "loss": 1.8049, "step": 8058 }, { "epoch": 0.44919458224179254, "grad_norm": 0.5731210112571716, "learning_rate": 5.940439923331857e-05, "loss": 1.9301, "step": 8059 }, { "epoch": 0.44925032049495567, "grad_norm": 0.5715717673301697, "learning_rate": 5.939565376911475e-05, "loss": 1.6145, "step": 8060 }, { "epoch": 0.44930605874811885, "grad_norm": 0.5775079131126404, "learning_rate": 5.938690800693556e-05, "loss": 1.7435, "step": 8061 }, { "epoch": 0.449361797001282, "grad_norm": 0.5366044044494629, "learning_rate": 5.937816194705838e-05, "loss": 1.7497, "step": 8062 }, { "epoch": 0.4494175352544451, "grad_norm": 0.5498981475830078, "learning_rate": 5.936941558976058e-05, "loss": 1.6565, "step": 8063 }, { "epoch": 0.4494732735076083, "grad_norm": 0.541826605796814, "learning_rate": 5.936066893531954e-05, "loss": 1.6147, "step": 8064 }, { "epoch": 0.4495290117607714, "grad_norm": 0.5456510186195374, "learning_rate": 5.9351921984012657e-05, "loss": 1.652, "step": 8065 }, { "epoch": 0.44958475001393455, "grad_norm": 0.5831677317619324, "learning_rate": 5.934317473611734e-05, "loss": 1.7302, "step": 8066 }, { "epoch": 0.4496404882670977, "grad_norm": 0.55061274766922, "learning_rate": 5.9334427191911e-05, "loss": 1.6976, "step": 8067 }, { "epoch": 0.44969622652026087, "grad_norm": 0.5210010409355164, "learning_rate": 5.932567935167104e-05, "loss": 1.5901, "step": 8068 }, { "epoch": 0.449751964773424, "grad_norm": 0.5638371706008911, "learning_rate": 5.931693121567492e-05, "loss": 1.7005, "step": 8069 }, { "epoch": 0.4498077030265871, "grad_norm": 0.5460227131843567, "learning_rate": 5.930818278420005e-05, "loss": 1.8827, "step": 8070 }, { "epoch": 0.4498634412797503, "grad_norm": 0.5335036516189575, "learning_rate": 5.9299434057523894e-05, "loss": 1.6689, "step": 8071 }, { "epoch": 0.44991917953291344, "grad_norm": 0.45309698581695557, "learning_rate": 5.929068503592391e-05, "loss": 1.1558, "step": 8072 }, { "epoch": 0.44997491778607657, "grad_norm": 0.5678838491439819, "learning_rate": 5.9281935719677574e-05, "loss": 1.7916, "step": 8073 }, { "epoch": 0.45003065603923975, "grad_norm": 0.6037769913673401, "learning_rate": 5.927318610906234e-05, "loss": 1.6458, "step": 8074 }, { "epoch": 0.4500863942924029, "grad_norm": 0.5376781821250916, "learning_rate": 5.9264436204355724e-05, "loss": 1.754, "step": 8075 }, { "epoch": 0.450142132545566, "grad_norm": 0.5493988394737244, "learning_rate": 5.92556860058352e-05, "loss": 1.7992, "step": 8076 }, { "epoch": 0.4501978707987292, "grad_norm": 0.5373069643974304, "learning_rate": 5.9246935513778276e-05, "loss": 1.6756, "step": 8077 }, { "epoch": 0.4502536090518923, "grad_norm": 0.5574460625648499, "learning_rate": 5.923818472846248e-05, "loss": 1.6423, "step": 8078 }, { "epoch": 0.45030934730505545, "grad_norm": 0.5568375587463379, "learning_rate": 5.922943365016531e-05, "loss": 1.7708, "step": 8079 }, { "epoch": 0.4503650855582186, "grad_norm": 0.551171064376831, "learning_rate": 5.922068227916433e-05, "loss": 1.7107, "step": 8080 }, { "epoch": 0.45042082381138177, "grad_norm": 0.5870986580848694, "learning_rate": 5.9211930615737066e-05, "loss": 1.801, "step": 8081 }, { "epoch": 0.4504765620645449, "grad_norm": 0.5700268745422363, "learning_rate": 5.920317866016108e-05, "loss": 1.6317, "step": 8082 }, { "epoch": 0.450532300317708, "grad_norm": 0.5469490885734558, "learning_rate": 5.919442641271391e-05, "loss": 1.6841, "step": 8083 }, { "epoch": 0.4505880385708712, "grad_norm": 0.5380752682685852, "learning_rate": 5.9185673873673154e-05, "loss": 1.3761, "step": 8084 }, { "epoch": 0.45064377682403434, "grad_norm": 0.6156383156776428, "learning_rate": 5.917692104331637e-05, "loss": 1.9012, "step": 8085 }, { "epoch": 0.45069951507719747, "grad_norm": 0.6044989824295044, "learning_rate": 5.916816792192116e-05, "loss": 1.8825, "step": 8086 }, { "epoch": 0.45075525333036065, "grad_norm": 0.5541858673095703, "learning_rate": 5.915941450976512e-05, "loss": 1.6097, "step": 8087 }, { "epoch": 0.4508109915835238, "grad_norm": 0.5468337535858154, "learning_rate": 5.9150660807125844e-05, "loss": 1.7299, "step": 8088 }, { "epoch": 0.4508667298366869, "grad_norm": 0.6255477070808411, "learning_rate": 5.9141906814280975e-05, "loss": 1.818, "step": 8089 }, { "epoch": 0.45092246808985004, "grad_norm": 0.5574450492858887, "learning_rate": 5.9133152531508106e-05, "loss": 1.8804, "step": 8090 }, { "epoch": 0.4509782063430132, "grad_norm": 0.5240482091903687, "learning_rate": 5.91243979590849e-05, "loss": 1.6162, "step": 8091 }, { "epoch": 0.45103394459617635, "grad_norm": 0.5322662591934204, "learning_rate": 5.911564309728899e-05, "loss": 1.7833, "step": 8092 }, { "epoch": 0.4510896828493395, "grad_norm": 0.5365003347396851, "learning_rate": 5.910688794639803e-05, "loss": 1.5982, "step": 8093 }, { "epoch": 0.45114542110250266, "grad_norm": 0.5948169827461243, "learning_rate": 5.909813250668967e-05, "loss": 1.8386, "step": 8094 }, { "epoch": 0.4512011593556658, "grad_norm": 0.5501197576522827, "learning_rate": 5.9089376778441606e-05, "loss": 1.748, "step": 8095 }, { "epoch": 0.4512568976088289, "grad_norm": 0.5238162875175476, "learning_rate": 5.908062076193149e-05, "loss": 1.4871, "step": 8096 }, { "epoch": 0.4513126358619921, "grad_norm": 0.515355110168457, "learning_rate": 5.907186445743704e-05, "loss": 1.4985, "step": 8097 }, { "epoch": 0.45136837411515524, "grad_norm": 0.5451371073722839, "learning_rate": 5.9063107865235936e-05, "loss": 1.7953, "step": 8098 }, { "epoch": 0.45142411236831836, "grad_norm": 0.5602155327796936, "learning_rate": 5.90543509856059e-05, "loss": 1.4848, "step": 8099 }, { "epoch": 0.45147985062148155, "grad_norm": 0.6136230826377869, "learning_rate": 5.904559381882463e-05, "loss": 1.8602, "step": 8100 }, { "epoch": 0.4515355888746447, "grad_norm": 0.5416921973228455, "learning_rate": 5.9036836365169865e-05, "loss": 1.7242, "step": 8101 }, { "epoch": 0.4515913271278078, "grad_norm": 0.5299700498580933, "learning_rate": 5.9028078624919344e-05, "loss": 1.4976, "step": 8102 }, { "epoch": 0.45164706538097094, "grad_norm": 0.5295999050140381, "learning_rate": 5.901932059835081e-05, "loss": 1.667, "step": 8103 }, { "epoch": 0.4517028036341341, "grad_norm": 0.5291856527328491, "learning_rate": 5.9010562285742e-05, "loss": 1.5909, "step": 8104 }, { "epoch": 0.45175854188729725, "grad_norm": 0.5456459522247314, "learning_rate": 5.9001803687370696e-05, "loss": 1.6947, "step": 8105 }, { "epoch": 0.4518142801404604, "grad_norm": 0.534061074256897, "learning_rate": 5.8993044803514674e-05, "loss": 1.4796, "step": 8106 }, { "epoch": 0.45187001839362356, "grad_norm": 0.5795206427574158, "learning_rate": 5.8984285634451695e-05, "loss": 1.8176, "step": 8107 }, { "epoch": 0.4519257566467867, "grad_norm": 0.5638490915298462, "learning_rate": 5.897552618045956e-05, "loss": 1.6067, "step": 8108 }, { "epoch": 0.4519814948999498, "grad_norm": 0.5725950002670288, "learning_rate": 5.896676644181607e-05, "loss": 1.6761, "step": 8109 }, { "epoch": 0.452037233153113, "grad_norm": 0.6189979314804077, "learning_rate": 5.8958006418799005e-05, "loss": 1.8323, "step": 8110 }, { "epoch": 0.45209297140627613, "grad_norm": 0.550565779209137, "learning_rate": 5.894924611168622e-05, "loss": 1.865, "step": 8111 }, { "epoch": 0.45214870965943926, "grad_norm": 0.563420832157135, "learning_rate": 5.894048552075554e-05, "loss": 1.8, "step": 8112 }, { "epoch": 0.4522044479126024, "grad_norm": 0.5111345052719116, "learning_rate": 5.893172464628477e-05, "loss": 1.4806, "step": 8113 }, { "epoch": 0.4522601861657656, "grad_norm": 0.566088855266571, "learning_rate": 5.8922963488551775e-05, "loss": 1.7427, "step": 8114 }, { "epoch": 0.4523159244189287, "grad_norm": 0.5696318745613098, "learning_rate": 5.89142020478344e-05, "loss": 1.8576, "step": 8115 }, { "epoch": 0.45237166267209183, "grad_norm": 0.5730637907981873, "learning_rate": 5.890544032441051e-05, "loss": 1.6966, "step": 8116 }, { "epoch": 0.452427400925255, "grad_norm": 0.5427675247192383, "learning_rate": 5.889667831855797e-05, "loss": 1.639, "step": 8117 }, { "epoch": 0.45248313917841815, "grad_norm": 0.6031304001808167, "learning_rate": 5.888791603055467e-05, "loss": 1.7707, "step": 8118 }, { "epoch": 0.4525388774315813, "grad_norm": 0.5573417544364929, "learning_rate": 5.887915346067851e-05, "loss": 1.8751, "step": 8119 }, { "epoch": 0.45259461568474446, "grad_norm": 0.5398233532905579, "learning_rate": 5.8870390609207337e-05, "loss": 1.5854, "step": 8120 }, { "epoch": 0.4526503539379076, "grad_norm": 0.554905354976654, "learning_rate": 5.886162747641912e-05, "loss": 1.6138, "step": 8121 }, { "epoch": 0.4527060921910707, "grad_norm": 0.5116898417472839, "learning_rate": 5.885286406259174e-05, "loss": 1.4997, "step": 8122 }, { "epoch": 0.4527618304442339, "grad_norm": 0.5095398426055908, "learning_rate": 5.884410036800312e-05, "loss": 1.372, "step": 8123 }, { "epoch": 0.45281756869739703, "grad_norm": 0.5345844626426697, "learning_rate": 5.883533639293119e-05, "loss": 1.7398, "step": 8124 }, { "epoch": 0.45287330695056016, "grad_norm": 0.5889625549316406, "learning_rate": 5.882657213765393e-05, "loss": 1.8826, "step": 8125 }, { "epoch": 0.4529290452037233, "grad_norm": 0.5907882452011108, "learning_rate": 5.881780760244926e-05, "loss": 1.8187, "step": 8126 }, { "epoch": 0.4529847834568865, "grad_norm": 0.5326589941978455, "learning_rate": 5.8809042787595135e-05, "loss": 1.5317, "step": 8127 }, { "epoch": 0.4530405217100496, "grad_norm": 0.6067203283309937, "learning_rate": 5.880027769336953e-05, "loss": 1.9912, "step": 8128 }, { "epoch": 0.45309625996321273, "grad_norm": 0.5273611545562744, "learning_rate": 5.879151232005044e-05, "loss": 1.7771, "step": 8129 }, { "epoch": 0.4531519982163759, "grad_norm": 0.5791671872138977, "learning_rate": 5.8782746667915824e-05, "loss": 1.9728, "step": 8130 }, { "epoch": 0.45320773646953905, "grad_norm": 0.5748934149742126, "learning_rate": 5.877398073724368e-05, "loss": 1.7932, "step": 8131 }, { "epoch": 0.4532634747227022, "grad_norm": 0.5750080943107605, "learning_rate": 5.876521452831205e-05, "loss": 1.6562, "step": 8132 }, { "epoch": 0.45331921297586536, "grad_norm": 0.5455517172813416, "learning_rate": 5.87564480413989e-05, "loss": 1.6491, "step": 8133 }, { "epoch": 0.4533749512290285, "grad_norm": 0.5786875486373901, "learning_rate": 5.8747681276782294e-05, "loss": 1.6799, "step": 8134 }, { "epoch": 0.4534306894821916, "grad_norm": 0.5193260908126831, "learning_rate": 5.8738914234740225e-05, "loss": 1.7299, "step": 8135 }, { "epoch": 0.45348642773535475, "grad_norm": 0.5477581024169922, "learning_rate": 5.8730146915550745e-05, "loss": 1.529, "step": 8136 }, { "epoch": 0.45354216598851793, "grad_norm": 0.5622334480285645, "learning_rate": 5.872137931949191e-05, "loss": 1.7301, "step": 8137 }, { "epoch": 0.45359790424168106, "grad_norm": 0.5410364866256714, "learning_rate": 5.871261144684177e-05, "loss": 1.7159, "step": 8138 }, { "epoch": 0.4536536424948442, "grad_norm": 0.5440908670425415, "learning_rate": 5.870384329787839e-05, "loss": 1.6208, "step": 8139 }, { "epoch": 0.4537093807480074, "grad_norm": 0.5730171799659729, "learning_rate": 5.8695074872879855e-05, "loss": 1.7554, "step": 8140 }, { "epoch": 0.4537651190011705, "grad_norm": 0.5274659991264343, "learning_rate": 5.868630617212424e-05, "loss": 1.6493, "step": 8141 }, { "epoch": 0.45382085725433363, "grad_norm": 0.5639094114303589, "learning_rate": 5.867753719588963e-05, "loss": 1.8717, "step": 8142 }, { "epoch": 0.4538765955074968, "grad_norm": 0.5402084589004517, "learning_rate": 5.8668767944454136e-05, "loss": 1.7959, "step": 8143 }, { "epoch": 0.45393233376065995, "grad_norm": 0.5999549627304077, "learning_rate": 5.865999841809586e-05, "loss": 1.7492, "step": 8144 }, { "epoch": 0.4539880720138231, "grad_norm": 0.5832345485687256, "learning_rate": 5.865122861709295e-05, "loss": 1.7432, "step": 8145 }, { "epoch": 0.45404381026698626, "grad_norm": 0.500333309173584, "learning_rate": 5.864245854172349e-05, "loss": 1.5536, "step": 8146 }, { "epoch": 0.4540995485201494, "grad_norm": 0.5283179879188538, "learning_rate": 5.8633688192265645e-05, "loss": 1.5528, "step": 8147 }, { "epoch": 0.4541552867733125, "grad_norm": 0.5074849128723145, "learning_rate": 5.862491756899753e-05, "loss": 1.5251, "step": 8148 }, { "epoch": 0.45421102502647565, "grad_norm": 0.5706311464309692, "learning_rate": 5.8616146672197326e-05, "loss": 1.5709, "step": 8149 }, { "epoch": 0.45426676327963883, "grad_norm": 0.570326566696167, "learning_rate": 5.8607375502143183e-05, "loss": 1.6585, "step": 8150 }, { "epoch": 0.45432250153280196, "grad_norm": 0.7040314674377441, "learning_rate": 5.859860405911328e-05, "loss": 2.0239, "step": 8151 }, { "epoch": 0.4543782397859651, "grad_norm": 0.5602174401283264, "learning_rate": 5.858983234338579e-05, "loss": 1.5565, "step": 8152 }, { "epoch": 0.4544339780391283, "grad_norm": 0.596564531326294, "learning_rate": 5.858106035523888e-05, "loss": 1.8482, "step": 8153 }, { "epoch": 0.4544897162922914, "grad_norm": 0.5571820735931396, "learning_rate": 5.85722880949508e-05, "loss": 1.6401, "step": 8154 }, { "epoch": 0.45454545454545453, "grad_norm": 0.5759769678115845, "learning_rate": 5.8563515562799695e-05, "loss": 1.8876, "step": 8155 }, { "epoch": 0.4546011927986177, "grad_norm": 0.526823103427887, "learning_rate": 5.855474275906381e-05, "loss": 1.4215, "step": 8156 }, { "epoch": 0.45465693105178084, "grad_norm": 0.5801699161529541, "learning_rate": 5.854596968402136e-05, "loss": 1.8225, "step": 8157 }, { "epoch": 0.454712669304944, "grad_norm": 0.548812747001648, "learning_rate": 5.8537196337950596e-05, "loss": 1.6582, "step": 8158 }, { "epoch": 0.4547684075581071, "grad_norm": 0.5647279024124146, "learning_rate": 5.8528422721129726e-05, "loss": 1.6121, "step": 8159 }, { "epoch": 0.4548241458112703, "grad_norm": 0.5501880645751953, "learning_rate": 5.8519648833837013e-05, "loss": 1.5704, "step": 8160 }, { "epoch": 0.4548798840644334, "grad_norm": 0.5714605450630188, "learning_rate": 5.851087467635071e-05, "loss": 1.918, "step": 8161 }, { "epoch": 0.45493562231759654, "grad_norm": 0.5872429609298706, "learning_rate": 5.8502100248949085e-05, "loss": 1.7381, "step": 8162 }, { "epoch": 0.45499136057075973, "grad_norm": 0.5113133788108826, "learning_rate": 5.8493325551910405e-05, "loss": 1.6602, "step": 8163 }, { "epoch": 0.45504709882392286, "grad_norm": 0.5724974274635315, "learning_rate": 5.848455058551298e-05, "loss": 1.7762, "step": 8164 }, { "epoch": 0.455102837077086, "grad_norm": 0.5925339460372925, "learning_rate": 5.8475775350035056e-05, "loss": 1.8456, "step": 8165 }, { "epoch": 0.45515857533024917, "grad_norm": 0.567402720451355, "learning_rate": 5.846699984575497e-05, "loss": 1.6512, "step": 8166 }, { "epoch": 0.4552143135834123, "grad_norm": 0.53789883852005, "learning_rate": 5.8458224072951005e-05, "loss": 1.675, "step": 8167 }, { "epoch": 0.45527005183657543, "grad_norm": 0.563400149345398, "learning_rate": 5.844944803190149e-05, "loss": 1.4973, "step": 8168 }, { "epoch": 0.4553257900897386, "grad_norm": 0.5786770582199097, "learning_rate": 5.844067172288474e-05, "loss": 1.6223, "step": 8169 }, { "epoch": 0.45538152834290174, "grad_norm": 0.5910102725028992, "learning_rate": 5.843189514617911e-05, "loss": 1.7822, "step": 8170 }, { "epoch": 0.45543726659606487, "grad_norm": 0.5599364638328552, "learning_rate": 5.8423118302062915e-05, "loss": 1.7511, "step": 8171 }, { "epoch": 0.455493004849228, "grad_norm": 0.5284358263015747, "learning_rate": 5.841434119081453e-05, "loss": 1.6494, "step": 8172 }, { "epoch": 0.4555487431023912, "grad_norm": 0.5970794558525085, "learning_rate": 5.840556381271229e-05, "loss": 1.6952, "step": 8173 }, { "epoch": 0.4556044813555543, "grad_norm": 0.5448065400123596, "learning_rate": 5.839678616803458e-05, "loss": 1.5907, "step": 8174 }, { "epoch": 0.45566021960871744, "grad_norm": 0.5598198771476746, "learning_rate": 5.838800825705977e-05, "loss": 1.6862, "step": 8175 }, { "epoch": 0.4557159578618806, "grad_norm": 0.5819631218910217, "learning_rate": 5.837923008006623e-05, "loss": 1.7354, "step": 8176 }, { "epoch": 0.45577169611504376, "grad_norm": 0.5947074890136719, "learning_rate": 5.837045163733239e-05, "loss": 1.7971, "step": 8177 }, { "epoch": 0.4558274343682069, "grad_norm": 0.541515588760376, "learning_rate": 5.8361672929136614e-05, "loss": 1.4939, "step": 8178 }, { "epoch": 0.45588317262137007, "grad_norm": 0.670753002166748, "learning_rate": 5.835289395575731e-05, "loss": 1.8816, "step": 8179 }, { "epoch": 0.4559389108745332, "grad_norm": 0.5665016174316406, "learning_rate": 5.8344114717472943e-05, "loss": 1.6907, "step": 8180 }, { "epoch": 0.4559946491276963, "grad_norm": 0.5885823369026184, "learning_rate": 5.833533521456188e-05, "loss": 1.6905, "step": 8181 }, { "epoch": 0.45605038738085946, "grad_norm": 0.5672965049743652, "learning_rate": 5.832655544730259e-05, "loss": 1.5996, "step": 8182 }, { "epoch": 0.45610612563402264, "grad_norm": 0.5488877296447754, "learning_rate": 5.831777541597351e-05, "loss": 1.6316, "step": 8183 }, { "epoch": 0.45616186388718577, "grad_norm": 0.541111409664154, "learning_rate": 5.8308995120853096e-05, "loss": 1.7246, "step": 8184 }, { "epoch": 0.4562176021403489, "grad_norm": 0.5794996619224548, "learning_rate": 5.830021456221979e-05, "loss": 1.8438, "step": 8185 }, { "epoch": 0.4562733403935121, "grad_norm": 0.4965246021747589, "learning_rate": 5.829143374035209e-05, "loss": 1.2569, "step": 8186 }, { "epoch": 0.4563290786466752, "grad_norm": 0.5464833974838257, "learning_rate": 5.8282652655528426e-05, "loss": 1.6355, "step": 8187 }, { "epoch": 0.45638481689983834, "grad_norm": 0.617215096950531, "learning_rate": 5.827387130802733e-05, "loss": 1.7473, "step": 8188 }, { "epoch": 0.4564405551530015, "grad_norm": 0.6064026355743408, "learning_rate": 5.826508969812726e-05, "loss": 1.817, "step": 8189 }, { "epoch": 0.45649629340616465, "grad_norm": 0.6004077792167664, "learning_rate": 5.825630782610676e-05, "loss": 1.8728, "step": 8190 }, { "epoch": 0.4565520316593278, "grad_norm": 0.6301288604736328, "learning_rate": 5.82475256922443e-05, "loss": 1.8616, "step": 8191 }, { "epoch": 0.45660776991249097, "grad_norm": 0.540440022945404, "learning_rate": 5.8238743296818396e-05, "loss": 1.7224, "step": 8192 }, { "epoch": 0.4566635081656541, "grad_norm": 0.5390138626098633, "learning_rate": 5.8229960640107617e-05, "loss": 1.556, "step": 8193 }, { "epoch": 0.4567192464188172, "grad_norm": 0.5261824131011963, "learning_rate": 5.822117772239045e-05, "loss": 1.6086, "step": 8194 }, { "epoch": 0.45677498467198036, "grad_norm": 0.543070375919342, "learning_rate": 5.821239454394547e-05, "loss": 1.5987, "step": 8195 }, { "epoch": 0.45683072292514354, "grad_norm": 0.6048296689987183, "learning_rate": 5.8203611105051204e-05, "loss": 1.7936, "step": 8196 }, { "epoch": 0.45688646117830667, "grad_norm": 0.5308238863945007, "learning_rate": 5.819482740598624e-05, "loss": 1.5304, "step": 8197 }, { "epoch": 0.4569421994314698, "grad_norm": 0.5806917548179626, "learning_rate": 5.8186043447029125e-05, "loss": 1.6869, "step": 8198 }, { "epoch": 0.456997937684633, "grad_norm": 0.5387137532234192, "learning_rate": 5.8177259228458444e-05, "loss": 1.7673, "step": 8199 }, { "epoch": 0.4570536759377961, "grad_norm": 0.5830815434455872, "learning_rate": 5.816847475055277e-05, "loss": 1.9119, "step": 8200 }, { "epoch": 0.45710941419095924, "grad_norm": 0.5564570426940918, "learning_rate": 5.8159690013590695e-05, "loss": 1.5385, "step": 8201 }, { "epoch": 0.4571651524441224, "grad_norm": 0.5688846707344055, "learning_rate": 5.815090501785083e-05, "loss": 1.5954, "step": 8202 }, { "epoch": 0.45722089069728555, "grad_norm": 0.6317092776298523, "learning_rate": 5.814211976361179e-05, "loss": 1.9886, "step": 8203 }, { "epoch": 0.4572766289504487, "grad_norm": 0.5649227499961853, "learning_rate": 5.813333425115218e-05, "loss": 1.6259, "step": 8204 }, { "epoch": 0.4573323672036118, "grad_norm": 0.5452385544776917, "learning_rate": 5.812454848075063e-05, "loss": 1.7129, "step": 8205 }, { "epoch": 0.457388105456775, "grad_norm": 0.5575756430625916, "learning_rate": 5.8115762452685774e-05, "loss": 1.7543, "step": 8206 }, { "epoch": 0.4574438437099381, "grad_norm": 0.5120208263397217, "learning_rate": 5.810697616723624e-05, "loss": 1.5619, "step": 8207 }, { "epoch": 0.45749958196310125, "grad_norm": 0.5111353397369385, "learning_rate": 5.809818962468069e-05, "loss": 1.5594, "step": 8208 }, { "epoch": 0.45755532021626444, "grad_norm": 0.5274066925048828, "learning_rate": 5.8089402825297776e-05, "loss": 1.5727, "step": 8209 }, { "epoch": 0.45761105846942757, "grad_norm": 0.531512439250946, "learning_rate": 5.80806157693662e-05, "loss": 1.6845, "step": 8210 }, { "epoch": 0.4576667967225907, "grad_norm": 0.587890088558197, "learning_rate": 5.807182845716458e-05, "loss": 1.8239, "step": 8211 }, { "epoch": 0.4577225349757539, "grad_norm": 0.543900191783905, "learning_rate": 5.8063040888971635e-05, "loss": 1.9671, "step": 8212 }, { "epoch": 0.457778273228917, "grad_norm": 0.5269332528114319, "learning_rate": 5.8054253065066024e-05, "loss": 1.5801, "step": 8213 }, { "epoch": 0.45783401148208014, "grad_norm": 0.5568074584007263, "learning_rate": 5.8045464985726474e-05, "loss": 1.5843, "step": 8214 }, { "epoch": 0.4578897497352433, "grad_norm": 0.5887969136238098, "learning_rate": 5.803667665123168e-05, "loss": 1.9532, "step": 8215 }, { "epoch": 0.45794548798840645, "grad_norm": 0.6071587204933167, "learning_rate": 5.802788806186038e-05, "loss": 1.9501, "step": 8216 }, { "epoch": 0.4580012262415696, "grad_norm": 0.5481032133102417, "learning_rate": 5.801909921789126e-05, "loss": 1.7435, "step": 8217 }, { "epoch": 0.4580569644947327, "grad_norm": 0.6313177347183228, "learning_rate": 5.801031011960306e-05, "loss": 1.928, "step": 8218 }, { "epoch": 0.4581127027478959, "grad_norm": 0.5789720416069031, "learning_rate": 5.800152076727454e-05, "loss": 1.8, "step": 8219 }, { "epoch": 0.458168441001059, "grad_norm": 0.5438299775123596, "learning_rate": 5.799273116118443e-05, "loss": 1.6805, "step": 8220 }, { "epoch": 0.45822417925422215, "grad_norm": 0.5296357870101929, "learning_rate": 5.798394130161149e-05, "loss": 1.4218, "step": 8221 }, { "epoch": 0.45827991750738534, "grad_norm": 0.6217812895774841, "learning_rate": 5.7975151188834475e-05, "loss": 1.7633, "step": 8222 }, { "epoch": 0.45833565576054847, "grad_norm": 0.6416480541229248, "learning_rate": 5.796636082313217e-05, "loss": 2.0147, "step": 8223 }, { "epoch": 0.4583913940137116, "grad_norm": 0.5263529419898987, "learning_rate": 5.795757020478334e-05, "loss": 1.5335, "step": 8224 }, { "epoch": 0.4584471322668748, "grad_norm": 0.565466046333313, "learning_rate": 5.794877933406679e-05, "loss": 1.778, "step": 8225 }, { "epoch": 0.4585028705200379, "grad_norm": 0.5382056832313538, "learning_rate": 5.79399882112613e-05, "loss": 1.678, "step": 8226 }, { "epoch": 0.45855860877320104, "grad_norm": 0.5097582340240479, "learning_rate": 5.7931196836645675e-05, "loss": 1.5224, "step": 8227 }, { "epoch": 0.45861434702636417, "grad_norm": 0.5619562268257141, "learning_rate": 5.792240521049872e-05, "loss": 1.9743, "step": 8228 }, { "epoch": 0.45867008527952735, "grad_norm": 0.57401442527771, "learning_rate": 5.791361333309926e-05, "loss": 1.6526, "step": 8229 }, { "epoch": 0.4587258235326905, "grad_norm": 0.557773232460022, "learning_rate": 5.790482120472615e-05, "loss": 1.7427, "step": 8230 }, { "epoch": 0.4587815617858536, "grad_norm": 0.5370197296142578, "learning_rate": 5.789602882565818e-05, "loss": 1.5028, "step": 8231 }, { "epoch": 0.4588373000390168, "grad_norm": 0.559916079044342, "learning_rate": 5.788723619617422e-05, "loss": 1.6115, "step": 8232 }, { "epoch": 0.4588930382921799, "grad_norm": 0.5461910367012024, "learning_rate": 5.787844331655311e-05, "loss": 1.5789, "step": 8233 }, { "epoch": 0.45894877654534305, "grad_norm": 0.5319302082061768, "learning_rate": 5.786965018707371e-05, "loss": 1.66, "step": 8234 }, { "epoch": 0.45900451479850624, "grad_norm": 0.5757958292961121, "learning_rate": 5.786085680801488e-05, "loss": 1.9192, "step": 8235 }, { "epoch": 0.45906025305166936, "grad_norm": 0.523041844367981, "learning_rate": 5.785206317965553e-05, "loss": 1.5435, "step": 8236 }, { "epoch": 0.4591159913048325, "grad_norm": 0.5196270942687988, "learning_rate": 5.7843269302274506e-05, "loss": 1.2152, "step": 8237 }, { "epoch": 0.4591717295579957, "grad_norm": 0.5284752249717712, "learning_rate": 5.7834475176150715e-05, "loss": 1.6407, "step": 8238 }, { "epoch": 0.4592274678111588, "grad_norm": 0.5639576315879822, "learning_rate": 5.782568080156303e-05, "loss": 1.8297, "step": 8239 }, { "epoch": 0.45928320606432194, "grad_norm": 0.5723278522491455, "learning_rate": 5.781688617879039e-05, "loss": 1.7981, "step": 8240 }, { "epoch": 0.45933894431748507, "grad_norm": 0.5638182759284973, "learning_rate": 5.780809130811169e-05, "loss": 1.6244, "step": 8241 }, { "epoch": 0.45939468257064825, "grad_norm": 0.5704604983329773, "learning_rate": 5.779929618980586e-05, "loss": 1.6348, "step": 8242 }, { "epoch": 0.4594504208238114, "grad_norm": 0.5768876671791077, "learning_rate": 5.779050082415184e-05, "loss": 1.7342, "step": 8243 }, { "epoch": 0.4595061590769745, "grad_norm": 0.5308094620704651, "learning_rate": 5.778170521142854e-05, "loss": 1.6838, "step": 8244 }, { "epoch": 0.4595618973301377, "grad_norm": 0.6009156703948975, "learning_rate": 5.777290935191493e-05, "loss": 1.72, "step": 8245 }, { "epoch": 0.4596176355833008, "grad_norm": 0.5695474743843079, "learning_rate": 5.776411324588995e-05, "loss": 1.6783, "step": 8246 }, { "epoch": 0.45967337383646395, "grad_norm": 0.5541953444480896, "learning_rate": 5.775531689363256e-05, "loss": 1.5248, "step": 8247 }, { "epoch": 0.45972911208962713, "grad_norm": 0.5543676614761353, "learning_rate": 5.7746520295421736e-05, "loss": 1.5673, "step": 8248 }, { "epoch": 0.45978485034279026, "grad_norm": 0.6300926804542542, "learning_rate": 5.773772345153648e-05, "loss": 1.9275, "step": 8249 }, { "epoch": 0.4598405885959534, "grad_norm": 0.580083429813385, "learning_rate": 5.772892636225572e-05, "loss": 1.583, "step": 8250 }, { "epoch": 0.4598963268491165, "grad_norm": 0.6072207689285278, "learning_rate": 5.7720129027858496e-05, "loss": 1.6752, "step": 8251 }, { "epoch": 0.4599520651022797, "grad_norm": 0.575436532497406, "learning_rate": 5.771133144862377e-05, "loss": 1.5191, "step": 8252 }, { "epoch": 0.46000780335544283, "grad_norm": 0.5946778655052185, "learning_rate": 5.770253362483059e-05, "loss": 1.7338, "step": 8253 }, { "epoch": 0.46006354160860596, "grad_norm": 0.5782346129417419, "learning_rate": 5.769373555675794e-05, "loss": 1.9825, "step": 8254 }, { "epoch": 0.46011927986176915, "grad_norm": 0.6065311431884766, "learning_rate": 5.7684937244684856e-05, "loss": 1.8879, "step": 8255 }, { "epoch": 0.4601750181149323, "grad_norm": 0.5789337158203125, "learning_rate": 5.767613868889038e-05, "loss": 1.5408, "step": 8256 }, { "epoch": 0.4602307563680954, "grad_norm": 0.5640459060668945, "learning_rate": 5.766733988965354e-05, "loss": 1.7434, "step": 8257 }, { "epoch": 0.4602864946212586, "grad_norm": 0.5351431965827942, "learning_rate": 5.765854084725337e-05, "loss": 1.7586, "step": 8258 }, { "epoch": 0.4603422328744217, "grad_norm": 0.6039308905601501, "learning_rate": 5.764974156196895e-05, "loss": 1.8, "step": 8259 }, { "epoch": 0.46039797112758485, "grad_norm": 0.5545447468757629, "learning_rate": 5.764094203407933e-05, "loss": 1.5867, "step": 8260 }, { "epoch": 0.46045370938074803, "grad_norm": 0.5933241248130798, "learning_rate": 5.763214226386355e-05, "loss": 1.8117, "step": 8261 }, { "epoch": 0.46050944763391116, "grad_norm": 0.6593655943870544, "learning_rate": 5.7623342251600745e-05, "loss": 1.6466, "step": 8262 }, { "epoch": 0.4605651858870743, "grad_norm": 0.5840887427330017, "learning_rate": 5.761454199756996e-05, "loss": 1.6135, "step": 8263 }, { "epoch": 0.4606209241402374, "grad_norm": 0.5381019711494446, "learning_rate": 5.7605741502050314e-05, "loss": 1.6211, "step": 8264 }, { "epoch": 0.4606766623934006, "grad_norm": 0.6085990071296692, "learning_rate": 5.759694076532087e-05, "loss": 1.795, "step": 8265 }, { "epoch": 0.46073240064656373, "grad_norm": 0.5574647784233093, "learning_rate": 5.758813978766077e-05, "loss": 1.4925, "step": 8266 }, { "epoch": 0.46078813889972686, "grad_norm": 0.6263840794563293, "learning_rate": 5.75793385693491e-05, "loss": 1.8677, "step": 8267 }, { "epoch": 0.46084387715289005, "grad_norm": 0.543647289276123, "learning_rate": 5.7570537110665026e-05, "loss": 1.7692, "step": 8268 }, { "epoch": 0.4608996154060532, "grad_norm": 0.6330240368843079, "learning_rate": 5.7561735411887644e-05, "loss": 1.8521, "step": 8269 }, { "epoch": 0.4609553536592163, "grad_norm": 0.5961319208145142, "learning_rate": 5.75529334732961e-05, "loss": 1.8511, "step": 8270 }, { "epoch": 0.4610110919123795, "grad_norm": 0.5653590559959412, "learning_rate": 5.754413129516956e-05, "loss": 1.6472, "step": 8271 }, { "epoch": 0.4610668301655426, "grad_norm": 0.5134671330451965, "learning_rate": 5.753532887778714e-05, "loss": 1.5722, "step": 8272 }, { "epoch": 0.46112256841870575, "grad_norm": 0.5468015074729919, "learning_rate": 5.7526526221428036e-05, "loss": 1.6829, "step": 8273 }, { "epoch": 0.4611783066718689, "grad_norm": 0.5542712211608887, "learning_rate": 5.751772332637137e-05, "loss": 1.6583, "step": 8274 }, { "epoch": 0.46123404492503206, "grad_norm": 0.554300844669342, "learning_rate": 5.75089201928964e-05, "loss": 1.7805, "step": 8275 }, { "epoch": 0.4612897831781952, "grad_norm": 0.5648434162139893, "learning_rate": 5.750011682128222e-05, "loss": 1.8315, "step": 8276 }, { "epoch": 0.4613455214313583, "grad_norm": 0.5622681975364685, "learning_rate": 5.7491313211808095e-05, "loss": 1.6431, "step": 8277 }, { "epoch": 0.4614012596845215, "grad_norm": 0.5813915133476257, "learning_rate": 5.748250936475318e-05, "loss": 1.9023, "step": 8278 }, { "epoch": 0.46145699793768463, "grad_norm": 0.5567924380302429, "learning_rate": 5.747370528039668e-05, "loss": 1.7468, "step": 8279 }, { "epoch": 0.46151273619084776, "grad_norm": 0.5861298441886902, "learning_rate": 5.7464900959017844e-05, "loss": 1.7059, "step": 8280 }, { "epoch": 0.46156847444401095, "grad_norm": 0.642804741859436, "learning_rate": 5.745609640089585e-05, "loss": 1.8385, "step": 8281 }, { "epoch": 0.4616242126971741, "grad_norm": 0.5455397963523865, "learning_rate": 5.744729160630998e-05, "loss": 1.5585, "step": 8282 }, { "epoch": 0.4616799509503372, "grad_norm": 0.5456379055976868, "learning_rate": 5.743848657553943e-05, "loss": 1.6787, "step": 8283 }, { "epoch": 0.4617356892035004, "grad_norm": 0.6248784065246582, "learning_rate": 5.742968130886346e-05, "loss": 1.9457, "step": 8284 }, { "epoch": 0.4617914274566635, "grad_norm": 0.5508323311805725, "learning_rate": 5.74208758065613e-05, "loss": 1.7643, "step": 8285 }, { "epoch": 0.46184716570982665, "grad_norm": 0.5070561170578003, "learning_rate": 5.741207006891224e-05, "loss": 1.414, "step": 8286 }, { "epoch": 0.4619029039629898, "grad_norm": 0.5954271554946899, "learning_rate": 5.740326409619552e-05, "loss": 1.7004, "step": 8287 }, { "epoch": 0.46195864221615296, "grad_norm": 0.5585724115371704, "learning_rate": 5.739445788869043e-05, "loss": 1.7653, "step": 8288 }, { "epoch": 0.4620143804693161, "grad_norm": 0.5526925325393677, "learning_rate": 5.738565144667626e-05, "loss": 1.7572, "step": 8289 }, { "epoch": 0.4620701187224792, "grad_norm": 0.5708301663398743, "learning_rate": 5.737684477043228e-05, "loss": 1.8134, "step": 8290 }, { "epoch": 0.4621258569756424, "grad_norm": 0.5142967104911804, "learning_rate": 5.736803786023779e-05, "loss": 1.4841, "step": 8291 }, { "epoch": 0.46218159522880553, "grad_norm": 0.6403586864471436, "learning_rate": 5.7359230716372105e-05, "loss": 1.9146, "step": 8292 }, { "epoch": 0.46223733348196866, "grad_norm": 0.5327916145324707, "learning_rate": 5.735042333911452e-05, "loss": 1.6559, "step": 8293 }, { "epoch": 0.46229307173513184, "grad_norm": 0.5524441599845886, "learning_rate": 5.734161572874437e-05, "loss": 1.6659, "step": 8294 }, { "epoch": 0.462348809988295, "grad_norm": 0.5722818970680237, "learning_rate": 5.7332807885540976e-05, "loss": 1.7702, "step": 8295 }, { "epoch": 0.4624045482414581, "grad_norm": 0.5551111698150635, "learning_rate": 5.7323999809783656e-05, "loss": 1.6766, "step": 8296 }, { "epoch": 0.46246028649462123, "grad_norm": 0.5412301421165466, "learning_rate": 5.731519150175179e-05, "loss": 1.6475, "step": 8297 }, { "epoch": 0.4625160247477844, "grad_norm": 0.5476828813552856, "learning_rate": 5.730638296172467e-05, "loss": 1.643, "step": 8298 }, { "epoch": 0.46257176300094754, "grad_norm": 0.5418581366539001, "learning_rate": 5.7297574189981705e-05, "loss": 1.5904, "step": 8299 }, { "epoch": 0.4626275012541107, "grad_norm": 0.5094223022460938, "learning_rate": 5.7288765186802204e-05, "loss": 1.6782, "step": 8300 }, { "epoch": 0.46268323950727386, "grad_norm": 0.5535764694213867, "learning_rate": 5.72799559524656e-05, "loss": 1.7858, "step": 8301 }, { "epoch": 0.462738977760437, "grad_norm": 0.5554370284080505, "learning_rate": 5.7271146487251224e-05, "loss": 1.757, "step": 8302 }, { "epoch": 0.4627947160136001, "grad_norm": 0.5177475810050964, "learning_rate": 5.726233679143849e-05, "loss": 1.7816, "step": 8303 }, { "epoch": 0.4628504542667633, "grad_norm": 0.5340207815170288, "learning_rate": 5.725352686530676e-05, "loss": 1.742, "step": 8304 }, { "epoch": 0.46290619251992643, "grad_norm": 0.5540534257888794, "learning_rate": 5.724471670913545e-05, "loss": 1.7751, "step": 8305 }, { "epoch": 0.46296193077308956, "grad_norm": 0.539763331413269, "learning_rate": 5.7235906323203956e-05, "loss": 1.6988, "step": 8306 }, { "epoch": 0.46301766902625274, "grad_norm": 0.5649262070655823, "learning_rate": 5.7227095707791714e-05, "loss": 1.6722, "step": 8307 }, { "epoch": 0.46307340727941587, "grad_norm": 0.583903968334198, "learning_rate": 5.721828486317814e-05, "loss": 1.8056, "step": 8308 }, { "epoch": 0.463129145532579, "grad_norm": 0.5246012210845947, "learning_rate": 5.7209473789642644e-05, "loss": 1.4819, "step": 8309 }, { "epoch": 0.46318488378574213, "grad_norm": 0.5652540922164917, "learning_rate": 5.720066248746468e-05, "loss": 1.7022, "step": 8310 }, { "epoch": 0.4632406220389053, "grad_norm": 0.5494220852851868, "learning_rate": 5.7191850956923675e-05, "loss": 1.5258, "step": 8311 }, { "epoch": 0.46329636029206844, "grad_norm": 0.5923638343811035, "learning_rate": 5.7183039198299105e-05, "loss": 1.7439, "step": 8312 }, { "epoch": 0.46335209854523157, "grad_norm": 0.6051487922668457, "learning_rate": 5.717422721187039e-05, "loss": 1.8911, "step": 8313 }, { "epoch": 0.46340783679839476, "grad_norm": 0.5064337253570557, "learning_rate": 5.7165414997917045e-05, "loss": 1.6547, "step": 8314 }, { "epoch": 0.4634635750515579, "grad_norm": 0.6165828704833984, "learning_rate": 5.715660255671848e-05, "loss": 1.8988, "step": 8315 }, { "epoch": 0.463519313304721, "grad_norm": 0.5490414500236511, "learning_rate": 5.714778988855422e-05, "loss": 1.8075, "step": 8316 }, { "epoch": 0.4635750515578842, "grad_norm": 0.5493695139884949, "learning_rate": 5.713897699370376e-05, "loss": 1.6288, "step": 8317 }, { "epoch": 0.46363078981104733, "grad_norm": 0.5596882700920105, "learning_rate": 5.713016387244656e-05, "loss": 1.6575, "step": 8318 }, { "epoch": 0.46368652806421046, "grad_norm": 0.562776505947113, "learning_rate": 5.7121350525062126e-05, "loss": 1.7129, "step": 8319 }, { "epoch": 0.4637422663173736, "grad_norm": 0.6399055123329163, "learning_rate": 5.7112536951829975e-05, "loss": 1.7888, "step": 8320 }, { "epoch": 0.46379800457053677, "grad_norm": 0.5227872729301453, "learning_rate": 5.710372315302963e-05, "loss": 1.6324, "step": 8321 }, { "epoch": 0.4638537428236999, "grad_norm": 0.5664421319961548, "learning_rate": 5.70949091289406e-05, "loss": 1.5484, "step": 8322 }, { "epoch": 0.46390948107686303, "grad_norm": 0.5465877652168274, "learning_rate": 5.708609487984242e-05, "loss": 1.5863, "step": 8323 }, { "epoch": 0.4639652193300262, "grad_norm": 0.562119722366333, "learning_rate": 5.707728040601462e-05, "loss": 1.7411, "step": 8324 }, { "epoch": 0.46402095758318934, "grad_norm": 0.569681704044342, "learning_rate": 5.706846570773676e-05, "loss": 1.6488, "step": 8325 }, { "epoch": 0.46407669583635247, "grad_norm": 0.6219793558120728, "learning_rate": 5.7059650785288354e-05, "loss": 1.7995, "step": 8326 }, { "epoch": 0.46413243408951566, "grad_norm": 0.5750408172607422, "learning_rate": 5.705083563894902e-05, "loss": 1.8457, "step": 8327 }, { "epoch": 0.4641881723426788, "grad_norm": 0.5338056683540344, "learning_rate": 5.7042020268998265e-05, "loss": 1.665, "step": 8328 }, { "epoch": 0.4642439105958419, "grad_norm": 0.5091413259506226, "learning_rate": 5.703320467571569e-05, "loss": 1.5915, "step": 8329 }, { "epoch": 0.4642996488490051, "grad_norm": 0.567847490310669, "learning_rate": 5.7024388859380875e-05, "loss": 1.6417, "step": 8330 }, { "epoch": 0.4643553871021682, "grad_norm": 0.591010332107544, "learning_rate": 5.701557282027339e-05, "loss": 1.8457, "step": 8331 }, { "epoch": 0.46441112535533136, "grad_norm": 0.5327983498573303, "learning_rate": 5.700675655867285e-05, "loss": 1.6806, "step": 8332 }, { "epoch": 0.4644668636084945, "grad_norm": 0.5359470844268799, "learning_rate": 5.6997940074858835e-05, "loss": 1.5137, "step": 8333 }, { "epoch": 0.46452260186165767, "grad_norm": 0.5727723240852356, "learning_rate": 5.698912336911097e-05, "loss": 1.737, "step": 8334 }, { "epoch": 0.4645783401148208, "grad_norm": 0.5366725325584412, "learning_rate": 5.6980306441708854e-05, "loss": 1.5039, "step": 8335 }, { "epoch": 0.4646340783679839, "grad_norm": 0.5799429416656494, "learning_rate": 5.6971489292932126e-05, "loss": 1.7687, "step": 8336 }, { "epoch": 0.4646898166211471, "grad_norm": 0.6180622577667236, "learning_rate": 5.69626719230604e-05, "loss": 1.8375, "step": 8337 }, { "epoch": 0.46474555487431024, "grad_norm": 0.5698204636573792, "learning_rate": 5.6953854332373314e-05, "loss": 1.6076, "step": 8338 }, { "epoch": 0.46480129312747337, "grad_norm": 0.5486071109771729, "learning_rate": 5.6945036521150495e-05, "loss": 1.75, "step": 8339 }, { "epoch": 0.46485703138063655, "grad_norm": 0.5504134893417358, "learning_rate": 5.693621848967163e-05, "loss": 1.753, "step": 8340 }, { "epoch": 0.4649127696337997, "grad_norm": 0.5678994059562683, "learning_rate": 5.6927400238216354e-05, "loss": 1.845, "step": 8341 }, { "epoch": 0.4649685078869628, "grad_norm": 0.5259969234466553, "learning_rate": 5.6918581767064325e-05, "loss": 1.5699, "step": 8342 }, { "epoch": 0.46502424614012594, "grad_norm": 0.5243310928344727, "learning_rate": 5.690976307649523e-05, "loss": 1.5899, "step": 8343 }, { "epoch": 0.4650799843932891, "grad_norm": 0.5647771954536438, "learning_rate": 5.6900944166788725e-05, "loss": 1.7661, "step": 8344 }, { "epoch": 0.46513572264645225, "grad_norm": 0.6884542107582092, "learning_rate": 5.689212503822452e-05, "loss": 1.5225, "step": 8345 }, { "epoch": 0.4651914608996154, "grad_norm": 0.5403727889060974, "learning_rate": 5.688330569108228e-05, "loss": 1.5896, "step": 8346 }, { "epoch": 0.46524719915277857, "grad_norm": 0.5732728838920593, "learning_rate": 5.6874486125641726e-05, "loss": 1.5632, "step": 8347 }, { "epoch": 0.4653029374059417, "grad_norm": 0.5338377356529236, "learning_rate": 5.686566634218254e-05, "loss": 1.679, "step": 8348 }, { "epoch": 0.4653586756591048, "grad_norm": 0.6053128242492676, "learning_rate": 5.685684634098447e-05, "loss": 2.0888, "step": 8349 }, { "epoch": 0.465414413912268, "grad_norm": 0.5830248594284058, "learning_rate": 5.684802612232719e-05, "loss": 1.7972, "step": 8350 }, { "epoch": 0.46547015216543114, "grad_norm": 0.6264218688011169, "learning_rate": 5.683920568649047e-05, "loss": 1.8225, "step": 8351 }, { "epoch": 0.46552589041859427, "grad_norm": 0.6199706196784973, "learning_rate": 5.6830385033753995e-05, "loss": 1.6771, "step": 8352 }, { "epoch": 0.46558162867175745, "grad_norm": 0.5402054190635681, "learning_rate": 5.682156416439755e-05, "loss": 1.3349, "step": 8353 }, { "epoch": 0.4656373669249206, "grad_norm": 0.5562443733215332, "learning_rate": 5.681274307870085e-05, "loss": 1.606, "step": 8354 }, { "epoch": 0.4656931051780837, "grad_norm": 0.6087068915367126, "learning_rate": 5.680392177694366e-05, "loss": 1.7091, "step": 8355 }, { "epoch": 0.46574884343124684, "grad_norm": 0.5770891904830933, "learning_rate": 5.679510025940575e-05, "loss": 1.7989, "step": 8356 }, { "epoch": 0.46580458168441, "grad_norm": 0.5513335466384888, "learning_rate": 5.6786278526366875e-05, "loss": 1.5115, "step": 8357 }, { "epoch": 0.46586031993757315, "grad_norm": 0.5334859490394592, "learning_rate": 5.677745657810681e-05, "loss": 1.5391, "step": 8358 }, { "epoch": 0.4659160581907363, "grad_norm": 0.51854008436203, "learning_rate": 5.6768634414905344e-05, "loss": 1.4878, "step": 8359 }, { "epoch": 0.46597179644389947, "grad_norm": 0.5759007930755615, "learning_rate": 5.675981203704226e-05, "loss": 1.7812, "step": 8360 }, { "epoch": 0.4660275346970626, "grad_norm": 0.5255948305130005, "learning_rate": 5.675098944479733e-05, "loss": 1.6782, "step": 8361 }, { "epoch": 0.4660832729502257, "grad_norm": 0.5190218091011047, "learning_rate": 5.67421666384504e-05, "loss": 1.4408, "step": 8362 }, { "epoch": 0.4661390112033889, "grad_norm": 0.5538722276687622, "learning_rate": 5.673334361828124e-05, "loss": 1.6993, "step": 8363 }, { "epoch": 0.46619474945655204, "grad_norm": 0.5251713991165161, "learning_rate": 5.672452038456969e-05, "loss": 1.5929, "step": 8364 }, { "epoch": 0.46625048770971517, "grad_norm": 0.5203914642333984, "learning_rate": 5.671569693759554e-05, "loss": 1.5579, "step": 8365 }, { "epoch": 0.4663062259628783, "grad_norm": 0.4919300675392151, "learning_rate": 5.670687327763866e-05, "loss": 1.5625, "step": 8366 }, { "epoch": 0.4663619642160415, "grad_norm": 0.5500087141990662, "learning_rate": 5.6698049404978845e-05, "loss": 1.6695, "step": 8367 }, { "epoch": 0.4664177024692046, "grad_norm": 0.5846395492553711, "learning_rate": 5.6689225319895966e-05, "loss": 1.884, "step": 8368 }, { "epoch": 0.46647344072236774, "grad_norm": 0.5971377491950989, "learning_rate": 5.668040102266987e-05, "loss": 1.9091, "step": 8369 }, { "epoch": 0.4665291789755309, "grad_norm": 0.5873506665229797, "learning_rate": 5.6671576513580385e-05, "loss": 1.7085, "step": 8370 }, { "epoch": 0.46658491722869405, "grad_norm": 0.551792323589325, "learning_rate": 5.66627517929074e-05, "loss": 1.5626, "step": 8371 }, { "epoch": 0.4666406554818572, "grad_norm": 0.5586331486701965, "learning_rate": 5.665392686093076e-05, "loss": 1.7621, "step": 8372 }, { "epoch": 0.46669639373502037, "grad_norm": 0.6477528810501099, "learning_rate": 5.664510171793038e-05, "loss": 1.9983, "step": 8373 }, { "epoch": 0.4667521319881835, "grad_norm": 0.5568731427192688, "learning_rate": 5.6636276364186105e-05, "loss": 1.5046, "step": 8374 }, { "epoch": 0.4668078702413466, "grad_norm": 0.5492534637451172, "learning_rate": 5.6627450799977844e-05, "loss": 1.6931, "step": 8375 }, { "epoch": 0.4668636084945098, "grad_norm": 0.5230808854103088, "learning_rate": 5.661862502558547e-05, "loss": 1.5232, "step": 8376 }, { "epoch": 0.46691934674767294, "grad_norm": 0.5762078762054443, "learning_rate": 5.660979904128891e-05, "loss": 1.8327, "step": 8377 }, { "epoch": 0.46697508500083607, "grad_norm": 0.5496635437011719, "learning_rate": 5.660097284736805e-05, "loss": 1.5354, "step": 8378 }, { "epoch": 0.4670308232539992, "grad_norm": 0.5177884101867676, "learning_rate": 5.6592146444102826e-05, "loss": 1.4303, "step": 8379 }, { "epoch": 0.4670865615071624, "grad_norm": 0.6022128462791443, "learning_rate": 5.658331983177315e-05, "loss": 1.9321, "step": 8380 }, { "epoch": 0.4671422997603255, "grad_norm": 0.5913931131362915, "learning_rate": 5.657449301065895e-05, "loss": 1.9125, "step": 8381 }, { "epoch": 0.46719803801348864, "grad_norm": 0.4976262152194977, "learning_rate": 5.656566598104017e-05, "loss": 1.6072, "step": 8382 }, { "epoch": 0.4672537762666518, "grad_norm": 0.5472914576530457, "learning_rate": 5.655683874319675e-05, "loss": 1.719, "step": 8383 }, { "epoch": 0.46730951451981495, "grad_norm": 0.5451732277870178, "learning_rate": 5.6548011297408634e-05, "loss": 1.6492, "step": 8384 }, { "epoch": 0.4673652527729781, "grad_norm": 0.5876046419143677, "learning_rate": 5.653918364395575e-05, "loss": 1.7208, "step": 8385 }, { "epoch": 0.46742099102614126, "grad_norm": 0.5409192442893982, "learning_rate": 5.653035578311812e-05, "loss": 1.6186, "step": 8386 }, { "epoch": 0.4674767292793044, "grad_norm": 0.5066797733306885, "learning_rate": 5.652152771517566e-05, "loss": 1.2929, "step": 8387 }, { "epoch": 0.4675324675324675, "grad_norm": 0.5531768202781677, "learning_rate": 5.651269944040838e-05, "loss": 1.7447, "step": 8388 }, { "epoch": 0.46758820578563065, "grad_norm": 0.5745431780815125, "learning_rate": 5.650387095909623e-05, "loss": 1.7896, "step": 8389 }, { "epoch": 0.46764394403879384, "grad_norm": 0.5450076460838318, "learning_rate": 5.649504227151922e-05, "loss": 1.5537, "step": 8390 }, { "epoch": 0.46769968229195696, "grad_norm": 0.5614714622497559, "learning_rate": 5.648621337795733e-05, "loss": 1.5894, "step": 8391 }, { "epoch": 0.4677554205451201, "grad_norm": 0.6122470498085022, "learning_rate": 5.647738427869058e-05, "loss": 1.8336, "step": 8392 }, { "epoch": 0.4678111587982833, "grad_norm": 0.598466157913208, "learning_rate": 5.6468554973998955e-05, "loss": 1.799, "step": 8393 }, { "epoch": 0.4678668970514464, "grad_norm": 0.5752211213111877, "learning_rate": 5.645972546416248e-05, "loss": 1.7678, "step": 8394 }, { "epoch": 0.46792263530460954, "grad_norm": 0.5438199043273926, "learning_rate": 5.6450895749461194e-05, "loss": 1.6982, "step": 8395 }, { "epoch": 0.4679783735577727, "grad_norm": 0.5414747595787048, "learning_rate": 5.64420658301751e-05, "loss": 1.5794, "step": 8396 }, { "epoch": 0.46803411181093585, "grad_norm": 0.5446813702583313, "learning_rate": 5.643323570658424e-05, "loss": 1.4545, "step": 8397 }, { "epoch": 0.468089850064099, "grad_norm": 0.5998760461807251, "learning_rate": 5.642440537896863e-05, "loss": 1.6886, "step": 8398 }, { "epoch": 0.46814558831726216, "grad_norm": 0.5757097005844116, "learning_rate": 5.6415574847608365e-05, "loss": 1.6932, "step": 8399 }, { "epoch": 0.4682013265704253, "grad_norm": 0.5681119561195374, "learning_rate": 5.640674411278345e-05, "loss": 1.6357, "step": 8400 }, { "epoch": 0.4682570648235884, "grad_norm": 0.5782068371772766, "learning_rate": 5.6397913174773986e-05, "loss": 1.4748, "step": 8401 }, { "epoch": 0.46831280307675155, "grad_norm": 0.5838581323623657, "learning_rate": 5.638908203386001e-05, "loss": 1.6619, "step": 8402 }, { "epoch": 0.46836854132991473, "grad_norm": 0.5535818934440613, "learning_rate": 5.638025069032159e-05, "loss": 1.7486, "step": 8403 }, { "epoch": 0.46842427958307786, "grad_norm": 0.5350418090820312, "learning_rate": 5.637141914443883e-05, "loss": 1.6243, "step": 8404 }, { "epoch": 0.468480017836241, "grad_norm": 0.5376988053321838, "learning_rate": 5.6362587396491805e-05, "loss": 1.6984, "step": 8405 }, { "epoch": 0.4685357560894042, "grad_norm": 0.593912661075592, "learning_rate": 5.63537554467606e-05, "loss": 1.6001, "step": 8406 }, { "epoch": 0.4685914943425673, "grad_norm": 0.5185176730155945, "learning_rate": 5.634492329552531e-05, "loss": 1.4702, "step": 8407 }, { "epoch": 0.46864723259573043, "grad_norm": 0.5814734101295471, "learning_rate": 5.6336090943066063e-05, "loss": 1.8799, "step": 8408 }, { "epoch": 0.4687029708488936, "grad_norm": 0.5562795400619507, "learning_rate": 5.632725838966294e-05, "loss": 1.7107, "step": 8409 }, { "epoch": 0.46875870910205675, "grad_norm": 0.5342075824737549, "learning_rate": 5.631842563559608e-05, "loss": 1.6502, "step": 8410 }, { "epoch": 0.4688144473552199, "grad_norm": 0.5376294255256653, "learning_rate": 5.630959268114558e-05, "loss": 1.6374, "step": 8411 }, { "epoch": 0.46887018560838306, "grad_norm": 0.5461024641990662, "learning_rate": 5.630075952659162e-05, "loss": 1.7209, "step": 8412 }, { "epoch": 0.4689259238615462, "grad_norm": 0.5888074040412903, "learning_rate": 5.629192617221427e-05, "loss": 1.7923, "step": 8413 }, { "epoch": 0.4689816621147093, "grad_norm": 0.5504298210144043, "learning_rate": 5.6283092618293734e-05, "loss": 1.6201, "step": 8414 }, { "epoch": 0.46903740036787245, "grad_norm": 0.5408875942230225, "learning_rate": 5.627425886511012e-05, "loss": 1.5646, "step": 8415 }, { "epoch": 0.46909313862103563, "grad_norm": 0.5847890377044678, "learning_rate": 5.626542491294359e-05, "loss": 1.7076, "step": 8416 }, { "epoch": 0.46914887687419876, "grad_norm": 0.5354915261268616, "learning_rate": 5.6256590762074315e-05, "loss": 1.5801, "step": 8417 }, { "epoch": 0.4692046151273619, "grad_norm": 0.5805383324623108, "learning_rate": 5.624775641278247e-05, "loss": 1.8075, "step": 8418 }, { "epoch": 0.4692603533805251, "grad_norm": 0.5791111588478088, "learning_rate": 5.6238921865348204e-05, "loss": 1.8437, "step": 8419 }, { "epoch": 0.4693160916336882, "grad_norm": 0.5863295793533325, "learning_rate": 5.623008712005172e-05, "loss": 1.7371, "step": 8420 }, { "epoch": 0.46937182988685133, "grad_norm": 0.5539514422416687, "learning_rate": 5.62212521771732e-05, "loss": 1.646, "step": 8421 }, { "epoch": 0.4694275681400145, "grad_norm": 0.5049216151237488, "learning_rate": 5.6212417036992826e-05, "loss": 1.447, "step": 8422 }, { "epoch": 0.46948330639317765, "grad_norm": 0.5240146517753601, "learning_rate": 5.620358169979082e-05, "loss": 1.729, "step": 8423 }, { "epoch": 0.4695390446463408, "grad_norm": 0.5284691452980042, "learning_rate": 5.619474616584734e-05, "loss": 1.5096, "step": 8424 }, { "epoch": 0.4695947828995039, "grad_norm": 0.5499683618545532, "learning_rate": 5.618591043544266e-05, "loss": 1.5803, "step": 8425 }, { "epoch": 0.4696505211526671, "grad_norm": 0.588737964630127, "learning_rate": 5.617707450885695e-05, "loss": 1.6776, "step": 8426 }, { "epoch": 0.4697062594058302, "grad_norm": 0.5827232599258423, "learning_rate": 5.6168238386370466e-05, "loss": 1.6402, "step": 8427 }, { "epoch": 0.46976199765899335, "grad_norm": 0.5729832649230957, "learning_rate": 5.615940206826341e-05, "loss": 1.7642, "step": 8428 }, { "epoch": 0.46981773591215653, "grad_norm": 0.5644805431365967, "learning_rate": 5.6150565554816035e-05, "loss": 1.7081, "step": 8429 }, { "epoch": 0.46987347416531966, "grad_norm": 0.5413994193077087, "learning_rate": 5.6141728846308586e-05, "loss": 1.7756, "step": 8430 }, { "epoch": 0.4699292124184828, "grad_norm": 0.5305155515670776, "learning_rate": 5.6132891943021304e-05, "loss": 1.5193, "step": 8431 }, { "epoch": 0.469984950671646, "grad_norm": 0.5325213074684143, "learning_rate": 5.612405484523444e-05, "loss": 1.5169, "step": 8432 }, { "epoch": 0.4700406889248091, "grad_norm": 0.5783179998397827, "learning_rate": 5.6115217553228274e-05, "loss": 1.6159, "step": 8433 }, { "epoch": 0.47009642717797223, "grad_norm": 0.5537718534469604, "learning_rate": 5.610638006728306e-05, "loss": 1.6027, "step": 8434 }, { "epoch": 0.4701521654311354, "grad_norm": 0.6395325660705566, "learning_rate": 5.609754238767907e-05, "loss": 1.3854, "step": 8435 }, { "epoch": 0.47020790368429854, "grad_norm": 0.5301234126091003, "learning_rate": 5.608870451469659e-05, "loss": 1.6888, "step": 8436 }, { "epoch": 0.4702636419374617, "grad_norm": 0.5246771574020386, "learning_rate": 5.607986644861588e-05, "loss": 1.5963, "step": 8437 }, { "epoch": 0.4703193801906248, "grad_norm": 0.5331987738609314, "learning_rate": 5.607102818971729e-05, "loss": 1.7791, "step": 8438 }, { "epoch": 0.470375118443788, "grad_norm": 0.5587426424026489, "learning_rate": 5.6062189738281056e-05, "loss": 1.744, "step": 8439 }, { "epoch": 0.4704308566969511, "grad_norm": 0.5236651301383972, "learning_rate": 5.6053351094587526e-05, "loss": 1.4963, "step": 8440 }, { "epoch": 0.47048659495011425, "grad_norm": 0.5496351718902588, "learning_rate": 5.604451225891698e-05, "loss": 1.491, "step": 8441 }, { "epoch": 0.47054233320327743, "grad_norm": 0.5666020512580872, "learning_rate": 5.603567323154975e-05, "loss": 1.6241, "step": 8442 }, { "epoch": 0.47059807145644056, "grad_norm": 0.5503633618354797, "learning_rate": 5.602683401276615e-05, "loss": 1.6522, "step": 8443 }, { "epoch": 0.4706538097096037, "grad_norm": 0.5833953022956848, "learning_rate": 5.601799460284654e-05, "loss": 1.7361, "step": 8444 }, { "epoch": 0.47070954796276687, "grad_norm": 0.5664584636688232, "learning_rate": 5.60091550020712e-05, "loss": 1.6558, "step": 8445 }, { "epoch": 0.47076528621593, "grad_norm": 0.5645166635513306, "learning_rate": 5.60003152107205e-05, "loss": 1.7492, "step": 8446 }, { "epoch": 0.47082102446909313, "grad_norm": 0.5689491629600525, "learning_rate": 5.599147522907481e-05, "loss": 1.6956, "step": 8447 }, { "epoch": 0.47087676272225626, "grad_norm": 0.6192054152488708, "learning_rate": 5.598263505741443e-05, "loss": 1.5153, "step": 8448 }, { "epoch": 0.47093250097541944, "grad_norm": 0.5669271945953369, "learning_rate": 5.597379469601978e-05, "loss": 1.5719, "step": 8449 }, { "epoch": 0.4709882392285826, "grad_norm": 0.5729002952575684, "learning_rate": 5.5964954145171145e-05, "loss": 1.7169, "step": 8450 }, { "epoch": 0.4710439774817457, "grad_norm": 0.532015323638916, "learning_rate": 5.595611340514898e-05, "loss": 1.6197, "step": 8451 }, { "epoch": 0.4710997157349089, "grad_norm": 0.5148784518241882, "learning_rate": 5.594727247623361e-05, "loss": 1.611, "step": 8452 }, { "epoch": 0.471155453988072, "grad_norm": 0.5674019455909729, "learning_rate": 5.593843135870545e-05, "loss": 1.6694, "step": 8453 }, { "epoch": 0.47121119224123514, "grad_norm": 0.5392388701438904, "learning_rate": 5.592959005284485e-05, "loss": 1.5342, "step": 8454 }, { "epoch": 0.47126693049439833, "grad_norm": 0.5939937829971313, "learning_rate": 5.592074855893223e-05, "loss": 1.7698, "step": 8455 }, { "epoch": 0.47132266874756146, "grad_norm": 0.603952169418335, "learning_rate": 5.591190687724799e-05, "loss": 1.885, "step": 8456 }, { "epoch": 0.4713784070007246, "grad_norm": 0.5169516801834106, "learning_rate": 5.590306500807253e-05, "loss": 1.4436, "step": 8457 }, { "epoch": 0.47143414525388777, "grad_norm": 0.5573791265487671, "learning_rate": 5.589422295168626e-05, "loss": 1.6708, "step": 8458 }, { "epoch": 0.4714898835070509, "grad_norm": 0.5594834685325623, "learning_rate": 5.5885380708369606e-05, "loss": 1.6496, "step": 8459 }, { "epoch": 0.47154562176021403, "grad_norm": 0.5771753787994385, "learning_rate": 5.5876538278403e-05, "loss": 1.7612, "step": 8460 }, { "epoch": 0.47160136001337716, "grad_norm": 0.5862414240837097, "learning_rate": 5.586769566206686e-05, "loss": 1.9365, "step": 8461 }, { "epoch": 0.47165709826654034, "grad_norm": 0.5807836055755615, "learning_rate": 5.585885285964163e-05, "loss": 1.623, "step": 8462 }, { "epoch": 0.47171283651970347, "grad_norm": 0.5933867692947388, "learning_rate": 5.5850009871407716e-05, "loss": 1.8284, "step": 8463 }, { "epoch": 0.4717685747728666, "grad_norm": 0.5377753973007202, "learning_rate": 5.584116669764563e-05, "loss": 1.462, "step": 8464 }, { "epoch": 0.4718243130260298, "grad_norm": 0.5384745597839355, "learning_rate": 5.583232333863577e-05, "loss": 1.5878, "step": 8465 }, { "epoch": 0.4718800512791929, "grad_norm": 0.5296236872673035, "learning_rate": 5.582347979465864e-05, "loss": 1.6045, "step": 8466 }, { "epoch": 0.47193578953235604, "grad_norm": 0.6247029304504395, "learning_rate": 5.581463606599467e-05, "loss": 1.6802, "step": 8467 }, { "epoch": 0.4719915277855192, "grad_norm": 0.5652837157249451, "learning_rate": 5.580579215292435e-05, "loss": 1.6555, "step": 8468 }, { "epoch": 0.47204726603868236, "grad_norm": 0.5700575709342957, "learning_rate": 5.5796948055728147e-05, "loss": 1.8245, "step": 8469 }, { "epoch": 0.4721030042918455, "grad_norm": 0.5366250276565552, "learning_rate": 5.578810377468656e-05, "loss": 1.8156, "step": 8470 }, { "epoch": 0.4721587425450086, "grad_norm": 0.5650043487548828, "learning_rate": 5.577925931008007e-05, "loss": 1.6757, "step": 8471 }, { "epoch": 0.4722144807981718, "grad_norm": 0.5967742204666138, "learning_rate": 5.577041466218915e-05, "loss": 1.939, "step": 8472 }, { "epoch": 0.4722702190513349, "grad_norm": 0.5320480465888977, "learning_rate": 5.576156983129435e-05, "loss": 1.5016, "step": 8473 }, { "epoch": 0.47232595730449806, "grad_norm": 0.5365233421325684, "learning_rate": 5.5752724817676125e-05, "loss": 1.5794, "step": 8474 }, { "epoch": 0.47238169555766124, "grad_norm": 0.5704277753829956, "learning_rate": 5.5743879621615026e-05, "loss": 1.5467, "step": 8475 }, { "epoch": 0.47243743381082437, "grad_norm": 0.5679128170013428, "learning_rate": 5.5735034243391537e-05, "loss": 1.6893, "step": 8476 }, { "epoch": 0.4724931720639875, "grad_norm": 0.5593464970588684, "learning_rate": 5.572618868328621e-05, "loss": 1.6293, "step": 8477 }, { "epoch": 0.4725489103171507, "grad_norm": 0.527761697769165, "learning_rate": 5.5717342941579555e-05, "loss": 1.6616, "step": 8478 }, { "epoch": 0.4726046485703138, "grad_norm": 0.5714175701141357, "learning_rate": 5.570849701855213e-05, "loss": 1.7797, "step": 8479 }, { "epoch": 0.47266038682347694, "grad_norm": 0.5801485180854797, "learning_rate": 5.569965091448446e-05, "loss": 1.6934, "step": 8480 }, { "epoch": 0.4727161250766401, "grad_norm": 0.6128066778182983, "learning_rate": 5.5690804629657076e-05, "loss": 1.8593, "step": 8481 }, { "epoch": 0.47277186332980325, "grad_norm": 0.6358544230461121, "learning_rate": 5.568195816435057e-05, "loss": 1.8292, "step": 8482 }, { "epoch": 0.4728276015829664, "grad_norm": 0.5209305882453918, "learning_rate": 5.567311151884547e-05, "loss": 1.6183, "step": 8483 }, { "epoch": 0.4728833398361295, "grad_norm": 0.5640316605567932, "learning_rate": 5.566426469342235e-05, "loss": 1.7618, "step": 8484 }, { "epoch": 0.4729390780892927, "grad_norm": 0.5284755825996399, "learning_rate": 5.565541768836178e-05, "loss": 1.6473, "step": 8485 }, { "epoch": 0.4729948163424558, "grad_norm": 0.5737931728363037, "learning_rate": 5.564657050394434e-05, "loss": 1.9419, "step": 8486 }, { "epoch": 0.47305055459561896, "grad_norm": 0.5647780299186707, "learning_rate": 5.563772314045059e-05, "loss": 1.6413, "step": 8487 }, { "epoch": 0.47310629284878214, "grad_norm": 0.5379336476325989, "learning_rate": 5.562887559816116e-05, "loss": 1.5344, "step": 8488 }, { "epoch": 0.47316203110194527, "grad_norm": 0.5728521943092346, "learning_rate": 5.562002787735657e-05, "loss": 1.6937, "step": 8489 }, { "epoch": 0.4732177693551084, "grad_norm": 0.5722839832305908, "learning_rate": 5.561117997831751e-05, "loss": 1.6869, "step": 8490 }, { "epoch": 0.4732735076082716, "grad_norm": 0.5436987280845642, "learning_rate": 5.56023319013245e-05, "loss": 1.3939, "step": 8491 }, { "epoch": 0.4733292458614347, "grad_norm": 0.5408251285552979, "learning_rate": 5.559348364665822e-05, "loss": 1.5309, "step": 8492 }, { "epoch": 0.47338498411459784, "grad_norm": 0.5417353510856628, "learning_rate": 5.5584635214599225e-05, "loss": 1.5592, "step": 8493 }, { "epoch": 0.47344072236776097, "grad_norm": 0.5821628570556641, "learning_rate": 5.557578660542816e-05, "loss": 1.5603, "step": 8494 }, { "epoch": 0.47349646062092415, "grad_norm": 0.5318421721458435, "learning_rate": 5.5566937819425656e-05, "loss": 1.5251, "step": 8495 }, { "epoch": 0.4735521988740873, "grad_norm": 0.5154527425765991, "learning_rate": 5.5558088856872346e-05, "loss": 1.572, "step": 8496 }, { "epoch": 0.4736079371272504, "grad_norm": 0.5686662197113037, "learning_rate": 5.554923971804887e-05, "loss": 1.5153, "step": 8497 }, { "epoch": 0.4736636753804136, "grad_norm": 0.5712747573852539, "learning_rate": 5.554039040323586e-05, "loss": 1.7534, "step": 8498 }, { "epoch": 0.4737194136335767, "grad_norm": 0.5434257388114929, "learning_rate": 5.5531540912713974e-05, "loss": 1.6791, "step": 8499 }, { "epoch": 0.47377515188673985, "grad_norm": 0.5522347092628479, "learning_rate": 5.552269124676386e-05, "loss": 1.7779, "step": 8500 }, { "epoch": 0.47383089013990304, "grad_norm": 0.5155788064002991, "learning_rate": 5.551384140566618e-05, "loss": 1.4377, "step": 8501 }, { "epoch": 0.47388662839306617, "grad_norm": 0.5739377737045288, "learning_rate": 5.550499138970158e-05, "loss": 1.8262, "step": 8502 }, { "epoch": 0.4739423666462293, "grad_norm": 0.5527716875076294, "learning_rate": 5.5496141199150766e-05, "loss": 1.3705, "step": 8503 }, { "epoch": 0.4739981048993925, "grad_norm": 0.5810341238975525, "learning_rate": 5.548729083429439e-05, "loss": 1.7927, "step": 8504 }, { "epoch": 0.4740538431525556, "grad_norm": 0.5541203618049622, "learning_rate": 5.547844029541316e-05, "loss": 1.7237, "step": 8505 }, { "epoch": 0.47410958140571874, "grad_norm": 0.5816789865493774, "learning_rate": 5.546958958278773e-05, "loss": 1.6761, "step": 8506 }, { "epoch": 0.47416531965888187, "grad_norm": 0.5344805121421814, "learning_rate": 5.546073869669881e-05, "loss": 1.7347, "step": 8507 }, { "epoch": 0.47422105791204505, "grad_norm": 0.5249469876289368, "learning_rate": 5.5451887637427104e-05, "loss": 1.5048, "step": 8508 }, { "epoch": 0.4742767961652082, "grad_norm": 0.5707089900970459, "learning_rate": 5.544303640525328e-05, "loss": 1.811, "step": 8509 }, { "epoch": 0.4743325344183713, "grad_norm": 0.5320430397987366, "learning_rate": 5.5434185000458114e-05, "loss": 1.7104, "step": 8510 }, { "epoch": 0.4743882726715345, "grad_norm": 0.5608380436897278, "learning_rate": 5.5425333423322255e-05, "loss": 1.7893, "step": 8511 }, { "epoch": 0.4744440109246976, "grad_norm": 0.5271068811416626, "learning_rate": 5.5416481674126474e-05, "loss": 1.7735, "step": 8512 }, { "epoch": 0.47449974917786075, "grad_norm": 0.5395051836967468, "learning_rate": 5.540762975315147e-05, "loss": 1.7249, "step": 8513 }, { "epoch": 0.47455548743102394, "grad_norm": 0.5892390012741089, "learning_rate": 5.539877766067798e-05, "loss": 1.7148, "step": 8514 }, { "epoch": 0.47461122568418707, "grad_norm": 0.5333415269851685, "learning_rate": 5.538992539698672e-05, "loss": 1.6184, "step": 8515 }, { "epoch": 0.4746669639373502, "grad_norm": 0.6480614542961121, "learning_rate": 5.538107296235847e-05, "loss": 1.6898, "step": 8516 }, { "epoch": 0.4747227021905133, "grad_norm": 0.5696564316749573, "learning_rate": 5.5372220357073955e-05, "loss": 1.7039, "step": 8517 }, { "epoch": 0.4747784404436765, "grad_norm": 0.5047008991241455, "learning_rate": 5.536336758141394e-05, "loss": 1.5221, "step": 8518 }, { "epoch": 0.47483417869683964, "grad_norm": 0.6112247705459595, "learning_rate": 5.535451463565916e-05, "loss": 1.7282, "step": 8519 }, { "epoch": 0.47488991695000277, "grad_norm": 0.5554122924804688, "learning_rate": 5.5345661520090394e-05, "loss": 1.6662, "step": 8520 }, { "epoch": 0.47494565520316595, "grad_norm": 0.5461030602455139, "learning_rate": 5.533680823498844e-05, "loss": 1.6679, "step": 8521 }, { "epoch": 0.4750013934563291, "grad_norm": 0.5860038995742798, "learning_rate": 5.5327954780634004e-05, "loss": 1.769, "step": 8522 }, { "epoch": 0.4750571317094922, "grad_norm": 0.6236945390701294, "learning_rate": 5.531910115730794e-05, "loss": 1.9089, "step": 8523 }, { "epoch": 0.4751128699626554, "grad_norm": 0.545220673084259, "learning_rate": 5.531024736529099e-05, "loss": 1.7743, "step": 8524 }, { "epoch": 0.4751686082158185, "grad_norm": 0.6534609198570251, "learning_rate": 5.5301393404863954e-05, "loss": 1.9673, "step": 8525 }, { "epoch": 0.47522434646898165, "grad_norm": 0.5649281740188599, "learning_rate": 5.529253927630762e-05, "loss": 1.6666, "step": 8526 }, { "epoch": 0.47528008472214484, "grad_norm": 0.5315033197402954, "learning_rate": 5.5283684979902815e-05, "loss": 1.678, "step": 8527 }, { "epoch": 0.47533582297530796, "grad_norm": 0.5951296091079712, "learning_rate": 5.5274830515930306e-05, "loss": 1.6429, "step": 8528 }, { "epoch": 0.4753915612284711, "grad_norm": 0.5288706421852112, "learning_rate": 5.526597588467095e-05, "loss": 1.65, "step": 8529 }, { "epoch": 0.4754472994816342, "grad_norm": 0.5894261002540588, "learning_rate": 5.525712108640553e-05, "loss": 1.6486, "step": 8530 }, { "epoch": 0.4755030377347974, "grad_norm": 0.5475479960441589, "learning_rate": 5.524826612141488e-05, "loss": 1.5981, "step": 8531 }, { "epoch": 0.47555877598796054, "grad_norm": 0.5496692657470703, "learning_rate": 5.523941098997983e-05, "loss": 1.6958, "step": 8532 }, { "epoch": 0.47561451424112366, "grad_norm": 0.6038063168525696, "learning_rate": 5.5230555692381214e-05, "loss": 1.7152, "step": 8533 }, { "epoch": 0.47567025249428685, "grad_norm": 0.5410369038581848, "learning_rate": 5.5221700228899866e-05, "loss": 1.5163, "step": 8534 }, { "epoch": 0.47572599074745, "grad_norm": 0.5673332214355469, "learning_rate": 5.521284459981662e-05, "loss": 1.6854, "step": 8535 }, { "epoch": 0.4757817290006131, "grad_norm": 0.5714686512947083, "learning_rate": 5.520398880541235e-05, "loss": 1.6205, "step": 8536 }, { "epoch": 0.4758374672537763, "grad_norm": 0.6370970606803894, "learning_rate": 5.519513284596789e-05, "loss": 1.8303, "step": 8537 }, { "epoch": 0.4758932055069394, "grad_norm": 0.5482840538024902, "learning_rate": 5.518627672176412e-05, "loss": 1.5506, "step": 8538 }, { "epoch": 0.47594894376010255, "grad_norm": 0.5282999277114868, "learning_rate": 5.5177420433081874e-05, "loss": 1.2786, "step": 8539 }, { "epoch": 0.4760046820132657, "grad_norm": 0.5575840473175049, "learning_rate": 5.516856398020205e-05, "loss": 1.5573, "step": 8540 }, { "epoch": 0.47606042026642886, "grad_norm": 0.5926665663719177, "learning_rate": 5.5159707363405485e-05, "loss": 1.7721, "step": 8541 }, { "epoch": 0.476116158519592, "grad_norm": 0.5172202587127686, "learning_rate": 5.515085058297313e-05, "loss": 1.4076, "step": 8542 }, { "epoch": 0.4761718967727551, "grad_norm": 0.581986665725708, "learning_rate": 5.514199363918578e-05, "loss": 1.7104, "step": 8543 }, { "epoch": 0.4762276350259183, "grad_norm": 0.5978564023971558, "learning_rate": 5.51331365323244e-05, "loss": 1.8326, "step": 8544 }, { "epoch": 0.47628337327908143, "grad_norm": 0.5649850368499756, "learning_rate": 5.5124279262669856e-05, "loss": 1.6206, "step": 8545 }, { "epoch": 0.47633911153224456, "grad_norm": 0.6205348372459412, "learning_rate": 5.511542183050305e-05, "loss": 1.7466, "step": 8546 }, { "epoch": 0.47639484978540775, "grad_norm": 0.5095716714859009, "learning_rate": 5.5106564236104884e-05, "loss": 1.5614, "step": 8547 }, { "epoch": 0.4764505880385709, "grad_norm": 0.5600999593734741, "learning_rate": 5.509770647975626e-05, "loss": 1.825, "step": 8548 }, { "epoch": 0.476506326291734, "grad_norm": 0.5659551620483398, "learning_rate": 5.508884856173813e-05, "loss": 1.8289, "step": 8549 }, { "epoch": 0.4765620645448972, "grad_norm": 0.524356484413147, "learning_rate": 5.507999048233138e-05, "loss": 1.591, "step": 8550 }, { "epoch": 0.4766178027980603, "grad_norm": 0.5709447860717773, "learning_rate": 5.507113224181696e-05, "loss": 1.6152, "step": 8551 }, { "epoch": 0.47667354105122345, "grad_norm": 0.5852453112602234, "learning_rate": 5.506227384047579e-05, "loss": 1.7522, "step": 8552 }, { "epoch": 0.4767292793043866, "grad_norm": 0.6322617530822754, "learning_rate": 5.50534152785888e-05, "loss": 1.8002, "step": 8553 }, { "epoch": 0.47678501755754976, "grad_norm": 0.6037564277648926, "learning_rate": 5.504455655643694e-05, "loss": 1.7472, "step": 8554 }, { "epoch": 0.4768407558107129, "grad_norm": 0.6172270774841309, "learning_rate": 5.503569767430118e-05, "loss": 1.7638, "step": 8555 }, { "epoch": 0.476896494063876, "grad_norm": 0.5917114615440369, "learning_rate": 5.502683863246243e-05, "loss": 1.7726, "step": 8556 }, { "epoch": 0.4769522323170392, "grad_norm": 0.5618294477462769, "learning_rate": 5.5017979431201675e-05, "loss": 1.5519, "step": 8557 }, { "epoch": 0.47700797057020233, "grad_norm": 0.5710815191268921, "learning_rate": 5.500912007079987e-05, "loss": 1.6896, "step": 8558 }, { "epoch": 0.47706370882336546, "grad_norm": 0.5609897971153259, "learning_rate": 5.5000260551537975e-05, "loss": 1.7455, "step": 8559 }, { "epoch": 0.47711944707652865, "grad_norm": 0.5565608739852905, "learning_rate": 5.499140087369697e-05, "loss": 1.5399, "step": 8560 }, { "epoch": 0.4771751853296918, "grad_norm": 0.5751162767410278, "learning_rate": 5.4982541037557823e-05, "loss": 1.5373, "step": 8561 }, { "epoch": 0.4772309235828549, "grad_norm": 0.5089201927185059, "learning_rate": 5.4973681043401534e-05, "loss": 1.2027, "step": 8562 }, { "epoch": 0.47728666183601803, "grad_norm": 0.5925856232643127, "learning_rate": 5.496482089150908e-05, "loss": 1.9377, "step": 8563 }, { "epoch": 0.4773424000891812, "grad_norm": 0.5660269260406494, "learning_rate": 5.495596058216147e-05, "loss": 1.4814, "step": 8564 }, { "epoch": 0.47739813834234435, "grad_norm": 0.5554754734039307, "learning_rate": 5.494710011563966e-05, "loss": 1.6303, "step": 8565 }, { "epoch": 0.4774538765955075, "grad_norm": 0.6004930138587952, "learning_rate": 5.49382394922247e-05, "loss": 1.6204, "step": 8566 }, { "epoch": 0.47750961484867066, "grad_norm": 0.5308135747909546, "learning_rate": 5.4929378712197556e-05, "loss": 1.5949, "step": 8567 }, { "epoch": 0.4775653531018338, "grad_norm": 0.5763102769851685, "learning_rate": 5.4920517775839276e-05, "loss": 1.7625, "step": 8568 }, { "epoch": 0.4776210913549969, "grad_norm": 0.572308361530304, "learning_rate": 5.491165668343085e-05, "loss": 1.7809, "step": 8569 }, { "epoch": 0.4776768296081601, "grad_norm": 0.6404359340667725, "learning_rate": 5.4902795435253306e-05, "loss": 2.0053, "step": 8570 }, { "epoch": 0.47773256786132323, "grad_norm": 0.5613745450973511, "learning_rate": 5.489393403158769e-05, "loss": 1.8136, "step": 8571 }, { "epoch": 0.47778830611448636, "grad_norm": 0.5631322860717773, "learning_rate": 5.488507247271502e-05, "loss": 1.9469, "step": 8572 }, { "epoch": 0.47784404436764955, "grad_norm": 0.5425231456756592, "learning_rate": 5.487621075891632e-05, "loss": 1.7089, "step": 8573 }, { "epoch": 0.4778997826208127, "grad_norm": 0.6085340976715088, "learning_rate": 5.4867348890472646e-05, "loss": 1.8108, "step": 8574 }, { "epoch": 0.4779555208739758, "grad_norm": 0.5472151637077332, "learning_rate": 5.485848686766506e-05, "loss": 1.5179, "step": 8575 }, { "epoch": 0.47801125912713893, "grad_norm": 0.5451512336730957, "learning_rate": 5.484962469077458e-05, "loss": 1.6112, "step": 8576 }, { "epoch": 0.4780669973803021, "grad_norm": 0.5663710236549377, "learning_rate": 5.4840762360082286e-05, "loss": 1.6932, "step": 8577 }, { "epoch": 0.47812273563346525, "grad_norm": 0.5614507794380188, "learning_rate": 5.483189987586924e-05, "loss": 1.7001, "step": 8578 }, { "epoch": 0.4781784738866284, "grad_norm": 0.5428431034088135, "learning_rate": 5.4823037238416506e-05, "loss": 1.7767, "step": 8579 }, { "epoch": 0.47823421213979156, "grad_norm": 0.5602681636810303, "learning_rate": 5.481417444800512e-05, "loss": 1.6749, "step": 8580 }, { "epoch": 0.4782899503929547, "grad_norm": 0.5648148655891418, "learning_rate": 5.480531150491622e-05, "loss": 1.723, "step": 8581 }, { "epoch": 0.4783456886461178, "grad_norm": 0.5764549970626831, "learning_rate": 5.4796448409430845e-05, "loss": 1.8049, "step": 8582 }, { "epoch": 0.478401426899281, "grad_norm": 0.5871893167495728, "learning_rate": 5.478758516183009e-05, "loss": 1.979, "step": 8583 }, { "epoch": 0.47845716515244413, "grad_norm": 0.5481773018836975, "learning_rate": 5.477872176239506e-05, "loss": 1.738, "step": 8584 }, { "epoch": 0.47851290340560726, "grad_norm": 0.5214368104934692, "learning_rate": 5.4769858211406824e-05, "loss": 1.5133, "step": 8585 }, { "epoch": 0.4785686416587704, "grad_norm": 0.5468040704727173, "learning_rate": 5.4760994509146514e-05, "loss": 1.6054, "step": 8586 }, { "epoch": 0.4786243799119336, "grad_norm": 0.5729833841323853, "learning_rate": 5.475213065589518e-05, "loss": 1.4712, "step": 8587 }, { "epoch": 0.4786801181650967, "grad_norm": 0.558814525604248, "learning_rate": 5.4743266651934e-05, "loss": 1.4907, "step": 8588 }, { "epoch": 0.47873585641825983, "grad_norm": 0.5633212924003601, "learning_rate": 5.4734402497544044e-05, "loss": 1.4832, "step": 8589 }, { "epoch": 0.478791594671423, "grad_norm": 0.6136720180511475, "learning_rate": 5.472553819300645e-05, "loss": 1.6588, "step": 8590 }, { "epoch": 0.47884733292458614, "grad_norm": 0.537601113319397, "learning_rate": 5.471667373860234e-05, "loss": 1.6905, "step": 8591 }, { "epoch": 0.4789030711777493, "grad_norm": 0.5937305688858032, "learning_rate": 5.4707809134612844e-05, "loss": 1.7177, "step": 8592 }, { "epoch": 0.47895880943091246, "grad_norm": 0.6321950554847717, "learning_rate": 5.469894438131906e-05, "loss": 1.8388, "step": 8593 }, { "epoch": 0.4790145476840756, "grad_norm": 0.5728781223297119, "learning_rate": 5.469007947900219e-05, "loss": 1.9354, "step": 8594 }, { "epoch": 0.4790702859372387, "grad_norm": 0.5851932764053345, "learning_rate": 5.468121442794333e-05, "loss": 1.6465, "step": 8595 }, { "epoch": 0.4791260241904019, "grad_norm": 0.5869148969650269, "learning_rate": 5.467234922842363e-05, "loss": 1.8636, "step": 8596 }, { "epoch": 0.47918176244356503, "grad_norm": 0.5678532719612122, "learning_rate": 5.4663483880724275e-05, "loss": 1.7346, "step": 8597 }, { "epoch": 0.47923750069672816, "grad_norm": 0.5783692598342896, "learning_rate": 5.46546183851264e-05, "loss": 1.8068, "step": 8598 }, { "epoch": 0.4792932389498913, "grad_norm": 0.5361393690109253, "learning_rate": 5.464575274191116e-05, "loss": 1.4534, "step": 8599 }, { "epoch": 0.47934897720305447, "grad_norm": 0.5204313397407532, "learning_rate": 5.4636886951359726e-05, "loss": 1.5212, "step": 8600 }, { "epoch": 0.4794047154562176, "grad_norm": 0.5215826630592346, "learning_rate": 5.4628021013753284e-05, "loss": 1.6756, "step": 8601 }, { "epoch": 0.47946045370938073, "grad_norm": 0.5335747599601746, "learning_rate": 5.461915492937299e-05, "loss": 1.7895, "step": 8602 }, { "epoch": 0.4795161919625439, "grad_norm": 0.5702705979347229, "learning_rate": 5.461028869850004e-05, "loss": 1.7024, "step": 8603 }, { "epoch": 0.47957193021570704, "grad_norm": 0.5771311521530151, "learning_rate": 5.4601422321415606e-05, "loss": 1.7879, "step": 8604 }, { "epoch": 0.47962766846887017, "grad_norm": 0.5826980471611023, "learning_rate": 5.459255579840089e-05, "loss": 1.6198, "step": 8605 }, { "epoch": 0.47968340672203336, "grad_norm": 0.5219647288322449, "learning_rate": 5.458368912973707e-05, "loss": 1.6159, "step": 8606 }, { "epoch": 0.4797391449751965, "grad_norm": 0.5676286220550537, "learning_rate": 5.4574822315705366e-05, "loss": 1.6843, "step": 8607 }, { "epoch": 0.4797948832283596, "grad_norm": 0.5792801380157471, "learning_rate": 5.456595535658696e-05, "loss": 1.8092, "step": 8608 }, { "epoch": 0.47985062148152274, "grad_norm": 0.5464149713516235, "learning_rate": 5.455708825266308e-05, "loss": 1.7726, "step": 8609 }, { "epoch": 0.47990635973468593, "grad_norm": 0.597957968711853, "learning_rate": 5.4548221004214936e-05, "loss": 1.7107, "step": 8610 }, { "epoch": 0.47996209798784906, "grad_norm": 0.5609841346740723, "learning_rate": 5.453935361152374e-05, "loss": 1.5578, "step": 8611 }, { "epoch": 0.4800178362410122, "grad_norm": 0.5753505229949951, "learning_rate": 5.45304860748707e-05, "loss": 1.8959, "step": 8612 }, { "epoch": 0.48007357449417537, "grad_norm": 0.5798444747924805, "learning_rate": 5.4521618394537056e-05, "loss": 1.9346, "step": 8613 }, { "epoch": 0.4801293127473385, "grad_norm": 0.536660373210907, "learning_rate": 5.451275057080405e-05, "loss": 1.6191, "step": 8614 }, { "epoch": 0.48018505100050163, "grad_norm": 0.5759127736091614, "learning_rate": 5.4503882603952905e-05, "loss": 1.6555, "step": 8615 }, { "epoch": 0.4802407892536648, "grad_norm": 0.5895690321922302, "learning_rate": 5.449501449426487e-05, "loss": 1.7481, "step": 8616 }, { "epoch": 0.48029652750682794, "grad_norm": 0.5727548003196716, "learning_rate": 5.448614624202117e-05, "loss": 1.7338, "step": 8617 }, { "epoch": 0.48035226575999107, "grad_norm": 0.5720645189285278, "learning_rate": 5.447727784750308e-05, "loss": 1.7127, "step": 8618 }, { "epoch": 0.48040800401315426, "grad_norm": 0.5797655582427979, "learning_rate": 5.446840931099182e-05, "loss": 1.733, "step": 8619 }, { "epoch": 0.4804637422663174, "grad_norm": 0.5146819949150085, "learning_rate": 5.445954063276869e-05, "loss": 1.5931, "step": 8620 }, { "epoch": 0.4805194805194805, "grad_norm": 0.5465497970581055, "learning_rate": 5.445067181311492e-05, "loss": 1.6994, "step": 8621 }, { "epoch": 0.48057521877264364, "grad_norm": 0.5129651427268982, "learning_rate": 5.4441802852311795e-05, "loss": 1.5357, "step": 8622 }, { "epoch": 0.4806309570258068, "grad_norm": 0.5457690954208374, "learning_rate": 5.443293375064058e-05, "loss": 1.5543, "step": 8623 }, { "epoch": 0.48068669527896996, "grad_norm": 0.5993552207946777, "learning_rate": 5.4424064508382556e-05, "loss": 1.902, "step": 8624 }, { "epoch": 0.4807424335321331, "grad_norm": 0.5725103616714478, "learning_rate": 5.4415195125819e-05, "loss": 1.7444, "step": 8625 }, { "epoch": 0.48079817178529627, "grad_norm": 0.5666811466217041, "learning_rate": 5.440632560323118e-05, "loss": 1.6553, "step": 8626 }, { "epoch": 0.4808539100384594, "grad_norm": 0.5566148161888123, "learning_rate": 5.439745594090042e-05, "loss": 1.3808, "step": 8627 }, { "epoch": 0.4809096482916225, "grad_norm": 0.5133042335510254, "learning_rate": 5.438858613910799e-05, "loss": 1.5705, "step": 8628 }, { "epoch": 0.4809653865447857, "grad_norm": 0.6130719780921936, "learning_rate": 5.43797161981352e-05, "loss": 1.9702, "step": 8629 }, { "epoch": 0.48102112479794884, "grad_norm": 0.5869434475898743, "learning_rate": 5.4370846118263354e-05, "loss": 1.8149, "step": 8630 }, { "epoch": 0.48107686305111197, "grad_norm": 0.5676392316818237, "learning_rate": 5.436197589977374e-05, "loss": 1.5798, "step": 8631 }, { "epoch": 0.4811326013042751, "grad_norm": 0.5470464825630188, "learning_rate": 5.435310554294769e-05, "loss": 1.6549, "step": 8632 }, { "epoch": 0.4811883395574383, "grad_norm": 0.5741833448410034, "learning_rate": 5.434423504806651e-05, "loss": 1.7124, "step": 8633 }, { "epoch": 0.4812440778106014, "grad_norm": 0.5436912178993225, "learning_rate": 5.433536441541152e-05, "loss": 1.568, "step": 8634 }, { "epoch": 0.48129981606376454, "grad_norm": 0.5380058884620667, "learning_rate": 5.432649364526403e-05, "loss": 1.4785, "step": 8635 }, { "epoch": 0.4813555543169277, "grad_norm": 0.5699672102928162, "learning_rate": 5.4317622737905413e-05, "loss": 1.4929, "step": 8636 }, { "epoch": 0.48141129257009085, "grad_norm": 0.565059244632721, "learning_rate": 5.4308751693616975e-05, "loss": 1.7861, "step": 8637 }, { "epoch": 0.481467030823254, "grad_norm": 0.5427149534225464, "learning_rate": 5.429988051268006e-05, "loss": 1.6655, "step": 8638 }, { "epoch": 0.48152276907641717, "grad_norm": 0.5943994522094727, "learning_rate": 5.429100919537597e-05, "loss": 1.8461, "step": 8639 }, { "epoch": 0.4815785073295803, "grad_norm": 0.5920754671096802, "learning_rate": 5.4282137741986125e-05, "loss": 1.9077, "step": 8640 }, { "epoch": 0.4816342455827434, "grad_norm": 0.5471158623695374, "learning_rate": 5.427326615279182e-05, "loss": 1.6468, "step": 8641 }, { "epoch": 0.4816899838359066, "grad_norm": 0.5595037341117859, "learning_rate": 5.426439442807444e-05, "loss": 1.7315, "step": 8642 }, { "epoch": 0.48174572208906974, "grad_norm": 0.5808396935462952, "learning_rate": 5.4255522568115314e-05, "loss": 1.8597, "step": 8643 }, { "epoch": 0.48180146034223287, "grad_norm": 0.5106577277183533, "learning_rate": 5.424665057319584e-05, "loss": 1.4579, "step": 8644 }, { "epoch": 0.481857198595396, "grad_norm": 0.5588060617446899, "learning_rate": 5.4237778443597366e-05, "loss": 1.7045, "step": 8645 }, { "epoch": 0.4819129368485592, "grad_norm": 0.5763769149780273, "learning_rate": 5.4228906179601256e-05, "loss": 1.7194, "step": 8646 }, { "epoch": 0.4819686751017223, "grad_norm": 0.5877617597579956, "learning_rate": 5.42200337814889e-05, "loss": 1.8115, "step": 8647 }, { "epoch": 0.48202441335488544, "grad_norm": 0.588557779788971, "learning_rate": 5.421116124954169e-05, "loss": 1.7122, "step": 8648 }, { "epoch": 0.4820801516080486, "grad_norm": 0.5687382221221924, "learning_rate": 5.4202288584040996e-05, "loss": 1.6734, "step": 8649 }, { "epoch": 0.48213588986121175, "grad_norm": 0.5797961950302124, "learning_rate": 5.4193415785268195e-05, "loss": 1.9098, "step": 8650 }, { "epoch": 0.4821916281143749, "grad_norm": 0.5459732413291931, "learning_rate": 5.418454285350472e-05, "loss": 1.5751, "step": 8651 }, { "epoch": 0.48224736636753807, "grad_norm": 0.6237668991088867, "learning_rate": 5.4175669789031904e-05, "loss": 1.9574, "step": 8652 }, { "epoch": 0.4823031046207012, "grad_norm": 0.5237795114517212, "learning_rate": 5.4166796592131216e-05, "loss": 1.6274, "step": 8653 }, { "epoch": 0.4823588428738643, "grad_norm": 0.8351784348487854, "learning_rate": 5.415792326308403e-05, "loss": 1.6101, "step": 8654 }, { "epoch": 0.48241458112702745, "grad_norm": 0.553855836391449, "learning_rate": 5.414904980217177e-05, "loss": 1.7006, "step": 8655 }, { "epoch": 0.48247031938019064, "grad_norm": 0.5128687620162964, "learning_rate": 5.414017620967582e-05, "loss": 1.5782, "step": 8656 }, { "epoch": 0.48252605763335377, "grad_norm": 0.5743347406387329, "learning_rate": 5.4131302485877635e-05, "loss": 1.8762, "step": 8657 }, { "epoch": 0.4825817958865169, "grad_norm": 0.5579991936683655, "learning_rate": 5.412242863105862e-05, "loss": 1.6882, "step": 8658 }, { "epoch": 0.4826375341396801, "grad_norm": 0.5496572256088257, "learning_rate": 5.41135546455002e-05, "loss": 1.6909, "step": 8659 }, { "epoch": 0.4826932723928432, "grad_norm": 0.5845061540603638, "learning_rate": 5.410468052948381e-05, "loss": 1.8966, "step": 8660 }, { "epoch": 0.48274901064600634, "grad_norm": 0.5628004670143127, "learning_rate": 5.409580628329088e-05, "loss": 1.6114, "step": 8661 }, { "epoch": 0.4828047488991695, "grad_norm": 0.52235347032547, "learning_rate": 5.408693190720288e-05, "loss": 1.4296, "step": 8662 }, { "epoch": 0.48286048715233265, "grad_norm": 0.5655858516693115, "learning_rate": 5.40780574015012e-05, "loss": 1.7761, "step": 8663 }, { "epoch": 0.4829162254054958, "grad_norm": 0.5697308778762817, "learning_rate": 5.406918276646733e-05, "loss": 1.7426, "step": 8664 }, { "epoch": 0.48297196365865896, "grad_norm": 0.5626512169837952, "learning_rate": 5.40603080023827e-05, "loss": 1.5949, "step": 8665 }, { "epoch": 0.4830277019118221, "grad_norm": 0.6178479194641113, "learning_rate": 5.405143310952878e-05, "loss": 1.9571, "step": 8666 }, { "epoch": 0.4830834401649852, "grad_norm": 0.6123231053352356, "learning_rate": 5.4042558088187014e-05, "loss": 1.9154, "step": 8667 }, { "epoch": 0.48313917841814835, "grad_norm": 0.5526097416877747, "learning_rate": 5.40336829386389e-05, "loss": 1.5508, "step": 8668 }, { "epoch": 0.48319491667131154, "grad_norm": 0.5456022620201111, "learning_rate": 5.4024807661165855e-05, "loss": 1.5887, "step": 8669 }, { "epoch": 0.48325065492447467, "grad_norm": 0.49078524112701416, "learning_rate": 5.4015932256049386e-05, "loss": 1.5876, "step": 8670 }, { "epoch": 0.4833063931776378, "grad_norm": 0.5714897513389587, "learning_rate": 5.4007056723570956e-05, "loss": 1.8633, "step": 8671 }, { "epoch": 0.483362131430801, "grad_norm": 0.6069988012313843, "learning_rate": 5.399818106401206e-05, "loss": 1.7922, "step": 8672 }, { "epoch": 0.4834178696839641, "grad_norm": 0.5466931462287903, "learning_rate": 5.3989305277654156e-05, "loss": 1.7496, "step": 8673 }, { "epoch": 0.48347360793712724, "grad_norm": 0.562350869178772, "learning_rate": 5.398042936477875e-05, "loss": 1.6191, "step": 8674 }, { "epoch": 0.4835293461902904, "grad_norm": 0.5562702417373657, "learning_rate": 5.397155332566736e-05, "loss": 1.8695, "step": 8675 }, { "epoch": 0.48358508444345355, "grad_norm": 0.598784863948822, "learning_rate": 5.3962677160601426e-05, "loss": 1.5275, "step": 8676 }, { "epoch": 0.4836408226966167, "grad_norm": 0.5225400924682617, "learning_rate": 5.395380086986249e-05, "loss": 1.4847, "step": 8677 }, { "epoch": 0.4836965609497798, "grad_norm": 0.58516925573349, "learning_rate": 5.3944924453732014e-05, "loss": 1.652, "step": 8678 }, { "epoch": 0.483752299202943, "grad_norm": 0.5312181115150452, "learning_rate": 5.3936047912491574e-05, "loss": 1.356, "step": 8679 }, { "epoch": 0.4838080374561061, "grad_norm": 0.5645095109939575, "learning_rate": 5.3927171246422615e-05, "loss": 1.7965, "step": 8680 }, { "epoch": 0.48386377570926925, "grad_norm": 0.5576086044311523, "learning_rate": 5.39182944558067e-05, "loss": 1.6595, "step": 8681 }, { "epoch": 0.48391951396243243, "grad_norm": 0.5667631030082703, "learning_rate": 5.390941754092532e-05, "loss": 1.6973, "step": 8682 }, { "epoch": 0.48397525221559556, "grad_norm": 0.5693982243537903, "learning_rate": 5.3900540502060015e-05, "loss": 1.6383, "step": 8683 }, { "epoch": 0.4840309904687587, "grad_norm": 0.5972820520401001, "learning_rate": 5.3891663339492306e-05, "loss": 1.73, "step": 8684 }, { "epoch": 0.4840867287219219, "grad_norm": 0.5453163385391235, "learning_rate": 5.388278605350372e-05, "loss": 1.5295, "step": 8685 }, { "epoch": 0.484142466975085, "grad_norm": 0.5659864544868469, "learning_rate": 5.38739086443758e-05, "loss": 1.6765, "step": 8686 }, { "epoch": 0.48419820522824814, "grad_norm": 0.5438006520271301, "learning_rate": 5.386503111239008e-05, "loss": 1.5357, "step": 8687 }, { "epoch": 0.4842539434814113, "grad_norm": 0.5650402903556824, "learning_rate": 5.385615345782813e-05, "loss": 1.7396, "step": 8688 }, { "epoch": 0.48430968173457445, "grad_norm": 0.5356137156486511, "learning_rate": 5.3847275680971454e-05, "loss": 1.7116, "step": 8689 }, { "epoch": 0.4843654199877376, "grad_norm": 0.5687363743782043, "learning_rate": 5.383839778210163e-05, "loss": 1.6747, "step": 8690 }, { "epoch": 0.4844211582409007, "grad_norm": 0.5704367756843567, "learning_rate": 5.38295197615002e-05, "loss": 1.5563, "step": 8691 }, { "epoch": 0.4844768964940639, "grad_norm": 0.6154001355171204, "learning_rate": 5.382064161944874e-05, "loss": 2.1129, "step": 8692 }, { "epoch": 0.484532634747227, "grad_norm": 0.5885458588600159, "learning_rate": 5.3811763356228804e-05, "loss": 1.6652, "step": 8693 }, { "epoch": 0.48458837300039015, "grad_norm": 0.5427495837211609, "learning_rate": 5.3802884972121955e-05, "loss": 1.7085, "step": 8694 }, { "epoch": 0.48464411125355333, "grad_norm": 0.5415340065956116, "learning_rate": 5.379400646740977e-05, "loss": 1.7126, "step": 8695 }, { "epoch": 0.48469984950671646, "grad_norm": 0.50815749168396, "learning_rate": 5.3785127842373814e-05, "loss": 1.7257, "step": 8696 }, { "epoch": 0.4847555877598796, "grad_norm": 0.5710844397544861, "learning_rate": 5.3776249097295696e-05, "loss": 1.6778, "step": 8697 }, { "epoch": 0.4848113260130428, "grad_norm": 0.5827280282974243, "learning_rate": 5.376737023245695e-05, "loss": 1.717, "step": 8698 }, { "epoch": 0.4848670642662059, "grad_norm": 0.6222889423370361, "learning_rate": 5.375849124813919e-05, "loss": 1.9998, "step": 8699 }, { "epoch": 0.48492280251936903, "grad_norm": 0.5893861651420593, "learning_rate": 5.3749612144623995e-05, "loss": 1.9211, "step": 8700 }, { "epoch": 0.48497854077253216, "grad_norm": 0.5538213849067688, "learning_rate": 5.374073292219297e-05, "loss": 1.7934, "step": 8701 }, { "epoch": 0.48503427902569535, "grad_norm": 0.5892875790596008, "learning_rate": 5.3731853581127714e-05, "loss": 1.8932, "step": 8702 }, { "epoch": 0.4850900172788585, "grad_norm": 0.5553523302078247, "learning_rate": 5.3722974121709815e-05, "loss": 1.7465, "step": 8703 }, { "epoch": 0.4851457555320216, "grad_norm": 0.57076096534729, "learning_rate": 5.371409454422087e-05, "loss": 1.7025, "step": 8704 }, { "epoch": 0.4852014937851848, "grad_norm": 0.5483660101890564, "learning_rate": 5.370521484894252e-05, "loss": 1.6435, "step": 8705 }, { "epoch": 0.4852572320383479, "grad_norm": 0.5742903351783752, "learning_rate": 5.3696335036156345e-05, "loss": 1.7067, "step": 8706 }, { "epoch": 0.48531297029151105, "grad_norm": 0.5819395184516907, "learning_rate": 5.368745510614399e-05, "loss": 1.6528, "step": 8707 }, { "epoch": 0.48536870854467423, "grad_norm": 0.5477610230445862, "learning_rate": 5.367857505918704e-05, "loss": 1.8253, "step": 8708 }, { "epoch": 0.48542444679783736, "grad_norm": 0.6026375889778137, "learning_rate": 5.3669694895567145e-05, "loss": 1.8483, "step": 8709 }, { "epoch": 0.4854801850510005, "grad_norm": 0.49743878841400146, "learning_rate": 5.366081461556593e-05, "loss": 1.4705, "step": 8710 }, { "epoch": 0.4855359233041637, "grad_norm": 0.5510653853416443, "learning_rate": 5.365193421946502e-05, "loss": 1.4843, "step": 8711 }, { "epoch": 0.4855916615573268, "grad_norm": 0.5583814978599548, "learning_rate": 5.3643053707546034e-05, "loss": 1.6045, "step": 8712 }, { "epoch": 0.48564739981048993, "grad_norm": 0.5511784553527832, "learning_rate": 5.363417308009062e-05, "loss": 1.7184, "step": 8713 }, { "epoch": 0.48570313806365306, "grad_norm": 0.5590716600418091, "learning_rate": 5.362529233738045e-05, "loss": 1.6326, "step": 8714 }, { "epoch": 0.48575887631681625, "grad_norm": 0.564095139503479, "learning_rate": 5.361641147969713e-05, "loss": 1.6036, "step": 8715 }, { "epoch": 0.4858146145699794, "grad_norm": 0.6147303581237793, "learning_rate": 5.3607530507322334e-05, "loss": 1.8542, "step": 8716 }, { "epoch": 0.4858703528231425, "grad_norm": 0.556438684463501, "learning_rate": 5.3598649420537675e-05, "loss": 1.6413, "step": 8717 }, { "epoch": 0.4859260910763057, "grad_norm": 0.5851439237594604, "learning_rate": 5.358976821962487e-05, "loss": 1.7414, "step": 8718 }, { "epoch": 0.4859818293294688, "grad_norm": 0.5886179804801941, "learning_rate": 5.358088690486553e-05, "loss": 1.623, "step": 8719 }, { "epoch": 0.48603756758263195, "grad_norm": 0.5328960418701172, "learning_rate": 5.357200547654134e-05, "loss": 1.4861, "step": 8720 }, { "epoch": 0.48609330583579513, "grad_norm": 0.5452643036842346, "learning_rate": 5.356312393493396e-05, "loss": 1.763, "step": 8721 }, { "epoch": 0.48614904408895826, "grad_norm": 0.5395748019218445, "learning_rate": 5.3554242280325064e-05, "loss": 1.4284, "step": 8722 }, { "epoch": 0.4862047823421214, "grad_norm": 0.6557826399803162, "learning_rate": 5.354536051299634e-05, "loss": 1.8725, "step": 8723 }, { "epoch": 0.4862605205952845, "grad_norm": 0.5590106248855591, "learning_rate": 5.353647863322943e-05, "loss": 1.6673, "step": 8724 }, { "epoch": 0.4863162588484477, "grad_norm": 0.560207188129425, "learning_rate": 5.3527596641306034e-05, "loss": 1.7026, "step": 8725 }, { "epoch": 0.48637199710161083, "grad_norm": 0.54021817445755, "learning_rate": 5.3518714537507855e-05, "loss": 1.3786, "step": 8726 }, { "epoch": 0.48642773535477396, "grad_norm": 0.5303489565849304, "learning_rate": 5.350983232211657e-05, "loss": 1.5461, "step": 8727 }, { "epoch": 0.48648347360793714, "grad_norm": 0.5234289169311523, "learning_rate": 5.350094999541385e-05, "loss": 1.8215, "step": 8728 }, { "epoch": 0.4865392118611003, "grad_norm": 0.6171209216117859, "learning_rate": 5.349206755768142e-05, "loss": 1.6419, "step": 8729 }, { "epoch": 0.4865949501142634, "grad_norm": 0.5630922317504883, "learning_rate": 5.3483185009200955e-05, "loss": 1.7303, "step": 8730 }, { "epoch": 0.4866506883674266, "grad_norm": 0.5881733298301697, "learning_rate": 5.347430235025419e-05, "loss": 1.8506, "step": 8731 }, { "epoch": 0.4867064266205897, "grad_norm": 0.5110684633255005, "learning_rate": 5.34654195811228e-05, "loss": 1.4549, "step": 8732 }, { "epoch": 0.48676216487375285, "grad_norm": 0.5621329545974731, "learning_rate": 5.345653670208851e-05, "loss": 1.6001, "step": 8733 }, { "epoch": 0.48681790312691603, "grad_norm": 0.5230090022087097, "learning_rate": 5.344765371343302e-05, "loss": 1.7102, "step": 8734 }, { "epoch": 0.48687364138007916, "grad_norm": 0.5325090289115906, "learning_rate": 5.343877061543806e-05, "loss": 1.5661, "step": 8735 }, { "epoch": 0.4869293796332423, "grad_norm": 0.5863301753997803, "learning_rate": 5.342988740838535e-05, "loss": 1.9036, "step": 8736 }, { "epoch": 0.4869851178864054, "grad_norm": 0.5872917175292969, "learning_rate": 5.342100409255659e-05, "loss": 1.8516, "step": 8737 }, { "epoch": 0.4870408561395686, "grad_norm": 0.5677287578582764, "learning_rate": 5.341212066823355e-05, "loss": 1.5462, "step": 8738 }, { "epoch": 0.48709659439273173, "grad_norm": 0.5717810392379761, "learning_rate": 5.340323713569792e-05, "loss": 1.7118, "step": 8739 }, { "epoch": 0.48715233264589486, "grad_norm": 0.5940883159637451, "learning_rate": 5.339435349523148e-05, "loss": 1.8225, "step": 8740 }, { "epoch": 0.48720807089905804, "grad_norm": 0.6162937879562378, "learning_rate": 5.33854697471159e-05, "loss": 1.9512, "step": 8741 }, { "epoch": 0.4872638091522212, "grad_norm": 0.5418954491615295, "learning_rate": 5.337658589163299e-05, "loss": 1.6836, "step": 8742 }, { "epoch": 0.4873195474053843, "grad_norm": 0.5783557295799255, "learning_rate": 5.3367701929064426e-05, "loss": 1.709, "step": 8743 }, { "epoch": 0.4873752856585475, "grad_norm": 0.5385530591011047, "learning_rate": 5.3358817859692025e-05, "loss": 1.5885, "step": 8744 }, { "epoch": 0.4874310239117106, "grad_norm": 0.5666008591651917, "learning_rate": 5.334993368379748e-05, "loss": 1.6946, "step": 8745 }, { "epoch": 0.48748676216487374, "grad_norm": 0.549767255783081, "learning_rate": 5.3341049401662594e-05, "loss": 1.5776, "step": 8746 }, { "epoch": 0.4875425004180369, "grad_norm": 0.5610424280166626, "learning_rate": 5.333216501356909e-05, "loss": 1.6057, "step": 8747 }, { "epoch": 0.48759823867120006, "grad_norm": 0.5643283724784851, "learning_rate": 5.332328051979873e-05, "loss": 1.7629, "step": 8748 }, { "epoch": 0.4876539769243632, "grad_norm": 0.5474547743797302, "learning_rate": 5.3314395920633306e-05, "loss": 1.7972, "step": 8749 }, { "epoch": 0.4877097151775263, "grad_norm": 0.56900554895401, "learning_rate": 5.330551121635454e-05, "loss": 1.7521, "step": 8750 }, { "epoch": 0.4877654534306895, "grad_norm": 0.6560434103012085, "learning_rate": 5.329662640724426e-05, "loss": 1.4613, "step": 8751 }, { "epoch": 0.48782119168385263, "grad_norm": 0.5190215110778809, "learning_rate": 5.32877414935842e-05, "loss": 1.4367, "step": 8752 }, { "epoch": 0.48787692993701576, "grad_norm": 0.5503537058830261, "learning_rate": 5.3278856475656144e-05, "loss": 1.649, "step": 8753 }, { "epoch": 0.48793266819017894, "grad_norm": 0.5634624361991882, "learning_rate": 5.326997135374189e-05, "loss": 1.9406, "step": 8754 }, { "epoch": 0.48798840644334207, "grad_norm": 0.5632345676422119, "learning_rate": 5.3261086128123206e-05, "loss": 1.6661, "step": 8755 }, { "epoch": 0.4880441446965052, "grad_norm": 0.6362982392311096, "learning_rate": 5.3252200799081875e-05, "loss": 1.9258, "step": 8756 }, { "epoch": 0.4880998829496684, "grad_norm": 0.5737461447715759, "learning_rate": 5.3243315366899694e-05, "loss": 1.6868, "step": 8757 }, { "epoch": 0.4881556212028315, "grad_norm": 0.5335796475410461, "learning_rate": 5.3234429831858466e-05, "loss": 1.4586, "step": 8758 }, { "epoch": 0.48821135945599464, "grad_norm": 0.5574231743812561, "learning_rate": 5.3225544194239984e-05, "loss": 1.6262, "step": 8759 }, { "epoch": 0.48826709770915777, "grad_norm": 0.5251532196998596, "learning_rate": 5.3216658454326043e-05, "loss": 1.5789, "step": 8760 }, { "epoch": 0.48832283596232096, "grad_norm": 0.5983790159225464, "learning_rate": 5.3207772612398444e-05, "loss": 1.8751, "step": 8761 }, { "epoch": 0.4883785742154841, "grad_norm": 0.5940685272216797, "learning_rate": 5.319888666873902e-05, "loss": 1.5181, "step": 8762 }, { "epoch": 0.4884343124686472, "grad_norm": 0.5403158664703369, "learning_rate": 5.319000062362953e-05, "loss": 1.6698, "step": 8763 }, { "epoch": 0.4884900507218104, "grad_norm": 0.5441331267356873, "learning_rate": 5.318111447735186e-05, "loss": 1.6822, "step": 8764 }, { "epoch": 0.4885457889749735, "grad_norm": 0.6151909232139587, "learning_rate": 5.317222823018775e-05, "loss": 1.8201, "step": 8765 }, { "epoch": 0.48860152722813666, "grad_norm": 0.5616387724876404, "learning_rate": 5.316334188241908e-05, "loss": 1.705, "step": 8766 }, { "epoch": 0.48865726548129984, "grad_norm": 0.570561408996582, "learning_rate": 5.3154455434327634e-05, "loss": 1.7352, "step": 8767 }, { "epoch": 0.48871300373446297, "grad_norm": 0.5549841523170471, "learning_rate": 5.314556888619527e-05, "loss": 1.7109, "step": 8768 }, { "epoch": 0.4887687419876261, "grad_norm": 0.6028071045875549, "learning_rate": 5.313668223830378e-05, "loss": 1.7114, "step": 8769 }, { "epoch": 0.4888244802407892, "grad_norm": 0.563991129398346, "learning_rate": 5.312779549093503e-05, "loss": 1.5484, "step": 8770 }, { "epoch": 0.4888802184939524, "grad_norm": 0.5773816108703613, "learning_rate": 5.3118908644370834e-05, "loss": 1.7072, "step": 8771 }, { "epoch": 0.48893595674711554, "grad_norm": 0.5592569708824158, "learning_rate": 5.3110021698893053e-05, "loss": 1.7843, "step": 8772 }, { "epoch": 0.48899169500027867, "grad_norm": 0.5349111557006836, "learning_rate": 5.310113465478351e-05, "loss": 1.5887, "step": 8773 }, { "epoch": 0.48904743325344185, "grad_norm": 0.5708144903182983, "learning_rate": 5.309224751232406e-05, "loss": 1.5671, "step": 8774 }, { "epoch": 0.489103171506605, "grad_norm": 0.5695350766181946, "learning_rate": 5.308336027179655e-05, "loss": 1.8061, "step": 8775 }, { "epoch": 0.4891589097597681, "grad_norm": 0.5757440328598022, "learning_rate": 5.307447293348281e-05, "loss": 1.7021, "step": 8776 }, { "epoch": 0.4892146480129313, "grad_norm": 0.5219387412071228, "learning_rate": 5.306558549766473e-05, "loss": 1.5089, "step": 8777 }, { "epoch": 0.4892703862660944, "grad_norm": 0.5836179256439209, "learning_rate": 5.305669796462415e-05, "loss": 1.764, "step": 8778 }, { "epoch": 0.48932612451925755, "grad_norm": 0.5617983341217041, "learning_rate": 5.3047810334642935e-05, "loss": 1.751, "step": 8779 }, { "epoch": 0.48938186277242074, "grad_norm": 0.5990623831748962, "learning_rate": 5.303892260800294e-05, "loss": 1.7939, "step": 8780 }, { "epoch": 0.48943760102558387, "grad_norm": 0.5625554323196411, "learning_rate": 5.303003478498605e-05, "loss": 1.8436, "step": 8781 }, { "epoch": 0.489493339278747, "grad_norm": 0.6201027631759644, "learning_rate": 5.3021146865874117e-05, "loss": 1.7894, "step": 8782 }, { "epoch": 0.4895490775319101, "grad_norm": 0.5482053160667419, "learning_rate": 5.301225885094902e-05, "loss": 1.7486, "step": 8783 }, { "epoch": 0.4896048157850733, "grad_norm": 0.5940152406692505, "learning_rate": 5.300337074049262e-05, "loss": 1.7971, "step": 8784 }, { "epoch": 0.48966055403823644, "grad_norm": 0.49621883034706116, "learning_rate": 5.299448253478683e-05, "loss": 1.6085, "step": 8785 }, { "epoch": 0.48971629229139957, "grad_norm": 0.5509806275367737, "learning_rate": 5.29855942341135e-05, "loss": 1.8445, "step": 8786 }, { "epoch": 0.48977203054456275, "grad_norm": 0.5669719576835632, "learning_rate": 5.297670583875454e-05, "loss": 1.7854, "step": 8787 }, { "epoch": 0.4898277687977259, "grad_norm": 0.5512406826019287, "learning_rate": 5.296781734899182e-05, "loss": 1.4982, "step": 8788 }, { "epoch": 0.489883507050889, "grad_norm": 0.56741863489151, "learning_rate": 5.295892876510723e-05, "loss": 1.7415, "step": 8789 }, { "epoch": 0.4899392453040522, "grad_norm": 0.5425149202346802, "learning_rate": 5.295004008738268e-05, "loss": 1.5488, "step": 8790 }, { "epoch": 0.4899949835572153, "grad_norm": 0.5617731213569641, "learning_rate": 5.294115131610006e-05, "loss": 1.7582, "step": 8791 }, { "epoch": 0.49005072181037845, "grad_norm": 0.5693073868751526, "learning_rate": 5.293226245154127e-05, "loss": 1.5738, "step": 8792 }, { "epoch": 0.4901064600635416, "grad_norm": 0.6429868340492249, "learning_rate": 5.292337349398821e-05, "loss": 1.7709, "step": 8793 }, { "epoch": 0.49016219831670477, "grad_norm": 0.568608283996582, "learning_rate": 5.291448444372279e-05, "loss": 1.5022, "step": 8794 }, { "epoch": 0.4902179365698679, "grad_norm": 0.5543949604034424, "learning_rate": 5.29055953010269e-05, "loss": 1.7136, "step": 8795 }, { "epoch": 0.490273674823031, "grad_norm": 0.5077717900276184, "learning_rate": 5.289670606618248e-05, "loss": 1.5791, "step": 8796 }, { "epoch": 0.4903294130761942, "grad_norm": 0.5588290691375732, "learning_rate": 5.288781673947143e-05, "loss": 1.7905, "step": 8797 }, { "epoch": 0.49038515132935734, "grad_norm": 0.5637931823730469, "learning_rate": 5.2878927321175676e-05, "loss": 1.7184, "step": 8798 }, { "epoch": 0.49044088958252047, "grad_norm": 0.5664627552032471, "learning_rate": 5.2870037811577125e-05, "loss": 1.5013, "step": 8799 }, { "epoch": 0.49049662783568365, "grad_norm": 0.5796491503715515, "learning_rate": 5.28611482109577e-05, "loss": 1.7939, "step": 8800 }, { "epoch": 0.4905523660888468, "grad_norm": 0.556143045425415, "learning_rate": 5.2852258519599365e-05, "loss": 1.5717, "step": 8801 }, { "epoch": 0.4906081043420099, "grad_norm": 0.5120705366134644, "learning_rate": 5.284336873778398e-05, "loss": 1.5725, "step": 8802 }, { "epoch": 0.4906638425951731, "grad_norm": 0.5616738200187683, "learning_rate": 5.2834478865793545e-05, "loss": 1.5918, "step": 8803 }, { "epoch": 0.4907195808483362, "grad_norm": 0.5868408679962158, "learning_rate": 5.282558890390995e-05, "loss": 1.7262, "step": 8804 }, { "epoch": 0.49077531910149935, "grad_norm": 0.5609720945358276, "learning_rate": 5.281669885241517e-05, "loss": 1.6374, "step": 8805 }, { "epoch": 0.4908310573546625, "grad_norm": 0.5879573225975037, "learning_rate": 5.280780871159111e-05, "loss": 1.7363, "step": 8806 }, { "epoch": 0.49088679560782567, "grad_norm": 0.5944104790687561, "learning_rate": 5.279891848171974e-05, "loss": 1.8078, "step": 8807 }, { "epoch": 0.4909425338609888, "grad_norm": 0.5318206548690796, "learning_rate": 5.2790028163082985e-05, "loss": 1.5397, "step": 8808 }, { "epoch": 0.4909982721141519, "grad_norm": 0.542536199092865, "learning_rate": 5.2781137755962794e-05, "loss": 1.6362, "step": 8809 }, { "epoch": 0.4910540103673151, "grad_norm": 0.5784698128700256, "learning_rate": 5.2772247260641136e-05, "loss": 1.765, "step": 8810 }, { "epoch": 0.49110974862047824, "grad_norm": 0.5454279184341431, "learning_rate": 5.276335667739998e-05, "loss": 1.7014, "step": 8811 }, { "epoch": 0.49116548687364137, "grad_norm": 0.519689679145813, "learning_rate": 5.275446600652123e-05, "loss": 1.7533, "step": 8812 }, { "epoch": 0.49122122512680455, "grad_norm": 0.7089325785636902, "learning_rate": 5.2745575248286895e-05, "loss": 2.1051, "step": 8813 }, { "epoch": 0.4912769633799677, "grad_norm": 0.5588321089744568, "learning_rate": 5.273668440297892e-05, "loss": 1.6069, "step": 8814 }, { "epoch": 0.4913327016331308, "grad_norm": 0.5273601412773132, "learning_rate": 5.272779347087925e-05, "loss": 1.4399, "step": 8815 }, { "epoch": 0.49138843988629394, "grad_norm": 0.5443345904350281, "learning_rate": 5.27189024522699e-05, "loss": 1.5401, "step": 8816 }, { "epoch": 0.4914441781394571, "grad_norm": 0.5727609395980835, "learning_rate": 5.271001134743281e-05, "loss": 1.6588, "step": 8817 }, { "epoch": 0.49149991639262025, "grad_norm": 0.5712710618972778, "learning_rate": 5.270112015664997e-05, "loss": 1.7393, "step": 8818 }, { "epoch": 0.4915556546457834, "grad_norm": 0.5474506616592407, "learning_rate": 5.2692228880203333e-05, "loss": 1.6144, "step": 8819 }, { "epoch": 0.49161139289894656, "grad_norm": 0.5622429251670837, "learning_rate": 5.2683337518374906e-05, "loss": 1.6107, "step": 8820 }, { "epoch": 0.4916671311521097, "grad_norm": 0.5528522729873657, "learning_rate": 5.267444607144665e-05, "loss": 1.5545, "step": 8821 }, { "epoch": 0.4917228694052728, "grad_norm": 0.5275382995605469, "learning_rate": 5.2665554539700554e-05, "loss": 1.6128, "step": 8822 }, { "epoch": 0.491778607658436, "grad_norm": 0.6423818469047546, "learning_rate": 5.265666292341861e-05, "loss": 2.064, "step": 8823 }, { "epoch": 0.49183434591159914, "grad_norm": 0.5372768640518188, "learning_rate": 5.26477712228828e-05, "loss": 1.2805, "step": 8824 }, { "epoch": 0.49189008416476226, "grad_norm": 0.600679337978363, "learning_rate": 5.2638879438375144e-05, "loss": 1.8211, "step": 8825 }, { "epoch": 0.49194582241792545, "grad_norm": 0.5628047585487366, "learning_rate": 5.2629987570177606e-05, "loss": 1.6321, "step": 8826 }, { "epoch": 0.4920015606710886, "grad_norm": 0.600486695766449, "learning_rate": 5.262109561857221e-05, "loss": 1.782, "step": 8827 }, { "epoch": 0.4920572989242517, "grad_norm": 0.5375781655311584, "learning_rate": 5.261220358384091e-05, "loss": 1.5132, "step": 8828 }, { "epoch": 0.49211303717741484, "grad_norm": 0.5441939830780029, "learning_rate": 5.260331146626578e-05, "loss": 1.4457, "step": 8829 }, { "epoch": 0.492168775430578, "grad_norm": 0.5390109419822693, "learning_rate": 5.259441926612877e-05, "loss": 1.6268, "step": 8830 }, { "epoch": 0.49222451368374115, "grad_norm": 0.5406618714332581, "learning_rate": 5.2585526983711916e-05, "loss": 1.5747, "step": 8831 }, { "epoch": 0.4922802519369043, "grad_norm": 0.5526447296142578, "learning_rate": 5.2576634619297216e-05, "loss": 1.6989, "step": 8832 }, { "epoch": 0.49233599019006746, "grad_norm": 0.5135407447814941, "learning_rate": 5.256774217316669e-05, "loss": 1.4546, "step": 8833 }, { "epoch": 0.4923917284432306, "grad_norm": 0.5286427736282349, "learning_rate": 5.255884964560235e-05, "loss": 1.6071, "step": 8834 }, { "epoch": 0.4924474666963937, "grad_norm": 0.5706698894500732, "learning_rate": 5.254995703688621e-05, "loss": 1.7096, "step": 8835 }, { "epoch": 0.4925032049495569, "grad_norm": 0.5597012042999268, "learning_rate": 5.2541064347300306e-05, "loss": 1.6175, "step": 8836 }, { "epoch": 0.49255894320272003, "grad_norm": 0.4902280271053314, "learning_rate": 5.253217157712666e-05, "loss": 1.2836, "step": 8837 }, { "epoch": 0.49261468145588316, "grad_norm": 0.598961591720581, "learning_rate": 5.2523278726647304e-05, "loss": 1.7038, "step": 8838 }, { "epoch": 0.4926704197090463, "grad_norm": 1.2628682851791382, "learning_rate": 5.251438579614425e-05, "loss": 1.8079, "step": 8839 }, { "epoch": 0.4927261579622095, "grad_norm": 0.5793728232383728, "learning_rate": 5.250549278589955e-05, "loss": 1.8102, "step": 8840 }, { "epoch": 0.4927818962153726, "grad_norm": 0.5742671489715576, "learning_rate": 5.249659969619519e-05, "loss": 1.6611, "step": 8841 }, { "epoch": 0.49283763446853573, "grad_norm": 0.5438802242279053, "learning_rate": 5.248770652731327e-05, "loss": 1.5826, "step": 8842 }, { "epoch": 0.4928933727216989, "grad_norm": 0.553573727607727, "learning_rate": 5.247881327953581e-05, "loss": 1.5787, "step": 8843 }, { "epoch": 0.49294911097486205, "grad_norm": 0.5531934499740601, "learning_rate": 5.246991995314484e-05, "loss": 1.7769, "step": 8844 }, { "epoch": 0.4930048492280252, "grad_norm": 0.5669671893119812, "learning_rate": 5.24610265484224e-05, "loss": 1.6973, "step": 8845 }, { "epoch": 0.49306058748118836, "grad_norm": 0.5406858921051025, "learning_rate": 5.2452133065650565e-05, "loss": 1.4484, "step": 8846 }, { "epoch": 0.4931163257343515, "grad_norm": 0.6136825084686279, "learning_rate": 5.2443239505111354e-05, "loss": 1.7145, "step": 8847 }, { "epoch": 0.4931720639875146, "grad_norm": 0.5375277400016785, "learning_rate": 5.243434586708682e-05, "loss": 1.5229, "step": 8848 }, { "epoch": 0.4932278022406778, "grad_norm": 0.5452854633331299, "learning_rate": 5.2425452151859045e-05, "loss": 1.4448, "step": 8849 }, { "epoch": 0.49328354049384093, "grad_norm": 0.5728045701980591, "learning_rate": 5.241655835971006e-05, "loss": 1.8291, "step": 8850 }, { "epoch": 0.49333927874700406, "grad_norm": 0.5290676951408386, "learning_rate": 5.240766449092194e-05, "loss": 1.53, "step": 8851 }, { "epoch": 0.4933950170001672, "grad_norm": 0.6011704206466675, "learning_rate": 5.239877054577673e-05, "loss": 1.7215, "step": 8852 }, { "epoch": 0.4934507552533304, "grad_norm": 0.5930907130241394, "learning_rate": 5.2389876524556526e-05, "loss": 1.8231, "step": 8853 }, { "epoch": 0.4935064935064935, "grad_norm": 0.5788987874984741, "learning_rate": 5.2380982427543346e-05, "loss": 1.7529, "step": 8854 }, { "epoch": 0.49356223175965663, "grad_norm": 0.5591574311256409, "learning_rate": 5.23720882550193e-05, "loss": 1.5894, "step": 8855 }, { "epoch": 0.4936179700128198, "grad_norm": 0.6035146117210388, "learning_rate": 5.2363194007266435e-05, "loss": 1.811, "step": 8856 }, { "epoch": 0.49367370826598295, "grad_norm": 0.5160028338432312, "learning_rate": 5.2354299684566856e-05, "loss": 1.6787, "step": 8857 }, { "epoch": 0.4937294465191461, "grad_norm": 0.5431737899780273, "learning_rate": 5.2345405287202596e-05, "loss": 1.4917, "step": 8858 }, { "epoch": 0.49378518477230926, "grad_norm": 0.5381173491477966, "learning_rate": 5.233651081545577e-05, "loss": 1.6775, "step": 8859 }, { "epoch": 0.4938409230254724, "grad_norm": 0.6041108965873718, "learning_rate": 5.232761626960844e-05, "loss": 1.6414, "step": 8860 }, { "epoch": 0.4938966612786355, "grad_norm": 0.6218950152397156, "learning_rate": 5.231872164994268e-05, "loss": 1.6513, "step": 8861 }, { "epoch": 0.49395239953179865, "grad_norm": 0.5222500562667847, "learning_rate": 5.230982695674059e-05, "loss": 1.7083, "step": 8862 }, { "epoch": 0.49400813778496183, "grad_norm": 0.5420836806297302, "learning_rate": 5.230093219028427e-05, "loss": 1.5971, "step": 8863 }, { "epoch": 0.49406387603812496, "grad_norm": 0.5384796857833862, "learning_rate": 5.229203735085579e-05, "loss": 1.5896, "step": 8864 }, { "epoch": 0.4941196142912881, "grad_norm": 0.6375717520713806, "learning_rate": 5.2283142438737245e-05, "loss": 1.8503, "step": 8865 }, { "epoch": 0.4941753525444513, "grad_norm": 0.5303763151168823, "learning_rate": 5.227424745421074e-05, "loss": 1.6416, "step": 8866 }, { "epoch": 0.4942310907976144, "grad_norm": 0.5153331756591797, "learning_rate": 5.2265352397558354e-05, "loss": 1.3659, "step": 8867 }, { "epoch": 0.49428682905077753, "grad_norm": 0.5397130846977234, "learning_rate": 5.225645726906222e-05, "loss": 1.5523, "step": 8868 }, { "epoch": 0.4943425673039407, "grad_norm": 0.5596987009048462, "learning_rate": 5.224756206900439e-05, "loss": 1.7921, "step": 8869 }, { "epoch": 0.49439830555710385, "grad_norm": 0.5709193348884583, "learning_rate": 5.2238666797667026e-05, "loss": 1.6013, "step": 8870 }, { "epoch": 0.494454043810267, "grad_norm": 0.5561599731445312, "learning_rate": 5.2229771455332176e-05, "loss": 1.4794, "step": 8871 }, { "epoch": 0.49450978206343016, "grad_norm": 0.5445564985275269, "learning_rate": 5.2220876042281995e-05, "loss": 1.5029, "step": 8872 }, { "epoch": 0.4945655203165933, "grad_norm": 0.5647691488265991, "learning_rate": 5.2211980558798565e-05, "loss": 1.7888, "step": 8873 }, { "epoch": 0.4946212585697564, "grad_norm": 0.5487396717071533, "learning_rate": 5.220308500516401e-05, "loss": 1.6931, "step": 8874 }, { "epoch": 0.49467699682291955, "grad_norm": 0.5969203114509583, "learning_rate": 5.219418938166044e-05, "loss": 1.6718, "step": 8875 }, { "epoch": 0.49473273507608273, "grad_norm": 0.564508855342865, "learning_rate": 5.218529368856997e-05, "loss": 1.6968, "step": 8876 }, { "epoch": 0.49478847332924586, "grad_norm": 0.5070094466209412, "learning_rate": 5.217639792617475e-05, "loss": 1.5859, "step": 8877 }, { "epoch": 0.494844211582409, "grad_norm": 0.5474216341972351, "learning_rate": 5.216750209475685e-05, "loss": 1.7858, "step": 8878 }, { "epoch": 0.4948999498355722, "grad_norm": 0.4998477101325989, "learning_rate": 5.2158606194598436e-05, "loss": 1.4827, "step": 8879 }, { "epoch": 0.4949556880887353, "grad_norm": 0.5660443305969238, "learning_rate": 5.214971022598162e-05, "loss": 1.7799, "step": 8880 }, { "epoch": 0.49501142634189843, "grad_norm": 0.5911859273910522, "learning_rate": 5.2140814189188514e-05, "loss": 1.6708, "step": 8881 }, { "epoch": 0.4950671645950616, "grad_norm": 0.5817141532897949, "learning_rate": 5.213191808450127e-05, "loss": 1.6558, "step": 8882 }, { "epoch": 0.49512290284822474, "grad_norm": 0.5510105490684509, "learning_rate": 5.212302191220203e-05, "loss": 1.5644, "step": 8883 }, { "epoch": 0.4951786411013879, "grad_norm": 0.6024221181869507, "learning_rate": 5.21141256725729e-05, "loss": 1.7236, "step": 8884 }, { "epoch": 0.495234379354551, "grad_norm": 0.5197804570198059, "learning_rate": 5.210522936589604e-05, "loss": 1.5429, "step": 8885 }, { "epoch": 0.4952901176077142, "grad_norm": 0.5537724494934082, "learning_rate": 5.209633299245357e-05, "loss": 1.7254, "step": 8886 }, { "epoch": 0.4953458558608773, "grad_norm": 0.5095260739326477, "learning_rate": 5.208743655252763e-05, "loss": 1.4012, "step": 8887 }, { "epoch": 0.49540159411404044, "grad_norm": 0.5599790811538696, "learning_rate": 5.207854004640038e-05, "loss": 1.7249, "step": 8888 }, { "epoch": 0.49545733236720363, "grad_norm": 0.555938184261322, "learning_rate": 5.206964347435396e-05, "loss": 1.6312, "step": 8889 }, { "epoch": 0.49551307062036676, "grad_norm": 0.5438600182533264, "learning_rate": 5.206074683667053e-05, "loss": 1.7241, "step": 8890 }, { "epoch": 0.4955688088735299, "grad_norm": 0.5477585792541504, "learning_rate": 5.2051850133632206e-05, "loss": 1.6946, "step": 8891 }, { "epoch": 0.49562454712669307, "grad_norm": 0.5788122415542603, "learning_rate": 5.204295336552117e-05, "loss": 1.503, "step": 8892 }, { "epoch": 0.4956802853798562, "grad_norm": 0.5613676309585571, "learning_rate": 5.203405653261956e-05, "loss": 1.5574, "step": 8893 }, { "epoch": 0.49573602363301933, "grad_norm": 0.5826630592346191, "learning_rate": 5.202515963520953e-05, "loss": 1.85, "step": 8894 }, { "epoch": 0.4957917618861825, "grad_norm": 0.5635188817977905, "learning_rate": 5.2016262673573246e-05, "loss": 1.3931, "step": 8895 }, { "epoch": 0.49584750013934564, "grad_norm": 0.5745763182640076, "learning_rate": 5.200736564799288e-05, "loss": 1.7307, "step": 8896 }, { "epoch": 0.49590323839250877, "grad_norm": 0.5301480889320374, "learning_rate": 5.199846855875057e-05, "loss": 1.4952, "step": 8897 }, { "epoch": 0.4959589766456719, "grad_norm": 0.561489999294281, "learning_rate": 5.19895714061285e-05, "loss": 1.5023, "step": 8898 }, { "epoch": 0.4960147148988351, "grad_norm": 0.5963059663772583, "learning_rate": 5.198067419040881e-05, "loss": 1.7862, "step": 8899 }, { "epoch": 0.4960704531519982, "grad_norm": 0.5533133149147034, "learning_rate": 5.197177691187368e-05, "loss": 1.6099, "step": 8900 }, { "epoch": 0.49612619140516134, "grad_norm": 0.5286788940429688, "learning_rate": 5.196287957080529e-05, "loss": 1.5929, "step": 8901 }, { "epoch": 0.4961819296583245, "grad_norm": 0.5352204442024231, "learning_rate": 5.195398216748579e-05, "loss": 1.5723, "step": 8902 }, { "epoch": 0.49623766791148766, "grad_norm": 0.5606736540794373, "learning_rate": 5.194508470219739e-05, "loss": 1.6633, "step": 8903 }, { "epoch": 0.4962934061646508, "grad_norm": 0.5791866779327393, "learning_rate": 5.193618717522224e-05, "loss": 1.6933, "step": 8904 }, { "epoch": 0.49634914441781397, "grad_norm": 0.5928483009338379, "learning_rate": 5.192728958684252e-05, "loss": 1.8085, "step": 8905 }, { "epoch": 0.4964048826709771, "grad_norm": 0.545987606048584, "learning_rate": 5.1918391937340405e-05, "loss": 1.6682, "step": 8906 }, { "epoch": 0.49646062092414023, "grad_norm": 0.5828558206558228, "learning_rate": 5.190949422699808e-05, "loss": 1.7887, "step": 8907 }, { "epoch": 0.49651635917730336, "grad_norm": 0.5636189579963684, "learning_rate": 5.1900596456097736e-05, "loss": 1.6192, "step": 8908 }, { "epoch": 0.49657209743046654, "grad_norm": 0.5548069477081299, "learning_rate": 5.189169862492156e-05, "loss": 1.482, "step": 8909 }, { "epoch": 0.49662783568362967, "grad_norm": 0.5686978697776794, "learning_rate": 5.188280073375173e-05, "loss": 1.5428, "step": 8910 }, { "epoch": 0.4966835739367928, "grad_norm": 0.5715393424034119, "learning_rate": 5.187390278287043e-05, "loss": 1.751, "step": 8911 }, { "epoch": 0.496739312189956, "grad_norm": 0.5473306775093079, "learning_rate": 5.1865004772559876e-05, "loss": 1.6317, "step": 8912 }, { "epoch": 0.4967950504431191, "grad_norm": 0.5280557870864868, "learning_rate": 5.1856106703102225e-05, "loss": 1.382, "step": 8913 }, { "epoch": 0.49685078869628224, "grad_norm": 0.566477358341217, "learning_rate": 5.18472085747797e-05, "loss": 1.6059, "step": 8914 }, { "epoch": 0.4969065269494454, "grad_norm": 0.618401288986206, "learning_rate": 5.183831038787449e-05, "loss": 1.7905, "step": 8915 }, { "epoch": 0.49696226520260856, "grad_norm": 0.555980384349823, "learning_rate": 5.18294121426688e-05, "loss": 1.7827, "step": 8916 }, { "epoch": 0.4970180034557717, "grad_norm": 0.5835009813308716, "learning_rate": 5.1820513839444804e-05, "loss": 1.5225, "step": 8917 }, { "epoch": 0.49707374170893487, "grad_norm": 0.5366058945655823, "learning_rate": 5.181161547848474e-05, "loss": 1.584, "step": 8918 }, { "epoch": 0.497129479962098, "grad_norm": 0.5382677316665649, "learning_rate": 5.1802717060070795e-05, "loss": 1.7048, "step": 8919 }, { "epoch": 0.4971852182152611, "grad_norm": 0.5656511783599854, "learning_rate": 5.1793818584485166e-05, "loss": 1.7254, "step": 8920 }, { "epoch": 0.49724095646842426, "grad_norm": 0.4968765377998352, "learning_rate": 5.178492005201007e-05, "loss": 1.4276, "step": 8921 }, { "epoch": 0.49729669472158744, "grad_norm": 0.599624514579773, "learning_rate": 5.177602146292773e-05, "loss": 1.7886, "step": 8922 }, { "epoch": 0.49735243297475057, "grad_norm": 0.5555099844932556, "learning_rate": 5.176712281752033e-05, "loss": 1.5135, "step": 8923 }, { "epoch": 0.4974081712279137, "grad_norm": 0.5166276693344116, "learning_rate": 5.17582241160701e-05, "loss": 1.284, "step": 8924 }, { "epoch": 0.4974639094810769, "grad_norm": 0.5706877708435059, "learning_rate": 5.1749325358859255e-05, "loss": 1.5666, "step": 8925 }, { "epoch": 0.49751964773424, "grad_norm": 0.6055343747138977, "learning_rate": 5.1740426546170003e-05, "loss": 1.7793, "step": 8926 }, { "epoch": 0.49757538598740314, "grad_norm": 0.551367998123169, "learning_rate": 5.1731527678284575e-05, "loss": 1.7579, "step": 8927 }, { "epoch": 0.4976311242405663, "grad_norm": 0.6338830590248108, "learning_rate": 5.172262875548518e-05, "loss": 1.691, "step": 8928 }, { "epoch": 0.49768686249372945, "grad_norm": 0.5556480884552002, "learning_rate": 5.171372977805405e-05, "loss": 1.5507, "step": 8929 }, { "epoch": 0.4977426007468926, "grad_norm": 0.5841500163078308, "learning_rate": 5.17048307462734e-05, "loss": 1.8044, "step": 8930 }, { "epoch": 0.4977983390000557, "grad_norm": 0.5762627124786377, "learning_rate": 5.169593166042547e-05, "loss": 1.6068, "step": 8931 }, { "epoch": 0.4978540772532189, "grad_norm": 0.5406793355941772, "learning_rate": 5.1687032520792464e-05, "loss": 1.6587, "step": 8932 }, { "epoch": 0.497909815506382, "grad_norm": 0.5948076248168945, "learning_rate": 5.1678133327656616e-05, "loss": 1.7269, "step": 8933 }, { "epoch": 0.49796555375954515, "grad_norm": 0.5559920072555542, "learning_rate": 5.166923408130016e-05, "loss": 1.7147, "step": 8934 }, { "epoch": 0.49802129201270834, "grad_norm": 0.5676483511924744, "learning_rate": 5.166033478200536e-05, "loss": 1.5815, "step": 8935 }, { "epoch": 0.49807703026587147, "grad_norm": 0.5557644367218018, "learning_rate": 5.1651435430054396e-05, "loss": 1.7004, "step": 8936 }, { "epoch": 0.4981327685190346, "grad_norm": 0.5279107093811035, "learning_rate": 5.164253602572954e-05, "loss": 1.5522, "step": 8937 }, { "epoch": 0.4981885067721978, "grad_norm": 0.5402976870536804, "learning_rate": 5.1633636569313014e-05, "loss": 1.6626, "step": 8938 }, { "epoch": 0.4982442450253609, "grad_norm": 0.5484632849693298, "learning_rate": 5.1624737061087056e-05, "loss": 1.5598, "step": 8939 }, { "epoch": 0.49829998327852404, "grad_norm": 0.5460349321365356, "learning_rate": 5.161583750133392e-05, "loss": 1.6661, "step": 8940 }, { "epoch": 0.4983557215316872, "grad_norm": 0.5012972950935364, "learning_rate": 5.160693789033583e-05, "loss": 1.3436, "step": 8941 }, { "epoch": 0.49841145978485035, "grad_norm": 0.5560734272003174, "learning_rate": 5.159803822837506e-05, "loss": 1.5994, "step": 8942 }, { "epoch": 0.4984671980380135, "grad_norm": 0.5721739530563354, "learning_rate": 5.1589138515733805e-05, "loss": 1.8826, "step": 8943 }, { "epoch": 0.4985229362911766, "grad_norm": 0.548629105091095, "learning_rate": 5.158023875269436e-05, "loss": 1.465, "step": 8944 }, { "epoch": 0.4985786745443398, "grad_norm": 0.5386154651641846, "learning_rate": 5.157133893953895e-05, "loss": 1.624, "step": 8945 }, { "epoch": 0.4986344127975029, "grad_norm": 0.6287878155708313, "learning_rate": 5.156243907654983e-05, "loss": 1.6433, "step": 8946 }, { "epoch": 0.49869015105066605, "grad_norm": 0.6134181022644043, "learning_rate": 5.155353916400925e-05, "loss": 1.7598, "step": 8947 }, { "epoch": 0.49874588930382924, "grad_norm": 0.5654070377349854, "learning_rate": 5.154463920219947e-05, "loss": 1.7002, "step": 8948 }, { "epoch": 0.49880162755699237, "grad_norm": 0.5511396527290344, "learning_rate": 5.153573919140274e-05, "loss": 1.5513, "step": 8949 }, { "epoch": 0.4988573658101555, "grad_norm": 0.5892798900604248, "learning_rate": 5.1526839131901315e-05, "loss": 1.8855, "step": 8950 }, { "epoch": 0.4989131040633187, "grad_norm": 0.6024952530860901, "learning_rate": 5.151793902397747e-05, "loss": 1.591, "step": 8951 }, { "epoch": 0.4989688423164818, "grad_norm": 0.545107901096344, "learning_rate": 5.150903886791343e-05, "loss": 1.54, "step": 8952 }, { "epoch": 0.49902458056964494, "grad_norm": 0.5680729746818542, "learning_rate": 5.150013866399147e-05, "loss": 1.417, "step": 8953 }, { "epoch": 0.49908031882280807, "grad_norm": 0.5475823879241943, "learning_rate": 5.149123841249387e-05, "loss": 1.5283, "step": 8954 }, { "epoch": 0.49913605707597125, "grad_norm": 0.6003718376159668, "learning_rate": 5.148233811370289e-05, "loss": 1.9128, "step": 8955 }, { "epoch": 0.4991917953291344, "grad_norm": 0.5217127203941345, "learning_rate": 5.1473437767900766e-05, "loss": 1.5466, "step": 8956 }, { "epoch": 0.4992475335822975, "grad_norm": 0.5930051803588867, "learning_rate": 5.1464537375369816e-05, "loss": 1.7227, "step": 8957 }, { "epoch": 0.4993032718354607, "grad_norm": 0.5506693124771118, "learning_rate": 5.145563693639226e-05, "loss": 1.5488, "step": 8958 }, { "epoch": 0.4993590100886238, "grad_norm": 0.5341318845748901, "learning_rate": 5.144673645125039e-05, "loss": 1.6493, "step": 8959 }, { "epoch": 0.49941474834178695, "grad_norm": 0.5735641717910767, "learning_rate": 5.143783592022646e-05, "loss": 1.6502, "step": 8960 }, { "epoch": 0.49947048659495014, "grad_norm": 0.5525271892547607, "learning_rate": 5.142893534360278e-05, "loss": 1.389, "step": 8961 }, { "epoch": 0.49952622484811326, "grad_norm": 0.6138321161270142, "learning_rate": 5.1420034721661594e-05, "loss": 1.882, "step": 8962 }, { "epoch": 0.4995819631012764, "grad_norm": 0.5286270380020142, "learning_rate": 5.1411134054685185e-05, "loss": 1.6304, "step": 8963 }, { "epoch": 0.4996377013544396, "grad_norm": 0.5324103832244873, "learning_rate": 5.140223334295584e-05, "loss": 1.7474, "step": 8964 }, { "epoch": 0.4996934396076027, "grad_norm": 0.598732590675354, "learning_rate": 5.139333258675582e-05, "loss": 1.7623, "step": 8965 }, { "epoch": 0.49974917786076584, "grad_norm": 0.5680933594703674, "learning_rate": 5.138443178636742e-05, "loss": 1.5633, "step": 8966 }, { "epoch": 0.49980491611392897, "grad_norm": 0.5769996047019958, "learning_rate": 5.13755309420729e-05, "loss": 1.6215, "step": 8967 }, { "epoch": 0.49986065436709215, "grad_norm": 0.5486459732055664, "learning_rate": 5.1366630054154576e-05, "loss": 1.6782, "step": 8968 }, { "epoch": 0.4999163926202553, "grad_norm": 0.6276679635047913, "learning_rate": 5.1357729122894706e-05, "loss": 1.7972, "step": 8969 }, { "epoch": 0.4999721308734184, "grad_norm": 0.5534047484397888, "learning_rate": 5.134882814857559e-05, "loss": 1.5217, "step": 8970 }, { "epoch": 0.5000278691265816, "grad_norm": 0.7427502274513245, "learning_rate": 5.1339927131479503e-05, "loss": 1.7474, "step": 8971 }, { "epoch": 0.5000836073797447, "grad_norm": 0.5830016136169434, "learning_rate": 5.133102607188874e-05, "loss": 1.7703, "step": 8972 }, { "epoch": 0.5001393456329079, "grad_norm": 0.5821530818939209, "learning_rate": 5.132212497008559e-05, "loss": 1.6809, "step": 8973 }, { "epoch": 0.500195083886071, "grad_norm": 0.5597349405288696, "learning_rate": 5.1313223826352365e-05, "loss": 1.6982, "step": 8974 }, { "epoch": 0.5002508221392341, "grad_norm": 0.5627524256706238, "learning_rate": 5.1304322640971315e-05, "loss": 1.5646, "step": 8975 }, { "epoch": 0.5003065603923973, "grad_norm": 0.568310558795929, "learning_rate": 5.1295421414224754e-05, "loss": 1.6019, "step": 8976 }, { "epoch": 0.5003622986455605, "grad_norm": 0.5768476128578186, "learning_rate": 5.128652014639499e-05, "loss": 1.6455, "step": 8977 }, { "epoch": 0.5004180368987236, "grad_norm": 0.5494751930236816, "learning_rate": 5.1277618837764294e-05, "loss": 1.5586, "step": 8978 }, { "epoch": 0.5004737751518867, "grad_norm": 0.5893326997756958, "learning_rate": 5.126871748861499e-05, "loss": 1.8271, "step": 8979 }, { "epoch": 0.5005295134050499, "grad_norm": 0.5742121934890747, "learning_rate": 5.125981609922935e-05, "loss": 1.7673, "step": 8980 }, { "epoch": 0.500585251658213, "grad_norm": 0.5225714445114136, "learning_rate": 5.1250914669889714e-05, "loss": 1.5127, "step": 8981 }, { "epoch": 0.5006409899113762, "grad_norm": 0.5902960300445557, "learning_rate": 5.124201320087833e-05, "loss": 1.7471, "step": 8982 }, { "epoch": 0.5006967281645394, "grad_norm": 0.5950215458869934, "learning_rate": 5.1233111692477555e-05, "loss": 1.6188, "step": 8983 }, { "epoch": 0.5007524664177024, "grad_norm": 0.5525108575820923, "learning_rate": 5.122421014496965e-05, "loss": 1.6802, "step": 8984 }, { "epoch": 0.5008082046708656, "grad_norm": 0.5543337464332581, "learning_rate": 5.1215308558636944e-05, "loss": 1.5793, "step": 8985 }, { "epoch": 0.5008639429240288, "grad_norm": 0.5265454053878784, "learning_rate": 5.1206406933761716e-05, "loss": 1.3947, "step": 8986 }, { "epoch": 0.5009196811771919, "grad_norm": 0.6150608658790588, "learning_rate": 5.119750527062632e-05, "loss": 1.9244, "step": 8987 }, { "epoch": 0.5009754194303551, "grad_norm": 0.5269333124160767, "learning_rate": 5.1188603569513025e-05, "loss": 1.6002, "step": 8988 }, { "epoch": 0.5010311576835182, "grad_norm": 0.6029527187347412, "learning_rate": 5.117970183070416e-05, "loss": 1.8124, "step": 8989 }, { "epoch": 0.5010868959366813, "grad_norm": 0.5682185292243958, "learning_rate": 5.1170800054482035e-05, "loss": 1.6561, "step": 8990 }, { "epoch": 0.5011426341898445, "grad_norm": 0.5897371172904968, "learning_rate": 5.116189824112896e-05, "loss": 1.7734, "step": 8991 }, { "epoch": 0.5011983724430077, "grad_norm": 0.5152097940444946, "learning_rate": 5.115299639092723e-05, "loss": 1.4226, "step": 8992 }, { "epoch": 0.5012541106961708, "grad_norm": 0.546345591545105, "learning_rate": 5.114409450415919e-05, "loss": 1.4967, "step": 8993 }, { "epoch": 0.501309848949334, "grad_norm": 0.5303710103034973, "learning_rate": 5.113519258110715e-05, "loss": 1.6527, "step": 8994 }, { "epoch": 0.501365587202497, "grad_norm": 0.5513923764228821, "learning_rate": 5.1126290622053405e-05, "loss": 1.7632, "step": 8995 }, { "epoch": 0.5014213254556602, "grad_norm": 0.5321218371391296, "learning_rate": 5.1117388627280305e-05, "loss": 1.5339, "step": 8996 }, { "epoch": 0.5014770637088234, "grad_norm": 0.5597907900810242, "learning_rate": 5.1108486597070125e-05, "loss": 1.6767, "step": 8997 }, { "epoch": 0.5015328019619865, "grad_norm": 0.5612991452217102, "learning_rate": 5.109958453170524e-05, "loss": 1.7141, "step": 8998 }, { "epoch": 0.5015885402151496, "grad_norm": 0.549898087978363, "learning_rate": 5.109068243146793e-05, "loss": 1.393, "step": 8999 }, { "epoch": 0.5016442784683128, "grad_norm": 0.5984362959861755, "learning_rate": 5.1081780296640535e-05, "loss": 1.8804, "step": 9000 } ], "logging_steps": 1, "max_steps": 17941, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.714094069481472e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }