{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999856620546276, "eval_steps": 500, "global_step": 17436, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 15.558266639709473, "learning_rate": 3.816793893129771e-08, "loss": 1.6675, "step": 1 }, { "epoch": 0.0, "grad_norm": 13.988743782043457, "learning_rate": 7.633587786259542e-08, "loss": 1.5673, "step": 2 }, { "epoch": 0.0, "grad_norm": 18.858505249023438, "learning_rate": 1.1450381679389314e-07, "loss": 1.0491, "step": 3 }, { "epoch": 0.0, "grad_norm": 15.732449531555176, "learning_rate": 1.5267175572519085e-07, "loss": 1.5693, "step": 4 }, { "epoch": 0.0, "grad_norm": 14.124704360961914, "learning_rate": 1.9083969465648858e-07, "loss": 1.6515, "step": 5 }, { "epoch": 0.0, "grad_norm": 14.981911659240723, "learning_rate": 2.2900763358778629e-07, "loss": 1.6218, "step": 6 }, { "epoch": 0.0, "grad_norm": 16.896223068237305, "learning_rate": 2.67175572519084e-07, "loss": 1.6901, "step": 7 }, { "epoch": 0.0, "grad_norm": 11.718731880187988, "learning_rate": 3.053435114503817e-07, "loss": 1.6153, "step": 8 }, { "epoch": 0.0, "grad_norm": 10.451005935668945, "learning_rate": 3.4351145038167945e-07, "loss": 1.6435, "step": 9 }, { "epoch": 0.0, "grad_norm": 13.86658763885498, "learning_rate": 3.8167938931297716e-07, "loss": 1.6422, "step": 10 }, { "epoch": 0.0, "grad_norm": 14.805266380310059, "learning_rate": 4.1984732824427486e-07, "loss": 1.5509, "step": 11 }, { "epoch": 0.0, "grad_norm": 13.717146873474121, "learning_rate": 4.5801526717557257e-07, "loss": 1.5424, "step": 12 }, { "epoch": 0.0, "grad_norm": 13.827333450317383, "learning_rate": 4.961832061068702e-07, "loss": 1.5796, "step": 13 }, { "epoch": 0.0, "grad_norm": 9.937980651855469, "learning_rate": 5.34351145038168e-07, "loss": 1.5997, "step": 14 }, { "epoch": 0.0, "grad_norm": 13.568488121032715, "learning_rate": 5.725190839694656e-07, "loss": 1.5657, "step": 15 }, { "epoch": 0.0, "grad_norm": 16.67142677307129, "learning_rate": 6.106870229007634e-07, "loss": 0.8693, "step": 16 }, { "epoch": 0.0, "grad_norm": 7.242477893829346, "learning_rate": 6.48854961832061e-07, "loss": 1.4926, "step": 17 }, { "epoch": 0.0, "grad_norm": 6.931870937347412, "learning_rate": 6.870229007633589e-07, "loss": 1.4668, "step": 18 }, { "epoch": 0.0, "grad_norm": 6.434217929840088, "learning_rate": 7.251908396946565e-07, "loss": 1.4932, "step": 19 }, { "epoch": 0.0, "grad_norm": 6.345139980316162, "learning_rate": 7.633587786259543e-07, "loss": 1.5443, "step": 20 }, { "epoch": 0.0, "grad_norm": 4.9464006423950195, "learning_rate": 8.01526717557252e-07, "loss": 1.4134, "step": 21 }, { "epoch": 0.0, "grad_norm": 5.003053665161133, "learning_rate": 8.396946564885497e-07, "loss": 1.431, "step": 22 }, { "epoch": 0.0, "grad_norm": 3.969738721847534, "learning_rate": 8.778625954198474e-07, "loss": 1.431, "step": 23 }, { "epoch": 0.0, "grad_norm": 3.9027457237243652, "learning_rate": 9.160305343511451e-07, "loss": 1.3926, "step": 24 }, { "epoch": 0.0, "grad_norm": 4.465153694152832, "learning_rate": 9.54198473282443e-07, "loss": 1.4641, "step": 25 }, { "epoch": 0.0, "grad_norm": 4.1631879806518555, "learning_rate": 9.923664122137404e-07, "loss": 1.4377, "step": 26 }, { "epoch": 0.0, "grad_norm": 4.10839319229126, "learning_rate": 1.0305343511450382e-06, "loss": 1.4771, "step": 27 }, { "epoch": 0.0, "grad_norm": 4.254977226257324, "learning_rate": 1.068702290076336e-06, "loss": 1.4225, "step": 28 }, { "epoch": 0.0, "grad_norm": 3.8101425170898438, "learning_rate": 1.1068702290076337e-06, "loss": 1.44, "step": 29 }, { "epoch": 0.0, "grad_norm": 4.160100936889648, "learning_rate": 1.1450381679389313e-06, "loss": 1.4205, "step": 30 }, { "epoch": 0.0, "grad_norm": 3.5531253814697266, "learning_rate": 1.1832061068702292e-06, "loss": 1.5099, "step": 31 }, { "epoch": 0.0, "grad_norm": 3.6297051906585693, "learning_rate": 1.2213740458015268e-06, "loss": 1.4695, "step": 32 }, { "epoch": 0.0, "grad_norm": 3.042447566986084, "learning_rate": 1.2595419847328243e-06, "loss": 1.3552, "step": 33 }, { "epoch": 0.0, "grad_norm": 3.2630228996276855, "learning_rate": 1.297709923664122e-06, "loss": 1.3628, "step": 34 }, { "epoch": 0.0, "grad_norm": 2.807941436767578, "learning_rate": 1.33587786259542e-06, "loss": 1.4268, "step": 35 }, { "epoch": 0.0, "grad_norm": 2.7850215435028076, "learning_rate": 1.3740458015267178e-06, "loss": 1.3714, "step": 36 }, { "epoch": 0.0, "grad_norm": 2.831404447555542, "learning_rate": 1.4122137404580156e-06, "loss": 1.3748, "step": 37 }, { "epoch": 0.0, "grad_norm": 3.020758628845215, "learning_rate": 1.450381679389313e-06, "loss": 1.3267, "step": 38 }, { "epoch": 0.0, "grad_norm": 2.6790385246276855, "learning_rate": 1.4885496183206109e-06, "loss": 1.4149, "step": 39 }, { "epoch": 0.0, "grad_norm": 2.7830984592437744, "learning_rate": 1.5267175572519086e-06, "loss": 1.3075, "step": 40 }, { "epoch": 0.0, "grad_norm": 3.0869674682617188, "learning_rate": 1.5648854961832064e-06, "loss": 1.3319, "step": 41 }, { "epoch": 0.0, "grad_norm": 2.6376333236694336, "learning_rate": 1.603053435114504e-06, "loss": 1.4106, "step": 42 }, { "epoch": 0.0, "grad_norm": 3.0565459728240967, "learning_rate": 1.6412213740458017e-06, "loss": 1.3215, "step": 43 }, { "epoch": 0.0, "grad_norm": 2.8039722442626953, "learning_rate": 1.6793893129770995e-06, "loss": 1.3915, "step": 44 }, { "epoch": 0.0, "grad_norm": 2.494558811187744, "learning_rate": 1.7175572519083972e-06, "loss": 1.251, "step": 45 }, { "epoch": 0.0, "grad_norm": 2.8768439292907715, "learning_rate": 1.7557251908396948e-06, "loss": 1.3015, "step": 46 }, { "epoch": 0.0, "grad_norm": 3.027677536010742, "learning_rate": 1.7938931297709925e-06, "loss": 1.2823, "step": 47 }, { "epoch": 0.0, "grad_norm": 11.196527481079102, "learning_rate": 1.8320610687022903e-06, "loss": 0.8777, "step": 48 }, { "epoch": 0.0, "grad_norm": 2.764543056488037, "learning_rate": 1.870229007633588e-06, "loss": 1.2758, "step": 49 }, { "epoch": 0.0, "grad_norm": 3.6398603916168213, "learning_rate": 1.908396946564886e-06, "loss": 1.3325, "step": 50 }, { "epoch": 0.0, "grad_norm": 8.84912395477295, "learning_rate": 1.946564885496183e-06, "loss": 0.8353, "step": 51 }, { "epoch": 0.0, "grad_norm": 2.275533437728882, "learning_rate": 1.984732824427481e-06, "loss": 1.2948, "step": 52 }, { "epoch": 0.0, "grad_norm": 2.6710312366485596, "learning_rate": 2.0229007633587786e-06, "loss": 1.3265, "step": 53 }, { "epoch": 0.0, "grad_norm": 2.5640649795532227, "learning_rate": 2.0610687022900764e-06, "loss": 1.3774, "step": 54 }, { "epoch": 0.0, "grad_norm": 2.6674256324768066, "learning_rate": 2.099236641221374e-06, "loss": 1.3859, "step": 55 }, { "epoch": 0.0, "grad_norm": 2.7106809616088867, "learning_rate": 2.137404580152672e-06, "loss": 1.3243, "step": 56 }, { "epoch": 0.0, "grad_norm": 2.6420164108276367, "learning_rate": 2.1755725190839697e-06, "loss": 1.427, "step": 57 }, { "epoch": 0.0, "grad_norm": 2.4347591400146484, "learning_rate": 2.2137404580152674e-06, "loss": 1.3067, "step": 58 }, { "epoch": 0.0, "grad_norm": 2.5336878299713135, "learning_rate": 2.2519083969465648e-06, "loss": 1.3438, "step": 59 }, { "epoch": 0.0, "grad_norm": 2.2951669692993164, "learning_rate": 2.2900763358778625e-06, "loss": 1.2477, "step": 60 }, { "epoch": 0.0, "grad_norm": 2.7502987384796143, "learning_rate": 2.3282442748091603e-06, "loss": 1.2977, "step": 61 }, { "epoch": 0.0, "grad_norm": 2.4772419929504395, "learning_rate": 2.3664122137404585e-06, "loss": 1.2858, "step": 62 }, { "epoch": 0.0, "grad_norm": 2.3816637992858887, "learning_rate": 2.4045801526717562e-06, "loss": 1.2402, "step": 63 }, { "epoch": 0.0, "grad_norm": 2.4924306869506836, "learning_rate": 2.4427480916030536e-06, "loss": 1.2361, "step": 64 }, { "epoch": 0.0, "grad_norm": 2.4357974529266357, "learning_rate": 2.4809160305343513e-06, "loss": 1.3761, "step": 65 }, { "epoch": 0.0, "grad_norm": 4.770843029022217, "learning_rate": 2.5190839694656487e-06, "loss": 0.6892, "step": 66 }, { "epoch": 0.0, "grad_norm": 2.287750005722046, "learning_rate": 2.5572519083969464e-06, "loss": 1.359, "step": 67 }, { "epoch": 0.0, "grad_norm": 2.177046298980713, "learning_rate": 2.595419847328244e-06, "loss": 1.2689, "step": 68 }, { "epoch": 0.0, "grad_norm": 2.238996982574463, "learning_rate": 2.633587786259542e-06, "loss": 1.3359, "step": 69 }, { "epoch": 0.0, "grad_norm": 2.560926675796509, "learning_rate": 2.67175572519084e-06, "loss": 1.2537, "step": 70 }, { "epoch": 0.0, "grad_norm": 2.5880041122436523, "learning_rate": 2.709923664122138e-06, "loss": 1.3544, "step": 71 }, { "epoch": 0.0, "grad_norm": 2.2030255794525146, "learning_rate": 2.7480916030534356e-06, "loss": 1.2867, "step": 72 }, { "epoch": 0.0, "grad_norm": 2.4377143383026123, "learning_rate": 2.7862595419847334e-06, "loss": 1.2642, "step": 73 }, { "epoch": 0.0, "grad_norm": 2.3077785968780518, "learning_rate": 2.824427480916031e-06, "loss": 1.3151, "step": 74 }, { "epoch": 0.0, "grad_norm": 2.2587642669677734, "learning_rate": 2.862595419847328e-06, "loss": 1.2545, "step": 75 }, { "epoch": 0.0, "grad_norm": 2.4077560901641846, "learning_rate": 2.900763358778626e-06, "loss": 1.3183, "step": 76 }, { "epoch": 0.0, "grad_norm": 2.194283962249756, "learning_rate": 2.938931297709924e-06, "loss": 1.2414, "step": 77 }, { "epoch": 0.0, "grad_norm": 2.358806848526001, "learning_rate": 2.9770992366412218e-06, "loss": 1.2463, "step": 78 }, { "epoch": 0.0, "grad_norm": 2.3932883739471436, "learning_rate": 3.0152671755725195e-06, "loss": 1.1616, "step": 79 }, { "epoch": 0.0, "grad_norm": 2.4160308837890625, "learning_rate": 3.0534351145038173e-06, "loss": 1.1614, "step": 80 }, { "epoch": 0.0, "grad_norm": 2.3357017040252686, "learning_rate": 3.091603053435115e-06, "loss": 1.2228, "step": 81 }, { "epoch": 0.0, "grad_norm": 2.3943848609924316, "learning_rate": 3.129770992366413e-06, "loss": 1.3283, "step": 82 }, { "epoch": 0.0, "grad_norm": 2.553863525390625, "learning_rate": 3.1679389312977097e-06, "loss": 1.2791, "step": 83 }, { "epoch": 0.0, "grad_norm": 2.3540492057800293, "learning_rate": 3.206106870229008e-06, "loss": 1.2112, "step": 84 }, { "epoch": 0.0, "grad_norm": 2.340452194213867, "learning_rate": 3.2442748091603056e-06, "loss": 1.2269, "step": 85 }, { "epoch": 0.0, "grad_norm": 2.4069883823394775, "learning_rate": 3.2824427480916034e-06, "loss": 1.207, "step": 86 }, { "epoch": 0.0, "grad_norm": 2.083019256591797, "learning_rate": 3.320610687022901e-06, "loss": 1.1862, "step": 87 }, { "epoch": 0.01, "grad_norm": 2.1076905727386475, "learning_rate": 3.358778625954199e-06, "loss": 1.267, "step": 88 }, { "epoch": 0.01, "grad_norm": 2.48537540435791, "learning_rate": 3.3969465648854967e-06, "loss": 1.3569, "step": 89 }, { "epoch": 0.01, "grad_norm": 2.5323920249938965, "learning_rate": 3.4351145038167944e-06, "loss": 1.2864, "step": 90 }, { "epoch": 0.01, "grad_norm": 2.3568899631500244, "learning_rate": 3.473282442748092e-06, "loss": 1.3478, "step": 91 }, { "epoch": 0.01, "grad_norm": 2.498481273651123, "learning_rate": 3.5114503816793895e-06, "loss": 1.2504, "step": 92 }, { "epoch": 0.01, "grad_norm": 2.007481336593628, "learning_rate": 3.5496183206106873e-06, "loss": 1.1569, "step": 93 }, { "epoch": 0.01, "grad_norm": 2.409057378768921, "learning_rate": 3.587786259541985e-06, "loss": 1.2249, "step": 94 }, { "epoch": 0.01, "grad_norm": 4.408141136169434, "learning_rate": 3.625954198473283e-06, "loss": 1.2368, "step": 95 }, { "epoch": 0.01, "grad_norm": 2.244300603866577, "learning_rate": 3.6641221374045806e-06, "loss": 1.3195, "step": 96 }, { "epoch": 0.01, "grad_norm": 2.4743666648864746, "learning_rate": 3.7022900763358783e-06, "loss": 1.2646, "step": 97 }, { "epoch": 0.01, "grad_norm": 2.102954149246216, "learning_rate": 3.740458015267176e-06, "loss": 0.5534, "step": 98 }, { "epoch": 0.01, "grad_norm": 2.413912534713745, "learning_rate": 3.778625954198474e-06, "loss": 1.2545, "step": 99 }, { "epoch": 0.01, "grad_norm": 2.450010299682617, "learning_rate": 3.816793893129772e-06, "loss": 1.2882, "step": 100 }, { "epoch": 0.01, "grad_norm": 2.2480010986328125, "learning_rate": 3.8549618320610685e-06, "loss": 1.2514, "step": 101 }, { "epoch": 0.01, "grad_norm": 2.745875358581543, "learning_rate": 3.893129770992366e-06, "loss": 1.1647, "step": 102 }, { "epoch": 0.01, "grad_norm": 2.5601766109466553, "learning_rate": 3.931297709923664e-06, "loss": 1.2509, "step": 103 }, { "epoch": 0.01, "grad_norm": 2.514998435974121, "learning_rate": 3.969465648854962e-06, "loss": 1.2079, "step": 104 }, { "epoch": 0.01, "grad_norm": 2.424623727798462, "learning_rate": 4.0076335877862595e-06, "loss": 1.2166, "step": 105 }, { "epoch": 0.01, "grad_norm": 2.2762906551361084, "learning_rate": 4.045801526717557e-06, "loss": 1.2389, "step": 106 }, { "epoch": 0.01, "grad_norm": 2.150784969329834, "learning_rate": 4.083969465648855e-06, "loss": 1.1847, "step": 107 }, { "epoch": 0.01, "grad_norm": 2.3234362602233887, "learning_rate": 4.122137404580153e-06, "loss": 1.2278, "step": 108 }, { "epoch": 0.01, "grad_norm": 2.4249842166900635, "learning_rate": 4.1603053435114506e-06, "loss": 1.207, "step": 109 }, { "epoch": 0.01, "grad_norm": 2.2878878116607666, "learning_rate": 4.198473282442748e-06, "loss": 1.2264, "step": 110 }, { "epoch": 0.01, "grad_norm": 2.549649953842163, "learning_rate": 4.236641221374046e-06, "loss": 1.159, "step": 111 }, { "epoch": 0.01, "grad_norm": 2.4554660320281982, "learning_rate": 4.274809160305344e-06, "loss": 1.319, "step": 112 }, { "epoch": 0.01, "grad_norm": 3.0981709957122803, "learning_rate": 4.312977099236642e-06, "loss": 1.2092, "step": 113 }, { "epoch": 0.01, "grad_norm": 2.541743040084839, "learning_rate": 4.351145038167939e-06, "loss": 1.1693, "step": 114 }, { "epoch": 0.01, "grad_norm": 2.5217630863189697, "learning_rate": 4.389312977099237e-06, "loss": 1.2889, "step": 115 }, { "epoch": 0.01, "grad_norm": 2.495091199874878, "learning_rate": 4.427480916030535e-06, "loss": 1.1888, "step": 116 }, { "epoch": 0.01, "grad_norm": 2.4056596755981445, "learning_rate": 4.465648854961833e-06, "loss": 1.1838, "step": 117 }, { "epoch": 0.01, "grad_norm": 2.335122585296631, "learning_rate": 4.5038167938931296e-06, "loss": 1.2866, "step": 118 }, { "epoch": 0.01, "grad_norm": 2.1947944164276123, "learning_rate": 4.541984732824427e-06, "loss": 1.2358, "step": 119 }, { "epoch": 0.01, "grad_norm": 2.29399037361145, "learning_rate": 4.580152671755725e-06, "loss": 1.2221, "step": 120 }, { "epoch": 0.01, "grad_norm": 2.5239224433898926, "learning_rate": 4.618320610687023e-06, "loss": 1.2033, "step": 121 }, { "epoch": 0.01, "grad_norm": 2.3639538288116455, "learning_rate": 4.656488549618321e-06, "loss": 1.2669, "step": 122 }, { "epoch": 0.01, "grad_norm": 2.1837053298950195, "learning_rate": 4.694656488549618e-06, "loss": 1.2633, "step": 123 }, { "epoch": 0.01, "grad_norm": 1.715895175933838, "learning_rate": 4.732824427480917e-06, "loss": 0.6809, "step": 124 }, { "epoch": 0.01, "grad_norm": 2.198807716369629, "learning_rate": 4.770992366412215e-06, "loss": 1.2618, "step": 125 }, { "epoch": 0.01, "grad_norm": 2.3895256519317627, "learning_rate": 4.8091603053435125e-06, "loss": 1.1939, "step": 126 }, { "epoch": 0.01, "grad_norm": 2.3140602111816406, "learning_rate": 4.847328244274809e-06, "loss": 1.3025, "step": 127 }, { "epoch": 0.01, "grad_norm": 2.333887815475464, "learning_rate": 4.885496183206107e-06, "loss": 1.2884, "step": 128 }, { "epoch": 0.01, "grad_norm": 2.109236478805542, "learning_rate": 4.923664122137405e-06, "loss": 1.2517, "step": 129 }, { "epoch": 0.01, "grad_norm": 2.2595057487487793, "learning_rate": 4.961832061068703e-06, "loss": 1.2674, "step": 130 }, { "epoch": 0.01, "grad_norm": 2.338866710662842, "learning_rate": 5e-06, "loss": 1.2765, "step": 131 }, { "epoch": 0.01, "grad_norm": 2.3303911685943604, "learning_rate": 5.038167938931297e-06, "loss": 1.1896, "step": 132 }, { "epoch": 0.01, "grad_norm": 2.8968067169189453, "learning_rate": 5.076335877862596e-06, "loss": 1.2421, "step": 133 }, { "epoch": 0.01, "grad_norm": 2.240675687789917, "learning_rate": 5.114503816793893e-06, "loss": 1.2187, "step": 134 }, { "epoch": 0.01, "grad_norm": 2.4179251194000244, "learning_rate": 5.1526717557251914e-06, "loss": 1.2101, "step": 135 }, { "epoch": 0.01, "grad_norm": 2.381080389022827, "learning_rate": 5.190839694656488e-06, "loss": 1.2158, "step": 136 }, { "epoch": 0.01, "grad_norm": 2.3089752197265625, "learning_rate": 5.229007633587787e-06, "loss": 1.2186, "step": 137 }, { "epoch": 0.01, "grad_norm": 2.6404616832733154, "learning_rate": 5.267175572519084e-06, "loss": 1.2566, "step": 138 }, { "epoch": 0.01, "grad_norm": 2.435467004776001, "learning_rate": 5.3053435114503825e-06, "loss": 1.1878, "step": 139 }, { "epoch": 0.01, "grad_norm": 2.396341562271118, "learning_rate": 5.34351145038168e-06, "loss": 1.3049, "step": 140 }, { "epoch": 0.01, "grad_norm": 2.2711474895477295, "learning_rate": 5.381679389312977e-06, "loss": 1.1792, "step": 141 }, { "epoch": 0.01, "grad_norm": 2.1534476280212402, "learning_rate": 5.419847328244276e-06, "loss": 1.2657, "step": 142 }, { "epoch": 0.01, "grad_norm": 2.453995704650879, "learning_rate": 5.458015267175573e-06, "loss": 1.1267, "step": 143 }, { "epoch": 0.01, "grad_norm": 2.391569137573242, "learning_rate": 5.496183206106871e-06, "loss": 1.2367, "step": 144 }, { "epoch": 0.01, "grad_norm": 1.3522192239761353, "learning_rate": 5.534351145038168e-06, "loss": 0.6216, "step": 145 }, { "epoch": 0.01, "grad_norm": 2.379929304122925, "learning_rate": 5.572519083969467e-06, "loss": 1.2342, "step": 146 }, { "epoch": 0.01, "grad_norm": 2.2897443771362305, "learning_rate": 5.610687022900764e-06, "loss": 1.2379, "step": 147 }, { "epoch": 0.01, "grad_norm": 2.2341549396514893, "learning_rate": 5.648854961832062e-06, "loss": 1.2158, "step": 148 }, { "epoch": 0.01, "grad_norm": 2.227285146713257, "learning_rate": 5.687022900763359e-06, "loss": 1.2156, "step": 149 }, { "epoch": 0.01, "grad_norm": 2.068729877471924, "learning_rate": 5.725190839694656e-06, "loss": 1.1695, "step": 150 }, { "epoch": 0.01, "grad_norm": 2.238799810409546, "learning_rate": 5.763358778625955e-06, "loss": 1.1904, "step": 151 }, { "epoch": 0.01, "grad_norm": 2.5148062705993652, "learning_rate": 5.801526717557252e-06, "loss": 1.2824, "step": 152 }, { "epoch": 0.01, "grad_norm": 2.326028347015381, "learning_rate": 5.83969465648855e-06, "loss": 1.1364, "step": 153 }, { "epoch": 0.01, "grad_norm": 2.5881175994873047, "learning_rate": 5.877862595419848e-06, "loss": 1.2886, "step": 154 }, { "epoch": 0.01, "grad_norm": 2.399954080581665, "learning_rate": 5.916030534351146e-06, "loss": 1.1618, "step": 155 }, { "epoch": 0.01, "grad_norm": 2.169663190841675, "learning_rate": 5.9541984732824435e-06, "loss": 1.114, "step": 156 }, { "epoch": 0.01, "grad_norm": 2.353415012359619, "learning_rate": 5.992366412213741e-06, "loss": 1.2032, "step": 157 }, { "epoch": 0.01, "grad_norm": 2.1866724491119385, "learning_rate": 6.030534351145039e-06, "loss": 1.2034, "step": 158 }, { "epoch": 0.01, "grad_norm": 2.327688455581665, "learning_rate": 6.068702290076336e-06, "loss": 1.1703, "step": 159 }, { "epoch": 0.01, "grad_norm": 2.3042783737182617, "learning_rate": 6.1068702290076346e-06, "loss": 1.2322, "step": 160 }, { "epoch": 0.01, "grad_norm": 2.503833293914795, "learning_rate": 6.1450381679389315e-06, "loss": 1.1789, "step": 161 }, { "epoch": 0.01, "grad_norm": 2.350928783416748, "learning_rate": 6.18320610687023e-06, "loss": 1.236, "step": 162 }, { "epoch": 0.01, "grad_norm": 2.4380338191986084, "learning_rate": 6.221374045801527e-06, "loss": 1.1927, "step": 163 }, { "epoch": 0.01, "grad_norm": 2.268906593322754, "learning_rate": 6.259541984732826e-06, "loss": 1.2072, "step": 164 }, { "epoch": 0.01, "grad_norm": 2.231147527694702, "learning_rate": 6.2977099236641225e-06, "loss": 1.2117, "step": 165 }, { "epoch": 0.01, "grad_norm": 2.418914794921875, "learning_rate": 6.335877862595419e-06, "loss": 1.1606, "step": 166 }, { "epoch": 0.01, "grad_norm": 2.3540070056915283, "learning_rate": 6.374045801526718e-06, "loss": 1.2334, "step": 167 }, { "epoch": 0.01, "grad_norm": 2.2606024742126465, "learning_rate": 6.412213740458016e-06, "loss": 1.12, "step": 168 }, { "epoch": 0.01, "grad_norm": 2.2669661045074463, "learning_rate": 6.4503816793893135e-06, "loss": 1.204, "step": 169 }, { "epoch": 0.01, "grad_norm": 2.357459306716919, "learning_rate": 6.488549618320611e-06, "loss": 1.2345, "step": 170 }, { "epoch": 0.01, "grad_norm": 2.168829917907715, "learning_rate": 6.526717557251909e-06, "loss": 1.2474, "step": 171 }, { "epoch": 0.01, "grad_norm": 2.1880061626434326, "learning_rate": 6.564885496183207e-06, "loss": 1.1261, "step": 172 }, { "epoch": 0.01, "grad_norm": 2.262659788131714, "learning_rate": 6.6030534351145046e-06, "loss": 1.1857, "step": 173 }, { "epoch": 0.01, "grad_norm": 2.3341357707977295, "learning_rate": 6.641221374045802e-06, "loss": 1.1984, "step": 174 }, { "epoch": 0.01, "grad_norm": 2.26676869392395, "learning_rate": 6.679389312977099e-06, "loss": 1.146, "step": 175 }, { "epoch": 0.01, "grad_norm": 2.5866336822509766, "learning_rate": 6.717557251908398e-06, "loss": 1.2126, "step": 176 }, { "epoch": 0.01, "grad_norm": 2.249685525894165, "learning_rate": 6.755725190839695e-06, "loss": 1.2455, "step": 177 }, { "epoch": 0.01, "grad_norm": 2.474029779434204, "learning_rate": 6.793893129770993e-06, "loss": 1.2297, "step": 178 }, { "epoch": 0.01, "grad_norm": 2.3840372562408447, "learning_rate": 6.83206106870229e-06, "loss": 1.146, "step": 179 }, { "epoch": 0.01, "grad_norm": 1.0976841449737549, "learning_rate": 6.870229007633589e-06, "loss": 0.5018, "step": 180 }, { "epoch": 0.01, "grad_norm": 2.552171468734741, "learning_rate": 6.908396946564886e-06, "loss": 1.2003, "step": 181 }, { "epoch": 0.01, "grad_norm": 2.7230982780456543, "learning_rate": 6.946564885496184e-06, "loss": 1.1734, "step": 182 }, { "epoch": 0.01, "grad_norm": 2.2825429439544678, "learning_rate": 6.984732824427481e-06, "loss": 1.2047, "step": 183 }, { "epoch": 0.01, "grad_norm": 2.2877326011657715, "learning_rate": 7.022900763358779e-06, "loss": 1.1578, "step": 184 }, { "epoch": 0.01, "grad_norm": 2.451024055480957, "learning_rate": 7.061068702290077e-06, "loss": 1.2439, "step": 185 }, { "epoch": 0.01, "grad_norm": 1.2070621252059937, "learning_rate": 7.0992366412213746e-06, "loss": 0.5192, "step": 186 }, { "epoch": 0.01, "grad_norm": 2.5960514545440674, "learning_rate": 7.137404580152672e-06, "loss": 1.3137, "step": 187 }, { "epoch": 0.01, "grad_norm": 2.5490591526031494, "learning_rate": 7.17557251908397e-06, "loss": 1.2176, "step": 188 }, { "epoch": 0.01, "grad_norm": 2.679558038711548, "learning_rate": 7.213740458015268e-06, "loss": 1.1846, "step": 189 }, { "epoch": 0.01, "grad_norm": 2.649095296859741, "learning_rate": 7.251908396946566e-06, "loss": 1.1863, "step": 190 }, { "epoch": 0.01, "grad_norm": 2.5023233890533447, "learning_rate": 7.290076335877863e-06, "loss": 1.1572, "step": 191 }, { "epoch": 0.01, "grad_norm": 1.1537967920303345, "learning_rate": 7.328244274809161e-06, "loss": 0.5124, "step": 192 }, { "epoch": 0.01, "grad_norm": 2.515532970428467, "learning_rate": 7.366412213740458e-06, "loss": 1.0807, "step": 193 }, { "epoch": 0.01, "grad_norm": 2.3911054134368896, "learning_rate": 7.404580152671757e-06, "loss": 1.211, "step": 194 }, { "epoch": 0.01, "grad_norm": 2.3482248783111572, "learning_rate": 7.4427480916030536e-06, "loss": 1.1568, "step": 195 }, { "epoch": 0.01, "grad_norm": 2.3433539867401123, "learning_rate": 7.480916030534352e-06, "loss": 1.1664, "step": 196 }, { "epoch": 0.01, "grad_norm": 2.395803689956665, "learning_rate": 7.519083969465649e-06, "loss": 1.2107, "step": 197 }, { "epoch": 0.01, "grad_norm": 2.49005126953125, "learning_rate": 7.557251908396948e-06, "loss": 1.2141, "step": 198 }, { "epoch": 0.01, "grad_norm": 5.116427898406982, "learning_rate": 7.595419847328245e-06, "loss": 1.0903, "step": 199 }, { "epoch": 0.01, "grad_norm": 2.6343843936920166, "learning_rate": 7.633587786259543e-06, "loss": 1.0758, "step": 200 }, { "epoch": 0.01, "grad_norm": 2.756282329559326, "learning_rate": 7.671755725190841e-06, "loss": 1.1098, "step": 201 }, { "epoch": 0.01, "grad_norm": 2.110990285873413, "learning_rate": 7.709923664122137e-06, "loss": 1.1557, "step": 202 }, { "epoch": 0.01, "grad_norm": 2.480382204055786, "learning_rate": 7.748091603053436e-06, "loss": 1.1931, "step": 203 }, { "epoch": 0.01, "grad_norm": 2.264718532562256, "learning_rate": 7.786259541984733e-06, "loss": 1.1575, "step": 204 }, { "epoch": 0.01, "grad_norm": 2.4612679481506348, "learning_rate": 7.824427480916032e-06, "loss": 1.1488, "step": 205 }, { "epoch": 0.01, "grad_norm": 2.4971280097961426, "learning_rate": 7.862595419847328e-06, "loss": 1.2338, "step": 206 }, { "epoch": 0.01, "grad_norm": 2.420489549636841, "learning_rate": 7.900763358778627e-06, "loss": 1.187, "step": 207 }, { "epoch": 0.01, "grad_norm": 2.2725765705108643, "learning_rate": 7.938931297709924e-06, "loss": 1.169, "step": 208 }, { "epoch": 0.01, "grad_norm": 2.435575485229492, "learning_rate": 7.977099236641223e-06, "loss": 1.2375, "step": 209 }, { "epoch": 0.01, "grad_norm": 3.2475221157073975, "learning_rate": 8.015267175572519e-06, "loss": 1.1516, "step": 210 }, { "epoch": 0.01, "grad_norm": 2.1970536708831787, "learning_rate": 8.053435114503817e-06, "loss": 1.1942, "step": 211 }, { "epoch": 0.01, "grad_norm": 2.1885790824890137, "learning_rate": 8.091603053435115e-06, "loss": 1.2051, "step": 212 }, { "epoch": 0.01, "grad_norm": 2.4636831283569336, "learning_rate": 8.129770992366412e-06, "loss": 1.1964, "step": 213 }, { "epoch": 0.01, "grad_norm": 2.704554319381714, "learning_rate": 8.16793893129771e-06, "loss": 1.177, "step": 214 }, { "epoch": 0.01, "grad_norm": 2.538517475128174, "learning_rate": 8.206106870229008e-06, "loss": 1.1312, "step": 215 }, { "epoch": 0.01, "grad_norm": 2.2675399780273438, "learning_rate": 8.244274809160306e-06, "loss": 1.1574, "step": 216 }, { "epoch": 0.01, "grad_norm": 2.3015332221984863, "learning_rate": 8.282442748091603e-06, "loss": 1.1842, "step": 217 }, { "epoch": 0.01, "grad_norm": 1.1983731985092163, "learning_rate": 8.320610687022901e-06, "loss": 0.5498, "step": 218 }, { "epoch": 0.01, "grad_norm": 2.3395638465881348, "learning_rate": 8.358778625954199e-06, "loss": 1.1192, "step": 219 }, { "epoch": 0.01, "grad_norm": 2.7942235469818115, "learning_rate": 8.396946564885497e-06, "loss": 1.2464, "step": 220 }, { "epoch": 0.01, "grad_norm": 2.191277027130127, "learning_rate": 8.435114503816794e-06, "loss": 1.1368, "step": 221 }, { "epoch": 0.01, "grad_norm": 2.591585397720337, "learning_rate": 8.473282442748092e-06, "loss": 1.0736, "step": 222 }, { "epoch": 0.01, "grad_norm": 2.48899507522583, "learning_rate": 8.51145038167939e-06, "loss": 1.2223, "step": 223 }, { "epoch": 0.01, "grad_norm": 2.433845281600952, "learning_rate": 8.549618320610688e-06, "loss": 1.1786, "step": 224 }, { "epoch": 0.01, "grad_norm": 2.364865303039551, "learning_rate": 8.587786259541985e-06, "loss": 1.1529, "step": 225 }, { "epoch": 0.01, "grad_norm": 2.3349416255950928, "learning_rate": 8.625954198473283e-06, "loss": 1.1746, "step": 226 }, { "epoch": 0.01, "grad_norm": 2.141732692718506, "learning_rate": 8.664122137404581e-06, "loss": 1.2164, "step": 227 }, { "epoch": 0.01, "grad_norm": 2.3822381496429443, "learning_rate": 8.702290076335879e-06, "loss": 1.1444, "step": 228 }, { "epoch": 0.01, "grad_norm": 2.4650022983551025, "learning_rate": 8.740458015267176e-06, "loss": 1.1352, "step": 229 }, { "epoch": 0.01, "grad_norm": 2.655257225036621, "learning_rate": 8.778625954198474e-06, "loss": 1.2208, "step": 230 }, { "epoch": 0.01, "grad_norm": 2.4323301315307617, "learning_rate": 8.816793893129772e-06, "loss": 1.1644, "step": 231 }, { "epoch": 0.01, "grad_norm": 1.3118544816970825, "learning_rate": 8.85496183206107e-06, "loss": 0.5754, "step": 232 }, { "epoch": 0.01, "grad_norm": 2.6206159591674805, "learning_rate": 8.893129770992368e-06, "loss": 1.2652, "step": 233 }, { "epoch": 0.01, "grad_norm": 2.5270016193389893, "learning_rate": 8.931297709923665e-06, "loss": 1.0734, "step": 234 }, { "epoch": 0.01, "grad_norm": 2.644437551498413, "learning_rate": 8.969465648854963e-06, "loss": 1.1781, "step": 235 }, { "epoch": 0.01, "grad_norm": 2.3275222778320312, "learning_rate": 9.007633587786259e-06, "loss": 1.1713, "step": 236 }, { "epoch": 0.01, "grad_norm": 2.2932794094085693, "learning_rate": 9.045801526717559e-06, "loss": 1.1651, "step": 237 }, { "epoch": 0.01, "grad_norm": 2.413956642150879, "learning_rate": 9.083969465648855e-06, "loss": 1.2262, "step": 238 }, { "epoch": 0.01, "grad_norm": 2.231199026107788, "learning_rate": 9.122137404580154e-06, "loss": 1.0769, "step": 239 }, { "epoch": 0.01, "grad_norm": 2.3945555686950684, "learning_rate": 9.16030534351145e-06, "loss": 1.1812, "step": 240 }, { "epoch": 0.01, "grad_norm": 2.110468626022339, "learning_rate": 9.19847328244275e-06, "loss": 1.2368, "step": 241 }, { "epoch": 0.01, "grad_norm": 2.3194801807403564, "learning_rate": 9.236641221374046e-06, "loss": 1.2372, "step": 242 }, { "epoch": 0.01, "grad_norm": 2.27785325050354, "learning_rate": 9.274809160305345e-06, "loss": 1.1542, "step": 243 }, { "epoch": 0.01, "grad_norm": 2.417180061340332, "learning_rate": 9.312977099236641e-06, "loss": 1.2068, "step": 244 }, { "epoch": 0.01, "grad_norm": 2.3117575645446777, "learning_rate": 9.351145038167939e-06, "loss": 1.1361, "step": 245 }, { "epoch": 0.01, "grad_norm": 2.495323896408081, "learning_rate": 9.389312977099237e-06, "loss": 1.2174, "step": 246 }, { "epoch": 0.01, "grad_norm": 2.228602170944214, "learning_rate": 9.427480916030534e-06, "loss": 1.1987, "step": 247 }, { "epoch": 0.01, "grad_norm": 2.187556505203247, "learning_rate": 9.465648854961834e-06, "loss": 1.1911, "step": 248 }, { "epoch": 0.01, "grad_norm": 2.5458500385284424, "learning_rate": 9.50381679389313e-06, "loss": 1.132, "step": 249 }, { "epoch": 0.01, "grad_norm": 2.205439805984497, "learning_rate": 9.54198473282443e-06, "loss": 1.0681, "step": 250 }, { "epoch": 0.01, "grad_norm": 2.2369577884674072, "learning_rate": 9.580152671755725e-06, "loss": 1.2095, "step": 251 }, { "epoch": 0.01, "grad_norm": 2.3744304180145264, "learning_rate": 9.618320610687025e-06, "loss": 1.2271, "step": 252 }, { "epoch": 0.01, "grad_norm": 2.2684192657470703, "learning_rate": 9.656488549618321e-06, "loss": 1.0788, "step": 253 }, { "epoch": 0.01, "grad_norm": 2.1698288917541504, "learning_rate": 9.694656488549619e-06, "loss": 1.1339, "step": 254 }, { "epoch": 0.01, "grad_norm": 2.4651522636413574, "learning_rate": 9.732824427480917e-06, "loss": 1.1854, "step": 255 }, { "epoch": 0.01, "grad_norm": 2.563157081604004, "learning_rate": 9.770992366412214e-06, "loss": 1.1689, "step": 256 }, { "epoch": 0.01, "grad_norm": 1.057300090789795, "learning_rate": 9.809160305343512e-06, "loss": 0.602, "step": 257 }, { "epoch": 0.01, "grad_norm": 1.101321816444397, "learning_rate": 9.84732824427481e-06, "loss": 0.4822, "step": 258 }, { "epoch": 0.01, "grad_norm": 2.476306200027466, "learning_rate": 9.885496183206108e-06, "loss": 1.2241, "step": 259 }, { "epoch": 0.01, "grad_norm": 2.863455057144165, "learning_rate": 9.923664122137405e-06, "loss": 1.1446, "step": 260 }, { "epoch": 0.01, "grad_norm": 2.314697742462158, "learning_rate": 9.961832061068703e-06, "loss": 1.1508, "step": 261 }, { "epoch": 0.02, "grad_norm": 2.6635262966156006, "learning_rate": 1e-05, "loss": 1.2458, "step": 262 }, { "epoch": 0.02, "grad_norm": 2.232550859451294, "learning_rate": 1.0038167938931299e-05, "loss": 1.2193, "step": 263 }, { "epoch": 0.02, "grad_norm": 2.306417465209961, "learning_rate": 1.0076335877862595e-05, "loss": 1.1676, "step": 264 }, { "epoch": 0.02, "grad_norm": 2.8053040504455566, "learning_rate": 1.0114503816793894e-05, "loss": 1.207, "step": 265 }, { "epoch": 0.02, "grad_norm": 2.5533223152160645, "learning_rate": 1.0152671755725192e-05, "loss": 1.1838, "step": 266 }, { "epoch": 0.02, "grad_norm": 2.469116449356079, "learning_rate": 1.019083969465649e-05, "loss": 1.2508, "step": 267 }, { "epoch": 0.02, "grad_norm": 2.3705906867980957, "learning_rate": 1.0229007633587786e-05, "loss": 1.1399, "step": 268 }, { "epoch": 0.02, "grad_norm": 1.1668001413345337, "learning_rate": 1.0267175572519085e-05, "loss": 0.5029, "step": 269 }, { "epoch": 0.02, "grad_norm": 2.4078330993652344, "learning_rate": 1.0305343511450383e-05, "loss": 1.1489, "step": 270 }, { "epoch": 0.02, "grad_norm": 2.458362102508545, "learning_rate": 1.034351145038168e-05, "loss": 1.1049, "step": 271 }, { "epoch": 0.02, "grad_norm": 2.5195271968841553, "learning_rate": 1.0381679389312977e-05, "loss": 1.2086, "step": 272 }, { "epoch": 0.02, "grad_norm": 2.2613472938537598, "learning_rate": 1.0419847328244274e-05, "loss": 1.0938, "step": 273 }, { "epoch": 0.02, "grad_norm": 2.3890202045440674, "learning_rate": 1.0458015267175574e-05, "loss": 1.1137, "step": 274 }, { "epoch": 0.02, "grad_norm": 2.255711317062378, "learning_rate": 1.0496183206106872e-05, "loss": 1.2094, "step": 275 }, { "epoch": 0.02, "grad_norm": 1.0289547443389893, "learning_rate": 1.0534351145038168e-05, "loss": 0.5386, "step": 276 }, { "epoch": 0.02, "grad_norm": 2.49617600440979, "learning_rate": 1.0572519083969465e-05, "loss": 1.2289, "step": 277 }, { "epoch": 0.02, "grad_norm": 2.490529775619507, "learning_rate": 1.0610687022900765e-05, "loss": 1.1398, "step": 278 }, { "epoch": 0.02, "grad_norm": 2.4953157901763916, "learning_rate": 1.0648854961832063e-05, "loss": 1.2283, "step": 279 }, { "epoch": 0.02, "grad_norm": 2.51371169090271, "learning_rate": 1.068702290076336e-05, "loss": 1.1693, "step": 280 }, { "epoch": 0.02, "grad_norm": 2.34928560256958, "learning_rate": 1.0725190839694657e-05, "loss": 1.0796, "step": 281 }, { "epoch": 0.02, "grad_norm": 2.3046045303344727, "learning_rate": 1.0763358778625954e-05, "loss": 1.2126, "step": 282 }, { "epoch": 0.02, "grad_norm": 2.4181103706359863, "learning_rate": 1.0801526717557254e-05, "loss": 1.0868, "step": 283 }, { "epoch": 0.02, "grad_norm": 2.4339473247528076, "learning_rate": 1.0839694656488552e-05, "loss": 1.1992, "step": 284 }, { "epoch": 0.02, "grad_norm": 2.1299593448638916, "learning_rate": 1.0877862595419848e-05, "loss": 1.1235, "step": 285 }, { "epoch": 0.02, "grad_norm": 2.2972185611724854, "learning_rate": 1.0916030534351145e-05, "loss": 1.1303, "step": 286 }, { "epoch": 0.02, "grad_norm": 2.604118585586548, "learning_rate": 1.0954198473282445e-05, "loss": 1.1724, "step": 287 }, { "epoch": 0.02, "grad_norm": 2.3645050525665283, "learning_rate": 1.0992366412213743e-05, "loss": 1.3014, "step": 288 }, { "epoch": 0.02, "grad_norm": 2.4452872276306152, "learning_rate": 1.1030534351145039e-05, "loss": 1.1139, "step": 289 }, { "epoch": 0.02, "grad_norm": 2.1248505115509033, "learning_rate": 1.1068702290076336e-05, "loss": 1.1383, "step": 290 }, { "epoch": 0.02, "grad_norm": 2.3322041034698486, "learning_rate": 1.1106870229007634e-05, "loss": 1.2256, "step": 291 }, { "epoch": 0.02, "grad_norm": 2.192044734954834, "learning_rate": 1.1145038167938934e-05, "loss": 1.2323, "step": 292 }, { "epoch": 0.02, "grad_norm": 2.5046188831329346, "learning_rate": 1.118320610687023e-05, "loss": 1.1631, "step": 293 }, { "epoch": 0.02, "grad_norm": 2.338428258895874, "learning_rate": 1.1221374045801527e-05, "loss": 1.174, "step": 294 }, { "epoch": 0.02, "grad_norm": 2.187605619430542, "learning_rate": 1.1259541984732825e-05, "loss": 1.1683, "step": 295 }, { "epoch": 0.02, "grad_norm": 2.1554183959960938, "learning_rate": 1.1297709923664125e-05, "loss": 1.1883, "step": 296 }, { "epoch": 0.02, "grad_norm": 2.3376195430755615, "learning_rate": 1.133587786259542e-05, "loss": 1.2237, "step": 297 }, { "epoch": 0.02, "grad_norm": 2.1107397079467773, "learning_rate": 1.1374045801526718e-05, "loss": 1.1311, "step": 298 }, { "epoch": 0.02, "grad_norm": 2.434945583343506, "learning_rate": 1.1412213740458016e-05, "loss": 1.2061, "step": 299 }, { "epoch": 0.02, "grad_norm": 2.3518757820129395, "learning_rate": 1.1450381679389312e-05, "loss": 1.1361, "step": 300 }, { "epoch": 0.02, "grad_norm": 2.270901679992676, "learning_rate": 1.1488549618320612e-05, "loss": 1.1548, "step": 301 }, { "epoch": 0.02, "grad_norm": 2.242243528366089, "learning_rate": 1.152671755725191e-05, "loss": 1.181, "step": 302 }, { "epoch": 0.02, "grad_norm": 2.2376561164855957, "learning_rate": 1.1564885496183207e-05, "loss": 1.1291, "step": 303 }, { "epoch": 0.02, "grad_norm": 2.4677982330322266, "learning_rate": 1.1603053435114503e-05, "loss": 1.128, "step": 304 }, { "epoch": 0.02, "grad_norm": 2.2829904556274414, "learning_rate": 1.1641221374045803e-05, "loss": 1.1789, "step": 305 }, { "epoch": 0.02, "grad_norm": 2.0435593128204346, "learning_rate": 1.16793893129771e-05, "loss": 1.0958, "step": 306 }, { "epoch": 0.02, "grad_norm": 2.1729190349578857, "learning_rate": 1.1717557251908398e-05, "loss": 1.1834, "step": 307 }, { "epoch": 0.02, "grad_norm": 2.403015613555908, "learning_rate": 1.1755725190839696e-05, "loss": 1.1334, "step": 308 }, { "epoch": 0.02, "grad_norm": 2.153867244720459, "learning_rate": 1.1793893129770992e-05, "loss": 1.1676, "step": 309 }, { "epoch": 0.02, "grad_norm": 2.1921372413635254, "learning_rate": 1.1832061068702292e-05, "loss": 1.1697, "step": 310 }, { "epoch": 0.02, "grad_norm": 2.451202154159546, "learning_rate": 1.187022900763359e-05, "loss": 1.2018, "step": 311 }, { "epoch": 0.02, "grad_norm": 2.538238763809204, "learning_rate": 1.1908396946564887e-05, "loss": 1.1653, "step": 312 }, { "epoch": 0.02, "grad_norm": 2.3259646892547607, "learning_rate": 1.1946564885496183e-05, "loss": 1.277, "step": 313 }, { "epoch": 0.02, "grad_norm": 2.5378835201263428, "learning_rate": 1.1984732824427483e-05, "loss": 1.167, "step": 314 }, { "epoch": 0.02, "grad_norm": 2.323946714401245, "learning_rate": 1.202290076335878e-05, "loss": 1.1798, "step": 315 }, { "epoch": 0.02, "grad_norm": 2.3234386444091797, "learning_rate": 1.2061068702290078e-05, "loss": 1.1706, "step": 316 }, { "epoch": 0.02, "grad_norm": 2.256312847137451, "learning_rate": 1.2099236641221374e-05, "loss": 1.2463, "step": 317 }, { "epoch": 0.02, "grad_norm": 2.189204454421997, "learning_rate": 1.2137404580152672e-05, "loss": 1.1542, "step": 318 }, { "epoch": 0.02, "grad_norm": 2.2717251777648926, "learning_rate": 1.2175572519083971e-05, "loss": 1.1562, "step": 319 }, { "epoch": 0.02, "grad_norm": 2.362816572189331, "learning_rate": 1.2213740458015269e-05, "loss": 1.0983, "step": 320 }, { "epoch": 0.02, "grad_norm": 2.5264642238616943, "learning_rate": 1.2251908396946565e-05, "loss": 1.1484, "step": 321 }, { "epoch": 0.02, "grad_norm": 1.1048452854156494, "learning_rate": 1.2290076335877863e-05, "loss": 0.5417, "step": 322 }, { "epoch": 0.02, "grad_norm": 2.439535140991211, "learning_rate": 1.2328244274809162e-05, "loss": 1.1397, "step": 323 }, { "epoch": 0.02, "grad_norm": 2.338682174682617, "learning_rate": 1.236641221374046e-05, "loss": 1.1522, "step": 324 }, { "epoch": 0.02, "grad_norm": 2.173095703125, "learning_rate": 1.2404580152671756e-05, "loss": 1.2423, "step": 325 }, { "epoch": 0.02, "grad_norm": 2.2724859714508057, "learning_rate": 1.2442748091603054e-05, "loss": 1.1686, "step": 326 }, { "epoch": 0.02, "grad_norm": 2.3127293586730957, "learning_rate": 1.2480916030534352e-05, "loss": 1.18, "step": 327 }, { "epoch": 0.02, "grad_norm": 2.1922414302825928, "learning_rate": 1.2519083969465651e-05, "loss": 1.2393, "step": 328 }, { "epoch": 0.02, "grad_norm": 2.249760866165161, "learning_rate": 1.2557251908396947e-05, "loss": 1.1924, "step": 329 }, { "epoch": 0.02, "grad_norm": 2.5361180305480957, "learning_rate": 1.2595419847328245e-05, "loss": 1.2124, "step": 330 }, { "epoch": 0.02, "grad_norm": 2.4809746742248535, "learning_rate": 1.2633587786259543e-05, "loss": 1.1432, "step": 331 }, { "epoch": 0.02, "grad_norm": 2.598536252975464, "learning_rate": 1.2671755725190839e-05, "loss": 1.1612, "step": 332 }, { "epoch": 0.02, "grad_norm": 2.3069939613342285, "learning_rate": 1.2709923664122138e-05, "loss": 1.0567, "step": 333 }, { "epoch": 0.02, "grad_norm": 2.311523199081421, "learning_rate": 1.2748091603053436e-05, "loss": 1.2515, "step": 334 }, { "epoch": 0.02, "grad_norm": 2.0850589275360107, "learning_rate": 1.2786259541984734e-05, "loss": 1.0596, "step": 335 }, { "epoch": 0.02, "grad_norm": 2.3131818771362305, "learning_rate": 1.2824427480916032e-05, "loss": 1.1662, "step": 336 }, { "epoch": 0.02, "grad_norm": 2.3318216800689697, "learning_rate": 1.2862595419847331e-05, "loss": 1.1282, "step": 337 }, { "epoch": 0.02, "grad_norm": 2.2734594345092773, "learning_rate": 1.2900763358778627e-05, "loss": 1.1454, "step": 338 }, { "epoch": 0.02, "grad_norm": 2.7358264923095703, "learning_rate": 1.2938931297709925e-05, "loss": 1.0995, "step": 339 }, { "epoch": 0.02, "grad_norm": 2.342430830001831, "learning_rate": 1.2977099236641223e-05, "loss": 1.1517, "step": 340 }, { "epoch": 0.02, "grad_norm": 2.223081350326538, "learning_rate": 1.3015267175572519e-05, "loss": 1.1219, "step": 341 }, { "epoch": 0.02, "grad_norm": 2.2671773433685303, "learning_rate": 1.3053435114503818e-05, "loss": 1.2077, "step": 342 }, { "epoch": 0.02, "grad_norm": 2.451024293899536, "learning_rate": 1.3091603053435116e-05, "loss": 1.152, "step": 343 }, { "epoch": 0.02, "grad_norm": 2.571594715118408, "learning_rate": 1.3129770992366414e-05, "loss": 1.2044, "step": 344 }, { "epoch": 0.02, "grad_norm": 2.4298548698425293, "learning_rate": 1.316793893129771e-05, "loss": 1.1181, "step": 345 }, { "epoch": 0.02, "grad_norm": 2.064215660095215, "learning_rate": 1.3206106870229009e-05, "loss": 1.1252, "step": 346 }, { "epoch": 0.02, "grad_norm": 2.479543685913086, "learning_rate": 1.3244274809160307e-05, "loss": 1.057, "step": 347 }, { "epoch": 0.02, "grad_norm": 2.3734121322631836, "learning_rate": 1.3282442748091605e-05, "loss": 1.2348, "step": 348 }, { "epoch": 0.02, "grad_norm": 2.6014323234558105, "learning_rate": 1.33206106870229e-05, "loss": 1.0837, "step": 349 }, { "epoch": 0.02, "grad_norm": 2.5107686519622803, "learning_rate": 1.3358778625954198e-05, "loss": 1.1559, "step": 350 }, { "epoch": 0.02, "grad_norm": 2.419914722442627, "learning_rate": 1.3396946564885498e-05, "loss": 1.1412, "step": 351 }, { "epoch": 0.02, "grad_norm": 2.224263906478882, "learning_rate": 1.3435114503816796e-05, "loss": 1.1477, "step": 352 }, { "epoch": 0.02, "grad_norm": 2.3645153045654297, "learning_rate": 1.3473282442748092e-05, "loss": 1.1707, "step": 353 }, { "epoch": 0.02, "grad_norm": 2.332211971282959, "learning_rate": 1.351145038167939e-05, "loss": 1.1284, "step": 354 }, { "epoch": 0.02, "grad_norm": 2.29952073097229, "learning_rate": 1.3549618320610689e-05, "loss": 1.1937, "step": 355 }, { "epoch": 0.02, "grad_norm": 2.55173921585083, "learning_rate": 1.3587786259541987e-05, "loss": 1.2429, "step": 356 }, { "epoch": 0.02, "grad_norm": 2.178745746612549, "learning_rate": 1.3625954198473283e-05, "loss": 1.1681, "step": 357 }, { "epoch": 0.02, "grad_norm": 2.2342777252197266, "learning_rate": 1.366412213740458e-05, "loss": 1.1756, "step": 358 }, { "epoch": 0.02, "grad_norm": 2.4349939823150635, "learning_rate": 1.3702290076335878e-05, "loss": 1.1194, "step": 359 }, { "epoch": 0.02, "grad_norm": 2.2043814659118652, "learning_rate": 1.3740458015267178e-05, "loss": 1.0826, "step": 360 }, { "epoch": 0.02, "grad_norm": 2.24560809135437, "learning_rate": 1.3778625954198474e-05, "loss": 1.1475, "step": 361 }, { "epoch": 0.02, "grad_norm": 2.424367666244507, "learning_rate": 1.3816793893129772e-05, "loss": 1.2111, "step": 362 }, { "epoch": 0.02, "grad_norm": 2.1970183849334717, "learning_rate": 1.385496183206107e-05, "loss": 1.162, "step": 363 }, { "epoch": 0.02, "grad_norm": 2.3800458908081055, "learning_rate": 1.3893129770992369e-05, "loss": 1.1419, "step": 364 }, { "epoch": 0.02, "grad_norm": 1.1797505617141724, "learning_rate": 1.3931297709923667e-05, "loss": 0.5251, "step": 365 }, { "epoch": 0.02, "grad_norm": 2.403242349624634, "learning_rate": 1.3969465648854963e-05, "loss": 1.1738, "step": 366 }, { "epoch": 0.02, "grad_norm": 2.359656572341919, "learning_rate": 1.400763358778626e-05, "loss": 1.1734, "step": 367 }, { "epoch": 0.02, "grad_norm": 2.3161683082580566, "learning_rate": 1.4045801526717558e-05, "loss": 1.1039, "step": 368 }, { "epoch": 0.02, "grad_norm": 2.5263845920562744, "learning_rate": 1.4083969465648858e-05, "loss": 1.1701, "step": 369 }, { "epoch": 0.02, "grad_norm": 2.3975117206573486, "learning_rate": 1.4122137404580154e-05, "loss": 1.2048, "step": 370 }, { "epoch": 0.02, "grad_norm": 2.2706034183502197, "learning_rate": 1.4160305343511451e-05, "loss": 1.1579, "step": 371 }, { "epoch": 0.02, "grad_norm": 2.3223025798797607, "learning_rate": 1.4198473282442749e-05, "loss": 1.151, "step": 372 }, { "epoch": 0.02, "grad_norm": 2.183082103729248, "learning_rate": 1.4236641221374049e-05, "loss": 1.2054, "step": 373 }, { "epoch": 0.02, "grad_norm": 2.5176961421966553, "learning_rate": 1.4274809160305345e-05, "loss": 1.2146, "step": 374 }, { "epoch": 0.02, "grad_norm": 2.539336681365967, "learning_rate": 1.4312977099236642e-05, "loss": 1.165, "step": 375 }, { "epoch": 0.02, "grad_norm": 1.0185726881027222, "learning_rate": 1.435114503816794e-05, "loss": 0.4935, "step": 376 }, { "epoch": 0.02, "grad_norm": 2.5876379013061523, "learning_rate": 1.4389312977099236e-05, "loss": 1.2338, "step": 377 }, { "epoch": 0.02, "grad_norm": 2.3072938919067383, "learning_rate": 1.4427480916030536e-05, "loss": 1.0365, "step": 378 }, { "epoch": 0.02, "grad_norm": 2.4691827297210693, "learning_rate": 1.4465648854961833e-05, "loss": 1.1449, "step": 379 }, { "epoch": 0.02, "grad_norm": 2.482402801513672, "learning_rate": 1.4503816793893131e-05, "loss": 1.2034, "step": 380 }, { "epoch": 0.02, "grad_norm": 2.1270785331726074, "learning_rate": 1.4541984732824427e-05, "loss": 1.0944, "step": 381 }, { "epoch": 0.02, "grad_norm": 3.2602219581604004, "learning_rate": 1.4580152671755727e-05, "loss": 1.1551, "step": 382 }, { "epoch": 0.02, "grad_norm": 2.679429292678833, "learning_rate": 1.4618320610687024e-05, "loss": 1.1828, "step": 383 }, { "epoch": 0.02, "grad_norm": 2.602813720703125, "learning_rate": 1.4656488549618322e-05, "loss": 1.2577, "step": 384 }, { "epoch": 0.02, "grad_norm": 2.4410369396209717, "learning_rate": 1.4694656488549618e-05, "loss": 1.1418, "step": 385 }, { "epoch": 0.02, "grad_norm": 2.3489840030670166, "learning_rate": 1.4732824427480916e-05, "loss": 1.0767, "step": 386 }, { "epoch": 0.02, "grad_norm": 2.6419332027435303, "learning_rate": 1.4770992366412216e-05, "loss": 1.143, "step": 387 }, { "epoch": 0.02, "grad_norm": 2.2403504848480225, "learning_rate": 1.4809160305343513e-05, "loss": 1.1068, "step": 388 }, { "epoch": 0.02, "grad_norm": 2.5824480056762695, "learning_rate": 1.484732824427481e-05, "loss": 1.219, "step": 389 }, { "epoch": 0.02, "grad_norm": 2.3028483390808105, "learning_rate": 1.4885496183206107e-05, "loss": 1.2444, "step": 390 }, { "epoch": 0.02, "grad_norm": 2.359236478805542, "learning_rate": 1.4923664122137407e-05, "loss": 1.1451, "step": 391 }, { "epoch": 0.02, "grad_norm": 2.207191228866577, "learning_rate": 1.4961832061068704e-05, "loss": 1.1604, "step": 392 }, { "epoch": 0.02, "grad_norm": 2.6509203910827637, "learning_rate": 1.5000000000000002e-05, "loss": 1.1415, "step": 393 }, { "epoch": 0.02, "grad_norm": 2.4773623943328857, "learning_rate": 1.5038167938931298e-05, "loss": 1.0801, "step": 394 }, { "epoch": 0.02, "grad_norm": 2.52006196975708, "learning_rate": 1.5076335877862596e-05, "loss": 1.198, "step": 395 }, { "epoch": 0.02, "grad_norm": 1.1149399280548096, "learning_rate": 1.5114503816793895e-05, "loss": 0.5713, "step": 396 }, { "epoch": 0.02, "grad_norm": 2.519970655441284, "learning_rate": 1.5152671755725193e-05, "loss": 1.1421, "step": 397 }, { "epoch": 0.02, "grad_norm": 2.680042028427124, "learning_rate": 1.519083969465649e-05, "loss": 1.1582, "step": 398 }, { "epoch": 0.02, "grad_norm": 2.6061649322509766, "learning_rate": 1.5229007633587787e-05, "loss": 1.2406, "step": 399 }, { "epoch": 0.02, "grad_norm": 1.2451324462890625, "learning_rate": 1.5267175572519086e-05, "loss": 0.6114, "step": 400 }, { "epoch": 0.02, "grad_norm": 2.6718413829803467, "learning_rate": 1.5305343511450384e-05, "loss": 1.1769, "step": 401 }, { "epoch": 0.02, "grad_norm": 2.228839159011841, "learning_rate": 1.5343511450381682e-05, "loss": 1.144, "step": 402 }, { "epoch": 0.02, "grad_norm": 2.2783102989196777, "learning_rate": 1.5381679389312976e-05, "loss": 1.1056, "step": 403 }, { "epoch": 0.02, "grad_norm": 2.5894501209259033, "learning_rate": 1.5419847328244274e-05, "loss": 1.1594, "step": 404 }, { "epoch": 0.02, "grad_norm": 2.3087751865386963, "learning_rate": 1.5458015267175575e-05, "loss": 1.1738, "step": 405 }, { "epoch": 0.02, "grad_norm": 2.3469791412353516, "learning_rate": 1.5496183206106873e-05, "loss": 1.1155, "step": 406 }, { "epoch": 0.02, "grad_norm": 2.2417876720428467, "learning_rate": 1.5534351145038167e-05, "loss": 1.0861, "step": 407 }, { "epoch": 0.02, "grad_norm": 2.1803297996520996, "learning_rate": 1.5572519083969465e-05, "loss": 1.1145, "step": 408 }, { "epoch": 0.02, "grad_norm": 2.469996929168701, "learning_rate": 1.5610687022900766e-05, "loss": 1.2239, "step": 409 }, { "epoch": 0.02, "grad_norm": 2.3944132328033447, "learning_rate": 1.5648854961832064e-05, "loss": 1.1156, "step": 410 }, { "epoch": 0.02, "grad_norm": 2.118701934814453, "learning_rate": 1.5687022900763362e-05, "loss": 1.0973, "step": 411 }, { "epoch": 0.02, "grad_norm": 2.096414804458618, "learning_rate": 1.5725190839694656e-05, "loss": 1.0902, "step": 412 }, { "epoch": 0.02, "grad_norm": 2.3658647537231445, "learning_rate": 1.5763358778625954e-05, "loss": 1.1405, "step": 413 }, { "epoch": 0.02, "grad_norm": 2.6369361877441406, "learning_rate": 1.5801526717557255e-05, "loss": 1.1106, "step": 414 }, { "epoch": 0.02, "grad_norm": 1.9504631757736206, "learning_rate": 1.5839694656488553e-05, "loss": 1.018, "step": 415 }, { "epoch": 0.02, "grad_norm": 2.468120813369751, "learning_rate": 1.5877862595419847e-05, "loss": 1.1755, "step": 416 }, { "epoch": 0.02, "grad_norm": 2.623765707015991, "learning_rate": 1.5916030534351145e-05, "loss": 1.1679, "step": 417 }, { "epoch": 0.02, "grad_norm": 2.4666054248809814, "learning_rate": 1.5954198473282446e-05, "loss": 1.1704, "step": 418 }, { "epoch": 0.02, "grad_norm": 2.231243848800659, "learning_rate": 1.5992366412213744e-05, "loss": 1.0936, "step": 419 }, { "epoch": 0.02, "grad_norm": 2.238954782485962, "learning_rate": 1.6030534351145038e-05, "loss": 1.1977, "step": 420 }, { "epoch": 0.02, "grad_norm": 2.2670278549194336, "learning_rate": 1.6068702290076336e-05, "loss": 1.125, "step": 421 }, { "epoch": 0.02, "grad_norm": 2.356731414794922, "learning_rate": 1.6106870229007634e-05, "loss": 1.2258, "step": 422 }, { "epoch": 0.02, "grad_norm": 2.3338921070098877, "learning_rate": 1.6145038167938935e-05, "loss": 1.1358, "step": 423 }, { "epoch": 0.02, "grad_norm": 2.2780981063842773, "learning_rate": 1.618320610687023e-05, "loss": 1.2258, "step": 424 }, { "epoch": 0.02, "grad_norm": 2.3498711585998535, "learning_rate": 1.6221374045801527e-05, "loss": 1.1348, "step": 425 }, { "epoch": 0.02, "grad_norm": 2.09081768989563, "learning_rate": 1.6259541984732825e-05, "loss": 1.203, "step": 426 }, { "epoch": 0.02, "grad_norm": 2.5033562183380127, "learning_rate": 1.6297709923664126e-05, "loss": 1.1688, "step": 427 }, { "epoch": 0.02, "grad_norm": 1.005417823791504, "learning_rate": 1.633587786259542e-05, "loss": 0.504, "step": 428 }, { "epoch": 0.02, "grad_norm": 2.3319952487945557, "learning_rate": 1.6374045801526718e-05, "loss": 1.1287, "step": 429 }, { "epoch": 0.02, "grad_norm": 2.2967989444732666, "learning_rate": 1.6412213740458016e-05, "loss": 1.1932, "step": 430 }, { "epoch": 0.02, "grad_norm": 2.175524950027466, "learning_rate": 1.6450381679389314e-05, "loss": 1.1379, "step": 431 }, { "epoch": 0.02, "grad_norm": 2.226332902908325, "learning_rate": 1.648854961832061e-05, "loss": 1.2015, "step": 432 }, { "epoch": 0.02, "grad_norm": 2.348856210708618, "learning_rate": 1.652671755725191e-05, "loss": 1.1079, "step": 433 }, { "epoch": 0.02, "grad_norm": 2.217881679534912, "learning_rate": 1.6564885496183207e-05, "loss": 1.1985, "step": 434 }, { "epoch": 0.02, "grad_norm": 2.1928725242614746, "learning_rate": 1.6603053435114505e-05, "loss": 1.1277, "step": 435 }, { "epoch": 0.03, "grad_norm": 2.3529856204986572, "learning_rate": 1.6641221374045802e-05, "loss": 1.2094, "step": 436 }, { "epoch": 0.03, "grad_norm": 2.230029344558716, "learning_rate": 1.66793893129771e-05, "loss": 1.1525, "step": 437 }, { "epoch": 0.03, "grad_norm": 2.5140163898468018, "learning_rate": 1.6717557251908398e-05, "loss": 1.1015, "step": 438 }, { "epoch": 0.03, "grad_norm": 2.147373914718628, "learning_rate": 1.6755725190839696e-05, "loss": 1.1411, "step": 439 }, { "epoch": 0.03, "grad_norm": 2.0082900524139404, "learning_rate": 1.6793893129770993e-05, "loss": 1.1816, "step": 440 }, { "epoch": 0.03, "grad_norm": 2.1405038833618164, "learning_rate": 1.683206106870229e-05, "loss": 1.1419, "step": 441 }, { "epoch": 0.03, "grad_norm": 1.0462840795516968, "learning_rate": 1.687022900763359e-05, "loss": 0.5146, "step": 442 }, { "epoch": 0.03, "grad_norm": 2.9637937545776367, "learning_rate": 1.6908396946564887e-05, "loss": 1.1293, "step": 443 }, { "epoch": 0.03, "grad_norm": 2.505474328994751, "learning_rate": 1.6946564885496184e-05, "loss": 1.1538, "step": 444 }, { "epoch": 0.03, "grad_norm": 2.1799428462982178, "learning_rate": 1.6984732824427482e-05, "loss": 1.1244, "step": 445 }, { "epoch": 0.03, "grad_norm": 2.3049933910369873, "learning_rate": 1.702290076335878e-05, "loss": 1.1634, "step": 446 }, { "epoch": 0.03, "grad_norm": 2.2087562084198, "learning_rate": 1.7061068702290078e-05, "loss": 1.17, "step": 447 }, { "epoch": 0.03, "grad_norm": 2.218954086303711, "learning_rate": 1.7099236641221375e-05, "loss": 1.0968, "step": 448 }, { "epoch": 0.03, "grad_norm": 2.418799877166748, "learning_rate": 1.7137404580152673e-05, "loss": 1.2075, "step": 449 }, { "epoch": 0.03, "grad_norm": 2.171356678009033, "learning_rate": 1.717557251908397e-05, "loss": 1.2747, "step": 450 }, { "epoch": 0.03, "grad_norm": 2.5126261711120605, "learning_rate": 1.721374045801527e-05, "loss": 1.2013, "step": 451 }, { "epoch": 0.03, "grad_norm": 2.594771385192871, "learning_rate": 1.7251908396946566e-05, "loss": 1.1434, "step": 452 }, { "epoch": 0.03, "grad_norm": 2.4237306118011475, "learning_rate": 1.7290076335877864e-05, "loss": 1.165, "step": 453 }, { "epoch": 0.03, "grad_norm": 2.2773149013519287, "learning_rate": 1.7328244274809162e-05, "loss": 1.1997, "step": 454 }, { "epoch": 0.03, "grad_norm": 2.128214120864868, "learning_rate": 1.736641221374046e-05, "loss": 1.1609, "step": 455 }, { "epoch": 0.03, "grad_norm": 2.2007956504821777, "learning_rate": 1.7404580152671757e-05, "loss": 1.0923, "step": 456 }, { "epoch": 0.03, "grad_norm": 2.4340739250183105, "learning_rate": 1.7442748091603055e-05, "loss": 1.1317, "step": 457 }, { "epoch": 0.03, "grad_norm": 2.1774098873138428, "learning_rate": 1.7480916030534353e-05, "loss": 1.1116, "step": 458 }, { "epoch": 0.03, "grad_norm": 2.4845714569091797, "learning_rate": 1.751908396946565e-05, "loss": 1.0849, "step": 459 }, { "epoch": 0.03, "grad_norm": 2.3281002044677734, "learning_rate": 1.755725190839695e-05, "loss": 1.1561, "step": 460 }, { "epoch": 0.03, "grad_norm": 2.0520999431610107, "learning_rate": 1.7595419847328246e-05, "loss": 1.0587, "step": 461 }, { "epoch": 0.03, "grad_norm": 2.3974833488464355, "learning_rate": 1.7633587786259544e-05, "loss": 1.0751, "step": 462 }, { "epoch": 0.03, "grad_norm": 2.2551488876342773, "learning_rate": 1.767175572519084e-05, "loss": 1.1902, "step": 463 }, { "epoch": 0.03, "grad_norm": 2.5492265224456787, "learning_rate": 1.770992366412214e-05, "loss": 1.1645, "step": 464 }, { "epoch": 0.03, "grad_norm": 2.333768129348755, "learning_rate": 1.7748091603053437e-05, "loss": 1.0968, "step": 465 }, { "epoch": 0.03, "grad_norm": 0.9941705465316772, "learning_rate": 1.7786259541984735e-05, "loss": 0.4776, "step": 466 }, { "epoch": 0.03, "grad_norm": 3.0160634517669678, "learning_rate": 1.7824427480916033e-05, "loss": 1.1978, "step": 467 }, { "epoch": 0.03, "grad_norm": 2.2153005599975586, "learning_rate": 1.786259541984733e-05, "loss": 1.1401, "step": 468 }, { "epoch": 0.03, "grad_norm": 2.150308609008789, "learning_rate": 1.790076335877863e-05, "loss": 1.1996, "step": 469 }, { "epoch": 0.03, "grad_norm": 1.0668734312057495, "learning_rate": 1.7938931297709926e-05, "loss": 0.5641, "step": 470 }, { "epoch": 0.03, "grad_norm": 2.3119425773620605, "learning_rate": 1.7977099236641224e-05, "loss": 1.2263, "step": 471 }, { "epoch": 0.03, "grad_norm": 2.3993160724639893, "learning_rate": 1.8015267175572518e-05, "loss": 1.1162, "step": 472 }, { "epoch": 0.03, "grad_norm": 2.261383056640625, "learning_rate": 1.805343511450382e-05, "loss": 1.1778, "step": 473 }, { "epoch": 0.03, "grad_norm": 2.249664783477783, "learning_rate": 1.8091603053435117e-05, "loss": 1.1903, "step": 474 }, { "epoch": 0.03, "grad_norm": 2.4010398387908936, "learning_rate": 1.8129770992366415e-05, "loss": 1.0956, "step": 475 }, { "epoch": 0.03, "grad_norm": 2.1136553287506104, "learning_rate": 1.816793893129771e-05, "loss": 1.1892, "step": 476 }, { "epoch": 0.03, "grad_norm": 2.4139397144317627, "learning_rate": 1.820610687022901e-05, "loss": 1.1943, "step": 477 }, { "epoch": 0.03, "grad_norm": 2.240676164627075, "learning_rate": 1.8244274809160308e-05, "loss": 1.0821, "step": 478 }, { "epoch": 0.03, "grad_norm": 2.408395528793335, "learning_rate": 1.8282442748091606e-05, "loss": 1.1172, "step": 479 }, { "epoch": 0.03, "grad_norm": 2.193232297897339, "learning_rate": 1.83206106870229e-05, "loss": 1.157, "step": 480 }, { "epoch": 0.03, "grad_norm": 2.1655828952789307, "learning_rate": 1.8358778625954198e-05, "loss": 1.2059, "step": 481 }, { "epoch": 0.03, "grad_norm": 2.1595618724823, "learning_rate": 1.83969465648855e-05, "loss": 1.0686, "step": 482 }, { "epoch": 0.03, "grad_norm": 2.5173721313476562, "learning_rate": 1.8435114503816797e-05, "loss": 1.2197, "step": 483 }, { "epoch": 0.03, "grad_norm": 2.280892848968506, "learning_rate": 1.847328244274809e-05, "loss": 1.1005, "step": 484 }, { "epoch": 0.03, "grad_norm": 2.3220772743225098, "learning_rate": 1.851145038167939e-05, "loss": 1.1063, "step": 485 }, { "epoch": 0.03, "grad_norm": 2.079601287841797, "learning_rate": 1.854961832061069e-05, "loss": 1.1777, "step": 486 }, { "epoch": 0.03, "grad_norm": 2.2345283031463623, "learning_rate": 1.8587786259541988e-05, "loss": 1.1491, "step": 487 }, { "epoch": 0.03, "grad_norm": 2.3589625358581543, "learning_rate": 1.8625954198473282e-05, "loss": 1.1647, "step": 488 }, { "epoch": 0.03, "grad_norm": 2.4348666667938232, "learning_rate": 1.866412213740458e-05, "loss": 1.1538, "step": 489 }, { "epoch": 0.03, "grad_norm": 2.2719566822052, "learning_rate": 1.8702290076335878e-05, "loss": 1.1317, "step": 490 }, { "epoch": 0.03, "grad_norm": 2.241027355194092, "learning_rate": 1.874045801526718e-05, "loss": 1.1695, "step": 491 }, { "epoch": 0.03, "grad_norm": 2.3261735439300537, "learning_rate": 1.8778625954198473e-05, "loss": 1.1719, "step": 492 }, { "epoch": 0.03, "grad_norm": 2.1342687606811523, "learning_rate": 1.881679389312977e-05, "loss": 1.1512, "step": 493 }, { "epoch": 0.03, "grad_norm": 2.1521098613739014, "learning_rate": 1.885496183206107e-05, "loss": 1.2239, "step": 494 }, { "epoch": 0.03, "grad_norm": 2.1821930408477783, "learning_rate": 1.889312977099237e-05, "loss": 1.0751, "step": 495 }, { "epoch": 0.03, "grad_norm": 1.2680542469024658, "learning_rate": 1.8931297709923668e-05, "loss": 0.5478, "step": 496 }, { "epoch": 0.03, "grad_norm": 2.6782658100128174, "learning_rate": 1.8969465648854962e-05, "loss": 1.1517, "step": 497 }, { "epoch": 0.03, "grad_norm": 2.575467109680176, "learning_rate": 1.900763358778626e-05, "loss": 1.1372, "step": 498 }, { "epoch": 0.03, "grad_norm": 1.9334840774536133, "learning_rate": 1.9045801526717558e-05, "loss": 1.0998, "step": 499 }, { "epoch": 0.03, "grad_norm": 1.164106845855713, "learning_rate": 1.908396946564886e-05, "loss": 0.5547, "step": 500 }, { "epoch": 0.03, "grad_norm": 2.201937675476074, "learning_rate": 1.9122137404580153e-05, "loss": 1.1048, "step": 501 }, { "epoch": 0.03, "grad_norm": 2.4689877033233643, "learning_rate": 1.916030534351145e-05, "loss": 1.1524, "step": 502 }, { "epoch": 0.03, "grad_norm": 2.3615005016326904, "learning_rate": 1.919847328244275e-05, "loss": 1.1679, "step": 503 }, { "epoch": 0.03, "grad_norm": 2.327754020690918, "learning_rate": 1.923664122137405e-05, "loss": 1.1288, "step": 504 }, { "epoch": 0.03, "grad_norm": 2.1735355854034424, "learning_rate": 1.9274809160305344e-05, "loss": 1.212, "step": 505 }, { "epoch": 0.03, "grad_norm": 2.6156322956085205, "learning_rate": 1.9312977099236642e-05, "loss": 1.2327, "step": 506 }, { "epoch": 0.03, "grad_norm": 2.54784893989563, "learning_rate": 1.935114503816794e-05, "loss": 1.1699, "step": 507 }, { "epoch": 0.03, "grad_norm": 2.143899440765381, "learning_rate": 1.9389312977099238e-05, "loss": 1.1415, "step": 508 }, { "epoch": 0.03, "grad_norm": 2.2697091102600098, "learning_rate": 1.9427480916030535e-05, "loss": 1.2161, "step": 509 }, { "epoch": 0.03, "grad_norm": 2.1283698081970215, "learning_rate": 1.9465648854961833e-05, "loss": 1.1471, "step": 510 }, { "epoch": 0.03, "grad_norm": 2.2076292037963867, "learning_rate": 1.950381679389313e-05, "loss": 1.1259, "step": 511 }, { "epoch": 0.03, "grad_norm": 2.4508249759674072, "learning_rate": 1.954198473282443e-05, "loss": 1.1896, "step": 512 }, { "epoch": 0.03, "grad_norm": 2.149657964706421, "learning_rate": 1.9580152671755726e-05, "loss": 1.112, "step": 513 }, { "epoch": 0.03, "grad_norm": 2.2458279132843018, "learning_rate": 1.9618320610687024e-05, "loss": 1.1661, "step": 514 }, { "epoch": 0.03, "grad_norm": 2.184749126434326, "learning_rate": 1.9656488549618322e-05, "loss": 1.1254, "step": 515 }, { "epoch": 0.03, "grad_norm": 1.3220924139022827, "learning_rate": 1.969465648854962e-05, "loss": 0.5261, "step": 516 }, { "epoch": 0.03, "grad_norm": 2.3829903602600098, "learning_rate": 1.9732824427480917e-05, "loss": 1.2131, "step": 517 }, { "epoch": 0.03, "grad_norm": 2.315910577774048, "learning_rate": 1.9770992366412215e-05, "loss": 1.2225, "step": 518 }, { "epoch": 0.03, "grad_norm": 2.187544107437134, "learning_rate": 1.9809160305343513e-05, "loss": 1.1266, "step": 519 }, { "epoch": 0.03, "grad_norm": 2.349717617034912, "learning_rate": 1.984732824427481e-05, "loss": 1.1396, "step": 520 }, { "epoch": 0.03, "grad_norm": 2.3578975200653076, "learning_rate": 1.988549618320611e-05, "loss": 1.1718, "step": 521 }, { "epoch": 0.03, "grad_norm": 2.4661965370178223, "learning_rate": 1.9923664122137406e-05, "loss": 1.1849, "step": 522 }, { "epoch": 0.03, "grad_norm": 2.371136426925659, "learning_rate": 1.9961832061068704e-05, "loss": 1.1481, "step": 523 }, { "epoch": 0.03, "grad_norm": 2.2340142726898193, "learning_rate": 2e-05, "loss": 1.2411, "step": 524 }, { "epoch": 0.03, "grad_norm": 1.2085105180740356, "learning_rate": 1.9999999827463968e-05, "loss": 0.5797, "step": 525 }, { "epoch": 0.03, "grad_norm": 2.4866862297058105, "learning_rate": 1.9999999309855876e-05, "loss": 1.1384, "step": 526 }, { "epoch": 0.03, "grad_norm": 2.42767596244812, "learning_rate": 1.999999844717574e-05, "loss": 1.165, "step": 527 }, { "epoch": 0.03, "grad_norm": 2.4111995697021484, "learning_rate": 1.9999997239423593e-05, "loss": 1.0462, "step": 528 }, { "epoch": 0.03, "grad_norm": 2.409727096557617, "learning_rate": 1.999999568659947e-05, "loss": 1.1697, "step": 529 }, { "epoch": 0.03, "grad_norm": 2.0967955589294434, "learning_rate": 1.9999993788703435e-05, "loss": 1.1773, "step": 530 }, { "epoch": 0.03, "grad_norm": 2.5718483924865723, "learning_rate": 1.999999154573555e-05, "loss": 1.2503, "step": 531 }, { "epoch": 0.03, "grad_norm": 2.3113183975219727, "learning_rate": 1.9999988957695886e-05, "loss": 1.1549, "step": 532 }, { "epoch": 0.03, "grad_norm": 2.3366270065307617, "learning_rate": 1.999998602458454e-05, "loss": 1.05, "step": 533 }, { "epoch": 0.03, "grad_norm": 2.3591933250427246, "learning_rate": 1.9999982746401607e-05, "loss": 1.1218, "step": 534 }, { "epoch": 0.03, "grad_norm": 2.3871476650238037, "learning_rate": 1.9999979123147204e-05, "loss": 1.1413, "step": 535 }, { "epoch": 0.03, "grad_norm": 2.11716890335083, "learning_rate": 1.9999975154821454e-05, "loss": 1.2012, "step": 536 }, { "epoch": 0.03, "grad_norm": 2.312731981277466, "learning_rate": 1.99999708414245e-05, "loss": 1.2154, "step": 537 }, { "epoch": 0.03, "grad_norm": 2.3737916946411133, "learning_rate": 1.9999966182956486e-05, "loss": 1.0983, "step": 538 }, { "epoch": 0.03, "grad_norm": 2.3799524307250977, "learning_rate": 1.999996117941757e-05, "loss": 1.2057, "step": 539 }, { "epoch": 0.03, "grad_norm": 2.4459667205810547, "learning_rate": 1.9999955830807925e-05, "loss": 1.1362, "step": 540 }, { "epoch": 0.03, "grad_norm": 2.422807216644287, "learning_rate": 1.999995013712774e-05, "loss": 1.0987, "step": 541 }, { "epoch": 0.03, "grad_norm": 2.3833820819854736, "learning_rate": 1.9999944098377214e-05, "loss": 1.154, "step": 542 }, { "epoch": 0.03, "grad_norm": 2.2140049934387207, "learning_rate": 1.9999937714556546e-05, "loss": 1.1917, "step": 543 }, { "epoch": 0.03, "grad_norm": 3.7330381870269775, "learning_rate": 1.999993098566596e-05, "loss": 1.1322, "step": 544 }, { "epoch": 0.03, "grad_norm": 2.265702247619629, "learning_rate": 1.9999923911705693e-05, "loss": 1.2275, "step": 545 }, { "epoch": 0.03, "grad_norm": 2.586068868637085, "learning_rate": 1.9999916492675984e-05, "loss": 1.1429, "step": 546 }, { "epoch": 0.03, "grad_norm": 2.4351508617401123, "learning_rate": 1.999990872857709e-05, "loss": 1.2053, "step": 547 }, { "epoch": 0.03, "grad_norm": 2.2522010803222656, "learning_rate": 1.999990061940928e-05, "loss": 1.1497, "step": 548 }, { "epoch": 0.03, "grad_norm": 2.0282084941864014, "learning_rate": 1.999989216517283e-05, "loss": 1.134, "step": 549 }, { "epoch": 0.03, "grad_norm": 2.3976423740386963, "learning_rate": 1.999988336586804e-05, "loss": 1.1103, "step": 550 }, { "epoch": 0.03, "grad_norm": 2.2513225078582764, "learning_rate": 1.999987422149521e-05, "loss": 1.16, "step": 551 }, { "epoch": 0.03, "grad_norm": 2.314650297164917, "learning_rate": 1.999986473205465e-05, "loss": 1.1013, "step": 552 }, { "epoch": 0.03, "grad_norm": 2.2503671646118164, "learning_rate": 1.999985489754669e-05, "loss": 1.1604, "step": 553 }, { "epoch": 0.03, "grad_norm": 2.1580429077148438, "learning_rate": 1.9999844717971674e-05, "loss": 1.1881, "step": 554 }, { "epoch": 0.03, "grad_norm": 2.1537997722625732, "learning_rate": 1.9999834193329952e-05, "loss": 1.1533, "step": 555 }, { "epoch": 0.03, "grad_norm": 1.0059038400650024, "learning_rate": 1.999982332362188e-05, "loss": 0.4717, "step": 556 }, { "epoch": 0.03, "grad_norm": 2.3200104236602783, "learning_rate": 1.9999812108847844e-05, "loss": 1.0886, "step": 557 }, { "epoch": 0.03, "grad_norm": 2.1596384048461914, "learning_rate": 1.999980054900822e-05, "loss": 1.2114, "step": 558 }, { "epoch": 0.03, "grad_norm": 2.192187786102295, "learning_rate": 1.9999788644103418e-05, "loss": 1.0871, "step": 559 }, { "epoch": 0.03, "grad_norm": 2.4987313747406006, "learning_rate": 1.999977639413384e-05, "loss": 1.1774, "step": 560 }, { "epoch": 0.03, "grad_norm": 2.245305061340332, "learning_rate": 1.9999763799099912e-05, "loss": 1.1757, "step": 561 }, { "epoch": 0.03, "grad_norm": 1.0853499174118042, "learning_rate": 1.9999750859002066e-05, "loss": 0.5508, "step": 562 }, { "epoch": 0.03, "grad_norm": 2.2679243087768555, "learning_rate": 1.9999737573840755e-05, "loss": 1.1046, "step": 563 }, { "epoch": 0.03, "grad_norm": 2.236968517303467, "learning_rate": 1.9999723943616435e-05, "loss": 1.1015, "step": 564 }, { "epoch": 0.03, "grad_norm": 2.2850563526153564, "learning_rate": 1.9999709968329572e-05, "loss": 1.197, "step": 565 }, { "epoch": 0.03, "grad_norm": 2.270822525024414, "learning_rate": 1.999969564798065e-05, "loss": 1.1892, "step": 566 }, { "epoch": 0.03, "grad_norm": 2.2477834224700928, "learning_rate": 1.9999680982570165e-05, "loss": 1.1031, "step": 567 }, { "epoch": 0.03, "grad_norm": 2.257143497467041, "learning_rate": 1.9999665972098624e-05, "loss": 1.1298, "step": 568 }, { "epoch": 0.03, "grad_norm": 2.267540216445923, "learning_rate": 1.9999650616566542e-05, "loss": 1.2086, "step": 569 }, { "epoch": 0.03, "grad_norm": 2.1472766399383545, "learning_rate": 1.999963491597445e-05, "loss": 1.1297, "step": 570 }, { "epoch": 0.03, "grad_norm": 2.4439661502838135, "learning_rate": 1.999961887032289e-05, "loss": 1.1425, "step": 571 }, { "epoch": 0.03, "grad_norm": 2.184567928314209, "learning_rate": 1.9999602479612416e-05, "loss": 1.1636, "step": 572 }, { "epoch": 0.03, "grad_norm": 2.2469265460968018, "learning_rate": 1.9999585743843592e-05, "loss": 1.1684, "step": 573 }, { "epoch": 0.03, "grad_norm": 2.3082656860351562, "learning_rate": 1.9999568663016998e-05, "loss": 1.1929, "step": 574 }, { "epoch": 0.03, "grad_norm": 2.273785352706909, "learning_rate": 1.999955123713322e-05, "loss": 1.1653, "step": 575 }, { "epoch": 0.03, "grad_norm": 2.2690200805664062, "learning_rate": 1.9999533466192864e-05, "loss": 1.0866, "step": 576 }, { "epoch": 0.03, "grad_norm": 2.0065724849700928, "learning_rate": 1.9999515350196538e-05, "loss": 1.1763, "step": 577 }, { "epoch": 0.03, "grad_norm": 2.245633363723755, "learning_rate": 1.9999496889144874e-05, "loss": 1.1285, "step": 578 }, { "epoch": 0.03, "grad_norm": 2.2888190746307373, "learning_rate": 1.9999478083038503e-05, "loss": 1.2325, "step": 579 }, { "epoch": 0.03, "grad_norm": 2.347853183746338, "learning_rate": 1.999945893187807e-05, "loss": 1.2205, "step": 580 }, { "epoch": 0.03, "grad_norm": 2.051862955093384, "learning_rate": 1.999943943566425e-05, "loss": 1.1805, "step": 581 }, { "epoch": 0.03, "grad_norm": 2.1901590824127197, "learning_rate": 1.9999419594397706e-05, "loss": 1.1634, "step": 582 }, { "epoch": 0.03, "grad_norm": 2.16910982131958, "learning_rate": 1.999939940807912e-05, "loss": 1.1977, "step": 583 }, { "epoch": 0.03, "grad_norm": 1.0214115381240845, "learning_rate": 1.9999378876709194e-05, "loss": 0.5249, "step": 584 }, { "epoch": 0.03, "grad_norm": 2.3862781524658203, "learning_rate": 1.9999358000288637e-05, "loss": 1.1643, "step": 585 }, { "epoch": 0.03, "grad_norm": 2.666750431060791, "learning_rate": 1.9999336778818167e-05, "loss": 1.1773, "step": 586 }, { "epoch": 0.03, "grad_norm": 2.133347988128662, "learning_rate": 1.9999315212298516e-05, "loss": 1.1353, "step": 587 }, { "epoch": 0.03, "grad_norm": 2.2714200019836426, "learning_rate": 1.9999293300730426e-05, "loss": 1.179, "step": 588 }, { "epoch": 0.03, "grad_norm": 2.1183888912200928, "learning_rate": 1.9999271044114663e-05, "loss": 1.112, "step": 589 }, { "epoch": 0.03, "grad_norm": 2.2575178146362305, "learning_rate": 1.9999248442451984e-05, "loss": 1.06, "step": 590 }, { "epoch": 0.03, "grad_norm": 2.2987260818481445, "learning_rate": 1.999922549574317e-05, "loss": 1.1754, "step": 591 }, { "epoch": 0.03, "grad_norm": 2.289649724960327, "learning_rate": 1.9999202203989022e-05, "loss": 1.1596, "step": 592 }, { "epoch": 0.03, "grad_norm": 2.4353740215301514, "learning_rate": 1.9999178567190334e-05, "loss": 1.1812, "step": 593 }, { "epoch": 0.03, "grad_norm": 2.1835286617279053, "learning_rate": 1.9999154585347926e-05, "loss": 1.1053, "step": 594 }, { "epoch": 0.03, "grad_norm": 2.4543471336364746, "learning_rate": 1.9999130258462626e-05, "loss": 1.1579, "step": 595 }, { "epoch": 0.03, "grad_norm": 2.156053066253662, "learning_rate": 1.999910558653527e-05, "loss": 1.1879, "step": 596 }, { "epoch": 0.03, "grad_norm": 1.1831308603286743, "learning_rate": 1.999908056956671e-05, "loss": 0.5483, "step": 597 }, { "epoch": 0.03, "grad_norm": 2.505997657775879, "learning_rate": 1.9999055207557814e-05, "loss": 1.1579, "step": 598 }, { "epoch": 0.03, "grad_norm": 2.2746193408966064, "learning_rate": 1.9999029500509453e-05, "loss": 1.2156, "step": 599 }, { "epoch": 0.03, "grad_norm": 2.5150904655456543, "learning_rate": 1.9999003448422516e-05, "loss": 1.1332, "step": 600 }, { "epoch": 0.03, "grad_norm": 2.3727567195892334, "learning_rate": 1.99989770512979e-05, "loss": 1.1354, "step": 601 }, { "epoch": 0.03, "grad_norm": 2.439591646194458, "learning_rate": 1.999895030913652e-05, "loss": 1.1366, "step": 602 }, { "epoch": 0.03, "grad_norm": 2.472810745239258, "learning_rate": 1.9998923221939294e-05, "loss": 1.121, "step": 603 }, { "epoch": 0.03, "grad_norm": 2.1895086765289307, "learning_rate": 1.9998895789707156e-05, "loss": 1.0808, "step": 604 }, { "epoch": 0.03, "grad_norm": 2.1236534118652344, "learning_rate": 1.9998868012441056e-05, "loss": 1.1354, "step": 605 }, { "epoch": 0.03, "grad_norm": 2.1101601123809814, "learning_rate": 1.9998839890141953e-05, "loss": 1.1826, "step": 606 }, { "epoch": 0.03, "grad_norm": 2.3621020317077637, "learning_rate": 1.9998811422810816e-05, "loss": 1.235, "step": 607 }, { "epoch": 0.03, "grad_norm": 2.186937093734741, "learning_rate": 1.9998782610448625e-05, "loss": 1.1058, "step": 608 }, { "epoch": 0.03, "grad_norm": 2.4096362590789795, "learning_rate": 1.999875345305638e-05, "loss": 1.1583, "step": 609 }, { "epoch": 0.03, "grad_norm": 2.364793539047241, "learning_rate": 1.999872395063508e-05, "loss": 1.1775, "step": 610 }, { "epoch": 0.04, "grad_norm": 2.067514181137085, "learning_rate": 1.9998694103185753e-05, "loss": 1.0976, "step": 611 }, { "epoch": 0.04, "grad_norm": 2.5292017459869385, "learning_rate": 1.9998663910709416e-05, "loss": 1.1454, "step": 612 }, { "epoch": 0.04, "grad_norm": 2.290858268737793, "learning_rate": 1.999863337320712e-05, "loss": 1.1362, "step": 613 }, { "epoch": 0.04, "grad_norm": 2.3363685607910156, "learning_rate": 1.9998602490679916e-05, "loss": 1.1575, "step": 614 }, { "epoch": 0.04, "grad_norm": 2.250690460205078, "learning_rate": 1.9998571263128873e-05, "loss": 1.189, "step": 615 }, { "epoch": 0.04, "grad_norm": 2.323308229446411, "learning_rate": 1.999853969055506e-05, "loss": 1.0895, "step": 616 }, { "epoch": 0.04, "grad_norm": 2.347003936767578, "learning_rate": 1.9998507772959578e-05, "loss": 1.1435, "step": 617 }, { "epoch": 0.04, "grad_norm": 2.0986454486846924, "learning_rate": 1.999847551034352e-05, "loss": 1.1879, "step": 618 }, { "epoch": 0.04, "grad_norm": 2.2946653366088867, "learning_rate": 1.9998442902708e-05, "loss": 1.1618, "step": 619 }, { "epoch": 0.04, "grad_norm": 2.4341859817504883, "learning_rate": 1.999840995005415e-05, "loss": 1.1794, "step": 620 }, { "epoch": 0.04, "grad_norm": 2.2120912075042725, "learning_rate": 1.9998376652383095e-05, "loss": 1.076, "step": 621 }, { "epoch": 0.04, "grad_norm": 2.1352121829986572, "learning_rate": 1.9998343009695995e-05, "loss": 1.0731, "step": 622 }, { "epoch": 0.04, "grad_norm": 2.2443056106567383, "learning_rate": 1.9998309021994006e-05, "loss": 1.2216, "step": 623 }, { "epoch": 0.04, "grad_norm": 2.225339412689209, "learning_rate": 1.9998274689278302e-05, "loss": 1.0892, "step": 624 }, { "epoch": 0.04, "grad_norm": 2.3853800296783447, "learning_rate": 1.999824001155007e-05, "loss": 1.1826, "step": 625 }, { "epoch": 0.04, "grad_norm": 2.2819066047668457, "learning_rate": 1.99982049888105e-05, "loss": 1.1986, "step": 626 }, { "epoch": 0.04, "grad_norm": 2.5107710361480713, "learning_rate": 1.999816962106081e-05, "loss": 1.1054, "step": 627 }, { "epoch": 0.04, "grad_norm": 2.2733571529388428, "learning_rate": 1.999813390830221e-05, "loss": 1.1267, "step": 628 }, { "epoch": 0.04, "grad_norm": 1.1569218635559082, "learning_rate": 1.999809785053594e-05, "loss": 0.5558, "step": 629 }, { "epoch": 0.04, "grad_norm": 2.067953586578369, "learning_rate": 1.999806144776324e-05, "loss": 1.1731, "step": 630 }, { "epoch": 0.04, "grad_norm": 2.3705060482025146, "learning_rate": 1.999802469998537e-05, "loss": 1.2497, "step": 631 }, { "epoch": 0.04, "grad_norm": 2.12733793258667, "learning_rate": 1.9997987607203596e-05, "loss": 1.1613, "step": 632 }, { "epoch": 0.04, "grad_norm": 2.433986186981201, "learning_rate": 1.9997950169419194e-05, "loss": 1.1397, "step": 633 }, { "epoch": 0.04, "grad_norm": 2.2078075408935547, "learning_rate": 1.9997912386633464e-05, "loss": 1.12, "step": 634 }, { "epoch": 0.04, "grad_norm": 1.938741683959961, "learning_rate": 1.99978742588477e-05, "loss": 1.0168, "step": 635 }, { "epoch": 0.04, "grad_norm": 2.2172467708587646, "learning_rate": 1.999783578606323e-05, "loss": 1.1134, "step": 636 }, { "epoch": 0.04, "grad_norm": 2.2737231254577637, "learning_rate": 1.9997796968281373e-05, "loss": 1.1391, "step": 637 }, { "epoch": 0.04, "grad_norm": 2.331183671951294, "learning_rate": 1.999775780550347e-05, "loss": 1.1118, "step": 638 }, { "epoch": 0.04, "grad_norm": 2.3759515285491943, "learning_rate": 1.9997718297730874e-05, "loss": 1.182, "step": 639 }, { "epoch": 0.04, "grad_norm": 2.184342622756958, "learning_rate": 1.9997678444964947e-05, "loss": 1.0702, "step": 640 }, { "epoch": 0.04, "grad_norm": 2.2764105796813965, "learning_rate": 1.9997638247207057e-05, "loss": 1.1988, "step": 641 }, { "epoch": 0.04, "grad_norm": 1.9475120306015015, "learning_rate": 1.9997597704458608e-05, "loss": 1.0982, "step": 642 }, { "epoch": 0.04, "grad_norm": 2.0692942142486572, "learning_rate": 1.9997556816720985e-05, "loss": 1.148, "step": 643 }, { "epoch": 0.04, "grad_norm": 2.1277546882629395, "learning_rate": 1.9997515583995604e-05, "loss": 1.1454, "step": 644 }, { "epoch": 0.04, "grad_norm": 2.091170072555542, "learning_rate": 1.9997474006283885e-05, "loss": 1.1404, "step": 645 }, { "epoch": 0.04, "grad_norm": 2.2826859951019287, "learning_rate": 1.9997432083587268e-05, "loss": 1.1263, "step": 646 }, { "epoch": 0.04, "grad_norm": 2.2109992504119873, "learning_rate": 1.9997389815907193e-05, "loss": 1.2241, "step": 647 }, { "epoch": 0.04, "grad_norm": 2.08583927154541, "learning_rate": 1.9997347203245126e-05, "loss": 1.1907, "step": 648 }, { "epoch": 0.04, "grad_norm": 2.0009896755218506, "learning_rate": 1.9997304245602533e-05, "loss": 1.1202, "step": 649 }, { "epoch": 0.04, "grad_norm": 2.2349495887756348, "learning_rate": 1.9997260942980895e-05, "loss": 1.0894, "step": 650 }, { "epoch": 0.04, "grad_norm": 2.2654592990875244, "learning_rate": 1.999721729538171e-05, "loss": 1.0681, "step": 651 }, { "epoch": 0.04, "grad_norm": 2.2119839191436768, "learning_rate": 1.9997173302806478e-05, "loss": 1.1419, "step": 652 }, { "epoch": 0.04, "grad_norm": 2.471290349960327, "learning_rate": 1.9997128965256726e-05, "loss": 1.1598, "step": 653 }, { "epoch": 0.04, "grad_norm": 2.283198118209839, "learning_rate": 1.9997084282733975e-05, "loss": 1.1286, "step": 654 }, { "epoch": 0.04, "grad_norm": 2.6239185333251953, "learning_rate": 1.9997039255239774e-05, "loss": 1.2641, "step": 655 }, { "epoch": 0.04, "grad_norm": 2.125765562057495, "learning_rate": 1.9996993882775674e-05, "loss": 1.1104, "step": 656 }, { "epoch": 0.04, "grad_norm": 2.5450098514556885, "learning_rate": 1.9996948165343243e-05, "loss": 1.1035, "step": 657 }, { "epoch": 0.04, "grad_norm": 2.2383806705474854, "learning_rate": 1.999690210294405e-05, "loss": 1.1286, "step": 658 }, { "epoch": 0.04, "grad_norm": 2.2511579990386963, "learning_rate": 1.9996855695579694e-05, "loss": 1.1192, "step": 659 }, { "epoch": 0.04, "grad_norm": 2.0633535385131836, "learning_rate": 1.9996808943251773e-05, "loss": 1.1526, "step": 660 }, { "epoch": 0.04, "grad_norm": 2.3751425743103027, "learning_rate": 1.99967618459619e-05, "loss": 1.195, "step": 661 }, { "epoch": 0.04, "grad_norm": 2.061765193939209, "learning_rate": 1.99967144037117e-05, "loss": 1.1203, "step": 662 }, { "epoch": 0.04, "grad_norm": 2.1556942462921143, "learning_rate": 1.9996666616502812e-05, "loss": 1.1355, "step": 663 }, { "epoch": 0.04, "grad_norm": 2.3233346939086914, "learning_rate": 1.9996618484336885e-05, "loss": 1.1094, "step": 664 }, { "epoch": 0.04, "grad_norm": 2.210195779800415, "learning_rate": 1.9996570007215578e-05, "loss": 1.1181, "step": 665 }, { "epoch": 0.04, "grad_norm": 2.1433939933776855, "learning_rate": 1.999652118514056e-05, "loss": 1.1799, "step": 666 }, { "epoch": 0.04, "grad_norm": 2.1322736740112305, "learning_rate": 1.9996472018113523e-05, "loss": 1.1177, "step": 667 }, { "epoch": 0.04, "grad_norm": 2.336160659790039, "learning_rate": 1.999642250613616e-05, "loss": 1.1672, "step": 668 }, { "epoch": 0.04, "grad_norm": 2.432091474533081, "learning_rate": 1.9996372649210182e-05, "loss": 1.1951, "step": 669 }, { "epoch": 0.04, "grad_norm": 2.310286521911621, "learning_rate": 1.9996322447337307e-05, "loss": 1.2545, "step": 670 }, { "epoch": 0.04, "grad_norm": 2.2603771686553955, "learning_rate": 1.9996271900519267e-05, "loss": 1.2332, "step": 671 }, { "epoch": 0.04, "grad_norm": 2.2341551780700684, "learning_rate": 1.9996221008757807e-05, "loss": 1.1276, "step": 672 }, { "epoch": 0.04, "grad_norm": 2.2875022888183594, "learning_rate": 1.9996169772054684e-05, "loss": 1.1259, "step": 673 }, { "epoch": 0.04, "grad_norm": 2.266982078552246, "learning_rate": 1.9996118190411664e-05, "loss": 1.1116, "step": 674 }, { "epoch": 0.04, "grad_norm": 2.226486921310425, "learning_rate": 1.9996066263830533e-05, "loss": 1.1122, "step": 675 }, { "epoch": 0.04, "grad_norm": 2.379892110824585, "learning_rate": 1.9996013992313072e-05, "loss": 1.1502, "step": 676 }, { "epoch": 0.04, "grad_norm": 2.562361478805542, "learning_rate": 1.9995961375861092e-05, "loss": 1.1756, "step": 677 }, { "epoch": 0.04, "grad_norm": 2.3117401599884033, "learning_rate": 1.999590841447641e-05, "loss": 1.1153, "step": 678 }, { "epoch": 0.04, "grad_norm": 2.3392574787139893, "learning_rate": 1.9995855108160852e-05, "loss": 1.185, "step": 679 }, { "epoch": 0.04, "grad_norm": 2.1996610164642334, "learning_rate": 1.9995801456916252e-05, "loss": 1.1068, "step": 680 }, { "epoch": 0.04, "grad_norm": 2.414916753768921, "learning_rate": 1.9995747460744467e-05, "loss": 1.223, "step": 681 }, { "epoch": 0.04, "grad_norm": 2.2416093349456787, "learning_rate": 1.999569311964736e-05, "loss": 1.1219, "step": 682 }, { "epoch": 0.04, "grad_norm": 2.197721004486084, "learning_rate": 1.999563843362681e-05, "loss": 1.2113, "step": 683 }, { "epoch": 0.04, "grad_norm": 2.0379817485809326, "learning_rate": 1.9995583402684697e-05, "loss": 1.137, "step": 684 }, { "epoch": 0.04, "grad_norm": 2.0361454486846924, "learning_rate": 1.9995528026822916e-05, "loss": 1.0917, "step": 685 }, { "epoch": 0.04, "grad_norm": 2.349773406982422, "learning_rate": 1.999547230604339e-05, "loss": 1.1195, "step": 686 }, { "epoch": 0.04, "grad_norm": 2.339024782180786, "learning_rate": 1.9995416240348034e-05, "loss": 1.1838, "step": 687 }, { "epoch": 0.04, "grad_norm": 2.271723985671997, "learning_rate": 1.9995359829738784e-05, "loss": 1.1244, "step": 688 }, { "epoch": 0.04, "grad_norm": 2.658097982406616, "learning_rate": 1.999530307421759e-05, "loss": 1.1088, "step": 689 }, { "epoch": 0.04, "grad_norm": 2.427680253982544, "learning_rate": 1.9995245973786404e-05, "loss": 1.1421, "step": 690 }, { "epoch": 0.04, "grad_norm": 2.1488356590270996, "learning_rate": 1.9995188528447205e-05, "loss": 1.1048, "step": 691 }, { "epoch": 0.04, "grad_norm": 2.1456120014190674, "learning_rate": 1.9995130738201966e-05, "loss": 1.1714, "step": 692 }, { "epoch": 0.04, "grad_norm": 2.168026924133301, "learning_rate": 1.9995072603052687e-05, "loss": 1.1326, "step": 693 }, { "epoch": 0.04, "grad_norm": 2.3212029933929443, "learning_rate": 1.9995014123001374e-05, "loss": 1.1415, "step": 694 }, { "epoch": 0.04, "grad_norm": 2.588196039199829, "learning_rate": 1.999495529805004e-05, "loss": 1.1638, "step": 695 }, { "epoch": 0.04, "grad_norm": 2.14258074760437, "learning_rate": 1.9994896128200723e-05, "loss": 1.1866, "step": 696 }, { "epoch": 0.04, "grad_norm": 2.253497838973999, "learning_rate": 1.9994836613455456e-05, "loss": 1.1213, "step": 697 }, { "epoch": 0.04, "grad_norm": 1.9785290956497192, "learning_rate": 1.99947767538163e-05, "loss": 1.1912, "step": 698 }, { "epoch": 0.04, "grad_norm": 2.480708122253418, "learning_rate": 1.9994716549285312e-05, "loss": 1.1657, "step": 699 }, { "epoch": 0.04, "grad_norm": 2.3822779655456543, "learning_rate": 1.9994655999864583e-05, "loss": 1.0756, "step": 700 }, { "epoch": 0.04, "grad_norm": 2.1088130474090576, "learning_rate": 1.999459510555619e-05, "loss": 1.1789, "step": 701 }, { "epoch": 0.04, "grad_norm": 2.321089744567871, "learning_rate": 1.999453386636224e-05, "loss": 1.1927, "step": 702 }, { "epoch": 0.04, "grad_norm": 2.4990336894989014, "learning_rate": 1.9994472282284843e-05, "loss": 1.1084, "step": 703 }, { "epoch": 0.04, "grad_norm": 2.5052928924560547, "learning_rate": 1.9994410353326126e-05, "loss": 1.0988, "step": 704 }, { "epoch": 0.04, "grad_norm": 2.1726348400115967, "learning_rate": 1.9994348079488225e-05, "loss": 1.1309, "step": 705 }, { "epoch": 0.04, "grad_norm": 2.193157911300659, "learning_rate": 1.9994285460773294e-05, "loss": 1.1344, "step": 706 }, { "epoch": 0.04, "grad_norm": 2.2607314586639404, "learning_rate": 1.9994222497183487e-05, "loss": 1.1726, "step": 707 }, { "epoch": 0.04, "grad_norm": 2.1786909103393555, "learning_rate": 1.999415918872098e-05, "loss": 1.1471, "step": 708 }, { "epoch": 0.04, "grad_norm": 2.3930792808532715, "learning_rate": 1.999409553538796e-05, "loss": 1.0704, "step": 709 }, { "epoch": 0.04, "grad_norm": 2.2470383644104004, "learning_rate": 1.9994031537186615e-05, "loss": 1.1074, "step": 710 }, { "epoch": 0.04, "grad_norm": 2.1238656044006348, "learning_rate": 1.999396719411916e-05, "loss": 1.0823, "step": 711 }, { "epoch": 0.04, "grad_norm": 2.032536029815674, "learning_rate": 1.9993902506187815e-05, "loss": 1.0965, "step": 712 }, { "epoch": 0.04, "grad_norm": 2.1003777980804443, "learning_rate": 1.999383747339481e-05, "loss": 1.1183, "step": 713 }, { "epoch": 0.04, "grad_norm": 2.355907678604126, "learning_rate": 1.9993772095742396e-05, "loss": 1.1553, "step": 714 }, { "epoch": 0.04, "grad_norm": 2.1636922359466553, "learning_rate": 1.9993706373232818e-05, "loss": 1.1997, "step": 715 }, { "epoch": 0.04, "grad_norm": 1.1999868154525757, "learning_rate": 1.999364030586835e-05, "loss": 0.5811, "step": 716 }, { "epoch": 0.04, "grad_norm": 2.041339874267578, "learning_rate": 1.9993573893651273e-05, "loss": 1.1038, "step": 717 }, { "epoch": 0.04, "grad_norm": 2.0734360218048096, "learning_rate": 1.9993507136583876e-05, "loss": 1.1119, "step": 718 }, { "epoch": 0.04, "grad_norm": 1.2639509439468384, "learning_rate": 1.9993440034668462e-05, "loss": 0.6233, "step": 719 }, { "epoch": 0.04, "grad_norm": 2.218365430831909, "learning_rate": 1.9993372587907348e-05, "loss": 1.1584, "step": 720 }, { "epoch": 0.04, "grad_norm": 1.0786736011505127, "learning_rate": 1.9993304796302865e-05, "loss": 0.5584, "step": 721 }, { "epoch": 0.04, "grad_norm": 2.1460120677948, "learning_rate": 1.9993236659857347e-05, "loss": 1.153, "step": 722 }, { "epoch": 0.04, "grad_norm": 2.3159492015838623, "learning_rate": 1.9993168178573146e-05, "loss": 1.1244, "step": 723 }, { "epoch": 0.04, "grad_norm": 2.178205966949463, "learning_rate": 1.9993099352452626e-05, "loss": 1.0997, "step": 724 }, { "epoch": 0.04, "grad_norm": 2.245976686477661, "learning_rate": 1.9993030181498163e-05, "loss": 1.1348, "step": 725 }, { "epoch": 0.04, "grad_norm": 2.0968141555786133, "learning_rate": 1.999296066571214e-05, "loss": 1.2025, "step": 726 }, { "epoch": 0.04, "grad_norm": 1.3151137828826904, "learning_rate": 1.999289080509696e-05, "loss": 0.5537, "step": 727 }, { "epoch": 0.04, "grad_norm": 2.8331804275512695, "learning_rate": 1.9992820599655034e-05, "loss": 1.0328, "step": 728 }, { "epoch": 0.04, "grad_norm": 2.202378511428833, "learning_rate": 1.9992750049388783e-05, "loss": 1.1926, "step": 729 }, { "epoch": 0.04, "grad_norm": 2.318918228149414, "learning_rate": 1.999267915430064e-05, "loss": 1.1047, "step": 730 }, { "epoch": 0.04, "grad_norm": 2.3065872192382812, "learning_rate": 1.999260791439305e-05, "loss": 1.1673, "step": 731 }, { "epoch": 0.04, "grad_norm": 2.137610673904419, "learning_rate": 1.999253632966848e-05, "loss": 1.1542, "step": 732 }, { "epoch": 0.04, "grad_norm": 2.495720863342285, "learning_rate": 1.999246440012939e-05, "loss": 1.1439, "step": 733 }, { "epoch": 0.04, "grad_norm": 2.5278029441833496, "learning_rate": 1.9992392125778267e-05, "loss": 1.2329, "step": 734 }, { "epoch": 0.04, "grad_norm": 2.2882354259490967, "learning_rate": 1.9992319506617606e-05, "loss": 1.0965, "step": 735 }, { "epoch": 0.04, "grad_norm": 2.1578209400177, "learning_rate": 1.999224654264991e-05, "loss": 1.1287, "step": 736 }, { "epoch": 0.04, "grad_norm": 2.230405569076538, "learning_rate": 1.99921732338777e-05, "loss": 1.0937, "step": 737 }, { "epoch": 0.04, "grad_norm": 2.175158739089966, "learning_rate": 1.99920995803035e-05, "loss": 1.179, "step": 738 }, { "epoch": 0.04, "grad_norm": 2.416019916534424, "learning_rate": 1.9992025581929856e-05, "loss": 1.1751, "step": 739 }, { "epoch": 0.04, "grad_norm": 2.1297595500946045, "learning_rate": 1.9991951238759323e-05, "loss": 1.0965, "step": 740 }, { "epoch": 0.04, "grad_norm": 2.335035562515259, "learning_rate": 1.9991876550794465e-05, "loss": 1.0905, "step": 741 }, { "epoch": 0.04, "grad_norm": 1.9685287475585938, "learning_rate": 1.9991801518037856e-05, "loss": 1.1004, "step": 742 }, { "epoch": 0.04, "grad_norm": 2.143871784210205, "learning_rate": 1.9991726140492088e-05, "loss": 1.094, "step": 743 }, { "epoch": 0.04, "grad_norm": 2.1374764442443848, "learning_rate": 1.9991650418159763e-05, "loss": 1.1805, "step": 744 }, { "epoch": 0.04, "grad_norm": 2.418219566345215, "learning_rate": 1.999157435104349e-05, "loss": 1.1657, "step": 745 }, { "epoch": 0.04, "grad_norm": 2.1903107166290283, "learning_rate": 1.9991497939145898e-05, "loss": 1.1089, "step": 746 }, { "epoch": 0.04, "grad_norm": 2.141371250152588, "learning_rate": 1.9991421182469624e-05, "loss": 1.1817, "step": 747 }, { "epoch": 0.04, "grad_norm": 2.0492637157440186, "learning_rate": 1.9991344081017312e-05, "loss": 1.0642, "step": 748 }, { "epoch": 0.04, "grad_norm": 2.3152265548706055, "learning_rate": 1.9991266634791627e-05, "loss": 1.1055, "step": 749 }, { "epoch": 0.04, "grad_norm": 2.195970296859741, "learning_rate": 1.9991188843795238e-05, "loss": 1.187, "step": 750 }, { "epoch": 0.04, "grad_norm": 2.2096188068389893, "learning_rate": 1.9991110708030836e-05, "loss": 1.1665, "step": 751 }, { "epoch": 0.04, "grad_norm": 1.1097246408462524, "learning_rate": 1.999103222750111e-05, "loss": 0.5414, "step": 752 }, { "epoch": 0.04, "grad_norm": 2.29532527923584, "learning_rate": 1.9990953402208767e-05, "loss": 1.1721, "step": 753 }, { "epoch": 0.04, "grad_norm": 2.391451597213745, "learning_rate": 1.9990874232156533e-05, "loss": 1.2523, "step": 754 }, { "epoch": 0.04, "grad_norm": 2.2653884887695312, "learning_rate": 1.999079471734714e-05, "loss": 1.1512, "step": 755 }, { "epoch": 0.04, "grad_norm": 2.1708734035491943, "learning_rate": 1.9990714857783327e-05, "loss": 1.1133, "step": 756 }, { "epoch": 0.04, "grad_norm": 1.0662405490875244, "learning_rate": 1.9990634653467854e-05, "loss": 0.61, "step": 757 }, { "epoch": 0.04, "grad_norm": 2.258378744125366, "learning_rate": 1.9990554104403484e-05, "loss": 1.1304, "step": 758 }, { "epoch": 0.04, "grad_norm": 2.3086485862731934, "learning_rate": 1.9990473210593e-05, "loss": 1.1219, "step": 759 }, { "epoch": 0.04, "grad_norm": 2.2723228931427, "learning_rate": 1.9990391972039197e-05, "loss": 1.1812, "step": 760 }, { "epoch": 0.04, "grad_norm": 1.930712342262268, "learning_rate": 1.9990310388744868e-05, "loss": 1.054, "step": 761 }, { "epoch": 0.04, "grad_norm": 2.0871713161468506, "learning_rate": 1.999022846071284e-05, "loss": 1.0973, "step": 762 }, { "epoch": 0.04, "grad_norm": 2.204150915145874, "learning_rate": 1.9990146187945928e-05, "loss": 1.1459, "step": 763 }, { "epoch": 0.04, "grad_norm": 2.039926767349243, "learning_rate": 1.9990063570446985e-05, "loss": 1.0296, "step": 764 }, { "epoch": 0.04, "grad_norm": 2.004918098449707, "learning_rate": 1.9989980608218847e-05, "loss": 1.1188, "step": 765 }, { "epoch": 0.04, "grad_norm": 2.079618215560913, "learning_rate": 1.998989730126439e-05, "loss": 1.1185, "step": 766 }, { "epoch": 0.04, "grad_norm": 2.041402578353882, "learning_rate": 1.998981364958648e-05, "loss": 1.0871, "step": 767 }, { "epoch": 0.04, "grad_norm": 2.1574597358703613, "learning_rate": 1.9989729653188006e-05, "loss": 1.1514, "step": 768 }, { "epoch": 0.04, "grad_norm": 2.337864637374878, "learning_rate": 1.9989645312071867e-05, "loss": 1.1673, "step": 769 }, { "epoch": 0.04, "grad_norm": 1.9658972024917603, "learning_rate": 1.9989560626240975e-05, "loss": 1.0791, "step": 770 }, { "epoch": 0.04, "grad_norm": 1.1030495166778564, "learning_rate": 1.9989475595698245e-05, "loss": 0.5206, "step": 771 }, { "epoch": 0.04, "grad_norm": 1.949268102645874, "learning_rate": 1.9989390220446624e-05, "loss": 1.1177, "step": 772 }, { "epoch": 0.04, "grad_norm": 2.1365692615509033, "learning_rate": 1.9989304500489047e-05, "loss": 1.1042, "step": 773 }, { "epoch": 0.04, "grad_norm": 2.040756940841675, "learning_rate": 1.9989218435828478e-05, "loss": 1.1084, "step": 774 }, { "epoch": 0.04, "grad_norm": 2.290026903152466, "learning_rate": 1.998913202646788e-05, "loss": 1.2119, "step": 775 }, { "epoch": 0.04, "grad_norm": 2.152768611907959, "learning_rate": 1.9989045272410242e-05, "loss": 1.1365, "step": 776 }, { "epoch": 0.04, "grad_norm": 2.016942262649536, "learning_rate": 1.9988958173658556e-05, "loss": 1.105, "step": 777 }, { "epoch": 0.04, "grad_norm": 2.157235860824585, "learning_rate": 1.9988870730215827e-05, "loss": 1.014, "step": 778 }, { "epoch": 0.04, "grad_norm": 2.21296763420105, "learning_rate": 1.998878294208507e-05, "loss": 1.0783, "step": 779 }, { "epoch": 0.04, "grad_norm": 2.2792489528656006, "learning_rate": 1.9988694809269316e-05, "loss": 1.1374, "step": 780 }, { "epoch": 0.04, "grad_norm": 2.087193250656128, "learning_rate": 1.9988606331771608e-05, "loss": 1.203, "step": 781 }, { "epoch": 0.04, "grad_norm": 2.255445957183838, "learning_rate": 1.9988517509594994e-05, "loss": 1.1693, "step": 782 }, { "epoch": 0.04, "grad_norm": 2.696228504180908, "learning_rate": 1.9988428342742544e-05, "loss": 1.1136, "step": 783 }, { "epoch": 0.04, "grad_norm": 2.1770858764648438, "learning_rate": 1.9988338831217335e-05, "loss": 1.0596, "step": 784 }, { "epoch": 0.05, "grad_norm": 2.083209991455078, "learning_rate": 1.9988248975022455e-05, "loss": 1.0909, "step": 785 }, { "epoch": 0.05, "grad_norm": 1.146885871887207, "learning_rate": 1.9988158774161003e-05, "loss": 0.5317, "step": 786 }, { "epoch": 0.05, "grad_norm": 1.9633382558822632, "learning_rate": 1.9988068228636092e-05, "loss": 1.0857, "step": 787 }, { "epoch": 0.05, "grad_norm": 1.03809654712677, "learning_rate": 1.9987977338450845e-05, "loss": 0.5621, "step": 788 }, { "epoch": 0.05, "grad_norm": 2.098221778869629, "learning_rate": 1.9987886103608403e-05, "loss": 1.1404, "step": 789 }, { "epoch": 0.05, "grad_norm": 2.0369505882263184, "learning_rate": 1.998779452411191e-05, "loss": 1.146, "step": 790 }, { "epoch": 0.05, "grad_norm": 2.35223126411438, "learning_rate": 1.998770259996453e-05, "loss": 1.1684, "step": 791 }, { "epoch": 0.05, "grad_norm": 2.3191168308258057, "learning_rate": 1.998761033116943e-05, "loss": 1.1308, "step": 792 }, { "epoch": 0.05, "grad_norm": 2.250437021255493, "learning_rate": 1.99875177177298e-05, "loss": 1.1598, "step": 793 }, { "epoch": 0.05, "grad_norm": 2.364640712738037, "learning_rate": 1.9987424759648834e-05, "loss": 1.2441, "step": 794 }, { "epoch": 0.05, "grad_norm": 2.301056385040283, "learning_rate": 1.9987331456929734e-05, "loss": 1.1276, "step": 795 }, { "epoch": 0.05, "grad_norm": 2.3249990940093994, "learning_rate": 1.9987237809575722e-05, "loss": 1.0879, "step": 796 }, { "epoch": 0.05, "grad_norm": 2.185084104537964, "learning_rate": 1.998714381759004e-05, "loss": 1.1389, "step": 797 }, { "epoch": 0.05, "grad_norm": 2.1008193492889404, "learning_rate": 1.9987049480975913e-05, "loss": 1.0826, "step": 798 }, { "epoch": 0.05, "grad_norm": 2.1822409629821777, "learning_rate": 1.998695479973661e-05, "loss": 1.1188, "step": 799 }, { "epoch": 0.05, "grad_norm": 2.3375747203826904, "learning_rate": 1.9986859773875397e-05, "loss": 1.099, "step": 800 }, { "epoch": 0.05, "grad_norm": 2.00004506111145, "learning_rate": 1.9986764403395546e-05, "loss": 1.0803, "step": 801 }, { "epoch": 0.05, "grad_norm": 2.527397394180298, "learning_rate": 1.9986668688300354e-05, "loss": 1.1633, "step": 802 }, { "epoch": 0.05, "grad_norm": 2.4554026126861572, "learning_rate": 1.9986572628593124e-05, "loss": 1.1651, "step": 803 }, { "epoch": 0.05, "grad_norm": 2.0630898475646973, "learning_rate": 1.9986476224277167e-05, "loss": 1.149, "step": 804 }, { "epoch": 0.05, "grad_norm": 2.213650703430176, "learning_rate": 1.9986379475355813e-05, "loss": 1.1072, "step": 805 }, { "epoch": 0.05, "grad_norm": 2.1035141944885254, "learning_rate": 1.9986282381832396e-05, "loss": 1.1364, "step": 806 }, { "epoch": 0.05, "grad_norm": 2.2496273517608643, "learning_rate": 1.9986184943710274e-05, "loss": 1.1111, "step": 807 }, { "epoch": 0.05, "grad_norm": 2.480142593383789, "learning_rate": 1.99860871609928e-05, "loss": 1.1618, "step": 808 }, { "epoch": 0.05, "grad_norm": 2.1323018074035645, "learning_rate": 1.9985989033683357e-05, "loss": 1.1097, "step": 809 }, { "epoch": 0.05, "grad_norm": 2.1185803413391113, "learning_rate": 1.9985890561785326e-05, "loss": 1.1726, "step": 810 }, { "epoch": 0.05, "grad_norm": 2.2804670333862305, "learning_rate": 1.9985791745302108e-05, "loss": 1.1251, "step": 811 }, { "epoch": 0.05, "grad_norm": 2.079725503921509, "learning_rate": 1.998569258423711e-05, "loss": 1.0719, "step": 812 }, { "epoch": 0.05, "grad_norm": 1.9008687734603882, "learning_rate": 1.9985593078593753e-05, "loss": 1.0758, "step": 813 }, { "epoch": 0.05, "grad_norm": 2.283749580383301, "learning_rate": 1.9985493228375473e-05, "loss": 1.1424, "step": 814 }, { "epoch": 0.05, "grad_norm": 2.214712619781494, "learning_rate": 1.9985393033585715e-05, "loss": 1.1616, "step": 815 }, { "epoch": 0.05, "grad_norm": 2.1830966472625732, "learning_rate": 1.9985292494227937e-05, "loss": 1.2065, "step": 816 }, { "epoch": 0.05, "grad_norm": 2.2901535034179688, "learning_rate": 1.9985191610305607e-05, "loss": 1.0891, "step": 817 }, { "epoch": 0.05, "grad_norm": 2.0829639434814453, "learning_rate": 1.998509038182221e-05, "loss": 1.0692, "step": 818 }, { "epoch": 0.05, "grad_norm": 2.216646432876587, "learning_rate": 1.998498880878123e-05, "loss": 1.1806, "step": 819 }, { "epoch": 0.05, "grad_norm": 2.129645586013794, "learning_rate": 1.9984886891186184e-05, "loss": 1.1581, "step": 820 }, { "epoch": 0.05, "grad_norm": 2.1460018157958984, "learning_rate": 1.9984784629040584e-05, "loss": 1.1521, "step": 821 }, { "epoch": 0.05, "grad_norm": 1.9727392196655273, "learning_rate": 1.998468202234795e-05, "loss": 1.0632, "step": 822 }, { "epoch": 0.05, "grad_norm": 2.717949867248535, "learning_rate": 1.998457907111184e-05, "loss": 1.1785, "step": 823 }, { "epoch": 0.05, "grad_norm": 2.086191415786743, "learning_rate": 1.998447577533579e-05, "loss": 1.0725, "step": 824 }, { "epoch": 0.05, "grad_norm": 2.5673916339874268, "learning_rate": 1.9984372135023375e-05, "loss": 1.1785, "step": 825 }, { "epoch": 0.05, "grad_norm": 2.241105556488037, "learning_rate": 1.998426815017817e-05, "loss": 1.1964, "step": 826 }, { "epoch": 0.05, "grad_norm": 2.2614150047302246, "learning_rate": 1.9984163820803755e-05, "loss": 1.1448, "step": 827 }, { "epoch": 0.05, "grad_norm": 2.063995599746704, "learning_rate": 1.9984059146903738e-05, "loss": 1.0534, "step": 828 }, { "epoch": 0.05, "grad_norm": 2.260887384414673, "learning_rate": 1.998395412848173e-05, "loss": 1.0778, "step": 829 }, { "epoch": 0.05, "grad_norm": 2.075248956680298, "learning_rate": 1.9983848765541355e-05, "loss": 1.1163, "step": 830 }, { "epoch": 0.05, "grad_norm": 1.9940873384475708, "learning_rate": 1.998374305808625e-05, "loss": 1.1226, "step": 831 }, { "epoch": 0.05, "grad_norm": 2.212376356124878, "learning_rate": 1.9983637006120054e-05, "loss": 1.1138, "step": 832 }, { "epoch": 0.05, "grad_norm": 2.664011240005493, "learning_rate": 1.998353060964644e-05, "loss": 1.197, "step": 833 }, { "epoch": 0.05, "grad_norm": 1.432181715965271, "learning_rate": 1.9983423868669068e-05, "loss": 0.5363, "step": 834 }, { "epoch": 0.05, "grad_norm": 2.5739877223968506, "learning_rate": 1.9983316783191626e-05, "loss": 1.1845, "step": 835 }, { "epoch": 0.05, "grad_norm": 2.4608774185180664, "learning_rate": 1.998320935321781e-05, "loss": 1.1445, "step": 836 }, { "epoch": 0.05, "grad_norm": 2.483450412750244, "learning_rate": 1.9983101578751326e-05, "loss": 1.2228, "step": 837 }, { "epoch": 0.05, "grad_norm": 2.2174501419067383, "learning_rate": 1.9982993459795897e-05, "loss": 1.1913, "step": 838 }, { "epoch": 0.05, "grad_norm": 2.273247241973877, "learning_rate": 1.9982884996355248e-05, "loss": 1.128, "step": 839 }, { "epoch": 0.05, "grad_norm": 2.36789608001709, "learning_rate": 1.998277618843312e-05, "loss": 1.1906, "step": 840 }, { "epoch": 0.05, "grad_norm": 2.30841064453125, "learning_rate": 1.9982667036033274e-05, "loss": 1.1506, "step": 841 }, { "epoch": 0.05, "grad_norm": 2.3792459964752197, "learning_rate": 1.9982557539159476e-05, "loss": 1.1036, "step": 842 }, { "epoch": 0.05, "grad_norm": 2.5239768028259277, "learning_rate": 1.99824476978155e-05, "loss": 1.1803, "step": 843 }, { "epoch": 0.05, "grad_norm": 2.2788782119750977, "learning_rate": 1.998233751200514e-05, "loss": 1.0712, "step": 844 }, { "epoch": 0.05, "grad_norm": 2.088679552078247, "learning_rate": 1.9982226981732197e-05, "loss": 1.1522, "step": 845 }, { "epoch": 0.05, "grad_norm": 2.284238338470459, "learning_rate": 1.9982116107000485e-05, "loss": 1.1452, "step": 846 }, { "epoch": 0.05, "grad_norm": 2.2757132053375244, "learning_rate": 1.998200488781383e-05, "loss": 1.1883, "step": 847 }, { "epoch": 0.05, "grad_norm": 2.2445168495178223, "learning_rate": 1.9981893324176067e-05, "loss": 1.2275, "step": 848 }, { "epoch": 0.05, "grad_norm": 2.1674227714538574, "learning_rate": 1.998178141609105e-05, "loss": 1.0809, "step": 849 }, { "epoch": 0.05, "grad_norm": 2.361021041870117, "learning_rate": 1.9981669163562642e-05, "loss": 1.1416, "step": 850 }, { "epoch": 0.05, "grad_norm": 2.0345869064331055, "learning_rate": 1.9981556566594712e-05, "loss": 1.1171, "step": 851 }, { "epoch": 0.05, "grad_norm": 1.9510729312896729, "learning_rate": 1.9981443625191148e-05, "loss": 1.072, "step": 852 }, { "epoch": 0.05, "grad_norm": 2.1771600246429443, "learning_rate": 1.9981330339355846e-05, "loss": 1.0939, "step": 853 }, { "epoch": 0.05, "grad_norm": 1.4299519062042236, "learning_rate": 1.9981216709092715e-05, "loss": 0.5579, "step": 854 }, { "epoch": 0.05, "grad_norm": 2.224153518676758, "learning_rate": 1.9981102734405676e-05, "loss": 1.1466, "step": 855 }, { "epoch": 0.05, "grad_norm": 1.350743055343628, "learning_rate": 1.9980988415298666e-05, "loss": 0.59, "step": 856 }, { "epoch": 0.05, "grad_norm": 2.270078659057617, "learning_rate": 1.9980873751775625e-05, "loss": 1.1537, "step": 857 }, { "epoch": 0.05, "grad_norm": 2.3195390701293945, "learning_rate": 1.998075874384051e-05, "loss": 1.2126, "step": 858 }, { "epoch": 0.05, "grad_norm": 2.1503279209136963, "learning_rate": 1.998064339149729e-05, "loss": 1.05, "step": 859 }, { "epoch": 0.05, "grad_norm": 2.3343451023101807, "learning_rate": 1.9980527694749952e-05, "loss": 1.1342, "step": 860 }, { "epoch": 0.05, "grad_norm": 2.1604702472686768, "learning_rate": 1.9980411653602477e-05, "loss": 1.1234, "step": 861 }, { "epoch": 0.05, "grad_norm": 2.3645260334014893, "learning_rate": 1.998029526805888e-05, "loss": 1.1606, "step": 862 }, { "epoch": 0.05, "grad_norm": 2.0832858085632324, "learning_rate": 1.998017853812317e-05, "loss": 1.0962, "step": 863 }, { "epoch": 0.05, "grad_norm": 2.1821837425231934, "learning_rate": 1.9980061463799374e-05, "loss": 1.1678, "step": 864 }, { "epoch": 0.05, "grad_norm": 2.194148063659668, "learning_rate": 1.9979944045091537e-05, "loss": 1.1331, "step": 865 }, { "epoch": 0.05, "grad_norm": 1.22012197971344, "learning_rate": 1.9979826282003708e-05, "loss": 0.5133, "step": 866 }, { "epoch": 0.05, "grad_norm": 2.0265305042266846, "learning_rate": 1.9979708174539954e-05, "loss": 1.0999, "step": 867 }, { "epoch": 0.05, "grad_norm": 2.016631841659546, "learning_rate": 1.9979589722704348e-05, "loss": 1.0722, "step": 868 }, { "epoch": 0.05, "grad_norm": 1.9256972074508667, "learning_rate": 1.9979470926500977e-05, "loss": 1.1134, "step": 869 }, { "epoch": 0.05, "grad_norm": 2.1395270824432373, "learning_rate": 1.997935178593394e-05, "loss": 1.1621, "step": 870 }, { "epoch": 0.05, "grad_norm": 2.0393176078796387, "learning_rate": 1.9979232301007348e-05, "loss": 1.116, "step": 871 }, { "epoch": 0.05, "grad_norm": 2.2395260334014893, "learning_rate": 1.9979112471725326e-05, "loss": 1.1365, "step": 872 }, { "epoch": 0.05, "grad_norm": 2.9232733249664307, "learning_rate": 1.997899229809201e-05, "loss": 1.1507, "step": 873 }, { "epoch": 0.05, "grad_norm": 1.900830626487732, "learning_rate": 1.9978871780111544e-05, "loss": 1.0534, "step": 874 }, { "epoch": 0.05, "grad_norm": 1.9295040369033813, "learning_rate": 1.997875091778809e-05, "loss": 1.0768, "step": 875 }, { "epoch": 0.05, "grad_norm": 2.216456651687622, "learning_rate": 1.997862971112581e-05, "loss": 1.1466, "step": 876 }, { "epoch": 0.05, "grad_norm": 2.250154972076416, "learning_rate": 1.9978508160128896e-05, "loss": 1.0858, "step": 877 }, { "epoch": 0.05, "grad_norm": 2.1935391426086426, "learning_rate": 1.997838626480154e-05, "loss": 1.083, "step": 878 }, { "epoch": 0.05, "grad_norm": 1.9666025638580322, "learning_rate": 1.9978264025147947e-05, "loss": 1.1814, "step": 879 }, { "epoch": 0.05, "grad_norm": 1.9172236919403076, "learning_rate": 1.997814144117234e-05, "loss": 1.0622, "step": 880 }, { "epoch": 0.05, "grad_norm": 1.1978591680526733, "learning_rate": 1.9978018512878938e-05, "loss": 0.5714, "step": 881 }, { "epoch": 0.05, "grad_norm": 1.999265193939209, "learning_rate": 1.9977895240271992e-05, "loss": 1.1923, "step": 882 }, { "epoch": 0.05, "grad_norm": 2.092198133468628, "learning_rate": 1.9977771623355752e-05, "loss": 1.0511, "step": 883 }, { "epoch": 0.05, "grad_norm": 2.153632164001465, "learning_rate": 1.997764766213449e-05, "loss": 1.1513, "step": 884 }, { "epoch": 0.05, "grad_norm": 2.2152228355407715, "learning_rate": 1.997752335661247e-05, "loss": 1.0787, "step": 885 }, { "epoch": 0.05, "grad_norm": 2.1043803691864014, "learning_rate": 1.9977398706794e-05, "loss": 1.0355, "step": 886 }, { "epoch": 0.05, "grad_norm": 2.202075481414795, "learning_rate": 1.9977273712683366e-05, "loss": 1.112, "step": 887 }, { "epoch": 0.05, "grad_norm": 2.01200008392334, "learning_rate": 1.9977148374284886e-05, "loss": 1.0716, "step": 888 }, { "epoch": 0.05, "grad_norm": 2.144124984741211, "learning_rate": 1.9977022691602888e-05, "loss": 1.1628, "step": 889 }, { "epoch": 0.05, "grad_norm": 2.320427894592285, "learning_rate": 1.9976896664641706e-05, "loss": 1.1079, "step": 890 }, { "epoch": 0.05, "grad_norm": 2.182781219482422, "learning_rate": 1.9976770293405687e-05, "loss": 1.0892, "step": 891 }, { "epoch": 0.05, "grad_norm": 2.0892481803894043, "learning_rate": 1.9976643577899194e-05, "loss": 1.1918, "step": 892 }, { "epoch": 0.05, "grad_norm": 2.1765358448028564, "learning_rate": 1.9976516518126603e-05, "loss": 1.1628, "step": 893 }, { "epoch": 0.05, "grad_norm": 2.0505526065826416, "learning_rate": 1.9976389114092293e-05, "loss": 1.1478, "step": 894 }, { "epoch": 0.05, "grad_norm": 2.2590537071228027, "learning_rate": 1.9976261365800666e-05, "loss": 1.1414, "step": 895 }, { "epoch": 0.05, "grad_norm": 2.4349069595336914, "learning_rate": 1.9976133273256126e-05, "loss": 1.1164, "step": 896 }, { "epoch": 0.05, "grad_norm": 1.8681683540344238, "learning_rate": 1.997600483646309e-05, "loss": 1.1127, "step": 897 }, { "epoch": 0.05, "grad_norm": 2.2176530361175537, "learning_rate": 1.9975876055425995e-05, "loss": 1.0954, "step": 898 }, { "epoch": 0.05, "grad_norm": 2.453000068664551, "learning_rate": 1.9975746930149287e-05, "loss": 1.143, "step": 899 }, { "epoch": 0.05, "grad_norm": 2.231546401977539, "learning_rate": 1.9975617460637417e-05, "loss": 1.0971, "step": 900 }, { "epoch": 0.05, "grad_norm": 2.0050864219665527, "learning_rate": 1.9975487646894854e-05, "loss": 1.1196, "step": 901 }, { "epoch": 0.05, "grad_norm": 2.118800640106201, "learning_rate": 1.9975357488926077e-05, "loss": 1.1198, "step": 902 }, { "epoch": 0.05, "grad_norm": 2.1400129795074463, "learning_rate": 1.9975226986735578e-05, "loss": 1.1429, "step": 903 }, { "epoch": 0.05, "grad_norm": 2.1674747467041016, "learning_rate": 1.997509614032786e-05, "loss": 1.0515, "step": 904 }, { "epoch": 0.05, "grad_norm": 2.41999888420105, "learning_rate": 1.997496494970744e-05, "loss": 1.1189, "step": 905 }, { "epoch": 0.05, "grad_norm": 1.1876188516616821, "learning_rate": 1.9974833414878846e-05, "loss": 0.5326, "step": 906 }, { "epoch": 0.05, "grad_norm": 2.1859302520751953, "learning_rate": 1.997470153584661e-05, "loss": 1.0934, "step": 907 }, { "epoch": 0.05, "grad_norm": 2.0946156978607178, "learning_rate": 1.997456931261529e-05, "loss": 1.0923, "step": 908 }, { "epoch": 0.05, "grad_norm": 1.942283272743225, "learning_rate": 1.9974436745189444e-05, "loss": 1.0891, "step": 909 }, { "epoch": 0.05, "grad_norm": 2.2902328968048096, "learning_rate": 1.9974303833573646e-05, "loss": 1.1458, "step": 910 }, { "epoch": 0.05, "grad_norm": 2.140209674835205, "learning_rate": 1.997417057777249e-05, "loss": 1.1264, "step": 911 }, { "epoch": 0.05, "grad_norm": 2.2432613372802734, "learning_rate": 1.9974036977790566e-05, "loss": 1.0763, "step": 912 }, { "epoch": 0.05, "grad_norm": 2.3520562648773193, "learning_rate": 1.997390303363249e-05, "loss": 1.1707, "step": 913 }, { "epoch": 0.05, "grad_norm": 2.0676722526550293, "learning_rate": 1.997376874530288e-05, "loss": 1.1722, "step": 914 }, { "epoch": 0.05, "grad_norm": 2.253856897354126, "learning_rate": 1.997363411280637e-05, "loss": 1.1562, "step": 915 }, { "epoch": 0.05, "grad_norm": 2.1041007041931152, "learning_rate": 1.997349913614761e-05, "loss": 1.0894, "step": 916 }, { "epoch": 0.05, "grad_norm": 2.2459332942962646, "learning_rate": 1.9973363815331248e-05, "loss": 1.108, "step": 917 }, { "epoch": 0.05, "grad_norm": 2.2529101371765137, "learning_rate": 1.9973228150361965e-05, "loss": 1.0633, "step": 918 }, { "epoch": 0.05, "grad_norm": 2.222435712814331, "learning_rate": 1.9973092141244436e-05, "loss": 1.1954, "step": 919 }, { "epoch": 0.05, "grad_norm": 2.0947751998901367, "learning_rate": 1.997295578798336e-05, "loss": 1.1247, "step": 920 }, { "epoch": 0.05, "grad_norm": 2.228332757949829, "learning_rate": 1.9972819090583433e-05, "loss": 1.1268, "step": 921 }, { "epoch": 0.05, "grad_norm": 2.1569807529449463, "learning_rate": 1.9972682049049378e-05, "loss": 1.0752, "step": 922 }, { "epoch": 0.05, "grad_norm": 2.2037651538848877, "learning_rate": 1.9972544663385927e-05, "loss": 1.1937, "step": 923 }, { "epoch": 0.05, "grad_norm": 2.0998852252960205, "learning_rate": 1.9972406933597812e-05, "loss": 0.996, "step": 924 }, { "epoch": 0.05, "grad_norm": 2.0960259437561035, "learning_rate": 1.9972268859689792e-05, "loss": 1.2225, "step": 925 }, { "epoch": 0.05, "grad_norm": 2.0508906841278076, "learning_rate": 1.997213044166663e-05, "loss": 1.1079, "step": 926 }, { "epoch": 0.05, "grad_norm": 2.122688055038452, "learning_rate": 1.9971991679533103e-05, "loss": 1.0868, "step": 927 }, { "epoch": 0.05, "grad_norm": 2.0843045711517334, "learning_rate": 1.9971852573294003e-05, "loss": 1.087, "step": 928 }, { "epoch": 0.05, "grad_norm": 2.288818836212158, "learning_rate": 1.997171312295412e-05, "loss": 1.1175, "step": 929 }, { "epoch": 0.05, "grad_norm": 2.209801197052002, "learning_rate": 1.9971573328518273e-05, "loss": 1.1453, "step": 930 }, { "epoch": 0.05, "grad_norm": 2.073472738265991, "learning_rate": 1.9971433189991286e-05, "loss": 1.1381, "step": 931 }, { "epoch": 0.05, "grad_norm": 2.2973203659057617, "learning_rate": 1.997129270737799e-05, "loss": 1.1153, "step": 932 }, { "epoch": 0.05, "grad_norm": 2.0285723209381104, "learning_rate": 1.997115188068324e-05, "loss": 1.1248, "step": 933 }, { "epoch": 0.05, "grad_norm": 3.3216347694396973, "learning_rate": 1.9971010709911892e-05, "loss": 1.1412, "step": 934 }, { "epoch": 0.05, "grad_norm": 2.091198205947876, "learning_rate": 1.997086919506882e-05, "loss": 1.1571, "step": 935 }, { "epoch": 0.05, "grad_norm": 2.179415225982666, "learning_rate": 1.9970727336158902e-05, "loss": 1.1332, "step": 936 }, { "epoch": 0.05, "grad_norm": 2.1212592124938965, "learning_rate": 1.9970585133187034e-05, "loss": 1.1411, "step": 937 }, { "epoch": 0.05, "grad_norm": 2.2156178951263428, "learning_rate": 1.997044258615813e-05, "loss": 1.1483, "step": 938 }, { "epoch": 0.05, "grad_norm": 2.3334157466888428, "learning_rate": 1.9970299695077095e-05, "loss": 1.1107, "step": 939 }, { "epoch": 0.05, "grad_norm": 2.22662353515625, "learning_rate": 1.9970156459948872e-05, "loss": 1.1499, "step": 940 }, { "epoch": 0.05, "grad_norm": 2.175327777862549, "learning_rate": 1.9970012880778404e-05, "loss": 1.0863, "step": 941 }, { "epoch": 0.05, "grad_norm": 1.9993934631347656, "learning_rate": 1.9969868957570638e-05, "loss": 1.0823, "step": 942 }, { "epoch": 0.05, "grad_norm": 2.5160818099975586, "learning_rate": 1.9969724690330543e-05, "loss": 1.1077, "step": 943 }, { "epoch": 0.05, "grad_norm": 1.1422719955444336, "learning_rate": 1.9969580079063104e-05, "loss": 0.5942, "step": 944 }, { "epoch": 0.05, "grad_norm": 1.0571203231811523, "learning_rate": 1.99694351237733e-05, "loss": 0.5021, "step": 945 }, { "epoch": 0.05, "grad_norm": 2.6621596813201904, "learning_rate": 1.9969289824466138e-05, "loss": 1.1798, "step": 946 }, { "epoch": 0.05, "grad_norm": 2.3779985904693604, "learning_rate": 1.9969144181146634e-05, "loss": 1.1387, "step": 947 }, { "epoch": 0.05, "grad_norm": 2.196004629135132, "learning_rate": 1.996899819381981e-05, "loss": 1.165, "step": 948 }, { "epoch": 0.05, "grad_norm": 1.9988819360733032, "learning_rate": 1.996885186249071e-05, "loss": 1.191, "step": 949 }, { "epoch": 0.05, "grad_norm": 1.956302523612976, "learning_rate": 1.9968705187164375e-05, "loss": 1.0856, "step": 950 }, { "epoch": 0.05, "grad_norm": 2.3769612312316895, "learning_rate": 1.9968558167845873e-05, "loss": 1.1194, "step": 951 }, { "epoch": 0.05, "grad_norm": 2.7141518592834473, "learning_rate": 1.9968410804540273e-05, "loss": 1.162, "step": 952 }, { "epoch": 0.05, "grad_norm": 1.9573251008987427, "learning_rate": 1.9968263097252666e-05, "loss": 1.0224, "step": 953 }, { "epoch": 0.05, "grad_norm": 2.112016201019287, "learning_rate": 1.996811504598814e-05, "loss": 1.1112, "step": 954 }, { "epoch": 0.05, "grad_norm": 2.384512424468994, "learning_rate": 1.9967966650751808e-05, "loss": 1.2376, "step": 955 }, { "epoch": 0.05, "grad_norm": 2.255016803741455, "learning_rate": 1.9967817911548796e-05, "loss": 1.1659, "step": 956 }, { "epoch": 0.05, "grad_norm": 2.12328839302063, "learning_rate": 1.9967668828384227e-05, "loss": 1.0954, "step": 957 }, { "epoch": 0.05, "grad_norm": 1.163557767868042, "learning_rate": 1.996751940126325e-05, "loss": 0.5296, "step": 958 }, { "epoch": 0.06, "grad_norm": 2.215634822845459, "learning_rate": 1.996736963019103e-05, "loss": 1.2042, "step": 959 }, { "epoch": 0.06, "grad_norm": 2.477297782897949, "learning_rate": 1.996721951517272e-05, "loss": 1.145, "step": 960 }, { "epoch": 0.06, "grad_norm": 2.229360342025757, "learning_rate": 1.9967069056213503e-05, "loss": 1.0969, "step": 961 }, { "epoch": 0.06, "grad_norm": 1.1252036094665527, "learning_rate": 1.996691825331858e-05, "loss": 0.5572, "step": 962 }, { "epoch": 0.06, "grad_norm": 2.113152027130127, "learning_rate": 1.996676710649315e-05, "loss": 1.0193, "step": 963 }, { "epoch": 0.06, "grad_norm": 2.158536434173584, "learning_rate": 1.9966615615742423e-05, "loss": 1.1747, "step": 964 }, { "epoch": 0.06, "grad_norm": 2.04012131690979, "learning_rate": 1.9966463781071637e-05, "loss": 1.2352, "step": 965 }, { "epoch": 0.06, "grad_norm": 2.0038750171661377, "learning_rate": 1.9966311602486027e-05, "loss": 1.1696, "step": 966 }, { "epoch": 0.06, "grad_norm": 2.1827383041381836, "learning_rate": 1.996615907999084e-05, "loss": 1.0811, "step": 967 }, { "epoch": 0.06, "grad_norm": 2.425194025039673, "learning_rate": 1.9966006213591346e-05, "loss": 1.1728, "step": 968 }, { "epoch": 0.06, "grad_norm": 2.1980483531951904, "learning_rate": 1.9965853003292813e-05, "loss": 1.0274, "step": 969 }, { "epoch": 0.06, "grad_norm": 2.334479331970215, "learning_rate": 1.996569944910053e-05, "loss": 1.0978, "step": 970 }, { "epoch": 0.06, "grad_norm": 2.199012517929077, "learning_rate": 1.99655455510198e-05, "loss": 1.0849, "step": 971 }, { "epoch": 0.06, "grad_norm": 2.089942455291748, "learning_rate": 1.996539130905593e-05, "loss": 1.096, "step": 972 }, { "epoch": 0.06, "grad_norm": 1.2454452514648438, "learning_rate": 1.9965236723214242e-05, "loss": 0.5598, "step": 973 }, { "epoch": 0.06, "grad_norm": 2.190110206604004, "learning_rate": 1.9965081793500073e-05, "loss": 1.1166, "step": 974 }, { "epoch": 0.06, "grad_norm": 2.2716000080108643, "learning_rate": 1.996492651991877e-05, "loss": 1.1074, "step": 975 }, { "epoch": 0.06, "grad_norm": 2.482877731323242, "learning_rate": 1.9964770902475686e-05, "loss": 1.058, "step": 976 }, { "epoch": 0.06, "grad_norm": 2.189729690551758, "learning_rate": 1.9964614941176194e-05, "loss": 1.1188, "step": 977 }, { "epoch": 0.06, "grad_norm": 2.3052361011505127, "learning_rate": 1.9964458636025673e-05, "loss": 1.1462, "step": 978 }, { "epoch": 0.06, "grad_norm": 2.1463403701782227, "learning_rate": 1.9964301987029523e-05, "loss": 1.0888, "step": 979 }, { "epoch": 0.06, "grad_norm": 2.032259941101074, "learning_rate": 1.9964144994193143e-05, "loss": 1.0542, "step": 980 }, { "epoch": 0.06, "grad_norm": 2.0936243534088135, "learning_rate": 1.996398765752195e-05, "loss": 1.1355, "step": 981 }, { "epoch": 0.06, "grad_norm": 1.9559952020645142, "learning_rate": 1.9963829977021384e-05, "loss": 1.0634, "step": 982 }, { "epoch": 0.06, "grad_norm": 1.9189354181289673, "learning_rate": 1.9963671952696876e-05, "loss": 1.0758, "step": 983 }, { "epoch": 0.06, "grad_norm": 2.0477375984191895, "learning_rate": 1.9963513584553878e-05, "loss": 1.0469, "step": 984 }, { "epoch": 0.06, "grad_norm": 1.9667751789093018, "learning_rate": 1.996335487259786e-05, "loss": 1.108, "step": 985 }, { "epoch": 0.06, "grad_norm": 2.4961416721343994, "learning_rate": 1.9963195816834297e-05, "loss": 1.1275, "step": 986 }, { "epoch": 0.06, "grad_norm": 2.1340491771698, "learning_rate": 1.9963036417268674e-05, "loss": 1.0724, "step": 987 }, { "epoch": 0.06, "grad_norm": 1.0602165460586548, "learning_rate": 1.99628766739065e-05, "loss": 0.5734, "step": 988 }, { "epoch": 0.06, "grad_norm": 2.125601053237915, "learning_rate": 1.996271658675328e-05, "loss": 1.0747, "step": 989 }, { "epoch": 0.06, "grad_norm": 2.029818296432495, "learning_rate": 1.996255615581454e-05, "loss": 1.1072, "step": 990 }, { "epoch": 0.06, "grad_norm": 2.109917640686035, "learning_rate": 1.9962395381095817e-05, "loss": 1.0952, "step": 991 }, { "epoch": 0.06, "grad_norm": 2.0499300956726074, "learning_rate": 1.9962234262602656e-05, "loss": 1.1763, "step": 992 }, { "epoch": 0.06, "grad_norm": 1.9918546676635742, "learning_rate": 1.9962072800340625e-05, "loss": 1.0755, "step": 993 }, { "epoch": 0.06, "grad_norm": 2.075986623764038, "learning_rate": 1.9961910994315284e-05, "loss": 1.0751, "step": 994 }, { "epoch": 0.06, "grad_norm": 1.937086582183838, "learning_rate": 1.996174884453222e-05, "loss": 1.0872, "step": 995 }, { "epoch": 0.06, "grad_norm": 2.0692360401153564, "learning_rate": 1.9961586350997037e-05, "loss": 1.1102, "step": 996 }, { "epoch": 0.06, "grad_norm": 2.037346363067627, "learning_rate": 1.9961423513715333e-05, "loss": 1.094, "step": 997 }, { "epoch": 0.06, "grad_norm": 2.230212450027466, "learning_rate": 1.9961260332692728e-05, "loss": 1.0867, "step": 998 }, { "epoch": 0.06, "grad_norm": 2.1400532722473145, "learning_rate": 1.9961096807934855e-05, "loss": 1.2046, "step": 999 }, { "epoch": 0.06, "grad_norm": 2.0511980056762695, "learning_rate": 1.996093293944736e-05, "loss": 1.151, "step": 1000 }, { "epoch": 0.06, "grad_norm": 1.9836289882659912, "learning_rate": 1.9960768727235888e-05, "loss": 1.1205, "step": 1001 }, { "epoch": 0.06, "grad_norm": 2.0572667121887207, "learning_rate": 1.9960604171306116e-05, "loss": 1.082, "step": 1002 }, { "epoch": 0.06, "grad_norm": 2.1632437705993652, "learning_rate": 1.9960439271663717e-05, "loss": 1.1114, "step": 1003 }, { "epoch": 0.06, "grad_norm": 2.331650495529175, "learning_rate": 1.996027402831438e-05, "loss": 1.1712, "step": 1004 }, { "epoch": 0.06, "grad_norm": 2.2957890033721924, "learning_rate": 1.996010844126381e-05, "loss": 1.1529, "step": 1005 }, { "epoch": 0.06, "grad_norm": 1.9415627717971802, "learning_rate": 1.995994251051772e-05, "loss": 1.0596, "step": 1006 }, { "epoch": 0.06, "grad_norm": 2.061467409133911, "learning_rate": 1.9959776236081837e-05, "loss": 1.0728, "step": 1007 }, { "epoch": 0.06, "grad_norm": 2.1467807292938232, "learning_rate": 1.9959609617961898e-05, "loss": 1.1793, "step": 1008 }, { "epoch": 0.06, "grad_norm": 2.126514196395874, "learning_rate": 1.9959442656163653e-05, "loss": 1.1063, "step": 1009 }, { "epoch": 0.06, "grad_norm": 2.0872507095336914, "learning_rate": 1.9959275350692862e-05, "loss": 1.1129, "step": 1010 }, { "epoch": 0.06, "grad_norm": 2.078003406524658, "learning_rate": 1.9959107701555295e-05, "loss": 1.0982, "step": 1011 }, { "epoch": 0.06, "grad_norm": 2.3776357173919678, "learning_rate": 1.9958939708756746e-05, "loss": 1.1107, "step": 1012 }, { "epoch": 0.06, "grad_norm": 2.223130941390991, "learning_rate": 1.9958771372303e-05, "loss": 1.1322, "step": 1013 }, { "epoch": 0.06, "grad_norm": 2.150520086288452, "learning_rate": 1.9958602692199883e-05, "loss": 1.0494, "step": 1014 }, { "epoch": 0.06, "grad_norm": 1.2399572134017944, "learning_rate": 1.99584336684532e-05, "loss": 0.5704, "step": 1015 }, { "epoch": 0.06, "grad_norm": 2.046128273010254, "learning_rate": 1.9958264301068788e-05, "loss": 1.1085, "step": 1016 }, { "epoch": 0.06, "grad_norm": 2.2082788944244385, "learning_rate": 1.995809459005249e-05, "loss": 1.1426, "step": 1017 }, { "epoch": 0.06, "grad_norm": 2.005742073059082, "learning_rate": 1.9957924535410166e-05, "loss": 1.1323, "step": 1018 }, { "epoch": 0.06, "grad_norm": 2.1082708835601807, "learning_rate": 1.9957754137147683e-05, "loss": 1.172, "step": 1019 }, { "epoch": 0.06, "grad_norm": 1.9350619316101074, "learning_rate": 1.9957583395270924e-05, "loss": 1.1907, "step": 1020 }, { "epoch": 0.06, "grad_norm": 1.968170404434204, "learning_rate": 1.9957412309785776e-05, "loss": 1.1801, "step": 1021 }, { "epoch": 0.06, "grad_norm": 2.2556710243225098, "learning_rate": 1.995724088069814e-05, "loss": 1.1172, "step": 1022 }, { "epoch": 0.06, "grad_norm": 1.90117347240448, "learning_rate": 1.995706910801394e-05, "loss": 1.1361, "step": 1023 }, { "epoch": 0.06, "grad_norm": 2.2194061279296875, "learning_rate": 1.9956896991739095e-05, "loss": 1.1273, "step": 1024 }, { "epoch": 0.06, "grad_norm": 2.3678059577941895, "learning_rate": 1.995672453187955e-05, "loss": 1.1841, "step": 1025 }, { "epoch": 0.06, "grad_norm": 2.1089444160461426, "learning_rate": 1.995655172844126e-05, "loss": 1.1475, "step": 1026 }, { "epoch": 0.06, "grad_norm": 2.067833662033081, "learning_rate": 1.9956378581430175e-05, "loss": 1.1074, "step": 1027 }, { "epoch": 0.06, "grad_norm": 1.961103916168213, "learning_rate": 1.995620509085228e-05, "loss": 1.1198, "step": 1028 }, { "epoch": 0.06, "grad_norm": 2.339808702468872, "learning_rate": 1.995603125671356e-05, "loss": 1.0691, "step": 1029 }, { "epoch": 0.06, "grad_norm": 2.1119163036346436, "learning_rate": 1.995585707902001e-05, "loss": 1.076, "step": 1030 }, { "epoch": 0.06, "grad_norm": 2.0598556995391846, "learning_rate": 1.9955682557777644e-05, "loss": 1.1673, "step": 1031 }, { "epoch": 0.06, "grad_norm": 2.155109167098999, "learning_rate": 1.9955507692992482e-05, "loss": 1.104, "step": 1032 }, { "epoch": 0.06, "grad_norm": 2.061760663986206, "learning_rate": 1.995533248467056e-05, "loss": 1.0374, "step": 1033 }, { "epoch": 0.06, "grad_norm": 1.8904452323913574, "learning_rate": 1.995515693281792e-05, "loss": 1.0838, "step": 1034 }, { "epoch": 0.06, "grad_norm": 2.0027434825897217, "learning_rate": 1.995498103744063e-05, "loss": 1.1246, "step": 1035 }, { "epoch": 0.06, "grad_norm": 2.001688003540039, "learning_rate": 1.9954804798544748e-05, "loss": 1.1625, "step": 1036 }, { "epoch": 0.06, "grad_norm": 2.0883662700653076, "learning_rate": 1.995462821613636e-05, "loss": 1.1605, "step": 1037 }, { "epoch": 0.06, "grad_norm": 2.1495673656463623, "learning_rate": 1.9954451290221558e-05, "loss": 1.1232, "step": 1038 }, { "epoch": 0.06, "grad_norm": 2.0513052940368652, "learning_rate": 1.995427402080645e-05, "loss": 1.032, "step": 1039 }, { "epoch": 0.06, "grad_norm": 2.2151739597320557, "learning_rate": 1.9954096407897154e-05, "loss": 1.0669, "step": 1040 }, { "epoch": 0.06, "grad_norm": 2.0288562774658203, "learning_rate": 1.9953918451499797e-05, "loss": 1.216, "step": 1041 }, { "epoch": 0.06, "grad_norm": 2.1106815338134766, "learning_rate": 1.9953740151620515e-05, "loss": 1.1526, "step": 1042 }, { "epoch": 0.06, "grad_norm": 1.88935124874115, "learning_rate": 1.9953561508265466e-05, "loss": 1.136, "step": 1043 }, { "epoch": 0.06, "grad_norm": 2.146790027618408, "learning_rate": 1.9953382521440816e-05, "loss": 1.1014, "step": 1044 }, { "epoch": 0.06, "grad_norm": 2.2105722427368164, "learning_rate": 1.9953203191152736e-05, "loss": 1.1983, "step": 1045 }, { "epoch": 0.06, "grad_norm": 1.9890414476394653, "learning_rate": 1.995302351740742e-05, "loss": 1.149, "step": 1046 }, { "epoch": 0.06, "grad_norm": 2.050382137298584, "learning_rate": 1.9952843500211062e-05, "loss": 1.0702, "step": 1047 }, { "epoch": 0.06, "grad_norm": 2.1800410747528076, "learning_rate": 1.995266313956988e-05, "loss": 1.133, "step": 1048 }, { "epoch": 0.06, "grad_norm": 2.163120985031128, "learning_rate": 1.9952482435490094e-05, "loss": 1.1368, "step": 1049 }, { "epoch": 0.06, "grad_norm": 1.1013380289077759, "learning_rate": 1.9952301387977937e-05, "loss": 0.5949, "step": 1050 }, { "epoch": 0.06, "grad_norm": 2.332686185836792, "learning_rate": 1.9952119997039664e-05, "loss": 1.102, "step": 1051 }, { "epoch": 0.06, "grad_norm": 2.0294029712677, "learning_rate": 1.9951938262681527e-05, "loss": 1.0779, "step": 1052 }, { "epoch": 0.06, "grad_norm": 2.4254696369171143, "learning_rate": 1.99517561849098e-05, "loss": 1.1108, "step": 1053 }, { "epoch": 0.06, "grad_norm": 1.8786166906356812, "learning_rate": 1.995157376373077e-05, "loss": 1.0588, "step": 1054 }, { "epoch": 0.06, "grad_norm": 2.1033036708831787, "learning_rate": 1.9951390999150723e-05, "loss": 1.1072, "step": 1055 }, { "epoch": 0.06, "grad_norm": 1.1297013759613037, "learning_rate": 1.9951207891175973e-05, "loss": 0.5833, "step": 1056 }, { "epoch": 0.06, "grad_norm": 2.257452964782715, "learning_rate": 1.9951024439812837e-05, "loss": 1.0726, "step": 1057 }, { "epoch": 0.06, "grad_norm": 2.0183420181274414, "learning_rate": 1.9950840645067643e-05, "loss": 1.0994, "step": 1058 }, { "epoch": 0.06, "grad_norm": 2.3594791889190674, "learning_rate": 1.9950656506946737e-05, "loss": 1.0316, "step": 1059 }, { "epoch": 0.06, "grad_norm": 0.9593797326087952, "learning_rate": 1.9950472025456472e-05, "loss": 0.5592, "step": 1060 }, { "epoch": 0.06, "grad_norm": 2.1703927516937256, "learning_rate": 1.9950287200603208e-05, "loss": 1.1544, "step": 1061 }, { "epoch": 0.06, "grad_norm": 2.1522278785705566, "learning_rate": 1.995010203239333e-05, "loss": 1.0469, "step": 1062 }, { "epoch": 0.06, "grad_norm": 2.4177536964416504, "learning_rate": 1.9949916520833228e-05, "loss": 1.1175, "step": 1063 }, { "epoch": 0.06, "grad_norm": 2.304537534713745, "learning_rate": 1.99497306659293e-05, "loss": 1.1106, "step": 1064 }, { "epoch": 0.06, "grad_norm": 2.193523406982422, "learning_rate": 1.9949544467687957e-05, "loss": 1.1737, "step": 1065 }, { "epoch": 0.06, "grad_norm": 2.5079402923583984, "learning_rate": 1.994935792611563e-05, "loss": 1.1795, "step": 1066 }, { "epoch": 0.06, "grad_norm": 2.331172466278076, "learning_rate": 1.9949171041218754e-05, "loss": 1.1103, "step": 1067 }, { "epoch": 0.06, "grad_norm": 2.36053729057312, "learning_rate": 1.9948983813003776e-05, "loss": 1.127, "step": 1068 }, { "epoch": 0.06, "grad_norm": 2.178652763366699, "learning_rate": 1.9948796241477157e-05, "loss": 1.1637, "step": 1069 }, { "epoch": 0.06, "grad_norm": 1.9500905275344849, "learning_rate": 1.994860832664537e-05, "loss": 1.1083, "step": 1070 }, { "epoch": 0.06, "grad_norm": 1.9139505624771118, "learning_rate": 1.9948420068514904e-05, "loss": 1.0694, "step": 1071 }, { "epoch": 0.06, "grad_norm": 2.4539225101470947, "learning_rate": 1.9948231467092248e-05, "loss": 1.0925, "step": 1072 }, { "epoch": 0.06, "grad_norm": 2.2782180309295654, "learning_rate": 1.9948042522383915e-05, "loss": 1.0413, "step": 1073 }, { "epoch": 0.06, "grad_norm": 1.1228864192962646, "learning_rate": 1.9947853234396423e-05, "loss": 0.6092, "step": 1074 }, { "epoch": 0.06, "grad_norm": 2.0677382946014404, "learning_rate": 1.99476636031363e-05, "loss": 0.9715, "step": 1075 }, { "epoch": 0.06, "grad_norm": 1.9538472890853882, "learning_rate": 1.99474736286101e-05, "loss": 1.0481, "step": 1076 }, { "epoch": 0.06, "grad_norm": 2.227790117263794, "learning_rate": 1.994728331082437e-05, "loss": 1.1765, "step": 1077 }, { "epoch": 0.06, "grad_norm": 2.1632280349731445, "learning_rate": 1.9947092649785675e-05, "loss": 1.1241, "step": 1078 }, { "epoch": 0.06, "grad_norm": 2.1495938301086426, "learning_rate": 1.9946901645500604e-05, "loss": 1.1532, "step": 1079 }, { "epoch": 0.06, "grad_norm": 2.1210474967956543, "learning_rate": 1.9946710297975743e-05, "loss": 1.0604, "step": 1080 }, { "epoch": 0.06, "grad_norm": 2.076061487197876, "learning_rate": 1.994651860721769e-05, "loss": 1.1047, "step": 1081 }, { "epoch": 0.06, "grad_norm": 2.0261378288269043, "learning_rate": 1.9946326573233068e-05, "loss": 1.1278, "step": 1082 }, { "epoch": 0.06, "grad_norm": 2.0028727054595947, "learning_rate": 1.99461341960285e-05, "loss": 1.1157, "step": 1083 }, { "epoch": 0.06, "grad_norm": 2.0081613063812256, "learning_rate": 1.9945941475610623e-05, "loss": 1.0783, "step": 1084 }, { "epoch": 0.06, "grad_norm": 1.984207034111023, "learning_rate": 1.994574841198609e-05, "loss": 1.1184, "step": 1085 }, { "epoch": 0.06, "grad_norm": 2.1088995933532715, "learning_rate": 1.9945555005161562e-05, "loss": 1.0614, "step": 1086 }, { "epoch": 0.06, "grad_norm": 2.206160545349121, "learning_rate": 1.9945361255143712e-05, "loss": 1.1837, "step": 1087 }, { "epoch": 0.06, "grad_norm": 2.813061237335205, "learning_rate": 1.9945167161939225e-05, "loss": 1.1681, "step": 1088 }, { "epoch": 0.06, "grad_norm": 2.0713086128234863, "learning_rate": 1.99449727255548e-05, "loss": 1.123, "step": 1089 }, { "epoch": 0.06, "grad_norm": 2.0151636600494385, "learning_rate": 1.9944777945997146e-05, "loss": 1.184, "step": 1090 }, { "epoch": 0.06, "grad_norm": 2.175811290740967, "learning_rate": 1.9944582823272985e-05, "loss": 1.1253, "step": 1091 }, { "epoch": 0.06, "grad_norm": 2.008009672164917, "learning_rate": 1.994438735738905e-05, "loss": 1.1741, "step": 1092 }, { "epoch": 0.06, "grad_norm": 1.1865029335021973, "learning_rate": 1.9944191548352088e-05, "loss": 0.513, "step": 1093 }, { "epoch": 0.06, "grad_norm": 2.547581434249878, "learning_rate": 1.994399539616885e-05, "loss": 1.0834, "step": 1094 }, { "epoch": 0.06, "grad_norm": 2.438591241836548, "learning_rate": 1.994379890084611e-05, "loss": 1.1187, "step": 1095 }, { "epoch": 0.06, "grad_norm": 2.3844690322875977, "learning_rate": 1.994360206239065e-05, "loss": 1.1299, "step": 1096 }, { "epoch": 0.06, "grad_norm": 1.0553101301193237, "learning_rate": 1.9943404880809254e-05, "loss": 0.5425, "step": 1097 }, { "epoch": 0.06, "grad_norm": 2.0255768299102783, "learning_rate": 1.9943207356108733e-05, "loss": 1.0835, "step": 1098 }, { "epoch": 0.06, "grad_norm": 1.084327220916748, "learning_rate": 1.9943009488295903e-05, "loss": 0.5322, "step": 1099 }, { "epoch": 0.06, "grad_norm": 2.4381301403045654, "learning_rate": 1.994281127737759e-05, "loss": 1.1489, "step": 1100 }, { "epoch": 0.06, "grad_norm": 2.0825705528259277, "learning_rate": 1.9942612723360632e-05, "loss": 1.1164, "step": 1101 }, { "epoch": 0.06, "grad_norm": 2.114013910293579, "learning_rate": 1.9942413826251885e-05, "loss": 1.077, "step": 1102 }, { "epoch": 0.06, "grad_norm": 2.288700819015503, "learning_rate": 1.994221458605821e-05, "loss": 1.1001, "step": 1103 }, { "epoch": 0.06, "grad_norm": 2.0281717777252197, "learning_rate": 1.994201500278648e-05, "loss": 1.1124, "step": 1104 }, { "epoch": 0.06, "grad_norm": 2.1523780822753906, "learning_rate": 1.9941815076443586e-05, "loss": 1.0916, "step": 1105 }, { "epoch": 0.06, "grad_norm": 2.1003830432891846, "learning_rate": 1.994161480703642e-05, "loss": 1.0749, "step": 1106 }, { "epoch": 0.06, "grad_norm": 2.0702121257781982, "learning_rate": 1.9941414194571905e-05, "loss": 1.0608, "step": 1107 }, { "epoch": 0.06, "grad_norm": 1.113926649093628, "learning_rate": 1.994121323905695e-05, "loss": 0.5346, "step": 1108 }, { "epoch": 0.06, "grad_norm": 2.3483593463897705, "learning_rate": 1.9941011940498497e-05, "loss": 1.1051, "step": 1109 }, { "epoch": 0.06, "grad_norm": 2.1991989612579346, "learning_rate": 1.9940810298903495e-05, "loss": 1.0794, "step": 1110 }, { "epoch": 0.06, "grad_norm": 2.0061705112457275, "learning_rate": 1.9940608314278895e-05, "loss": 1.1055, "step": 1111 }, { "epoch": 0.06, "grad_norm": 2.084113836288452, "learning_rate": 1.994040598663167e-05, "loss": 1.0985, "step": 1112 }, { "epoch": 0.06, "grad_norm": 2.2391879558563232, "learning_rate": 1.9940203315968806e-05, "loss": 1.1343, "step": 1113 }, { "epoch": 0.06, "grad_norm": 1.9646164178848267, "learning_rate": 1.994000030229729e-05, "loss": 1.1927, "step": 1114 }, { "epoch": 0.06, "grad_norm": 2.478668451309204, "learning_rate": 1.9939796945624127e-05, "loss": 1.1542, "step": 1115 }, { "epoch": 0.06, "grad_norm": 2.0290873050689697, "learning_rate": 1.993959324595634e-05, "loss": 1.1116, "step": 1116 }, { "epoch": 0.06, "grad_norm": 2.150301456451416, "learning_rate": 1.9939389203300952e-05, "loss": 1.1534, "step": 1117 }, { "epoch": 0.06, "grad_norm": 1.1240397691726685, "learning_rate": 1.993918481766501e-05, "loss": 0.5673, "step": 1118 }, { "epoch": 0.06, "grad_norm": 2.1398959159851074, "learning_rate": 1.9938980089055565e-05, "loss": 1.0975, "step": 1119 }, { "epoch": 0.06, "grad_norm": 2.2083184719085693, "learning_rate": 1.9938775017479678e-05, "loss": 1.0974, "step": 1120 }, { "epoch": 0.06, "grad_norm": 1.2078986167907715, "learning_rate": 1.993856960294443e-05, "loss": 0.6306, "step": 1121 }, { "epoch": 0.06, "grad_norm": 2.0882461071014404, "learning_rate": 1.9938363845456904e-05, "loss": 1.1713, "step": 1122 }, { "epoch": 0.06, "grad_norm": 2.103302240371704, "learning_rate": 1.9938157745024208e-05, "loss": 1.0596, "step": 1123 }, { "epoch": 0.06, "grad_norm": 2.0410983562469482, "learning_rate": 1.9937951301653444e-05, "loss": 1.1503, "step": 1124 }, { "epoch": 0.06, "grad_norm": 2.01668381690979, "learning_rate": 1.9937744515351746e-05, "loss": 1.173, "step": 1125 }, { "epoch": 0.06, "grad_norm": 2.4383652210235596, "learning_rate": 1.9937537386126242e-05, "loss": 1.1395, "step": 1126 }, { "epoch": 0.06, "grad_norm": 2.0189781188964844, "learning_rate": 1.9937329913984084e-05, "loss": 1.036, "step": 1127 }, { "epoch": 0.06, "grad_norm": 3.122084379196167, "learning_rate": 1.9937122098932428e-05, "loss": 1.1449, "step": 1128 }, { "epoch": 0.06, "grad_norm": 2.011646270751953, "learning_rate": 1.9936913940978447e-05, "loss": 1.0708, "step": 1129 }, { "epoch": 0.06, "grad_norm": 1.0631135702133179, "learning_rate": 1.9936705440129326e-05, "loss": 0.5822, "step": 1130 }, { "epoch": 0.06, "grad_norm": 2.0753798484802246, "learning_rate": 1.9936496596392253e-05, "loss": 1.1244, "step": 1131 }, { "epoch": 0.06, "grad_norm": 2.1454896926879883, "learning_rate": 1.993628740977444e-05, "loss": 1.1368, "step": 1132 }, { "epoch": 0.06, "grad_norm": 1.8620635271072388, "learning_rate": 1.9936077880283108e-05, "loss": 1.1696, "step": 1133 }, { "epoch": 0.07, "grad_norm": 2.1776223182678223, "learning_rate": 1.993586800792548e-05, "loss": 1.0594, "step": 1134 }, { "epoch": 0.07, "grad_norm": 1.2247000932693481, "learning_rate": 1.9935657792708803e-05, "loss": 0.5753, "step": 1135 }, { "epoch": 0.07, "grad_norm": 2.2741165161132812, "learning_rate": 1.9935447234640328e-05, "loss": 1.1022, "step": 1136 }, { "epoch": 0.07, "grad_norm": 2.1057348251342773, "learning_rate": 1.9935236333727322e-05, "loss": 1.1318, "step": 1137 }, { "epoch": 0.07, "grad_norm": 2.031235456466675, "learning_rate": 1.9935025089977067e-05, "loss": 1.0913, "step": 1138 }, { "epoch": 0.07, "grad_norm": 2.0759148597717285, "learning_rate": 1.9934813503396847e-05, "loss": 1.2036, "step": 1139 }, { "epoch": 0.07, "grad_norm": 2.0053555965423584, "learning_rate": 1.993460157399396e-05, "loss": 1.1345, "step": 1140 }, { "epoch": 0.07, "grad_norm": 2.204301357269287, "learning_rate": 1.993438930177573e-05, "loss": 1.0714, "step": 1141 }, { "epoch": 0.07, "grad_norm": 2.1923940181732178, "learning_rate": 1.9934176686749476e-05, "loss": 1.1855, "step": 1142 }, { "epoch": 0.07, "grad_norm": 2.2842936515808105, "learning_rate": 1.9933963728922532e-05, "loss": 1.1546, "step": 1143 }, { "epoch": 0.07, "grad_norm": 2.091099262237549, "learning_rate": 1.993375042830225e-05, "loss": 1.1156, "step": 1144 }, { "epoch": 0.07, "grad_norm": 1.0759966373443604, "learning_rate": 1.993353678489599e-05, "loss": 0.5534, "step": 1145 }, { "epoch": 0.07, "grad_norm": 2.2083656787872314, "learning_rate": 1.993332279871112e-05, "loss": 1.1551, "step": 1146 }, { "epoch": 0.07, "grad_norm": 1.9549657106399536, "learning_rate": 1.9933108469755032e-05, "loss": 1.0497, "step": 1147 }, { "epoch": 0.07, "grad_norm": 2.377147912979126, "learning_rate": 1.9932893798035118e-05, "loss": 1.1436, "step": 1148 }, { "epoch": 0.07, "grad_norm": 2.3849093914031982, "learning_rate": 1.9932678783558785e-05, "loss": 1.1112, "step": 1149 }, { "epoch": 0.07, "grad_norm": 2.092904806137085, "learning_rate": 1.993246342633345e-05, "loss": 0.9871, "step": 1150 }, { "epoch": 0.07, "grad_norm": 1.9337904453277588, "learning_rate": 1.993224772636655e-05, "loss": 1.0408, "step": 1151 }, { "epoch": 0.07, "grad_norm": 2.0744550228118896, "learning_rate": 1.9932031683665523e-05, "loss": 1.1605, "step": 1152 }, { "epoch": 0.07, "grad_norm": 1.0451710224151611, "learning_rate": 1.993181529823783e-05, "loss": 0.549, "step": 1153 }, { "epoch": 0.07, "grad_norm": 2.3024978637695312, "learning_rate": 1.993159857009093e-05, "loss": 1.1693, "step": 1154 }, { "epoch": 0.07, "grad_norm": 2.349405527114868, "learning_rate": 1.993138149923231e-05, "loss": 1.1245, "step": 1155 }, { "epoch": 0.07, "grad_norm": 2.3075320720672607, "learning_rate": 1.9931164085669456e-05, "loss": 1.1042, "step": 1156 }, { "epoch": 0.07, "grad_norm": 1.9487121105194092, "learning_rate": 1.993094632940987e-05, "loss": 1.0848, "step": 1157 }, { "epoch": 0.07, "grad_norm": 1.157090187072754, "learning_rate": 1.993072823046107e-05, "loss": 0.539, "step": 1158 }, { "epoch": 0.07, "grad_norm": 2.2875044345855713, "learning_rate": 1.9930509788830575e-05, "loss": 1.1062, "step": 1159 }, { "epoch": 0.07, "grad_norm": 2.094050168991089, "learning_rate": 1.993029100452593e-05, "loss": 1.1257, "step": 1160 }, { "epoch": 0.07, "grad_norm": 2.2589635848999023, "learning_rate": 1.9930071877554683e-05, "loss": 1.0986, "step": 1161 }, { "epoch": 0.07, "grad_norm": 2.198604106903076, "learning_rate": 1.9929852407924392e-05, "loss": 1.1653, "step": 1162 }, { "epoch": 0.07, "grad_norm": 1.9290698766708374, "learning_rate": 1.992963259564263e-05, "loss": 1.1296, "step": 1163 }, { "epoch": 0.07, "grad_norm": 1.0985058546066284, "learning_rate": 1.9929412440716988e-05, "loss": 0.6325, "step": 1164 }, { "epoch": 0.07, "grad_norm": 2.008709669113159, "learning_rate": 1.9929191943155057e-05, "loss": 1.0745, "step": 1165 }, { "epoch": 0.07, "grad_norm": 2.088191270828247, "learning_rate": 1.9928971102964447e-05, "loss": 1.1129, "step": 1166 }, { "epoch": 0.07, "grad_norm": 2.0278618335723877, "learning_rate": 1.992874992015278e-05, "loss": 1.0919, "step": 1167 }, { "epoch": 0.07, "grad_norm": 2.0869240760803223, "learning_rate": 1.992852839472769e-05, "loss": 1.0378, "step": 1168 }, { "epoch": 0.07, "grad_norm": 2.011765241622925, "learning_rate": 1.992830652669682e-05, "loss": 1.12, "step": 1169 }, { "epoch": 0.07, "grad_norm": 2.0093207359313965, "learning_rate": 1.9928084316067823e-05, "loss": 1.0846, "step": 1170 }, { "epoch": 0.07, "grad_norm": 2.262693405151367, "learning_rate": 1.992786176284837e-05, "loss": 1.0494, "step": 1171 }, { "epoch": 0.07, "grad_norm": 2.1503779888153076, "learning_rate": 1.9927638867046143e-05, "loss": 1.0779, "step": 1172 }, { "epoch": 0.07, "grad_norm": 2.014343738555908, "learning_rate": 1.992741562866883e-05, "loss": 1.1392, "step": 1173 }, { "epoch": 0.07, "grad_norm": 2.0086891651153564, "learning_rate": 1.992719204772413e-05, "loss": 1.095, "step": 1174 }, { "epoch": 0.07, "grad_norm": 2.0909321308135986, "learning_rate": 1.9926968124219767e-05, "loss": 1.2311, "step": 1175 }, { "epoch": 0.07, "grad_norm": 2.2445292472839355, "learning_rate": 1.9926743858163463e-05, "loss": 1.2062, "step": 1176 }, { "epoch": 0.07, "grad_norm": 2.0266518592834473, "learning_rate": 1.9926519249562955e-05, "loss": 1.1218, "step": 1177 }, { "epoch": 0.07, "grad_norm": 2.060330390930176, "learning_rate": 1.9926294298426e-05, "loss": 1.2147, "step": 1178 }, { "epoch": 0.07, "grad_norm": 2.157761335372925, "learning_rate": 1.9926069004760356e-05, "loss": 1.1117, "step": 1179 }, { "epoch": 0.07, "grad_norm": 2.1145176887512207, "learning_rate": 1.9925843368573792e-05, "loss": 1.1634, "step": 1180 }, { "epoch": 0.07, "grad_norm": 1.917935848236084, "learning_rate": 1.9925617389874108e-05, "loss": 1.0402, "step": 1181 }, { "epoch": 0.07, "grad_norm": 2.046093463897705, "learning_rate": 1.9925391068669093e-05, "loss": 1.1241, "step": 1182 }, { "epoch": 0.07, "grad_norm": 2.0455591678619385, "learning_rate": 1.9925164404966556e-05, "loss": 1.1234, "step": 1183 }, { "epoch": 0.07, "grad_norm": 1.8993330001831055, "learning_rate": 1.992493739877432e-05, "loss": 1.0769, "step": 1184 }, { "epoch": 0.07, "grad_norm": 2.1404120922088623, "learning_rate": 1.9924710050100217e-05, "loss": 1.1305, "step": 1185 }, { "epoch": 0.07, "grad_norm": 2.159856081008911, "learning_rate": 1.9924482358952098e-05, "loss": 1.084, "step": 1186 }, { "epoch": 0.07, "grad_norm": 2.113464117050171, "learning_rate": 1.9924254325337812e-05, "loss": 1.091, "step": 1187 }, { "epoch": 0.07, "grad_norm": 1.978493332862854, "learning_rate": 1.9924025949265232e-05, "loss": 1.1047, "step": 1188 }, { "epoch": 0.07, "grad_norm": 2.1816928386688232, "learning_rate": 1.992379723074224e-05, "loss": 1.1469, "step": 1189 }, { "epoch": 0.07, "grad_norm": 2.222705602645874, "learning_rate": 1.9923568169776725e-05, "loss": 1.1837, "step": 1190 }, { "epoch": 0.07, "grad_norm": 1.8997925519943237, "learning_rate": 1.9923338766376593e-05, "loss": 1.1483, "step": 1191 }, { "epoch": 0.07, "grad_norm": 2.17057204246521, "learning_rate": 1.992310902054976e-05, "loss": 1.147, "step": 1192 }, { "epoch": 0.07, "grad_norm": 1.1332062482833862, "learning_rate": 1.9922878932304152e-05, "loss": 0.556, "step": 1193 }, { "epoch": 0.07, "grad_norm": 2.1345150470733643, "learning_rate": 1.9922648501647714e-05, "loss": 1.0373, "step": 1194 }, { "epoch": 0.07, "grad_norm": 2.1468448638916016, "learning_rate": 1.9922417728588394e-05, "loss": 1.086, "step": 1195 }, { "epoch": 0.07, "grad_norm": 2.1684858798980713, "learning_rate": 1.9922186613134152e-05, "loss": 1.1128, "step": 1196 }, { "epoch": 0.07, "grad_norm": 2.2784924507141113, "learning_rate": 1.9921955155292968e-05, "loss": 1.1567, "step": 1197 }, { "epoch": 0.07, "grad_norm": 1.0885435342788696, "learning_rate": 1.992172335507283e-05, "loss": 0.5883, "step": 1198 }, { "epoch": 0.07, "grad_norm": 2.2788376808166504, "learning_rate": 1.992149121248173e-05, "loss": 1.1182, "step": 1199 }, { "epoch": 0.07, "grad_norm": 2.1025798320770264, "learning_rate": 1.9921258727527685e-05, "loss": 1.0308, "step": 1200 }, { "epoch": 0.07, "grad_norm": 2.1366171836853027, "learning_rate": 1.9921025900218715e-05, "loss": 1.1531, "step": 1201 }, { "epoch": 0.07, "grad_norm": 2.092592477798462, "learning_rate": 1.9920792730562853e-05, "loss": 1.116, "step": 1202 }, { "epoch": 0.07, "grad_norm": 2.01593017578125, "learning_rate": 1.992055921856815e-05, "loss": 1.0914, "step": 1203 }, { "epoch": 0.07, "grad_norm": 1.9623132944107056, "learning_rate": 1.9920325364242658e-05, "loss": 1.0876, "step": 1204 }, { "epoch": 0.07, "grad_norm": 2.1434998512268066, "learning_rate": 1.992009116759445e-05, "loss": 1.1394, "step": 1205 }, { "epoch": 0.07, "grad_norm": 2.0466878414154053, "learning_rate": 1.9919856628631606e-05, "loss": 1.0924, "step": 1206 }, { "epoch": 0.07, "grad_norm": 2.0747780799865723, "learning_rate": 1.991962174736222e-05, "loss": 1.1534, "step": 1207 }, { "epoch": 0.07, "grad_norm": 1.9507187604904175, "learning_rate": 1.9919386523794396e-05, "loss": 1.1179, "step": 1208 }, { "epoch": 0.07, "grad_norm": 2.163022994995117, "learning_rate": 1.9919150957936252e-05, "loss": 1.1486, "step": 1209 }, { "epoch": 0.07, "grad_norm": 2.39188814163208, "learning_rate": 1.9918915049795916e-05, "loss": 1.097, "step": 1210 }, { "epoch": 0.07, "grad_norm": 2.0100901126861572, "learning_rate": 1.991867879938153e-05, "loss": 1.1397, "step": 1211 }, { "epoch": 0.07, "grad_norm": 1.9968924522399902, "learning_rate": 1.9918442206701244e-05, "loss": 1.1673, "step": 1212 }, { "epoch": 0.07, "grad_norm": 1.9430325031280518, "learning_rate": 1.9918205271763225e-05, "loss": 1.144, "step": 1213 }, { "epoch": 0.07, "grad_norm": 2.3547229766845703, "learning_rate": 1.9917967994575646e-05, "loss": 1.1018, "step": 1214 }, { "epoch": 0.07, "grad_norm": 2.100503444671631, "learning_rate": 1.9917730375146697e-05, "loss": 1.0594, "step": 1215 }, { "epoch": 0.07, "grad_norm": 1.8590503931045532, "learning_rate": 1.991749241348458e-05, "loss": 1.0954, "step": 1216 }, { "epoch": 0.07, "grad_norm": 2.084115505218506, "learning_rate": 1.9917254109597496e-05, "loss": 1.2, "step": 1217 }, { "epoch": 0.07, "grad_norm": 2.0761280059814453, "learning_rate": 1.991701546349368e-05, "loss": 1.1649, "step": 1218 }, { "epoch": 0.07, "grad_norm": 2.069512128829956, "learning_rate": 1.9916776475181363e-05, "loss": 1.1133, "step": 1219 }, { "epoch": 0.07, "grad_norm": 1.9715789556503296, "learning_rate": 1.991653714466879e-05, "loss": 1.095, "step": 1220 }, { "epoch": 0.07, "grad_norm": 2.4183547496795654, "learning_rate": 1.991629747196422e-05, "loss": 1.14, "step": 1221 }, { "epoch": 0.07, "grad_norm": 2.221222400665283, "learning_rate": 1.9916057457075925e-05, "loss": 1.1237, "step": 1222 }, { "epoch": 0.07, "grad_norm": 2.0392346382141113, "learning_rate": 1.991581710001219e-05, "loss": 1.0845, "step": 1223 }, { "epoch": 0.07, "grad_norm": 2.1710734367370605, "learning_rate": 1.99155764007813e-05, "loss": 1.105, "step": 1224 }, { "epoch": 0.07, "grad_norm": 2.1350514888763428, "learning_rate": 1.991533535939157e-05, "loss": 1.0766, "step": 1225 }, { "epoch": 0.07, "grad_norm": 2.0256454944610596, "learning_rate": 1.9915093975851313e-05, "loss": 1.1516, "step": 1226 }, { "epoch": 0.07, "grad_norm": 2.1307461261749268, "learning_rate": 1.991485225016886e-05, "loss": 1.1361, "step": 1227 }, { "epoch": 0.07, "grad_norm": 2.1100406646728516, "learning_rate": 1.991461018235255e-05, "loss": 1.0608, "step": 1228 }, { "epoch": 0.07, "grad_norm": 1.9077250957489014, "learning_rate": 1.991436777241074e-05, "loss": 1.1867, "step": 1229 }, { "epoch": 0.07, "grad_norm": 3.2169835567474365, "learning_rate": 1.9914125020351794e-05, "loss": 1.1167, "step": 1230 }, { "epoch": 0.07, "grad_norm": 2.226994276046753, "learning_rate": 1.9913881926184084e-05, "loss": 1.1852, "step": 1231 }, { "epoch": 0.07, "grad_norm": 2.021265983581543, "learning_rate": 1.9913638489916006e-05, "loss": 1.1297, "step": 1232 }, { "epoch": 0.07, "grad_norm": 2.1348133087158203, "learning_rate": 1.991339471155595e-05, "loss": 1.065, "step": 1233 }, { "epoch": 0.07, "grad_norm": 2.2522923946380615, "learning_rate": 1.991315059111234e-05, "loss": 1.102, "step": 1234 }, { "epoch": 0.07, "grad_norm": 2.0552234649658203, "learning_rate": 1.9912906128593595e-05, "loss": 1.1832, "step": 1235 }, { "epoch": 0.07, "grad_norm": 2.2361905574798584, "learning_rate": 1.9912661324008147e-05, "loss": 1.1429, "step": 1236 }, { "epoch": 0.07, "grad_norm": 1.079887866973877, "learning_rate": 1.991241617736445e-05, "loss": 0.5342, "step": 1237 }, { "epoch": 0.07, "grad_norm": 2.2332348823547363, "learning_rate": 1.9912170688670956e-05, "loss": 1.2202, "step": 1238 }, { "epoch": 0.07, "grad_norm": 2.3862452507019043, "learning_rate": 1.9911924857936142e-05, "loss": 1.1217, "step": 1239 }, { "epoch": 0.07, "grad_norm": 1.9727429151535034, "learning_rate": 1.9911678685168486e-05, "loss": 1.0248, "step": 1240 }, { "epoch": 0.07, "grad_norm": 2.2376153469085693, "learning_rate": 1.991143217037649e-05, "loss": 1.1876, "step": 1241 }, { "epoch": 0.07, "grad_norm": 2.046086311340332, "learning_rate": 1.9911185313568655e-05, "loss": 1.0718, "step": 1242 }, { "epoch": 0.07, "grad_norm": 2.0690810680389404, "learning_rate": 1.99109381147535e-05, "loss": 1.091, "step": 1243 }, { "epoch": 0.07, "grad_norm": 2.156420946121216, "learning_rate": 1.991069057393956e-05, "loss": 1.1693, "step": 1244 }, { "epoch": 0.07, "grad_norm": 2.3771722316741943, "learning_rate": 1.9910442691135364e-05, "loss": 1.0267, "step": 1245 }, { "epoch": 0.07, "grad_norm": 2.202235460281372, "learning_rate": 1.991019446634948e-05, "loss": 1.1042, "step": 1246 }, { "epoch": 0.07, "grad_norm": 2.227205753326416, "learning_rate": 1.9909945899590468e-05, "loss": 1.1327, "step": 1247 }, { "epoch": 0.07, "grad_norm": 2.118591785430908, "learning_rate": 1.9909696990866903e-05, "loss": 1.1394, "step": 1248 }, { "epoch": 0.07, "grad_norm": 2.1210837364196777, "learning_rate": 1.990944774018738e-05, "loss": 1.0889, "step": 1249 }, { "epoch": 0.07, "grad_norm": 2.3601319789886475, "learning_rate": 1.9909198147560492e-05, "loss": 1.0905, "step": 1250 }, { "epoch": 0.07, "grad_norm": 2.173114061355591, "learning_rate": 1.990894821299486e-05, "loss": 1.133, "step": 1251 }, { "epoch": 0.07, "grad_norm": 2.1293811798095703, "learning_rate": 1.9908697936499105e-05, "loss": 1.0958, "step": 1252 }, { "epoch": 0.07, "grad_norm": 2.268125295639038, "learning_rate": 1.990844731808186e-05, "loss": 1.1005, "step": 1253 }, { "epoch": 0.07, "grad_norm": 2.2594525814056396, "learning_rate": 1.9908196357751778e-05, "loss": 1.1161, "step": 1254 }, { "epoch": 0.07, "grad_norm": 2.474522829055786, "learning_rate": 1.9907945055517517e-05, "loss": 1.1654, "step": 1255 }, { "epoch": 0.07, "grad_norm": 2.132887840270996, "learning_rate": 1.990769341138775e-05, "loss": 1.1022, "step": 1256 }, { "epoch": 0.07, "grad_norm": 2.103699207305908, "learning_rate": 1.9907441425371155e-05, "loss": 1.0682, "step": 1257 }, { "epoch": 0.07, "grad_norm": 1.9943654537200928, "learning_rate": 1.9907189097476434e-05, "loss": 1.1605, "step": 1258 }, { "epoch": 0.07, "grad_norm": 2.103573799133301, "learning_rate": 1.9906936427712295e-05, "loss": 1.0961, "step": 1259 }, { "epoch": 0.07, "grad_norm": 1.9811022281646729, "learning_rate": 1.990668341608745e-05, "loss": 1.1111, "step": 1260 }, { "epoch": 0.07, "grad_norm": 2.086775064468384, "learning_rate": 1.9906430062610634e-05, "loss": 1.0567, "step": 1261 }, { "epoch": 0.07, "grad_norm": 2.085899591445923, "learning_rate": 1.990617636729059e-05, "loss": 1.1824, "step": 1262 }, { "epoch": 0.07, "grad_norm": 2.326277017593384, "learning_rate": 1.990592233013607e-05, "loss": 1.1446, "step": 1263 }, { "epoch": 0.07, "grad_norm": 2.0884342193603516, "learning_rate": 1.9905667951155846e-05, "loss": 1.1348, "step": 1264 }, { "epoch": 0.07, "grad_norm": 1.2065106630325317, "learning_rate": 1.9905413230358687e-05, "loss": 0.5621, "step": 1265 }, { "epoch": 0.07, "grad_norm": 1.869314432144165, "learning_rate": 1.990515816775339e-05, "loss": 1.173, "step": 1266 }, { "epoch": 0.07, "grad_norm": 2.268204689025879, "learning_rate": 1.990490276334875e-05, "loss": 1.1702, "step": 1267 }, { "epoch": 0.07, "grad_norm": 2.2038028240203857, "learning_rate": 1.9904647017153584e-05, "loss": 1.1344, "step": 1268 }, { "epoch": 0.07, "grad_norm": 2.0081677436828613, "learning_rate": 1.9904390929176716e-05, "loss": 1.1108, "step": 1269 }, { "epoch": 0.07, "grad_norm": 2.0541017055511475, "learning_rate": 1.990413449942699e-05, "loss": 1.0601, "step": 1270 }, { "epoch": 0.07, "grad_norm": 2.270810842514038, "learning_rate": 1.9903877727913245e-05, "loss": 1.0704, "step": 1271 }, { "epoch": 0.07, "grad_norm": 2.0008022785186768, "learning_rate": 1.9903620614644344e-05, "loss": 1.106, "step": 1272 }, { "epoch": 0.07, "grad_norm": 1.928165078163147, "learning_rate": 1.990336315962916e-05, "loss": 1.0916, "step": 1273 }, { "epoch": 0.07, "grad_norm": 1.9835573434829712, "learning_rate": 1.990310536287658e-05, "loss": 1.0741, "step": 1274 }, { "epoch": 0.07, "grad_norm": 2.0650382041931152, "learning_rate": 1.9902847224395495e-05, "loss": 1.1363, "step": 1275 }, { "epoch": 0.07, "grad_norm": 1.894583821296692, "learning_rate": 1.9902588744194815e-05, "loss": 1.1038, "step": 1276 }, { "epoch": 0.07, "grad_norm": 1.855492353439331, "learning_rate": 1.990232992228346e-05, "loss": 1.0987, "step": 1277 }, { "epoch": 0.07, "grad_norm": 1.9532442092895508, "learning_rate": 1.9902070758670357e-05, "loss": 1.1315, "step": 1278 }, { "epoch": 0.07, "grad_norm": 2.154186725616455, "learning_rate": 1.9901811253364458e-05, "loss": 1.0325, "step": 1279 }, { "epoch": 0.07, "grad_norm": 1.9850854873657227, "learning_rate": 1.9901551406374707e-05, "loss": 1.0386, "step": 1280 }, { "epoch": 0.07, "grad_norm": 2.3562185764312744, "learning_rate": 1.990129121771008e-05, "loss": 1.0897, "step": 1281 }, { "epoch": 0.07, "grad_norm": 1.9751865863800049, "learning_rate": 1.990103068737955e-05, "loss": 1.0541, "step": 1282 }, { "epoch": 0.07, "grad_norm": 1.1249632835388184, "learning_rate": 1.9900769815392106e-05, "loss": 0.5727, "step": 1283 }, { "epoch": 0.07, "grad_norm": 1.2207763195037842, "learning_rate": 1.9900508601756755e-05, "loss": 0.6802, "step": 1284 }, { "epoch": 0.07, "grad_norm": 2.27069354057312, "learning_rate": 1.9900247046482507e-05, "loss": 1.1251, "step": 1285 }, { "epoch": 0.07, "grad_norm": 2.1679444313049316, "learning_rate": 1.989998514957839e-05, "loss": 1.1064, "step": 1286 }, { "epoch": 0.07, "grad_norm": 2.284734010696411, "learning_rate": 1.9899722911053438e-05, "loss": 1.2097, "step": 1287 }, { "epoch": 0.07, "grad_norm": 1.1041972637176514, "learning_rate": 1.9899460330916706e-05, "loss": 0.6174, "step": 1288 }, { "epoch": 0.07, "grad_norm": 2.0110692977905273, "learning_rate": 1.9899197409177245e-05, "loss": 1.1213, "step": 1289 }, { "epoch": 0.07, "grad_norm": 2.0938215255737305, "learning_rate": 1.989893414584414e-05, "loss": 1.1117, "step": 1290 }, { "epoch": 0.07, "grad_norm": 1.9446513652801514, "learning_rate": 1.9898670540926463e-05, "loss": 1.1011, "step": 1291 }, { "epoch": 0.07, "grad_norm": 2.0141987800598145, "learning_rate": 1.989840659443332e-05, "loss": 1.1537, "step": 1292 }, { "epoch": 0.07, "grad_norm": 1.9681075811386108, "learning_rate": 1.9898142306373816e-05, "loss": 1.0889, "step": 1293 }, { "epoch": 0.07, "grad_norm": 3.014925241470337, "learning_rate": 1.989787767675707e-05, "loss": 1.0744, "step": 1294 }, { "epoch": 0.07, "grad_norm": 1.2276012897491455, "learning_rate": 1.9897612705592215e-05, "loss": 0.6677, "step": 1295 }, { "epoch": 0.07, "grad_norm": 2.2037341594696045, "learning_rate": 1.989734739288839e-05, "loss": 1.1658, "step": 1296 }, { "epoch": 0.07, "grad_norm": 2.159756660461426, "learning_rate": 1.9897081738654754e-05, "loss": 1.0833, "step": 1297 }, { "epoch": 0.07, "grad_norm": 2.310328483581543, "learning_rate": 1.989681574290048e-05, "loss": 1.1384, "step": 1298 }, { "epoch": 0.07, "grad_norm": 1.9768158197402954, "learning_rate": 1.989654940563473e-05, "loss": 1.2013, "step": 1299 }, { "epoch": 0.07, "grad_norm": 1.9453753232955933, "learning_rate": 1.9896282726866713e-05, "loss": 1.0661, "step": 1300 }, { "epoch": 0.07, "grad_norm": 2.080828905105591, "learning_rate": 1.989601570660562e-05, "loss": 1.0526, "step": 1301 }, { "epoch": 0.07, "grad_norm": 1.1605697870254517, "learning_rate": 1.9895748344860667e-05, "loss": 0.6008, "step": 1302 }, { "epoch": 0.07, "grad_norm": 2.5316574573516846, "learning_rate": 1.9895480641641086e-05, "loss": 1.1312, "step": 1303 }, { "epoch": 0.07, "grad_norm": 2.1274194717407227, "learning_rate": 1.9895212596956104e-05, "loss": 1.047, "step": 1304 }, { "epoch": 0.07, "grad_norm": 2.2341084480285645, "learning_rate": 1.989494421081498e-05, "loss": 1.08, "step": 1305 }, { "epoch": 0.07, "grad_norm": 2.1698412895202637, "learning_rate": 1.989467548322697e-05, "loss": 1.1666, "step": 1306 }, { "epoch": 0.07, "grad_norm": 1.1324745416641235, "learning_rate": 1.989440641420135e-05, "loss": 0.5499, "step": 1307 }, { "epoch": 0.08, "grad_norm": 2.3786816596984863, "learning_rate": 1.9894137003747404e-05, "loss": 1.1462, "step": 1308 }, { "epoch": 0.08, "grad_norm": 2.2287256717681885, "learning_rate": 1.9893867251874428e-05, "loss": 1.1352, "step": 1309 }, { "epoch": 0.08, "grad_norm": 2.190324544906616, "learning_rate": 1.9893597158591728e-05, "loss": 1.0935, "step": 1310 }, { "epoch": 0.08, "grad_norm": 2.148191452026367, "learning_rate": 1.9893326723908634e-05, "loss": 1.1491, "step": 1311 }, { "epoch": 0.08, "grad_norm": 2.108069658279419, "learning_rate": 1.9893055947834464e-05, "loss": 1.1705, "step": 1312 }, { "epoch": 0.08, "grad_norm": 2.154458999633789, "learning_rate": 1.989278483037857e-05, "loss": 1.0735, "step": 1313 }, { "epoch": 0.08, "grad_norm": 2.1555721759796143, "learning_rate": 1.9892513371550303e-05, "loss": 1.1025, "step": 1314 }, { "epoch": 0.08, "grad_norm": 2.0585250854492188, "learning_rate": 1.9892241571359035e-05, "loss": 1.103, "step": 1315 }, { "epoch": 0.08, "grad_norm": 1.9872242212295532, "learning_rate": 1.9891969429814147e-05, "loss": 1.1643, "step": 1316 }, { "epoch": 0.08, "grad_norm": 2.19966459274292, "learning_rate": 1.9891696946925024e-05, "loss": 1.0738, "step": 1317 }, { "epoch": 0.08, "grad_norm": 1.0834670066833496, "learning_rate": 1.9891424122701067e-05, "loss": 0.5802, "step": 1318 }, { "epoch": 0.08, "grad_norm": 2.0283498764038086, "learning_rate": 1.9891150957151696e-05, "loss": 1.1474, "step": 1319 }, { "epoch": 0.08, "grad_norm": 2.191458225250244, "learning_rate": 1.989087745028634e-05, "loss": 1.143, "step": 1320 }, { "epoch": 0.08, "grad_norm": 2.289520740509033, "learning_rate": 1.9890603602114428e-05, "loss": 1.0767, "step": 1321 }, { "epoch": 0.08, "grad_norm": 2.1580259799957275, "learning_rate": 1.9890329412645417e-05, "loss": 1.2069, "step": 1322 }, { "epoch": 0.08, "grad_norm": 2.199875593185425, "learning_rate": 1.9890054881888758e-05, "loss": 1.138, "step": 1323 }, { "epoch": 0.08, "grad_norm": 1.0213574171066284, "learning_rate": 1.988978000985394e-05, "loss": 0.5916, "step": 1324 }, { "epoch": 0.08, "grad_norm": 2.097529888153076, "learning_rate": 1.9889504796550435e-05, "loss": 1.0179, "step": 1325 }, { "epoch": 0.08, "grad_norm": 2.137336492538452, "learning_rate": 1.9889229241987747e-05, "loss": 1.1846, "step": 1326 }, { "epoch": 0.08, "grad_norm": 2.064444065093994, "learning_rate": 1.9888953346175383e-05, "loss": 1.1054, "step": 1327 }, { "epoch": 0.08, "grad_norm": 2.009007692337036, "learning_rate": 1.988867710912286e-05, "loss": 1.1348, "step": 1328 }, { "epoch": 0.08, "grad_norm": 1.9943597316741943, "learning_rate": 1.9888400530839713e-05, "loss": 1.1011, "step": 1329 }, { "epoch": 0.08, "grad_norm": 2.055872917175293, "learning_rate": 1.988812361133549e-05, "loss": 1.0661, "step": 1330 }, { "epoch": 0.08, "grad_norm": 2.2273976802825928, "learning_rate": 1.9887846350619736e-05, "loss": 1.1775, "step": 1331 }, { "epoch": 0.08, "grad_norm": 2.1020774841308594, "learning_rate": 1.9887568748702032e-05, "loss": 1.1429, "step": 1332 }, { "epoch": 0.08, "grad_norm": 1.1669255495071411, "learning_rate": 1.9887290805591946e-05, "loss": 0.6053, "step": 1333 }, { "epoch": 0.08, "grad_norm": 2.249624013900757, "learning_rate": 1.9887012521299072e-05, "loss": 1.158, "step": 1334 }, { "epoch": 0.08, "grad_norm": 2.042140483856201, "learning_rate": 1.9886733895833017e-05, "loss": 1.1374, "step": 1335 }, { "epoch": 0.08, "grad_norm": 2.120642900466919, "learning_rate": 1.9886454929203394e-05, "loss": 1.0874, "step": 1336 }, { "epoch": 0.08, "grad_norm": 2.055746555328369, "learning_rate": 1.9886175621419824e-05, "loss": 1.0788, "step": 1337 }, { "epoch": 0.08, "grad_norm": 2.217156171798706, "learning_rate": 1.9885895972491952e-05, "loss": 1.0792, "step": 1338 }, { "epoch": 0.08, "grad_norm": 1.9077961444854736, "learning_rate": 1.9885615982429425e-05, "loss": 1.0627, "step": 1339 }, { "epoch": 0.08, "grad_norm": 2.0082945823669434, "learning_rate": 1.9885335651241905e-05, "loss": 1.1286, "step": 1340 }, { "epoch": 0.08, "grad_norm": 1.9954739809036255, "learning_rate": 1.9885054978939062e-05, "loss": 1.0454, "step": 1341 }, { "epoch": 0.08, "grad_norm": 1.9854028224945068, "learning_rate": 1.988477396553059e-05, "loss": 1.1135, "step": 1342 }, { "epoch": 0.08, "grad_norm": 2.302316188812256, "learning_rate": 1.9884492611026177e-05, "loss": 1.1539, "step": 1343 }, { "epoch": 0.08, "grad_norm": 2.038797616958618, "learning_rate": 1.9884210915435536e-05, "loss": 1.1108, "step": 1344 }, { "epoch": 0.08, "grad_norm": 2.0623676776885986, "learning_rate": 1.9883928878768386e-05, "loss": 1.1101, "step": 1345 }, { "epoch": 0.08, "grad_norm": 1.9175207614898682, "learning_rate": 1.988364650103446e-05, "loss": 1.0681, "step": 1346 }, { "epoch": 0.08, "grad_norm": 2.099000930786133, "learning_rate": 1.98833637822435e-05, "loss": 1.1031, "step": 1347 }, { "epoch": 0.08, "grad_norm": 1.9874680042266846, "learning_rate": 1.988308072240527e-05, "loss": 1.0718, "step": 1348 }, { "epoch": 0.08, "grad_norm": 2.3376691341400146, "learning_rate": 1.9882797321529526e-05, "loss": 1.048, "step": 1349 }, { "epoch": 0.08, "grad_norm": 2.420262336730957, "learning_rate": 1.9882513579626056e-05, "loss": 1.1714, "step": 1350 }, { "epoch": 0.08, "grad_norm": 1.8753174543380737, "learning_rate": 1.9882229496704647e-05, "loss": 1.0122, "step": 1351 }, { "epoch": 0.08, "grad_norm": 2.2582755088806152, "learning_rate": 1.9881945072775106e-05, "loss": 1.1672, "step": 1352 }, { "epoch": 0.08, "grad_norm": 2.077871561050415, "learning_rate": 1.988166030784724e-05, "loss": 1.1688, "step": 1353 }, { "epoch": 0.08, "grad_norm": 2.053107261657715, "learning_rate": 1.988137520193088e-05, "loss": 1.0456, "step": 1354 }, { "epoch": 0.08, "grad_norm": 2.0064969062805176, "learning_rate": 1.9881089755035864e-05, "loss": 1.1126, "step": 1355 }, { "epoch": 0.08, "grad_norm": 2.039945363998413, "learning_rate": 1.9880803967172048e-05, "loss": 1.1581, "step": 1356 }, { "epoch": 0.08, "grad_norm": 2.1099343299865723, "learning_rate": 1.988051783834928e-05, "loss": 1.0637, "step": 1357 }, { "epoch": 0.08, "grad_norm": 2.2105233669281006, "learning_rate": 1.988023136857745e-05, "loss": 1.1763, "step": 1358 }, { "epoch": 0.08, "grad_norm": 2.122349262237549, "learning_rate": 1.987994455786643e-05, "loss": 1.0911, "step": 1359 }, { "epoch": 0.08, "grad_norm": 2.1639063358306885, "learning_rate": 1.9879657406226123e-05, "loss": 1.1139, "step": 1360 }, { "epoch": 0.08, "grad_norm": 2.036627769470215, "learning_rate": 1.9879369913666434e-05, "loss": 1.1323, "step": 1361 }, { "epoch": 0.08, "grad_norm": 2.110201835632324, "learning_rate": 1.9879082080197288e-05, "loss": 1.1668, "step": 1362 }, { "epoch": 0.08, "grad_norm": 2.3206636905670166, "learning_rate": 1.987879390582862e-05, "loss": 1.039, "step": 1363 }, { "epoch": 0.08, "grad_norm": 2.0441465377807617, "learning_rate": 1.987850539057036e-05, "loss": 1.0845, "step": 1364 }, { "epoch": 0.08, "grad_norm": 2.022278070449829, "learning_rate": 1.987821653443248e-05, "loss": 1.0997, "step": 1365 }, { "epoch": 0.08, "grad_norm": 2.1065540313720703, "learning_rate": 1.987792733742494e-05, "loss": 1.0949, "step": 1366 }, { "epoch": 0.08, "grad_norm": 2.2054505348205566, "learning_rate": 1.987763779955772e-05, "loss": 1.0267, "step": 1367 }, { "epoch": 0.08, "grad_norm": 2.15751576423645, "learning_rate": 1.9877347920840812e-05, "loss": 1.0617, "step": 1368 }, { "epoch": 0.08, "grad_norm": 2.1031882762908936, "learning_rate": 1.9877057701284217e-05, "loss": 1.1658, "step": 1369 }, { "epoch": 0.08, "grad_norm": 1.1594465970993042, "learning_rate": 1.9876767140897953e-05, "loss": 0.5538, "step": 1370 }, { "epoch": 0.08, "grad_norm": 2.0366275310516357, "learning_rate": 1.9876476239692045e-05, "loss": 1.0723, "step": 1371 }, { "epoch": 0.08, "grad_norm": 1.989363670349121, "learning_rate": 1.987618499767653e-05, "loss": 1.1171, "step": 1372 }, { "epoch": 0.08, "grad_norm": 2.2282488346099854, "learning_rate": 1.987589341486146e-05, "loss": 1.1912, "step": 1373 }, { "epoch": 0.08, "grad_norm": 2.123347043991089, "learning_rate": 1.9875601491256893e-05, "loss": 1.117, "step": 1374 }, { "epoch": 0.08, "grad_norm": 1.9939744472503662, "learning_rate": 1.9875309226872907e-05, "loss": 1.109, "step": 1375 }, { "epoch": 0.08, "grad_norm": 1.9738434553146362, "learning_rate": 1.9875016621719584e-05, "loss": 1.1615, "step": 1376 }, { "epoch": 0.08, "grad_norm": 2.1679413318634033, "learning_rate": 1.987472367580702e-05, "loss": 1.151, "step": 1377 }, { "epoch": 0.08, "grad_norm": 2.0676372051239014, "learning_rate": 1.987443038914533e-05, "loss": 1.1471, "step": 1378 }, { "epoch": 0.08, "grad_norm": 2.3361377716064453, "learning_rate": 1.987413676174463e-05, "loss": 1.0903, "step": 1379 }, { "epoch": 0.08, "grad_norm": 2.0200390815734863, "learning_rate": 1.987384279361505e-05, "loss": 1.0676, "step": 1380 }, { "epoch": 0.08, "grad_norm": 2.272378444671631, "learning_rate": 1.9873548484766737e-05, "loss": 1.288, "step": 1381 }, { "epoch": 0.08, "grad_norm": 2.0421066284179688, "learning_rate": 1.9873253835209848e-05, "loss": 1.0762, "step": 1382 }, { "epoch": 0.08, "grad_norm": 1.2948830127716064, "learning_rate": 1.9872958844954548e-05, "loss": 0.575, "step": 1383 }, { "epoch": 0.08, "grad_norm": 2.772346258163452, "learning_rate": 1.9872663514011016e-05, "loss": 1.1606, "step": 1384 }, { "epoch": 0.08, "grad_norm": 2.1205649375915527, "learning_rate": 1.9872367842389448e-05, "loss": 1.1125, "step": 1385 }, { "epoch": 0.08, "grad_norm": 2.0386877059936523, "learning_rate": 1.987207183010004e-05, "loss": 1.1183, "step": 1386 }, { "epoch": 0.08, "grad_norm": 2.187908411026001, "learning_rate": 1.9871775477153012e-05, "loss": 1.0917, "step": 1387 }, { "epoch": 0.08, "grad_norm": 2.3645827770233154, "learning_rate": 1.9871478783558586e-05, "loss": 1.0845, "step": 1388 }, { "epoch": 0.08, "grad_norm": 2.4528961181640625, "learning_rate": 1.9871181749327e-05, "loss": 1.1272, "step": 1389 }, { "epoch": 0.08, "grad_norm": 2.261669397354126, "learning_rate": 1.9870884374468512e-05, "loss": 1.0338, "step": 1390 }, { "epoch": 0.08, "grad_norm": 2.2685914039611816, "learning_rate": 1.9870586658993375e-05, "loss": 1.1373, "step": 1391 }, { "epoch": 0.08, "grad_norm": 1.9602514505386353, "learning_rate": 1.987028860291186e-05, "loss": 1.1259, "step": 1392 }, { "epoch": 0.08, "grad_norm": 1.986748218536377, "learning_rate": 1.986999020623426e-05, "loss": 1.1957, "step": 1393 }, { "epoch": 0.08, "grad_norm": 2.138457775115967, "learning_rate": 1.9869691468970873e-05, "loss": 1.1411, "step": 1394 }, { "epoch": 0.08, "grad_norm": 2.100825309753418, "learning_rate": 1.9869392391132e-05, "loss": 1.0838, "step": 1395 }, { "epoch": 0.08, "grad_norm": 2.173616409301758, "learning_rate": 1.986909297272796e-05, "loss": 1.03, "step": 1396 }, { "epoch": 0.08, "grad_norm": 2.161248207092285, "learning_rate": 1.9868793213769096e-05, "loss": 1.1236, "step": 1397 }, { "epoch": 0.08, "grad_norm": 2.2289650440216064, "learning_rate": 1.9868493114265743e-05, "loss": 1.1663, "step": 1398 }, { "epoch": 0.08, "grad_norm": 2.0525355339050293, "learning_rate": 1.986819267422826e-05, "loss": 1.0905, "step": 1399 }, { "epoch": 0.08, "grad_norm": 2.002760648727417, "learning_rate": 1.9867891893667012e-05, "loss": 1.0849, "step": 1400 }, { "epoch": 0.08, "grad_norm": 2.117975950241089, "learning_rate": 1.9867590772592384e-05, "loss": 1.1182, "step": 1401 }, { "epoch": 0.08, "grad_norm": 2.1687686443328857, "learning_rate": 1.9867289311014756e-05, "loss": 1.0956, "step": 1402 }, { "epoch": 0.08, "grad_norm": 2.150573253631592, "learning_rate": 1.986698750894454e-05, "loss": 1.1856, "step": 1403 }, { "epoch": 0.08, "grad_norm": 1.9842984676361084, "learning_rate": 1.9866685366392148e-05, "loss": 1.0706, "step": 1404 }, { "epoch": 0.08, "grad_norm": 1.9404444694519043, "learning_rate": 1.9866382883368005e-05, "loss": 1.2065, "step": 1405 }, { "epoch": 0.08, "grad_norm": 2.1583411693573, "learning_rate": 1.9866080059882547e-05, "loss": 1.139, "step": 1406 }, { "epoch": 0.08, "grad_norm": 1.3189854621887207, "learning_rate": 1.986577689594623e-05, "loss": 0.6255, "step": 1407 }, { "epoch": 0.08, "grad_norm": 2.076507806777954, "learning_rate": 1.986547339156951e-05, "loss": 1.1835, "step": 1408 }, { "epoch": 0.08, "grad_norm": 2.1921162605285645, "learning_rate": 1.986516954676286e-05, "loss": 1.1611, "step": 1409 }, { "epoch": 0.08, "grad_norm": 2.1837761402130127, "learning_rate": 1.986486536153677e-05, "loss": 1.0383, "step": 1410 }, { "epoch": 0.08, "grad_norm": 2.0888397693634033, "learning_rate": 1.9864560835901728e-05, "loss": 1.0853, "step": 1411 }, { "epoch": 0.08, "grad_norm": 1.9503192901611328, "learning_rate": 1.9864255969868248e-05, "loss": 1.0118, "step": 1412 }, { "epoch": 0.08, "grad_norm": 1.9684503078460693, "learning_rate": 1.986395076344685e-05, "loss": 1.0846, "step": 1413 }, { "epoch": 0.08, "grad_norm": 1.9836734533309937, "learning_rate": 1.9863645216648067e-05, "loss": 1.0429, "step": 1414 }, { "epoch": 0.08, "grad_norm": 2.076437473297119, "learning_rate": 1.9863339329482437e-05, "loss": 1.1452, "step": 1415 }, { "epoch": 0.08, "grad_norm": 1.7226722240447998, "learning_rate": 1.986303310196052e-05, "loss": 1.0353, "step": 1416 }, { "epoch": 0.08, "grad_norm": 2.0731446743011475, "learning_rate": 1.9862726534092884e-05, "loss": 1.1249, "step": 1417 }, { "epoch": 0.08, "grad_norm": 1.060102105140686, "learning_rate": 1.9862419625890104e-05, "loss": 0.5784, "step": 1418 }, { "epoch": 0.08, "grad_norm": 2.0847055912017822, "learning_rate": 1.9862112377362773e-05, "loss": 1.1337, "step": 1419 }, { "epoch": 0.08, "grad_norm": 2.1206746101379395, "learning_rate": 1.986180478852149e-05, "loss": 1.2428, "step": 1420 }, { "epoch": 0.08, "grad_norm": 2.3331515789031982, "learning_rate": 1.9861496859376876e-05, "loss": 1.2532, "step": 1421 }, { "epoch": 0.08, "grad_norm": 2.1501359939575195, "learning_rate": 1.9861188589939548e-05, "loss": 1.0665, "step": 1422 }, { "epoch": 0.08, "grad_norm": 2.265876531600952, "learning_rate": 1.9860879980220148e-05, "loss": 1.0789, "step": 1423 }, { "epoch": 0.08, "grad_norm": 2.1215591430664062, "learning_rate": 1.9860571030229327e-05, "loss": 1.051, "step": 1424 }, { "epoch": 0.08, "grad_norm": 2.048396348953247, "learning_rate": 1.9860261739977748e-05, "loss": 1.1216, "step": 1425 }, { "epoch": 0.08, "grad_norm": 2.0101122856140137, "learning_rate": 1.9859952109476076e-05, "loss": 1.2047, "step": 1426 }, { "epoch": 0.08, "grad_norm": 1.0449963808059692, "learning_rate": 1.9859642138734995e-05, "loss": 0.5079, "step": 1427 }, { "epoch": 0.08, "grad_norm": 2.464859962463379, "learning_rate": 1.9859331827765214e-05, "loss": 1.1511, "step": 1428 }, { "epoch": 0.08, "grad_norm": 1.9723495244979858, "learning_rate": 1.9859021176577426e-05, "loss": 1.0401, "step": 1429 }, { "epoch": 0.08, "grad_norm": 2.02600359916687, "learning_rate": 1.985871018518236e-05, "loss": 1.1363, "step": 1430 }, { "epoch": 0.08, "grad_norm": 1.9769604206085205, "learning_rate": 1.9858398853590745e-05, "loss": 1.1085, "step": 1431 }, { "epoch": 0.08, "grad_norm": 2.1852800846099854, "learning_rate": 1.9858087181813325e-05, "loss": 1.1026, "step": 1432 }, { "epoch": 0.08, "grad_norm": 2.135044813156128, "learning_rate": 1.985777516986085e-05, "loss": 1.0805, "step": 1433 }, { "epoch": 0.08, "grad_norm": 2.150658130645752, "learning_rate": 1.9857462817744093e-05, "loss": 0.9837, "step": 1434 }, { "epoch": 0.08, "grad_norm": 1.1276448965072632, "learning_rate": 1.9857150125473826e-05, "loss": 0.545, "step": 1435 }, { "epoch": 0.08, "grad_norm": 1.1883535385131836, "learning_rate": 1.985683709306085e-05, "loss": 0.633, "step": 1436 }, { "epoch": 0.08, "grad_norm": 2.2820518016815186, "learning_rate": 1.9856523720515954e-05, "loss": 1.1596, "step": 1437 }, { "epoch": 0.08, "grad_norm": 2.283904790878296, "learning_rate": 1.9856210007849965e-05, "loss": 1.1307, "step": 1438 }, { "epoch": 0.08, "grad_norm": 2.0505027770996094, "learning_rate": 1.985589595507369e-05, "loss": 1.141, "step": 1439 }, { "epoch": 0.08, "grad_norm": 2.033250331878662, "learning_rate": 1.9855581562197985e-05, "loss": 1.0985, "step": 1440 }, { "epoch": 0.08, "grad_norm": 1.9846283197402954, "learning_rate": 1.9855266829233688e-05, "loss": 1.0485, "step": 1441 }, { "epoch": 0.08, "grad_norm": 2.2820816040039062, "learning_rate": 1.9854951756191664e-05, "loss": 1.0767, "step": 1442 }, { "epoch": 0.08, "grad_norm": 2.033313751220703, "learning_rate": 1.9854636343082784e-05, "loss": 1.1953, "step": 1443 }, { "epoch": 0.08, "grad_norm": 1.9995808601379395, "learning_rate": 1.9854320589917928e-05, "loss": 1.0346, "step": 1444 }, { "epoch": 0.08, "grad_norm": 2.438791036605835, "learning_rate": 1.9854004496708e-05, "loss": 1.1486, "step": 1445 }, { "epoch": 0.08, "grad_norm": 1.9010323286056519, "learning_rate": 1.9853688063463896e-05, "loss": 1.0793, "step": 1446 }, { "epoch": 0.08, "grad_norm": 2.157346248626709, "learning_rate": 1.9853371290196547e-05, "loss": 1.0036, "step": 1447 }, { "epoch": 0.08, "grad_norm": 2.060274600982666, "learning_rate": 1.9853054176916877e-05, "loss": 1.0971, "step": 1448 }, { "epoch": 0.08, "grad_norm": 2.134073495864868, "learning_rate": 1.985273672363583e-05, "loss": 1.0651, "step": 1449 }, { "epoch": 0.08, "grad_norm": 1.9882314205169678, "learning_rate": 1.9852418930364366e-05, "loss": 1.0077, "step": 1450 }, { "epoch": 0.08, "grad_norm": 2.0374741554260254, "learning_rate": 1.9852100797113443e-05, "loss": 1.0779, "step": 1451 }, { "epoch": 0.08, "grad_norm": 2.1281180381774902, "learning_rate": 1.985178232389404e-05, "loss": 1.0282, "step": 1452 }, { "epoch": 0.08, "grad_norm": 2.0943431854248047, "learning_rate": 1.9851463510717154e-05, "loss": 1.0647, "step": 1453 }, { "epoch": 0.08, "grad_norm": 2.0908193588256836, "learning_rate": 1.985114435759378e-05, "loss": 1.1352, "step": 1454 }, { "epoch": 0.08, "grad_norm": 1.9162050485610962, "learning_rate": 1.985082486453493e-05, "loss": 1.0421, "step": 1455 }, { "epoch": 0.08, "grad_norm": 2.172769784927368, "learning_rate": 1.985050503155163e-05, "loss": 1.1203, "step": 1456 }, { "epoch": 0.08, "grad_norm": 2.0628693103790283, "learning_rate": 1.985018485865492e-05, "loss": 1.0958, "step": 1457 }, { "epoch": 0.08, "grad_norm": 2.1457321643829346, "learning_rate": 1.9849864345855847e-05, "loss": 1.1264, "step": 1458 }, { "epoch": 0.08, "grad_norm": 2.002023696899414, "learning_rate": 1.984954349316547e-05, "loss": 1.0712, "step": 1459 }, { "epoch": 0.08, "grad_norm": 1.8922641277313232, "learning_rate": 1.984922230059486e-05, "loss": 1.0871, "step": 1460 }, { "epoch": 0.08, "grad_norm": 1.9675027132034302, "learning_rate": 1.98489007681551e-05, "loss": 1.1516, "step": 1461 }, { "epoch": 0.08, "grad_norm": 2.0231220722198486, "learning_rate": 1.9848578895857286e-05, "loss": 1.0958, "step": 1462 }, { "epoch": 0.08, "grad_norm": 2.0484676361083984, "learning_rate": 1.9848256683712524e-05, "loss": 1.1855, "step": 1463 }, { "epoch": 0.08, "grad_norm": 2.0738344192504883, "learning_rate": 1.984793413173194e-05, "loss": 1.0251, "step": 1464 }, { "epoch": 0.08, "grad_norm": 2.0766384601593018, "learning_rate": 1.984761123992665e-05, "loss": 1.0946, "step": 1465 }, { "epoch": 0.08, "grad_norm": 2.0351288318634033, "learning_rate": 1.984728800830781e-05, "loss": 1.0554, "step": 1466 }, { "epoch": 0.08, "grad_norm": 2.038163900375366, "learning_rate": 1.9846964436886567e-05, "loss": 1.0246, "step": 1467 }, { "epoch": 0.08, "grad_norm": 1.9563249349594116, "learning_rate": 1.9846640525674085e-05, "loss": 1.097, "step": 1468 }, { "epoch": 0.08, "grad_norm": 1.8876897096633911, "learning_rate": 1.9846316274681547e-05, "loss": 1.0876, "step": 1469 }, { "epoch": 0.08, "grad_norm": 1.9763652086257935, "learning_rate": 1.9845991683920136e-05, "loss": 1.149, "step": 1470 }, { "epoch": 0.08, "grad_norm": 2.1362626552581787, "learning_rate": 1.9845666753401057e-05, "loss": 1.0093, "step": 1471 }, { "epoch": 0.08, "grad_norm": 2.1086034774780273, "learning_rate": 1.984534148313552e-05, "loss": 1.0959, "step": 1472 }, { "epoch": 0.08, "grad_norm": 2.030452251434326, "learning_rate": 1.9845015873134754e-05, "loss": 1.067, "step": 1473 }, { "epoch": 0.08, "grad_norm": 2.197798728942871, "learning_rate": 1.984468992340999e-05, "loss": 1.075, "step": 1474 }, { "epoch": 0.08, "grad_norm": 1.923909068107605, "learning_rate": 1.9844363633972477e-05, "loss": 1.1272, "step": 1475 }, { "epoch": 0.08, "grad_norm": 2.0921998023986816, "learning_rate": 1.984403700483347e-05, "loss": 1.149, "step": 1476 }, { "epoch": 0.08, "grad_norm": 1.9096180200576782, "learning_rate": 1.984371003600425e-05, "loss": 1.1273, "step": 1477 }, { "epoch": 0.08, "grad_norm": 1.9657894372940063, "learning_rate": 1.984338272749609e-05, "loss": 1.0852, "step": 1478 }, { "epoch": 0.08, "grad_norm": 2.162951946258545, "learning_rate": 1.9843055079320292e-05, "loss": 1.0699, "step": 1479 }, { "epoch": 0.08, "grad_norm": 2.099306344985962, "learning_rate": 1.9842727091488153e-05, "loss": 1.1107, "step": 1480 }, { "epoch": 0.08, "grad_norm": 2.193556547164917, "learning_rate": 1.9842398764011e-05, "loss": 1.1818, "step": 1481 }, { "epoch": 0.08, "grad_norm": 2.0320000648498535, "learning_rate": 1.9842070096900158e-05, "loss": 1.0553, "step": 1482 }, { "epoch": 0.09, "grad_norm": 2.025590658187866, "learning_rate": 1.984174109016697e-05, "loss": 1.1743, "step": 1483 }, { "epoch": 0.09, "grad_norm": 2.200209856033325, "learning_rate": 1.9841411743822792e-05, "loss": 1.1698, "step": 1484 }, { "epoch": 0.09, "grad_norm": 1.9037857055664062, "learning_rate": 1.984108205787898e-05, "loss": 1.0508, "step": 1485 }, { "epoch": 0.09, "grad_norm": 1.9431012868881226, "learning_rate": 1.984075203234692e-05, "loss": 1.0926, "step": 1486 }, { "epoch": 0.09, "grad_norm": 2.3005175590515137, "learning_rate": 1.9840421667237998e-05, "loss": 1.0724, "step": 1487 }, { "epoch": 0.09, "grad_norm": 1.7720915079116821, "learning_rate": 1.984009096256361e-05, "loss": 0.9589, "step": 1488 }, { "epoch": 0.09, "grad_norm": 1.0283432006835938, "learning_rate": 1.9839759918335168e-05, "loss": 0.5142, "step": 1489 }, { "epoch": 0.09, "grad_norm": 1.9947141408920288, "learning_rate": 1.98394285345641e-05, "loss": 1.1564, "step": 1490 }, { "epoch": 0.09, "grad_norm": 2.241518974304199, "learning_rate": 1.9839096811261838e-05, "loss": 1.121, "step": 1491 }, { "epoch": 0.09, "grad_norm": 1.1695207357406616, "learning_rate": 1.983876474843983e-05, "loss": 0.5971, "step": 1492 }, { "epoch": 0.09, "grad_norm": 2.0368034839630127, "learning_rate": 1.983843234610953e-05, "loss": 1.0841, "step": 1493 }, { "epoch": 0.09, "grad_norm": 2.2994043827056885, "learning_rate": 1.9838099604282418e-05, "loss": 1.1654, "step": 1494 }, { "epoch": 0.09, "grad_norm": 2.300075054168701, "learning_rate": 1.983776652296997e-05, "loss": 1.1359, "step": 1495 }, { "epoch": 0.09, "grad_norm": 2.1157824993133545, "learning_rate": 1.9837433102183677e-05, "loss": 1.0804, "step": 1496 }, { "epoch": 0.09, "grad_norm": 1.9550588130950928, "learning_rate": 1.9837099341935052e-05, "loss": 0.9942, "step": 1497 }, { "epoch": 0.09, "grad_norm": 2.383256673812866, "learning_rate": 1.9836765242235604e-05, "loss": 1.1689, "step": 1498 }, { "epoch": 0.09, "grad_norm": 1.9425299167633057, "learning_rate": 1.9836430803096865e-05, "loss": 1.0567, "step": 1499 }, { "epoch": 0.09, "grad_norm": 1.98634672164917, "learning_rate": 1.9836096024530373e-05, "loss": 1.0626, "step": 1500 }, { "epoch": 0.09, "grad_norm": 2.000257968902588, "learning_rate": 1.983576090654769e-05, "loss": 1.0638, "step": 1501 }, { "epoch": 0.09, "grad_norm": 2.091320037841797, "learning_rate": 1.9835425449160367e-05, "loss": 1.0527, "step": 1502 }, { "epoch": 0.09, "grad_norm": 2.2507221698760986, "learning_rate": 1.983508965237999e-05, "loss": 1.127, "step": 1503 }, { "epoch": 0.09, "grad_norm": 2.130293130874634, "learning_rate": 1.9834753516218138e-05, "loss": 1.0692, "step": 1504 }, { "epoch": 0.09, "grad_norm": 2.126779317855835, "learning_rate": 1.983441704068642e-05, "loss": 1.1485, "step": 1505 }, { "epoch": 0.09, "grad_norm": 2.020146369934082, "learning_rate": 1.9834080225796438e-05, "loss": 1.1033, "step": 1506 }, { "epoch": 0.09, "grad_norm": 2.2400009632110596, "learning_rate": 1.983374307155982e-05, "loss": 1.063, "step": 1507 }, { "epoch": 0.09, "grad_norm": 2.0688021183013916, "learning_rate": 1.9833405577988198e-05, "loss": 1.0404, "step": 1508 }, { "epoch": 0.09, "grad_norm": 1.9649642705917358, "learning_rate": 1.9833067745093214e-05, "loss": 1.0055, "step": 1509 }, { "epoch": 0.09, "grad_norm": 1.9329557418823242, "learning_rate": 1.9832729572886533e-05, "loss": 1.0667, "step": 1510 }, { "epoch": 0.09, "grad_norm": 2.0614495277404785, "learning_rate": 1.983239106137982e-05, "loss": 1.1431, "step": 1511 }, { "epoch": 0.09, "grad_norm": 2.2273495197296143, "learning_rate": 1.983205221058476e-05, "loss": 1.1212, "step": 1512 }, { "epoch": 0.09, "grad_norm": 1.9913157224655151, "learning_rate": 1.9831713020513038e-05, "loss": 1.0755, "step": 1513 }, { "epoch": 0.09, "grad_norm": 2.638934373855591, "learning_rate": 1.9831373491176365e-05, "loss": 1.124, "step": 1514 }, { "epoch": 0.09, "grad_norm": 2.0530943870544434, "learning_rate": 1.983103362258646e-05, "loss": 1.1661, "step": 1515 }, { "epoch": 0.09, "grad_norm": 1.8650323152542114, "learning_rate": 1.983069341475504e-05, "loss": 1.1003, "step": 1516 }, { "epoch": 0.09, "grad_norm": 2.0902040004730225, "learning_rate": 1.9830352867693854e-05, "loss": 1.1327, "step": 1517 }, { "epoch": 0.09, "grad_norm": 1.889133334159851, "learning_rate": 1.983001198141465e-05, "loss": 1.1197, "step": 1518 }, { "epoch": 0.09, "grad_norm": 1.9659128189086914, "learning_rate": 1.9829670755929196e-05, "loss": 1.1055, "step": 1519 }, { "epoch": 0.09, "grad_norm": 2.1931116580963135, "learning_rate": 1.9829329191249254e-05, "loss": 1.086, "step": 1520 }, { "epoch": 0.09, "grad_norm": 2.1118412017822266, "learning_rate": 1.9828987287386624e-05, "loss": 1.0795, "step": 1521 }, { "epoch": 0.09, "grad_norm": 2.164911985397339, "learning_rate": 1.98286450443531e-05, "loss": 1.1465, "step": 1522 }, { "epoch": 0.09, "grad_norm": 2.487156391143799, "learning_rate": 1.982830246216049e-05, "loss": 1.0907, "step": 1523 }, { "epoch": 0.09, "grad_norm": 2.1853439807891846, "learning_rate": 1.9827959540820615e-05, "loss": 1.1219, "step": 1524 }, { "epoch": 0.09, "grad_norm": 2.068653106689453, "learning_rate": 1.982761628034531e-05, "loss": 1.1896, "step": 1525 }, { "epoch": 0.09, "grad_norm": 2.0271356105804443, "learning_rate": 1.982727268074642e-05, "loss": 1.1053, "step": 1526 }, { "epoch": 0.09, "grad_norm": 2.1470835208892822, "learning_rate": 1.98269287420358e-05, "loss": 1.1822, "step": 1527 }, { "epoch": 0.09, "grad_norm": 1.8804415464401245, "learning_rate": 1.982658446422532e-05, "loss": 1.0792, "step": 1528 }, { "epoch": 0.09, "grad_norm": 2.2540791034698486, "learning_rate": 1.982623984732686e-05, "loss": 1.0937, "step": 1529 }, { "epoch": 0.09, "grad_norm": 2.3177947998046875, "learning_rate": 1.982589489135231e-05, "loss": 0.9667, "step": 1530 }, { "epoch": 0.09, "grad_norm": 2.124906539916992, "learning_rate": 1.9825549596313576e-05, "loss": 1.1543, "step": 1531 }, { "epoch": 0.09, "grad_norm": 2.0264222621917725, "learning_rate": 1.9825203962222573e-05, "loss": 1.0412, "step": 1532 }, { "epoch": 0.09, "grad_norm": 1.9088367223739624, "learning_rate": 1.9824857989091228e-05, "loss": 1.1721, "step": 1533 }, { "epoch": 0.09, "grad_norm": 1.2159401178359985, "learning_rate": 1.9824511676931472e-05, "loss": 0.5885, "step": 1534 }, { "epoch": 0.09, "grad_norm": 2.108255386352539, "learning_rate": 1.9824165025755267e-05, "loss": 1.0751, "step": 1535 }, { "epoch": 0.09, "grad_norm": 2.2997970581054688, "learning_rate": 1.9823818035574568e-05, "loss": 1.1204, "step": 1536 }, { "epoch": 0.09, "grad_norm": 2.0071542263031006, "learning_rate": 1.982347070640135e-05, "loss": 1.1042, "step": 1537 }, { "epoch": 0.09, "grad_norm": 2.3428642749786377, "learning_rate": 1.98231230382476e-05, "loss": 1.0368, "step": 1538 }, { "epoch": 0.09, "grad_norm": 1.1721205711364746, "learning_rate": 1.982277503112531e-05, "loss": 0.593, "step": 1539 }, { "epoch": 0.09, "grad_norm": 2.2708261013031006, "learning_rate": 1.9822426685046498e-05, "loss": 1.1309, "step": 1540 }, { "epoch": 0.09, "grad_norm": 2.3629820346832275, "learning_rate": 1.9822078000023176e-05, "loss": 1.0052, "step": 1541 }, { "epoch": 0.09, "grad_norm": 2.15863037109375, "learning_rate": 1.9821728976067382e-05, "loss": 1.1463, "step": 1542 }, { "epoch": 0.09, "grad_norm": 2.0399534702301025, "learning_rate": 1.9821379613191154e-05, "loss": 1.0613, "step": 1543 }, { "epoch": 0.09, "grad_norm": 2.0192134380340576, "learning_rate": 1.982102991140655e-05, "loss": 1.1146, "step": 1544 }, { "epoch": 0.09, "grad_norm": 3.148620843887329, "learning_rate": 1.9820679870725642e-05, "loss": 1.103, "step": 1545 }, { "epoch": 0.09, "grad_norm": 2.079495429992676, "learning_rate": 1.9820329491160498e-05, "loss": 1.0989, "step": 1546 }, { "epoch": 0.09, "grad_norm": 1.1147868633270264, "learning_rate": 1.981997877272322e-05, "loss": 0.5885, "step": 1547 }, { "epoch": 0.09, "grad_norm": 2.35673451423645, "learning_rate": 1.9819627715425904e-05, "loss": 1.0839, "step": 1548 }, { "epoch": 0.09, "grad_norm": 2.1471893787384033, "learning_rate": 1.9819276319280666e-05, "loss": 1.174, "step": 1549 }, { "epoch": 0.09, "grad_norm": 2.152531623840332, "learning_rate": 1.9818924584299634e-05, "loss": 1.1352, "step": 1550 }, { "epoch": 0.09, "grad_norm": 1.1739799976348877, "learning_rate": 1.9818572510494936e-05, "loss": 0.5836, "step": 1551 }, { "epoch": 0.09, "grad_norm": 2.113603115081787, "learning_rate": 1.981822009787873e-05, "loss": 1.1016, "step": 1552 }, { "epoch": 0.09, "grad_norm": 2.0553548336029053, "learning_rate": 1.981786734646318e-05, "loss": 1.1185, "step": 1553 }, { "epoch": 0.09, "grad_norm": 2.1447396278381348, "learning_rate": 1.981751425626045e-05, "loss": 1.1113, "step": 1554 }, { "epoch": 0.09, "grad_norm": 2.059035301208496, "learning_rate": 1.9817160827282725e-05, "loss": 1.1281, "step": 1555 }, { "epoch": 0.09, "grad_norm": 1.9455541372299194, "learning_rate": 1.9816807059542204e-05, "loss": 1.1825, "step": 1556 }, { "epoch": 0.09, "grad_norm": 2.2844650745391846, "learning_rate": 1.9816452953051092e-05, "loss": 1.1109, "step": 1557 }, { "epoch": 0.09, "grad_norm": 2.350222110748291, "learning_rate": 1.981609850782161e-05, "loss": 1.1377, "step": 1558 }, { "epoch": 0.09, "grad_norm": 2.0737218856811523, "learning_rate": 1.9815743723865993e-05, "loss": 1.0395, "step": 1559 }, { "epoch": 0.09, "grad_norm": 2.1310346126556396, "learning_rate": 1.9815388601196475e-05, "loss": 1.0461, "step": 1560 }, { "epoch": 0.09, "grad_norm": 2.0816190242767334, "learning_rate": 1.9815033139825315e-05, "loss": 1.0525, "step": 1561 }, { "epoch": 0.09, "grad_norm": 2.175285816192627, "learning_rate": 1.981467733976478e-05, "loss": 1.1439, "step": 1562 }, { "epoch": 0.09, "grad_norm": 2.250605583190918, "learning_rate": 1.9814321201027144e-05, "loss": 1.0734, "step": 1563 }, { "epoch": 0.09, "grad_norm": 2.2713162899017334, "learning_rate": 1.98139647236247e-05, "loss": 1.1042, "step": 1564 }, { "epoch": 0.09, "grad_norm": 2.0425164699554443, "learning_rate": 1.9813607907569747e-05, "loss": 1.1371, "step": 1565 }, { "epoch": 0.09, "grad_norm": 2.1377336978912354, "learning_rate": 1.98132507528746e-05, "loss": 1.0622, "step": 1566 }, { "epoch": 0.09, "grad_norm": 1.2684459686279297, "learning_rate": 1.9812893259551582e-05, "loss": 0.5724, "step": 1567 }, { "epoch": 0.09, "grad_norm": 2.1233832836151123, "learning_rate": 1.981253542761303e-05, "loss": 1.1783, "step": 1568 }, { "epoch": 0.09, "grad_norm": 2.1962993144989014, "learning_rate": 1.9812177257071284e-05, "loss": 1.1578, "step": 1569 }, { "epoch": 0.09, "grad_norm": 2.1200573444366455, "learning_rate": 1.9811818747938717e-05, "loss": 1.1289, "step": 1570 }, { "epoch": 0.09, "grad_norm": 2.270577907562256, "learning_rate": 1.981145990022769e-05, "loss": 1.2382, "step": 1571 }, { "epoch": 0.09, "grad_norm": 2.0432496070861816, "learning_rate": 1.9811100713950587e-05, "loss": 0.9483, "step": 1572 }, { "epoch": 0.09, "grad_norm": 2.045334815979004, "learning_rate": 1.981074118911981e-05, "loss": 1.0953, "step": 1573 }, { "epoch": 0.09, "grad_norm": 2.1907827854156494, "learning_rate": 1.9810381325747757e-05, "loss": 1.1282, "step": 1574 }, { "epoch": 0.09, "grad_norm": 2.030099630355835, "learning_rate": 1.9810021123846845e-05, "loss": 1.0778, "step": 1575 }, { "epoch": 0.09, "grad_norm": 2.286919355392456, "learning_rate": 1.980966058342951e-05, "loss": 1.112, "step": 1576 }, { "epoch": 0.09, "grad_norm": 2.13248348236084, "learning_rate": 1.9809299704508193e-05, "loss": 1.0865, "step": 1577 }, { "epoch": 0.09, "grad_norm": 2.1283349990844727, "learning_rate": 1.9808938487095343e-05, "loss": 1.066, "step": 1578 }, { "epoch": 0.09, "grad_norm": 1.9209989309310913, "learning_rate": 1.9808576931203424e-05, "loss": 1.0962, "step": 1579 }, { "epoch": 0.09, "grad_norm": 2.2582900524139404, "learning_rate": 1.980821503684492e-05, "loss": 1.0814, "step": 1580 }, { "epoch": 0.09, "grad_norm": 1.1568522453308105, "learning_rate": 1.9807852804032306e-05, "loss": 0.5813, "step": 1581 }, { "epoch": 0.09, "grad_norm": 1.098873496055603, "learning_rate": 1.980749023277809e-05, "loss": 0.5676, "step": 1582 }, { "epoch": 0.09, "grad_norm": 2.1907334327697754, "learning_rate": 1.9807127323094784e-05, "loss": 1.0623, "step": 1583 }, { "epoch": 0.09, "grad_norm": 2.0981929302215576, "learning_rate": 1.980676407499491e-05, "loss": 1.1046, "step": 1584 }, { "epoch": 0.09, "grad_norm": 2.335172414779663, "learning_rate": 1.9806400488491003e-05, "loss": 1.1292, "step": 1585 }, { "epoch": 0.09, "grad_norm": 1.1574161052703857, "learning_rate": 1.9806036563595606e-05, "loss": 0.5256, "step": 1586 }, { "epoch": 0.09, "grad_norm": 2.1440577507019043, "learning_rate": 1.980567230032128e-05, "loss": 1.1451, "step": 1587 }, { "epoch": 0.09, "grad_norm": 2.04799485206604, "learning_rate": 1.9805307698680592e-05, "loss": 1.0126, "step": 1588 }, { "epoch": 0.09, "grad_norm": 2.0676918029785156, "learning_rate": 1.980494275868613e-05, "loss": 1.1077, "step": 1589 }, { "epoch": 0.09, "grad_norm": 2.2290658950805664, "learning_rate": 1.9804577480350477e-05, "loss": 1.13, "step": 1590 }, { "epoch": 0.09, "grad_norm": 2.1077401638031006, "learning_rate": 1.9804211863686244e-05, "loss": 1.0092, "step": 1591 }, { "epoch": 0.09, "grad_norm": 2.29612398147583, "learning_rate": 1.980384590870605e-05, "loss": 1.1052, "step": 1592 }, { "epoch": 0.09, "grad_norm": 2.0813913345336914, "learning_rate": 1.9803479615422515e-05, "loss": 1.1089, "step": 1593 }, { "epoch": 0.09, "grad_norm": 2.0125625133514404, "learning_rate": 1.9803112983848287e-05, "loss": 1.0348, "step": 1594 }, { "epoch": 0.09, "grad_norm": 1.9546971321105957, "learning_rate": 1.9802746013996012e-05, "loss": 1.114, "step": 1595 }, { "epoch": 0.09, "grad_norm": 2.0356884002685547, "learning_rate": 1.9802378705878354e-05, "loss": 1.078, "step": 1596 }, { "epoch": 0.09, "grad_norm": 2.4589121341705322, "learning_rate": 1.9802011059507993e-05, "loss": 1.1889, "step": 1597 }, { "epoch": 0.09, "grad_norm": 2.1145145893096924, "learning_rate": 1.980164307489761e-05, "loss": 1.0445, "step": 1598 }, { "epoch": 0.09, "grad_norm": 1.9814850091934204, "learning_rate": 1.98012747520599e-05, "loss": 1.0659, "step": 1599 }, { "epoch": 0.09, "grad_norm": 1.982138752937317, "learning_rate": 1.980090609100758e-05, "loss": 1.0136, "step": 1600 }, { "epoch": 0.09, "grad_norm": 2.0517921447753906, "learning_rate": 1.980053709175337e-05, "loss": 1.064, "step": 1601 }, { "epoch": 0.09, "grad_norm": 2.168419599533081, "learning_rate": 1.980016775431e-05, "loss": 1.0297, "step": 1602 }, { "epoch": 0.09, "grad_norm": 2.1289284229278564, "learning_rate": 1.9799798078690216e-05, "loss": 1.0203, "step": 1603 }, { "epoch": 0.09, "grad_norm": 2.037092447280884, "learning_rate": 1.9799428064906775e-05, "loss": 1.134, "step": 1604 }, { "epoch": 0.09, "grad_norm": 2.0955047607421875, "learning_rate": 1.9799057712972444e-05, "loss": 1.1261, "step": 1605 }, { "epoch": 0.09, "grad_norm": 2.04819917678833, "learning_rate": 1.979868702290001e-05, "loss": 0.983, "step": 1606 }, { "epoch": 0.09, "grad_norm": 2.179774045944214, "learning_rate": 1.979831599470225e-05, "loss": 1.0752, "step": 1607 }, { "epoch": 0.09, "grad_norm": 2.177022933959961, "learning_rate": 1.9797944628391978e-05, "loss": 1.0054, "step": 1608 }, { "epoch": 0.09, "grad_norm": 1.8800945281982422, "learning_rate": 1.979757292398201e-05, "loss": 1.1204, "step": 1609 }, { "epoch": 0.09, "grad_norm": 2.152066707611084, "learning_rate": 1.9797200881485166e-05, "loss": 1.0746, "step": 1610 }, { "epoch": 0.09, "grad_norm": 2.0069997310638428, "learning_rate": 1.9796828500914285e-05, "loss": 1.1132, "step": 1611 }, { "epoch": 0.09, "grad_norm": 1.3057225942611694, "learning_rate": 1.979645578228222e-05, "loss": 0.517, "step": 1612 }, { "epoch": 0.09, "grad_norm": 1.9819554090499878, "learning_rate": 1.9796082725601836e-05, "loss": 1.1542, "step": 1613 }, { "epoch": 0.09, "grad_norm": 2.0664961338043213, "learning_rate": 1.9795709330885996e-05, "loss": 1.0918, "step": 1614 }, { "epoch": 0.09, "grad_norm": 1.9755898714065552, "learning_rate": 1.9795335598147592e-05, "loss": 1.1102, "step": 1615 }, { "epoch": 0.09, "grad_norm": 1.813048005104065, "learning_rate": 1.9794961527399518e-05, "loss": 1.0469, "step": 1616 }, { "epoch": 0.09, "grad_norm": 2.861116886138916, "learning_rate": 1.9794587118654686e-05, "loss": 1.0873, "step": 1617 }, { "epoch": 0.09, "grad_norm": 1.9587714672088623, "learning_rate": 1.9794212371926008e-05, "loss": 1.0606, "step": 1618 }, { "epoch": 0.09, "grad_norm": 1.183161735534668, "learning_rate": 1.9793837287226424e-05, "loss": 0.6656, "step": 1619 }, { "epoch": 0.09, "grad_norm": 2.1780409812927246, "learning_rate": 1.979346186456887e-05, "loss": 1.1627, "step": 1620 }, { "epoch": 0.09, "grad_norm": 2.119764804840088, "learning_rate": 1.9793086103966305e-05, "loss": 1.1192, "step": 1621 }, { "epoch": 0.09, "grad_norm": 2.0270514488220215, "learning_rate": 1.9792710005431695e-05, "loss": 1.0353, "step": 1622 }, { "epoch": 0.09, "grad_norm": 2.0057146549224854, "learning_rate": 1.9792333568978018e-05, "loss": 1.1613, "step": 1623 }, { "epoch": 0.09, "grad_norm": 2.018458127975464, "learning_rate": 1.9791956794618263e-05, "loss": 1.0525, "step": 1624 }, { "epoch": 0.09, "grad_norm": 2.075502872467041, "learning_rate": 1.979157968236543e-05, "loss": 1.065, "step": 1625 }, { "epoch": 0.09, "grad_norm": 1.9254873991012573, "learning_rate": 1.9791202232232534e-05, "loss": 1.0411, "step": 1626 }, { "epoch": 0.09, "grad_norm": 2.2357261180877686, "learning_rate": 1.97908244442326e-05, "loss": 1.0471, "step": 1627 }, { "epoch": 0.09, "grad_norm": 1.9962939023971558, "learning_rate": 1.9790446318378667e-05, "loss": 1.1121, "step": 1628 }, { "epoch": 0.09, "grad_norm": 1.823634147644043, "learning_rate": 1.9790067854683778e-05, "loss": 1.1161, "step": 1629 }, { "epoch": 0.09, "grad_norm": 1.9571431875228882, "learning_rate": 1.9789689053160995e-05, "loss": 1.0946, "step": 1630 }, { "epoch": 0.09, "grad_norm": 1.890627145767212, "learning_rate": 1.9789309913823387e-05, "loss": 1.0639, "step": 1631 }, { "epoch": 0.09, "grad_norm": 2.0019779205322266, "learning_rate": 1.9788930436684043e-05, "loss": 1.1484, "step": 1632 }, { "epoch": 0.09, "grad_norm": 1.835968255996704, "learning_rate": 1.978855062175605e-05, "loss": 1.0711, "step": 1633 }, { "epoch": 0.09, "grad_norm": 2.005263566970825, "learning_rate": 1.978817046905252e-05, "loss": 1.1072, "step": 1634 }, { "epoch": 0.09, "grad_norm": 1.9493229389190674, "learning_rate": 1.978778997858657e-05, "loss": 1.1526, "step": 1635 }, { "epoch": 0.09, "grad_norm": 2.0082051753997803, "learning_rate": 1.9787409150371327e-05, "loss": 1.0992, "step": 1636 }, { "epoch": 0.09, "grad_norm": 1.9586727619171143, "learning_rate": 1.978702798441994e-05, "loss": 1.1867, "step": 1637 }, { "epoch": 0.09, "grad_norm": 1.8139171600341797, "learning_rate": 1.9786646480745547e-05, "loss": 1.0407, "step": 1638 }, { "epoch": 0.09, "grad_norm": 1.8802461624145508, "learning_rate": 1.978626463936133e-05, "loss": 1.0505, "step": 1639 }, { "epoch": 0.09, "grad_norm": 2.0512077808380127, "learning_rate": 1.9785882460280452e-05, "loss": 1.1202, "step": 1640 }, { "epoch": 0.09, "grad_norm": 1.8554996252059937, "learning_rate": 1.9785499943516108e-05, "loss": 1.1558, "step": 1641 }, { "epoch": 0.09, "grad_norm": 2.029660701751709, "learning_rate": 1.9785117089081497e-05, "loss": 1.1417, "step": 1642 }, { "epoch": 0.09, "grad_norm": 2.0053927898406982, "learning_rate": 1.9784733896989826e-05, "loss": 1.0932, "step": 1643 }, { "epoch": 0.09, "grad_norm": 2.0511276721954346, "learning_rate": 1.9784350367254322e-05, "loss": 1.1392, "step": 1644 }, { "epoch": 0.09, "grad_norm": 1.8730463981628418, "learning_rate": 1.978396649988822e-05, "loss": 1.0124, "step": 1645 }, { "epoch": 0.09, "grad_norm": 1.9827896356582642, "learning_rate": 1.9783582294904762e-05, "loss": 1.0992, "step": 1646 }, { "epoch": 0.09, "grad_norm": 2.067535400390625, "learning_rate": 1.9783197752317207e-05, "loss": 1.0703, "step": 1647 }, { "epoch": 0.09, "grad_norm": 2.218522071838379, "learning_rate": 1.978281287213883e-05, "loss": 1.0789, "step": 1648 }, { "epoch": 0.09, "grad_norm": 2.1139981746673584, "learning_rate": 1.9782427654382906e-05, "loss": 1.0125, "step": 1649 }, { "epoch": 0.09, "grad_norm": 2.27189040184021, "learning_rate": 1.978204209906273e-05, "loss": 1.1011, "step": 1650 }, { "epoch": 0.09, "grad_norm": 2.0736489295959473, "learning_rate": 1.9781656206191604e-05, "loss": 1.0902, "step": 1651 }, { "epoch": 0.09, "grad_norm": 2.0755748748779297, "learning_rate": 1.9781269975782848e-05, "loss": 1.0653, "step": 1652 }, { "epoch": 0.09, "grad_norm": 1.988792061805725, "learning_rate": 1.978088340784979e-05, "loss": 1.0032, "step": 1653 }, { "epoch": 0.09, "grad_norm": 2.036255121231079, "learning_rate": 1.9780496502405765e-05, "loss": 1.1107, "step": 1654 }, { "epoch": 0.09, "grad_norm": 2.14223313331604, "learning_rate": 1.9780109259464127e-05, "loss": 1.073, "step": 1655 }, { "epoch": 0.09, "grad_norm": 2.1095829010009766, "learning_rate": 1.9779721679038237e-05, "loss": 1.0566, "step": 1656 }, { "epoch": 0.1, "grad_norm": 2.1173906326293945, "learning_rate": 1.977933376114147e-05, "loss": 0.9956, "step": 1657 }, { "epoch": 0.1, "grad_norm": 2.165433168411255, "learning_rate": 1.9778945505787216e-05, "loss": 1.1912, "step": 1658 }, { "epoch": 0.1, "grad_norm": 2.1865346431732178, "learning_rate": 1.9778556912988865e-05, "loss": 1.1065, "step": 1659 }, { "epoch": 0.1, "grad_norm": 1.797239065170288, "learning_rate": 1.9778167982759836e-05, "loss": 1.0452, "step": 1660 }, { "epoch": 0.1, "grad_norm": 2.170520782470703, "learning_rate": 1.977777871511354e-05, "loss": 1.1055, "step": 1661 }, { "epoch": 0.1, "grad_norm": 2.5014305114746094, "learning_rate": 1.977738911006341e-05, "loss": 1.2075, "step": 1662 }, { "epoch": 0.1, "grad_norm": 1.1980966329574585, "learning_rate": 1.9776999167622902e-05, "loss": 0.6351, "step": 1663 }, { "epoch": 0.1, "grad_norm": 2.0420920848846436, "learning_rate": 1.9776608887805456e-05, "loss": 1.0724, "step": 1664 }, { "epoch": 0.1, "grad_norm": 1.9526374340057373, "learning_rate": 1.977621827062455e-05, "loss": 1.0672, "step": 1665 }, { "epoch": 0.1, "grad_norm": 2.0691959857940674, "learning_rate": 1.9775827316093663e-05, "loss": 1.032, "step": 1666 }, { "epoch": 0.1, "grad_norm": 2.259793519973755, "learning_rate": 1.9775436024226283e-05, "loss": 1.2199, "step": 1667 }, { "epoch": 0.1, "grad_norm": 1.9516514539718628, "learning_rate": 1.977504439503591e-05, "loss": 1.0821, "step": 1668 }, { "epoch": 0.1, "grad_norm": 1.0254062414169312, "learning_rate": 1.977465242853606e-05, "loss": 0.5414, "step": 1669 }, { "epoch": 0.1, "grad_norm": 2.119377851486206, "learning_rate": 1.9774260124740257e-05, "loss": 1.1291, "step": 1670 }, { "epoch": 0.1, "grad_norm": 2.2170608043670654, "learning_rate": 1.9773867483662044e-05, "loss": 1.132, "step": 1671 }, { "epoch": 0.1, "grad_norm": 2.119483470916748, "learning_rate": 1.9773474505314966e-05, "loss": 1.073, "step": 1672 }, { "epoch": 0.1, "grad_norm": 1.8645308017730713, "learning_rate": 1.977308118971258e-05, "loss": 1.1062, "step": 1673 }, { "epoch": 0.1, "grad_norm": 2.0861430168151855, "learning_rate": 1.9772687536868468e-05, "loss": 1.143, "step": 1674 }, { "epoch": 0.1, "grad_norm": 2.1440775394439697, "learning_rate": 1.9772293546796205e-05, "loss": 1.106, "step": 1675 }, { "epoch": 0.1, "grad_norm": 2.320091724395752, "learning_rate": 1.9771899219509388e-05, "loss": 1.115, "step": 1676 }, { "epoch": 0.1, "grad_norm": 1.8981430530548096, "learning_rate": 1.977150455502163e-05, "loss": 1.085, "step": 1677 }, { "epoch": 0.1, "grad_norm": 2.4733781814575195, "learning_rate": 1.9771109553346542e-05, "loss": 1.1499, "step": 1678 }, { "epoch": 0.1, "grad_norm": 1.916886806488037, "learning_rate": 1.977071421449776e-05, "loss": 1.1342, "step": 1679 }, { "epoch": 0.1, "grad_norm": 1.9651583433151245, "learning_rate": 1.9770318538488923e-05, "loss": 1.1639, "step": 1680 }, { "epoch": 0.1, "grad_norm": 2.0920627117156982, "learning_rate": 1.9769922525333688e-05, "loss": 1.129, "step": 1681 }, { "epoch": 0.1, "grad_norm": 1.9595839977264404, "learning_rate": 1.9769526175045713e-05, "loss": 1.0276, "step": 1682 }, { "epoch": 0.1, "grad_norm": 1.2484540939331055, "learning_rate": 1.976912948763868e-05, "loss": 0.5912, "step": 1683 }, { "epoch": 0.1, "grad_norm": 2.2011122703552246, "learning_rate": 1.9768732463126282e-05, "loss": 1.0826, "step": 1684 }, { "epoch": 0.1, "grad_norm": 1.9850212335586548, "learning_rate": 1.9768335101522212e-05, "loss": 1.0202, "step": 1685 }, { "epoch": 0.1, "grad_norm": 2.0429179668426514, "learning_rate": 1.976793740284018e-05, "loss": 1.1044, "step": 1686 }, { "epoch": 0.1, "grad_norm": 1.9772089719772339, "learning_rate": 1.976753936709392e-05, "loss": 1.029, "step": 1687 }, { "epoch": 0.1, "grad_norm": 1.9813590049743652, "learning_rate": 1.976714099429716e-05, "loss": 1.1413, "step": 1688 }, { "epoch": 0.1, "grad_norm": 1.9641249179840088, "learning_rate": 1.9766742284463645e-05, "loss": 1.1728, "step": 1689 }, { "epoch": 0.1, "grad_norm": 1.9910151958465576, "learning_rate": 1.9766343237607136e-05, "loss": 1.0805, "step": 1690 }, { "epoch": 0.1, "grad_norm": 2.102437973022461, "learning_rate": 1.976594385374141e-05, "loss": 0.9996, "step": 1691 }, { "epoch": 0.1, "grad_norm": 1.8996132612228394, "learning_rate": 1.976554413288023e-05, "loss": 1.1539, "step": 1692 }, { "epoch": 0.1, "grad_norm": 2.278383255004883, "learning_rate": 1.976514407503741e-05, "loss": 1.0643, "step": 1693 }, { "epoch": 0.1, "grad_norm": 2.0727622509002686, "learning_rate": 1.9764743680226744e-05, "loss": 1.0664, "step": 1694 }, { "epoch": 0.1, "grad_norm": 2.003706932067871, "learning_rate": 1.9764342948462047e-05, "loss": 1.078, "step": 1695 }, { "epoch": 0.1, "grad_norm": 1.0936602354049683, "learning_rate": 1.9763941879757155e-05, "loss": 0.5485, "step": 1696 }, { "epoch": 0.1, "grad_norm": 2.1821393966674805, "learning_rate": 1.97635404741259e-05, "loss": 1.077, "step": 1697 }, { "epoch": 0.1, "grad_norm": 1.9630897045135498, "learning_rate": 1.9763138731582138e-05, "loss": 1.1377, "step": 1698 }, { "epoch": 0.1, "grad_norm": 1.9337328672409058, "learning_rate": 1.9762736652139727e-05, "loss": 1.0559, "step": 1699 }, { "epoch": 0.1, "grad_norm": 1.851905107498169, "learning_rate": 1.976233423581255e-05, "loss": 1.0427, "step": 1700 }, { "epoch": 0.1, "grad_norm": 1.8319166898727417, "learning_rate": 1.976193148261449e-05, "loss": 1.0862, "step": 1701 }, { "epoch": 0.1, "grad_norm": 2.0709736347198486, "learning_rate": 1.976152839255944e-05, "loss": 0.9576, "step": 1702 }, { "epoch": 0.1, "grad_norm": 2.1122336387634277, "learning_rate": 1.9761124965661313e-05, "loss": 1.1438, "step": 1703 }, { "epoch": 0.1, "grad_norm": 2.115537405014038, "learning_rate": 1.9760721201934026e-05, "loss": 1.0307, "step": 1704 }, { "epoch": 0.1, "grad_norm": 2.004452705383301, "learning_rate": 1.9760317101391525e-05, "loss": 1.0156, "step": 1705 }, { "epoch": 0.1, "grad_norm": 1.9772614240646362, "learning_rate": 1.975991266404774e-05, "loss": 1.0899, "step": 1706 }, { "epoch": 0.1, "grad_norm": 1.0291634798049927, "learning_rate": 1.975950788991663e-05, "loss": 0.5634, "step": 1707 }, { "epoch": 0.1, "grad_norm": 2.1664555072784424, "learning_rate": 1.9759102779012167e-05, "loss": 1.1282, "step": 1708 }, { "epoch": 0.1, "grad_norm": 2.046359062194824, "learning_rate": 1.9758697331348328e-05, "loss": 1.0731, "step": 1709 }, { "epoch": 0.1, "grad_norm": 2.0227787494659424, "learning_rate": 1.9758291546939107e-05, "loss": 1.0674, "step": 1710 }, { "epoch": 0.1, "grad_norm": 2.312598705291748, "learning_rate": 1.9757885425798497e-05, "loss": 1.1017, "step": 1711 }, { "epoch": 0.1, "grad_norm": 2.135272741317749, "learning_rate": 1.9757478967940526e-05, "loss": 1.1085, "step": 1712 }, { "epoch": 0.1, "grad_norm": 2.454162120819092, "learning_rate": 1.9757072173379206e-05, "loss": 1.1246, "step": 1713 }, { "epoch": 0.1, "grad_norm": 2.0647523403167725, "learning_rate": 1.975666504212858e-05, "loss": 1.0551, "step": 1714 }, { "epoch": 0.1, "grad_norm": 2.2257614135742188, "learning_rate": 1.9756257574202705e-05, "loss": 1.0485, "step": 1715 }, { "epoch": 0.1, "grad_norm": 2.2690961360931396, "learning_rate": 1.975584976961563e-05, "loss": 1.0749, "step": 1716 }, { "epoch": 0.1, "grad_norm": 1.9459435939788818, "learning_rate": 1.975544162838143e-05, "loss": 1.1076, "step": 1717 }, { "epoch": 0.1, "grad_norm": 0.9962010383605957, "learning_rate": 1.975503315051419e-05, "loss": 0.5681, "step": 1718 }, { "epoch": 0.1, "grad_norm": 2.205332040786743, "learning_rate": 1.9754624336028007e-05, "loss": 1.1968, "step": 1719 }, { "epoch": 0.1, "grad_norm": 2.0800867080688477, "learning_rate": 1.975421518493699e-05, "loss": 1.0004, "step": 1720 }, { "epoch": 0.1, "grad_norm": 2.0179686546325684, "learning_rate": 1.9753805697255246e-05, "loss": 1.0951, "step": 1721 }, { "epoch": 0.1, "grad_norm": 2.1346912384033203, "learning_rate": 1.975339587299692e-05, "loss": 1.1005, "step": 1722 }, { "epoch": 0.1, "grad_norm": 2.0877277851104736, "learning_rate": 1.975298571217614e-05, "loss": 1.0645, "step": 1723 }, { "epoch": 0.1, "grad_norm": 2.123556137084961, "learning_rate": 1.9752575214807077e-05, "loss": 1.0744, "step": 1724 }, { "epoch": 0.1, "grad_norm": 2.0165324211120605, "learning_rate": 1.975216438090388e-05, "loss": 1.0771, "step": 1725 }, { "epoch": 0.1, "grad_norm": 2.134835958480835, "learning_rate": 1.9751753210480733e-05, "loss": 1.0927, "step": 1726 }, { "epoch": 0.1, "grad_norm": 2.2924907207489014, "learning_rate": 1.9751341703551824e-05, "loss": 1.2181, "step": 1727 }, { "epoch": 0.1, "grad_norm": 2.1274254322052, "learning_rate": 1.9750929860131353e-05, "loss": 1.1463, "step": 1728 }, { "epoch": 0.1, "grad_norm": 2.1014857292175293, "learning_rate": 1.975051768023353e-05, "loss": 1.0484, "step": 1729 }, { "epoch": 0.1, "grad_norm": 2.0884482860565186, "learning_rate": 1.9750105163872577e-05, "loss": 1.1372, "step": 1730 }, { "epoch": 0.1, "grad_norm": 2.2393875122070312, "learning_rate": 1.9749692311062733e-05, "loss": 1.1367, "step": 1731 }, { "epoch": 0.1, "grad_norm": 1.917853832244873, "learning_rate": 1.9749279121818235e-05, "loss": 1.1067, "step": 1732 }, { "epoch": 0.1, "grad_norm": 2.194563388824463, "learning_rate": 1.9748865596153356e-05, "loss": 1.1273, "step": 1733 }, { "epoch": 0.1, "grad_norm": 2.126788854598999, "learning_rate": 1.9748451734082356e-05, "loss": 1.091, "step": 1734 }, { "epoch": 0.1, "grad_norm": 2.010206699371338, "learning_rate": 1.9748037535619518e-05, "loss": 1.1165, "step": 1735 }, { "epoch": 0.1, "grad_norm": 2.023131847381592, "learning_rate": 1.974762300077913e-05, "loss": 1.1873, "step": 1736 }, { "epoch": 0.1, "grad_norm": 1.9872779846191406, "learning_rate": 1.9747208129575507e-05, "loss": 1.0712, "step": 1737 }, { "epoch": 0.1, "grad_norm": 1.1089580059051514, "learning_rate": 1.9746792922022956e-05, "loss": 0.6061, "step": 1738 }, { "epoch": 0.1, "grad_norm": 1.8352059125900269, "learning_rate": 1.974637737813581e-05, "loss": 1.0785, "step": 1739 }, { "epoch": 0.1, "grad_norm": 1.9942800998687744, "learning_rate": 1.9745961497928406e-05, "loss": 1.1035, "step": 1740 }, { "epoch": 0.1, "grad_norm": 2.0443503856658936, "learning_rate": 1.974554528141509e-05, "loss": 1.1267, "step": 1741 }, { "epoch": 0.1, "grad_norm": 2.3537492752075195, "learning_rate": 1.9745128728610235e-05, "loss": 1.094, "step": 1742 }, { "epoch": 0.1, "grad_norm": 1.9897205829620361, "learning_rate": 1.974471183952821e-05, "loss": 1.0581, "step": 1743 }, { "epoch": 0.1, "grad_norm": 2.0357699394226074, "learning_rate": 1.9744294614183397e-05, "loss": 1.1107, "step": 1744 }, { "epoch": 0.1, "grad_norm": 2.2005937099456787, "learning_rate": 1.97438770525902e-05, "loss": 1.0821, "step": 1745 }, { "epoch": 0.1, "grad_norm": 2.001901865005493, "learning_rate": 1.974345915476302e-05, "loss": 1.0564, "step": 1746 }, { "epoch": 0.1, "grad_norm": 2.468035936355591, "learning_rate": 1.9743040920716282e-05, "loss": 1.1817, "step": 1747 }, { "epoch": 0.1, "grad_norm": 1.9884804487228394, "learning_rate": 1.974262235046442e-05, "loss": 1.1304, "step": 1748 }, { "epoch": 0.1, "grad_norm": 2.144960641860962, "learning_rate": 1.9742203444021878e-05, "loss": 1.0166, "step": 1749 }, { "epoch": 0.1, "grad_norm": 2.011486291885376, "learning_rate": 1.9741784201403104e-05, "loss": 1.0824, "step": 1750 }, { "epoch": 0.1, "grad_norm": 2.0152251720428467, "learning_rate": 1.974136462262257e-05, "loss": 1.0871, "step": 1751 }, { "epoch": 0.1, "grad_norm": 1.9161790609359741, "learning_rate": 1.9740944707694757e-05, "loss": 1.0788, "step": 1752 }, { "epoch": 0.1, "grad_norm": 2.0590739250183105, "learning_rate": 1.974052445663415e-05, "loss": 1.0372, "step": 1753 }, { "epoch": 0.1, "grad_norm": 2.0413475036621094, "learning_rate": 1.9740103869455257e-05, "loss": 1.0788, "step": 1754 }, { "epoch": 0.1, "grad_norm": 2.130965232849121, "learning_rate": 1.9739682946172584e-05, "loss": 1.1716, "step": 1755 }, { "epoch": 0.1, "grad_norm": 2.155247449874878, "learning_rate": 1.9739261686800662e-05, "loss": 1.1196, "step": 1756 }, { "epoch": 0.1, "grad_norm": 1.9767510890960693, "learning_rate": 1.973884009135402e-05, "loss": 1.1925, "step": 1757 }, { "epoch": 0.1, "grad_norm": 1.2008259296417236, "learning_rate": 1.9738418159847216e-05, "loss": 0.597, "step": 1758 }, { "epoch": 0.1, "grad_norm": 2.095219612121582, "learning_rate": 1.9737995892294803e-05, "loss": 1.0242, "step": 1759 }, { "epoch": 0.1, "grad_norm": 2.1930441856384277, "learning_rate": 1.9737573288711348e-05, "loss": 1.1751, "step": 1760 }, { "epoch": 0.1, "grad_norm": 1.9918029308319092, "learning_rate": 1.9737150349111447e-05, "loss": 1.1299, "step": 1761 }, { "epoch": 0.1, "grad_norm": 2.037109851837158, "learning_rate": 1.9736727073509684e-05, "loss": 1.0889, "step": 1762 }, { "epoch": 0.1, "grad_norm": 2.012993097305298, "learning_rate": 1.9736303461920667e-05, "loss": 1.0657, "step": 1763 }, { "epoch": 0.1, "grad_norm": 2.165102958679199, "learning_rate": 1.9735879514359017e-05, "loss": 1.0382, "step": 1764 }, { "epoch": 0.1, "grad_norm": 2.009129762649536, "learning_rate": 1.9735455230839363e-05, "loss": 1.0391, "step": 1765 }, { "epoch": 0.1, "grad_norm": 1.9560577869415283, "learning_rate": 1.973503061137634e-05, "loss": 1.0763, "step": 1766 }, { "epoch": 0.1, "grad_norm": 1.7726469039916992, "learning_rate": 1.9734605655984604e-05, "loss": 0.9933, "step": 1767 }, { "epoch": 0.1, "grad_norm": 2.263556718826294, "learning_rate": 1.9734180364678824e-05, "loss": 1.144, "step": 1768 }, { "epoch": 0.1, "grad_norm": 2.1548538208007812, "learning_rate": 1.973375473747367e-05, "loss": 1.1339, "step": 1769 }, { "epoch": 0.1, "grad_norm": 1.8769031763076782, "learning_rate": 1.9733328774383825e-05, "loss": 1.174, "step": 1770 }, { "epoch": 0.1, "grad_norm": 2.128791570663452, "learning_rate": 1.9732902475423995e-05, "loss": 0.9804, "step": 1771 }, { "epoch": 0.1, "grad_norm": 2.067493438720703, "learning_rate": 1.973247584060889e-05, "loss": 1.1482, "step": 1772 }, { "epoch": 0.1, "grad_norm": 1.8724035024642944, "learning_rate": 1.973204886995323e-05, "loss": 1.0242, "step": 1773 }, { "epoch": 0.1, "grad_norm": 1.9555832147598267, "learning_rate": 1.9731621563471748e-05, "loss": 1.0877, "step": 1774 }, { "epoch": 0.1, "grad_norm": 1.945119857788086, "learning_rate": 1.9731193921179192e-05, "loss": 1.181, "step": 1775 }, { "epoch": 0.1, "grad_norm": 1.8445144891738892, "learning_rate": 1.9730765943090314e-05, "loss": 1.081, "step": 1776 }, { "epoch": 0.1, "grad_norm": 2.080310106277466, "learning_rate": 1.9730337629219886e-05, "loss": 1.1375, "step": 1777 }, { "epoch": 0.1, "grad_norm": 2.0283448696136475, "learning_rate": 1.972990897958269e-05, "loss": 1.0731, "step": 1778 }, { "epoch": 0.1, "grad_norm": 1.3141883611679077, "learning_rate": 1.972947999419351e-05, "loss": 0.6173, "step": 1779 }, { "epoch": 0.1, "grad_norm": 2.1313295364379883, "learning_rate": 1.9729050673067156e-05, "loss": 1.0467, "step": 1780 }, { "epoch": 0.1, "grad_norm": 2.1361844539642334, "learning_rate": 1.972862101621844e-05, "loss": 1.1688, "step": 1781 }, { "epoch": 0.1, "grad_norm": 2.201749086380005, "learning_rate": 1.9728191023662188e-05, "loss": 1.1626, "step": 1782 }, { "epoch": 0.1, "grad_norm": 1.8586986064910889, "learning_rate": 1.972776069541324e-05, "loss": 1.1071, "step": 1783 }, { "epoch": 0.1, "grad_norm": 1.819804072380066, "learning_rate": 1.9727330031486443e-05, "loss": 1.1585, "step": 1784 }, { "epoch": 0.1, "grad_norm": 2.1963603496551514, "learning_rate": 1.972689903189666e-05, "loss": 1.0215, "step": 1785 }, { "epoch": 0.1, "grad_norm": 2.010779857635498, "learning_rate": 1.972646769665876e-05, "loss": 1.0796, "step": 1786 }, { "epoch": 0.1, "grad_norm": 1.9218958616256714, "learning_rate": 1.972603602578763e-05, "loss": 1.0612, "step": 1787 }, { "epoch": 0.1, "grad_norm": 1.994632601737976, "learning_rate": 1.9725604019298162e-05, "loss": 1.0832, "step": 1788 }, { "epoch": 0.1, "grad_norm": 2.118058919906616, "learning_rate": 1.9725171677205273e-05, "loss": 1.0512, "step": 1789 }, { "epoch": 0.1, "grad_norm": 1.8827786445617676, "learning_rate": 1.9724738999523874e-05, "loss": 1.0859, "step": 1790 }, { "epoch": 0.1, "grad_norm": 2.1775686740875244, "learning_rate": 1.9724305986268898e-05, "loss": 1.1353, "step": 1791 }, { "epoch": 0.1, "grad_norm": 2.099546432495117, "learning_rate": 1.972387263745528e-05, "loss": 1.0989, "step": 1792 }, { "epoch": 0.1, "grad_norm": 1.9460331201553345, "learning_rate": 1.9723438953097985e-05, "loss": 1.0469, "step": 1793 }, { "epoch": 0.1, "grad_norm": 1.8811291456222534, "learning_rate": 1.9723004933211975e-05, "loss": 1.0835, "step": 1794 }, { "epoch": 0.1, "grad_norm": 1.9230514764785767, "learning_rate": 1.972257057781222e-05, "loss": 1.1082, "step": 1795 }, { "epoch": 0.1, "grad_norm": 1.9974192380905151, "learning_rate": 1.9722135886913716e-05, "loss": 1.1541, "step": 1796 }, { "epoch": 0.1, "grad_norm": 1.9699289798736572, "learning_rate": 1.972170086053146e-05, "loss": 1.0339, "step": 1797 }, { "epoch": 0.1, "grad_norm": 1.9669231176376343, "learning_rate": 1.972126549868046e-05, "loss": 1.1009, "step": 1798 }, { "epoch": 0.1, "grad_norm": 2.0928196907043457, "learning_rate": 1.972082980137575e-05, "loss": 1.0161, "step": 1799 }, { "epoch": 0.1, "grad_norm": 2.152636766433716, "learning_rate": 1.972039376863235e-05, "loss": 1.0703, "step": 1800 }, { "epoch": 0.1, "grad_norm": 2.007467746734619, "learning_rate": 1.971995740046532e-05, "loss": 1.063, "step": 1801 }, { "epoch": 0.1, "grad_norm": 2.372265338897705, "learning_rate": 1.971952069688971e-05, "loss": 1.1111, "step": 1802 }, { "epoch": 0.1, "grad_norm": 1.9397143125534058, "learning_rate": 1.971908365792059e-05, "loss": 1.081, "step": 1803 }, { "epoch": 0.1, "grad_norm": 2.1565940380096436, "learning_rate": 1.971864628357304e-05, "loss": 1.1147, "step": 1804 }, { "epoch": 0.1, "grad_norm": 2.154966115951538, "learning_rate": 1.971820857386216e-05, "loss": 1.1186, "step": 1805 }, { "epoch": 0.1, "grad_norm": 2.2199151515960693, "learning_rate": 1.9717770528803046e-05, "loss": 1.0974, "step": 1806 }, { "epoch": 0.1, "grad_norm": 2.0128839015960693, "learning_rate": 1.9717332148410817e-05, "loss": 1.0827, "step": 1807 }, { "epoch": 0.1, "grad_norm": 2.1589548587799072, "learning_rate": 1.97168934327006e-05, "loss": 1.0958, "step": 1808 }, { "epoch": 0.1, "grad_norm": 1.9092520475387573, "learning_rate": 1.9716454381687535e-05, "loss": 1.0037, "step": 1809 }, { "epoch": 0.1, "grad_norm": 2.080681324005127, "learning_rate": 1.9716014995386767e-05, "loss": 1.0566, "step": 1810 }, { "epoch": 0.1, "grad_norm": 1.865631341934204, "learning_rate": 1.9715575273813466e-05, "loss": 0.9919, "step": 1811 }, { "epoch": 0.1, "grad_norm": 2.0890274047851562, "learning_rate": 1.97151352169828e-05, "loss": 1.1962, "step": 1812 }, { "epoch": 0.1, "grad_norm": 1.9518492221832275, "learning_rate": 1.9714694824909954e-05, "loss": 1.0589, "step": 1813 }, { "epoch": 0.1, "grad_norm": 1.9185514450073242, "learning_rate": 1.9714254097610128e-05, "loss": 1.076, "step": 1814 }, { "epoch": 0.1, "grad_norm": 1.9747978448867798, "learning_rate": 1.971381303509853e-05, "loss": 1.0736, "step": 1815 }, { "epoch": 0.1, "grad_norm": 2.0762267112731934, "learning_rate": 1.9713371637390376e-05, "loss": 1.1286, "step": 1816 }, { "epoch": 0.1, "grad_norm": 2.014113664627075, "learning_rate": 1.9712929904500905e-05, "loss": 1.0808, "step": 1817 }, { "epoch": 0.1, "grad_norm": 1.9392385482788086, "learning_rate": 1.971248783644535e-05, "loss": 1.0036, "step": 1818 }, { "epoch": 0.1, "grad_norm": 2.078747272491455, "learning_rate": 1.9712045433238972e-05, "loss": 1.1318, "step": 1819 }, { "epoch": 0.1, "grad_norm": 2.0274078845977783, "learning_rate": 1.971160269489704e-05, "loss": 1.0583, "step": 1820 }, { "epoch": 0.1, "grad_norm": 2.037526845932007, "learning_rate": 1.9711159621434822e-05, "loss": 1.0786, "step": 1821 }, { "epoch": 0.1, "grad_norm": 2.1042792797088623, "learning_rate": 1.971071621286761e-05, "loss": 1.1058, "step": 1822 }, { "epoch": 0.1, "grad_norm": 2.1857566833496094, "learning_rate": 1.9710272469210713e-05, "loss": 1.0701, "step": 1823 }, { "epoch": 0.1, "grad_norm": 1.9370806217193604, "learning_rate": 1.9709828390479436e-05, "loss": 1.0763, "step": 1824 }, { "epoch": 0.1, "grad_norm": 1.6989450454711914, "learning_rate": 1.9709383976689102e-05, "loss": 1.0247, "step": 1825 }, { "epoch": 0.1, "grad_norm": 2.118069648742676, "learning_rate": 1.970893922785505e-05, "loss": 1.2092, "step": 1826 }, { "epoch": 0.1, "grad_norm": 2.1400671005249023, "learning_rate": 1.970849414399263e-05, "loss": 1.0178, "step": 1827 }, { "epoch": 0.1, "grad_norm": 3.1096243858337402, "learning_rate": 1.9708048725117194e-05, "loss": 1.095, "step": 1828 }, { "epoch": 0.1, "grad_norm": 1.9998549222946167, "learning_rate": 1.9707602971244115e-05, "loss": 1.0742, "step": 1829 }, { "epoch": 0.1, "grad_norm": 1.9400320053100586, "learning_rate": 1.9707156882388773e-05, "loss": 0.9742, "step": 1830 }, { "epoch": 0.11, "grad_norm": 2.1331746578216553, "learning_rate": 1.9706710458566564e-05, "loss": 1.0566, "step": 1831 }, { "epoch": 0.11, "grad_norm": 1.9247522354125977, "learning_rate": 1.9706263699792895e-05, "loss": 1.0662, "step": 1832 }, { "epoch": 0.11, "grad_norm": 1.9742368459701538, "learning_rate": 1.970581660608317e-05, "loss": 1.0608, "step": 1833 }, { "epoch": 0.11, "grad_norm": 2.023930072784424, "learning_rate": 1.9705369177452835e-05, "loss": 0.9979, "step": 1834 }, { "epoch": 0.11, "grad_norm": 1.9788657426834106, "learning_rate": 1.970492141391732e-05, "loss": 1.0738, "step": 1835 }, { "epoch": 0.11, "grad_norm": 1.9974464178085327, "learning_rate": 1.9704473315492072e-05, "loss": 1.059, "step": 1836 }, { "epoch": 0.11, "grad_norm": 2.179856777191162, "learning_rate": 1.9704024882192562e-05, "loss": 1.1156, "step": 1837 }, { "epoch": 0.11, "grad_norm": 1.971026062965393, "learning_rate": 1.9703576114034257e-05, "loss": 1.1142, "step": 1838 }, { "epoch": 0.11, "grad_norm": 1.9058090448379517, "learning_rate": 1.9703127011032646e-05, "loss": 1.0117, "step": 1839 }, { "epoch": 0.11, "grad_norm": 1.9456253051757812, "learning_rate": 1.9702677573203232e-05, "loss": 1.0769, "step": 1840 }, { "epoch": 0.11, "grad_norm": 2.038161039352417, "learning_rate": 1.9702227800561514e-05, "loss": 1.1014, "step": 1841 }, { "epoch": 0.11, "grad_norm": 2.078125476837158, "learning_rate": 1.9701777693123017e-05, "loss": 1.141, "step": 1842 }, { "epoch": 0.11, "grad_norm": 2.029832124710083, "learning_rate": 1.9701327250903273e-05, "loss": 1.1286, "step": 1843 }, { "epoch": 0.11, "grad_norm": 1.8729559183120728, "learning_rate": 1.9700876473917825e-05, "loss": 1.1493, "step": 1844 }, { "epoch": 0.11, "grad_norm": 2.325866222381592, "learning_rate": 1.970042536218223e-05, "loss": 1.1554, "step": 1845 }, { "epoch": 0.11, "grad_norm": 2.217384099960327, "learning_rate": 1.9699973915712046e-05, "loss": 1.1087, "step": 1846 }, { "epoch": 0.11, "grad_norm": 2.1590614318847656, "learning_rate": 1.9699522134522866e-05, "loss": 1.0392, "step": 1847 }, { "epoch": 0.11, "grad_norm": 1.8054181337356567, "learning_rate": 1.969907001863027e-05, "loss": 1.0825, "step": 1848 }, { "epoch": 0.11, "grad_norm": 2.164466619491577, "learning_rate": 1.9698617568049857e-05, "loss": 1.1268, "step": 1849 }, { "epoch": 0.11, "grad_norm": 1.7488116025924683, "learning_rate": 1.9698164782797247e-05, "loss": 1.0256, "step": 1850 }, { "epoch": 0.11, "grad_norm": 2.063260078430176, "learning_rate": 1.9697711662888062e-05, "loss": 1.128, "step": 1851 }, { "epoch": 0.11, "grad_norm": 2.043689727783203, "learning_rate": 1.9697258208337935e-05, "loss": 1.1244, "step": 1852 }, { "epoch": 0.11, "grad_norm": 1.1509673595428467, "learning_rate": 1.9696804419162513e-05, "loss": 0.5746, "step": 1853 }, { "epoch": 0.11, "grad_norm": 1.0850212574005127, "learning_rate": 1.969635029537746e-05, "loss": 0.6038, "step": 1854 }, { "epoch": 0.11, "grad_norm": 2.0359199047088623, "learning_rate": 1.9695895836998448e-05, "loss": 1.1049, "step": 1855 }, { "epoch": 0.11, "grad_norm": 2.0931057929992676, "learning_rate": 1.969544104404115e-05, "loss": 1.0613, "step": 1856 }, { "epoch": 0.11, "grad_norm": 2.351095676422119, "learning_rate": 1.9694985916521266e-05, "loss": 1.1317, "step": 1857 }, { "epoch": 0.11, "grad_norm": 1.973114252090454, "learning_rate": 1.96945304544545e-05, "loss": 1.1384, "step": 1858 }, { "epoch": 0.11, "grad_norm": 1.8562629222869873, "learning_rate": 1.969407465785657e-05, "loss": 1.0101, "step": 1859 }, { "epoch": 0.11, "grad_norm": 2.224827766418457, "learning_rate": 1.96936185267432e-05, "loss": 1.1184, "step": 1860 }, { "epoch": 0.11, "grad_norm": 2.1688177585601807, "learning_rate": 1.969316206113013e-05, "loss": 1.0626, "step": 1861 }, { "epoch": 0.11, "grad_norm": 2.143812417984009, "learning_rate": 1.969270526103312e-05, "loss": 1.0885, "step": 1862 }, { "epoch": 0.11, "grad_norm": 1.8832557201385498, "learning_rate": 1.9692248126467923e-05, "loss": 1.0971, "step": 1863 }, { "epoch": 0.11, "grad_norm": 2.322359561920166, "learning_rate": 1.9691790657450324e-05, "loss": 1.1657, "step": 1864 }, { "epoch": 0.11, "grad_norm": 2.358800172805786, "learning_rate": 1.9691332853996093e-05, "loss": 1.0904, "step": 1865 }, { "epoch": 0.11, "grad_norm": 1.9578733444213867, "learning_rate": 1.969087471612104e-05, "loss": 1.0162, "step": 1866 }, { "epoch": 0.11, "grad_norm": 2.180544137954712, "learning_rate": 1.9690416243840976e-05, "loss": 1.0867, "step": 1867 }, { "epoch": 0.11, "grad_norm": 2.208935022354126, "learning_rate": 1.968995743717171e-05, "loss": 1.1689, "step": 1868 }, { "epoch": 0.11, "grad_norm": 2.13218355178833, "learning_rate": 1.9689498296129084e-05, "loss": 1.0683, "step": 1869 }, { "epoch": 0.11, "grad_norm": 2.3065497875213623, "learning_rate": 1.968903882072894e-05, "loss": 1.1383, "step": 1870 }, { "epoch": 0.11, "grad_norm": 2.1199758052825928, "learning_rate": 1.968857901098713e-05, "loss": 1.0516, "step": 1871 }, { "epoch": 0.11, "grad_norm": 2.2501637935638428, "learning_rate": 1.968811886691952e-05, "loss": 1.052, "step": 1872 }, { "epoch": 0.11, "grad_norm": 1.8632465600967407, "learning_rate": 1.968765838854199e-05, "loss": 1.0023, "step": 1873 }, { "epoch": 0.11, "grad_norm": 1.9477862119674683, "learning_rate": 1.9687197575870435e-05, "loss": 1.1966, "step": 1874 }, { "epoch": 0.11, "grad_norm": 2.224736452102661, "learning_rate": 1.9686736428920748e-05, "loss": 1.0793, "step": 1875 }, { "epoch": 0.11, "grad_norm": 1.985801100730896, "learning_rate": 1.9686274947708848e-05, "loss": 1.1082, "step": 1876 }, { "epoch": 0.11, "grad_norm": 2.041146755218506, "learning_rate": 1.9685813132250655e-05, "loss": 1.166, "step": 1877 }, { "epoch": 0.11, "grad_norm": 1.9730193614959717, "learning_rate": 1.968535098256211e-05, "loss": 1.0668, "step": 1878 }, { "epoch": 0.11, "grad_norm": 2.007770299911499, "learning_rate": 1.9684888498659154e-05, "loss": 1.1746, "step": 1879 }, { "epoch": 0.11, "grad_norm": 2.2531676292419434, "learning_rate": 1.968442568055775e-05, "loss": 1.0496, "step": 1880 }, { "epoch": 0.11, "grad_norm": 2.1449577808380127, "learning_rate": 1.968396252827387e-05, "loss": 1.1088, "step": 1881 }, { "epoch": 0.11, "grad_norm": 2.030939817428589, "learning_rate": 1.9683499041823495e-05, "loss": 1.1058, "step": 1882 }, { "epoch": 0.11, "grad_norm": 1.7645504474639893, "learning_rate": 1.9683035221222617e-05, "loss": 0.601, "step": 1883 }, { "epoch": 0.11, "grad_norm": 2.0631253719329834, "learning_rate": 1.9682571066487242e-05, "loss": 1.1194, "step": 1884 }, { "epoch": 0.11, "grad_norm": 1.9948899745941162, "learning_rate": 1.9682106577633385e-05, "loss": 1.0699, "step": 1885 }, { "epoch": 0.11, "grad_norm": 2.2630064487457275, "learning_rate": 1.9681641754677076e-05, "loss": 1.1082, "step": 1886 }, { "epoch": 0.11, "grad_norm": 2.015381336212158, "learning_rate": 1.9681176597634353e-05, "loss": 1.0819, "step": 1887 }, { "epoch": 0.11, "grad_norm": 1.3420841693878174, "learning_rate": 1.9680711106521274e-05, "loss": 0.6015, "step": 1888 }, { "epoch": 0.11, "grad_norm": 2.021106719970703, "learning_rate": 1.9680245281353894e-05, "loss": 1.1318, "step": 1889 }, { "epoch": 0.11, "grad_norm": 2.0298774242401123, "learning_rate": 1.967977912214829e-05, "loss": 1.1653, "step": 1890 }, { "epoch": 0.11, "grad_norm": 2.0051352977752686, "learning_rate": 1.9679312628920546e-05, "loss": 1.1263, "step": 1891 }, { "epoch": 0.11, "grad_norm": 2.2140235900878906, "learning_rate": 1.9678845801686766e-05, "loss": 1.1582, "step": 1892 }, { "epoch": 0.11, "grad_norm": 2.0819430351257324, "learning_rate": 1.9678378640463053e-05, "loss": 1.0603, "step": 1893 }, { "epoch": 0.11, "grad_norm": 2.0939204692840576, "learning_rate": 1.9677911145265524e-05, "loss": 1.0927, "step": 1894 }, { "epoch": 0.11, "grad_norm": 1.9242463111877441, "learning_rate": 1.9677443316110317e-05, "loss": 1.0455, "step": 1895 }, { "epoch": 0.11, "grad_norm": 1.8855524063110352, "learning_rate": 1.9676975153013574e-05, "loss": 1.0853, "step": 1896 }, { "epoch": 0.11, "grad_norm": 1.9202202558517456, "learning_rate": 1.9676506655991453e-05, "loss": 1.0659, "step": 1897 }, { "epoch": 0.11, "grad_norm": 2.2485251426696777, "learning_rate": 1.9676037825060117e-05, "loss": 1.0615, "step": 1898 }, { "epoch": 0.11, "grad_norm": 1.113287091255188, "learning_rate": 1.9675568660235746e-05, "loss": 0.5635, "step": 1899 }, { "epoch": 0.11, "grad_norm": 2.0088906288146973, "learning_rate": 1.9675099161534524e-05, "loss": 1.0554, "step": 1900 }, { "epoch": 0.11, "grad_norm": 1.8528177738189697, "learning_rate": 1.9674629328972657e-05, "loss": 1.1226, "step": 1901 }, { "epoch": 0.11, "grad_norm": 1.98314368724823, "learning_rate": 1.967415916256636e-05, "loss": 1.0786, "step": 1902 }, { "epoch": 0.11, "grad_norm": 1.992766261100769, "learning_rate": 1.9673688662331848e-05, "loss": 1.0806, "step": 1903 }, { "epoch": 0.11, "grad_norm": 2.0704426765441895, "learning_rate": 1.967321782828537e-05, "loss": 1.0969, "step": 1904 }, { "epoch": 0.11, "grad_norm": 3.5542867183685303, "learning_rate": 1.967274666044316e-05, "loss": 1.0653, "step": 1905 }, { "epoch": 0.11, "grad_norm": 2.056108236312866, "learning_rate": 1.9672275158821486e-05, "loss": 1.0861, "step": 1906 }, { "epoch": 0.11, "grad_norm": 2.040116548538208, "learning_rate": 1.9671803323436612e-05, "loss": 1.0995, "step": 1907 }, { "epoch": 0.11, "grad_norm": 2.223264694213867, "learning_rate": 1.9671331154304823e-05, "loss": 1.0498, "step": 1908 }, { "epoch": 0.11, "grad_norm": 2.055952548980713, "learning_rate": 1.967085865144241e-05, "loss": 1.1523, "step": 1909 }, { "epoch": 0.11, "grad_norm": 2.131486654281616, "learning_rate": 1.9670385814865685e-05, "loss": 1.1237, "step": 1910 }, { "epoch": 0.11, "grad_norm": 2.133578062057495, "learning_rate": 1.9669912644590954e-05, "loss": 1.0621, "step": 1911 }, { "epoch": 0.11, "grad_norm": 2.0090739727020264, "learning_rate": 1.9669439140634552e-05, "loss": 1.0827, "step": 1912 }, { "epoch": 0.11, "grad_norm": 1.9790887832641602, "learning_rate": 1.9668965303012815e-05, "loss": 1.119, "step": 1913 }, { "epoch": 0.11, "grad_norm": 2.1349194049835205, "learning_rate": 1.9668491131742092e-05, "loss": 1.0418, "step": 1914 }, { "epoch": 0.11, "grad_norm": 2.134464979171753, "learning_rate": 1.9668016626838753e-05, "loss": 1.0664, "step": 1915 }, { "epoch": 0.11, "grad_norm": 2.0129239559173584, "learning_rate": 1.966754178831916e-05, "loss": 1.2199, "step": 1916 }, { "epoch": 0.11, "grad_norm": 1.9092477560043335, "learning_rate": 1.9667066616199712e-05, "loss": 1.0602, "step": 1917 }, { "epoch": 0.11, "grad_norm": 2.0964646339416504, "learning_rate": 1.9666591110496794e-05, "loss": 1.006, "step": 1918 }, { "epoch": 0.11, "grad_norm": 2.0655882358551025, "learning_rate": 1.9666115271226823e-05, "loss": 1.1782, "step": 1919 }, { "epoch": 0.11, "grad_norm": 2.125945806503296, "learning_rate": 1.9665639098406215e-05, "loss": 1.0972, "step": 1920 }, { "epoch": 0.11, "grad_norm": 2.007563591003418, "learning_rate": 1.9665162592051397e-05, "loss": 1.048, "step": 1921 }, { "epoch": 0.11, "grad_norm": 1.987390160560608, "learning_rate": 1.9664685752178817e-05, "loss": 1.0839, "step": 1922 }, { "epoch": 0.11, "grad_norm": 2.0545575618743896, "learning_rate": 1.9664208578804934e-05, "loss": 1.0552, "step": 1923 }, { "epoch": 0.11, "grad_norm": 1.9538164138793945, "learning_rate": 1.9663731071946207e-05, "loss": 1.0589, "step": 1924 }, { "epoch": 0.11, "grad_norm": 2.092909574508667, "learning_rate": 1.9663253231619113e-05, "loss": 1.0941, "step": 1925 }, { "epoch": 0.11, "grad_norm": 2.1029818058013916, "learning_rate": 1.9662775057840145e-05, "loss": 1.122, "step": 1926 }, { "epoch": 0.11, "grad_norm": 2.0009849071502686, "learning_rate": 1.96622965506258e-05, "loss": 1.08, "step": 1927 }, { "epoch": 0.11, "grad_norm": 1.9353209733963013, "learning_rate": 1.9661817709992593e-05, "loss": 1.0749, "step": 1928 }, { "epoch": 0.11, "grad_norm": 1.9612815380096436, "learning_rate": 1.9661338535957046e-05, "loss": 1.0463, "step": 1929 }, { "epoch": 0.11, "grad_norm": 1.9495552778244019, "learning_rate": 1.9660859028535694e-05, "loss": 1.0123, "step": 1930 }, { "epoch": 0.11, "grad_norm": 2.0252058506011963, "learning_rate": 1.966037918774508e-05, "loss": 1.0127, "step": 1931 }, { "epoch": 0.11, "grad_norm": 2.110083818435669, "learning_rate": 1.9659899013601772e-05, "loss": 1.0423, "step": 1932 }, { "epoch": 0.11, "grad_norm": 2.054593801498413, "learning_rate": 1.9659418506122328e-05, "loss": 1.0695, "step": 1933 }, { "epoch": 0.11, "grad_norm": 1.9878177642822266, "learning_rate": 1.9658937665323337e-05, "loss": 1.1189, "step": 1934 }, { "epoch": 0.11, "grad_norm": 2.144376516342163, "learning_rate": 1.9658456491221387e-05, "loss": 1.106, "step": 1935 }, { "epoch": 0.11, "grad_norm": 2.2118537425994873, "learning_rate": 1.965797498383308e-05, "loss": 1.0582, "step": 1936 }, { "epoch": 0.11, "grad_norm": 1.9113328456878662, "learning_rate": 1.965749314317504e-05, "loss": 1.1397, "step": 1937 }, { "epoch": 0.11, "grad_norm": 1.8301200866699219, "learning_rate": 1.9657010969263887e-05, "loss": 1.0227, "step": 1938 }, { "epoch": 0.11, "grad_norm": 2.0810914039611816, "learning_rate": 1.965652846211626e-05, "loss": 1.0967, "step": 1939 }, { "epoch": 0.11, "grad_norm": 1.9043028354644775, "learning_rate": 1.965604562174881e-05, "loss": 1.0748, "step": 1940 }, { "epoch": 0.11, "grad_norm": 1.9406917095184326, "learning_rate": 1.96555624481782e-05, "loss": 0.9878, "step": 1941 }, { "epoch": 0.11, "grad_norm": 2.1192684173583984, "learning_rate": 1.96550789414211e-05, "loss": 1.0755, "step": 1942 }, { "epoch": 0.11, "grad_norm": 1.9011651277542114, "learning_rate": 1.9654595101494198e-05, "loss": 1.0645, "step": 1943 }, { "epoch": 0.11, "grad_norm": 1.8229209184646606, "learning_rate": 1.965411092841419e-05, "loss": 1.0192, "step": 1944 }, { "epoch": 0.11, "grad_norm": 1.9243736267089844, "learning_rate": 1.9653626422197778e-05, "loss": 1.1633, "step": 1945 }, { "epoch": 0.11, "grad_norm": 1.8593775033950806, "learning_rate": 1.9653141582861683e-05, "loss": 1.0684, "step": 1946 }, { "epoch": 0.11, "grad_norm": 1.969380259513855, "learning_rate": 1.965265641042264e-05, "loss": 1.137, "step": 1947 }, { "epoch": 0.11, "grad_norm": 1.909165620803833, "learning_rate": 1.965217090489739e-05, "loss": 1.0478, "step": 1948 }, { "epoch": 0.11, "grad_norm": 2.000683307647705, "learning_rate": 1.965168506630268e-05, "loss": 1.0516, "step": 1949 }, { "epoch": 0.11, "grad_norm": 1.9607040882110596, "learning_rate": 1.9651198894655278e-05, "loss": 1.1354, "step": 1950 }, { "epoch": 0.11, "grad_norm": 1.953071117401123, "learning_rate": 1.9650712389971964e-05, "loss": 1.0954, "step": 1951 }, { "epoch": 0.11, "grad_norm": 2.007289171218872, "learning_rate": 1.9650225552269526e-05, "loss": 1.0816, "step": 1952 }, { "epoch": 0.11, "grad_norm": 1.94691801071167, "learning_rate": 1.964973838156476e-05, "loss": 1.0984, "step": 1953 }, { "epoch": 0.11, "grad_norm": 1.9262628555297852, "learning_rate": 1.9649250877874476e-05, "loss": 1.0559, "step": 1954 }, { "epoch": 0.11, "grad_norm": 1.9184095859527588, "learning_rate": 1.96487630412155e-05, "loss": 1.0098, "step": 1955 }, { "epoch": 0.11, "grad_norm": 2.0065128803253174, "learning_rate": 1.9648274871604663e-05, "loss": 1.0948, "step": 1956 }, { "epoch": 0.11, "grad_norm": 1.178722858428955, "learning_rate": 1.964778636905881e-05, "loss": 0.598, "step": 1957 }, { "epoch": 0.11, "grad_norm": 1.8920912742614746, "learning_rate": 1.96472975335948e-05, "loss": 1.0701, "step": 1958 }, { "epoch": 0.11, "grad_norm": 1.931671142578125, "learning_rate": 1.9646808365229506e-05, "loss": 1.137, "step": 1959 }, { "epoch": 0.11, "grad_norm": 1.992349624633789, "learning_rate": 1.9646318863979797e-05, "loss": 1.0803, "step": 1960 }, { "epoch": 0.11, "grad_norm": 1.033719778060913, "learning_rate": 1.964582902986257e-05, "loss": 0.5966, "step": 1961 }, { "epoch": 0.11, "grad_norm": 2.057173013687134, "learning_rate": 1.964533886289473e-05, "loss": 1.1329, "step": 1962 }, { "epoch": 0.11, "grad_norm": 2.076580286026001, "learning_rate": 1.964484836309319e-05, "loss": 1.0282, "step": 1963 }, { "epoch": 0.11, "grad_norm": 1.943109393119812, "learning_rate": 1.9644357530474875e-05, "loss": 1.1523, "step": 1964 }, { "epoch": 0.11, "grad_norm": 1.162402629852295, "learning_rate": 1.964386636505672e-05, "loss": 0.6459, "step": 1965 }, { "epoch": 0.11, "grad_norm": 1.960707426071167, "learning_rate": 1.9643374866855674e-05, "loss": 1.07, "step": 1966 }, { "epoch": 0.11, "grad_norm": 1.9734525680541992, "learning_rate": 1.96428830358887e-05, "loss": 1.0551, "step": 1967 }, { "epoch": 0.11, "grad_norm": 1.0760449171066284, "learning_rate": 1.9642390872172773e-05, "loss": 0.6081, "step": 1968 }, { "epoch": 0.11, "grad_norm": 1.9683618545532227, "learning_rate": 1.964189837572487e-05, "loss": 1.1434, "step": 1969 }, { "epoch": 0.11, "grad_norm": 1.862407922744751, "learning_rate": 1.9641405546561984e-05, "loss": 1.0504, "step": 1970 }, { "epoch": 0.11, "grad_norm": 1.9058871269226074, "learning_rate": 1.9640912384701124e-05, "loss": 1.0426, "step": 1971 }, { "epoch": 0.11, "grad_norm": 2.0674712657928467, "learning_rate": 1.9640418890159313e-05, "loss": 0.9859, "step": 1972 }, { "epoch": 0.11, "grad_norm": 1.9644389152526855, "learning_rate": 1.9639925062953576e-05, "loss": 1.0761, "step": 1973 }, { "epoch": 0.11, "grad_norm": 1.9584554433822632, "learning_rate": 1.963943090310095e-05, "loss": 1.1416, "step": 1974 }, { "epoch": 0.11, "grad_norm": 1.9575107097625732, "learning_rate": 1.963893641061849e-05, "loss": 1.0598, "step": 1975 }, { "epoch": 0.11, "grad_norm": 1.8889191150665283, "learning_rate": 1.963844158552326e-05, "loss": 1.108, "step": 1976 }, { "epoch": 0.11, "grad_norm": 2.106147289276123, "learning_rate": 1.9637946427832337e-05, "loss": 1.0572, "step": 1977 }, { "epoch": 0.11, "grad_norm": 1.8799062967300415, "learning_rate": 1.9637450937562805e-05, "loss": 1.028, "step": 1978 }, { "epoch": 0.11, "grad_norm": 1.9847042560577393, "learning_rate": 1.963695511473176e-05, "loss": 1.0838, "step": 1979 }, { "epoch": 0.11, "grad_norm": 1.9777265787124634, "learning_rate": 1.963645895935632e-05, "loss": 1.1227, "step": 1980 }, { "epoch": 0.11, "grad_norm": 1.9350816011428833, "learning_rate": 1.963596247145359e-05, "loss": 1.0609, "step": 1981 }, { "epoch": 0.11, "grad_norm": 2.146388292312622, "learning_rate": 1.9635465651040717e-05, "loss": 1.0326, "step": 1982 }, { "epoch": 0.11, "grad_norm": 1.9863266944885254, "learning_rate": 1.963496849813484e-05, "loss": 1.1206, "step": 1983 }, { "epoch": 0.11, "grad_norm": 1.9964183568954468, "learning_rate": 1.9634471012753115e-05, "loss": 1.1081, "step": 1984 }, { "epoch": 0.11, "grad_norm": 1.8813986778259277, "learning_rate": 1.9633973194912708e-05, "loss": 1.0745, "step": 1985 }, { "epoch": 0.11, "grad_norm": 1.8746603727340698, "learning_rate": 1.9633475044630795e-05, "loss": 1.024, "step": 1986 }, { "epoch": 0.11, "grad_norm": 1.8395682573318481, "learning_rate": 1.9632976561924572e-05, "loss": 1.0373, "step": 1987 }, { "epoch": 0.11, "grad_norm": 1.8946608304977417, "learning_rate": 1.9632477746811232e-05, "loss": 0.9967, "step": 1988 }, { "epoch": 0.11, "grad_norm": 2.043721914291382, "learning_rate": 1.9631978599308e-05, "loss": 1.0953, "step": 1989 }, { "epoch": 0.11, "grad_norm": 1.9646673202514648, "learning_rate": 1.9631479119432085e-05, "loss": 1.1034, "step": 1990 }, { "epoch": 0.11, "grad_norm": 2.1654207706451416, "learning_rate": 1.9630979307200732e-05, "loss": 1.0211, "step": 1991 }, { "epoch": 0.11, "grad_norm": 2.406587600708008, "learning_rate": 1.9630479162631183e-05, "loss": 1.0901, "step": 1992 }, { "epoch": 0.11, "grad_norm": 2.0283422470092773, "learning_rate": 1.9629978685740706e-05, "loss": 1.1152, "step": 1993 }, { "epoch": 0.11, "grad_norm": 2.0785393714904785, "learning_rate": 1.962947787654656e-05, "loss": 1.1314, "step": 1994 }, { "epoch": 0.11, "grad_norm": 1.983107089996338, "learning_rate": 1.9628976735066036e-05, "loss": 1.0518, "step": 1995 }, { "epoch": 0.11, "grad_norm": 1.92808198928833, "learning_rate": 1.962847526131642e-05, "loss": 1.1066, "step": 1996 }, { "epoch": 0.11, "grad_norm": 2.173936367034912, "learning_rate": 1.9627973455315014e-05, "loss": 1.1249, "step": 1997 }, { "epoch": 0.11, "grad_norm": 1.9850523471832275, "learning_rate": 1.9627471317079146e-05, "loss": 1.1744, "step": 1998 }, { "epoch": 0.11, "grad_norm": 1.9090620279312134, "learning_rate": 1.9626968846626134e-05, "loss": 1.0545, "step": 1999 }, { "epoch": 0.11, "grad_norm": 1.2168614864349365, "learning_rate": 1.962646604397332e-05, "loss": 0.662, "step": 2000 }, { "epoch": 0.11, "grad_norm": 2.0397729873657227, "learning_rate": 1.9625962909138048e-05, "loss": 1.1416, "step": 2001 }, { "epoch": 0.11, "grad_norm": 2.2100512981414795, "learning_rate": 1.9625459442137688e-05, "loss": 1.1167, "step": 2002 }, { "epoch": 0.11, "grad_norm": 2.1123437881469727, "learning_rate": 1.962495564298961e-05, "loss": 1.1972, "step": 2003 }, { "epoch": 0.11, "grad_norm": 1.799432396888733, "learning_rate": 1.96244515117112e-05, "loss": 1.1289, "step": 2004 }, { "epoch": 0.11, "grad_norm": 2.344395160675049, "learning_rate": 1.9623947048319854e-05, "loss": 1.1671, "step": 2005 }, { "epoch": 0.12, "grad_norm": 1.9976422786712646, "learning_rate": 1.962344225283298e-05, "loss": 1.0938, "step": 2006 }, { "epoch": 0.12, "grad_norm": 2.09765625, "learning_rate": 1.962293712526799e-05, "loss": 1.0949, "step": 2007 }, { "epoch": 0.12, "grad_norm": 1.8186978101730347, "learning_rate": 1.9622431665642324e-05, "loss": 1.0912, "step": 2008 }, { "epoch": 0.12, "grad_norm": 1.8941640853881836, "learning_rate": 1.962192587397342e-05, "loss": 1.0673, "step": 2009 }, { "epoch": 0.12, "grad_norm": 2.1862854957580566, "learning_rate": 1.9621419750278732e-05, "loss": 1.0662, "step": 2010 }, { "epoch": 0.12, "grad_norm": 2.113650321960449, "learning_rate": 1.9620913294575724e-05, "loss": 1.0753, "step": 2011 }, { "epoch": 0.12, "grad_norm": 2.036569118499756, "learning_rate": 1.9620406506881876e-05, "loss": 1.0115, "step": 2012 }, { "epoch": 0.12, "grad_norm": 1.9064944982528687, "learning_rate": 1.961989938721467e-05, "loss": 1.1051, "step": 2013 }, { "epoch": 0.12, "grad_norm": 1.9965492486953735, "learning_rate": 1.961939193559161e-05, "loss": 1.0674, "step": 2014 }, { "epoch": 0.12, "grad_norm": 2.1026313304901123, "learning_rate": 1.9618884152030206e-05, "loss": 1.0898, "step": 2015 }, { "epoch": 0.12, "grad_norm": 2.073885202407837, "learning_rate": 1.9618376036547974e-05, "loss": 1.0513, "step": 2016 }, { "epoch": 0.12, "grad_norm": 2.2151987552642822, "learning_rate": 1.9617867589162457e-05, "loss": 1.0662, "step": 2017 }, { "epoch": 0.12, "grad_norm": 2.0801613330841064, "learning_rate": 1.9617358809891198e-05, "loss": 1.0828, "step": 2018 }, { "epoch": 0.12, "grad_norm": 1.9782872200012207, "learning_rate": 1.9616849698751748e-05, "loss": 1.1069, "step": 2019 }, { "epoch": 0.12, "grad_norm": 2.11164927482605, "learning_rate": 1.9616340255761676e-05, "loss": 1.1182, "step": 2020 }, { "epoch": 0.12, "grad_norm": 2.3223094940185547, "learning_rate": 1.961583048093857e-05, "loss": 1.12, "step": 2021 }, { "epoch": 0.12, "grad_norm": 2.186772346496582, "learning_rate": 1.961532037430001e-05, "loss": 1.0665, "step": 2022 }, { "epoch": 0.12, "grad_norm": 2.7606115341186523, "learning_rate": 1.961480993586361e-05, "loss": 1.099, "step": 2023 }, { "epoch": 0.12, "grad_norm": 2.172051191329956, "learning_rate": 1.961429916564697e-05, "loss": 1.0889, "step": 2024 }, { "epoch": 0.12, "grad_norm": 2.078150987625122, "learning_rate": 1.9613788063667722e-05, "loss": 1.0649, "step": 2025 }, { "epoch": 0.12, "grad_norm": 2.193549394607544, "learning_rate": 1.9613276629943504e-05, "loss": 1.1117, "step": 2026 }, { "epoch": 0.12, "grad_norm": 2.124544143676758, "learning_rate": 1.9612764864491968e-05, "loss": 1.0571, "step": 2027 }, { "epoch": 0.12, "grad_norm": 2.6797614097595215, "learning_rate": 1.9612252767330763e-05, "loss": 1.0941, "step": 2028 }, { "epoch": 0.12, "grad_norm": 2.235015392303467, "learning_rate": 1.961174033847757e-05, "loss": 1.1047, "step": 2029 }, { "epoch": 0.12, "grad_norm": 2.124994993209839, "learning_rate": 1.9611227577950065e-05, "loss": 1.0651, "step": 2030 }, { "epoch": 0.12, "grad_norm": 1.851344347000122, "learning_rate": 1.961071448576594e-05, "loss": 1.0507, "step": 2031 }, { "epoch": 0.12, "grad_norm": 2.0498554706573486, "learning_rate": 1.9610201061942913e-05, "loss": 1.095, "step": 2032 }, { "epoch": 0.12, "grad_norm": 2.255980968475342, "learning_rate": 1.9609687306498686e-05, "loss": 1.0895, "step": 2033 }, { "epoch": 0.12, "grad_norm": 1.180277943611145, "learning_rate": 1.9609173219450998e-05, "loss": 0.6238, "step": 2034 }, { "epoch": 0.12, "grad_norm": 2.249208688735962, "learning_rate": 1.9608658800817582e-05, "loss": 1.1537, "step": 2035 }, { "epoch": 0.12, "grad_norm": 1.799227237701416, "learning_rate": 1.9608144050616192e-05, "loss": 1.0423, "step": 2036 }, { "epoch": 0.12, "grad_norm": 2.0315423011779785, "learning_rate": 1.9607628968864588e-05, "loss": 1.0973, "step": 2037 }, { "epoch": 0.12, "grad_norm": 1.9563486576080322, "learning_rate": 1.9607113555580548e-05, "loss": 1.1245, "step": 2038 }, { "epoch": 0.12, "grad_norm": 2.1117281913757324, "learning_rate": 1.9606597810781856e-05, "loss": 1.0557, "step": 2039 }, { "epoch": 0.12, "grad_norm": 2.2255120277404785, "learning_rate": 1.9606081734486307e-05, "loss": 1.248, "step": 2040 }, { "epoch": 0.12, "grad_norm": 1.9935283660888672, "learning_rate": 1.9605565326711712e-05, "loss": 1.109, "step": 2041 }, { "epoch": 0.12, "grad_norm": 2.140305995941162, "learning_rate": 1.960504858747589e-05, "loss": 1.1328, "step": 2042 }, { "epoch": 0.12, "grad_norm": 1.9693390130996704, "learning_rate": 1.960453151679667e-05, "loss": 1.1152, "step": 2043 }, { "epoch": 0.12, "grad_norm": 2.154205322265625, "learning_rate": 1.96040141146919e-05, "loss": 1.0748, "step": 2044 }, { "epoch": 0.12, "grad_norm": 1.9009878635406494, "learning_rate": 1.9603496381179428e-05, "loss": 1.1232, "step": 2045 }, { "epoch": 0.12, "grad_norm": 1.2208219766616821, "learning_rate": 1.9602978316277124e-05, "loss": 0.6276, "step": 2046 }, { "epoch": 0.12, "grad_norm": 1.9022663831710815, "learning_rate": 1.9602459920002862e-05, "loss": 1.0385, "step": 2047 }, { "epoch": 0.12, "grad_norm": 2.243781566619873, "learning_rate": 1.960194119237453e-05, "loss": 1.1051, "step": 2048 }, { "epoch": 0.12, "grad_norm": 2.0564043521881104, "learning_rate": 1.9601422133410032e-05, "loss": 1.0592, "step": 2049 }, { "epoch": 0.12, "grad_norm": 1.875037431716919, "learning_rate": 1.9600902743127276e-05, "loss": 1.0151, "step": 2050 }, { "epoch": 0.12, "grad_norm": 2.07086181640625, "learning_rate": 1.960038302154418e-05, "loss": 1.0879, "step": 2051 }, { "epoch": 0.12, "grad_norm": 1.8443243503570557, "learning_rate": 1.9599862968678687e-05, "loss": 1.0805, "step": 2052 }, { "epoch": 0.12, "grad_norm": 2.0580105781555176, "learning_rate": 1.9599342584548745e-05, "loss": 1.1073, "step": 2053 }, { "epoch": 0.12, "grad_norm": 2.344372034072876, "learning_rate": 1.9598821869172298e-05, "loss": 1.1261, "step": 2054 }, { "epoch": 0.12, "grad_norm": 1.9451229572296143, "learning_rate": 1.9598300822567324e-05, "loss": 1.1401, "step": 2055 }, { "epoch": 0.12, "grad_norm": 2.053720474243164, "learning_rate": 1.95977794447518e-05, "loss": 1.0937, "step": 2056 }, { "epoch": 0.12, "grad_norm": 1.9877467155456543, "learning_rate": 1.959725773574372e-05, "loss": 0.986, "step": 2057 }, { "epoch": 0.12, "grad_norm": 1.7685565948486328, "learning_rate": 1.9596735695561082e-05, "loss": 1.0411, "step": 2058 }, { "epoch": 0.12, "grad_norm": 1.957061529159546, "learning_rate": 1.95962133242219e-05, "loss": 1.0615, "step": 2059 }, { "epoch": 0.12, "grad_norm": 1.9303545951843262, "learning_rate": 1.9595690621744208e-05, "loss": 1.0526, "step": 2060 }, { "epoch": 0.12, "grad_norm": 2.1168856620788574, "learning_rate": 1.9595167588146036e-05, "loss": 0.9692, "step": 2061 }, { "epoch": 0.12, "grad_norm": 1.991184949874878, "learning_rate": 1.9594644223445432e-05, "loss": 1.036, "step": 2062 }, { "epoch": 0.12, "grad_norm": 1.9844151735305786, "learning_rate": 1.9594120527660453e-05, "loss": 0.9962, "step": 2063 }, { "epoch": 0.12, "grad_norm": 1.9535188674926758, "learning_rate": 1.9593596500809183e-05, "loss": 1.0292, "step": 2064 }, { "epoch": 0.12, "grad_norm": 2.180851459503174, "learning_rate": 1.9593072142909692e-05, "loss": 1.1836, "step": 2065 }, { "epoch": 0.12, "grad_norm": 2.012561321258545, "learning_rate": 1.9592547453980076e-05, "loss": 1.0704, "step": 2066 }, { "epoch": 0.12, "grad_norm": 1.056328296661377, "learning_rate": 1.9592022434038447e-05, "loss": 0.5338, "step": 2067 }, { "epoch": 0.12, "grad_norm": 2.183631420135498, "learning_rate": 1.9591497083102916e-05, "loss": 1.1009, "step": 2068 }, { "epoch": 0.12, "grad_norm": 1.96914803981781, "learning_rate": 1.9590971401191616e-05, "loss": 1.0772, "step": 2069 }, { "epoch": 0.12, "grad_norm": 1.887779712677002, "learning_rate": 1.959044538832268e-05, "loss": 1.1447, "step": 2070 }, { "epoch": 0.12, "grad_norm": 2.3084380626678467, "learning_rate": 1.9589919044514267e-05, "loss": 1.0451, "step": 2071 }, { "epoch": 0.12, "grad_norm": 2.010209083557129, "learning_rate": 1.9589392369784536e-05, "loss": 1.1424, "step": 2072 }, { "epoch": 0.12, "grad_norm": 1.9774534702301025, "learning_rate": 1.958886536415166e-05, "loss": 1.027, "step": 2073 }, { "epoch": 0.12, "grad_norm": 1.9513734579086304, "learning_rate": 1.9588338027633824e-05, "loss": 1.0508, "step": 2074 }, { "epoch": 0.12, "grad_norm": 2.01263427734375, "learning_rate": 1.9587810360249228e-05, "loss": 1.06, "step": 2075 }, { "epoch": 0.12, "grad_norm": 2.0426251888275146, "learning_rate": 1.9587282362016083e-05, "loss": 1.0911, "step": 2076 }, { "epoch": 0.12, "grad_norm": 1.9721602201461792, "learning_rate": 1.9586754032952598e-05, "loss": 1.1562, "step": 2077 }, { "epoch": 0.12, "grad_norm": 2.0045042037963867, "learning_rate": 1.9586225373077018e-05, "loss": 1.0035, "step": 2078 }, { "epoch": 0.12, "grad_norm": 1.9218757152557373, "learning_rate": 1.9585696382407573e-05, "loss": 1.0527, "step": 2079 }, { "epoch": 0.12, "grad_norm": 1.082410454750061, "learning_rate": 1.9585167060962523e-05, "loss": 0.5503, "step": 2080 }, { "epoch": 0.12, "grad_norm": 1.9107838869094849, "learning_rate": 1.9584637408760133e-05, "loss": 1.068, "step": 2081 }, { "epoch": 0.12, "grad_norm": 2.1332125663757324, "learning_rate": 1.9584107425818682e-05, "loss": 1.1124, "step": 2082 }, { "epoch": 0.12, "grad_norm": 1.8978484869003296, "learning_rate": 1.9583577112156456e-05, "loss": 1.0579, "step": 2083 }, { "epoch": 0.12, "grad_norm": 1.9840205907821655, "learning_rate": 1.958304646779175e-05, "loss": 1.0873, "step": 2084 }, { "epoch": 0.12, "grad_norm": 2.0169150829315186, "learning_rate": 1.9582515492742883e-05, "loss": 1.1131, "step": 2085 }, { "epoch": 0.12, "grad_norm": 1.6841520071029663, "learning_rate": 1.9581984187028174e-05, "loss": 1.0539, "step": 2086 }, { "epoch": 0.12, "grad_norm": 1.9439979791641235, "learning_rate": 1.9581452550665956e-05, "loss": 1.1329, "step": 2087 }, { "epoch": 0.12, "grad_norm": 2.186230421066284, "learning_rate": 1.9580920583674573e-05, "loss": 1.121, "step": 2088 }, { "epoch": 0.12, "grad_norm": 1.9828510284423828, "learning_rate": 1.9580388286072388e-05, "loss": 1.103, "step": 2089 }, { "epoch": 0.12, "grad_norm": 1.8692365884780884, "learning_rate": 1.9579855657877763e-05, "loss": 1.0305, "step": 2090 }, { "epoch": 0.12, "grad_norm": 2.121267318725586, "learning_rate": 1.957932269910908e-05, "loss": 1.0995, "step": 2091 }, { "epoch": 0.12, "grad_norm": 1.8128423690795898, "learning_rate": 1.9578789409784727e-05, "loss": 1.1519, "step": 2092 }, { "epoch": 0.12, "grad_norm": 2.1580734252929688, "learning_rate": 1.957825578992311e-05, "loss": 1.0303, "step": 2093 }, { "epoch": 0.12, "grad_norm": 2.0924642086029053, "learning_rate": 1.9577721839542646e-05, "loss": 1.1129, "step": 2094 }, { "epoch": 0.12, "grad_norm": 2.051870346069336, "learning_rate": 1.957718755866175e-05, "loss": 1.1684, "step": 2095 }, { "epoch": 0.12, "grad_norm": 2.242689847946167, "learning_rate": 1.957665294729887e-05, "loss": 1.0697, "step": 2096 }, { "epoch": 0.12, "grad_norm": 2.096494436264038, "learning_rate": 1.9576118005472442e-05, "loss": 1.0437, "step": 2097 }, { "epoch": 0.12, "grad_norm": 1.9058254957199097, "learning_rate": 1.957558273320093e-05, "loss": 1.1211, "step": 2098 }, { "epoch": 0.12, "grad_norm": 2.138946533203125, "learning_rate": 1.9575047130502813e-05, "loss": 1.0596, "step": 2099 }, { "epoch": 0.12, "grad_norm": 1.873805046081543, "learning_rate": 1.9574511197396563e-05, "loss": 1.0888, "step": 2100 }, { "epoch": 0.12, "grad_norm": 1.9857383966445923, "learning_rate": 1.9573974933900677e-05, "loss": 1.1667, "step": 2101 }, { "epoch": 0.12, "grad_norm": 2.02386736869812, "learning_rate": 1.957343834003366e-05, "loss": 1.1306, "step": 2102 }, { "epoch": 0.12, "grad_norm": 1.9653023481369019, "learning_rate": 1.9572901415814027e-05, "loss": 1.0629, "step": 2103 }, { "epoch": 0.12, "grad_norm": 2.0490076541900635, "learning_rate": 1.957236416126031e-05, "loss": 1.0303, "step": 2104 }, { "epoch": 0.12, "grad_norm": 0.9465950131416321, "learning_rate": 1.9571826576391042e-05, "loss": 0.5271, "step": 2105 }, { "epoch": 0.12, "grad_norm": 2.0076417922973633, "learning_rate": 1.957128866122478e-05, "loss": 1.1345, "step": 2106 }, { "epoch": 0.12, "grad_norm": 2.0429115295410156, "learning_rate": 1.957075041578008e-05, "loss": 1.0879, "step": 2107 }, { "epoch": 0.12, "grad_norm": 3.4451446533203125, "learning_rate": 1.9570211840075518e-05, "loss": 1.1297, "step": 2108 }, { "epoch": 0.12, "grad_norm": 2.2899601459503174, "learning_rate": 1.9569672934129676e-05, "loss": 1.0908, "step": 2109 }, { "epoch": 0.12, "grad_norm": 2.0632998943328857, "learning_rate": 1.9569133697961158e-05, "loss": 1.0509, "step": 2110 }, { "epoch": 0.12, "grad_norm": 2.1583948135375977, "learning_rate": 1.9568594131588562e-05, "loss": 1.118, "step": 2111 }, { "epoch": 0.12, "grad_norm": 1.9642024040222168, "learning_rate": 1.9568054235030515e-05, "loss": 1.0881, "step": 2112 }, { "epoch": 0.12, "grad_norm": 2.161700487136841, "learning_rate": 1.9567514008305643e-05, "loss": 1.0973, "step": 2113 }, { "epoch": 0.12, "grad_norm": 1.8933343887329102, "learning_rate": 1.9566973451432586e-05, "loss": 1.0766, "step": 2114 }, { "epoch": 0.12, "grad_norm": 2.3618459701538086, "learning_rate": 1.9566432564430003e-05, "loss": 1.0475, "step": 2115 }, { "epoch": 0.12, "grad_norm": 1.1570398807525635, "learning_rate": 1.9565891347316553e-05, "loss": 0.5926, "step": 2116 }, { "epoch": 0.12, "grad_norm": 2.2885169982910156, "learning_rate": 1.9565349800110915e-05, "loss": 1.1096, "step": 2117 }, { "epoch": 0.12, "grad_norm": 1.9491864442825317, "learning_rate": 1.9564807922831773e-05, "loss": 1.1581, "step": 2118 }, { "epoch": 0.12, "grad_norm": 1.9246591329574585, "learning_rate": 1.9564265715497827e-05, "loss": 1.0032, "step": 2119 }, { "epoch": 0.12, "grad_norm": 2.0605428218841553, "learning_rate": 1.956372317812779e-05, "loss": 1.0935, "step": 2120 }, { "epoch": 0.12, "grad_norm": 1.9146169424057007, "learning_rate": 1.956318031074038e-05, "loss": 1.0664, "step": 2121 }, { "epoch": 0.12, "grad_norm": 1.99824059009552, "learning_rate": 1.9562637113354332e-05, "loss": 1.1267, "step": 2122 }, { "epoch": 0.12, "grad_norm": 2.0577216148376465, "learning_rate": 1.9562093585988392e-05, "loss": 1.0702, "step": 2123 }, { "epoch": 0.12, "grad_norm": 1.8620915412902832, "learning_rate": 1.9561549728661312e-05, "loss": 1.0276, "step": 2124 }, { "epoch": 0.12, "grad_norm": 1.9248203039169312, "learning_rate": 1.9561005541391857e-05, "loss": 1.0848, "step": 2125 }, { "epoch": 0.12, "grad_norm": 2.223719358444214, "learning_rate": 1.956046102419881e-05, "loss": 1.179, "step": 2126 }, { "epoch": 0.12, "grad_norm": 2.071540117263794, "learning_rate": 1.9559916177100958e-05, "loss": 1.059, "step": 2127 }, { "epoch": 0.12, "grad_norm": 2.3402137756347656, "learning_rate": 1.9559371000117106e-05, "loss": 1.062, "step": 2128 }, { "epoch": 0.12, "grad_norm": 2.191251516342163, "learning_rate": 1.955882549326606e-05, "loss": 1.1068, "step": 2129 }, { "epoch": 0.12, "grad_norm": 1.9132071733474731, "learning_rate": 1.955827965656665e-05, "loss": 1.1004, "step": 2130 }, { "epoch": 0.12, "grad_norm": 1.9674185514450073, "learning_rate": 1.955773349003771e-05, "loss": 1.1286, "step": 2131 }, { "epoch": 0.12, "grad_norm": 1.9617855548858643, "learning_rate": 1.9557186993698082e-05, "loss": 1.1046, "step": 2132 }, { "epoch": 0.12, "grad_norm": 1.911357045173645, "learning_rate": 1.9556640167566632e-05, "loss": 1.0262, "step": 2133 }, { "epoch": 0.12, "grad_norm": 1.1601715087890625, "learning_rate": 1.9556093011662222e-05, "loss": 0.6082, "step": 2134 }, { "epoch": 0.12, "grad_norm": 1.9079619646072388, "learning_rate": 1.955554552600374e-05, "loss": 1.0338, "step": 2135 }, { "epoch": 0.12, "grad_norm": 2.042503595352173, "learning_rate": 1.9554997710610068e-05, "loss": 1.0693, "step": 2136 }, { "epoch": 0.12, "grad_norm": 1.8659805059432983, "learning_rate": 1.9554449565500122e-05, "loss": 1.0073, "step": 2137 }, { "epoch": 0.12, "grad_norm": 1.9416954517364502, "learning_rate": 1.955390109069281e-05, "loss": 1.1641, "step": 2138 }, { "epoch": 0.12, "grad_norm": 1.9294977188110352, "learning_rate": 1.9553352286207056e-05, "loss": 1.1296, "step": 2139 }, { "epoch": 0.12, "grad_norm": 1.7696038484573364, "learning_rate": 1.9552803152061803e-05, "loss": 1.1345, "step": 2140 }, { "epoch": 0.12, "grad_norm": 2.120213747024536, "learning_rate": 1.9552253688276e-05, "loss": 1.1134, "step": 2141 }, { "epoch": 0.12, "grad_norm": 2.024939775466919, "learning_rate": 1.9551703894868597e-05, "loss": 1.1007, "step": 2142 }, { "epoch": 0.12, "grad_norm": 2.229119062423706, "learning_rate": 1.9551153771858578e-05, "loss": 1.0399, "step": 2143 }, { "epoch": 0.12, "grad_norm": 1.8080370426177979, "learning_rate": 1.9550603319264926e-05, "loss": 1.028, "step": 2144 }, { "epoch": 0.12, "grad_norm": 2.298851728439331, "learning_rate": 1.955005253710663e-05, "loss": 1.1039, "step": 2145 }, { "epoch": 0.12, "grad_norm": 1.9364268779754639, "learning_rate": 1.95495014254027e-05, "loss": 0.9986, "step": 2146 }, { "epoch": 0.12, "grad_norm": 1.9137351512908936, "learning_rate": 1.9548949984172148e-05, "loss": 1.0187, "step": 2147 }, { "epoch": 0.12, "grad_norm": 1.0432533025741577, "learning_rate": 1.954839821343401e-05, "loss": 0.5284, "step": 2148 }, { "epoch": 0.12, "grad_norm": 1.917034387588501, "learning_rate": 1.9547846113207317e-05, "loss": 1.0481, "step": 2149 }, { "epoch": 0.12, "grad_norm": 2.120081663131714, "learning_rate": 1.954729368351113e-05, "loss": 1.129, "step": 2150 }, { "epoch": 0.12, "grad_norm": 2.048861026763916, "learning_rate": 1.9546740924364504e-05, "loss": 1.1379, "step": 2151 }, { "epoch": 0.12, "grad_norm": 2.0498645305633545, "learning_rate": 1.9546187835786515e-05, "loss": 1.0716, "step": 2152 }, { "epoch": 0.12, "grad_norm": 2.0628933906555176, "learning_rate": 1.9545634417796255e-05, "loss": 1.076, "step": 2153 }, { "epoch": 0.12, "grad_norm": 1.9649591445922852, "learning_rate": 1.9545080670412814e-05, "loss": 1.1029, "step": 2154 }, { "epoch": 0.12, "grad_norm": 1.9629981517791748, "learning_rate": 1.9544526593655296e-05, "loss": 1.0701, "step": 2155 }, { "epoch": 0.12, "grad_norm": 1.9879732131958008, "learning_rate": 1.9543972187542833e-05, "loss": 1.129, "step": 2156 }, { "epoch": 0.12, "grad_norm": 1.687589168548584, "learning_rate": 1.9543417452094552e-05, "loss": 1.0697, "step": 2157 }, { "epoch": 0.12, "grad_norm": 1.9478509426116943, "learning_rate": 1.954286238732959e-05, "loss": 1.0446, "step": 2158 }, { "epoch": 0.12, "grad_norm": 2.0066754817962646, "learning_rate": 1.9542306993267105e-05, "loss": 1.0888, "step": 2159 }, { "epoch": 0.12, "grad_norm": 1.8370342254638672, "learning_rate": 1.954175126992626e-05, "loss": 1.0222, "step": 2160 }, { "epoch": 0.12, "grad_norm": 1.8456997871398926, "learning_rate": 1.9541195217326233e-05, "loss": 1.0897, "step": 2161 }, { "epoch": 0.12, "grad_norm": 1.966375470161438, "learning_rate": 1.954063883548621e-05, "loss": 1.1349, "step": 2162 }, { "epoch": 0.12, "grad_norm": 1.8924496173858643, "learning_rate": 1.9540082124425393e-05, "loss": 1.1214, "step": 2163 }, { "epoch": 0.12, "grad_norm": 1.0375827550888062, "learning_rate": 1.9539525084162993e-05, "loss": 0.5749, "step": 2164 }, { "epoch": 0.12, "grad_norm": 1.9985837936401367, "learning_rate": 1.9538967714718226e-05, "loss": 1.0815, "step": 2165 }, { "epoch": 0.12, "grad_norm": 1.8893191814422607, "learning_rate": 1.953841001611033e-05, "loss": 1.058, "step": 2166 }, { "epoch": 0.12, "grad_norm": 1.957874059677124, "learning_rate": 1.953785198835855e-05, "loss": 1.0964, "step": 2167 }, { "epoch": 0.12, "grad_norm": 2.0782155990600586, "learning_rate": 1.953729363148214e-05, "loss": 0.9915, "step": 2168 }, { "epoch": 0.12, "grad_norm": 2.2153515815734863, "learning_rate": 1.953673494550037e-05, "loss": 1.1288, "step": 2169 }, { "epoch": 0.12, "grad_norm": 2.1902804374694824, "learning_rate": 1.9536175930432512e-05, "loss": 1.0764, "step": 2170 }, { "epoch": 0.12, "grad_norm": 1.84712815284729, "learning_rate": 1.9535616586297866e-05, "loss": 1.1065, "step": 2171 }, { "epoch": 0.12, "grad_norm": 2.0183358192443848, "learning_rate": 1.9535056913115725e-05, "loss": 1.1351, "step": 2172 }, { "epoch": 0.12, "grad_norm": 1.9436144828796387, "learning_rate": 1.9534496910905404e-05, "loss": 1.0999, "step": 2173 }, { "epoch": 0.12, "grad_norm": 1.8144478797912598, "learning_rate": 1.9533936579686233e-05, "loss": 1.0999, "step": 2174 }, { "epoch": 0.12, "grad_norm": 1.830190658569336, "learning_rate": 1.953337591947754e-05, "loss": 1.1035, "step": 2175 }, { "epoch": 0.12, "grad_norm": 1.9984219074249268, "learning_rate": 1.9532814930298673e-05, "loss": 1.07, "step": 2176 }, { "epoch": 0.12, "grad_norm": 2.066513776779175, "learning_rate": 1.9532253612168994e-05, "loss": 1.0819, "step": 2177 }, { "epoch": 0.12, "grad_norm": 2.0593111515045166, "learning_rate": 1.953169196510787e-05, "loss": 1.0248, "step": 2178 }, { "epoch": 0.12, "grad_norm": 2.387378215789795, "learning_rate": 1.953112998913468e-05, "loss": 1.0998, "step": 2179 }, { "epoch": 0.13, "grad_norm": 2.031853675842285, "learning_rate": 1.9530567684268823e-05, "loss": 1.0108, "step": 2180 }, { "epoch": 0.13, "grad_norm": 2.252674102783203, "learning_rate": 1.953000505052969e-05, "loss": 1.1022, "step": 2181 }, { "epoch": 0.13, "grad_norm": 2.006401538848877, "learning_rate": 1.952944208793671e-05, "loss": 1.1395, "step": 2182 }, { "epoch": 0.13, "grad_norm": 2.0618743896484375, "learning_rate": 1.95288787965093e-05, "loss": 1.1236, "step": 2183 }, { "epoch": 0.13, "grad_norm": 2.0417888164520264, "learning_rate": 1.9528315176266904e-05, "loss": 1.1649, "step": 2184 }, { "epoch": 0.13, "grad_norm": 2.171774387359619, "learning_rate": 1.9527751227228964e-05, "loss": 1.1012, "step": 2185 }, { "epoch": 0.13, "grad_norm": 1.8630516529083252, "learning_rate": 1.9527186949414948e-05, "loss": 1.0835, "step": 2186 }, { "epoch": 0.13, "grad_norm": 2.0474302768707275, "learning_rate": 1.9526622342844318e-05, "loss": 1.0943, "step": 2187 }, { "epoch": 0.13, "grad_norm": 1.8254406452178955, "learning_rate": 1.9526057407536565e-05, "loss": 1.0067, "step": 2188 }, { "epoch": 0.13, "grad_norm": 2.0415186882019043, "learning_rate": 1.9525492143511182e-05, "loss": 1.0362, "step": 2189 }, { "epoch": 0.13, "grad_norm": 1.998120665550232, "learning_rate": 1.952492655078767e-05, "loss": 1.0416, "step": 2190 }, { "epoch": 0.13, "grad_norm": 1.9627357721328735, "learning_rate": 1.9524360629385554e-05, "loss": 0.9909, "step": 2191 }, { "epoch": 0.13, "grad_norm": 1.9530761241912842, "learning_rate": 1.9523794379324354e-05, "loss": 1.1086, "step": 2192 }, { "epoch": 0.13, "grad_norm": 2.0925235748291016, "learning_rate": 1.9523227800623616e-05, "loss": 0.9154, "step": 2193 }, { "epoch": 0.13, "grad_norm": 1.854523777961731, "learning_rate": 1.952266089330289e-05, "loss": 1.0055, "step": 2194 }, { "epoch": 0.13, "grad_norm": 1.9151043891906738, "learning_rate": 1.9522093657381733e-05, "loss": 1.1637, "step": 2195 }, { "epoch": 0.13, "grad_norm": 2.061143398284912, "learning_rate": 1.9521526092879725e-05, "loss": 1.0934, "step": 2196 }, { "epoch": 0.13, "grad_norm": 1.7574697732925415, "learning_rate": 1.9520958199816448e-05, "loss": 1.0863, "step": 2197 }, { "epoch": 0.13, "grad_norm": 2.0976905822753906, "learning_rate": 1.95203899782115e-05, "loss": 1.1654, "step": 2198 }, { "epoch": 0.13, "grad_norm": 2.3165946006774902, "learning_rate": 1.9519821428084488e-05, "loss": 1.0739, "step": 2199 }, { "epoch": 0.13, "grad_norm": 2.0492351055145264, "learning_rate": 1.9519252549455033e-05, "loss": 1.1017, "step": 2200 }, { "epoch": 0.13, "grad_norm": 2.1428918838500977, "learning_rate": 1.9518683342342762e-05, "loss": 1.1016, "step": 2201 }, { "epoch": 0.13, "grad_norm": 2.2069575786590576, "learning_rate": 1.9518113806767316e-05, "loss": 1.0892, "step": 2202 }, { "epoch": 0.13, "grad_norm": 2.1398799419403076, "learning_rate": 1.9517543942748353e-05, "loss": 1.0395, "step": 2203 }, { "epoch": 0.13, "grad_norm": 1.9363088607788086, "learning_rate": 1.951697375030553e-05, "loss": 1.067, "step": 2204 }, { "epoch": 0.13, "grad_norm": 1.207046627998352, "learning_rate": 1.9516403229458535e-05, "loss": 0.6198, "step": 2205 }, { "epoch": 0.13, "grad_norm": 1.2757289409637451, "learning_rate": 1.9515832380227044e-05, "loss": 0.6216, "step": 2206 }, { "epoch": 0.13, "grad_norm": 1.8511155843734741, "learning_rate": 1.9515261202630758e-05, "loss": 1.1289, "step": 2207 }, { "epoch": 0.13, "grad_norm": 1.9863609075546265, "learning_rate": 1.9514689696689388e-05, "loss": 1.0329, "step": 2208 }, { "epoch": 0.13, "grad_norm": 2.21726393699646, "learning_rate": 1.9514117862422655e-05, "loss": 1.162, "step": 2209 }, { "epoch": 0.13, "grad_norm": 1.9351719617843628, "learning_rate": 1.9513545699850292e-05, "loss": 1.1288, "step": 2210 }, { "epoch": 0.13, "grad_norm": 1.9839487075805664, "learning_rate": 1.951297320899204e-05, "loss": 1.054, "step": 2211 }, { "epoch": 0.13, "grad_norm": 1.7833126783370972, "learning_rate": 1.951240038986766e-05, "loss": 0.9817, "step": 2212 }, { "epoch": 0.13, "grad_norm": 1.2012832164764404, "learning_rate": 1.951182724249691e-05, "loss": 0.5744, "step": 2213 }, { "epoch": 0.13, "grad_norm": 2.1642181873321533, "learning_rate": 1.9511253766899574e-05, "loss": 1.1509, "step": 2214 }, { "epoch": 0.13, "grad_norm": 2.121772527694702, "learning_rate": 1.9510679963095437e-05, "loss": 1.1528, "step": 2215 }, { "epoch": 0.13, "grad_norm": 1.904455304145813, "learning_rate": 1.9510105831104305e-05, "loss": 1.052, "step": 2216 }, { "epoch": 0.13, "grad_norm": 2.1524360179901123, "learning_rate": 1.9509531370945982e-05, "loss": 1.0748, "step": 2217 }, { "epoch": 0.13, "grad_norm": 1.9766215085983276, "learning_rate": 1.95089565826403e-05, "loss": 1.0318, "step": 2218 }, { "epoch": 0.13, "grad_norm": 2.9146275520324707, "learning_rate": 1.9508381466207086e-05, "loss": 1.0427, "step": 2219 }, { "epoch": 0.13, "grad_norm": 1.9278593063354492, "learning_rate": 1.9507806021666188e-05, "loss": 1.1385, "step": 2220 }, { "epoch": 0.13, "grad_norm": 2.076561689376831, "learning_rate": 1.9507230249037462e-05, "loss": 1.1266, "step": 2221 }, { "epoch": 0.13, "grad_norm": 2.0840296745300293, "learning_rate": 1.9506654148340783e-05, "loss": 1.0215, "step": 2222 }, { "epoch": 0.13, "grad_norm": 2.1414427757263184, "learning_rate": 1.950607771959602e-05, "loss": 1.1184, "step": 2223 }, { "epoch": 0.13, "grad_norm": 2.3353934288024902, "learning_rate": 1.950550096282307e-05, "loss": 1.1756, "step": 2224 }, { "epoch": 0.13, "grad_norm": 2.2285561561584473, "learning_rate": 1.9504923878041834e-05, "loss": 1.1113, "step": 2225 }, { "epoch": 0.13, "grad_norm": 1.9018921852111816, "learning_rate": 1.9504346465272225e-05, "loss": 1.0906, "step": 2226 }, { "epoch": 0.13, "grad_norm": 1.903281807899475, "learning_rate": 1.9503768724534172e-05, "loss": 1.053, "step": 2227 }, { "epoch": 0.13, "grad_norm": 2.2642760276794434, "learning_rate": 1.9503190655847605e-05, "loss": 1.0613, "step": 2228 }, { "epoch": 0.13, "grad_norm": 2.1987807750701904, "learning_rate": 1.9502612259232477e-05, "loss": 1.1309, "step": 2229 }, { "epoch": 0.13, "grad_norm": 2.0226805210113525, "learning_rate": 1.9502033534708743e-05, "loss": 1.0917, "step": 2230 }, { "epoch": 0.13, "grad_norm": 1.9941848516464233, "learning_rate": 1.9501454482296376e-05, "loss": 1.0317, "step": 2231 }, { "epoch": 0.13, "grad_norm": 1.9081685543060303, "learning_rate": 1.9500875102015354e-05, "loss": 1.1692, "step": 2232 }, { "epoch": 0.13, "grad_norm": 2.053341865539551, "learning_rate": 1.9500295393885672e-05, "loss": 1.1273, "step": 2233 }, { "epoch": 0.13, "grad_norm": 2.083022117614746, "learning_rate": 1.9499715357927335e-05, "loss": 1.0835, "step": 2234 }, { "epoch": 0.13, "grad_norm": 2.09658145904541, "learning_rate": 1.949913499416036e-05, "loss": 1.1934, "step": 2235 }, { "epoch": 0.13, "grad_norm": 2.007136583328247, "learning_rate": 1.9498554302604768e-05, "loss": 1.1158, "step": 2236 }, { "epoch": 0.13, "grad_norm": 1.944387674331665, "learning_rate": 1.94979732832806e-05, "loss": 1.0922, "step": 2237 }, { "epoch": 0.13, "grad_norm": 1.8987300395965576, "learning_rate": 1.9497391936207905e-05, "loss": 1.0937, "step": 2238 }, { "epoch": 0.13, "grad_norm": 1.9165974855422974, "learning_rate": 1.949681026140674e-05, "loss": 1.1416, "step": 2239 }, { "epoch": 0.13, "grad_norm": 1.8277428150177002, "learning_rate": 1.949622825889719e-05, "loss": 1.1068, "step": 2240 }, { "epoch": 0.13, "grad_norm": 2.1218738555908203, "learning_rate": 1.9495645928699324e-05, "loss": 1.0911, "step": 2241 }, { "epoch": 0.13, "grad_norm": 2.1599841117858887, "learning_rate": 1.9495063270833247e-05, "loss": 1.0769, "step": 2242 }, { "epoch": 0.13, "grad_norm": 2.067570686340332, "learning_rate": 1.9494480285319057e-05, "loss": 1.063, "step": 2243 }, { "epoch": 0.13, "grad_norm": 2.1810226440429688, "learning_rate": 1.949389697217687e-05, "loss": 1.0818, "step": 2244 }, { "epoch": 0.13, "grad_norm": 1.756366491317749, "learning_rate": 1.9493313331426825e-05, "loss": 1.0479, "step": 2245 }, { "epoch": 0.13, "grad_norm": 2.0172007083892822, "learning_rate": 1.949272936308905e-05, "loss": 1.051, "step": 2246 }, { "epoch": 0.13, "grad_norm": 2.112607479095459, "learning_rate": 1.9492145067183705e-05, "loss": 1.051, "step": 2247 }, { "epoch": 0.13, "grad_norm": 2.024402379989624, "learning_rate": 1.949156044373095e-05, "loss": 1.1802, "step": 2248 }, { "epoch": 0.13, "grad_norm": 2.2420358657836914, "learning_rate": 1.9490975492750953e-05, "loss": 1.0159, "step": 2249 }, { "epoch": 0.13, "grad_norm": 2.102362632751465, "learning_rate": 1.9490390214263908e-05, "loss": 1.1737, "step": 2250 }, { "epoch": 0.13, "grad_norm": 2.0069520473480225, "learning_rate": 1.9489804608290005e-05, "loss": 1.0418, "step": 2251 }, { "epoch": 0.13, "grad_norm": 1.9474588632583618, "learning_rate": 1.9489218674849454e-05, "loss": 1.0917, "step": 2252 }, { "epoch": 0.13, "grad_norm": 2.03008770942688, "learning_rate": 1.9488632413962473e-05, "loss": 1.089, "step": 2253 }, { "epoch": 0.13, "grad_norm": 1.8665211200714111, "learning_rate": 1.94880458256493e-05, "loss": 1.0565, "step": 2254 }, { "epoch": 0.13, "grad_norm": 2.0243096351623535, "learning_rate": 1.948745890993016e-05, "loss": 1.1318, "step": 2255 }, { "epoch": 0.13, "grad_norm": 2.156773567199707, "learning_rate": 1.9486871666825318e-05, "loss": 1.1509, "step": 2256 }, { "epoch": 0.13, "grad_norm": 1.857231855392456, "learning_rate": 1.9486284096355036e-05, "loss": 0.989, "step": 2257 }, { "epoch": 0.13, "grad_norm": 1.8624000549316406, "learning_rate": 1.9485696198539588e-05, "loss": 1.0586, "step": 2258 }, { "epoch": 0.13, "grad_norm": 2.1422600746154785, "learning_rate": 1.948510797339926e-05, "loss": 1.0586, "step": 2259 }, { "epoch": 0.13, "grad_norm": 2.047529935836792, "learning_rate": 1.9484519420954356e-05, "loss": 1.1701, "step": 2260 }, { "epoch": 0.13, "grad_norm": 2.060432195663452, "learning_rate": 1.9483930541225177e-05, "loss": 1.0574, "step": 2261 }, { "epoch": 0.13, "grad_norm": 2.491337537765503, "learning_rate": 1.9483341334232048e-05, "loss": 1.0739, "step": 2262 }, { "epoch": 0.13, "grad_norm": 2.4662020206451416, "learning_rate": 1.94827517999953e-05, "loss": 1.0691, "step": 2263 }, { "epoch": 0.13, "grad_norm": 2.0126819610595703, "learning_rate": 1.9482161938535275e-05, "loss": 1.0706, "step": 2264 }, { "epoch": 0.13, "grad_norm": 2.0396909713745117, "learning_rate": 1.948157174987233e-05, "loss": 1.0675, "step": 2265 }, { "epoch": 0.13, "grad_norm": 1.8847274780273438, "learning_rate": 1.948098123402683e-05, "loss": 1.0138, "step": 2266 }, { "epoch": 0.13, "grad_norm": 2.0830929279327393, "learning_rate": 1.9480390391019153e-05, "loss": 1.1052, "step": 2267 }, { "epoch": 0.13, "grad_norm": 2.6002235412597656, "learning_rate": 1.9479799220869686e-05, "loss": 1.156, "step": 2268 }, { "epoch": 0.13, "grad_norm": 2.180441379547119, "learning_rate": 1.9479207723598828e-05, "loss": 0.9706, "step": 2269 }, { "epoch": 0.13, "grad_norm": 1.9346991777420044, "learning_rate": 1.947861589922699e-05, "loss": 1.0266, "step": 2270 }, { "epoch": 0.13, "grad_norm": 2.0881309509277344, "learning_rate": 1.9478023747774593e-05, "loss": 1.0243, "step": 2271 }, { "epoch": 0.13, "grad_norm": 1.8805605173110962, "learning_rate": 1.9477431269262076e-05, "loss": 1.0334, "step": 2272 }, { "epoch": 0.13, "grad_norm": 2.139005661010742, "learning_rate": 1.9476838463709878e-05, "loss": 1.1812, "step": 2273 }, { "epoch": 0.13, "grad_norm": 1.8011958599090576, "learning_rate": 1.9476245331138455e-05, "loss": 1.0882, "step": 2274 }, { "epoch": 0.13, "grad_norm": 2.1845293045043945, "learning_rate": 1.947565187156828e-05, "loss": 1.1186, "step": 2275 }, { "epoch": 0.13, "grad_norm": 2.024052143096924, "learning_rate": 1.9475058085019825e-05, "loss": 1.106, "step": 2276 }, { "epoch": 0.13, "grad_norm": 1.8693737983703613, "learning_rate": 1.9474463971513584e-05, "loss": 1.0715, "step": 2277 }, { "epoch": 0.13, "grad_norm": 1.9654878377914429, "learning_rate": 1.947386953107006e-05, "loss": 0.9929, "step": 2278 }, { "epoch": 0.13, "grad_norm": 1.8518799543380737, "learning_rate": 1.9473274763709758e-05, "loss": 1.0829, "step": 2279 }, { "epoch": 0.13, "grad_norm": 2.038874626159668, "learning_rate": 1.9472679669453208e-05, "loss": 1.1494, "step": 2280 }, { "epoch": 0.13, "grad_norm": 2.0306472778320312, "learning_rate": 1.947208424832095e-05, "loss": 1.1107, "step": 2281 }, { "epoch": 0.13, "grad_norm": 2.212695360183716, "learning_rate": 1.9471488500333518e-05, "loss": 1.1498, "step": 2282 }, { "epoch": 0.13, "grad_norm": 1.839547038078308, "learning_rate": 1.9470892425511475e-05, "loss": 1.0395, "step": 2283 }, { "epoch": 0.13, "grad_norm": 1.993290662765503, "learning_rate": 1.9470296023875387e-05, "loss": 1.14, "step": 2284 }, { "epoch": 0.13, "grad_norm": 2.1583175659179688, "learning_rate": 1.9469699295445842e-05, "loss": 1.0084, "step": 2285 }, { "epoch": 0.13, "grad_norm": 1.9848265647888184, "learning_rate": 1.9469102240243428e-05, "loss": 1.1052, "step": 2286 }, { "epoch": 0.13, "grad_norm": 2.328465461730957, "learning_rate": 1.9468504858288747e-05, "loss": 1.0701, "step": 2287 }, { "epoch": 0.13, "grad_norm": 2.250830888748169, "learning_rate": 1.9467907149602406e-05, "loss": 1.1655, "step": 2288 }, { "epoch": 0.13, "grad_norm": 1.3036826848983765, "learning_rate": 1.9467309114205043e-05, "loss": 0.6335, "step": 2289 }, { "epoch": 0.13, "grad_norm": 1.0632299184799194, "learning_rate": 1.9466710752117286e-05, "loss": 0.6031, "step": 2290 }, { "epoch": 0.13, "grad_norm": 1.9720453023910522, "learning_rate": 1.9466112063359785e-05, "loss": 1.0932, "step": 2291 }, { "epoch": 0.13, "grad_norm": 1.8090431690216064, "learning_rate": 1.9465513047953202e-05, "loss": 1.0898, "step": 2292 }, { "epoch": 0.13, "grad_norm": 1.9945141077041626, "learning_rate": 1.94649137059182e-05, "loss": 1.0587, "step": 2293 }, { "epoch": 0.13, "grad_norm": 1.9482027292251587, "learning_rate": 1.9464314037275468e-05, "loss": 1.0473, "step": 2294 }, { "epoch": 0.13, "grad_norm": 2.1146042346954346, "learning_rate": 1.9463714042045695e-05, "loss": 1.1568, "step": 2295 }, { "epoch": 0.13, "grad_norm": 2.001692295074463, "learning_rate": 1.9463113720249587e-05, "loss": 1.1414, "step": 2296 }, { "epoch": 0.13, "grad_norm": 1.757969617843628, "learning_rate": 1.946251307190786e-05, "loss": 0.9973, "step": 2297 }, { "epoch": 0.13, "grad_norm": 2.1525330543518066, "learning_rate": 1.9461912097041238e-05, "loss": 1.0598, "step": 2298 }, { "epoch": 0.13, "grad_norm": 2.615339756011963, "learning_rate": 1.946131079567046e-05, "loss": 1.1176, "step": 2299 }, { "epoch": 0.13, "grad_norm": 2.1890640258789062, "learning_rate": 1.9460709167816274e-05, "loss": 1.1411, "step": 2300 }, { "epoch": 0.13, "grad_norm": 1.885799765586853, "learning_rate": 1.9460107213499445e-05, "loss": 1.0585, "step": 2301 }, { "epoch": 0.13, "grad_norm": 1.8118035793304443, "learning_rate": 1.945950493274074e-05, "loss": 1.0494, "step": 2302 }, { "epoch": 0.13, "grad_norm": 2.264744281768799, "learning_rate": 1.9458902325560945e-05, "loss": 1.0665, "step": 2303 }, { "epoch": 0.13, "grad_norm": 2.1410720348358154, "learning_rate": 1.945829939198085e-05, "loss": 1.1687, "step": 2304 }, { "epoch": 0.13, "grad_norm": 2.058357000350952, "learning_rate": 1.945769613202127e-05, "loss": 1.1369, "step": 2305 }, { "epoch": 0.13, "grad_norm": 1.9387421607971191, "learning_rate": 1.9457092545703008e-05, "loss": 1.0832, "step": 2306 }, { "epoch": 0.13, "grad_norm": 1.9720033407211304, "learning_rate": 1.9456488633046905e-05, "loss": 1.0818, "step": 2307 }, { "epoch": 0.13, "grad_norm": 1.9730970859527588, "learning_rate": 1.9455884394073792e-05, "loss": 1.1148, "step": 2308 }, { "epoch": 0.13, "grad_norm": 2.0752041339874268, "learning_rate": 1.9455279828804526e-05, "loss": 1.0324, "step": 2309 }, { "epoch": 0.13, "grad_norm": 1.9665589332580566, "learning_rate": 1.945467493725996e-05, "loss": 1.1027, "step": 2310 }, { "epoch": 0.13, "grad_norm": 2.019516706466675, "learning_rate": 1.945406971946098e-05, "loss": 1.1563, "step": 2311 }, { "epoch": 0.13, "grad_norm": 1.2828277349472046, "learning_rate": 1.9453464175428456e-05, "loss": 0.6239, "step": 2312 }, { "epoch": 0.13, "grad_norm": 2.040358066558838, "learning_rate": 1.945285830518329e-05, "loss": 1.0476, "step": 2313 }, { "epoch": 0.13, "grad_norm": 1.8861701488494873, "learning_rate": 1.9452252108746395e-05, "loss": 1.0633, "step": 2314 }, { "epoch": 0.13, "grad_norm": 1.930243730545044, "learning_rate": 1.945164558613868e-05, "loss": 1.0379, "step": 2315 }, { "epoch": 0.13, "grad_norm": 1.9522651433944702, "learning_rate": 1.9451038737381078e-05, "loss": 1.0818, "step": 2316 }, { "epoch": 0.13, "grad_norm": 2.057356119155884, "learning_rate": 1.945043156249453e-05, "loss": 1.0102, "step": 2317 }, { "epoch": 0.13, "grad_norm": 2.1683175563812256, "learning_rate": 1.9449824061499986e-05, "loss": 1.1164, "step": 2318 }, { "epoch": 0.13, "grad_norm": 2.1286325454711914, "learning_rate": 1.9449216234418412e-05, "loss": 1.0906, "step": 2319 }, { "epoch": 0.13, "grad_norm": 1.0584769248962402, "learning_rate": 1.9448608081270782e-05, "loss": 0.5686, "step": 2320 }, { "epoch": 0.13, "grad_norm": 2.1297574043273926, "learning_rate": 1.944799960207808e-05, "loss": 1.0278, "step": 2321 }, { "epoch": 0.13, "grad_norm": 2.008143186569214, "learning_rate": 1.9447390796861304e-05, "loss": 1.1446, "step": 2322 }, { "epoch": 0.13, "grad_norm": 2.016409397125244, "learning_rate": 1.9446781665641465e-05, "loss": 1.1213, "step": 2323 }, { "epoch": 0.13, "grad_norm": 1.8787120580673218, "learning_rate": 1.9446172208439576e-05, "loss": 1.0607, "step": 2324 }, { "epoch": 0.13, "grad_norm": 1.8465402126312256, "learning_rate": 1.944556242527667e-05, "loss": 0.992, "step": 2325 }, { "epoch": 0.13, "grad_norm": 1.9858829975128174, "learning_rate": 1.944495231617379e-05, "loss": 1.1399, "step": 2326 }, { "epoch": 0.13, "grad_norm": 1.149644136428833, "learning_rate": 1.944434188115199e-05, "loss": 0.5856, "step": 2327 }, { "epoch": 0.13, "grad_norm": 1.8389830589294434, "learning_rate": 1.9443731120232332e-05, "loss": 1.0137, "step": 2328 }, { "epoch": 0.13, "grad_norm": 2.0238380432128906, "learning_rate": 1.9443120033435895e-05, "loss": 1.1014, "step": 2329 }, { "epoch": 0.13, "grad_norm": 1.935685396194458, "learning_rate": 1.9442508620783763e-05, "loss": 1.144, "step": 2330 }, { "epoch": 0.13, "grad_norm": 2.134855270385742, "learning_rate": 1.9441896882297033e-05, "loss": 1.1397, "step": 2331 }, { "epoch": 0.13, "grad_norm": 2.136145830154419, "learning_rate": 1.944128481799682e-05, "loss": 1.0695, "step": 2332 }, { "epoch": 0.13, "grad_norm": 2.144909381866455, "learning_rate": 1.9440672427904238e-05, "loss": 1.061, "step": 2333 }, { "epoch": 0.13, "grad_norm": 1.9671698808670044, "learning_rate": 1.9440059712040424e-05, "loss": 1.0852, "step": 2334 }, { "epoch": 0.13, "grad_norm": 1.70538330078125, "learning_rate": 1.943944667042652e-05, "loss": 1.0379, "step": 2335 }, { "epoch": 0.13, "grad_norm": 1.898211121559143, "learning_rate": 1.9438833303083677e-05, "loss": 1.0494, "step": 2336 }, { "epoch": 0.13, "grad_norm": 1.8669668436050415, "learning_rate": 1.9438219610033066e-05, "loss": 1.094, "step": 2337 }, { "epoch": 0.13, "grad_norm": 1.9458999633789062, "learning_rate": 1.9437605591295857e-05, "loss": 1.068, "step": 2338 }, { "epoch": 0.13, "grad_norm": 2.055189847946167, "learning_rate": 1.9436991246893244e-05, "loss": 1.0871, "step": 2339 }, { "epoch": 0.13, "grad_norm": 1.8226596117019653, "learning_rate": 1.9436376576846422e-05, "loss": 1.0595, "step": 2340 }, { "epoch": 0.13, "grad_norm": 1.8832745552062988, "learning_rate": 1.9435761581176608e-05, "loss": 0.9507, "step": 2341 }, { "epoch": 0.13, "grad_norm": 2.0675411224365234, "learning_rate": 1.9435146259905018e-05, "loss": 1.1602, "step": 2342 }, { "epoch": 0.13, "grad_norm": 1.885493516921997, "learning_rate": 1.9434530613052883e-05, "loss": 1.0434, "step": 2343 }, { "epoch": 0.13, "grad_norm": 2.1025185585021973, "learning_rate": 1.9433914640641456e-05, "loss": 1.11, "step": 2344 }, { "epoch": 0.13, "grad_norm": 1.904587984085083, "learning_rate": 1.9433298342691987e-05, "loss": 1.0662, "step": 2345 }, { "epoch": 0.13, "grad_norm": 1.9344552755355835, "learning_rate": 1.9432681719225737e-05, "loss": 1.0696, "step": 2346 }, { "epoch": 0.13, "grad_norm": 1.9837193489074707, "learning_rate": 1.9432064770263998e-05, "loss": 1.0466, "step": 2347 }, { "epoch": 0.13, "grad_norm": 1.7730023860931396, "learning_rate": 1.9431447495828046e-05, "loss": 1.0446, "step": 2348 }, { "epoch": 0.13, "grad_norm": 1.9897903203964233, "learning_rate": 1.943082989593919e-05, "loss": 1.1333, "step": 2349 }, { "epoch": 0.13, "grad_norm": 2.148547887802124, "learning_rate": 1.9430211970618736e-05, "loss": 1.1457, "step": 2350 }, { "epoch": 0.13, "grad_norm": 1.9025495052337646, "learning_rate": 1.9429593719888008e-05, "loss": 1.0363, "step": 2351 }, { "epoch": 0.13, "grad_norm": 1.9933695793151855, "learning_rate": 1.9428975143768344e-05, "loss": 1.119, "step": 2352 }, { "epoch": 0.13, "grad_norm": 2.1240458488464355, "learning_rate": 1.9428356242281084e-05, "loss": 1.1107, "step": 2353 }, { "epoch": 0.14, "grad_norm": 1.945935845375061, "learning_rate": 1.942773701544759e-05, "loss": 1.059, "step": 2354 }, { "epoch": 0.14, "grad_norm": 1.8880828619003296, "learning_rate": 1.9427117463289223e-05, "loss": 1.0784, "step": 2355 }, { "epoch": 0.14, "grad_norm": 2.0734593868255615, "learning_rate": 1.942649758582737e-05, "loss": 1.0563, "step": 2356 }, { "epoch": 0.14, "grad_norm": 2.0611612796783447, "learning_rate": 1.9425877383083414e-05, "loss": 1.1901, "step": 2357 }, { "epoch": 0.14, "grad_norm": 1.9515230655670166, "learning_rate": 1.9425256855078762e-05, "loss": 1.0399, "step": 2358 }, { "epoch": 0.14, "grad_norm": 1.8852448463439941, "learning_rate": 1.942463600183482e-05, "loss": 1.0744, "step": 2359 }, { "epoch": 0.14, "grad_norm": 2.1457905769348145, "learning_rate": 1.9424014823373024e-05, "loss": 1.1605, "step": 2360 }, { "epoch": 0.14, "grad_norm": 2.0216314792633057, "learning_rate": 1.9423393319714797e-05, "loss": 1.058, "step": 2361 }, { "epoch": 0.14, "grad_norm": 1.9243296384811401, "learning_rate": 1.942277149088159e-05, "loss": 1.0756, "step": 2362 }, { "epoch": 0.14, "grad_norm": 2.0187437534332275, "learning_rate": 1.9422149336894858e-05, "loss": 0.9591, "step": 2363 }, { "epoch": 0.14, "grad_norm": 1.2650585174560547, "learning_rate": 1.9421526857776074e-05, "loss": 0.6359, "step": 2364 }, { "epoch": 0.14, "grad_norm": 2.1112375259399414, "learning_rate": 1.9420904053546716e-05, "loss": 1.0625, "step": 2365 }, { "epoch": 0.14, "grad_norm": 1.089199423789978, "learning_rate": 1.9420280924228277e-05, "loss": 0.5762, "step": 2366 }, { "epoch": 0.14, "grad_norm": 2.0326247215270996, "learning_rate": 1.9419657469842256e-05, "loss": 1.0875, "step": 2367 }, { "epoch": 0.14, "grad_norm": 1.9220284223556519, "learning_rate": 1.941903369041017e-05, "loss": 1.0838, "step": 2368 }, { "epoch": 0.14, "grad_norm": 2.114212989807129, "learning_rate": 1.941840958595354e-05, "loss": 1.081, "step": 2369 }, { "epoch": 0.14, "grad_norm": 2.0175459384918213, "learning_rate": 1.9417785156493906e-05, "loss": 1.0756, "step": 2370 }, { "epoch": 0.14, "grad_norm": 1.993507981300354, "learning_rate": 1.9417160402052813e-05, "loss": 1.1363, "step": 2371 }, { "epoch": 0.14, "grad_norm": 2.014669895172119, "learning_rate": 1.941653532265182e-05, "loss": 1.0987, "step": 2372 }, { "epoch": 0.14, "grad_norm": 2.3594884872436523, "learning_rate": 1.9415909918312497e-05, "loss": 1.1388, "step": 2373 }, { "epoch": 0.14, "grad_norm": 2.099210023880005, "learning_rate": 1.9415284189056426e-05, "loss": 1.1534, "step": 2374 }, { "epoch": 0.14, "grad_norm": 2.098428249359131, "learning_rate": 1.94146581349052e-05, "loss": 1.0132, "step": 2375 }, { "epoch": 0.14, "grad_norm": 2.3804538249969482, "learning_rate": 1.9414031755880417e-05, "loss": 1.1292, "step": 2376 }, { "epoch": 0.14, "grad_norm": 1.987931251525879, "learning_rate": 1.9413405052003696e-05, "loss": 1.0543, "step": 2377 }, { "epoch": 0.14, "grad_norm": 1.8174099922180176, "learning_rate": 1.9412778023296663e-05, "loss": 1.0528, "step": 2378 }, { "epoch": 0.14, "grad_norm": 1.8591465950012207, "learning_rate": 1.9412150669780952e-05, "loss": 1.1006, "step": 2379 }, { "epoch": 0.14, "grad_norm": 1.882876992225647, "learning_rate": 1.9411522991478217e-05, "loss": 1.104, "step": 2380 }, { "epoch": 0.14, "grad_norm": 1.8860564231872559, "learning_rate": 1.9410894988410113e-05, "loss": 1.0943, "step": 2381 }, { "epoch": 0.14, "grad_norm": 2.0436627864837646, "learning_rate": 1.941026666059831e-05, "loss": 1.0568, "step": 2382 }, { "epoch": 0.14, "grad_norm": 1.8623932600021362, "learning_rate": 1.9409638008064487e-05, "loss": 1.1268, "step": 2383 }, { "epoch": 0.14, "grad_norm": 1.9900166988372803, "learning_rate": 1.9409009030830347e-05, "loss": 1.068, "step": 2384 }, { "epoch": 0.14, "grad_norm": 1.8611586093902588, "learning_rate": 1.9408379728917585e-05, "loss": 1.0122, "step": 2385 }, { "epoch": 0.14, "grad_norm": 1.971740961074829, "learning_rate": 1.940775010234792e-05, "loss": 1.0872, "step": 2386 }, { "epoch": 0.14, "grad_norm": 1.8328596353530884, "learning_rate": 1.940712015114308e-05, "loss": 1.1006, "step": 2387 }, { "epoch": 0.14, "grad_norm": 1.890426516532898, "learning_rate": 1.9406489875324798e-05, "loss": 1.0538, "step": 2388 }, { "epoch": 0.14, "grad_norm": 1.7593294382095337, "learning_rate": 1.940585927491483e-05, "loss": 1.06, "step": 2389 }, { "epoch": 0.14, "grad_norm": 1.8916653394699097, "learning_rate": 1.9405228349934933e-05, "loss": 1.0819, "step": 2390 }, { "epoch": 0.14, "grad_norm": 1.983296275138855, "learning_rate": 1.9404597100406878e-05, "loss": 1.0942, "step": 2391 }, { "epoch": 0.14, "grad_norm": 2.0611073970794678, "learning_rate": 1.9403965526352447e-05, "loss": 1.1865, "step": 2392 }, { "epoch": 0.14, "grad_norm": 2.013436794281006, "learning_rate": 1.940333362779343e-05, "loss": 1.1293, "step": 2393 }, { "epoch": 0.14, "grad_norm": 1.317640781402588, "learning_rate": 1.9402701404751644e-05, "loss": 0.5838, "step": 2394 }, { "epoch": 0.14, "grad_norm": 1.8269084692001343, "learning_rate": 1.9402068857248894e-05, "loss": 1.0508, "step": 2395 }, { "epoch": 0.14, "grad_norm": 1.9530673027038574, "learning_rate": 1.940143598530701e-05, "loss": 1.0642, "step": 2396 }, { "epoch": 0.14, "grad_norm": 1.137480616569519, "learning_rate": 1.9400802788947833e-05, "loss": 0.6317, "step": 2397 }, { "epoch": 0.14, "grad_norm": 2.073695182800293, "learning_rate": 1.9400169268193213e-05, "loss": 1.0602, "step": 2398 }, { "epoch": 0.14, "grad_norm": 2.39306378364563, "learning_rate": 1.9399535423065014e-05, "loss": 1.1315, "step": 2399 }, { "epoch": 0.14, "grad_norm": 1.995789647102356, "learning_rate": 1.9398901253585097e-05, "loss": 1.0869, "step": 2400 }, { "epoch": 0.14, "grad_norm": 1.9781782627105713, "learning_rate": 1.9398266759775354e-05, "loss": 1.1367, "step": 2401 }, { "epoch": 0.14, "grad_norm": 1.9745179414749146, "learning_rate": 1.939763194165768e-05, "loss": 1.1736, "step": 2402 }, { "epoch": 0.14, "grad_norm": 1.975692629814148, "learning_rate": 1.939699679925398e-05, "loss": 1.0529, "step": 2403 }, { "epoch": 0.14, "grad_norm": 1.8244720697402954, "learning_rate": 1.9396361332586168e-05, "loss": 1.0436, "step": 2404 }, { "epoch": 0.14, "grad_norm": 2.1581244468688965, "learning_rate": 1.9395725541676174e-05, "loss": 1.1372, "step": 2405 }, { "epoch": 0.14, "grad_norm": 1.852168321609497, "learning_rate": 1.9395089426545938e-05, "loss": 1.1173, "step": 2406 }, { "epoch": 0.14, "grad_norm": 2.027801990509033, "learning_rate": 1.939445298721741e-05, "loss": 1.1059, "step": 2407 }, { "epoch": 0.14, "grad_norm": 1.3611423969268799, "learning_rate": 1.9393816223712553e-05, "loss": 0.6187, "step": 2408 }, { "epoch": 0.14, "grad_norm": 2.0657310485839844, "learning_rate": 1.9393179136053334e-05, "loss": 1.0492, "step": 2409 }, { "epoch": 0.14, "grad_norm": 1.9881436824798584, "learning_rate": 1.9392541724261745e-05, "loss": 1.2039, "step": 2410 }, { "epoch": 0.14, "grad_norm": 2.133744478225708, "learning_rate": 1.9391903988359776e-05, "loss": 1.08, "step": 2411 }, { "epoch": 0.14, "grad_norm": 1.8247300386428833, "learning_rate": 1.939126592836944e-05, "loss": 1.0087, "step": 2412 }, { "epoch": 0.14, "grad_norm": 2.0829696655273438, "learning_rate": 1.9390627544312748e-05, "loss": 1.0425, "step": 2413 }, { "epoch": 0.14, "grad_norm": 2.0927734375, "learning_rate": 1.938998883621173e-05, "loss": 1.0497, "step": 2414 }, { "epoch": 0.14, "grad_norm": 2.2136967182159424, "learning_rate": 1.938934980408843e-05, "loss": 1.0664, "step": 2415 }, { "epoch": 0.14, "grad_norm": 2.1908979415893555, "learning_rate": 1.9388710447964894e-05, "loss": 1.0983, "step": 2416 }, { "epoch": 0.14, "grad_norm": 1.8190349340438843, "learning_rate": 1.9388070767863186e-05, "loss": 1.0703, "step": 2417 }, { "epoch": 0.14, "grad_norm": 1.9157389402389526, "learning_rate": 1.9387430763805383e-05, "loss": 1.023, "step": 2418 }, { "epoch": 0.14, "grad_norm": 1.7293500900268555, "learning_rate": 1.9386790435813564e-05, "loss": 1.0421, "step": 2419 }, { "epoch": 0.14, "grad_norm": 1.9776259660720825, "learning_rate": 1.9386149783909827e-05, "loss": 1.073, "step": 2420 }, { "epoch": 0.14, "grad_norm": 2.014918565750122, "learning_rate": 1.9385508808116287e-05, "loss": 1.0156, "step": 2421 }, { "epoch": 0.14, "grad_norm": 2.047497272491455, "learning_rate": 1.938486750845505e-05, "loss": 1.034, "step": 2422 }, { "epoch": 0.14, "grad_norm": 2.0469491481781006, "learning_rate": 1.938422588494825e-05, "loss": 1.1204, "step": 2423 }, { "epoch": 0.14, "grad_norm": 1.7876993417739868, "learning_rate": 1.9383583937618034e-05, "loss": 1.101, "step": 2424 }, { "epoch": 0.14, "grad_norm": 1.9604291915893555, "learning_rate": 1.9382941666486542e-05, "loss": 1.1114, "step": 2425 }, { "epoch": 0.14, "grad_norm": 2.338437080383301, "learning_rate": 1.9382299071575947e-05, "loss": 1.0781, "step": 2426 }, { "epoch": 0.14, "grad_norm": 1.771887183189392, "learning_rate": 1.9381656152908418e-05, "loss": 1.0719, "step": 2427 }, { "epoch": 0.14, "grad_norm": 1.921116590499878, "learning_rate": 1.9381012910506146e-05, "loss": 1.0684, "step": 2428 }, { "epoch": 0.14, "grad_norm": 2.204495906829834, "learning_rate": 1.9380369344391318e-05, "loss": 1.0315, "step": 2429 }, { "epoch": 0.14, "grad_norm": 1.9952232837677002, "learning_rate": 1.9379725454586145e-05, "loss": 1.1281, "step": 2430 }, { "epoch": 0.14, "grad_norm": 2.1511647701263428, "learning_rate": 1.9379081241112855e-05, "loss": 1.1339, "step": 2431 }, { "epoch": 0.14, "grad_norm": 2.198298931121826, "learning_rate": 1.9378436703993666e-05, "loss": 1.0468, "step": 2432 }, { "epoch": 0.14, "grad_norm": 2.0758845806121826, "learning_rate": 1.9377791843250825e-05, "loss": 1.134, "step": 2433 }, { "epoch": 0.14, "grad_norm": 1.8433973789215088, "learning_rate": 1.937714665890658e-05, "loss": 1.1025, "step": 2434 }, { "epoch": 0.14, "grad_norm": 1.9933902025222778, "learning_rate": 1.9376501150983205e-05, "loss": 1.0997, "step": 2435 }, { "epoch": 0.14, "grad_norm": 2.266794204711914, "learning_rate": 1.9375855319502964e-05, "loss": 1.1177, "step": 2436 }, { "epoch": 0.14, "grad_norm": 1.857455849647522, "learning_rate": 1.9375209164488145e-05, "loss": 1.0671, "step": 2437 }, { "epoch": 0.14, "grad_norm": 1.9561847448349, "learning_rate": 1.9374562685961045e-05, "loss": 1.028, "step": 2438 }, { "epoch": 0.14, "grad_norm": 2.066419839859009, "learning_rate": 1.9373915883943975e-05, "loss": 1.1097, "step": 2439 }, { "epoch": 0.14, "grad_norm": 2.045851707458496, "learning_rate": 1.9373268758459256e-05, "loss": 0.9915, "step": 2440 }, { "epoch": 0.14, "grad_norm": 1.9956412315368652, "learning_rate": 1.9372621309529213e-05, "loss": 1.094, "step": 2441 }, { "epoch": 0.14, "grad_norm": 1.8995851278305054, "learning_rate": 1.937197353717619e-05, "loss": 1.049, "step": 2442 }, { "epoch": 0.14, "grad_norm": 1.845262050628662, "learning_rate": 1.9371325441422537e-05, "loss": 1.0271, "step": 2443 }, { "epoch": 0.14, "grad_norm": 1.9874464273452759, "learning_rate": 1.9370677022290625e-05, "loss": 1.0712, "step": 2444 }, { "epoch": 0.14, "grad_norm": 1.226375937461853, "learning_rate": 1.9370028279802825e-05, "loss": 0.5934, "step": 2445 }, { "epoch": 0.14, "grad_norm": 1.8723481893539429, "learning_rate": 1.936937921398152e-05, "loss": 1.1358, "step": 2446 }, { "epoch": 0.14, "grad_norm": 1.8159383535385132, "learning_rate": 1.936872982484911e-05, "loss": 1.0442, "step": 2447 }, { "epoch": 0.14, "grad_norm": 2.230492353439331, "learning_rate": 1.9368080112428008e-05, "loss": 1.1768, "step": 2448 }, { "epoch": 0.14, "grad_norm": 1.9398930072784424, "learning_rate": 1.936743007674063e-05, "loss": 1.0932, "step": 2449 }, { "epoch": 0.14, "grad_norm": 1.7045669555664062, "learning_rate": 1.9366779717809402e-05, "loss": 0.9882, "step": 2450 }, { "epoch": 0.14, "grad_norm": 1.8414820432662964, "learning_rate": 1.9366129035656777e-05, "loss": 1.181, "step": 2451 }, { "epoch": 0.14, "grad_norm": 1.8981040716171265, "learning_rate": 1.9365478030305195e-05, "loss": 1.0567, "step": 2452 }, { "epoch": 0.14, "grad_norm": 2.1965668201446533, "learning_rate": 1.9364826701777133e-05, "loss": 1.1267, "step": 2453 }, { "epoch": 0.14, "grad_norm": 2.2412266731262207, "learning_rate": 1.9364175050095058e-05, "loss": 1.0584, "step": 2454 }, { "epoch": 0.14, "grad_norm": 1.8191626071929932, "learning_rate": 1.9363523075281464e-05, "loss": 1.0284, "step": 2455 }, { "epoch": 0.14, "grad_norm": 2.2468509674072266, "learning_rate": 1.936287077735884e-05, "loss": 1.0617, "step": 2456 }, { "epoch": 0.14, "grad_norm": 1.997544765472412, "learning_rate": 1.9362218156349707e-05, "loss": 1.0942, "step": 2457 }, { "epoch": 0.14, "grad_norm": 1.7855714559555054, "learning_rate": 1.9361565212276572e-05, "loss": 1.0649, "step": 2458 }, { "epoch": 0.14, "grad_norm": 2.017965078353882, "learning_rate": 1.9360911945161975e-05, "loss": 1.0604, "step": 2459 }, { "epoch": 0.14, "grad_norm": 2.0366060733795166, "learning_rate": 1.9360258355028452e-05, "loss": 1.1378, "step": 2460 }, { "epoch": 0.14, "grad_norm": 2.044109344482422, "learning_rate": 1.935960444189856e-05, "loss": 1.0719, "step": 2461 }, { "epoch": 0.14, "grad_norm": 1.7298020124435425, "learning_rate": 1.9358950205794863e-05, "loss": 1.0558, "step": 2462 }, { "epoch": 0.14, "grad_norm": 2.026771068572998, "learning_rate": 1.935829564673994e-05, "loss": 1.0679, "step": 2463 }, { "epoch": 0.14, "grad_norm": 2.177840232849121, "learning_rate": 1.9357640764756377e-05, "loss": 1.0011, "step": 2464 }, { "epoch": 0.14, "grad_norm": 1.854079246520996, "learning_rate": 1.935698555986677e-05, "loss": 1.0859, "step": 2465 }, { "epoch": 0.14, "grad_norm": 1.986857295036316, "learning_rate": 1.9356330032093728e-05, "loss": 1.0594, "step": 2466 }, { "epoch": 0.14, "grad_norm": 1.891068935394287, "learning_rate": 1.935567418145987e-05, "loss": 1.0968, "step": 2467 }, { "epoch": 0.14, "grad_norm": 1.9980063438415527, "learning_rate": 1.9355018007987832e-05, "loss": 1.0502, "step": 2468 }, { "epoch": 0.14, "grad_norm": 1.9222664833068848, "learning_rate": 1.9354361511700256e-05, "loss": 0.9977, "step": 2469 }, { "epoch": 0.14, "grad_norm": 1.7580130100250244, "learning_rate": 1.935370469261979e-05, "loss": 1.059, "step": 2470 }, { "epoch": 0.14, "grad_norm": 2.0984249114990234, "learning_rate": 1.9353047550769108e-05, "loss": 1.1054, "step": 2471 }, { "epoch": 0.14, "grad_norm": 1.885148525238037, "learning_rate": 1.935239008617088e-05, "loss": 1.062, "step": 2472 }, { "epoch": 0.14, "grad_norm": 1.815417766571045, "learning_rate": 1.9351732298847797e-05, "loss": 1.0972, "step": 2473 }, { "epoch": 0.14, "grad_norm": 2.1468958854675293, "learning_rate": 1.9351074188822557e-05, "loss": 1.0896, "step": 2474 }, { "epoch": 0.14, "grad_norm": 2.023013114929199, "learning_rate": 1.9350415756117863e-05, "loss": 1.0329, "step": 2475 }, { "epoch": 0.14, "grad_norm": 1.9057968854904175, "learning_rate": 1.9349757000756442e-05, "loss": 1.0331, "step": 2476 }, { "epoch": 0.14, "grad_norm": 1.9393936395645142, "learning_rate": 1.9349097922761026e-05, "loss": 1.0806, "step": 2477 }, { "epoch": 0.14, "grad_norm": 1.968632698059082, "learning_rate": 1.9348438522154355e-05, "loss": 1.0075, "step": 2478 }, { "epoch": 0.14, "grad_norm": 1.918735146522522, "learning_rate": 1.9347778798959184e-05, "loss": 1.122, "step": 2479 }, { "epoch": 0.14, "grad_norm": 1.9503058195114136, "learning_rate": 1.934711875319828e-05, "loss": 1.0386, "step": 2480 }, { "epoch": 0.14, "grad_norm": 1.9704668521881104, "learning_rate": 1.9346458384894418e-05, "loss": 1.0505, "step": 2481 }, { "epoch": 0.14, "grad_norm": 1.732666015625, "learning_rate": 1.9345797694070387e-05, "loss": 1.1212, "step": 2482 }, { "epoch": 0.14, "grad_norm": 1.935217261314392, "learning_rate": 1.934513668074898e-05, "loss": 1.0639, "step": 2483 }, { "epoch": 0.14, "grad_norm": 1.9657056331634521, "learning_rate": 1.934447534495301e-05, "loss": 1.0973, "step": 2484 }, { "epoch": 0.14, "grad_norm": 1.199621319770813, "learning_rate": 1.9343813686705302e-05, "loss": 0.6188, "step": 2485 }, { "epoch": 0.14, "grad_norm": 2.1108856201171875, "learning_rate": 1.9343151706028684e-05, "loss": 1.0553, "step": 2486 }, { "epoch": 0.14, "grad_norm": 2.1071860790252686, "learning_rate": 1.9342489402945997e-05, "loss": 1.1577, "step": 2487 }, { "epoch": 0.14, "grad_norm": 1.8007738590240479, "learning_rate": 1.9341826777480103e-05, "loss": 1.0552, "step": 2488 }, { "epoch": 0.14, "grad_norm": 1.7889597415924072, "learning_rate": 1.934116382965386e-05, "loss": 0.9961, "step": 2489 }, { "epoch": 0.14, "grad_norm": 1.8007550239562988, "learning_rate": 1.9340500559490146e-05, "loss": 0.9874, "step": 2490 }, { "epoch": 0.14, "grad_norm": 1.8030359745025635, "learning_rate": 1.933983696701185e-05, "loss": 1.004, "step": 2491 }, { "epoch": 0.14, "grad_norm": 1.926586389541626, "learning_rate": 1.933917305224187e-05, "loss": 1.0909, "step": 2492 }, { "epoch": 0.14, "grad_norm": 1.9421710968017578, "learning_rate": 1.9338508815203116e-05, "loss": 1.0998, "step": 2493 }, { "epoch": 0.14, "grad_norm": 2.027650833129883, "learning_rate": 1.9337844255918506e-05, "loss": 1.084, "step": 2494 }, { "epoch": 0.14, "grad_norm": 1.977394461631775, "learning_rate": 1.933717937441098e-05, "loss": 1.0822, "step": 2495 }, { "epoch": 0.14, "grad_norm": 2.0998809337615967, "learning_rate": 1.933651417070347e-05, "loss": 1.0521, "step": 2496 }, { "epoch": 0.14, "grad_norm": 2.0941131114959717, "learning_rate": 1.9335848644818942e-05, "loss": 1.1314, "step": 2497 }, { "epoch": 0.14, "grad_norm": 2.0855953693389893, "learning_rate": 1.9335182796780354e-05, "loss": 1.0283, "step": 2498 }, { "epoch": 0.14, "grad_norm": 1.9205164909362793, "learning_rate": 1.9334516626610685e-05, "loss": 1.0564, "step": 2499 }, { "epoch": 0.14, "grad_norm": 2.0995781421661377, "learning_rate": 1.9333850134332918e-05, "loss": 0.9803, "step": 2500 }, { "epoch": 0.14, "grad_norm": 1.1396867036819458, "learning_rate": 1.933318331997006e-05, "loss": 0.6401, "step": 2501 }, { "epoch": 0.14, "grad_norm": 1.987968921661377, "learning_rate": 1.9332516183545116e-05, "loss": 1.1081, "step": 2502 }, { "epoch": 0.14, "grad_norm": 2.3172152042388916, "learning_rate": 1.933184872508111e-05, "loss": 1.0898, "step": 2503 }, { "epoch": 0.14, "grad_norm": 0.9781608581542969, "learning_rate": 1.9331180944601067e-05, "loss": 0.5482, "step": 2504 }, { "epoch": 0.14, "grad_norm": 2.1662814617156982, "learning_rate": 1.933051284212804e-05, "loss": 1.0913, "step": 2505 }, { "epoch": 0.14, "grad_norm": 1.9019211530685425, "learning_rate": 1.9329844417685078e-05, "loss": 1.0814, "step": 2506 }, { "epoch": 0.14, "grad_norm": 2.074995517730713, "learning_rate": 1.9329175671295247e-05, "loss": 0.9853, "step": 2507 }, { "epoch": 0.14, "grad_norm": 1.8957983255386353, "learning_rate": 1.932850660298162e-05, "loss": 1.0263, "step": 2508 }, { "epoch": 0.14, "grad_norm": 1.8568568229675293, "learning_rate": 1.932783721276729e-05, "loss": 0.9991, "step": 2509 }, { "epoch": 0.14, "grad_norm": 1.9765918254852295, "learning_rate": 1.9327167500675354e-05, "loss": 1.08, "step": 2510 }, { "epoch": 0.14, "grad_norm": 2.1233248710632324, "learning_rate": 1.9326497466728924e-05, "loss": 1.1166, "step": 2511 }, { "epoch": 0.14, "grad_norm": 1.980760097503662, "learning_rate": 1.932582711095112e-05, "loss": 1.0881, "step": 2512 }, { "epoch": 0.14, "grad_norm": 1.837073564529419, "learning_rate": 1.932515643336507e-05, "loss": 1.0646, "step": 2513 }, { "epoch": 0.14, "grad_norm": 1.8227471113204956, "learning_rate": 1.932448543399392e-05, "loss": 1.0062, "step": 2514 }, { "epoch": 0.14, "grad_norm": 2.110034704208374, "learning_rate": 1.9323814112860826e-05, "loss": 1.0069, "step": 2515 }, { "epoch": 0.14, "grad_norm": 1.9318060874938965, "learning_rate": 1.9323142469988953e-05, "loss": 1.0787, "step": 2516 }, { "epoch": 0.14, "grad_norm": 1.850795030593872, "learning_rate": 1.932247050540147e-05, "loss": 0.9666, "step": 2517 }, { "epoch": 0.14, "grad_norm": 2.138353109359741, "learning_rate": 1.9321798219121575e-05, "loss": 1.0713, "step": 2518 }, { "epoch": 0.14, "grad_norm": 1.7956981658935547, "learning_rate": 1.9321125611172468e-05, "loss": 0.9648, "step": 2519 }, { "epoch": 0.14, "grad_norm": 2.281947135925293, "learning_rate": 1.9320452681577348e-05, "loss": 1.0801, "step": 2520 }, { "epoch": 0.14, "grad_norm": 2.3190159797668457, "learning_rate": 1.9319779430359443e-05, "loss": 1.1107, "step": 2521 }, { "epoch": 0.14, "grad_norm": 1.8956717252731323, "learning_rate": 1.9319105857541983e-05, "loss": 1.0818, "step": 2522 }, { "epoch": 0.14, "grad_norm": 2.035945415496826, "learning_rate": 1.9318431963148214e-05, "loss": 1.1642, "step": 2523 }, { "epoch": 0.14, "grad_norm": 1.9879200458526611, "learning_rate": 1.9317757747201386e-05, "loss": 1.1075, "step": 2524 }, { "epoch": 0.14, "grad_norm": 1.971221685409546, "learning_rate": 1.9317083209724767e-05, "loss": 1.0936, "step": 2525 }, { "epoch": 0.14, "grad_norm": 2.1511213779449463, "learning_rate": 1.931640835074163e-05, "loss": 1.0769, "step": 2526 }, { "epoch": 0.14, "grad_norm": 1.8749141693115234, "learning_rate": 1.9315733170275268e-05, "loss": 1.0545, "step": 2527 }, { "epoch": 0.14, "grad_norm": 1.8905028104782104, "learning_rate": 1.931505766834898e-05, "loss": 1.1361, "step": 2528 }, { "epoch": 0.15, "grad_norm": 1.7451789379119873, "learning_rate": 1.931438184498607e-05, "loss": 1.0834, "step": 2529 }, { "epoch": 0.15, "grad_norm": 2.0431735515594482, "learning_rate": 1.9313705700209853e-05, "loss": 1.0975, "step": 2530 }, { "epoch": 0.15, "grad_norm": 1.8834114074707031, "learning_rate": 1.931302923404368e-05, "loss": 1.1329, "step": 2531 }, { "epoch": 0.15, "grad_norm": 2.2932240962982178, "learning_rate": 1.9312352446510877e-05, "loss": 1.0912, "step": 2532 }, { "epoch": 0.15, "grad_norm": 2.1283633708953857, "learning_rate": 1.931167533763481e-05, "loss": 1.056, "step": 2533 }, { "epoch": 0.15, "grad_norm": 1.8803224563598633, "learning_rate": 1.931099790743883e-05, "loss": 0.9982, "step": 2534 }, { "epoch": 0.15, "grad_norm": 1.971746563911438, "learning_rate": 1.9310320155946326e-05, "loss": 1.0834, "step": 2535 }, { "epoch": 0.15, "grad_norm": 1.9654436111450195, "learning_rate": 1.9309642083180682e-05, "loss": 1.1105, "step": 2536 }, { "epoch": 0.15, "grad_norm": 1.8840471506118774, "learning_rate": 1.930896368916529e-05, "loss": 1.1006, "step": 2537 }, { "epoch": 0.15, "grad_norm": 2.26127290725708, "learning_rate": 1.930828497392357e-05, "loss": 1.0847, "step": 2538 }, { "epoch": 0.15, "grad_norm": 1.3020989894866943, "learning_rate": 1.9307605937478937e-05, "loss": 0.6401, "step": 2539 }, { "epoch": 0.15, "grad_norm": 2.1201605796813965, "learning_rate": 1.930692657985482e-05, "loss": 1.0584, "step": 2540 }, { "epoch": 0.15, "grad_norm": 2.0672385692596436, "learning_rate": 1.9306246901074666e-05, "loss": 1.0254, "step": 2541 }, { "epoch": 0.15, "grad_norm": 1.8749616146087646, "learning_rate": 1.9305566901161928e-05, "loss": 1.0872, "step": 2542 }, { "epoch": 0.15, "grad_norm": 1.9761897325515747, "learning_rate": 1.930488658014007e-05, "loss": 1.0704, "step": 2543 }, { "epoch": 0.15, "grad_norm": 1.8861124515533447, "learning_rate": 1.9304205938032567e-05, "loss": 1.0419, "step": 2544 }, { "epoch": 0.15, "grad_norm": 1.8207732439041138, "learning_rate": 1.930352497486291e-05, "loss": 1.1339, "step": 2545 }, { "epoch": 0.15, "grad_norm": 1.8331044912338257, "learning_rate": 1.930284369065459e-05, "loss": 1.0997, "step": 2546 }, { "epoch": 0.15, "grad_norm": 1.9984328746795654, "learning_rate": 1.9302162085431125e-05, "loss": 1.1047, "step": 2547 }, { "epoch": 0.15, "grad_norm": 1.9100768566131592, "learning_rate": 1.9301480159216028e-05, "loss": 1.0297, "step": 2548 }, { "epoch": 0.15, "grad_norm": 2.022785186767578, "learning_rate": 1.9300797912032834e-05, "loss": 1.0806, "step": 2549 }, { "epoch": 0.15, "grad_norm": 1.9129924774169922, "learning_rate": 1.9300115343905086e-05, "loss": 1.0462, "step": 2550 }, { "epoch": 0.15, "grad_norm": 1.9246619939804077, "learning_rate": 1.9299432454856335e-05, "loss": 1.062, "step": 2551 }, { "epoch": 0.15, "grad_norm": 1.0852985382080078, "learning_rate": 1.929874924491015e-05, "loss": 0.5822, "step": 2552 }, { "epoch": 0.15, "grad_norm": 2.53330397605896, "learning_rate": 1.9298065714090098e-05, "loss": 1.1222, "step": 2553 }, { "epoch": 0.15, "grad_norm": 2.130354642868042, "learning_rate": 1.9297381862419776e-05, "loss": 1.0678, "step": 2554 }, { "epoch": 0.15, "grad_norm": 1.0234578847885132, "learning_rate": 1.9296697689922775e-05, "loss": 0.6062, "step": 2555 }, { "epoch": 0.15, "grad_norm": 1.9899568557739258, "learning_rate": 1.929601319662271e-05, "loss": 1.0975, "step": 2556 }, { "epoch": 0.15, "grad_norm": 1.061023235321045, "learning_rate": 1.929532838254319e-05, "loss": 0.554, "step": 2557 }, { "epoch": 0.15, "grad_norm": 1.9634816646575928, "learning_rate": 1.9294643247707858e-05, "loss": 1.0688, "step": 2558 }, { "epoch": 0.15, "grad_norm": 1.8687609434127808, "learning_rate": 1.9293957792140348e-05, "loss": 1.0515, "step": 2559 }, { "epoch": 0.15, "grad_norm": 2.528010606765747, "learning_rate": 1.9293272015864318e-05, "loss": 1.0907, "step": 2560 }, { "epoch": 0.15, "grad_norm": 2.0524628162384033, "learning_rate": 1.929258591890343e-05, "loss": 1.1302, "step": 2561 }, { "epoch": 0.15, "grad_norm": 2.045297145843506, "learning_rate": 1.929189950128136e-05, "loss": 1.035, "step": 2562 }, { "epoch": 0.15, "grad_norm": 1.9476463794708252, "learning_rate": 1.9291212763021792e-05, "loss": 1.0099, "step": 2563 }, { "epoch": 0.15, "grad_norm": 2.022969961166382, "learning_rate": 1.929052570414843e-05, "loss": 1.099, "step": 2564 }, { "epoch": 0.15, "grad_norm": 2.16217303276062, "learning_rate": 1.9289838324684974e-05, "loss": 1.0807, "step": 2565 }, { "epoch": 0.15, "grad_norm": 2.1511733531951904, "learning_rate": 1.928915062465515e-05, "loss": 1.0421, "step": 2566 }, { "epoch": 0.15, "grad_norm": 2.124356985092163, "learning_rate": 1.9288462604082684e-05, "loss": 0.9788, "step": 2567 }, { "epoch": 0.15, "grad_norm": 2.2225966453552246, "learning_rate": 1.9287774262991324e-05, "loss": 1.0287, "step": 2568 }, { "epoch": 0.15, "grad_norm": 1.9184764623641968, "learning_rate": 1.9287085601404813e-05, "loss": 0.9997, "step": 2569 }, { "epoch": 0.15, "grad_norm": 1.8319218158721924, "learning_rate": 1.9286396619346925e-05, "loss": 1.06, "step": 2570 }, { "epoch": 0.15, "grad_norm": 1.9651515483856201, "learning_rate": 1.9285707316841425e-05, "loss": 1.1011, "step": 2571 }, { "epoch": 0.15, "grad_norm": 2.2106685638427734, "learning_rate": 1.9285017693912107e-05, "loss": 1.0117, "step": 2572 }, { "epoch": 0.15, "grad_norm": 1.3083195686340332, "learning_rate": 1.9284327750582767e-05, "loss": 0.5883, "step": 2573 }, { "epoch": 0.15, "grad_norm": 1.9038532972335815, "learning_rate": 1.928363748687721e-05, "loss": 1.137, "step": 2574 }, { "epoch": 0.15, "grad_norm": 1.9197347164154053, "learning_rate": 1.9282946902819253e-05, "loss": 1.1562, "step": 2575 }, { "epoch": 0.15, "grad_norm": 2.170259952545166, "learning_rate": 1.928225599843273e-05, "loss": 1.1273, "step": 2576 }, { "epoch": 0.15, "grad_norm": 1.8560339212417603, "learning_rate": 1.9281564773741487e-05, "loss": 1.1398, "step": 2577 }, { "epoch": 0.15, "grad_norm": 1.9716919660568237, "learning_rate": 1.9280873228769365e-05, "loss": 1.0671, "step": 2578 }, { "epoch": 0.15, "grad_norm": 2.026346206665039, "learning_rate": 1.9280181363540236e-05, "loss": 1.0435, "step": 2579 }, { "epoch": 0.15, "grad_norm": 2.1012442111968994, "learning_rate": 1.9279489178077968e-05, "loss": 1.0075, "step": 2580 }, { "epoch": 0.15, "grad_norm": 1.9520851373672485, "learning_rate": 1.927879667240645e-05, "loss": 0.9199, "step": 2581 }, { "epoch": 0.15, "grad_norm": 1.9819574356079102, "learning_rate": 1.9278103846549582e-05, "loss": 1.0847, "step": 2582 }, { "epoch": 0.15, "grad_norm": 1.941069483757019, "learning_rate": 1.9277410700531264e-05, "loss": 0.9903, "step": 2583 }, { "epoch": 0.15, "grad_norm": 2.026662588119507, "learning_rate": 1.927671723437542e-05, "loss": 1.06, "step": 2584 }, { "epoch": 0.15, "grad_norm": 2.3707916736602783, "learning_rate": 1.927602344810598e-05, "loss": 1.0584, "step": 2585 }, { "epoch": 0.15, "grad_norm": 2.093108892440796, "learning_rate": 1.927532934174688e-05, "loss": 1.0611, "step": 2586 }, { "epoch": 0.15, "grad_norm": 1.9259631633758545, "learning_rate": 1.927463491532207e-05, "loss": 1.0199, "step": 2587 }, { "epoch": 0.15, "grad_norm": 1.926685094833374, "learning_rate": 1.9273940168855518e-05, "loss": 1.0613, "step": 2588 }, { "epoch": 0.15, "grad_norm": 1.8998945951461792, "learning_rate": 1.92732451023712e-05, "loss": 1.0206, "step": 2589 }, { "epoch": 0.15, "grad_norm": 2.0324254035949707, "learning_rate": 1.9272549715893097e-05, "loss": 1.0275, "step": 2590 }, { "epoch": 0.15, "grad_norm": 1.1829004287719727, "learning_rate": 1.9271854009445202e-05, "loss": 0.5712, "step": 2591 }, { "epoch": 0.15, "grad_norm": 2.3386924266815186, "learning_rate": 1.927115798305153e-05, "loss": 1.0463, "step": 2592 }, { "epoch": 0.15, "grad_norm": 2.066807746887207, "learning_rate": 1.9270461636736087e-05, "loss": 1.0889, "step": 2593 }, { "epoch": 0.15, "grad_norm": 1.9856654405593872, "learning_rate": 1.9269764970522915e-05, "loss": 1.0015, "step": 2594 }, { "epoch": 0.15, "grad_norm": 1.9622710943222046, "learning_rate": 1.9269067984436045e-05, "loss": 1.094, "step": 2595 }, { "epoch": 0.15, "grad_norm": 2.111218214035034, "learning_rate": 1.926837067849953e-05, "loss": 1.0647, "step": 2596 }, { "epoch": 0.15, "grad_norm": 1.9046671390533447, "learning_rate": 1.9267673052737438e-05, "loss": 1.1955, "step": 2597 }, { "epoch": 0.15, "grad_norm": 2.029859781265259, "learning_rate": 1.9266975107173834e-05, "loss": 1.0461, "step": 2598 }, { "epoch": 0.15, "grad_norm": 1.970551609992981, "learning_rate": 1.9266276841832802e-05, "loss": 1.0148, "step": 2599 }, { "epoch": 0.15, "grad_norm": 1.9541776180267334, "learning_rate": 1.9265578256738445e-05, "loss": 1.0768, "step": 2600 }, { "epoch": 0.15, "grad_norm": 2.333456039428711, "learning_rate": 1.9264879351914866e-05, "loss": 1.0762, "step": 2601 }, { "epoch": 0.15, "grad_norm": 2.1288232803344727, "learning_rate": 1.9264180127386176e-05, "loss": 1.0375, "step": 2602 }, { "epoch": 0.15, "grad_norm": 2.1100027561187744, "learning_rate": 1.9263480583176514e-05, "loss": 1.1167, "step": 2603 }, { "epoch": 0.15, "grad_norm": 1.8198777437210083, "learning_rate": 1.9262780719310008e-05, "loss": 1.0951, "step": 2604 }, { "epoch": 0.15, "grad_norm": 1.8601609468460083, "learning_rate": 1.9262080535810815e-05, "loss": 1.0751, "step": 2605 }, { "epoch": 0.15, "grad_norm": 1.8959596157073975, "learning_rate": 1.9261380032703095e-05, "loss": 1.0317, "step": 2606 }, { "epoch": 0.15, "grad_norm": 1.9837675094604492, "learning_rate": 1.9260679210011024e-05, "loss": 0.9995, "step": 2607 }, { "epoch": 0.15, "grad_norm": 2.0680274963378906, "learning_rate": 1.925997806775878e-05, "loss": 1.0594, "step": 2608 }, { "epoch": 0.15, "grad_norm": 2.2864267826080322, "learning_rate": 1.925927660597056e-05, "loss": 1.1585, "step": 2609 }, { "epoch": 0.15, "grad_norm": 1.9845783710479736, "learning_rate": 1.9258574824670567e-05, "loss": 1.1241, "step": 2610 }, { "epoch": 0.15, "grad_norm": 2.182053565979004, "learning_rate": 1.925787272388302e-05, "loss": 1.112, "step": 2611 }, { "epoch": 0.15, "grad_norm": 2.167057991027832, "learning_rate": 1.925717030363215e-05, "loss": 1.0497, "step": 2612 }, { "epoch": 0.15, "grad_norm": 2.0042035579681396, "learning_rate": 1.925646756394219e-05, "loss": 1.076, "step": 2613 }, { "epoch": 0.15, "grad_norm": 2.099884271621704, "learning_rate": 1.9255764504837387e-05, "loss": 1.0682, "step": 2614 }, { "epoch": 0.15, "grad_norm": 1.9429821968078613, "learning_rate": 1.9255061126342013e-05, "loss": 1.0021, "step": 2615 }, { "epoch": 0.15, "grad_norm": 2.0103068351745605, "learning_rate": 1.925435742848033e-05, "loss": 1.1184, "step": 2616 }, { "epoch": 0.15, "grad_norm": 1.9453660249710083, "learning_rate": 1.925365341127662e-05, "loss": 1.1758, "step": 2617 }, { "epoch": 0.15, "grad_norm": 1.9802542924880981, "learning_rate": 1.925294907475518e-05, "loss": 1.1017, "step": 2618 }, { "epoch": 0.15, "grad_norm": 1.9817343950271606, "learning_rate": 1.925224441894032e-05, "loss": 1.0786, "step": 2619 }, { "epoch": 0.15, "grad_norm": 2.0544378757476807, "learning_rate": 1.9251539443856344e-05, "loss": 0.9645, "step": 2620 }, { "epoch": 0.15, "grad_norm": 1.1256073713302612, "learning_rate": 1.925083414952759e-05, "loss": 0.5849, "step": 2621 }, { "epoch": 0.15, "grad_norm": 1.9506025314331055, "learning_rate": 1.9250128535978384e-05, "loss": 1.0662, "step": 2622 }, { "epoch": 0.15, "grad_norm": 2.0655899047851562, "learning_rate": 1.9249422603233086e-05, "loss": 1.0693, "step": 2623 }, { "epoch": 0.15, "grad_norm": 0.9858718514442444, "learning_rate": 1.9248716351316054e-05, "loss": 0.5865, "step": 2624 }, { "epoch": 0.15, "grad_norm": 1.1444947719573975, "learning_rate": 1.924800978025165e-05, "loss": 0.6595, "step": 2625 }, { "epoch": 0.15, "grad_norm": 2.2073960304260254, "learning_rate": 1.9247302890064264e-05, "loss": 1.0985, "step": 2626 }, { "epoch": 0.15, "grad_norm": 2.026515483856201, "learning_rate": 1.924659568077829e-05, "loss": 1.0698, "step": 2627 }, { "epoch": 0.15, "grad_norm": 2.5139331817626953, "learning_rate": 1.9245888152418123e-05, "loss": 1.1387, "step": 2628 }, { "epoch": 0.15, "grad_norm": 2.0619704723358154, "learning_rate": 1.9245180305008187e-05, "loss": 1.093, "step": 2629 }, { "epoch": 0.15, "grad_norm": 1.9288030862808228, "learning_rate": 1.92444721385729e-05, "loss": 1.0548, "step": 2630 }, { "epoch": 0.15, "grad_norm": 1.969240427017212, "learning_rate": 1.9243763653136707e-05, "loss": 1.1001, "step": 2631 }, { "epoch": 0.15, "grad_norm": 1.8991674184799194, "learning_rate": 1.9243054848724048e-05, "loss": 0.9836, "step": 2632 }, { "epoch": 0.15, "grad_norm": 0.947309672832489, "learning_rate": 1.9242345725359392e-05, "loss": 0.5858, "step": 2633 }, { "epoch": 0.15, "grad_norm": 1.9963483810424805, "learning_rate": 1.9241636283067197e-05, "loss": 1.0546, "step": 2634 }, { "epoch": 0.15, "grad_norm": 2.097155809402466, "learning_rate": 1.924092652187195e-05, "loss": 1.0627, "step": 2635 }, { "epoch": 0.15, "grad_norm": 2.131049633026123, "learning_rate": 1.9240216441798145e-05, "loss": 1.0536, "step": 2636 }, { "epoch": 0.15, "grad_norm": 2.0207908153533936, "learning_rate": 1.9239506042870276e-05, "loss": 1.0597, "step": 2637 }, { "epoch": 0.15, "grad_norm": 1.1051024198532104, "learning_rate": 1.9238795325112867e-05, "loss": 0.563, "step": 2638 }, { "epoch": 0.15, "grad_norm": 1.8110027313232422, "learning_rate": 1.923808428855044e-05, "loss": 0.9528, "step": 2639 }, { "epoch": 0.15, "grad_norm": 2.171895742416382, "learning_rate": 1.923737293320753e-05, "loss": 1.0618, "step": 2640 }, { "epoch": 0.15, "grad_norm": 2.0588314533233643, "learning_rate": 1.923666125910868e-05, "loss": 1.054, "step": 2641 }, { "epoch": 0.15, "grad_norm": 2.107593536376953, "learning_rate": 1.9235949266278455e-05, "loss": 1.1372, "step": 2642 }, { "epoch": 0.15, "grad_norm": 1.891647219657898, "learning_rate": 1.923523695474142e-05, "loss": 1.064, "step": 2643 }, { "epoch": 0.15, "grad_norm": 1.8993183374404907, "learning_rate": 1.9234524324522153e-05, "loss": 1.0468, "step": 2644 }, { "epoch": 0.15, "grad_norm": 1.9360493421554565, "learning_rate": 1.9233811375645247e-05, "loss": 1.0893, "step": 2645 }, { "epoch": 0.15, "grad_norm": 1.8919892311096191, "learning_rate": 1.9233098108135308e-05, "loss": 1.0294, "step": 2646 }, { "epoch": 0.15, "grad_norm": 1.8131908178329468, "learning_rate": 1.923238452201694e-05, "loss": 0.9944, "step": 2647 }, { "epoch": 0.15, "grad_norm": 1.9440006017684937, "learning_rate": 1.9231670617314778e-05, "loss": 1.0423, "step": 2648 }, { "epoch": 0.15, "grad_norm": 2.010502576828003, "learning_rate": 1.9230956394053445e-05, "loss": 1.0898, "step": 2649 }, { "epoch": 0.15, "grad_norm": 2.1989331245422363, "learning_rate": 1.9230241852257595e-05, "loss": 1.0519, "step": 2650 }, { "epoch": 0.15, "grad_norm": 1.8532699346542358, "learning_rate": 1.9229526991951883e-05, "loss": 1.1416, "step": 2651 }, { "epoch": 0.15, "grad_norm": 2.3194479942321777, "learning_rate": 1.9228811813160972e-05, "loss": 1.1164, "step": 2652 }, { "epoch": 0.15, "grad_norm": 2.0304808616638184, "learning_rate": 1.922809631590955e-05, "loss": 1.0666, "step": 2653 }, { "epoch": 0.15, "grad_norm": 2.0340416431427, "learning_rate": 1.92273805002223e-05, "loss": 1.0318, "step": 2654 }, { "epoch": 0.15, "grad_norm": 1.8742103576660156, "learning_rate": 1.922666436612392e-05, "loss": 1.0353, "step": 2655 }, { "epoch": 0.15, "grad_norm": 2.2732443809509277, "learning_rate": 1.9225947913639133e-05, "loss": 1.1026, "step": 2656 }, { "epoch": 0.15, "grad_norm": 2.2090322971343994, "learning_rate": 1.9225231142792653e-05, "loss": 1.0508, "step": 2657 }, { "epoch": 0.15, "grad_norm": 1.7809934616088867, "learning_rate": 1.9224514053609217e-05, "loss": 1.0544, "step": 2658 }, { "epoch": 0.15, "grad_norm": 1.9456626176834106, "learning_rate": 1.9223796646113567e-05, "loss": 1.0268, "step": 2659 }, { "epoch": 0.15, "grad_norm": 1.7916826009750366, "learning_rate": 1.922307892033046e-05, "loss": 1.0921, "step": 2660 }, { "epoch": 0.15, "grad_norm": 1.7888703346252441, "learning_rate": 1.9222360876284662e-05, "loss": 1.0584, "step": 2661 }, { "epoch": 0.15, "grad_norm": 1.9444453716278076, "learning_rate": 1.9221642514000955e-05, "loss": 1.0953, "step": 2662 }, { "epoch": 0.15, "grad_norm": 2.1149709224700928, "learning_rate": 1.922092383350412e-05, "loss": 1.0664, "step": 2663 }, { "epoch": 0.15, "grad_norm": 1.9952826499938965, "learning_rate": 1.9220204834818966e-05, "loss": 1.0262, "step": 2664 }, { "epoch": 0.15, "grad_norm": 1.842921495437622, "learning_rate": 1.9219485517970297e-05, "loss": 0.9498, "step": 2665 }, { "epoch": 0.15, "grad_norm": 1.7758725881576538, "learning_rate": 1.9218765882982937e-05, "loss": 1.0086, "step": 2666 }, { "epoch": 0.15, "grad_norm": 2.022393226623535, "learning_rate": 1.9218045929881715e-05, "loss": 1.0296, "step": 2667 }, { "epoch": 0.15, "grad_norm": 2.178647041320801, "learning_rate": 1.921732565869148e-05, "loss": 1.2327, "step": 2668 }, { "epoch": 0.15, "grad_norm": 2.0059146881103516, "learning_rate": 1.9216605069437086e-05, "loss": 1.1601, "step": 2669 }, { "epoch": 0.15, "grad_norm": 1.6977680921554565, "learning_rate": 1.9215884162143393e-05, "loss": 1.0294, "step": 2670 }, { "epoch": 0.15, "grad_norm": 1.9692310094833374, "learning_rate": 1.9215162936835283e-05, "loss": 1.0727, "step": 2671 }, { "epoch": 0.15, "grad_norm": 1.7743926048278809, "learning_rate": 1.9214441393537642e-05, "loss": 1.0896, "step": 2672 }, { "epoch": 0.15, "grad_norm": 1.8910232782363892, "learning_rate": 1.921371953227537e-05, "loss": 1.1039, "step": 2673 }, { "epoch": 0.15, "grad_norm": 2.16933012008667, "learning_rate": 1.9212997353073367e-05, "loss": 1.1841, "step": 2674 }, { "epoch": 0.15, "grad_norm": 2.077240467071533, "learning_rate": 1.9212274855956566e-05, "loss": 1.1365, "step": 2675 }, { "epoch": 0.15, "grad_norm": 1.9534598588943481, "learning_rate": 1.9211552040949892e-05, "loss": 1.1375, "step": 2676 }, { "epoch": 0.15, "grad_norm": 1.8483035564422607, "learning_rate": 1.9210828908078292e-05, "loss": 1.0145, "step": 2677 }, { "epoch": 0.15, "grad_norm": 1.8195797204971313, "learning_rate": 1.921010545736671e-05, "loss": 0.9977, "step": 2678 }, { "epoch": 0.15, "grad_norm": 2.084599494934082, "learning_rate": 1.920938168884012e-05, "loss": 1.0422, "step": 2679 }, { "epoch": 0.15, "grad_norm": 2.1527047157287598, "learning_rate": 1.9208657602523494e-05, "loss": 1.1163, "step": 2680 }, { "epoch": 0.15, "grad_norm": 1.9527807235717773, "learning_rate": 1.920793319844181e-05, "loss": 1.185, "step": 2681 }, { "epoch": 0.15, "grad_norm": 2.3483762741088867, "learning_rate": 1.920720847662008e-05, "loss": 1.0799, "step": 2682 }, { "epoch": 0.15, "grad_norm": 2.065059185028076, "learning_rate": 1.9206483437083302e-05, "loss": 1.0359, "step": 2683 }, { "epoch": 0.15, "grad_norm": 2.1101040840148926, "learning_rate": 1.92057580798565e-05, "loss": 1.0937, "step": 2684 }, { "epoch": 0.15, "grad_norm": 1.8338804244995117, "learning_rate": 1.92050324049647e-05, "loss": 1.1392, "step": 2685 }, { "epoch": 0.15, "grad_norm": 2.127398729324341, "learning_rate": 1.9204306412432947e-05, "loss": 1.1117, "step": 2686 }, { "epoch": 0.15, "grad_norm": 1.8999232053756714, "learning_rate": 1.920358010228629e-05, "loss": 1.0613, "step": 2687 }, { "epoch": 0.15, "grad_norm": 2.1035425662994385, "learning_rate": 1.9202853474549796e-05, "loss": 1.0639, "step": 2688 }, { "epoch": 0.15, "grad_norm": 2.0180768966674805, "learning_rate": 1.9202126529248528e-05, "loss": 1.0571, "step": 2689 }, { "epoch": 0.15, "grad_norm": 2.0199365615844727, "learning_rate": 1.9201399266407582e-05, "loss": 1.0469, "step": 2690 }, { "epoch": 0.15, "grad_norm": 2.0198137760162354, "learning_rate": 1.9200671686052053e-05, "loss": 1.1409, "step": 2691 }, { "epoch": 0.15, "grad_norm": 1.8435190916061401, "learning_rate": 1.9199943788207044e-05, "loss": 1.1006, "step": 2692 }, { "epoch": 0.15, "grad_norm": 1.826055884361267, "learning_rate": 1.9199215572897675e-05, "loss": 1.0922, "step": 2693 }, { "epoch": 0.15, "grad_norm": 2.0210392475128174, "learning_rate": 1.919848704014907e-05, "loss": 1.0804, "step": 2694 }, { "epoch": 0.15, "grad_norm": 1.8186566829681396, "learning_rate": 1.9197758189986372e-05, "loss": 1.0592, "step": 2695 }, { "epoch": 0.15, "grad_norm": 2.0916662216186523, "learning_rate": 1.9197029022434734e-05, "loss": 1.1399, "step": 2696 }, { "epoch": 0.15, "grad_norm": 1.3249537944793701, "learning_rate": 1.9196299537519314e-05, "loss": 0.5983, "step": 2697 }, { "epoch": 0.15, "grad_norm": 1.8300137519836426, "learning_rate": 1.9195569735265288e-05, "loss": 1.0982, "step": 2698 }, { "epoch": 0.15, "grad_norm": 1.977926254272461, "learning_rate": 1.9194839615697836e-05, "loss": 1.0305, "step": 2699 }, { "epoch": 0.15, "grad_norm": 2.085047960281372, "learning_rate": 1.9194109178842155e-05, "loss": 1.0664, "step": 2700 }, { "epoch": 0.15, "grad_norm": 2.0125699043273926, "learning_rate": 1.9193378424723446e-05, "loss": 1.1197, "step": 2701 }, { "epoch": 0.15, "grad_norm": 2.1143689155578613, "learning_rate": 1.919264735336693e-05, "loss": 1.1328, "step": 2702 }, { "epoch": 0.16, "grad_norm": 1.8425114154815674, "learning_rate": 1.919191596479783e-05, "loss": 1.0824, "step": 2703 }, { "epoch": 0.16, "grad_norm": 1.1642632484436035, "learning_rate": 1.919118425904139e-05, "loss": 0.61, "step": 2704 }, { "epoch": 0.16, "grad_norm": 2.1337926387786865, "learning_rate": 1.9190452236122856e-05, "loss": 1.0865, "step": 2705 }, { "epoch": 0.16, "grad_norm": 2.0472490787506104, "learning_rate": 1.9189719896067487e-05, "loss": 1.0851, "step": 2706 }, { "epoch": 0.16, "grad_norm": 2.060659646987915, "learning_rate": 1.9188987238900554e-05, "loss": 1.1239, "step": 2707 }, { "epoch": 0.16, "grad_norm": 1.8582466840744019, "learning_rate": 1.9188254264647338e-05, "loss": 1.137, "step": 2708 }, { "epoch": 0.16, "grad_norm": 2.0046050548553467, "learning_rate": 1.9187520973333136e-05, "loss": 1.058, "step": 2709 }, { "epoch": 0.16, "grad_norm": 1.8665271997451782, "learning_rate": 1.918678736498325e-05, "loss": 1.0962, "step": 2710 }, { "epoch": 0.16, "grad_norm": 1.2069183588027954, "learning_rate": 1.9186053439622995e-05, "loss": 0.6754, "step": 2711 }, { "epoch": 0.16, "grad_norm": 1.7997500896453857, "learning_rate": 1.9185319197277693e-05, "loss": 1.1427, "step": 2712 }, { "epoch": 0.16, "grad_norm": 2.048140525817871, "learning_rate": 1.9184584637972685e-05, "loss": 1.0705, "step": 2713 }, { "epoch": 0.16, "grad_norm": 1.8470405340194702, "learning_rate": 1.9183849761733316e-05, "loss": 1.0805, "step": 2714 }, { "epoch": 0.16, "grad_norm": 1.9241894483566284, "learning_rate": 1.9183114568584948e-05, "loss": 1.0193, "step": 2715 }, { "epoch": 0.16, "grad_norm": 1.9826548099517822, "learning_rate": 1.918237905855295e-05, "loss": 1.1926, "step": 2716 }, { "epoch": 0.16, "grad_norm": 2.166938543319702, "learning_rate": 1.9181643231662695e-05, "loss": 1.0218, "step": 2717 }, { "epoch": 0.16, "grad_norm": 1.9338405132293701, "learning_rate": 1.9180907087939588e-05, "loss": 1.0929, "step": 2718 }, { "epoch": 0.16, "grad_norm": 2.1510963439941406, "learning_rate": 1.9180170627409014e-05, "loss": 1.1066, "step": 2719 }, { "epoch": 0.16, "grad_norm": 1.833640217781067, "learning_rate": 1.91794338500964e-05, "loss": 1.0707, "step": 2720 }, { "epoch": 0.16, "grad_norm": 2.0463311672210693, "learning_rate": 1.9178696756027167e-05, "loss": 1.0824, "step": 2721 }, { "epoch": 0.16, "grad_norm": 1.8670568466186523, "learning_rate": 1.9177959345226746e-05, "loss": 1.1162, "step": 2722 }, { "epoch": 0.16, "grad_norm": 1.8612425327301025, "learning_rate": 1.9177221617720584e-05, "loss": 1.0989, "step": 2723 }, { "epoch": 0.16, "grad_norm": 1.9941425323486328, "learning_rate": 1.9176483573534142e-05, "loss": 1.0405, "step": 2724 }, { "epoch": 0.16, "grad_norm": 1.9859180450439453, "learning_rate": 1.917574521269289e-05, "loss": 1.1413, "step": 2725 }, { "epoch": 0.16, "grad_norm": 1.81574547290802, "learning_rate": 1.9175006535222293e-05, "loss": 1.0651, "step": 2726 }, { "epoch": 0.16, "grad_norm": 1.118943452835083, "learning_rate": 1.9174267541147856e-05, "loss": 0.5386, "step": 2727 }, { "epoch": 0.16, "grad_norm": 2.0453383922576904, "learning_rate": 1.9173528230495072e-05, "loss": 1.0662, "step": 2728 }, { "epoch": 0.16, "grad_norm": 1.864418864250183, "learning_rate": 1.9172788603289453e-05, "loss": 1.0685, "step": 2729 }, { "epoch": 0.16, "grad_norm": 2.033733606338501, "learning_rate": 1.9172048659556523e-05, "loss": 1.0565, "step": 2730 }, { "epoch": 0.16, "grad_norm": 1.9367201328277588, "learning_rate": 1.9171308399321817e-05, "loss": 1.1281, "step": 2731 }, { "epoch": 0.16, "grad_norm": 1.9522922039031982, "learning_rate": 1.9170567822610872e-05, "loss": 0.9853, "step": 2732 }, { "epoch": 0.16, "grad_norm": 1.8821351528167725, "learning_rate": 1.916982692944925e-05, "loss": 1.0346, "step": 2733 }, { "epoch": 0.16, "grad_norm": 1.8420169353485107, "learning_rate": 1.9169085719862522e-05, "loss": 1.0688, "step": 2734 }, { "epoch": 0.16, "grad_norm": 1.851286768913269, "learning_rate": 1.916834419387625e-05, "loss": 1.0329, "step": 2735 }, { "epoch": 0.16, "grad_norm": 1.9359358549118042, "learning_rate": 1.916760235151604e-05, "loss": 1.0353, "step": 2736 }, { "epoch": 0.16, "grad_norm": 2.072796106338501, "learning_rate": 1.9166860192807472e-05, "loss": 1.0303, "step": 2737 }, { "epoch": 0.16, "grad_norm": 2.11775279045105, "learning_rate": 1.9166117717776166e-05, "loss": 1.0727, "step": 2738 }, { "epoch": 0.16, "grad_norm": 1.9249122142791748, "learning_rate": 1.9165374926447748e-05, "loss": 1.1123, "step": 2739 }, { "epoch": 0.16, "grad_norm": 1.963181495666504, "learning_rate": 1.9164631818847842e-05, "loss": 1.1166, "step": 2740 }, { "epoch": 0.16, "grad_norm": 1.7474950551986694, "learning_rate": 1.916388839500209e-05, "loss": 1.0789, "step": 2741 }, { "epoch": 0.16, "grad_norm": 1.9499820470809937, "learning_rate": 1.9163144654936148e-05, "loss": 1.0622, "step": 2742 }, { "epoch": 0.16, "grad_norm": 1.911020278930664, "learning_rate": 1.9162400598675682e-05, "loss": 1.0566, "step": 2743 }, { "epoch": 0.16, "grad_norm": 1.8519973754882812, "learning_rate": 1.9161656226246362e-05, "loss": 1.0631, "step": 2744 }, { "epoch": 0.16, "grad_norm": 2.0770349502563477, "learning_rate": 1.9160911537673884e-05, "loss": 1.096, "step": 2745 }, { "epoch": 0.16, "grad_norm": 1.9932708740234375, "learning_rate": 1.9160166532983932e-05, "loss": 1.1188, "step": 2746 }, { "epoch": 0.16, "grad_norm": 1.9023292064666748, "learning_rate": 1.9159421212202223e-05, "loss": 1.0229, "step": 2747 }, { "epoch": 0.16, "grad_norm": 2.0837292671203613, "learning_rate": 1.9158675575354477e-05, "loss": 1.0788, "step": 2748 }, { "epoch": 0.16, "grad_norm": 1.885361671447754, "learning_rate": 1.9157929622466418e-05, "loss": 1.0586, "step": 2749 }, { "epoch": 0.16, "grad_norm": 2.217736005783081, "learning_rate": 1.9157183353563787e-05, "loss": 1.0771, "step": 2750 }, { "epoch": 0.16, "grad_norm": 1.992760419845581, "learning_rate": 1.9156436768672344e-05, "loss": 1.0196, "step": 2751 }, { "epoch": 0.16, "grad_norm": 1.0330630540847778, "learning_rate": 1.9155689867817845e-05, "loss": 0.5798, "step": 2752 }, { "epoch": 0.16, "grad_norm": 2.1774089336395264, "learning_rate": 1.9154942651026057e-05, "loss": 1.0755, "step": 2753 }, { "epoch": 0.16, "grad_norm": 1.9684513807296753, "learning_rate": 1.9154195118322774e-05, "loss": 1.0862, "step": 2754 }, { "epoch": 0.16, "grad_norm": 2.125251293182373, "learning_rate": 1.9153447269733794e-05, "loss": 1.0151, "step": 2755 }, { "epoch": 0.16, "grad_norm": 1.9299724102020264, "learning_rate": 1.9152699105284912e-05, "loss": 1.0336, "step": 2756 }, { "epoch": 0.16, "grad_norm": 2.053964138031006, "learning_rate": 1.9151950625001955e-05, "loss": 1.0421, "step": 2757 }, { "epoch": 0.16, "grad_norm": 1.9560880661010742, "learning_rate": 1.9151201828910745e-05, "loss": 1.0798, "step": 2758 }, { "epoch": 0.16, "grad_norm": 2.105623245239258, "learning_rate": 1.915045271703712e-05, "loss": 1.0867, "step": 2759 }, { "epoch": 0.16, "grad_norm": 1.883391261100769, "learning_rate": 1.914970328940694e-05, "loss": 1.028, "step": 2760 }, { "epoch": 0.16, "grad_norm": 1.9990217685699463, "learning_rate": 1.914895354604605e-05, "loss": 1.0616, "step": 2761 }, { "epoch": 0.16, "grad_norm": 2.2924914360046387, "learning_rate": 1.9148203486980335e-05, "loss": 1.1067, "step": 2762 }, { "epoch": 0.16, "grad_norm": 2.080641984939575, "learning_rate": 1.914745311223567e-05, "loss": 1.0492, "step": 2763 }, { "epoch": 0.16, "grad_norm": 2.065049409866333, "learning_rate": 1.9146702421837952e-05, "loss": 1.0923, "step": 2764 }, { "epoch": 0.16, "grad_norm": 2.0786685943603516, "learning_rate": 1.9145951415813084e-05, "loss": 1.034, "step": 2765 }, { "epoch": 0.16, "grad_norm": 1.9505268335342407, "learning_rate": 1.9145200094186975e-05, "loss": 1.0556, "step": 2766 }, { "epoch": 0.16, "grad_norm": 2.1301217079162598, "learning_rate": 1.914444845698556e-05, "loss": 1.1408, "step": 2767 }, { "epoch": 0.16, "grad_norm": 2.0864977836608887, "learning_rate": 1.9143696504234777e-05, "loss": 1.0588, "step": 2768 }, { "epoch": 0.16, "grad_norm": 1.9977463483810425, "learning_rate": 1.9142944235960566e-05, "loss": 1.0589, "step": 2769 }, { "epoch": 0.16, "grad_norm": 2.0642709732055664, "learning_rate": 1.914219165218889e-05, "loss": 1.1203, "step": 2770 }, { "epoch": 0.16, "grad_norm": 1.8074480295181274, "learning_rate": 1.9141438752945717e-05, "loss": 1.0095, "step": 2771 }, { "epoch": 0.16, "grad_norm": 2.1132543087005615, "learning_rate": 1.9140685538257027e-05, "loss": 1.1107, "step": 2772 }, { "epoch": 0.16, "grad_norm": 2.1416449546813965, "learning_rate": 1.9139932008148816e-05, "loss": 1.1041, "step": 2773 }, { "epoch": 0.16, "grad_norm": 1.9659407138824463, "learning_rate": 1.913917816264708e-05, "loss": 1.1376, "step": 2774 }, { "epoch": 0.16, "grad_norm": 1.9418716430664062, "learning_rate": 1.913842400177784e-05, "loss": 1.0105, "step": 2775 }, { "epoch": 0.16, "grad_norm": 1.9128941297531128, "learning_rate": 1.9137669525567108e-05, "loss": 1.097, "step": 2776 }, { "epoch": 0.16, "grad_norm": 1.9174975156784058, "learning_rate": 1.913691473404093e-05, "loss": 1.0629, "step": 2777 }, { "epoch": 0.16, "grad_norm": 2.08526349067688, "learning_rate": 1.9136159627225342e-05, "loss": 1.1187, "step": 2778 }, { "epoch": 0.16, "grad_norm": 1.918385624885559, "learning_rate": 1.9135404205146414e-05, "loss": 1.087, "step": 2779 }, { "epoch": 0.16, "grad_norm": 1.8826457262039185, "learning_rate": 1.9134648467830198e-05, "loss": 0.9506, "step": 2780 }, { "epoch": 0.16, "grad_norm": 1.9759894609451294, "learning_rate": 1.9133892415302783e-05, "loss": 1.0828, "step": 2781 }, { "epoch": 0.16, "grad_norm": 1.948956847190857, "learning_rate": 1.9133136047590258e-05, "loss": 0.9818, "step": 2782 }, { "epoch": 0.16, "grad_norm": 2.021775484085083, "learning_rate": 1.913237936471872e-05, "loss": 1.1329, "step": 2783 }, { "epoch": 0.16, "grad_norm": 1.8469105958938599, "learning_rate": 1.9131622366714277e-05, "loss": 1.1223, "step": 2784 }, { "epoch": 0.16, "grad_norm": 2.0182039737701416, "learning_rate": 1.9130865053603055e-05, "loss": 1.0881, "step": 2785 }, { "epoch": 0.16, "grad_norm": 1.9165613651275635, "learning_rate": 1.9130107425411186e-05, "loss": 1.0766, "step": 2786 }, { "epoch": 0.16, "grad_norm": 2.1084506511688232, "learning_rate": 1.9129349482164815e-05, "loss": 1.0226, "step": 2787 }, { "epoch": 0.16, "grad_norm": 1.8359850645065308, "learning_rate": 1.9128591223890094e-05, "loss": 0.9454, "step": 2788 }, { "epoch": 0.16, "grad_norm": 1.9818309545516968, "learning_rate": 1.912783265061319e-05, "loss": 1.1711, "step": 2789 }, { "epoch": 0.16, "grad_norm": 1.741380214691162, "learning_rate": 1.912707376236028e-05, "loss": 1.0016, "step": 2790 }, { "epoch": 0.16, "grad_norm": 1.9307808876037598, "learning_rate": 1.912631455915755e-05, "loss": 1.0517, "step": 2791 }, { "epoch": 0.16, "grad_norm": 1.8927243947982788, "learning_rate": 1.9125555041031196e-05, "loss": 1.1103, "step": 2792 }, { "epoch": 0.16, "grad_norm": 1.9926817417144775, "learning_rate": 1.912479520800743e-05, "loss": 1.1048, "step": 2793 }, { "epoch": 0.16, "grad_norm": 1.8304252624511719, "learning_rate": 1.912403506011247e-05, "loss": 0.9985, "step": 2794 }, { "epoch": 0.16, "grad_norm": 1.867504358291626, "learning_rate": 1.9123274597372547e-05, "loss": 1.1196, "step": 2795 }, { "epoch": 0.16, "grad_norm": 1.9199800491333008, "learning_rate": 1.91225138198139e-05, "loss": 1.1098, "step": 2796 }, { "epoch": 0.16, "grad_norm": 1.7561461925506592, "learning_rate": 1.9121752727462787e-05, "loss": 0.9618, "step": 2797 }, { "epoch": 0.16, "grad_norm": 1.8149597644805908, "learning_rate": 1.912099132034547e-05, "loss": 1.0378, "step": 2798 }, { "epoch": 0.16, "grad_norm": 2.0402047634124756, "learning_rate": 1.9120229598488218e-05, "loss": 1.1333, "step": 2799 }, { "epoch": 0.16, "grad_norm": 1.7843079566955566, "learning_rate": 1.911946756191732e-05, "loss": 1.1115, "step": 2800 }, { "epoch": 0.16, "grad_norm": 1.1256554126739502, "learning_rate": 1.9118705210659067e-05, "loss": 0.618, "step": 2801 }, { "epoch": 0.16, "grad_norm": 2.1451330184936523, "learning_rate": 1.911794254473977e-05, "loss": 1.0175, "step": 2802 }, { "epoch": 0.16, "grad_norm": 2.235419750213623, "learning_rate": 1.911717956418575e-05, "loss": 1.0795, "step": 2803 }, { "epoch": 0.16, "grad_norm": 1.8935601711273193, "learning_rate": 1.911641626902333e-05, "loss": 0.9933, "step": 2804 }, { "epoch": 0.16, "grad_norm": 1.9885404109954834, "learning_rate": 1.911565265927885e-05, "loss": 1.0476, "step": 2805 }, { "epoch": 0.16, "grad_norm": 2.0454165935516357, "learning_rate": 1.911488873497866e-05, "loss": 0.9931, "step": 2806 }, { "epoch": 0.16, "grad_norm": 1.9443235397338867, "learning_rate": 1.911412449614912e-05, "loss": 1.0239, "step": 2807 }, { "epoch": 0.16, "grad_norm": 2.016200304031372, "learning_rate": 1.9113359942816602e-05, "loss": 1.1213, "step": 2808 }, { "epoch": 0.16, "grad_norm": 1.9532594680786133, "learning_rate": 1.9112595075007492e-05, "loss": 1.0586, "step": 2809 }, { "epoch": 0.16, "grad_norm": 2.084554433822632, "learning_rate": 1.911182989274818e-05, "loss": 0.9784, "step": 2810 }, { "epoch": 0.16, "grad_norm": 1.808603048324585, "learning_rate": 1.911106439606507e-05, "loss": 1.1076, "step": 2811 }, { "epoch": 0.16, "grad_norm": 1.8961806297302246, "learning_rate": 1.911029858498458e-05, "loss": 1.0701, "step": 2812 }, { "epoch": 0.16, "grad_norm": 2.1251559257507324, "learning_rate": 1.9109532459533136e-05, "loss": 1.0397, "step": 2813 }, { "epoch": 0.16, "grad_norm": 1.9431532621383667, "learning_rate": 1.9108766019737168e-05, "loss": 1.0586, "step": 2814 }, { "epoch": 0.16, "grad_norm": 2.0151922702789307, "learning_rate": 1.9107999265623133e-05, "loss": 1.0769, "step": 2815 }, { "epoch": 0.16, "grad_norm": 1.8579295873641968, "learning_rate": 1.9107232197217483e-05, "loss": 1.0889, "step": 2816 }, { "epoch": 0.16, "grad_norm": 2.0122504234313965, "learning_rate": 1.9106464814546695e-05, "loss": 1.0166, "step": 2817 }, { "epoch": 0.16, "grad_norm": 1.951866626739502, "learning_rate": 1.910569711763724e-05, "loss": 1.1338, "step": 2818 }, { "epoch": 0.16, "grad_norm": 1.7444969415664673, "learning_rate": 1.9104929106515616e-05, "loss": 1.0497, "step": 2819 }, { "epoch": 0.16, "grad_norm": 1.7327609062194824, "learning_rate": 1.910416078120832e-05, "loss": 1.0935, "step": 2820 }, { "epoch": 0.16, "grad_norm": 2.132636070251465, "learning_rate": 1.9103392141741865e-05, "loss": 1.074, "step": 2821 }, { "epoch": 0.16, "grad_norm": 1.9666560888290405, "learning_rate": 1.910262318814278e-05, "loss": 1.1461, "step": 2822 }, { "epoch": 0.16, "grad_norm": 1.8750109672546387, "learning_rate": 1.9101853920437594e-05, "loss": 0.9635, "step": 2823 }, { "epoch": 0.16, "grad_norm": 2.0285205841064453, "learning_rate": 1.9101084338652855e-05, "loss": 1.1015, "step": 2824 }, { "epoch": 0.16, "grad_norm": 1.9769545793533325, "learning_rate": 1.910031444281512e-05, "loss": 1.1294, "step": 2825 }, { "epoch": 0.16, "grad_norm": 2.1257152557373047, "learning_rate": 1.909954423295095e-05, "loss": 1.0726, "step": 2826 }, { "epoch": 0.16, "grad_norm": 2.031496047973633, "learning_rate": 1.909877370908693e-05, "loss": 1.0683, "step": 2827 }, { "epoch": 0.16, "grad_norm": 1.915421962738037, "learning_rate": 1.9098002871249644e-05, "loss": 1.072, "step": 2828 }, { "epoch": 0.16, "grad_norm": 1.938776969909668, "learning_rate": 1.9097231719465695e-05, "loss": 1.0576, "step": 2829 }, { "epoch": 0.16, "grad_norm": 1.993424415588379, "learning_rate": 1.909646025376169e-05, "loss": 1.0303, "step": 2830 }, { "epoch": 0.16, "grad_norm": 2.067265033721924, "learning_rate": 1.9095688474164254e-05, "loss": 1.0114, "step": 2831 }, { "epoch": 0.16, "grad_norm": 1.9131224155426025, "learning_rate": 1.9094916380700015e-05, "loss": 1.0502, "step": 2832 }, { "epoch": 0.16, "grad_norm": 2.1012070178985596, "learning_rate": 1.9094143973395614e-05, "loss": 1.1246, "step": 2833 }, { "epoch": 0.16, "grad_norm": 1.9724221229553223, "learning_rate": 1.909337125227771e-05, "loss": 1.1616, "step": 2834 }, { "epoch": 0.16, "grad_norm": 1.8480886220932007, "learning_rate": 1.909259821737297e-05, "loss": 1.0921, "step": 2835 }, { "epoch": 0.16, "grad_norm": 1.9709773063659668, "learning_rate": 1.909182486870806e-05, "loss": 0.9865, "step": 2836 }, { "epoch": 0.16, "grad_norm": 1.1934001445770264, "learning_rate": 1.9091051206309674e-05, "loss": 0.5629, "step": 2837 }, { "epoch": 0.16, "grad_norm": 2.012815475463867, "learning_rate": 1.9090277230204503e-05, "loss": 1.0509, "step": 2838 }, { "epoch": 0.16, "grad_norm": 1.9511667490005493, "learning_rate": 1.9089502940419258e-05, "loss": 1.1037, "step": 2839 }, { "epoch": 0.16, "grad_norm": 1.8742932081222534, "learning_rate": 1.9088728336980656e-05, "loss": 1.0173, "step": 2840 }, { "epoch": 0.16, "grad_norm": 1.7490102052688599, "learning_rate": 1.9087953419915427e-05, "loss": 0.9781, "step": 2841 }, { "epoch": 0.16, "grad_norm": 1.992408275604248, "learning_rate": 1.9087178189250314e-05, "loss": 1.1066, "step": 2842 }, { "epoch": 0.16, "grad_norm": 1.0442935228347778, "learning_rate": 1.9086402645012065e-05, "loss": 0.6306, "step": 2843 }, { "epoch": 0.16, "grad_norm": 2.0599634647369385, "learning_rate": 1.9085626787227444e-05, "loss": 1.0428, "step": 2844 }, { "epoch": 0.16, "grad_norm": 1.9415106773376465, "learning_rate": 1.9084850615923217e-05, "loss": 1.0039, "step": 2845 }, { "epoch": 0.16, "grad_norm": 1.987179160118103, "learning_rate": 1.908407413112618e-05, "loss": 1.0064, "step": 2846 }, { "epoch": 0.16, "grad_norm": 2.0024447441101074, "learning_rate": 1.9083297332863114e-05, "loss": 1.0419, "step": 2847 }, { "epoch": 0.16, "grad_norm": 1.8485029935836792, "learning_rate": 1.9082520221160835e-05, "loss": 1.07, "step": 2848 }, { "epoch": 0.16, "grad_norm": 1.9870679378509521, "learning_rate": 1.908174279604615e-05, "loss": 0.9968, "step": 2849 }, { "epoch": 0.16, "grad_norm": 1.9794743061065674, "learning_rate": 1.9080965057545894e-05, "loss": 1.1163, "step": 2850 }, { "epoch": 0.16, "grad_norm": 1.929158329963684, "learning_rate": 1.9080187005686896e-05, "loss": 1.0327, "step": 2851 }, { "epoch": 0.16, "grad_norm": 2.0727713108062744, "learning_rate": 1.9079408640496012e-05, "loss": 1.0528, "step": 2852 }, { "epoch": 0.16, "grad_norm": 1.9584839344024658, "learning_rate": 1.90786299620001e-05, "loss": 1.1685, "step": 2853 }, { "epoch": 0.16, "grad_norm": 1.9692672491073608, "learning_rate": 1.9077850970226025e-05, "loss": 0.9874, "step": 2854 }, { "epoch": 0.16, "grad_norm": 1.855149745941162, "learning_rate": 1.907707166520067e-05, "loss": 1.1424, "step": 2855 }, { "epoch": 0.16, "grad_norm": 1.854702353477478, "learning_rate": 1.907629204695093e-05, "loss": 1.0632, "step": 2856 }, { "epoch": 0.16, "grad_norm": 1.853702187538147, "learning_rate": 1.9075512115503707e-05, "loss": 1.0292, "step": 2857 }, { "epoch": 0.16, "grad_norm": 2.0127134323120117, "learning_rate": 1.9074731870885907e-05, "loss": 1.0929, "step": 2858 }, { "epoch": 0.16, "grad_norm": 1.887139081954956, "learning_rate": 1.9073951313124462e-05, "loss": 0.9609, "step": 2859 }, { "epoch": 0.16, "grad_norm": 1.975973129272461, "learning_rate": 1.9073170442246304e-05, "loss": 1.0774, "step": 2860 }, { "epoch": 0.16, "grad_norm": 1.976212978363037, "learning_rate": 1.9072389258278378e-05, "loss": 1.0252, "step": 2861 }, { "epoch": 0.16, "grad_norm": 1.8501232862472534, "learning_rate": 1.9071607761247644e-05, "loss": 1.0484, "step": 2862 }, { "epoch": 0.16, "grad_norm": 1.8229377269744873, "learning_rate": 1.9070825951181065e-05, "loss": 1.0586, "step": 2863 }, { "epoch": 0.16, "grad_norm": 1.1713542938232422, "learning_rate": 1.9070043828105616e-05, "loss": 0.5867, "step": 2864 }, { "epoch": 0.16, "grad_norm": 2.0396790504455566, "learning_rate": 1.90692613920483e-05, "loss": 1.1764, "step": 2865 }, { "epoch": 0.16, "grad_norm": 1.997402548789978, "learning_rate": 1.9068478643036102e-05, "loss": 1.0735, "step": 2866 }, { "epoch": 0.16, "grad_norm": 2.0114099979400635, "learning_rate": 1.9067695581096037e-05, "loss": 1.107, "step": 2867 }, { "epoch": 0.16, "grad_norm": 1.8693817853927612, "learning_rate": 1.906691220625513e-05, "loss": 1.0884, "step": 2868 }, { "epoch": 0.16, "grad_norm": 1.9016669988632202, "learning_rate": 1.9066128518540408e-05, "loss": 1.0759, "step": 2869 }, { "epoch": 0.16, "grad_norm": 1.9285945892333984, "learning_rate": 1.906534451797892e-05, "loss": 1.1167, "step": 2870 }, { "epoch": 0.16, "grad_norm": 1.843967318534851, "learning_rate": 1.906456020459771e-05, "loss": 1.0286, "step": 2871 }, { "epoch": 0.16, "grad_norm": 1.9659533500671387, "learning_rate": 1.906377557842385e-05, "loss": 1.1093, "step": 2872 }, { "epoch": 0.16, "grad_norm": 2.1897130012512207, "learning_rate": 1.9062990639484416e-05, "loss": 1.0701, "step": 2873 }, { "epoch": 0.16, "grad_norm": 2.050063371658325, "learning_rate": 1.906220538780649e-05, "loss": 1.044, "step": 2874 }, { "epoch": 0.16, "grad_norm": 1.9924061298370361, "learning_rate": 1.906141982341717e-05, "loss": 1.1113, "step": 2875 }, { "epoch": 0.16, "grad_norm": 1.7729718685150146, "learning_rate": 1.906063394634356e-05, "loss": 1.053, "step": 2876 }, { "epoch": 0.17, "grad_norm": 1.7325552701950073, "learning_rate": 1.905984775661279e-05, "loss": 1.0026, "step": 2877 }, { "epoch": 0.17, "grad_norm": 1.8477683067321777, "learning_rate": 1.9059061254251978e-05, "loss": 1.0763, "step": 2878 }, { "epoch": 0.17, "grad_norm": 2.196218252182007, "learning_rate": 1.9058274439288267e-05, "loss": 1.1098, "step": 2879 }, { "epoch": 0.17, "grad_norm": 1.9315710067749023, "learning_rate": 1.905748731174881e-05, "loss": 1.1487, "step": 2880 }, { "epoch": 0.17, "grad_norm": 1.876016616821289, "learning_rate": 1.9056699871660763e-05, "loss": 1.0634, "step": 2881 }, { "epoch": 0.17, "grad_norm": 1.927635908126831, "learning_rate": 1.9055912119051305e-05, "loss": 1.0757, "step": 2882 }, { "epoch": 0.17, "grad_norm": 1.7561267614364624, "learning_rate": 1.905512405394762e-05, "loss": 1.0485, "step": 2883 }, { "epoch": 0.17, "grad_norm": 1.1301265954971313, "learning_rate": 1.9054335676376893e-05, "loss": 0.5229, "step": 2884 }, { "epoch": 0.17, "grad_norm": 2.156092643737793, "learning_rate": 1.9053546986366332e-05, "loss": 1.0645, "step": 2885 }, { "epoch": 0.17, "grad_norm": 2.2195944786071777, "learning_rate": 1.9052757983943162e-05, "loss": 1.0956, "step": 2886 }, { "epoch": 0.17, "grad_norm": 1.9298347234725952, "learning_rate": 1.9051968669134597e-05, "loss": 1.1206, "step": 2887 }, { "epoch": 0.17, "grad_norm": 1.022909164428711, "learning_rate": 1.905117904196788e-05, "loss": 0.5451, "step": 2888 }, { "epoch": 0.17, "grad_norm": 1.7126446962356567, "learning_rate": 1.9050389102470258e-05, "loss": 1.0747, "step": 2889 }, { "epoch": 0.17, "grad_norm": 2.088958978652954, "learning_rate": 1.9049598850668988e-05, "loss": 1.0442, "step": 2890 }, { "epoch": 0.17, "grad_norm": 2.002758264541626, "learning_rate": 1.9048808286591343e-05, "loss": 1.0963, "step": 2891 }, { "epoch": 0.17, "grad_norm": 1.9130730628967285, "learning_rate": 1.90480174102646e-05, "loss": 1.1149, "step": 2892 }, { "epoch": 0.17, "grad_norm": 1.9090899229049683, "learning_rate": 1.904722622171605e-05, "loss": 1.1241, "step": 2893 }, { "epoch": 0.17, "grad_norm": 1.970346212387085, "learning_rate": 1.9046434720973e-05, "loss": 1.0268, "step": 2894 }, { "epoch": 0.17, "grad_norm": 1.8828133344650269, "learning_rate": 1.904564290806275e-05, "loss": 0.985, "step": 2895 }, { "epoch": 0.17, "grad_norm": 2.0196754932403564, "learning_rate": 1.9044850783012636e-05, "loss": 1.0554, "step": 2896 }, { "epoch": 0.17, "grad_norm": 1.8040275573730469, "learning_rate": 1.9044058345849988e-05, "loss": 1.0266, "step": 2897 }, { "epoch": 0.17, "grad_norm": 1.9745535850524902, "learning_rate": 1.9043265596602146e-05, "loss": 1.0612, "step": 2898 }, { "epoch": 0.17, "grad_norm": 2.021299362182617, "learning_rate": 1.9042472535296474e-05, "loss": 1.0923, "step": 2899 }, { "epoch": 0.17, "grad_norm": 2.1055734157562256, "learning_rate": 1.904167916196033e-05, "loss": 1.0128, "step": 2900 }, { "epoch": 0.17, "grad_norm": 1.8815861940383911, "learning_rate": 1.9040885476621097e-05, "loss": 1.0756, "step": 2901 }, { "epoch": 0.17, "grad_norm": 1.8467283248901367, "learning_rate": 1.9040091479306163e-05, "loss": 1.0524, "step": 2902 }, { "epoch": 0.17, "grad_norm": 2.5922529697418213, "learning_rate": 1.9039297170042922e-05, "loss": 1.0293, "step": 2903 }, { "epoch": 0.17, "grad_norm": 1.9232429265975952, "learning_rate": 1.9038502548858786e-05, "loss": 1.0476, "step": 2904 }, { "epoch": 0.17, "grad_norm": 1.830834984779358, "learning_rate": 1.9037707615781177e-05, "loss": 1.0375, "step": 2905 }, { "epoch": 0.17, "grad_norm": 2.071190118789673, "learning_rate": 1.9036912370837523e-05, "loss": 1.042, "step": 2906 }, { "epoch": 0.17, "grad_norm": 1.2560254335403442, "learning_rate": 1.9036116814055264e-05, "loss": 0.5797, "step": 2907 }, { "epoch": 0.17, "grad_norm": 1.7809886932373047, "learning_rate": 1.903532094546186e-05, "loss": 1.0051, "step": 2908 }, { "epoch": 0.17, "grad_norm": 1.940935492515564, "learning_rate": 1.9034524765084764e-05, "loss": 1.0706, "step": 2909 }, { "epoch": 0.17, "grad_norm": 1.9670809507369995, "learning_rate": 1.9033728272951458e-05, "loss": 1.078, "step": 2910 }, { "epoch": 0.17, "grad_norm": 2.199079751968384, "learning_rate": 1.9032931469089425e-05, "loss": 1.0578, "step": 2911 }, { "epoch": 0.17, "grad_norm": 1.948774814605713, "learning_rate": 1.9032134353526158e-05, "loss": 0.9922, "step": 2912 }, { "epoch": 0.17, "grad_norm": 1.8382983207702637, "learning_rate": 1.9031336926289167e-05, "loss": 1.0867, "step": 2913 }, { "epoch": 0.17, "grad_norm": 1.9122934341430664, "learning_rate": 1.9030539187405962e-05, "loss": 1.0126, "step": 2914 }, { "epoch": 0.17, "grad_norm": 2.012712001800537, "learning_rate": 1.902974113690408e-05, "loss": 1.093, "step": 2915 }, { "epoch": 0.17, "grad_norm": 1.950904369354248, "learning_rate": 1.902894277481105e-05, "loss": 1.0707, "step": 2916 }, { "epoch": 0.17, "grad_norm": 1.1292970180511475, "learning_rate": 1.902814410115443e-05, "loss": 0.6413, "step": 2917 }, { "epoch": 0.17, "grad_norm": 2.3109323978424072, "learning_rate": 1.9027345115961778e-05, "loss": 1.0625, "step": 2918 }, { "epoch": 0.17, "grad_norm": 2.045332670211792, "learning_rate": 1.902654581926066e-05, "loss": 1.0038, "step": 2919 }, { "epoch": 0.17, "grad_norm": 1.892066240310669, "learning_rate": 1.9025746211078658e-05, "loss": 1.0534, "step": 2920 }, { "epoch": 0.17, "grad_norm": 2.176978588104248, "learning_rate": 1.902494629144337e-05, "loss": 1.076, "step": 2921 }, { "epoch": 0.17, "grad_norm": 2.276561975479126, "learning_rate": 1.9024146060382396e-05, "loss": 1.0489, "step": 2922 }, { "epoch": 0.17, "grad_norm": 1.8345601558685303, "learning_rate": 1.902334551792335e-05, "loss": 1.0183, "step": 2923 }, { "epoch": 0.17, "grad_norm": 1.9069886207580566, "learning_rate": 1.9022544664093854e-05, "loss": 1.1266, "step": 2924 }, { "epoch": 0.17, "grad_norm": 2.0337042808532715, "learning_rate": 1.9021743498921544e-05, "loss": 1.0125, "step": 2925 }, { "epoch": 0.17, "grad_norm": 1.9993294477462769, "learning_rate": 1.9020942022434072e-05, "loss": 1.0337, "step": 2926 }, { "epoch": 0.17, "grad_norm": 1.9678328037261963, "learning_rate": 1.9020140234659084e-05, "loss": 1.071, "step": 2927 }, { "epoch": 0.17, "grad_norm": 2.171757221221924, "learning_rate": 1.9019338135624256e-05, "loss": 1.0099, "step": 2928 }, { "epoch": 0.17, "grad_norm": 1.8959630727767944, "learning_rate": 1.901853572535726e-05, "loss": 1.032, "step": 2929 }, { "epoch": 0.17, "grad_norm": 2.0473315715789795, "learning_rate": 1.9017733003885794e-05, "loss": 1.1325, "step": 2930 }, { "epoch": 0.17, "grad_norm": 1.8315461874008179, "learning_rate": 1.901692997123755e-05, "loss": 1.0791, "step": 2931 }, { "epoch": 0.17, "grad_norm": 1.9948164224624634, "learning_rate": 1.901612662744024e-05, "loss": 1.0553, "step": 2932 }, { "epoch": 0.17, "grad_norm": 1.9197337627410889, "learning_rate": 1.9015322972521587e-05, "loss": 0.961, "step": 2933 }, { "epoch": 0.17, "grad_norm": 1.8900346755981445, "learning_rate": 1.901451900650932e-05, "loss": 1.0264, "step": 2934 }, { "epoch": 0.17, "grad_norm": 1.4162036180496216, "learning_rate": 1.9013714729431183e-05, "loss": 0.6802, "step": 2935 }, { "epoch": 0.17, "grad_norm": 2.1681153774261475, "learning_rate": 1.9012910141314928e-05, "loss": 1.1044, "step": 2936 }, { "epoch": 0.17, "grad_norm": 1.876365065574646, "learning_rate": 1.9012105242188323e-05, "loss": 1.0254, "step": 2937 }, { "epoch": 0.17, "grad_norm": 1.9956624507904053, "learning_rate": 1.901130003207914e-05, "loss": 1.0748, "step": 2938 }, { "epoch": 0.17, "grad_norm": 2.0879809856414795, "learning_rate": 1.9010494511015164e-05, "loss": 1.123, "step": 2939 }, { "epoch": 0.17, "grad_norm": 1.9485974311828613, "learning_rate": 1.900968867902419e-05, "loss": 1.1134, "step": 2940 }, { "epoch": 0.17, "grad_norm": 1.7256534099578857, "learning_rate": 1.9008882536134035e-05, "loss": 0.9859, "step": 2941 }, { "epoch": 0.17, "grad_norm": 1.959047794342041, "learning_rate": 1.9008076082372504e-05, "loss": 1.0945, "step": 2942 }, { "epoch": 0.17, "grad_norm": 1.3929054737091064, "learning_rate": 1.9007269317767428e-05, "loss": 0.5858, "step": 2943 }, { "epoch": 0.17, "grad_norm": 2.03013014793396, "learning_rate": 1.9006462242346654e-05, "loss": 1.0717, "step": 2944 }, { "epoch": 0.17, "grad_norm": 1.8899321556091309, "learning_rate": 1.900565485613802e-05, "loss": 1.0111, "step": 2945 }, { "epoch": 0.17, "grad_norm": 1.825768232345581, "learning_rate": 1.9004847159169397e-05, "loss": 1.0079, "step": 2946 }, { "epoch": 0.17, "grad_norm": 1.8760653734207153, "learning_rate": 1.9004039151468654e-05, "loss": 1.0923, "step": 2947 }, { "epoch": 0.17, "grad_norm": 1.9713869094848633, "learning_rate": 1.900323083306367e-05, "loss": 0.9865, "step": 2948 }, { "epoch": 0.17, "grad_norm": 2.1316449642181396, "learning_rate": 1.9002422203982342e-05, "loss": 1.1449, "step": 2949 }, { "epoch": 0.17, "grad_norm": 1.8565106391906738, "learning_rate": 1.9001613264252565e-05, "loss": 1.0821, "step": 2950 }, { "epoch": 0.17, "grad_norm": 1.7604048252105713, "learning_rate": 1.9000804013902264e-05, "loss": 1.0402, "step": 2951 }, { "epoch": 0.17, "grad_norm": 2.0256030559539795, "learning_rate": 1.8999994452959357e-05, "loss": 1.0772, "step": 2952 }, { "epoch": 0.17, "grad_norm": 1.9991750717163086, "learning_rate": 1.8999184581451783e-05, "loss": 1.0487, "step": 2953 }, { "epoch": 0.17, "grad_norm": 2.0230250358581543, "learning_rate": 1.8998374399407486e-05, "loss": 1.0922, "step": 2954 }, { "epoch": 0.17, "grad_norm": 1.9650601148605347, "learning_rate": 1.8997563906854427e-05, "loss": 1.1754, "step": 2955 }, { "epoch": 0.17, "grad_norm": 1.9861899614334106, "learning_rate": 1.899675310382057e-05, "loss": 1.0609, "step": 2956 }, { "epoch": 0.17, "grad_norm": 1.8957674503326416, "learning_rate": 1.8995941990333894e-05, "loss": 1.0995, "step": 2957 }, { "epoch": 0.17, "grad_norm": 1.7356202602386475, "learning_rate": 1.899513056642239e-05, "loss": 1.0554, "step": 2958 }, { "epoch": 0.17, "grad_norm": 2.076918601989746, "learning_rate": 1.899431883211406e-05, "loss": 1.0885, "step": 2959 }, { "epoch": 0.17, "grad_norm": 2.065945863723755, "learning_rate": 1.8993506787436905e-05, "loss": 1.056, "step": 2960 }, { "epoch": 0.17, "grad_norm": 1.993909239768982, "learning_rate": 1.8992694432418958e-05, "loss": 1.0548, "step": 2961 }, { "epoch": 0.17, "grad_norm": 2.0165367126464844, "learning_rate": 1.8991881767088245e-05, "loss": 1.0799, "step": 2962 }, { "epoch": 0.17, "grad_norm": 2.0284125804901123, "learning_rate": 1.899106879147281e-05, "loss": 1.0216, "step": 2963 }, { "epoch": 0.17, "grad_norm": 1.9928864240646362, "learning_rate": 1.8990255505600706e-05, "loss": 1.0449, "step": 2964 }, { "epoch": 0.17, "grad_norm": 2.0046610832214355, "learning_rate": 1.8989441909499998e-05, "loss": 1.1715, "step": 2965 }, { "epoch": 0.17, "grad_norm": 2.1145431995391846, "learning_rate": 1.8988628003198762e-05, "loss": 1.0832, "step": 2966 }, { "epoch": 0.17, "grad_norm": 2.1668436527252197, "learning_rate": 1.898781378672508e-05, "loss": 1.167, "step": 2967 }, { "epoch": 0.17, "grad_norm": 1.8951785564422607, "learning_rate": 1.8986999260107054e-05, "loss": 1.0043, "step": 2968 }, { "epoch": 0.17, "grad_norm": 1.2734827995300293, "learning_rate": 1.8986184423372784e-05, "loss": 0.6073, "step": 2969 }, { "epoch": 0.17, "grad_norm": 2.136033296585083, "learning_rate": 1.898536927655039e-05, "loss": 1.0259, "step": 2970 }, { "epoch": 0.17, "grad_norm": 1.9802865982055664, "learning_rate": 1.898455381966801e-05, "loss": 1.0753, "step": 2971 }, { "epoch": 0.17, "grad_norm": 1.7699776887893677, "learning_rate": 1.8983738052753767e-05, "loss": 0.9932, "step": 2972 }, { "epoch": 0.17, "grad_norm": 1.9671918153762817, "learning_rate": 1.898292197583582e-05, "loss": 1.0428, "step": 2973 }, { "epoch": 0.17, "grad_norm": 2.134178876876831, "learning_rate": 1.8982105588942333e-05, "loss": 1.0647, "step": 2974 }, { "epoch": 0.17, "grad_norm": 1.9318534135818481, "learning_rate": 1.8981288892101468e-05, "loss": 1.0227, "step": 2975 }, { "epoch": 0.17, "grad_norm": 1.9363925457000732, "learning_rate": 1.8980471885341415e-05, "loss": 1.156, "step": 2976 }, { "epoch": 0.17, "grad_norm": 1.8565434217453003, "learning_rate": 1.8979654568690363e-05, "loss": 1.0217, "step": 2977 }, { "epoch": 0.17, "grad_norm": 1.8469220399856567, "learning_rate": 1.8978836942176513e-05, "loss": 1.1264, "step": 2978 }, { "epoch": 0.17, "grad_norm": 2.0001449584960938, "learning_rate": 1.897801900582808e-05, "loss": 1.0021, "step": 2979 }, { "epoch": 0.17, "grad_norm": 2.389589786529541, "learning_rate": 1.8977200759673295e-05, "loss": 1.0617, "step": 2980 }, { "epoch": 0.17, "grad_norm": 2.0419857501983643, "learning_rate": 1.8976382203740383e-05, "loss": 1.1454, "step": 2981 }, { "epoch": 0.17, "grad_norm": 1.1098897457122803, "learning_rate": 1.8975563338057602e-05, "loss": 0.6071, "step": 2982 }, { "epoch": 0.17, "grad_norm": 1.8723129034042358, "learning_rate": 1.89747441626532e-05, "loss": 1.0691, "step": 2983 }, { "epoch": 0.17, "grad_norm": 2.0793051719665527, "learning_rate": 1.8973924677555448e-05, "loss": 1.1307, "step": 2984 }, { "epoch": 0.17, "grad_norm": 2.0163912773132324, "learning_rate": 1.897310488279262e-05, "loss": 1.023, "step": 2985 }, { "epoch": 0.17, "grad_norm": 2.043722629547119, "learning_rate": 1.897228477839301e-05, "loss": 1.1534, "step": 2986 }, { "epoch": 0.17, "grad_norm": 2.0760319232940674, "learning_rate": 1.897146436438491e-05, "loss": 1.0985, "step": 2987 }, { "epoch": 0.17, "grad_norm": 1.8473470211029053, "learning_rate": 1.8970643640796642e-05, "loss": 1.0897, "step": 2988 }, { "epoch": 0.17, "grad_norm": 2.1732301712036133, "learning_rate": 1.896982260765652e-05, "loss": 1.0501, "step": 2989 }, { "epoch": 0.17, "grad_norm": 2.3138396739959717, "learning_rate": 1.8969001264992872e-05, "loss": 1.1972, "step": 2990 }, { "epoch": 0.17, "grad_norm": 2.1274964809417725, "learning_rate": 1.8968179612834048e-05, "loss": 1.1422, "step": 2991 }, { "epoch": 0.17, "grad_norm": 1.7372972965240479, "learning_rate": 1.8967357651208396e-05, "loss": 1.0065, "step": 2992 }, { "epoch": 0.17, "grad_norm": 1.9205377101898193, "learning_rate": 1.8966535380144278e-05, "loss": 1.0313, "step": 2993 }, { "epoch": 0.17, "grad_norm": 1.1258206367492676, "learning_rate": 1.8965712799670077e-05, "loss": 0.6516, "step": 2994 }, { "epoch": 0.17, "grad_norm": 1.858955979347229, "learning_rate": 1.8964889909814167e-05, "loss": 0.9919, "step": 2995 }, { "epoch": 0.17, "grad_norm": 2.0021536350250244, "learning_rate": 1.8964066710604953e-05, "loss": 1.1322, "step": 2996 }, { "epoch": 0.17, "grad_norm": 2.010690450668335, "learning_rate": 1.8963243202070832e-05, "loss": 0.9942, "step": 2997 }, { "epoch": 0.17, "grad_norm": 1.7876743078231812, "learning_rate": 1.896241938424023e-05, "loss": 1.1047, "step": 2998 }, { "epoch": 0.17, "grad_norm": 1.7891483306884766, "learning_rate": 1.896159525714157e-05, "loss": 1.0348, "step": 2999 }, { "epoch": 0.17, "grad_norm": 1.9011098146438599, "learning_rate": 1.8960770820803286e-05, "loss": 1.0941, "step": 3000 }, { "epoch": 0.17, "grad_norm": 2.029694080352783, "learning_rate": 1.8959946075253833e-05, "loss": 1.0809, "step": 3001 }, { "epoch": 0.17, "grad_norm": 2.061575412750244, "learning_rate": 1.8959121020521674e-05, "loss": 1.1, "step": 3002 }, { "epoch": 0.17, "grad_norm": 2.0008037090301514, "learning_rate": 1.8958295656635273e-05, "loss": 1.0101, "step": 3003 }, { "epoch": 0.17, "grad_norm": 1.9375660419464111, "learning_rate": 1.8957469983623113e-05, "loss": 1.0455, "step": 3004 }, { "epoch": 0.17, "grad_norm": 1.990166425704956, "learning_rate": 1.8956644001513686e-05, "loss": 1.0524, "step": 3005 }, { "epoch": 0.17, "grad_norm": 1.8718637228012085, "learning_rate": 1.8955817710335493e-05, "loss": 1.0437, "step": 3006 }, { "epoch": 0.17, "grad_norm": 1.9028778076171875, "learning_rate": 1.895499111011705e-05, "loss": 1.1123, "step": 3007 }, { "epoch": 0.17, "grad_norm": 1.9340474605560303, "learning_rate": 1.8954164200886874e-05, "loss": 1.0475, "step": 3008 }, { "epoch": 0.17, "grad_norm": 1.953302264213562, "learning_rate": 1.8953336982673506e-05, "loss": 1.0313, "step": 3009 }, { "epoch": 0.17, "grad_norm": 1.781287670135498, "learning_rate": 1.895250945550549e-05, "loss": 0.9338, "step": 3010 }, { "epoch": 0.17, "grad_norm": 1.8158373832702637, "learning_rate": 1.895168161941138e-05, "loss": 1.027, "step": 3011 }, { "epoch": 0.17, "grad_norm": 1.973016381263733, "learning_rate": 1.8950853474419745e-05, "loss": 1.0986, "step": 3012 }, { "epoch": 0.17, "grad_norm": 1.8059771060943604, "learning_rate": 1.8950025020559155e-05, "loss": 1.0776, "step": 3013 }, { "epoch": 0.17, "grad_norm": 1.9360907077789307, "learning_rate": 1.8949196257858205e-05, "loss": 1.017, "step": 3014 }, { "epoch": 0.17, "grad_norm": 2.0138602256774902, "learning_rate": 1.894836718634549e-05, "loss": 1.0888, "step": 3015 }, { "epoch": 0.17, "grad_norm": 2.1574063301086426, "learning_rate": 1.894753780604962e-05, "loss": 0.9724, "step": 3016 }, { "epoch": 0.17, "grad_norm": 1.8222215175628662, "learning_rate": 1.8946708116999216e-05, "loss": 1.0829, "step": 3017 }, { "epoch": 0.17, "grad_norm": 2.050697088241577, "learning_rate": 1.8945878119222904e-05, "loss": 1.0059, "step": 3018 }, { "epoch": 0.17, "grad_norm": 1.9466689825057983, "learning_rate": 1.894504781274933e-05, "loss": 1.1105, "step": 3019 }, { "epoch": 0.17, "grad_norm": 1.949623942375183, "learning_rate": 1.8944217197607142e-05, "loss": 1.1131, "step": 3020 }, { "epoch": 0.17, "grad_norm": 1.9778258800506592, "learning_rate": 1.8943386273825e-05, "loss": 1.0343, "step": 3021 }, { "epoch": 0.17, "grad_norm": 2.0692856311798096, "learning_rate": 1.8942555041431584e-05, "loss": 1.1159, "step": 3022 }, { "epoch": 0.17, "grad_norm": 1.9074277877807617, "learning_rate": 1.8941723500455575e-05, "loss": 1.0707, "step": 3023 }, { "epoch": 0.17, "grad_norm": 2.133254289627075, "learning_rate": 1.8940891650925662e-05, "loss": 1.0496, "step": 3024 }, { "epoch": 0.17, "grad_norm": 1.9440248012542725, "learning_rate": 1.8940059492870552e-05, "loss": 1.0953, "step": 3025 }, { "epoch": 0.17, "grad_norm": 1.788736343383789, "learning_rate": 1.8939227026318964e-05, "loss": 1.0241, "step": 3026 }, { "epoch": 0.17, "grad_norm": 1.8978806734085083, "learning_rate": 1.8938394251299624e-05, "loss": 1.105, "step": 3027 }, { "epoch": 0.17, "grad_norm": 1.8875573873519897, "learning_rate": 1.8937561167841262e-05, "loss": 1.0331, "step": 3028 }, { "epoch": 0.17, "grad_norm": 1.9286848306655884, "learning_rate": 1.8936727775972638e-05, "loss": 1.1025, "step": 3029 }, { "epoch": 0.17, "grad_norm": 2.0652108192443848, "learning_rate": 1.8935894075722495e-05, "loss": 1.121, "step": 3030 }, { "epoch": 0.17, "grad_norm": 1.8857553005218506, "learning_rate": 1.893506006711961e-05, "loss": 1.0003, "step": 3031 }, { "epoch": 0.17, "grad_norm": 1.961744785308838, "learning_rate": 1.8934225750192762e-05, "loss": 1.0014, "step": 3032 }, { "epoch": 0.17, "grad_norm": 1.9467982053756714, "learning_rate": 1.8933391124970742e-05, "loss": 1.1093, "step": 3033 }, { "epoch": 0.17, "grad_norm": 1.111811637878418, "learning_rate": 1.8932556191482347e-05, "loss": 0.5851, "step": 3034 }, { "epoch": 0.17, "grad_norm": 1.873814344406128, "learning_rate": 1.893172094975639e-05, "loss": 1.0432, "step": 3035 }, { "epoch": 0.17, "grad_norm": 1.9854656457901, "learning_rate": 1.8930885399821693e-05, "loss": 1.017, "step": 3036 }, { "epoch": 0.17, "grad_norm": 1.216555118560791, "learning_rate": 1.8930049541707088e-05, "loss": 0.6279, "step": 3037 }, { "epoch": 0.17, "grad_norm": 1.9797500371932983, "learning_rate": 1.892921337544142e-05, "loss": 1.153, "step": 3038 }, { "epoch": 0.17, "grad_norm": 2.0771334171295166, "learning_rate": 1.892837690105354e-05, "loss": 1.1322, "step": 3039 }, { "epoch": 0.17, "grad_norm": 2.0623233318328857, "learning_rate": 1.8927540118572314e-05, "loss": 1.1078, "step": 3040 }, { "epoch": 0.17, "grad_norm": 2.0152978897094727, "learning_rate": 1.8926703028026617e-05, "loss": 1.1086, "step": 3041 }, { "epoch": 0.17, "grad_norm": 2.4055519104003906, "learning_rate": 1.892586562944533e-05, "loss": 1.0539, "step": 3042 }, { "epoch": 0.17, "grad_norm": 1.848069667816162, "learning_rate": 1.8925027922857358e-05, "loss": 1.0976, "step": 3043 }, { "epoch": 0.17, "grad_norm": 2.042470932006836, "learning_rate": 1.89241899082916e-05, "loss": 0.9783, "step": 3044 }, { "epoch": 0.17, "grad_norm": 1.8244280815124512, "learning_rate": 1.892335158577698e-05, "loss": 1.0713, "step": 3045 }, { "epoch": 0.17, "grad_norm": 1.8841606378555298, "learning_rate": 1.8922512955342423e-05, "loss": 0.9789, "step": 3046 }, { "epoch": 0.17, "grad_norm": 1.7972135543823242, "learning_rate": 1.892167401701687e-05, "loss": 1.0037, "step": 3047 }, { "epoch": 0.17, "grad_norm": 1.9971961975097656, "learning_rate": 1.8920834770829262e-05, "loss": 1.012, "step": 3048 }, { "epoch": 0.17, "grad_norm": 1.1417475938796997, "learning_rate": 1.891999521680857e-05, "loss": 0.6142, "step": 3049 }, { "epoch": 0.17, "grad_norm": 1.9814648628234863, "learning_rate": 1.891915535498376e-05, "loss": 1.1373, "step": 3050 }, { "epoch": 0.17, "grad_norm": 1.8645546436309814, "learning_rate": 1.8918315185383812e-05, "loss": 1.0334, "step": 3051 }, { "epoch": 0.18, "grad_norm": 2.1152050495147705, "learning_rate": 1.891747470803772e-05, "loss": 1.0105, "step": 3052 }, { "epoch": 0.18, "grad_norm": 1.9987908601760864, "learning_rate": 1.8916633922974487e-05, "loss": 1.0848, "step": 3053 }, { "epoch": 0.18, "grad_norm": 2.033486843109131, "learning_rate": 1.8915792830223122e-05, "loss": 1.1923, "step": 3054 }, { "epoch": 0.18, "grad_norm": 2.0743985176086426, "learning_rate": 1.8914951429812653e-05, "loss": 1.085, "step": 3055 }, { "epoch": 0.18, "grad_norm": 1.7913750410079956, "learning_rate": 1.8914109721772113e-05, "loss": 0.9951, "step": 3056 }, { "epoch": 0.18, "grad_norm": 1.9485929012298584, "learning_rate": 1.891326770613055e-05, "loss": 0.9978, "step": 3057 }, { "epoch": 0.18, "grad_norm": 2.0151426792144775, "learning_rate": 1.8912425382917013e-05, "loss": 1.0312, "step": 3058 }, { "epoch": 0.18, "grad_norm": 1.888296127319336, "learning_rate": 1.8911582752160572e-05, "loss": 1.0874, "step": 3059 }, { "epoch": 0.18, "grad_norm": 2.062451124191284, "learning_rate": 1.89107398138903e-05, "loss": 1.0763, "step": 3060 }, { "epoch": 0.18, "grad_norm": 1.9340568780899048, "learning_rate": 1.8909896568135297e-05, "loss": 0.9979, "step": 3061 }, { "epoch": 0.18, "grad_norm": 2.137028217315674, "learning_rate": 1.8909053014924646e-05, "loss": 1.1171, "step": 3062 }, { "epoch": 0.18, "grad_norm": 1.8640222549438477, "learning_rate": 1.8908209154287466e-05, "loss": 1.027, "step": 3063 }, { "epoch": 0.18, "grad_norm": 2.0309529304504395, "learning_rate": 1.890736498625287e-05, "loss": 1.0476, "step": 3064 }, { "epoch": 0.18, "grad_norm": 1.5965559482574463, "learning_rate": 1.890652051084999e-05, "loss": 0.9815, "step": 3065 }, { "epoch": 0.18, "grad_norm": 1.9703634977340698, "learning_rate": 1.8905675728107966e-05, "loss": 1.0342, "step": 3066 }, { "epoch": 0.18, "grad_norm": 2.154611349105835, "learning_rate": 1.8904830638055948e-05, "loss": 1.0083, "step": 3067 }, { "epoch": 0.18, "grad_norm": 1.9131062030792236, "learning_rate": 1.8903985240723104e-05, "loss": 1.0713, "step": 3068 }, { "epoch": 0.18, "grad_norm": 1.7599763870239258, "learning_rate": 1.89031395361386e-05, "loss": 1.0879, "step": 3069 }, { "epoch": 0.18, "grad_norm": 1.0864375829696655, "learning_rate": 1.890229352433162e-05, "loss": 0.6227, "step": 3070 }, { "epoch": 0.18, "grad_norm": 1.8956810235977173, "learning_rate": 1.8901447205331354e-05, "loss": 1.0572, "step": 3071 }, { "epoch": 0.18, "grad_norm": 2.0365216732025146, "learning_rate": 1.8900600579167014e-05, "loss": 1.076, "step": 3072 }, { "epoch": 0.18, "grad_norm": 1.890666127204895, "learning_rate": 1.8899753645867813e-05, "loss": 1.0794, "step": 3073 }, { "epoch": 0.18, "grad_norm": 2.0366103649139404, "learning_rate": 1.8898906405462972e-05, "loss": 1.0762, "step": 3074 }, { "epoch": 0.18, "grad_norm": 1.6437525749206543, "learning_rate": 1.889805885798173e-05, "loss": 1.0652, "step": 3075 }, { "epoch": 0.18, "grad_norm": 1.9615285396575928, "learning_rate": 1.889721100345333e-05, "loss": 1.0105, "step": 3076 }, { "epoch": 0.18, "grad_norm": 1.7844871282577515, "learning_rate": 1.8896362841907033e-05, "loss": 1.0286, "step": 3077 }, { "epoch": 0.18, "grad_norm": 1.0820298194885254, "learning_rate": 1.8895514373372107e-05, "loss": 0.617, "step": 3078 }, { "epoch": 0.18, "grad_norm": 2.0752005577087402, "learning_rate": 1.8894665597877824e-05, "loss": 1.076, "step": 3079 }, { "epoch": 0.18, "grad_norm": 2.0140368938446045, "learning_rate": 1.889381651545348e-05, "loss": 1.1663, "step": 3080 }, { "epoch": 0.18, "grad_norm": 2.0944695472717285, "learning_rate": 1.8892967126128373e-05, "loss": 1.0689, "step": 3081 }, { "epoch": 0.18, "grad_norm": 1.7865242958068848, "learning_rate": 1.889211742993181e-05, "loss": 1.1034, "step": 3082 }, { "epoch": 0.18, "grad_norm": 2.3344666957855225, "learning_rate": 1.8891267426893116e-05, "loss": 1.0753, "step": 3083 }, { "epoch": 0.18, "grad_norm": 1.8635098934173584, "learning_rate": 1.8890417117041618e-05, "loss": 1.0148, "step": 3084 }, { "epoch": 0.18, "grad_norm": 1.172523021697998, "learning_rate": 1.8889566500406662e-05, "loss": 0.6651, "step": 3085 }, { "epoch": 0.18, "grad_norm": 1.9754880666732788, "learning_rate": 1.8888715577017596e-05, "loss": 1.0089, "step": 3086 }, { "epoch": 0.18, "grad_norm": 2.0755326747894287, "learning_rate": 1.8887864346903784e-05, "loss": 1.1142, "step": 3087 }, { "epoch": 0.18, "grad_norm": 1.8751474618911743, "learning_rate": 1.8887012810094606e-05, "loss": 1.0768, "step": 3088 }, { "epoch": 0.18, "grad_norm": 1.7957533597946167, "learning_rate": 1.8886160966619433e-05, "loss": 1.0156, "step": 3089 }, { "epoch": 0.18, "grad_norm": 2.0717203617095947, "learning_rate": 1.8885308816507674e-05, "loss": 1.0328, "step": 3090 }, { "epoch": 0.18, "grad_norm": 1.8285044431686401, "learning_rate": 1.8884456359788725e-05, "loss": 1.0114, "step": 3091 }, { "epoch": 0.18, "grad_norm": 1.9245933294296265, "learning_rate": 1.8883603596492004e-05, "loss": 1.1163, "step": 3092 }, { "epoch": 0.18, "grad_norm": 2.1680736541748047, "learning_rate": 1.888275052664694e-05, "loss": 1.0796, "step": 3093 }, { "epoch": 0.18, "grad_norm": 1.9532865285873413, "learning_rate": 1.888189715028297e-05, "loss": 1.059, "step": 3094 }, { "epoch": 0.18, "grad_norm": 2.4088635444641113, "learning_rate": 1.8881043467429533e-05, "loss": 1.0819, "step": 3095 }, { "epoch": 0.18, "grad_norm": 1.8426268100738525, "learning_rate": 1.88801894781161e-05, "loss": 1.0336, "step": 3096 }, { "epoch": 0.18, "grad_norm": 1.975570797920227, "learning_rate": 1.8879335182372133e-05, "loss": 1.0416, "step": 3097 }, { "epoch": 0.18, "grad_norm": 2.0050601959228516, "learning_rate": 1.887848058022711e-05, "loss": 1.066, "step": 3098 }, { "epoch": 0.18, "grad_norm": 2.0849416255950928, "learning_rate": 1.887762567171053e-05, "loss": 1.1271, "step": 3099 }, { "epoch": 0.18, "grad_norm": 1.7428510189056396, "learning_rate": 1.887677045685188e-05, "loss": 1.1412, "step": 3100 }, { "epoch": 0.18, "grad_norm": 2.3142433166503906, "learning_rate": 1.887591493568068e-05, "loss": 0.9902, "step": 3101 }, { "epoch": 0.18, "grad_norm": 1.7968847751617432, "learning_rate": 1.887505910822645e-05, "loss": 0.9576, "step": 3102 }, { "epoch": 0.18, "grad_norm": 1.8358029127120972, "learning_rate": 1.887420297451872e-05, "loss": 1.0698, "step": 3103 }, { "epoch": 0.18, "grad_norm": 1.0887221097946167, "learning_rate": 1.8873346534587033e-05, "loss": 0.5851, "step": 3104 }, { "epoch": 0.18, "grad_norm": 2.169832944869995, "learning_rate": 1.8872489788460947e-05, "loss": 1.1116, "step": 3105 }, { "epoch": 0.18, "grad_norm": 1.9128682613372803, "learning_rate": 1.8871632736170024e-05, "loss": 1.1064, "step": 3106 }, { "epoch": 0.18, "grad_norm": 1.9288883209228516, "learning_rate": 1.887077537774383e-05, "loss": 1.0524, "step": 3107 }, { "epoch": 0.18, "grad_norm": 1.997115969657898, "learning_rate": 1.8869917713211964e-05, "loss": 1.1389, "step": 3108 }, { "epoch": 0.18, "grad_norm": 1.7426297664642334, "learning_rate": 1.8869059742604013e-05, "loss": 1.0374, "step": 3109 }, { "epoch": 0.18, "grad_norm": 1.9589606523513794, "learning_rate": 1.8868201465949585e-05, "loss": 1.035, "step": 3110 }, { "epoch": 0.18, "grad_norm": 1.9675849676132202, "learning_rate": 1.8867342883278293e-05, "loss": 1.0522, "step": 3111 }, { "epoch": 0.18, "grad_norm": 1.9334344863891602, "learning_rate": 1.8866483994619775e-05, "loss": 1.0036, "step": 3112 }, { "epoch": 0.18, "grad_norm": 2.07240629196167, "learning_rate": 1.886562480000366e-05, "loss": 0.9803, "step": 3113 }, { "epoch": 0.18, "grad_norm": 2.0564050674438477, "learning_rate": 1.8864765299459595e-05, "loss": 1.0422, "step": 3114 }, { "epoch": 0.18, "grad_norm": 1.148075819015503, "learning_rate": 1.8863905493017242e-05, "loss": 0.6243, "step": 3115 }, { "epoch": 0.18, "grad_norm": 2.0131330490112305, "learning_rate": 1.8863045380706275e-05, "loss": 1.083, "step": 3116 }, { "epoch": 0.18, "grad_norm": 1.8728803396224976, "learning_rate": 1.8862184962556366e-05, "loss": 1.0468, "step": 3117 }, { "epoch": 0.18, "grad_norm": 1.8426618576049805, "learning_rate": 1.8861324238597212e-05, "loss": 1.116, "step": 3118 }, { "epoch": 0.18, "grad_norm": 1.7704581022262573, "learning_rate": 1.8860463208858513e-05, "loss": 1.046, "step": 3119 }, { "epoch": 0.18, "grad_norm": 1.8465875387191772, "learning_rate": 1.8859601873369974e-05, "loss": 1.0698, "step": 3120 }, { "epoch": 0.18, "grad_norm": 1.9481022357940674, "learning_rate": 1.8858740232161325e-05, "loss": 1.0968, "step": 3121 }, { "epoch": 0.18, "grad_norm": 1.836522102355957, "learning_rate": 1.88578782852623e-05, "loss": 1.0386, "step": 3122 }, { "epoch": 0.18, "grad_norm": 1.9044045209884644, "learning_rate": 1.8857016032702634e-05, "loss": 0.998, "step": 3123 }, { "epoch": 0.18, "grad_norm": 1.1791692972183228, "learning_rate": 1.8856153474512088e-05, "loss": 0.66, "step": 3124 }, { "epoch": 0.18, "grad_norm": 1.986464500427246, "learning_rate": 1.8855290610720426e-05, "loss": 1.0451, "step": 3125 }, { "epoch": 0.18, "grad_norm": 1.9232033491134644, "learning_rate": 1.8854427441357418e-05, "loss": 0.9975, "step": 3126 }, { "epoch": 0.18, "grad_norm": 2.05118989944458, "learning_rate": 1.8853563966452854e-05, "loss": 1.115, "step": 3127 }, { "epoch": 0.18, "grad_norm": 1.7756752967834473, "learning_rate": 1.8852700186036526e-05, "loss": 1.076, "step": 3128 }, { "epoch": 0.18, "grad_norm": 1.897196888923645, "learning_rate": 1.885183610013825e-05, "loss": 1.1529, "step": 3129 }, { "epoch": 0.18, "grad_norm": 1.6670805215835571, "learning_rate": 1.8850971708787833e-05, "loss": 1.0476, "step": 3130 }, { "epoch": 0.18, "grad_norm": 1.045642614364624, "learning_rate": 1.8850107012015105e-05, "loss": 0.5897, "step": 3131 }, { "epoch": 0.18, "grad_norm": 1.773659348487854, "learning_rate": 1.884924200984991e-05, "loss": 1.0037, "step": 3132 }, { "epoch": 0.18, "grad_norm": 1.7240121364593506, "learning_rate": 1.884837670232209e-05, "loss": 1.0696, "step": 3133 }, { "epoch": 0.18, "grad_norm": 1.855925440788269, "learning_rate": 1.884751108946151e-05, "loss": 1.0964, "step": 3134 }, { "epoch": 0.18, "grad_norm": 1.9446394443511963, "learning_rate": 1.884664517129803e-05, "loss": 1.0342, "step": 3135 }, { "epoch": 0.18, "grad_norm": 1.8911516666412354, "learning_rate": 1.8845778947861545e-05, "loss": 1.0954, "step": 3136 }, { "epoch": 0.18, "grad_norm": 1.9340369701385498, "learning_rate": 1.8844912419181934e-05, "loss": 1.0858, "step": 3137 }, { "epoch": 0.18, "grad_norm": 1.9892759323120117, "learning_rate": 1.8844045585289105e-05, "loss": 1.0719, "step": 3138 }, { "epoch": 0.18, "grad_norm": 1.8575681447982788, "learning_rate": 1.8843178446212965e-05, "loss": 1.0873, "step": 3139 }, { "epoch": 0.18, "grad_norm": 1.7944180965423584, "learning_rate": 1.884231100198344e-05, "loss": 0.9844, "step": 3140 }, { "epoch": 0.18, "grad_norm": 1.7894824743270874, "learning_rate": 1.8841443252630463e-05, "loss": 1.0887, "step": 3141 }, { "epoch": 0.18, "grad_norm": 1.845396876335144, "learning_rate": 1.8840575198183977e-05, "loss": 1.0653, "step": 3142 }, { "epoch": 0.18, "grad_norm": 1.9280120134353638, "learning_rate": 1.8839706838673933e-05, "loss": 1.0616, "step": 3143 }, { "epoch": 0.18, "grad_norm": 1.9053345918655396, "learning_rate": 1.8838838174130303e-05, "loss": 1.0198, "step": 3144 }, { "epoch": 0.18, "grad_norm": 1.7476216554641724, "learning_rate": 1.8837969204583055e-05, "loss": 1.003, "step": 3145 }, { "epoch": 0.18, "grad_norm": 1.0599719285964966, "learning_rate": 1.883709993006218e-05, "loss": 0.5704, "step": 3146 }, { "epoch": 0.18, "grad_norm": 2.021195411682129, "learning_rate": 1.8836230350597667e-05, "loss": 1.0717, "step": 3147 }, { "epoch": 0.18, "grad_norm": 1.9832763671875, "learning_rate": 1.8835360466219534e-05, "loss": 1.0759, "step": 3148 }, { "epoch": 0.18, "grad_norm": 1.9397449493408203, "learning_rate": 1.8834490276957788e-05, "loss": 1.1081, "step": 3149 }, { "epoch": 0.18, "grad_norm": 1.0987125635147095, "learning_rate": 1.8833619782842464e-05, "loss": 0.608, "step": 3150 }, { "epoch": 0.18, "grad_norm": 1.9179284572601318, "learning_rate": 1.8832748983903593e-05, "loss": 1.0453, "step": 3151 }, { "epoch": 0.18, "grad_norm": 2.1638340950012207, "learning_rate": 1.8831877880171233e-05, "loss": 1.0745, "step": 3152 }, { "epoch": 0.18, "grad_norm": 1.9122291803359985, "learning_rate": 1.8831006471675433e-05, "loss": 1.0547, "step": 3153 }, { "epoch": 0.18, "grad_norm": 1.9634008407592773, "learning_rate": 1.883013475844627e-05, "loss": 1.0948, "step": 3154 }, { "epoch": 0.18, "grad_norm": 1.9297114610671997, "learning_rate": 1.8829262740513823e-05, "loss": 1.0947, "step": 3155 }, { "epoch": 0.18, "grad_norm": 2.066471815109253, "learning_rate": 1.882839041790818e-05, "loss": 1.0247, "step": 3156 }, { "epoch": 0.18, "grad_norm": 2.388782262802124, "learning_rate": 1.8827517790659447e-05, "loss": 1.0798, "step": 3157 }, { "epoch": 0.18, "grad_norm": 1.885628342628479, "learning_rate": 1.8826644858797734e-05, "loss": 1.1219, "step": 3158 }, { "epoch": 0.18, "grad_norm": 2.1876800060272217, "learning_rate": 1.8825771622353164e-05, "loss": 1.0793, "step": 3159 }, { "epoch": 0.18, "grad_norm": 1.9918705224990845, "learning_rate": 1.8824898081355866e-05, "loss": 1.1487, "step": 3160 }, { "epoch": 0.18, "grad_norm": 1.1957788467407227, "learning_rate": 1.882402423583599e-05, "loss": 0.6636, "step": 3161 }, { "epoch": 0.18, "grad_norm": 1.8979698419570923, "learning_rate": 1.8823150085823685e-05, "loss": 1.0492, "step": 3162 }, { "epoch": 0.18, "grad_norm": 1.8649994134902954, "learning_rate": 1.8822275631349115e-05, "loss": 1.045, "step": 3163 }, { "epoch": 0.18, "grad_norm": 1.918664813041687, "learning_rate": 1.8821400872442458e-05, "loss": 1.0176, "step": 3164 }, { "epoch": 0.18, "grad_norm": 1.9375090599060059, "learning_rate": 1.88205258091339e-05, "loss": 1.0676, "step": 3165 }, { "epoch": 0.18, "grad_norm": 1.8347877264022827, "learning_rate": 1.8819650441453635e-05, "loss": 1.1164, "step": 3166 }, { "epoch": 0.18, "grad_norm": 2.1002001762390137, "learning_rate": 1.881877476943187e-05, "loss": 1.0222, "step": 3167 }, { "epoch": 0.18, "grad_norm": 2.1007864475250244, "learning_rate": 1.881789879309882e-05, "loss": 1.0886, "step": 3168 }, { "epoch": 0.18, "grad_norm": 1.9871070384979248, "learning_rate": 1.8817022512484718e-05, "loss": 1.0781, "step": 3169 }, { "epoch": 0.18, "grad_norm": 1.9151084423065186, "learning_rate": 1.8816145927619795e-05, "loss": 1.0012, "step": 3170 }, { "epoch": 0.18, "grad_norm": 1.860733151435852, "learning_rate": 1.8815269038534305e-05, "loss": 1.0587, "step": 3171 }, { "epoch": 0.18, "grad_norm": 1.898160457611084, "learning_rate": 1.8814391845258507e-05, "loss": 1.0481, "step": 3172 }, { "epoch": 0.18, "grad_norm": 1.9673105478286743, "learning_rate": 1.8813514347822662e-05, "loss": 1.1394, "step": 3173 }, { "epoch": 0.18, "grad_norm": 2.014975070953369, "learning_rate": 1.8812636546257062e-05, "loss": 1.0822, "step": 3174 }, { "epoch": 0.18, "grad_norm": 1.9710543155670166, "learning_rate": 1.881175844059199e-05, "loss": 1.0113, "step": 3175 }, { "epoch": 0.18, "grad_norm": 2.058241605758667, "learning_rate": 1.881088003085775e-05, "loss": 1.1249, "step": 3176 }, { "epoch": 0.18, "grad_norm": 1.9997801780700684, "learning_rate": 1.881000131708465e-05, "loss": 0.9042, "step": 3177 }, { "epoch": 0.18, "grad_norm": 1.9435893297195435, "learning_rate": 1.8809122299303015e-05, "loss": 1.1371, "step": 3178 }, { "epoch": 0.18, "grad_norm": 1.9610495567321777, "learning_rate": 1.880824297754318e-05, "loss": 1.0608, "step": 3179 }, { "epoch": 0.18, "grad_norm": 1.8750948905944824, "learning_rate": 1.880736335183548e-05, "loss": 1.0738, "step": 3180 }, { "epoch": 0.18, "grad_norm": 1.9201828241348267, "learning_rate": 1.8806483422210275e-05, "loss": 1.1539, "step": 3181 }, { "epoch": 0.18, "grad_norm": 1.8751220703125, "learning_rate": 1.880560318869793e-05, "loss": 1.0392, "step": 3182 }, { "epoch": 0.18, "grad_norm": 1.9925799369812012, "learning_rate": 1.8804722651328814e-05, "loss": 1.1547, "step": 3183 }, { "epoch": 0.18, "grad_norm": 1.9806839227676392, "learning_rate": 1.8803841810133315e-05, "loss": 1.0571, "step": 3184 }, { "epoch": 0.18, "grad_norm": 1.8010292053222656, "learning_rate": 1.8802960665141824e-05, "loss": 1.036, "step": 3185 }, { "epoch": 0.18, "grad_norm": 2.0512712001800537, "learning_rate": 1.8802079216384754e-05, "loss": 1.1245, "step": 3186 }, { "epoch": 0.18, "grad_norm": 1.8635333776474, "learning_rate": 1.8801197463892516e-05, "loss": 1.1249, "step": 3187 }, { "epoch": 0.18, "grad_norm": 1.824039340019226, "learning_rate": 1.880031540769554e-05, "loss": 1.1187, "step": 3188 }, { "epoch": 0.18, "grad_norm": 2.1490957736968994, "learning_rate": 1.879943304782426e-05, "loss": 1.0597, "step": 3189 }, { "epoch": 0.18, "grad_norm": 1.9700634479522705, "learning_rate": 1.8798550384309128e-05, "loss": 1.022, "step": 3190 }, { "epoch": 0.18, "grad_norm": 1.1496310234069824, "learning_rate": 1.87976674171806e-05, "loss": 0.6336, "step": 3191 }, { "epoch": 0.18, "grad_norm": 1.9290885925292969, "learning_rate": 1.8796784146469146e-05, "loss": 1.1359, "step": 3192 }, { "epoch": 0.18, "grad_norm": 2.145732879638672, "learning_rate": 1.8795900572205238e-05, "loss": 1.031, "step": 3193 }, { "epoch": 0.18, "grad_norm": 1.9401350021362305, "learning_rate": 1.8795016694419378e-05, "loss": 1.04, "step": 3194 }, { "epoch": 0.18, "grad_norm": 2.002579927444458, "learning_rate": 1.8794132513142057e-05, "loss": 1.0572, "step": 3195 }, { "epoch": 0.18, "grad_norm": 1.8517241477966309, "learning_rate": 1.879324802840379e-05, "loss": 1.0808, "step": 3196 }, { "epoch": 0.18, "grad_norm": 1.9809143543243408, "learning_rate": 1.8792363240235097e-05, "loss": 0.9701, "step": 3197 }, { "epoch": 0.18, "grad_norm": 1.9905089139938354, "learning_rate": 1.8791478148666504e-05, "loss": 1.1054, "step": 3198 }, { "epoch": 0.18, "grad_norm": 2.018239736557007, "learning_rate": 1.8790592753728562e-05, "loss": 0.978, "step": 3199 }, { "epoch": 0.18, "grad_norm": 1.0903102159500122, "learning_rate": 1.878970705545182e-05, "loss": 0.6354, "step": 3200 }, { "epoch": 0.18, "grad_norm": 2.0063588619232178, "learning_rate": 1.878882105386684e-05, "loss": 1.0131, "step": 3201 }, { "epoch": 0.18, "grad_norm": 1.9619927406311035, "learning_rate": 1.8787934749004194e-05, "loss": 1.1232, "step": 3202 }, { "epoch": 0.18, "grad_norm": 1.878528118133545, "learning_rate": 1.878704814089447e-05, "loss": 0.9598, "step": 3203 }, { "epoch": 0.18, "grad_norm": 1.8036553859710693, "learning_rate": 1.878616122956826e-05, "loss": 1.0327, "step": 3204 }, { "epoch": 0.18, "grad_norm": 2.0007259845733643, "learning_rate": 1.878527401505617e-05, "loss": 1.0349, "step": 3205 }, { "epoch": 0.18, "grad_norm": 1.9058406352996826, "learning_rate": 1.8784386497388813e-05, "loss": 1.1136, "step": 3206 }, { "epoch": 0.18, "grad_norm": 1.7690937519073486, "learning_rate": 1.8783498676596815e-05, "loss": 1.0781, "step": 3207 }, { "epoch": 0.18, "grad_norm": 1.8112245798110962, "learning_rate": 1.8782610552710817e-05, "loss": 0.9604, "step": 3208 }, { "epoch": 0.18, "grad_norm": 1.7245216369628906, "learning_rate": 1.878172212576146e-05, "loss": 1.0069, "step": 3209 }, { "epoch": 0.18, "grad_norm": 1.879065752029419, "learning_rate": 1.8780833395779402e-05, "loss": 1.0045, "step": 3210 }, { "epoch": 0.18, "grad_norm": 1.9663444757461548, "learning_rate": 1.8779944362795314e-05, "loss": 1.1072, "step": 3211 }, { "epoch": 0.18, "grad_norm": 1.8845505714416504, "learning_rate": 1.877905502683987e-05, "loss": 0.965, "step": 3212 }, { "epoch": 0.18, "grad_norm": 2.3410561084747314, "learning_rate": 1.877816538794376e-05, "loss": 0.9502, "step": 3213 }, { "epoch": 0.18, "grad_norm": 2.131164789199829, "learning_rate": 1.8777275446137687e-05, "loss": 1.0873, "step": 3214 }, { "epoch": 0.18, "grad_norm": 1.8176981210708618, "learning_rate": 1.877638520145235e-05, "loss": 1.0653, "step": 3215 }, { "epoch": 0.18, "grad_norm": 1.87510085105896, "learning_rate": 1.8775494653918482e-05, "loss": 1.0659, "step": 3216 }, { "epoch": 0.18, "grad_norm": 1.8689242601394653, "learning_rate": 1.8774603803566804e-05, "loss": 1.0962, "step": 3217 }, { "epoch": 0.18, "grad_norm": 1.958552598953247, "learning_rate": 1.877371265042806e-05, "loss": 1.0491, "step": 3218 }, { "epoch": 0.18, "grad_norm": 1.8431156873703003, "learning_rate": 1.8772821194533e-05, "loss": 1.1154, "step": 3219 }, { "epoch": 0.18, "grad_norm": 1.7707875967025757, "learning_rate": 1.877192943591239e-05, "loss": 1.0431, "step": 3220 }, { "epoch": 0.18, "grad_norm": 1.7103362083435059, "learning_rate": 1.8771037374596995e-05, "loss": 0.9648, "step": 3221 }, { "epoch": 0.18, "grad_norm": 1.987158179283142, "learning_rate": 1.8770145010617604e-05, "loss": 1.1035, "step": 3222 }, { "epoch": 0.18, "grad_norm": 2.0184786319732666, "learning_rate": 1.8769252344005005e-05, "loss": 1.0932, "step": 3223 }, { "epoch": 0.18, "grad_norm": 2.153855085372925, "learning_rate": 1.876835937479e-05, "loss": 1.1398, "step": 3224 }, { "epoch": 0.18, "grad_norm": 1.9682793617248535, "learning_rate": 1.876746610300341e-05, "loss": 1.0241, "step": 3225 }, { "epoch": 0.19, "grad_norm": 1.9574861526489258, "learning_rate": 1.8766572528676055e-05, "loss": 1.1308, "step": 3226 }, { "epoch": 0.19, "grad_norm": 2.1643271446228027, "learning_rate": 1.8765678651838774e-05, "loss": 1.0126, "step": 3227 }, { "epoch": 0.19, "grad_norm": 1.8782780170440674, "learning_rate": 1.8764784472522405e-05, "loss": 1.0943, "step": 3228 }, { "epoch": 0.19, "grad_norm": 1.9205485582351685, "learning_rate": 1.8763889990757808e-05, "loss": 1.0141, "step": 3229 }, { "epoch": 0.19, "grad_norm": 2.0391829013824463, "learning_rate": 1.8762995206575848e-05, "loss": 1.0574, "step": 3230 }, { "epoch": 0.19, "grad_norm": 1.8242876529693604, "learning_rate": 1.8762100120007402e-05, "loss": 1.0889, "step": 3231 }, { "epoch": 0.19, "grad_norm": 1.039702296257019, "learning_rate": 1.876120473108336e-05, "loss": 0.6092, "step": 3232 }, { "epoch": 0.19, "grad_norm": 1.9650447368621826, "learning_rate": 1.8760309039834613e-05, "loss": 1.008, "step": 3233 }, { "epoch": 0.19, "grad_norm": 1.8859398365020752, "learning_rate": 1.875941304629207e-05, "loss": 0.9959, "step": 3234 }, { "epoch": 0.19, "grad_norm": 1.901572823524475, "learning_rate": 1.8758516750486655e-05, "loss": 1.045, "step": 3235 }, { "epoch": 0.19, "grad_norm": 2.124387502670288, "learning_rate": 1.875762015244929e-05, "loss": 0.9704, "step": 3236 }, { "epoch": 0.19, "grad_norm": 1.8097633123397827, "learning_rate": 1.8756723252210917e-05, "loss": 1.0535, "step": 3237 }, { "epoch": 0.19, "grad_norm": 1.8400793075561523, "learning_rate": 1.8755826049802487e-05, "loss": 0.9506, "step": 3238 }, { "epoch": 0.19, "grad_norm": 1.7167398929595947, "learning_rate": 1.875492854525496e-05, "loss": 1.0451, "step": 3239 }, { "epoch": 0.19, "grad_norm": 2.3077800273895264, "learning_rate": 1.8754030738599302e-05, "loss": 1.094, "step": 3240 }, { "epoch": 0.19, "grad_norm": 1.194966197013855, "learning_rate": 1.87531326298665e-05, "loss": 0.6595, "step": 3241 }, { "epoch": 0.19, "grad_norm": 2.0183727741241455, "learning_rate": 1.8752234219087538e-05, "loss": 1.0343, "step": 3242 }, { "epoch": 0.19, "grad_norm": 2.2822582721710205, "learning_rate": 1.8751335506293423e-05, "loss": 1.0888, "step": 3243 }, { "epoch": 0.19, "grad_norm": 1.9930367469787598, "learning_rate": 1.8750436491515165e-05, "loss": 1.0744, "step": 3244 }, { "epoch": 0.19, "grad_norm": 2.1007680892944336, "learning_rate": 1.8749537174783787e-05, "loss": 1.0164, "step": 3245 }, { "epoch": 0.19, "grad_norm": 1.8790193796157837, "learning_rate": 1.8748637556130323e-05, "loss": 1.0029, "step": 3246 }, { "epoch": 0.19, "grad_norm": 1.9194238185882568, "learning_rate": 1.8747737635585817e-05, "loss": 0.979, "step": 3247 }, { "epoch": 0.19, "grad_norm": 2.2853827476501465, "learning_rate": 1.874683741318132e-05, "loss": 1.0845, "step": 3248 }, { "epoch": 0.19, "grad_norm": 1.9042471647262573, "learning_rate": 1.8745936888947893e-05, "loss": 0.9847, "step": 3249 }, { "epoch": 0.19, "grad_norm": 1.9672541618347168, "learning_rate": 1.8745036062916617e-05, "loss": 1.0156, "step": 3250 }, { "epoch": 0.19, "grad_norm": 1.9093239307403564, "learning_rate": 1.8744134935118575e-05, "loss": 1.0319, "step": 3251 }, { "epoch": 0.19, "grad_norm": 1.8691750764846802, "learning_rate": 1.8743233505584863e-05, "loss": 1.0655, "step": 3252 }, { "epoch": 0.19, "grad_norm": 1.8544687032699585, "learning_rate": 1.8742331774346587e-05, "loss": 1.0632, "step": 3253 }, { "epoch": 0.19, "grad_norm": 1.965006709098816, "learning_rate": 1.874142974143486e-05, "loss": 1.0937, "step": 3254 }, { "epoch": 0.19, "grad_norm": 1.952591896057129, "learning_rate": 1.8740527406880813e-05, "loss": 1.0957, "step": 3255 }, { "epoch": 0.19, "grad_norm": 1.837761402130127, "learning_rate": 1.873962477071558e-05, "loss": 0.9728, "step": 3256 }, { "epoch": 0.19, "grad_norm": 1.9160650968551636, "learning_rate": 1.8738721832970308e-05, "loss": 1.0494, "step": 3257 }, { "epoch": 0.19, "grad_norm": 2.1636738777160645, "learning_rate": 1.8737818593676155e-05, "loss": 1.0909, "step": 3258 }, { "epoch": 0.19, "grad_norm": 1.8780964612960815, "learning_rate": 1.8736915052864293e-05, "loss": 0.9485, "step": 3259 }, { "epoch": 0.19, "grad_norm": 2.1274797916412354, "learning_rate": 1.8736011210565897e-05, "loss": 1.0766, "step": 3260 }, { "epoch": 0.19, "grad_norm": 1.7647018432617188, "learning_rate": 1.873510706681216e-05, "loss": 0.9751, "step": 3261 }, { "epoch": 0.19, "grad_norm": 2.05830454826355, "learning_rate": 1.8734202621634275e-05, "loss": 1.0551, "step": 3262 }, { "epoch": 0.19, "grad_norm": 1.8194825649261475, "learning_rate": 1.8733297875063457e-05, "loss": 1.0364, "step": 3263 }, { "epoch": 0.19, "grad_norm": 1.9840232133865356, "learning_rate": 1.8732392827130924e-05, "loss": 1.1237, "step": 3264 }, { "epoch": 0.19, "grad_norm": 2.031609296798706, "learning_rate": 1.8731487477867908e-05, "loss": 1.053, "step": 3265 }, { "epoch": 0.19, "grad_norm": 1.8754713535308838, "learning_rate": 1.873058182730565e-05, "loss": 0.9853, "step": 3266 }, { "epoch": 0.19, "grad_norm": 2.0270116329193115, "learning_rate": 1.8729675875475402e-05, "loss": 1.0821, "step": 3267 }, { "epoch": 0.19, "grad_norm": 2.1765875816345215, "learning_rate": 1.8728769622408423e-05, "loss": 1.057, "step": 3268 }, { "epoch": 0.19, "grad_norm": 2.0628955364227295, "learning_rate": 1.8727863068135987e-05, "loss": 1.0832, "step": 3269 }, { "epoch": 0.19, "grad_norm": 1.7419322729110718, "learning_rate": 1.872695621268938e-05, "loss": 1.0309, "step": 3270 }, { "epoch": 0.19, "grad_norm": 2.24467396736145, "learning_rate": 1.8726049056099887e-05, "loss": 1.0639, "step": 3271 }, { "epoch": 0.19, "grad_norm": 2.3124465942382812, "learning_rate": 1.8725141598398823e-05, "loss": 0.9719, "step": 3272 }, { "epoch": 0.19, "grad_norm": 1.9237641096115112, "learning_rate": 1.872423383961749e-05, "loss": 1.1478, "step": 3273 }, { "epoch": 0.19, "grad_norm": 2.1040890216827393, "learning_rate": 1.8723325779787218e-05, "loss": 0.968, "step": 3274 }, { "epoch": 0.19, "grad_norm": 1.6902024745941162, "learning_rate": 1.872241741893934e-05, "loss": 1.071, "step": 3275 }, { "epoch": 0.19, "grad_norm": 1.755793809890747, "learning_rate": 1.8721508757105203e-05, "loss": 1.0132, "step": 3276 }, { "epoch": 0.19, "grad_norm": 2.1633825302124023, "learning_rate": 1.872059979431616e-05, "loss": 1.0303, "step": 3277 }, { "epoch": 0.19, "grad_norm": 1.9733517169952393, "learning_rate": 1.8719690530603582e-05, "loss": 1.0649, "step": 3278 }, { "epoch": 0.19, "grad_norm": 1.9661974906921387, "learning_rate": 1.871878096599884e-05, "loss": 1.0751, "step": 3279 }, { "epoch": 0.19, "grad_norm": 2.0361452102661133, "learning_rate": 1.8717871100533317e-05, "loss": 1.047, "step": 3280 }, { "epoch": 0.19, "grad_norm": 2.1370222568511963, "learning_rate": 1.8716960934238422e-05, "loss": 1.0261, "step": 3281 }, { "epoch": 0.19, "grad_norm": 1.9634020328521729, "learning_rate": 1.871605046714555e-05, "loss": 1.1747, "step": 3282 }, { "epoch": 0.19, "grad_norm": 1.172603964805603, "learning_rate": 1.8715139699286125e-05, "loss": 0.5776, "step": 3283 }, { "epoch": 0.19, "grad_norm": 1.8870103359222412, "learning_rate": 1.8714228630691576e-05, "loss": 0.9769, "step": 3284 }, { "epoch": 0.19, "grad_norm": 1.8174453973770142, "learning_rate": 1.8713317261393337e-05, "loss": 1.0209, "step": 3285 }, { "epoch": 0.19, "grad_norm": 1.8726760149002075, "learning_rate": 1.8712405591422857e-05, "loss": 0.9879, "step": 3286 }, { "epoch": 0.19, "grad_norm": 1.9008108377456665, "learning_rate": 1.8711493620811602e-05, "loss": 0.9863, "step": 3287 }, { "epoch": 0.19, "grad_norm": 1.9532196521759033, "learning_rate": 1.8710581349591034e-05, "loss": 1.0251, "step": 3288 }, { "epoch": 0.19, "grad_norm": 2.012089729309082, "learning_rate": 1.8709668777792633e-05, "loss": 1.0548, "step": 3289 }, { "epoch": 0.19, "grad_norm": 1.8110605478286743, "learning_rate": 1.8708755905447897e-05, "loss": 1.0962, "step": 3290 }, { "epoch": 0.19, "grad_norm": 2.0551087856292725, "learning_rate": 1.870784273258832e-05, "loss": 1.123, "step": 3291 }, { "epoch": 0.19, "grad_norm": 1.985762357711792, "learning_rate": 1.8706929259245412e-05, "loss": 1.0283, "step": 3292 }, { "epoch": 0.19, "grad_norm": 1.9035676717758179, "learning_rate": 1.8706015485450697e-05, "loss": 1.0683, "step": 3293 }, { "epoch": 0.19, "grad_norm": 1.752871036529541, "learning_rate": 1.870510141123571e-05, "loss": 1.0262, "step": 3294 }, { "epoch": 0.19, "grad_norm": 1.8591305017471313, "learning_rate": 1.870418703663199e-05, "loss": 1.0719, "step": 3295 }, { "epoch": 0.19, "grad_norm": 1.891185998916626, "learning_rate": 1.870327236167109e-05, "loss": 0.9989, "step": 3296 }, { "epoch": 0.19, "grad_norm": 1.9542032480239868, "learning_rate": 1.870235738638457e-05, "loss": 1.0994, "step": 3297 }, { "epoch": 0.19, "grad_norm": 1.9030170440673828, "learning_rate": 1.8701442110804004e-05, "loss": 1.0069, "step": 3298 }, { "epoch": 0.19, "grad_norm": 1.8853739500045776, "learning_rate": 1.870052653496098e-05, "loss": 1.0701, "step": 3299 }, { "epoch": 0.19, "grad_norm": 1.7397522926330566, "learning_rate": 1.869961065888709e-05, "loss": 1.0167, "step": 3300 }, { "epoch": 0.19, "grad_norm": 1.9591866731643677, "learning_rate": 1.8698694482613937e-05, "loss": 1.0855, "step": 3301 }, { "epoch": 0.19, "grad_norm": 1.7865917682647705, "learning_rate": 1.8697778006173134e-05, "loss": 1.0791, "step": 3302 }, { "epoch": 0.19, "grad_norm": 1.6885157823562622, "learning_rate": 1.8696861229596307e-05, "loss": 1.1335, "step": 3303 }, { "epoch": 0.19, "grad_norm": 1.7753127813339233, "learning_rate": 1.86959441529151e-05, "loss": 0.9196, "step": 3304 }, { "epoch": 0.19, "grad_norm": 2.025127649307251, "learning_rate": 1.8695026776161146e-05, "loss": 1.0472, "step": 3305 }, { "epoch": 0.19, "grad_norm": 2.0208792686462402, "learning_rate": 1.869410909936611e-05, "loss": 1.0222, "step": 3306 }, { "epoch": 0.19, "grad_norm": 1.9484285116195679, "learning_rate": 1.869319112256165e-05, "loss": 1.0533, "step": 3307 }, { "epoch": 0.19, "grad_norm": 1.7399507761001587, "learning_rate": 1.8692272845779448e-05, "loss": 1.0167, "step": 3308 }, { "epoch": 0.19, "grad_norm": 1.2569055557250977, "learning_rate": 1.8691354269051192e-05, "loss": 0.6248, "step": 3309 }, { "epoch": 0.19, "grad_norm": 1.904214859008789, "learning_rate": 1.8690435392408584e-05, "loss": 1.0831, "step": 3310 }, { "epoch": 0.19, "grad_norm": 1.956451654434204, "learning_rate": 1.868951621588332e-05, "loss": 1.1825, "step": 3311 }, { "epoch": 0.19, "grad_norm": 2.0369679927825928, "learning_rate": 1.8688596739507127e-05, "loss": 0.9904, "step": 3312 }, { "epoch": 0.19, "grad_norm": 1.966463565826416, "learning_rate": 1.868767696331173e-05, "loss": 1.1563, "step": 3313 }, { "epoch": 0.19, "grad_norm": 1.8489439487457275, "learning_rate": 1.868675688732887e-05, "loss": 1.0889, "step": 3314 }, { "epoch": 0.19, "grad_norm": 1.895443081855774, "learning_rate": 1.8685836511590297e-05, "loss": 1.0644, "step": 3315 }, { "epoch": 0.19, "grad_norm": 1.765350103378296, "learning_rate": 1.8684915836127766e-05, "loss": 0.9662, "step": 3316 }, { "epoch": 0.19, "grad_norm": 1.9570988416671753, "learning_rate": 1.8683994860973053e-05, "loss": 1.0477, "step": 3317 }, { "epoch": 0.19, "grad_norm": 1.866875410079956, "learning_rate": 1.8683073586157933e-05, "loss": 1.0396, "step": 3318 }, { "epoch": 0.19, "grad_norm": 1.8933188915252686, "learning_rate": 1.86821520117142e-05, "loss": 1.0332, "step": 3319 }, { "epoch": 0.19, "grad_norm": 1.9142025709152222, "learning_rate": 1.868123013767365e-05, "loss": 0.9864, "step": 3320 }, { "epoch": 0.19, "grad_norm": 1.1308761835098267, "learning_rate": 1.86803079640681e-05, "loss": 0.6163, "step": 3321 }, { "epoch": 0.19, "grad_norm": 2.093803882598877, "learning_rate": 1.867938549092937e-05, "loss": 1.1464, "step": 3322 }, { "epoch": 0.19, "grad_norm": 2.0949089527130127, "learning_rate": 1.8678462718289293e-05, "loss": 1.0002, "step": 3323 }, { "epoch": 0.19, "grad_norm": 1.8703641891479492, "learning_rate": 1.8677539646179706e-05, "loss": 1.1262, "step": 3324 }, { "epoch": 0.19, "grad_norm": 2.0862178802490234, "learning_rate": 1.867661627463247e-05, "loss": 1.0913, "step": 3325 }, { "epoch": 0.19, "grad_norm": 1.9975895881652832, "learning_rate": 1.8675692603679443e-05, "loss": 1.061, "step": 3326 }, { "epoch": 0.19, "grad_norm": 1.8370732069015503, "learning_rate": 1.8674768633352497e-05, "loss": 0.9806, "step": 3327 }, { "epoch": 0.19, "grad_norm": 1.7796324491500854, "learning_rate": 1.8673844363683517e-05, "loss": 1.1344, "step": 3328 }, { "epoch": 0.19, "grad_norm": 1.3345690965652466, "learning_rate": 1.8672919794704398e-05, "loss": 0.649, "step": 3329 }, { "epoch": 0.19, "grad_norm": 2.0007505416870117, "learning_rate": 1.8671994926447047e-05, "loss": 1.16, "step": 3330 }, { "epoch": 0.19, "grad_norm": 1.7431893348693848, "learning_rate": 1.867106975894337e-05, "loss": 1.105, "step": 3331 }, { "epoch": 0.19, "grad_norm": 1.9228627681732178, "learning_rate": 1.86701442922253e-05, "loss": 1.0223, "step": 3332 }, { "epoch": 0.19, "grad_norm": 2.188016653060913, "learning_rate": 1.866921852632477e-05, "loss": 1.0894, "step": 3333 }, { "epoch": 0.19, "grad_norm": 1.9352843761444092, "learning_rate": 1.8668292461273726e-05, "loss": 1.0402, "step": 3334 }, { "epoch": 0.19, "grad_norm": 1.834652066230774, "learning_rate": 1.866736609710412e-05, "loss": 1.141, "step": 3335 }, { "epoch": 0.19, "grad_norm": 1.803381085395813, "learning_rate": 1.866643943384792e-05, "loss": 1.0879, "step": 3336 }, { "epoch": 0.19, "grad_norm": 1.0487242937088013, "learning_rate": 1.8665512471537108e-05, "loss": 0.5524, "step": 3337 }, { "epoch": 0.19, "grad_norm": 1.7956326007843018, "learning_rate": 1.8664585210203663e-05, "loss": 1.0008, "step": 3338 }, { "epoch": 0.19, "grad_norm": 1.905713677406311, "learning_rate": 1.866365764987959e-05, "loss": 1.0815, "step": 3339 }, { "epoch": 0.19, "grad_norm": 2.097097873687744, "learning_rate": 1.866272979059689e-05, "loss": 1.0612, "step": 3340 }, { "epoch": 0.19, "grad_norm": 2.0210537910461426, "learning_rate": 1.8661801632387586e-05, "loss": 1.2117, "step": 3341 }, { "epoch": 0.19, "grad_norm": 1.9372286796569824, "learning_rate": 1.86608731752837e-05, "loss": 1.1173, "step": 3342 }, { "epoch": 0.19, "grad_norm": 1.006133794784546, "learning_rate": 1.8659944419317275e-05, "loss": 0.5819, "step": 3343 }, { "epoch": 0.19, "grad_norm": 1.7832244634628296, "learning_rate": 1.8659015364520358e-05, "loss": 1.0938, "step": 3344 }, { "epoch": 0.19, "grad_norm": 2.0531816482543945, "learning_rate": 1.8658086010925012e-05, "loss": 1.0564, "step": 3345 }, { "epoch": 0.19, "grad_norm": 2.0361785888671875, "learning_rate": 1.8657156358563298e-05, "loss": 1.1287, "step": 3346 }, { "epoch": 0.19, "grad_norm": 1.9186803102493286, "learning_rate": 1.8656226407467307e-05, "loss": 1.0378, "step": 3347 }, { "epoch": 0.19, "grad_norm": 1.0930813550949097, "learning_rate": 1.8655296157669118e-05, "loss": 0.6092, "step": 3348 }, { "epoch": 0.19, "grad_norm": 1.7512125968933105, "learning_rate": 1.8654365609200837e-05, "loss": 1.0061, "step": 3349 }, { "epoch": 0.19, "grad_norm": 1.8459097146987915, "learning_rate": 1.8653434762094577e-05, "loss": 1.0309, "step": 3350 }, { "epoch": 0.19, "grad_norm": 1.8281456232070923, "learning_rate": 1.8652503616382453e-05, "loss": 1.0914, "step": 3351 }, { "epoch": 0.19, "grad_norm": 1.8712741136550903, "learning_rate": 1.86515721720966e-05, "loss": 1.12, "step": 3352 }, { "epoch": 0.19, "grad_norm": 1.785926342010498, "learning_rate": 1.865064042926916e-05, "loss": 0.9756, "step": 3353 }, { "epoch": 0.19, "grad_norm": 1.8476792573928833, "learning_rate": 1.8649708387932278e-05, "loss": 1.0182, "step": 3354 }, { "epoch": 0.19, "grad_norm": 2.1880877017974854, "learning_rate": 1.8648776048118123e-05, "loss": 1.0432, "step": 3355 }, { "epoch": 0.19, "grad_norm": 2.0624444484710693, "learning_rate": 1.864784340985887e-05, "loss": 1.0405, "step": 3356 }, { "epoch": 0.19, "grad_norm": 2.2413864135742188, "learning_rate": 1.8646910473186696e-05, "loss": 1.1001, "step": 3357 }, { "epoch": 0.19, "grad_norm": 1.9817475080490112, "learning_rate": 1.8645977238133794e-05, "loss": 1.0978, "step": 3358 }, { "epoch": 0.19, "grad_norm": 1.786462664604187, "learning_rate": 1.8645043704732367e-05, "loss": 1.0761, "step": 3359 }, { "epoch": 0.19, "grad_norm": 2.049424409866333, "learning_rate": 1.8644109873014637e-05, "loss": 1.0944, "step": 3360 }, { "epoch": 0.19, "grad_norm": 2.22489595413208, "learning_rate": 1.8643175743012822e-05, "loss": 1.068, "step": 3361 }, { "epoch": 0.19, "grad_norm": 1.8220051527023315, "learning_rate": 1.864224131475915e-05, "loss": 1.132, "step": 3362 }, { "epoch": 0.19, "grad_norm": 1.9313114881515503, "learning_rate": 1.8641306588285873e-05, "loss": 1.0718, "step": 3363 }, { "epoch": 0.19, "grad_norm": 1.8049237728118896, "learning_rate": 1.8640371563625246e-05, "loss": 1.0327, "step": 3364 }, { "epoch": 0.19, "grad_norm": 1.8267239332199097, "learning_rate": 1.863943624080953e-05, "loss": 1.0397, "step": 3365 }, { "epoch": 0.19, "grad_norm": 2.0097880363464355, "learning_rate": 1.8638500619871004e-05, "loss": 1.0868, "step": 3366 }, { "epoch": 0.19, "grad_norm": 1.853818655014038, "learning_rate": 1.8637564700841953e-05, "loss": 1.0749, "step": 3367 }, { "epoch": 0.19, "grad_norm": 1.8577972650527954, "learning_rate": 1.8636628483754672e-05, "loss": 1.0314, "step": 3368 }, { "epoch": 0.19, "grad_norm": 1.9461338520050049, "learning_rate": 1.8635691968641465e-05, "loss": 1.0079, "step": 3369 }, { "epoch": 0.19, "grad_norm": 1.8037561178207397, "learning_rate": 1.8634755155534653e-05, "loss": 1.0111, "step": 3370 }, { "epoch": 0.19, "grad_norm": 1.9555385112762451, "learning_rate": 1.863381804446656e-05, "loss": 1.038, "step": 3371 }, { "epoch": 0.19, "grad_norm": 2.1431832313537598, "learning_rate": 1.8632880635469526e-05, "loss": 1.1583, "step": 3372 }, { "epoch": 0.19, "grad_norm": 1.827445387840271, "learning_rate": 1.8631942928575896e-05, "loss": 1.0799, "step": 3373 }, { "epoch": 0.19, "grad_norm": 1.9537386894226074, "learning_rate": 1.8631004923818025e-05, "loss": 1.0936, "step": 3374 }, { "epoch": 0.19, "grad_norm": 1.8883380889892578, "learning_rate": 1.8630066621228285e-05, "loss": 1.0691, "step": 3375 }, { "epoch": 0.19, "grad_norm": 1.809682011604309, "learning_rate": 1.8629128020839054e-05, "loss": 1.0347, "step": 3376 }, { "epoch": 0.19, "grad_norm": 2.0518288612365723, "learning_rate": 1.8628189122682716e-05, "loss": 1.0668, "step": 3377 }, { "epoch": 0.19, "grad_norm": 1.9575802087783813, "learning_rate": 1.8627249926791678e-05, "loss": 1.0137, "step": 3378 }, { "epoch": 0.19, "grad_norm": 1.9513120651245117, "learning_rate": 1.8626310433198344e-05, "loss": 1.0154, "step": 3379 }, { "epoch": 0.19, "grad_norm": 1.9968680143356323, "learning_rate": 1.862537064193513e-05, "loss": 1.1619, "step": 3380 }, { "epoch": 0.19, "grad_norm": 1.7917680740356445, "learning_rate": 1.8624430553034472e-05, "loss": 1.0194, "step": 3381 }, { "epoch": 0.19, "grad_norm": 1.778450608253479, "learning_rate": 1.8623490166528807e-05, "loss": 1.0188, "step": 3382 }, { "epoch": 0.19, "grad_norm": 1.8233938217163086, "learning_rate": 1.8622549482450584e-05, "loss": 0.9837, "step": 3383 }, { "epoch": 0.19, "grad_norm": 1.8853827714920044, "learning_rate": 1.8621608500832265e-05, "loss": 1.0428, "step": 3384 }, { "epoch": 0.19, "grad_norm": 1.9374910593032837, "learning_rate": 1.862066722170632e-05, "loss": 1.0215, "step": 3385 }, { "epoch": 0.19, "grad_norm": 1.876924991607666, "learning_rate": 1.8619725645105228e-05, "loss": 0.9934, "step": 3386 }, { "epoch": 0.19, "grad_norm": 1.7990772724151611, "learning_rate": 1.8618783771061487e-05, "loss": 1.0593, "step": 3387 }, { "epoch": 0.19, "grad_norm": 2.000581979751587, "learning_rate": 1.8617841599607588e-05, "loss": 1.093, "step": 3388 }, { "epoch": 0.19, "grad_norm": 1.9209024906158447, "learning_rate": 1.861689913077605e-05, "loss": 1.0887, "step": 3389 }, { "epoch": 0.19, "grad_norm": 1.840140700340271, "learning_rate": 1.8615956364599395e-05, "loss": 0.9985, "step": 3390 }, { "epoch": 0.19, "grad_norm": 1.940855622291565, "learning_rate": 1.8615013301110153e-05, "loss": 1.08, "step": 3391 }, { "epoch": 0.19, "grad_norm": 1.937943696975708, "learning_rate": 1.8614069940340867e-05, "loss": 1.057, "step": 3392 }, { "epoch": 0.19, "grad_norm": 1.9030269384384155, "learning_rate": 1.8613126282324092e-05, "loss": 1.0093, "step": 3393 }, { "epoch": 0.19, "grad_norm": 2.1251370906829834, "learning_rate": 1.8612182327092386e-05, "loss": 1.0717, "step": 3394 }, { "epoch": 0.19, "grad_norm": 2.0540411472320557, "learning_rate": 1.8611238074678324e-05, "loss": 1.0871, "step": 3395 }, { "epoch": 0.19, "grad_norm": 1.980051875114441, "learning_rate": 1.8610293525114492e-05, "loss": 1.1254, "step": 3396 }, { "epoch": 0.19, "grad_norm": 1.9048339128494263, "learning_rate": 1.8609348678433485e-05, "loss": 1.0603, "step": 3397 }, { "epoch": 0.19, "grad_norm": 1.9517136812210083, "learning_rate": 1.8608403534667902e-05, "loss": 1.0888, "step": 3398 }, { "epoch": 0.19, "grad_norm": 1.987550139427185, "learning_rate": 1.860745809385036e-05, "loss": 1.0881, "step": 3399 }, { "epoch": 0.19, "grad_norm": 1.9593169689178467, "learning_rate": 1.8606512356013482e-05, "loss": 0.9967, "step": 3400 }, { "epoch": 0.2, "grad_norm": 1.9651020765304565, "learning_rate": 1.8605566321189907e-05, "loss": 1.0839, "step": 3401 }, { "epoch": 0.2, "grad_norm": 2.277466058731079, "learning_rate": 1.860461998941227e-05, "loss": 1.0311, "step": 3402 }, { "epoch": 0.2, "grad_norm": 1.9531781673431396, "learning_rate": 1.860367336071324e-05, "loss": 1.0481, "step": 3403 }, { "epoch": 0.2, "grad_norm": 2.2110350131988525, "learning_rate": 1.8602726435125472e-05, "loss": 1.0074, "step": 3404 }, { "epoch": 0.2, "grad_norm": 1.9735784530639648, "learning_rate": 1.8601779212681646e-05, "loss": 1.0323, "step": 3405 }, { "epoch": 0.2, "grad_norm": 2.0155084133148193, "learning_rate": 1.860083169341445e-05, "loss": 1.0625, "step": 3406 }, { "epoch": 0.2, "grad_norm": 1.830021858215332, "learning_rate": 1.8599883877356576e-05, "loss": 1.09, "step": 3407 }, { "epoch": 0.2, "grad_norm": 2.08437442779541, "learning_rate": 1.8598935764540732e-05, "loss": 1.0314, "step": 3408 }, { "epoch": 0.2, "grad_norm": 2.030372381210327, "learning_rate": 1.8597987354999635e-05, "loss": 1.0192, "step": 3409 }, { "epoch": 0.2, "grad_norm": 2.255364418029785, "learning_rate": 1.8597038648766013e-05, "loss": 1.0897, "step": 3410 }, { "epoch": 0.2, "grad_norm": 2.0484447479248047, "learning_rate": 1.85960896458726e-05, "loss": 1.0403, "step": 3411 }, { "epoch": 0.2, "grad_norm": 1.9664161205291748, "learning_rate": 1.859514034635215e-05, "loss": 1.1266, "step": 3412 }, { "epoch": 0.2, "grad_norm": 2.064326763153076, "learning_rate": 1.8594190750237412e-05, "loss": 1.0785, "step": 3413 }, { "epoch": 0.2, "grad_norm": 1.8610503673553467, "learning_rate": 1.859324085756116e-05, "loss": 1.0605, "step": 3414 }, { "epoch": 0.2, "grad_norm": 2.1601450443267822, "learning_rate": 1.859229066835617e-05, "loss": 1.0711, "step": 3415 }, { "epoch": 0.2, "grad_norm": 1.9858617782592773, "learning_rate": 1.8591340182655227e-05, "loss": 1.0829, "step": 3416 }, { "epoch": 0.2, "grad_norm": 1.9081016778945923, "learning_rate": 1.8590389400491137e-05, "loss": 0.9656, "step": 3417 }, { "epoch": 0.2, "grad_norm": 1.811676263809204, "learning_rate": 1.8589438321896707e-05, "loss": 1.0363, "step": 3418 }, { "epoch": 0.2, "grad_norm": 1.9044095277786255, "learning_rate": 1.858848694690475e-05, "loss": 0.9673, "step": 3419 }, { "epoch": 0.2, "grad_norm": 1.9564266204833984, "learning_rate": 1.8587535275548102e-05, "loss": 0.9847, "step": 3420 }, { "epoch": 0.2, "grad_norm": 1.8827168941497803, "learning_rate": 1.85865833078596e-05, "loss": 1.1169, "step": 3421 }, { "epoch": 0.2, "grad_norm": 2.0502302646636963, "learning_rate": 1.858563104387209e-05, "loss": 1.0226, "step": 3422 }, { "epoch": 0.2, "grad_norm": 1.9666664600372314, "learning_rate": 1.8584678483618437e-05, "loss": 1.0455, "step": 3423 }, { "epoch": 0.2, "grad_norm": 1.9862865209579468, "learning_rate": 1.858372562713151e-05, "loss": 1.0348, "step": 3424 }, { "epoch": 0.2, "grad_norm": 2.1505987644195557, "learning_rate": 1.8582772474444192e-05, "loss": 1.0554, "step": 3425 }, { "epoch": 0.2, "grad_norm": 1.7604612112045288, "learning_rate": 1.858181902558937e-05, "loss": 1.0212, "step": 3426 }, { "epoch": 0.2, "grad_norm": 1.2355408668518066, "learning_rate": 1.8580865280599943e-05, "loss": 0.591, "step": 3427 }, { "epoch": 0.2, "grad_norm": 1.901374101638794, "learning_rate": 1.8579911239508827e-05, "loss": 1.0276, "step": 3428 }, { "epoch": 0.2, "grad_norm": 2.189349412918091, "learning_rate": 1.8578956902348945e-05, "loss": 1.0036, "step": 3429 }, { "epoch": 0.2, "grad_norm": 2.777085065841675, "learning_rate": 1.857800226915322e-05, "loss": 1.0639, "step": 3430 }, { "epoch": 0.2, "grad_norm": 1.7158477306365967, "learning_rate": 1.8577047339954597e-05, "loss": 0.9864, "step": 3431 }, { "epoch": 0.2, "grad_norm": 1.9763731956481934, "learning_rate": 1.8576092114786034e-05, "loss": 1.0439, "step": 3432 }, { "epoch": 0.2, "grad_norm": 1.9182837009429932, "learning_rate": 1.8575136593680483e-05, "loss": 1.0567, "step": 3433 }, { "epoch": 0.2, "grad_norm": 2.181162118911743, "learning_rate": 1.8574180776670924e-05, "loss": 1.0551, "step": 3434 }, { "epoch": 0.2, "grad_norm": 1.9289743900299072, "learning_rate": 1.8573224663790338e-05, "loss": 1.0667, "step": 3435 }, { "epoch": 0.2, "grad_norm": 1.7823630571365356, "learning_rate": 1.8572268255071718e-05, "loss": 1.0212, "step": 3436 }, { "epoch": 0.2, "grad_norm": 1.8837275505065918, "learning_rate": 1.8571311550548064e-05, "loss": 1.0507, "step": 3437 }, { "epoch": 0.2, "grad_norm": 1.8886528015136719, "learning_rate": 1.8570354550252398e-05, "loss": 1.0988, "step": 3438 }, { "epoch": 0.2, "grad_norm": 1.7002193927764893, "learning_rate": 1.856939725421773e-05, "loss": 1.0583, "step": 3439 }, { "epoch": 0.2, "grad_norm": 1.813125491142273, "learning_rate": 1.85684396624771e-05, "loss": 0.9866, "step": 3440 }, { "epoch": 0.2, "grad_norm": 1.9589003324508667, "learning_rate": 1.8567481775063556e-05, "loss": 1.0748, "step": 3441 }, { "epoch": 0.2, "grad_norm": 1.978636622428894, "learning_rate": 1.8566523592010143e-05, "loss": 1.0436, "step": 3442 }, { "epoch": 0.2, "grad_norm": 2.0496766567230225, "learning_rate": 1.8565565113349934e-05, "loss": 1.0731, "step": 3443 }, { "epoch": 0.2, "grad_norm": 2.119873046875, "learning_rate": 1.8564606339116e-05, "loss": 1.0264, "step": 3444 }, { "epoch": 0.2, "grad_norm": 2.0026862621307373, "learning_rate": 1.856364726934143e-05, "loss": 1.1001, "step": 3445 }, { "epoch": 0.2, "grad_norm": 1.878288745880127, "learning_rate": 1.8562687904059307e-05, "loss": 1.081, "step": 3446 }, { "epoch": 0.2, "grad_norm": 1.7916675806045532, "learning_rate": 1.8561728243302745e-05, "loss": 1.0133, "step": 3447 }, { "epoch": 0.2, "grad_norm": 1.8412566184997559, "learning_rate": 1.856076828710486e-05, "loss": 1.1449, "step": 3448 }, { "epoch": 0.2, "grad_norm": 2.0950841903686523, "learning_rate": 1.855980803549877e-05, "loss": 1.078, "step": 3449 }, { "epoch": 0.2, "grad_norm": 1.8982582092285156, "learning_rate": 1.855884748851762e-05, "loss": 0.9412, "step": 3450 }, { "epoch": 0.2, "grad_norm": 1.7044872045516968, "learning_rate": 1.8557886646194553e-05, "loss": 1.0361, "step": 3451 }, { "epoch": 0.2, "grad_norm": 1.9325010776519775, "learning_rate": 1.855692550856272e-05, "loss": 1.1081, "step": 3452 }, { "epoch": 0.2, "grad_norm": 2.0346431732177734, "learning_rate": 1.855596407565529e-05, "loss": 1.094, "step": 3453 }, { "epoch": 0.2, "grad_norm": 2.117912530899048, "learning_rate": 1.855500234750544e-05, "loss": 1.0912, "step": 3454 }, { "epoch": 0.2, "grad_norm": 1.9300957918167114, "learning_rate": 1.855404032414636e-05, "loss": 1.021, "step": 3455 }, { "epoch": 0.2, "grad_norm": 1.9170085191726685, "learning_rate": 1.855307800561124e-05, "loss": 1.0564, "step": 3456 }, { "epoch": 0.2, "grad_norm": 1.8575444221496582, "learning_rate": 1.855211539193329e-05, "loss": 1.0836, "step": 3457 }, { "epoch": 0.2, "grad_norm": 1.9159964323043823, "learning_rate": 1.8551152483145728e-05, "loss": 1.0474, "step": 3458 }, { "epoch": 0.2, "grad_norm": 1.6806856393814087, "learning_rate": 1.855018927928178e-05, "loss": 0.9806, "step": 3459 }, { "epoch": 0.2, "grad_norm": 2.2402334213256836, "learning_rate": 1.8549225780374683e-05, "loss": 1.1379, "step": 3460 }, { "epoch": 0.2, "grad_norm": 1.904198169708252, "learning_rate": 1.854826198645769e-05, "loss": 1.0081, "step": 3461 }, { "epoch": 0.2, "grad_norm": 1.7969858646392822, "learning_rate": 1.8547297897564053e-05, "loss": 1.0711, "step": 3462 }, { "epoch": 0.2, "grad_norm": 1.9743412733078003, "learning_rate": 1.8546333513727037e-05, "loss": 1.0907, "step": 3463 }, { "epoch": 0.2, "grad_norm": 2.043072462081909, "learning_rate": 1.8545368834979925e-05, "loss": 1.0075, "step": 3464 }, { "epoch": 0.2, "grad_norm": 1.9212602376937866, "learning_rate": 1.854440386135601e-05, "loss": 1.0117, "step": 3465 }, { "epoch": 0.2, "grad_norm": 1.7230650186538696, "learning_rate": 1.8543438592888585e-05, "loss": 1.0933, "step": 3466 }, { "epoch": 0.2, "grad_norm": 1.9055861234664917, "learning_rate": 1.8542473029610954e-05, "loss": 1.0289, "step": 3467 }, { "epoch": 0.2, "grad_norm": 1.9236326217651367, "learning_rate": 1.8541507171556445e-05, "loss": 1.0834, "step": 3468 }, { "epoch": 0.2, "grad_norm": 1.863621473312378, "learning_rate": 1.8540541018758383e-05, "loss": 1.0588, "step": 3469 }, { "epoch": 0.2, "grad_norm": 1.8660657405853271, "learning_rate": 1.853957457125011e-05, "loss": 1.0279, "step": 3470 }, { "epoch": 0.2, "grad_norm": 1.7194898128509521, "learning_rate": 1.853860782906497e-05, "loss": 1.0292, "step": 3471 }, { "epoch": 0.2, "grad_norm": 2.0675582885742188, "learning_rate": 1.8537640792236326e-05, "loss": 1.056, "step": 3472 }, { "epoch": 0.2, "grad_norm": 1.9705510139465332, "learning_rate": 1.8536673460797546e-05, "loss": 1.0987, "step": 3473 }, { "epoch": 0.2, "grad_norm": 1.6624950170516968, "learning_rate": 1.853570583478201e-05, "loss": 0.9893, "step": 3474 }, { "epoch": 0.2, "grad_norm": 2.1362228393554688, "learning_rate": 1.8534737914223113e-05, "loss": 1.1606, "step": 3475 }, { "epoch": 0.2, "grad_norm": 2.08270001411438, "learning_rate": 1.853376969915425e-05, "loss": 1.0718, "step": 3476 }, { "epoch": 0.2, "grad_norm": 1.8066281080245972, "learning_rate": 1.8532801189608833e-05, "loss": 1.14, "step": 3477 }, { "epoch": 0.2, "grad_norm": 1.8894119262695312, "learning_rate": 1.8531832385620282e-05, "loss": 1.1019, "step": 3478 }, { "epoch": 0.2, "grad_norm": 2.0659496784210205, "learning_rate": 1.8530863287222026e-05, "loss": 1.0491, "step": 3479 }, { "epoch": 0.2, "grad_norm": 1.9510080814361572, "learning_rate": 1.852989389444751e-05, "loss": 1.1009, "step": 3480 }, { "epoch": 0.2, "grad_norm": 1.8240132331848145, "learning_rate": 1.8528924207330183e-05, "loss": 1.0875, "step": 3481 }, { "epoch": 0.2, "grad_norm": 1.8344157934188843, "learning_rate": 1.8527954225903508e-05, "loss": 1.0414, "step": 3482 }, { "epoch": 0.2, "grad_norm": 1.9397079944610596, "learning_rate": 1.852698395020095e-05, "loss": 1.0563, "step": 3483 }, { "epoch": 0.2, "grad_norm": 2.0839195251464844, "learning_rate": 1.8526013380255998e-05, "loss": 0.979, "step": 3484 }, { "epoch": 0.2, "grad_norm": 1.7874321937561035, "learning_rate": 1.852504251610214e-05, "loss": 1.1049, "step": 3485 }, { "epoch": 0.2, "grad_norm": 2.026287078857422, "learning_rate": 1.852407135777288e-05, "loss": 1.1652, "step": 3486 }, { "epoch": 0.2, "grad_norm": 1.9050946235656738, "learning_rate": 1.8523099905301726e-05, "loss": 1.0989, "step": 3487 }, { "epoch": 0.2, "grad_norm": 1.9575291872024536, "learning_rate": 1.8522128158722204e-05, "loss": 0.9963, "step": 3488 }, { "epoch": 0.2, "grad_norm": 1.9820384979248047, "learning_rate": 1.8521156118067846e-05, "loss": 1.095, "step": 3489 }, { "epoch": 0.2, "grad_norm": 1.7276792526245117, "learning_rate": 1.8520183783372193e-05, "loss": 1.0635, "step": 3490 }, { "epoch": 0.2, "grad_norm": 1.7215031385421753, "learning_rate": 1.8519211154668796e-05, "loss": 0.9974, "step": 3491 }, { "epoch": 0.2, "grad_norm": 1.8326390981674194, "learning_rate": 1.851823823199122e-05, "loss": 1.0732, "step": 3492 }, { "epoch": 0.2, "grad_norm": 1.2233939170837402, "learning_rate": 1.8517265015373035e-05, "loss": 0.681, "step": 3493 }, { "epoch": 0.2, "grad_norm": 1.9728939533233643, "learning_rate": 1.851629150484783e-05, "loss": 1.0716, "step": 3494 }, { "epoch": 0.2, "grad_norm": 2.0011157989501953, "learning_rate": 1.8515317700449195e-05, "loss": 1.043, "step": 3495 }, { "epoch": 0.2, "grad_norm": 1.7853963375091553, "learning_rate": 1.851434360221073e-05, "loss": 1.0615, "step": 3496 }, { "epoch": 0.2, "grad_norm": 1.888091802597046, "learning_rate": 1.8513369210166052e-05, "loss": 1.0872, "step": 3497 }, { "epoch": 0.2, "grad_norm": 2.10886287689209, "learning_rate": 1.8512394524348786e-05, "loss": 0.9939, "step": 3498 }, { "epoch": 0.2, "grad_norm": 2.1124346256256104, "learning_rate": 1.851141954479256e-05, "loss": 1.1193, "step": 3499 }, { "epoch": 0.2, "grad_norm": 1.958008050918579, "learning_rate": 1.8510444271531023e-05, "loss": 1.138, "step": 3500 }, { "epoch": 0.2, "grad_norm": 1.8277994394302368, "learning_rate": 1.8509468704597827e-05, "loss": 1.0539, "step": 3501 }, { "epoch": 0.2, "grad_norm": 1.9879865646362305, "learning_rate": 1.8508492844026636e-05, "loss": 1.075, "step": 3502 }, { "epoch": 0.2, "grad_norm": 1.9964429140090942, "learning_rate": 1.8507516689851126e-05, "loss": 1.035, "step": 3503 }, { "epoch": 0.2, "grad_norm": 1.8694556951522827, "learning_rate": 1.8506540242104977e-05, "loss": 1.1122, "step": 3504 }, { "epoch": 0.2, "grad_norm": 1.8541712760925293, "learning_rate": 1.8505563500821888e-05, "loss": 1.087, "step": 3505 }, { "epoch": 0.2, "grad_norm": 1.8021937608718872, "learning_rate": 1.850458646603556e-05, "loss": 1.0213, "step": 3506 }, { "epoch": 0.2, "grad_norm": 1.835868000984192, "learning_rate": 1.8503609137779712e-05, "loss": 1.0971, "step": 3507 }, { "epoch": 0.2, "grad_norm": 1.9602806568145752, "learning_rate": 1.8502631516088067e-05, "loss": 1.0101, "step": 3508 }, { "epoch": 0.2, "grad_norm": 1.9882681369781494, "learning_rate": 1.850165360099436e-05, "loss": 0.9701, "step": 3509 }, { "epoch": 0.2, "grad_norm": 2.0302655696868896, "learning_rate": 1.850067539253233e-05, "loss": 1.0132, "step": 3510 }, { "epoch": 0.2, "grad_norm": 2.0800695419311523, "learning_rate": 1.8499696890735742e-05, "loss": 1.0684, "step": 3511 }, { "epoch": 0.2, "grad_norm": 1.922810673713684, "learning_rate": 1.8498718095638357e-05, "loss": 0.9766, "step": 3512 }, { "epoch": 0.2, "grad_norm": 1.787361979484558, "learning_rate": 1.849773900727395e-05, "loss": 1.0926, "step": 3513 }, { "epoch": 0.2, "grad_norm": 1.770956039428711, "learning_rate": 1.849675962567631e-05, "loss": 1.0391, "step": 3514 }, { "epoch": 0.2, "grad_norm": 1.9045885801315308, "learning_rate": 1.8495779950879226e-05, "loss": 1.0787, "step": 3515 }, { "epoch": 0.2, "grad_norm": 1.2738239765167236, "learning_rate": 1.8494799982916512e-05, "loss": 0.6159, "step": 3516 }, { "epoch": 0.2, "grad_norm": 2.0047459602355957, "learning_rate": 1.8493819721821977e-05, "loss": 1.0209, "step": 3517 }, { "epoch": 0.2, "grad_norm": 1.923448920249939, "learning_rate": 1.849283916762945e-05, "loss": 0.9357, "step": 3518 }, { "epoch": 0.2, "grad_norm": 2.0513598918914795, "learning_rate": 1.849185832037277e-05, "loss": 1.1697, "step": 3519 }, { "epoch": 0.2, "grad_norm": 1.8110374212265015, "learning_rate": 1.8490877180085774e-05, "loss": 1.1264, "step": 3520 }, { "epoch": 0.2, "grad_norm": 1.9033101797103882, "learning_rate": 1.8489895746802333e-05, "loss": 1.0743, "step": 3521 }, { "epoch": 0.2, "grad_norm": 1.8522331714630127, "learning_rate": 1.84889140205563e-05, "loss": 1.0904, "step": 3522 }, { "epoch": 0.2, "grad_norm": 1.831287145614624, "learning_rate": 1.848793200138156e-05, "loss": 1.074, "step": 3523 }, { "epoch": 0.2, "grad_norm": 2.103928565979004, "learning_rate": 1.8486949689311995e-05, "loss": 1.0133, "step": 3524 }, { "epoch": 0.2, "grad_norm": 2.107046604156494, "learning_rate": 1.8485967084381502e-05, "loss": 1.1111, "step": 3525 }, { "epoch": 0.2, "grad_norm": 1.8897216320037842, "learning_rate": 1.8484984186623994e-05, "loss": 1.0076, "step": 3526 }, { "epoch": 0.2, "grad_norm": 2.059037208557129, "learning_rate": 1.848400099607338e-05, "loss": 1.0314, "step": 3527 }, { "epoch": 0.2, "grad_norm": 2.891284227371216, "learning_rate": 1.848301751276359e-05, "loss": 0.9937, "step": 3528 }, { "epoch": 0.2, "grad_norm": 1.828878402709961, "learning_rate": 1.8482033736728563e-05, "loss": 1.0455, "step": 3529 }, { "epoch": 0.2, "grad_norm": 1.8921170234680176, "learning_rate": 1.8481049668002246e-05, "loss": 1.0579, "step": 3530 }, { "epoch": 0.2, "grad_norm": 1.9101365804672241, "learning_rate": 1.84800653066186e-05, "loss": 0.9849, "step": 3531 }, { "epoch": 0.2, "grad_norm": 2.079787015914917, "learning_rate": 1.8479080652611584e-05, "loss": 1.0394, "step": 3532 }, { "epoch": 0.2, "grad_norm": 2.0759100914001465, "learning_rate": 1.8478095706015177e-05, "loss": 1.0842, "step": 3533 }, { "epoch": 0.2, "grad_norm": 2.086484909057617, "learning_rate": 1.847711046686337e-05, "loss": 1.0996, "step": 3534 }, { "epoch": 0.2, "grad_norm": 1.9296739101409912, "learning_rate": 1.8476124935190168e-05, "loss": 1.0444, "step": 3535 }, { "epoch": 0.2, "grad_norm": 2.2011492252349854, "learning_rate": 1.8475139111029565e-05, "loss": 1.0634, "step": 3536 }, { "epoch": 0.2, "grad_norm": 1.2124176025390625, "learning_rate": 1.8474152994415588e-05, "loss": 0.5315, "step": 3537 }, { "epoch": 0.2, "grad_norm": 1.7310326099395752, "learning_rate": 1.8473166585382266e-05, "loss": 0.9949, "step": 3538 }, { "epoch": 0.2, "grad_norm": 1.79548180103302, "learning_rate": 1.847217988396363e-05, "loss": 1.0466, "step": 3539 }, { "epoch": 0.2, "grad_norm": 2.035682439804077, "learning_rate": 1.847119289019373e-05, "loss": 1.0682, "step": 3540 }, { "epoch": 0.2, "grad_norm": 1.9045240879058838, "learning_rate": 1.847020560410663e-05, "loss": 1.0211, "step": 3541 }, { "epoch": 0.2, "grad_norm": 1.8681426048278809, "learning_rate": 1.8469218025736393e-05, "loss": 1.0566, "step": 3542 }, { "epoch": 0.2, "grad_norm": 1.7367184162139893, "learning_rate": 1.8468230155117106e-05, "loss": 1.0097, "step": 3543 }, { "epoch": 0.2, "grad_norm": 1.842137336730957, "learning_rate": 1.8467241992282842e-05, "loss": 1.0695, "step": 3544 }, { "epoch": 0.2, "grad_norm": 2.0482707023620605, "learning_rate": 1.8466253537267714e-05, "loss": 1.0629, "step": 3545 }, { "epoch": 0.2, "grad_norm": 1.8125511407852173, "learning_rate": 1.8465264790105827e-05, "loss": 1.0347, "step": 3546 }, { "epoch": 0.2, "grad_norm": 1.749755620956421, "learning_rate": 1.84642757508313e-05, "loss": 1.0436, "step": 3547 }, { "epoch": 0.2, "grad_norm": 1.8040045499801636, "learning_rate": 1.8463286419478256e-05, "loss": 1.0531, "step": 3548 }, { "epoch": 0.2, "grad_norm": 1.7575068473815918, "learning_rate": 1.8462296796080843e-05, "loss": 0.9501, "step": 3549 }, { "epoch": 0.2, "grad_norm": 1.8651032447814941, "learning_rate": 1.84613068806732e-05, "loss": 1.0134, "step": 3550 }, { "epoch": 0.2, "grad_norm": 1.925028920173645, "learning_rate": 1.84603166732895e-05, "loss": 1.1533, "step": 3551 }, { "epoch": 0.2, "grad_norm": 1.9582946300506592, "learning_rate": 1.84593261739639e-05, "loss": 1.0809, "step": 3552 }, { "epoch": 0.2, "grad_norm": 1.9587719440460205, "learning_rate": 1.8458335382730585e-05, "loss": 1.0741, "step": 3553 }, { "epoch": 0.2, "grad_norm": 1.8932819366455078, "learning_rate": 1.8457344299623747e-05, "loss": 1.0782, "step": 3554 }, { "epoch": 0.2, "grad_norm": 1.6897850036621094, "learning_rate": 1.8456352924677575e-05, "loss": 1.0466, "step": 3555 }, { "epoch": 0.2, "grad_norm": 1.9577382802963257, "learning_rate": 1.845536125792629e-05, "loss": 1.1555, "step": 3556 }, { "epoch": 0.2, "grad_norm": 1.9770492315292358, "learning_rate": 1.8454369299404106e-05, "loss": 1.0394, "step": 3557 }, { "epoch": 0.2, "grad_norm": 2.0685949325561523, "learning_rate": 1.8453377049145254e-05, "loss": 1.1444, "step": 3558 }, { "epoch": 0.2, "grad_norm": 1.9234570264816284, "learning_rate": 1.845238450718397e-05, "loss": 1.0338, "step": 3559 }, { "epoch": 0.2, "grad_norm": 1.9207348823547363, "learning_rate": 1.8451391673554514e-05, "loss": 1.0503, "step": 3560 }, { "epoch": 0.2, "grad_norm": 2.0540645122528076, "learning_rate": 1.8450398548291135e-05, "loss": 1.0758, "step": 3561 }, { "epoch": 0.2, "grad_norm": 1.8427129983901978, "learning_rate": 1.844940513142811e-05, "loss": 1.0298, "step": 3562 }, { "epoch": 0.2, "grad_norm": 1.7799781560897827, "learning_rate": 1.8448411422999714e-05, "loss": 1.0992, "step": 3563 }, { "epoch": 0.2, "grad_norm": 2.090266227722168, "learning_rate": 1.844741742304024e-05, "loss": 1.0868, "step": 3564 }, { "epoch": 0.2, "grad_norm": 1.8227992057800293, "learning_rate": 1.844642313158399e-05, "loss": 0.9584, "step": 3565 }, { "epoch": 0.2, "grad_norm": 1.9015003442764282, "learning_rate": 1.8445428548665268e-05, "loss": 1.0416, "step": 3566 }, { "epoch": 0.2, "grad_norm": 2.138782501220703, "learning_rate": 1.84444336743184e-05, "loss": 1.0707, "step": 3567 }, { "epoch": 0.2, "grad_norm": 1.1841679811477661, "learning_rate": 1.8443438508577712e-05, "loss": 0.6803, "step": 3568 }, { "epoch": 0.2, "grad_norm": 1.939046025276184, "learning_rate": 1.844244305147755e-05, "loss": 1.0225, "step": 3569 }, { "epoch": 0.2, "grad_norm": 1.09078848361969, "learning_rate": 1.8441447303052262e-05, "loss": 0.6489, "step": 3570 }, { "epoch": 0.2, "grad_norm": 2.1279072761535645, "learning_rate": 1.8440451263336204e-05, "loss": 0.9861, "step": 3571 }, { "epoch": 0.2, "grad_norm": 2.095539093017578, "learning_rate": 1.8439454932363757e-05, "loss": 0.9878, "step": 3572 }, { "epoch": 0.2, "grad_norm": 2.234347105026245, "learning_rate": 1.8438458310169287e-05, "loss": 1.0193, "step": 3573 }, { "epoch": 0.2, "grad_norm": 2.1745190620422363, "learning_rate": 1.8437461396787198e-05, "loss": 1.0471, "step": 3574 }, { "epoch": 0.21, "grad_norm": 2.1308064460754395, "learning_rate": 1.843646419225188e-05, "loss": 1.1014, "step": 3575 }, { "epoch": 0.21, "grad_norm": 1.7232040166854858, "learning_rate": 1.8435466696597758e-05, "loss": 1.0707, "step": 3576 }, { "epoch": 0.21, "grad_norm": 1.9038829803466797, "learning_rate": 1.8434468909859235e-05, "loss": 1.0573, "step": 3577 }, { "epoch": 0.21, "grad_norm": 1.886281132698059, "learning_rate": 1.8433470832070758e-05, "loss": 1.0526, "step": 3578 }, { "epoch": 0.21, "grad_norm": 2.254626512527466, "learning_rate": 1.8432472463266754e-05, "loss": 1.0314, "step": 3579 }, { "epoch": 0.21, "grad_norm": 2.07153058052063, "learning_rate": 1.8431473803481682e-05, "loss": 1.0698, "step": 3580 }, { "epoch": 0.21, "grad_norm": 1.941596269607544, "learning_rate": 1.843047485275e-05, "loss": 1.03, "step": 3581 }, { "epoch": 0.21, "grad_norm": 1.8435593843460083, "learning_rate": 1.8429475611106186e-05, "loss": 1.1292, "step": 3582 }, { "epoch": 0.21, "grad_norm": 1.9185932874679565, "learning_rate": 1.842847607858471e-05, "loss": 1.0221, "step": 3583 }, { "epoch": 0.21, "grad_norm": 1.8758773803710938, "learning_rate": 1.8427476255220074e-05, "loss": 1.076, "step": 3584 }, { "epoch": 0.21, "grad_norm": 1.9200859069824219, "learning_rate": 1.842647614104677e-05, "loss": 1.1302, "step": 3585 }, { "epoch": 0.21, "grad_norm": 1.8744639158248901, "learning_rate": 1.8425475736099316e-05, "loss": 1.0557, "step": 3586 }, { "epoch": 0.21, "grad_norm": 2.028812885284424, "learning_rate": 1.8424475040412224e-05, "loss": 1.1477, "step": 3587 }, { "epoch": 0.21, "grad_norm": 1.9398387670516968, "learning_rate": 1.8423474054020034e-05, "loss": 0.9862, "step": 3588 }, { "epoch": 0.21, "grad_norm": 1.8377379179000854, "learning_rate": 1.8422472776957287e-05, "loss": 1.046, "step": 3589 }, { "epoch": 0.21, "grad_norm": 1.956353783607483, "learning_rate": 1.8421471209258528e-05, "loss": 1.0648, "step": 3590 }, { "epoch": 0.21, "grad_norm": 1.8554823398590088, "learning_rate": 1.8420469350958323e-05, "loss": 1.0575, "step": 3591 }, { "epoch": 0.21, "grad_norm": 2.1042957305908203, "learning_rate": 1.8419467202091245e-05, "loss": 1.0791, "step": 3592 }, { "epoch": 0.21, "grad_norm": 1.987033486366272, "learning_rate": 1.841846476269187e-05, "loss": 1.0193, "step": 3593 }, { "epoch": 0.21, "grad_norm": 1.8211930990219116, "learning_rate": 1.8417462032794792e-05, "loss": 1.053, "step": 3594 }, { "epoch": 0.21, "grad_norm": 3.3913238048553467, "learning_rate": 1.8416459012434613e-05, "loss": 1.0419, "step": 3595 }, { "epoch": 0.21, "grad_norm": 1.7238380908966064, "learning_rate": 1.8415455701645942e-05, "loss": 1.0998, "step": 3596 }, { "epoch": 0.21, "grad_norm": 1.732304334640503, "learning_rate": 1.8414452100463407e-05, "loss": 1.0255, "step": 3597 }, { "epoch": 0.21, "grad_norm": 1.8758773803710938, "learning_rate": 1.8413448208921632e-05, "loss": 0.9784, "step": 3598 }, { "epoch": 0.21, "grad_norm": 1.9735000133514404, "learning_rate": 1.841244402705526e-05, "loss": 1.0186, "step": 3599 }, { "epoch": 0.21, "grad_norm": 1.9037452936172485, "learning_rate": 1.8411439554898946e-05, "loss": 1.0458, "step": 3600 }, { "epoch": 0.21, "grad_norm": 1.8801592588424683, "learning_rate": 1.841043479248735e-05, "loss": 1.0962, "step": 3601 }, { "epoch": 0.21, "grad_norm": 1.825833797454834, "learning_rate": 1.8409429739855144e-05, "loss": 1.1485, "step": 3602 }, { "epoch": 0.21, "grad_norm": 1.905579924583435, "learning_rate": 1.8408424397037004e-05, "loss": 0.9764, "step": 3603 }, { "epoch": 0.21, "grad_norm": 2.1809136867523193, "learning_rate": 1.8407418764067627e-05, "loss": 1.1141, "step": 3604 }, { "epoch": 0.21, "grad_norm": 2.0089707374572754, "learning_rate": 1.8406412840981717e-05, "loss": 1.0615, "step": 3605 }, { "epoch": 0.21, "grad_norm": 1.8194379806518555, "learning_rate": 1.8405406627813978e-05, "loss": 1.0775, "step": 3606 }, { "epoch": 0.21, "grad_norm": 1.9196796417236328, "learning_rate": 1.840440012459914e-05, "loss": 1.0786, "step": 3607 }, { "epoch": 0.21, "grad_norm": 1.8719857931137085, "learning_rate": 1.8403393331371925e-05, "loss": 1.1196, "step": 3608 }, { "epoch": 0.21, "grad_norm": 1.9615795612335205, "learning_rate": 1.8402386248167084e-05, "loss": 0.9984, "step": 3609 }, { "epoch": 0.21, "grad_norm": 1.973728060722351, "learning_rate": 1.8401378875019366e-05, "loss": 1.0674, "step": 3610 }, { "epoch": 0.21, "grad_norm": 1.8056793212890625, "learning_rate": 1.840037121196353e-05, "loss": 0.9996, "step": 3611 }, { "epoch": 0.21, "grad_norm": 1.862863540649414, "learning_rate": 1.8399363259034345e-05, "loss": 1.0211, "step": 3612 }, { "epoch": 0.21, "grad_norm": 1.9475704431533813, "learning_rate": 1.8398355016266604e-05, "loss": 1.041, "step": 3613 }, { "epoch": 0.21, "grad_norm": 1.9655884504318237, "learning_rate": 1.8397346483695085e-05, "loss": 1.0674, "step": 3614 }, { "epoch": 0.21, "grad_norm": 1.957646131515503, "learning_rate": 1.8396337661354597e-05, "loss": 1.1249, "step": 3615 }, { "epoch": 0.21, "grad_norm": 2.0246176719665527, "learning_rate": 1.8395328549279955e-05, "loss": 1.0956, "step": 3616 }, { "epoch": 0.21, "grad_norm": 1.9781476259231567, "learning_rate": 1.839431914750597e-05, "loss": 1.0352, "step": 3617 }, { "epoch": 0.21, "grad_norm": 2.0354580879211426, "learning_rate": 1.8393309456067482e-05, "loss": 0.9871, "step": 3618 }, { "epoch": 0.21, "grad_norm": 1.9261014461517334, "learning_rate": 1.8392299474999333e-05, "loss": 1.0737, "step": 3619 }, { "epoch": 0.21, "grad_norm": 1.8048653602600098, "learning_rate": 1.839128920433637e-05, "loss": 1.1286, "step": 3620 }, { "epoch": 0.21, "grad_norm": 1.799453854560852, "learning_rate": 1.8390278644113454e-05, "loss": 1.1792, "step": 3621 }, { "epoch": 0.21, "grad_norm": 1.790726900100708, "learning_rate": 1.8389267794365465e-05, "loss": 1.0064, "step": 3622 }, { "epoch": 0.21, "grad_norm": 2.0814177989959717, "learning_rate": 1.8388256655127273e-05, "loss": 1.0578, "step": 3623 }, { "epoch": 0.21, "grad_norm": 1.81192147731781, "learning_rate": 1.838724522643378e-05, "loss": 1.0251, "step": 3624 }, { "epoch": 0.21, "grad_norm": 1.9143368005752563, "learning_rate": 1.838623350831988e-05, "loss": 1.1239, "step": 3625 }, { "epoch": 0.21, "grad_norm": 1.8702278137207031, "learning_rate": 1.838522150082049e-05, "loss": 1.1061, "step": 3626 }, { "epoch": 0.21, "grad_norm": 1.6882659196853638, "learning_rate": 1.838420920397053e-05, "loss": 0.9589, "step": 3627 }, { "epoch": 0.21, "grad_norm": 1.862864375114441, "learning_rate": 1.838319661780493e-05, "loss": 1.0136, "step": 3628 }, { "epoch": 0.21, "grad_norm": 1.1542258262634277, "learning_rate": 1.8382183742358627e-05, "loss": 0.7157, "step": 3629 }, { "epoch": 0.21, "grad_norm": 1.9286577701568604, "learning_rate": 1.8381170577666584e-05, "loss": 1.0604, "step": 3630 }, { "epoch": 0.21, "grad_norm": 2.041041135787964, "learning_rate": 1.8380157123763755e-05, "loss": 1.0544, "step": 3631 }, { "epoch": 0.21, "grad_norm": 1.990922451019287, "learning_rate": 1.837914338068511e-05, "loss": 1.0884, "step": 3632 }, { "epoch": 0.21, "grad_norm": 1.0906779766082764, "learning_rate": 1.8378129348465636e-05, "loss": 0.6396, "step": 3633 }, { "epoch": 0.21, "grad_norm": 2.1289966106414795, "learning_rate": 1.837711502714032e-05, "loss": 0.9812, "step": 3634 }, { "epoch": 0.21, "grad_norm": 1.916698694229126, "learning_rate": 1.8376100416744166e-05, "loss": 1.1153, "step": 3635 }, { "epoch": 0.21, "grad_norm": 2.053574800491333, "learning_rate": 1.8375085517312185e-05, "loss": 1.0624, "step": 3636 }, { "epoch": 0.21, "grad_norm": 1.9801465272903442, "learning_rate": 1.8374070328879395e-05, "loss": 1.0463, "step": 3637 }, { "epoch": 0.21, "grad_norm": 2.109030246734619, "learning_rate": 1.8373054851480832e-05, "loss": 1.1164, "step": 3638 }, { "epoch": 0.21, "grad_norm": 1.9094069004058838, "learning_rate": 1.8372039085151537e-05, "loss": 1.1147, "step": 3639 }, { "epoch": 0.21, "grad_norm": 1.9854366779327393, "learning_rate": 1.8371023029926552e-05, "loss": 1.0585, "step": 3640 }, { "epoch": 0.21, "grad_norm": 1.7335041761398315, "learning_rate": 1.8370006685840953e-05, "loss": 0.9966, "step": 3641 }, { "epoch": 0.21, "grad_norm": 2.0833959579467773, "learning_rate": 1.8368990052929804e-05, "loss": 1.1091, "step": 3642 }, { "epoch": 0.21, "grad_norm": 1.952837347984314, "learning_rate": 1.8367973131228182e-05, "loss": 1.0576, "step": 3643 }, { "epoch": 0.21, "grad_norm": 1.7813069820404053, "learning_rate": 1.8366955920771183e-05, "loss": 1.0638, "step": 3644 }, { "epoch": 0.21, "grad_norm": 2.0346055030822754, "learning_rate": 1.836593842159391e-05, "loss": 0.9939, "step": 3645 }, { "epoch": 0.21, "grad_norm": 1.8170734643936157, "learning_rate": 1.836492063373147e-05, "loss": 0.9769, "step": 3646 }, { "epoch": 0.21, "grad_norm": 1.9153265953063965, "learning_rate": 1.8363902557218985e-05, "loss": 1.1302, "step": 3647 }, { "epoch": 0.21, "grad_norm": 1.9709967374801636, "learning_rate": 1.8362884192091588e-05, "loss": 1.1009, "step": 3648 }, { "epoch": 0.21, "grad_norm": 1.7561345100402832, "learning_rate": 1.8361865538384416e-05, "loss": 1.0893, "step": 3649 }, { "epoch": 0.21, "grad_norm": 1.8429702520370483, "learning_rate": 1.8360846596132625e-05, "loss": 1.0619, "step": 3650 }, { "epoch": 0.21, "grad_norm": 1.8159621953964233, "learning_rate": 1.835982736537137e-05, "loss": 1.0749, "step": 3651 }, { "epoch": 0.21, "grad_norm": 2.2080602645874023, "learning_rate": 1.8358807846135828e-05, "loss": 1.0278, "step": 3652 }, { "epoch": 0.21, "grad_norm": 2.0812442302703857, "learning_rate": 1.8357788038461174e-05, "loss": 1.1144, "step": 3653 }, { "epoch": 0.21, "grad_norm": 2.031829595565796, "learning_rate": 1.83567679423826e-05, "loss": 1.0162, "step": 3654 }, { "epoch": 0.21, "grad_norm": 1.7498936653137207, "learning_rate": 1.835574755793531e-05, "loss": 1.0801, "step": 3655 }, { "epoch": 0.21, "grad_norm": 1.816951036453247, "learning_rate": 1.8354726885154512e-05, "loss": 0.9975, "step": 3656 }, { "epoch": 0.21, "grad_norm": 1.783962368965149, "learning_rate": 1.835370592407543e-05, "loss": 1.0146, "step": 3657 }, { "epoch": 0.21, "grad_norm": 2.015430212020874, "learning_rate": 1.8352684674733287e-05, "loss": 1.0431, "step": 3658 }, { "epoch": 0.21, "grad_norm": 1.9650129079818726, "learning_rate": 1.8351663137163333e-05, "loss": 1.0084, "step": 3659 }, { "epoch": 0.21, "grad_norm": 1.8860795497894287, "learning_rate": 1.8350641311400813e-05, "loss": 1.0484, "step": 3660 }, { "epoch": 0.21, "grad_norm": 1.6969646215438843, "learning_rate": 1.8349619197480985e-05, "loss": 0.9705, "step": 3661 }, { "epoch": 0.21, "grad_norm": 1.6013673543930054, "learning_rate": 1.834859679543912e-05, "loss": 1.0254, "step": 3662 }, { "epoch": 0.21, "grad_norm": 1.8783289194107056, "learning_rate": 1.8347574105310508e-05, "loss": 1.134, "step": 3663 }, { "epoch": 0.21, "grad_norm": 1.8577848672866821, "learning_rate": 1.8346551127130424e-05, "loss": 1.013, "step": 3664 }, { "epoch": 0.21, "grad_norm": 1.7345956563949585, "learning_rate": 1.834552786093418e-05, "loss": 0.9427, "step": 3665 }, { "epoch": 0.21, "grad_norm": 1.9436713457107544, "learning_rate": 1.834450430675708e-05, "loss": 1.0893, "step": 3666 }, { "epoch": 0.21, "grad_norm": 1.9044986963272095, "learning_rate": 1.8343480464634448e-05, "loss": 1.0126, "step": 3667 }, { "epoch": 0.21, "grad_norm": 1.9315669536590576, "learning_rate": 1.834245633460161e-05, "loss": 1.0972, "step": 3668 }, { "epoch": 0.21, "grad_norm": 1.8200615644454956, "learning_rate": 1.8341431916693908e-05, "loss": 0.9836, "step": 3669 }, { "epoch": 0.21, "grad_norm": 2.425615072250366, "learning_rate": 1.8340407210946695e-05, "loss": 1.0994, "step": 3670 }, { "epoch": 0.21, "grad_norm": 1.1943169832229614, "learning_rate": 1.833938221739532e-05, "loss": 0.5752, "step": 3671 }, { "epoch": 0.21, "grad_norm": 1.7126250267028809, "learning_rate": 1.8338356936075165e-05, "loss": 0.9713, "step": 3672 }, { "epoch": 0.21, "grad_norm": 1.8821932077407837, "learning_rate": 1.83373313670216e-05, "loss": 1.1028, "step": 3673 }, { "epoch": 0.21, "grad_norm": 2.068502426147461, "learning_rate": 1.8336305510270025e-05, "loss": 1.0516, "step": 3674 }, { "epoch": 0.21, "grad_norm": 1.74851393699646, "learning_rate": 1.833527936585583e-05, "loss": 0.9962, "step": 3675 }, { "epoch": 0.21, "grad_norm": 1.8025509119033813, "learning_rate": 1.833425293381443e-05, "loss": 0.9765, "step": 3676 }, { "epoch": 0.21, "grad_norm": 1.7696292400360107, "learning_rate": 1.8333226214181236e-05, "loss": 1.1622, "step": 3677 }, { "epoch": 0.21, "grad_norm": 1.8834055662155151, "learning_rate": 1.833219920699169e-05, "loss": 1.0331, "step": 3678 }, { "epoch": 0.21, "grad_norm": 1.986501693725586, "learning_rate": 1.833117191228122e-05, "loss": 1.0145, "step": 3679 }, { "epoch": 0.21, "grad_norm": 1.8849607706069946, "learning_rate": 1.8330144330085283e-05, "loss": 1.0548, "step": 3680 }, { "epoch": 0.21, "grad_norm": 2.0000438690185547, "learning_rate": 1.8329116460439332e-05, "loss": 1.1064, "step": 3681 }, { "epoch": 0.21, "grad_norm": 1.923536777496338, "learning_rate": 1.832808830337884e-05, "loss": 1.1025, "step": 3682 }, { "epoch": 0.21, "grad_norm": 1.9162832498550415, "learning_rate": 1.8327059858939283e-05, "loss": 1.0627, "step": 3683 }, { "epoch": 0.21, "grad_norm": 2.022949457168579, "learning_rate": 1.832603112715615e-05, "loss": 1.0995, "step": 3684 }, { "epoch": 0.21, "grad_norm": 1.844857096672058, "learning_rate": 1.832500210806494e-05, "loss": 1.0216, "step": 3685 }, { "epoch": 0.21, "grad_norm": 1.9799689054489136, "learning_rate": 1.8323972801701166e-05, "loss": 1.0612, "step": 3686 }, { "epoch": 0.21, "grad_norm": 2.1007819175720215, "learning_rate": 1.832294320810034e-05, "loss": 1.064, "step": 3687 }, { "epoch": 0.21, "grad_norm": 1.1841572523117065, "learning_rate": 1.8321913327297997e-05, "loss": 0.6001, "step": 3688 }, { "epoch": 0.21, "grad_norm": 2.3432774543762207, "learning_rate": 1.832088315932967e-05, "loss": 1.0285, "step": 3689 }, { "epoch": 0.21, "grad_norm": 1.96401846408844, "learning_rate": 1.8319852704230903e-05, "loss": 1.02, "step": 3690 }, { "epoch": 0.21, "grad_norm": 1.9793747663497925, "learning_rate": 1.8318821962037266e-05, "loss": 0.9902, "step": 3691 }, { "epoch": 0.21, "grad_norm": 1.8920596837997437, "learning_rate": 1.8317790932784315e-05, "loss": 1.0429, "step": 3692 }, { "epoch": 0.21, "grad_norm": 1.148195505142212, "learning_rate": 1.8316759616507637e-05, "loss": 0.6211, "step": 3693 }, { "epoch": 0.21, "grad_norm": 2.043264865875244, "learning_rate": 1.8315728013242816e-05, "loss": 1.004, "step": 3694 }, { "epoch": 0.21, "grad_norm": 2.2051589488983154, "learning_rate": 1.8314696123025456e-05, "loss": 1.0406, "step": 3695 }, { "epoch": 0.21, "grad_norm": 2.099808931350708, "learning_rate": 1.8313663945891155e-05, "loss": 1.0684, "step": 3696 }, { "epoch": 0.21, "grad_norm": 1.8362714052200317, "learning_rate": 1.8312631481875532e-05, "loss": 1.0776, "step": 3697 }, { "epoch": 0.21, "grad_norm": 2.0712838172912598, "learning_rate": 1.8311598731014218e-05, "loss": 1.0563, "step": 3698 }, { "epoch": 0.21, "grad_norm": 2.089859962463379, "learning_rate": 1.831056569334285e-05, "loss": 1.0467, "step": 3699 }, { "epoch": 0.21, "grad_norm": 1.8799108266830444, "learning_rate": 1.830953236889707e-05, "loss": 1.0748, "step": 3700 }, { "epoch": 0.21, "grad_norm": 1.9862091541290283, "learning_rate": 1.8308498757712548e-05, "loss": 1.0638, "step": 3701 }, { "epoch": 0.21, "grad_norm": 2.047929525375366, "learning_rate": 1.830746485982494e-05, "loss": 0.9358, "step": 3702 }, { "epoch": 0.21, "grad_norm": 1.9723416566848755, "learning_rate": 1.8306430675269922e-05, "loss": 1.0568, "step": 3703 }, { "epoch": 0.21, "grad_norm": 1.8958004713058472, "learning_rate": 1.8305396204083185e-05, "loss": 1.0427, "step": 3704 }, { "epoch": 0.21, "grad_norm": 1.230467677116394, "learning_rate": 1.830436144630043e-05, "loss": 0.577, "step": 3705 }, { "epoch": 0.21, "grad_norm": 1.8862000703811646, "learning_rate": 1.8303326401957357e-05, "loss": 1.0562, "step": 3706 }, { "epoch": 0.21, "grad_norm": 2.2858850955963135, "learning_rate": 1.830229107108968e-05, "loss": 1.1023, "step": 3707 }, { "epoch": 0.21, "grad_norm": 1.9600496292114258, "learning_rate": 1.8301255453733135e-05, "loss": 1.0856, "step": 3708 }, { "epoch": 0.21, "grad_norm": 1.9136651754379272, "learning_rate": 1.830021954992345e-05, "loss": 1.1109, "step": 3709 }, { "epoch": 0.21, "grad_norm": 1.7664834260940552, "learning_rate": 1.8299183359696376e-05, "loss": 1.0726, "step": 3710 }, { "epoch": 0.21, "grad_norm": 1.7448902130126953, "learning_rate": 1.8298146883087663e-05, "loss": 1.0322, "step": 3711 }, { "epoch": 0.21, "grad_norm": 1.8630681037902832, "learning_rate": 1.8297110120133082e-05, "loss": 1.0952, "step": 3712 }, { "epoch": 0.21, "grad_norm": 1.9166920185089111, "learning_rate": 1.8296073070868413e-05, "loss": 1.1426, "step": 3713 }, { "epoch": 0.21, "grad_norm": 1.922372817993164, "learning_rate": 1.8295035735329433e-05, "loss": 1.0175, "step": 3714 }, { "epoch": 0.21, "grad_norm": 2.150103807449341, "learning_rate": 1.8293998113551942e-05, "loss": 1.1089, "step": 3715 }, { "epoch": 0.21, "grad_norm": 1.8648571968078613, "learning_rate": 1.8292960205571742e-05, "loss": 1.0083, "step": 3716 }, { "epoch": 0.21, "grad_norm": 1.9953773021697998, "learning_rate": 1.8291922011424655e-05, "loss": 1.0355, "step": 3717 }, { "epoch": 0.21, "grad_norm": 1.792277216911316, "learning_rate": 1.82908835311465e-05, "loss": 1.058, "step": 3718 }, { "epoch": 0.21, "grad_norm": 1.157439947128296, "learning_rate": 1.828984476477311e-05, "loss": 0.6291, "step": 3719 }, { "epoch": 0.21, "grad_norm": 1.8322031497955322, "learning_rate": 1.828880571234034e-05, "loss": 1.0024, "step": 3720 }, { "epoch": 0.21, "grad_norm": 2.0621535778045654, "learning_rate": 1.8287766373884034e-05, "loss": 1.0, "step": 3721 }, { "epoch": 0.21, "grad_norm": 1.8825392723083496, "learning_rate": 1.828672674944006e-05, "loss": 1.0303, "step": 3722 }, { "epoch": 0.21, "grad_norm": 1.9418011903762817, "learning_rate": 1.82856868390443e-05, "loss": 0.9897, "step": 3723 }, { "epoch": 0.21, "grad_norm": 1.7633953094482422, "learning_rate": 1.828464664273263e-05, "loss": 1.0081, "step": 3724 }, { "epoch": 0.21, "grad_norm": 1.840912103652954, "learning_rate": 1.8283606160540945e-05, "loss": 1.0405, "step": 3725 }, { "epoch": 0.21, "grad_norm": 1.8150430917739868, "learning_rate": 1.8282565392505152e-05, "loss": 1.0015, "step": 3726 }, { "epoch": 0.21, "grad_norm": 1.797620415687561, "learning_rate": 1.8281524338661164e-05, "loss": 0.977, "step": 3727 }, { "epoch": 0.21, "grad_norm": 1.8623230457305908, "learning_rate": 1.8280482999044905e-05, "loss": 1.0798, "step": 3728 }, { "epoch": 0.21, "grad_norm": 1.8047195672988892, "learning_rate": 1.8279441373692307e-05, "loss": 1.0924, "step": 3729 }, { "epoch": 0.21, "grad_norm": 2.004473924636841, "learning_rate": 1.8278399462639314e-05, "loss": 1.1145, "step": 3730 }, { "epoch": 0.21, "grad_norm": 1.983450174331665, "learning_rate": 1.8277357265921885e-05, "loss": 1.1546, "step": 3731 }, { "epoch": 0.21, "grad_norm": 1.800101637840271, "learning_rate": 1.827631478357597e-05, "loss": 1.0722, "step": 3732 }, { "epoch": 0.21, "grad_norm": 1.7286827564239502, "learning_rate": 1.827527201563756e-05, "loss": 1.0384, "step": 3733 }, { "epoch": 0.21, "grad_norm": 1.9980391263961792, "learning_rate": 1.8274228962142623e-05, "loss": 1.1244, "step": 3734 }, { "epoch": 0.21, "grad_norm": 1.7999382019042969, "learning_rate": 1.8273185623127162e-05, "loss": 1.1046, "step": 3735 }, { "epoch": 0.21, "grad_norm": 1.8693015575408936, "learning_rate": 1.8272141998627172e-05, "loss": 1.0314, "step": 3736 }, { "epoch": 0.21, "grad_norm": 1.8798842430114746, "learning_rate": 1.8271098088678667e-05, "loss": 1.0771, "step": 3737 }, { "epoch": 0.21, "grad_norm": 2.0632619857788086, "learning_rate": 1.8270053893317675e-05, "loss": 1.0244, "step": 3738 }, { "epoch": 0.21, "grad_norm": 1.885330319404602, "learning_rate": 1.8269009412580223e-05, "loss": 1.0393, "step": 3739 }, { "epoch": 0.21, "grad_norm": 1.0820708274841309, "learning_rate": 1.8267964646502356e-05, "loss": 0.5998, "step": 3740 }, { "epoch": 0.21, "grad_norm": 1.836755633354187, "learning_rate": 1.8266919595120126e-05, "loss": 1.0356, "step": 3741 }, { "epoch": 0.21, "grad_norm": 1.150808334350586, "learning_rate": 1.8265874258469593e-05, "loss": 0.6624, "step": 3742 }, { "epoch": 0.21, "grad_norm": 1.8213987350463867, "learning_rate": 1.8264828636586824e-05, "loss": 1.0488, "step": 3743 }, { "epoch": 0.21, "grad_norm": 1.7294355630874634, "learning_rate": 1.8263782729507912e-05, "loss": 1.0708, "step": 3744 }, { "epoch": 0.21, "grad_norm": 1.0099728107452393, "learning_rate": 1.826273653726894e-05, "loss": 0.5983, "step": 3745 }, { "epoch": 0.21, "grad_norm": 1.9380937814712524, "learning_rate": 1.8261690059906006e-05, "loss": 1.1014, "step": 3746 }, { "epoch": 0.21, "grad_norm": 1.1081117391586304, "learning_rate": 1.826064329745523e-05, "loss": 0.5906, "step": 3747 }, { "epoch": 0.21, "grad_norm": 1.816329836845398, "learning_rate": 1.825959624995273e-05, "loss": 1.0881, "step": 3748 }, { "epoch": 0.22, "grad_norm": 1.6400295495986938, "learning_rate": 1.825854891743464e-05, "loss": 1.0661, "step": 3749 }, { "epoch": 0.22, "grad_norm": 1.898664951324463, "learning_rate": 1.825750129993709e-05, "loss": 1.0862, "step": 3750 }, { "epoch": 0.22, "grad_norm": 1.7420895099639893, "learning_rate": 1.8256453397496233e-05, "loss": 1.073, "step": 3751 }, { "epoch": 0.22, "grad_norm": 1.8840978145599365, "learning_rate": 1.825540521014824e-05, "loss": 1.0515, "step": 3752 }, { "epoch": 0.22, "grad_norm": 1.8699301481246948, "learning_rate": 1.825435673792927e-05, "loss": 1.0868, "step": 3753 }, { "epoch": 0.22, "grad_norm": 1.8465110063552856, "learning_rate": 1.825330798087551e-05, "loss": 1.0654, "step": 3754 }, { "epoch": 0.22, "grad_norm": 1.9346208572387695, "learning_rate": 1.825225893902314e-05, "loss": 1.1373, "step": 3755 }, { "epoch": 0.22, "grad_norm": 1.7104047536849976, "learning_rate": 1.8251209612408375e-05, "loss": 1.0191, "step": 3756 }, { "epoch": 0.22, "grad_norm": 1.9211071729660034, "learning_rate": 1.8250160001067408e-05, "loss": 0.9918, "step": 3757 }, { "epoch": 0.22, "grad_norm": 1.994146466255188, "learning_rate": 1.8249110105036468e-05, "loss": 1.0419, "step": 3758 }, { "epoch": 0.22, "grad_norm": 1.9202604293823242, "learning_rate": 1.824805992435178e-05, "loss": 1.0157, "step": 3759 }, { "epoch": 0.22, "grad_norm": 1.7936993837356567, "learning_rate": 1.8247009459049585e-05, "loss": 1.0232, "step": 3760 }, { "epoch": 0.22, "grad_norm": 1.9010344743728638, "learning_rate": 1.824595870916613e-05, "loss": 1.0992, "step": 3761 }, { "epoch": 0.22, "grad_norm": 1.8472884893417358, "learning_rate": 1.8244907674737672e-05, "loss": 0.9942, "step": 3762 }, { "epoch": 0.22, "grad_norm": 1.7190096378326416, "learning_rate": 1.8243856355800485e-05, "loss": 1.138, "step": 3763 }, { "epoch": 0.22, "grad_norm": 1.8474845886230469, "learning_rate": 1.8242804752390844e-05, "loss": 1.1125, "step": 3764 }, { "epoch": 0.22, "grad_norm": 1.1347293853759766, "learning_rate": 1.8241752864545032e-05, "loss": 0.6192, "step": 3765 }, { "epoch": 0.22, "grad_norm": 1.795087218284607, "learning_rate": 1.8240700692299357e-05, "loss": 1.0523, "step": 3766 }, { "epoch": 0.22, "grad_norm": 1.9675209522247314, "learning_rate": 1.8239648235690115e-05, "loss": 1.0007, "step": 3767 }, { "epoch": 0.22, "grad_norm": 1.8827106952667236, "learning_rate": 1.8238595494753633e-05, "loss": 1.1101, "step": 3768 }, { "epoch": 0.22, "grad_norm": 1.8106427192687988, "learning_rate": 1.823754246952623e-05, "loss": 1.0869, "step": 3769 }, { "epoch": 0.22, "grad_norm": 1.9490290880203247, "learning_rate": 1.8236489160044247e-05, "loss": 0.9808, "step": 3770 }, { "epoch": 0.22, "grad_norm": 2.100027322769165, "learning_rate": 1.8235435566344034e-05, "loss": 1.0246, "step": 3771 }, { "epoch": 0.22, "grad_norm": 1.7031404972076416, "learning_rate": 1.8234381688461943e-05, "loss": 1.0067, "step": 3772 }, { "epoch": 0.22, "grad_norm": 1.6785268783569336, "learning_rate": 1.8233327526434342e-05, "loss": 1.0026, "step": 3773 }, { "epoch": 0.22, "grad_norm": 1.702819585800171, "learning_rate": 1.8232273080297606e-05, "loss": 1.0284, "step": 3774 }, { "epoch": 0.22, "grad_norm": 1.9300413131713867, "learning_rate": 1.8231218350088124e-05, "loss": 1.0865, "step": 3775 }, { "epoch": 0.22, "grad_norm": 2.0553581714630127, "learning_rate": 1.8230163335842288e-05, "loss": 1.0674, "step": 3776 }, { "epoch": 0.22, "grad_norm": 1.7888224124908447, "learning_rate": 1.822910803759651e-05, "loss": 0.9917, "step": 3777 }, { "epoch": 0.22, "grad_norm": 2.0336437225341797, "learning_rate": 1.8228052455387194e-05, "loss": 1.0064, "step": 3778 }, { "epoch": 0.22, "grad_norm": 1.813965082168579, "learning_rate": 1.8226996589250775e-05, "loss": 1.079, "step": 3779 }, { "epoch": 0.22, "grad_norm": 1.9376791715621948, "learning_rate": 1.8225940439223684e-05, "loss": 1.0624, "step": 3780 }, { "epoch": 0.22, "grad_norm": 1.9597405195236206, "learning_rate": 1.8224884005342367e-05, "loss": 1.1065, "step": 3781 }, { "epoch": 0.22, "grad_norm": 1.8007218837738037, "learning_rate": 1.822382728764328e-05, "loss": 0.9965, "step": 3782 }, { "epoch": 0.22, "grad_norm": 2.034085273742676, "learning_rate": 1.8222770286162884e-05, "loss": 1.0951, "step": 3783 }, { "epoch": 0.22, "grad_norm": 2.0285332202911377, "learning_rate": 1.8221713000937653e-05, "loss": 0.9982, "step": 3784 }, { "epoch": 0.22, "grad_norm": 1.7554453611373901, "learning_rate": 1.8220655432004073e-05, "loss": 1.018, "step": 3785 }, { "epoch": 0.22, "grad_norm": 1.8582874536514282, "learning_rate": 1.821959757939864e-05, "loss": 0.9533, "step": 3786 }, { "epoch": 0.22, "grad_norm": 2.01412296295166, "learning_rate": 1.8218539443157855e-05, "loss": 1.0441, "step": 3787 }, { "epoch": 0.22, "grad_norm": 1.8150533437728882, "learning_rate": 1.8217481023318232e-05, "loss": 1.0775, "step": 3788 }, { "epoch": 0.22, "grad_norm": 2.001422166824341, "learning_rate": 1.8216422319916288e-05, "loss": 1.0918, "step": 3789 }, { "epoch": 0.22, "grad_norm": 1.7037065029144287, "learning_rate": 1.8215363332988568e-05, "loss": 0.9502, "step": 3790 }, { "epoch": 0.22, "grad_norm": 1.753208041191101, "learning_rate": 1.8214304062571605e-05, "loss": 1.0031, "step": 3791 }, { "epoch": 0.22, "grad_norm": 1.0934661626815796, "learning_rate": 1.821324450870195e-05, "loss": 0.5973, "step": 3792 }, { "epoch": 0.22, "grad_norm": 1.9076343774795532, "learning_rate": 1.821218467141618e-05, "loss": 1.0807, "step": 3793 }, { "epoch": 0.22, "grad_norm": 1.901941180229187, "learning_rate": 1.8211124550750853e-05, "loss": 1.0313, "step": 3794 }, { "epoch": 0.22, "grad_norm": 2.100982666015625, "learning_rate": 1.821006414674255e-05, "loss": 1.1513, "step": 3795 }, { "epoch": 0.22, "grad_norm": 1.02571439743042, "learning_rate": 1.820900345942787e-05, "loss": 0.6006, "step": 3796 }, { "epoch": 0.22, "grad_norm": 2.005312204360962, "learning_rate": 1.8207942488843416e-05, "loss": 1.0021, "step": 3797 }, { "epoch": 0.22, "grad_norm": 1.7871723175048828, "learning_rate": 1.8206881235025786e-05, "loss": 1.0317, "step": 3798 }, { "epoch": 0.22, "grad_norm": 2.0059335231781006, "learning_rate": 1.8205819698011615e-05, "loss": 1.1389, "step": 3799 }, { "epoch": 0.22, "grad_norm": 1.8365846872329712, "learning_rate": 1.8204757877837528e-05, "loss": 1.0041, "step": 3800 }, { "epoch": 0.22, "grad_norm": 1.8571128845214844, "learning_rate": 1.8203695774540167e-05, "loss": 1.0772, "step": 3801 }, { "epoch": 0.22, "grad_norm": 1.8227801322937012, "learning_rate": 1.8202633388156176e-05, "loss": 1.0904, "step": 3802 }, { "epoch": 0.22, "grad_norm": 1.7804046869277954, "learning_rate": 1.8201570718722225e-05, "loss": 1.1316, "step": 3803 }, { "epoch": 0.22, "grad_norm": 1.7756474018096924, "learning_rate": 1.8200507766274978e-05, "loss": 1.0126, "step": 3804 }, { "epoch": 0.22, "grad_norm": 1.8382691144943237, "learning_rate": 1.819944453085111e-05, "loss": 1.0328, "step": 3805 }, { "epoch": 0.22, "grad_norm": 1.8535228967666626, "learning_rate": 1.8198381012487322e-05, "loss": 1.0575, "step": 3806 }, { "epoch": 0.22, "grad_norm": 1.9102948904037476, "learning_rate": 1.8197317211220302e-05, "loss": 1.0751, "step": 3807 }, { "epoch": 0.22, "grad_norm": 1.9193061590194702, "learning_rate": 1.8196253127086765e-05, "loss": 1.0632, "step": 3808 }, { "epoch": 0.22, "grad_norm": 1.9411903619766235, "learning_rate": 1.819518876012343e-05, "loss": 1.0677, "step": 3809 }, { "epoch": 0.22, "grad_norm": 1.9638195037841797, "learning_rate": 1.819412411036702e-05, "loss": 1.0493, "step": 3810 }, { "epoch": 0.22, "grad_norm": 1.4751646518707275, "learning_rate": 1.8193059177854278e-05, "loss": 0.6146, "step": 3811 }, { "epoch": 0.22, "grad_norm": 1.757002592086792, "learning_rate": 1.819199396262195e-05, "loss": 1.0057, "step": 3812 }, { "epoch": 0.22, "grad_norm": 1.2763197422027588, "learning_rate": 1.819092846470679e-05, "loss": 0.6732, "step": 3813 }, { "epoch": 0.22, "grad_norm": 1.876028299331665, "learning_rate": 1.8189862684145577e-05, "loss": 1.0339, "step": 3814 }, { "epoch": 0.22, "grad_norm": 1.818593978881836, "learning_rate": 1.8188796620975073e-05, "loss": 1.0312, "step": 3815 }, { "epoch": 0.22, "grad_norm": 1.893667459487915, "learning_rate": 1.8187730275232075e-05, "loss": 1.0567, "step": 3816 }, { "epoch": 0.22, "grad_norm": 1.9479000568389893, "learning_rate": 1.8186663646953376e-05, "loss": 0.9315, "step": 3817 }, { "epoch": 0.22, "grad_norm": 1.8655383586883545, "learning_rate": 1.8185596736175782e-05, "loss": 0.9801, "step": 3818 }, { "epoch": 0.22, "grad_norm": 1.8937088251113892, "learning_rate": 1.8184529542936113e-05, "loss": 0.9915, "step": 3819 }, { "epoch": 0.22, "grad_norm": 1.815807819366455, "learning_rate": 1.8183462067271193e-05, "loss": 0.9819, "step": 3820 }, { "epoch": 0.22, "grad_norm": 1.8278753757476807, "learning_rate": 1.8182394309217852e-05, "loss": 1.0679, "step": 3821 }, { "epoch": 0.22, "grad_norm": 1.7043217420578003, "learning_rate": 1.8181326268812946e-05, "loss": 0.9628, "step": 3822 }, { "epoch": 0.22, "grad_norm": 1.6850651502609253, "learning_rate": 1.8180257946093317e-05, "loss": 1.0968, "step": 3823 }, { "epoch": 0.22, "grad_norm": 1.8540571928024292, "learning_rate": 1.817918934109584e-05, "loss": 1.0846, "step": 3824 }, { "epoch": 0.22, "grad_norm": 1.836405873298645, "learning_rate": 1.817812045385739e-05, "loss": 0.9513, "step": 3825 }, { "epoch": 0.22, "grad_norm": 2.0068178176879883, "learning_rate": 1.8177051284414844e-05, "loss": 0.9994, "step": 3826 }, { "epoch": 0.22, "grad_norm": 1.716103434562683, "learning_rate": 1.81759818328051e-05, "loss": 0.5534, "step": 3827 }, { "epoch": 0.22, "grad_norm": 1.8814142942428589, "learning_rate": 1.817491209906506e-05, "loss": 1.1194, "step": 3828 }, { "epoch": 0.22, "grad_norm": 1.9128639698028564, "learning_rate": 1.8173842083231643e-05, "loss": 1.0408, "step": 3829 }, { "epoch": 0.22, "grad_norm": 2.016286611557007, "learning_rate": 1.8172771785341766e-05, "loss": 1.0341, "step": 3830 }, { "epoch": 0.22, "grad_norm": 1.343295693397522, "learning_rate": 1.8171701205432365e-05, "loss": 0.6534, "step": 3831 }, { "epoch": 0.22, "grad_norm": 1.9161622524261475, "learning_rate": 1.8170630343540382e-05, "loss": 1.0144, "step": 3832 }, { "epoch": 0.22, "grad_norm": 1.7268056869506836, "learning_rate": 1.816955919970277e-05, "loss": 1.0105, "step": 3833 }, { "epoch": 0.22, "grad_norm": 1.9056082963943481, "learning_rate": 1.8168487773956493e-05, "loss": 1.0088, "step": 3834 }, { "epoch": 0.22, "grad_norm": 1.8977272510528564, "learning_rate": 1.8167416066338518e-05, "loss": 1.0595, "step": 3835 }, { "epoch": 0.22, "grad_norm": 1.8341658115386963, "learning_rate": 1.816634407688583e-05, "loss": 1.053, "step": 3836 }, { "epoch": 0.22, "grad_norm": 1.966023325920105, "learning_rate": 1.816527180563542e-05, "loss": 1.1511, "step": 3837 }, { "epoch": 0.22, "grad_norm": 2.091402292251587, "learning_rate": 1.8164199252624285e-05, "loss": 1.1314, "step": 3838 }, { "epoch": 0.22, "grad_norm": 1.8178540468215942, "learning_rate": 1.8163126417889444e-05, "loss": 1.052, "step": 3839 }, { "epoch": 0.22, "grad_norm": 2.1357970237731934, "learning_rate": 1.816205330146791e-05, "loss": 1.0246, "step": 3840 }, { "epoch": 0.22, "grad_norm": 1.8733441829681396, "learning_rate": 1.816097990339672e-05, "loss": 1.1315, "step": 3841 }, { "epoch": 0.22, "grad_norm": 1.9849085807800293, "learning_rate": 1.815990622371291e-05, "loss": 1.0377, "step": 3842 }, { "epoch": 0.22, "grad_norm": 2.1089699268341064, "learning_rate": 1.815883226245353e-05, "loss": 1.0951, "step": 3843 }, { "epoch": 0.22, "grad_norm": 1.814235806465149, "learning_rate": 1.8157758019655633e-05, "loss": 1.0507, "step": 3844 }, { "epoch": 0.22, "grad_norm": 1.7230116128921509, "learning_rate": 1.81566834953563e-05, "loss": 1.0042, "step": 3845 }, { "epoch": 0.22, "grad_norm": 1.7044602632522583, "learning_rate": 1.8155608689592604e-05, "loss": 1.0332, "step": 3846 }, { "epoch": 0.22, "grad_norm": 1.63759183883667, "learning_rate": 1.8154533602401634e-05, "loss": 1.0546, "step": 3847 }, { "epoch": 0.22, "grad_norm": 1.8536738157272339, "learning_rate": 1.8153458233820487e-05, "loss": 1.0999, "step": 3848 }, { "epoch": 0.22, "grad_norm": 1.7037180662155151, "learning_rate": 1.8152382583886272e-05, "loss": 1.0081, "step": 3849 }, { "epoch": 0.22, "grad_norm": 2.159395933151245, "learning_rate": 1.815130665263611e-05, "loss": 1.0148, "step": 3850 }, { "epoch": 0.22, "grad_norm": 1.7641210556030273, "learning_rate": 1.815023044010712e-05, "loss": 1.0557, "step": 3851 }, { "epoch": 0.22, "grad_norm": 1.738863468170166, "learning_rate": 1.8149153946336448e-05, "loss": 0.9173, "step": 3852 }, { "epoch": 0.22, "grad_norm": 1.9852241277694702, "learning_rate": 1.8148077171361237e-05, "loss": 1.0803, "step": 3853 }, { "epoch": 0.22, "grad_norm": 1.66620934009552, "learning_rate": 1.814700011521864e-05, "loss": 1.047, "step": 3854 }, { "epoch": 0.22, "grad_norm": 1.9558688402175903, "learning_rate": 1.8145922777945832e-05, "loss": 1.0513, "step": 3855 }, { "epoch": 0.22, "grad_norm": 2.1312966346740723, "learning_rate": 1.814484515957998e-05, "loss": 1.0644, "step": 3856 }, { "epoch": 0.22, "grad_norm": 2.1521785259246826, "learning_rate": 1.8143767260158272e-05, "loss": 1.0731, "step": 3857 }, { "epoch": 0.22, "grad_norm": 1.9486254453659058, "learning_rate": 1.8142689079717908e-05, "loss": 1.0388, "step": 3858 }, { "epoch": 0.22, "grad_norm": 1.9918071031570435, "learning_rate": 1.8141610618296087e-05, "loss": 0.9962, "step": 3859 }, { "epoch": 0.22, "grad_norm": 2.089367389678955, "learning_rate": 1.814053187593003e-05, "loss": 1.0867, "step": 3860 }, { "epoch": 0.22, "grad_norm": 1.987486481666565, "learning_rate": 1.8139452852656955e-05, "loss": 1.0599, "step": 3861 }, { "epoch": 0.22, "grad_norm": 1.958200216293335, "learning_rate": 1.81383735485141e-05, "loss": 0.9996, "step": 3862 }, { "epoch": 0.22, "grad_norm": 1.8884671926498413, "learning_rate": 1.8137293963538705e-05, "loss": 1.0481, "step": 3863 }, { "epoch": 0.22, "grad_norm": 1.927946925163269, "learning_rate": 1.813621409776803e-05, "loss": 1.0794, "step": 3864 }, { "epoch": 0.22, "grad_norm": 1.9613637924194336, "learning_rate": 1.8135133951239327e-05, "loss": 1.0209, "step": 3865 }, { "epoch": 0.22, "grad_norm": 1.9980244636535645, "learning_rate": 1.8134053523989883e-05, "loss": 1.145, "step": 3866 }, { "epoch": 0.22, "grad_norm": 1.8582040071487427, "learning_rate": 1.813297281605697e-05, "loss": 1.0088, "step": 3867 }, { "epoch": 0.22, "grad_norm": 2.085115432739258, "learning_rate": 1.8131891827477884e-05, "loss": 1.1203, "step": 3868 }, { "epoch": 0.22, "grad_norm": 2.1039021015167236, "learning_rate": 1.8130810558289925e-05, "loss": 0.9948, "step": 3869 }, { "epoch": 0.22, "grad_norm": 1.7838964462280273, "learning_rate": 1.812972900853041e-05, "loss": 1.0531, "step": 3870 }, { "epoch": 0.22, "grad_norm": 1.8884449005126953, "learning_rate": 1.8128647178236654e-05, "loss": 0.9906, "step": 3871 }, { "epoch": 0.22, "grad_norm": 1.8240293264389038, "learning_rate": 1.812756506744599e-05, "loss": 1.0183, "step": 3872 }, { "epoch": 0.22, "grad_norm": 1.8504832983016968, "learning_rate": 1.812648267619576e-05, "loss": 1.059, "step": 3873 }, { "epoch": 0.22, "grad_norm": 1.988913893699646, "learning_rate": 1.8125400004523316e-05, "loss": 1.1195, "step": 3874 }, { "epoch": 0.22, "grad_norm": 1.8549951314926147, "learning_rate": 1.812431705246601e-05, "loss": 1.0648, "step": 3875 }, { "epoch": 0.22, "grad_norm": 1.9300460815429688, "learning_rate": 1.812323382006122e-05, "loss": 1.1128, "step": 3876 }, { "epoch": 0.22, "grad_norm": 1.8917664289474487, "learning_rate": 1.812215030734632e-05, "loss": 1.0421, "step": 3877 }, { "epoch": 0.22, "grad_norm": 2.0504982471466064, "learning_rate": 1.8121066514358703e-05, "loss": 0.9862, "step": 3878 }, { "epoch": 0.22, "grad_norm": 1.9585716724395752, "learning_rate": 1.8119982441135765e-05, "loss": 1.0213, "step": 3879 }, { "epoch": 0.22, "grad_norm": 1.8183696269989014, "learning_rate": 1.8118898087714915e-05, "loss": 0.9669, "step": 3880 }, { "epoch": 0.22, "grad_norm": 1.857771396636963, "learning_rate": 1.8117813454133574e-05, "loss": 0.9849, "step": 3881 }, { "epoch": 0.22, "grad_norm": 2.001291036605835, "learning_rate": 1.8116728540429164e-05, "loss": 1.0847, "step": 3882 }, { "epoch": 0.22, "grad_norm": 1.9891024827957153, "learning_rate": 1.8115643346639124e-05, "loss": 1.0465, "step": 3883 }, { "epoch": 0.22, "grad_norm": 1.9021621942520142, "learning_rate": 1.8114557872800906e-05, "loss": 1.0694, "step": 3884 }, { "epoch": 0.22, "grad_norm": 1.8179564476013184, "learning_rate": 1.811347211895196e-05, "loss": 0.9768, "step": 3885 }, { "epoch": 0.22, "grad_norm": 1.8908690214157104, "learning_rate": 1.8112386085129757e-05, "loss": 1.0638, "step": 3886 }, { "epoch": 0.22, "grad_norm": 2.0184385776519775, "learning_rate": 1.811129977137177e-05, "loss": 1.0277, "step": 3887 }, { "epoch": 0.22, "grad_norm": 1.994249939918518, "learning_rate": 1.811021317771549e-05, "loss": 1.023, "step": 3888 }, { "epoch": 0.22, "grad_norm": 1.9378025531768799, "learning_rate": 1.8109126304198402e-05, "loss": 1.0506, "step": 3889 }, { "epoch": 0.22, "grad_norm": 1.799944281578064, "learning_rate": 1.8108039150858018e-05, "loss": 1.0305, "step": 3890 }, { "epoch": 0.22, "grad_norm": 1.7559455633163452, "learning_rate": 1.8106951717731854e-05, "loss": 1.0524, "step": 3891 }, { "epoch": 0.22, "grad_norm": 1.690826654434204, "learning_rate": 1.8105864004857433e-05, "loss": 0.9264, "step": 3892 }, { "epoch": 0.22, "grad_norm": 1.9688596725463867, "learning_rate": 1.810477601227229e-05, "loss": 1.0857, "step": 3893 }, { "epoch": 0.22, "grad_norm": 2.3253986835479736, "learning_rate": 1.8103687740013958e-05, "loss": 1.0815, "step": 3894 }, { "epoch": 0.22, "grad_norm": 1.7403953075408936, "learning_rate": 1.8102599188120006e-05, "loss": 1.009, "step": 3895 }, { "epoch": 0.22, "grad_norm": 1.8972994089126587, "learning_rate": 1.810151035662799e-05, "loss": 1.1136, "step": 3896 }, { "epoch": 0.22, "grad_norm": 1.8872801065444946, "learning_rate": 1.810042124557548e-05, "loss": 0.964, "step": 3897 }, { "epoch": 0.22, "grad_norm": 2.848447561264038, "learning_rate": 1.809933185500006e-05, "loss": 1.0557, "step": 3898 }, { "epoch": 0.22, "grad_norm": 2.015592575073242, "learning_rate": 1.8098242184939324e-05, "loss": 1.05, "step": 3899 }, { "epoch": 0.22, "grad_norm": 1.927268147468567, "learning_rate": 1.8097152235430872e-05, "loss": 1.0204, "step": 3900 }, { "epoch": 0.22, "grad_norm": 2.1515603065490723, "learning_rate": 1.8096062006512308e-05, "loss": 1.0896, "step": 3901 }, { "epoch": 0.22, "grad_norm": 2.051318645477295, "learning_rate": 1.809497149822127e-05, "loss": 1.0001, "step": 3902 }, { "epoch": 0.22, "grad_norm": 1.9856501817703247, "learning_rate": 1.8093880710595372e-05, "loss": 1.0594, "step": 3903 }, { "epoch": 0.22, "grad_norm": 2.1264731884002686, "learning_rate": 1.809278964367226e-05, "loss": 1.0749, "step": 3904 }, { "epoch": 0.22, "grad_norm": 1.848768711090088, "learning_rate": 1.8091698297489585e-05, "loss": 1.1265, "step": 3905 }, { "epoch": 0.22, "grad_norm": 1.8376882076263428, "learning_rate": 1.8090606672085003e-05, "loss": 1.0638, "step": 3906 }, { "epoch": 0.22, "grad_norm": 1.8967822790145874, "learning_rate": 1.808951476749619e-05, "loss": 0.9877, "step": 3907 }, { "epoch": 0.22, "grad_norm": 1.7814844846725464, "learning_rate": 1.8088422583760814e-05, "loss": 1.0653, "step": 3908 }, { "epoch": 0.22, "grad_norm": 2.190186023712158, "learning_rate": 1.808733012091657e-05, "loss": 0.9881, "step": 3909 }, { "epoch": 0.22, "grad_norm": 1.9718918800354004, "learning_rate": 1.8086237379001156e-05, "loss": 1.0011, "step": 3910 }, { "epoch": 0.22, "grad_norm": 2.1417131423950195, "learning_rate": 1.8085144358052276e-05, "loss": 1.1141, "step": 3911 }, { "epoch": 0.22, "grad_norm": 1.9288969039916992, "learning_rate": 1.808405105810765e-05, "loss": 1.0754, "step": 3912 }, { "epoch": 0.22, "grad_norm": 1.8807377815246582, "learning_rate": 1.8082957479205006e-05, "loss": 1.122, "step": 3913 }, { "epoch": 0.22, "grad_norm": 1.9672008752822876, "learning_rate": 1.8081863621382075e-05, "loss": 1.0566, "step": 3914 }, { "epoch": 0.22, "grad_norm": 2.0628933906555176, "learning_rate": 1.8080769484676613e-05, "loss": 1.0905, "step": 3915 }, { "epoch": 0.22, "grad_norm": 1.9678635597229004, "learning_rate": 1.807967506912636e-05, "loss": 1.049, "step": 3916 }, { "epoch": 0.22, "grad_norm": 1.9444068670272827, "learning_rate": 1.8078580374769096e-05, "loss": 0.9538, "step": 3917 }, { "epoch": 0.22, "grad_norm": 2.2512242794036865, "learning_rate": 1.8077485401642586e-05, "loss": 1.1235, "step": 3918 }, { "epoch": 0.22, "grad_norm": 1.9790058135986328, "learning_rate": 1.8076390149784622e-05, "loss": 1.0946, "step": 3919 }, { "epoch": 0.22, "grad_norm": 1.9601472616195679, "learning_rate": 1.807529461923299e-05, "loss": 1.0174, "step": 3920 }, { "epoch": 0.22, "grad_norm": 2.0217413902282715, "learning_rate": 1.80741988100255e-05, "loss": 1.0095, "step": 3921 }, { "epoch": 0.22, "grad_norm": 1.9320117235183716, "learning_rate": 1.8073102722199966e-05, "loss": 0.9607, "step": 3922 }, { "epoch": 0.22, "grad_norm": 2.010375499725342, "learning_rate": 1.8072006355794206e-05, "loss": 1.0236, "step": 3923 }, { "epoch": 0.23, "grad_norm": 1.8503814935684204, "learning_rate": 1.8070909710846053e-05, "loss": 1.0506, "step": 3924 }, { "epoch": 0.23, "grad_norm": 2.0421550273895264, "learning_rate": 1.8069812787393355e-05, "loss": 1.0711, "step": 3925 }, { "epoch": 0.23, "grad_norm": 1.2336549758911133, "learning_rate": 1.8068715585473957e-05, "loss": 0.6175, "step": 3926 }, { "epoch": 0.23, "grad_norm": 2.07673716545105, "learning_rate": 1.8067618105125725e-05, "loss": 1.0859, "step": 3927 }, { "epoch": 0.23, "grad_norm": 1.8096039295196533, "learning_rate": 1.8066520346386526e-05, "loss": 1.031, "step": 3928 }, { "epoch": 0.23, "grad_norm": 1.8365750312805176, "learning_rate": 1.8065422309294245e-05, "loss": 0.9916, "step": 3929 }, { "epoch": 0.23, "grad_norm": 1.8973675966262817, "learning_rate": 1.8064323993886768e-05, "loss": 1.0873, "step": 3930 }, { "epoch": 0.23, "grad_norm": 1.7288068532943726, "learning_rate": 1.8063225400202e-05, "loss": 1.088, "step": 3931 }, { "epoch": 0.23, "grad_norm": 1.8798779249191284, "learning_rate": 1.8062126528277846e-05, "loss": 1.0496, "step": 3932 }, { "epoch": 0.23, "grad_norm": 2.0560450553894043, "learning_rate": 1.8061027378152224e-05, "loss": 1.0716, "step": 3933 }, { "epoch": 0.23, "grad_norm": 1.1856849193572998, "learning_rate": 1.8059927949863066e-05, "loss": 0.6826, "step": 3934 }, { "epoch": 0.23, "grad_norm": 1.9146655797958374, "learning_rate": 1.8058828243448308e-05, "loss": 1.0853, "step": 3935 }, { "epoch": 0.23, "grad_norm": 1.7568093538284302, "learning_rate": 1.8057728258945902e-05, "loss": 0.9345, "step": 3936 }, { "epoch": 0.23, "grad_norm": 1.7870181798934937, "learning_rate": 1.8056627996393797e-05, "loss": 1.0161, "step": 3937 }, { "epoch": 0.23, "grad_norm": 1.8653303384780884, "learning_rate": 1.8055527455829968e-05, "loss": 1.054, "step": 3938 }, { "epoch": 0.23, "grad_norm": 1.840993881225586, "learning_rate": 1.805442663729239e-05, "loss": 1.069, "step": 3939 }, { "epoch": 0.23, "grad_norm": 2.0293118953704834, "learning_rate": 1.8053325540819048e-05, "loss": 1.0657, "step": 3940 }, { "epoch": 0.23, "grad_norm": 1.6711387634277344, "learning_rate": 1.8052224166447936e-05, "loss": 0.9387, "step": 3941 }, { "epoch": 0.23, "grad_norm": 1.9441683292388916, "learning_rate": 1.805112251421706e-05, "loss": 1.0373, "step": 3942 }, { "epoch": 0.23, "grad_norm": 1.8204078674316406, "learning_rate": 1.8050020584164437e-05, "loss": 1.1179, "step": 3943 }, { "epoch": 0.23, "grad_norm": 1.8146203756332397, "learning_rate": 1.804891837632809e-05, "loss": 1.0273, "step": 3944 }, { "epoch": 0.23, "grad_norm": 1.9963690042495728, "learning_rate": 1.8047815890746056e-05, "loss": 1.1278, "step": 3945 }, { "epoch": 0.23, "grad_norm": 1.7636953592300415, "learning_rate": 1.8046713127456375e-05, "loss": 1.1083, "step": 3946 }, { "epoch": 0.23, "grad_norm": 1.7529535293579102, "learning_rate": 1.80456100864971e-05, "loss": 1.0615, "step": 3947 }, { "epoch": 0.23, "grad_norm": 1.898754596710205, "learning_rate": 1.8044506767906297e-05, "loss": 1.0569, "step": 3948 }, { "epoch": 0.23, "grad_norm": 1.186434268951416, "learning_rate": 1.8043403171722034e-05, "loss": 0.6396, "step": 3949 }, { "epoch": 0.23, "grad_norm": 1.8913542032241821, "learning_rate": 1.80422992979824e-05, "loss": 1.1204, "step": 3950 }, { "epoch": 0.23, "grad_norm": 1.8315101861953735, "learning_rate": 1.804119514672548e-05, "loss": 0.9885, "step": 3951 }, { "epoch": 0.23, "grad_norm": 1.8892968893051147, "learning_rate": 1.8040090717989378e-05, "loss": 1.0374, "step": 3952 }, { "epoch": 0.23, "grad_norm": 1.880257248878479, "learning_rate": 1.8038986011812203e-05, "loss": 1.0305, "step": 3953 }, { "epoch": 0.23, "grad_norm": 1.7758201360702515, "learning_rate": 1.803788102823208e-05, "loss": 1.0407, "step": 3954 }, { "epoch": 0.23, "grad_norm": 1.9291921854019165, "learning_rate": 1.8036775767287135e-05, "loss": 0.9676, "step": 3955 }, { "epoch": 0.23, "grad_norm": 1.881773829460144, "learning_rate": 1.803567022901551e-05, "loss": 1.0348, "step": 3956 }, { "epoch": 0.23, "grad_norm": 2.04146146774292, "learning_rate": 1.8034564413455345e-05, "loss": 1.0909, "step": 3957 }, { "epoch": 0.23, "grad_norm": 1.9385111331939697, "learning_rate": 1.803345832064481e-05, "loss": 1.0612, "step": 3958 }, { "epoch": 0.23, "grad_norm": 1.8683481216430664, "learning_rate": 1.8032351950622068e-05, "loss": 1.0705, "step": 3959 }, { "epoch": 0.23, "grad_norm": 1.9467734098434448, "learning_rate": 1.80312453034253e-05, "loss": 1.0596, "step": 3960 }, { "epoch": 0.23, "grad_norm": 1.8971284627914429, "learning_rate": 1.8030138379092688e-05, "loss": 1.0351, "step": 3961 }, { "epoch": 0.23, "grad_norm": 1.8684958219528198, "learning_rate": 1.8029031177662434e-05, "loss": 1.0458, "step": 3962 }, { "epoch": 0.23, "grad_norm": 1.8525906801223755, "learning_rate": 1.802792369917274e-05, "loss": 1.057, "step": 3963 }, { "epoch": 0.23, "grad_norm": 1.958800196647644, "learning_rate": 1.8026815943661828e-05, "loss": 1.056, "step": 3964 }, { "epoch": 0.23, "grad_norm": 1.715982437133789, "learning_rate": 1.802570791116792e-05, "loss": 1.1179, "step": 3965 }, { "epoch": 0.23, "grad_norm": 2.19415283203125, "learning_rate": 1.8024599601729245e-05, "loss": 1.0545, "step": 3966 }, { "epoch": 0.23, "grad_norm": 1.9145385026931763, "learning_rate": 1.802349101538406e-05, "loss": 1.0073, "step": 3967 }, { "epoch": 0.23, "grad_norm": 1.8941882848739624, "learning_rate": 1.8022382152170607e-05, "loss": 1.0987, "step": 3968 }, { "epoch": 0.23, "grad_norm": 1.9278075695037842, "learning_rate": 1.802127301212716e-05, "loss": 1.0493, "step": 3969 }, { "epoch": 0.23, "grad_norm": 1.8007793426513672, "learning_rate": 1.8020163595291987e-05, "loss": 0.9988, "step": 3970 }, { "epoch": 0.23, "grad_norm": 1.7699190378189087, "learning_rate": 1.801905390170337e-05, "loss": 0.9807, "step": 3971 }, { "epoch": 0.23, "grad_norm": 1.8725440502166748, "learning_rate": 1.8017943931399604e-05, "loss": 1.0459, "step": 3972 }, { "epoch": 0.23, "grad_norm": 2.173238754272461, "learning_rate": 1.801683368441899e-05, "loss": 1.0929, "step": 3973 }, { "epoch": 0.23, "grad_norm": 2.0531492233276367, "learning_rate": 1.801572316079984e-05, "loss": 1.0759, "step": 3974 }, { "epoch": 0.23, "grad_norm": 2.2277255058288574, "learning_rate": 1.8014612360580477e-05, "loss": 1.0659, "step": 3975 }, { "epoch": 0.23, "grad_norm": 1.7957488298416138, "learning_rate": 1.8013501283799225e-05, "loss": 1.0445, "step": 3976 }, { "epoch": 0.23, "grad_norm": 2.010195255279541, "learning_rate": 1.801238993049443e-05, "loss": 1.0546, "step": 3977 }, { "epoch": 0.23, "grad_norm": 1.798598051071167, "learning_rate": 1.8011278300704443e-05, "loss": 0.9403, "step": 3978 }, { "epoch": 0.23, "grad_norm": 2.0803163051605225, "learning_rate": 1.8010166394467617e-05, "loss": 1.0313, "step": 3979 }, { "epoch": 0.23, "grad_norm": 1.1398024559020996, "learning_rate": 1.8009054211822324e-05, "loss": 0.6528, "step": 3980 }, { "epoch": 0.23, "grad_norm": 1.8305085897445679, "learning_rate": 1.800794175280695e-05, "loss": 1.0046, "step": 3981 }, { "epoch": 0.23, "grad_norm": 1.8218560218811035, "learning_rate": 1.8006829017459868e-05, "loss": 1.0187, "step": 3982 }, { "epoch": 0.23, "grad_norm": 2.1577584743499756, "learning_rate": 1.8005716005819482e-05, "loss": 1.1369, "step": 3983 }, { "epoch": 0.23, "grad_norm": 1.9718064069747925, "learning_rate": 1.8004602717924204e-05, "loss": 1.0778, "step": 3984 }, { "epoch": 0.23, "grad_norm": 1.9038029909133911, "learning_rate": 1.800348915381244e-05, "loss": 1.0723, "step": 3985 }, { "epoch": 0.23, "grad_norm": 1.7515453100204468, "learning_rate": 1.800237531352263e-05, "loss": 1.0191, "step": 3986 }, { "epoch": 0.23, "grad_norm": 1.9517369270324707, "learning_rate": 1.8001261197093196e-05, "loss": 1.0386, "step": 3987 }, { "epoch": 0.23, "grad_norm": 1.7540301084518433, "learning_rate": 1.800014680456259e-05, "loss": 1.1015, "step": 3988 }, { "epoch": 0.23, "grad_norm": 2.024225950241089, "learning_rate": 1.7999032135969265e-05, "loss": 1.0741, "step": 3989 }, { "epoch": 0.23, "grad_norm": 1.8054072856903076, "learning_rate": 1.7997917191351688e-05, "loss": 0.9977, "step": 3990 }, { "epoch": 0.23, "grad_norm": 1.7811944484710693, "learning_rate": 1.7996801970748326e-05, "loss": 1.0172, "step": 3991 }, { "epoch": 0.23, "grad_norm": 1.9467167854309082, "learning_rate": 1.799568647419767e-05, "loss": 1.0672, "step": 3992 }, { "epoch": 0.23, "grad_norm": 1.906733512878418, "learning_rate": 1.7994570701738208e-05, "loss": 0.9953, "step": 3993 }, { "epoch": 0.23, "grad_norm": 1.9821357727050781, "learning_rate": 1.7993454653408443e-05, "loss": 1.0176, "step": 3994 }, { "epoch": 0.23, "grad_norm": 2.114565372467041, "learning_rate": 1.7992338329246885e-05, "loss": 1.0517, "step": 3995 }, { "epoch": 0.23, "grad_norm": 1.8689619302749634, "learning_rate": 1.799122172929206e-05, "loss": 1.0324, "step": 3996 }, { "epoch": 0.23, "grad_norm": 2.0412685871124268, "learning_rate": 1.7990104853582494e-05, "loss": 1.1121, "step": 3997 }, { "epoch": 0.23, "grad_norm": 2.068769931793213, "learning_rate": 1.7988987702156725e-05, "loss": 1.1178, "step": 3998 }, { "epoch": 0.23, "grad_norm": 1.9666004180908203, "learning_rate": 1.798787027505331e-05, "loss": 0.9617, "step": 3999 }, { "epoch": 0.23, "grad_norm": 2.1355674266815186, "learning_rate": 1.798675257231081e-05, "loss": 1.0383, "step": 4000 }, { "epoch": 0.23, "grad_norm": 1.835336685180664, "learning_rate": 1.7985634593967782e-05, "loss": 1.0279, "step": 4001 }, { "epoch": 0.23, "grad_norm": 1.9524002075195312, "learning_rate": 1.7984516340062814e-05, "loss": 1.1141, "step": 4002 }, { "epoch": 0.23, "grad_norm": 2.0472798347473145, "learning_rate": 1.7983397810634488e-05, "loss": 1.0881, "step": 4003 }, { "epoch": 0.23, "grad_norm": 1.8359564542770386, "learning_rate": 1.7982279005721408e-05, "loss": 1.0657, "step": 4004 }, { "epoch": 0.23, "grad_norm": 1.8843165636062622, "learning_rate": 1.7981159925362174e-05, "loss": 0.9319, "step": 4005 }, { "epoch": 0.23, "grad_norm": 1.9676355123519897, "learning_rate": 1.798004056959541e-05, "loss": 1.0645, "step": 4006 }, { "epoch": 0.23, "grad_norm": 1.235625147819519, "learning_rate": 1.7978920938459735e-05, "loss": 0.6301, "step": 4007 }, { "epoch": 0.23, "grad_norm": 2.0407421588897705, "learning_rate": 1.7977801031993785e-05, "loss": 1.0161, "step": 4008 }, { "epoch": 0.23, "grad_norm": 1.739117980003357, "learning_rate": 1.7976680850236204e-05, "loss": 1.07, "step": 4009 }, { "epoch": 0.23, "grad_norm": 1.9061022996902466, "learning_rate": 1.7975560393225656e-05, "loss": 1.0762, "step": 4010 }, { "epoch": 0.23, "grad_norm": 1.8710026741027832, "learning_rate": 1.7974439661000794e-05, "loss": 1.1173, "step": 4011 }, { "epoch": 0.23, "grad_norm": 1.8702064752578735, "learning_rate": 1.7973318653600294e-05, "loss": 1.0246, "step": 4012 }, { "epoch": 0.23, "grad_norm": 1.949748158454895, "learning_rate": 1.797219737106284e-05, "loss": 1.0695, "step": 4013 }, { "epoch": 0.23, "grad_norm": 1.7159366607666016, "learning_rate": 1.7971075813427125e-05, "loss": 1.0074, "step": 4014 }, { "epoch": 0.23, "grad_norm": 1.9137449264526367, "learning_rate": 1.796995398073185e-05, "loss": 1.0661, "step": 4015 }, { "epoch": 0.23, "grad_norm": 1.788398027420044, "learning_rate": 1.7968831873015725e-05, "loss": 0.9237, "step": 4016 }, { "epoch": 0.23, "grad_norm": 1.975713849067688, "learning_rate": 1.7967709490317475e-05, "loss": 1.0727, "step": 4017 }, { "epoch": 0.23, "grad_norm": 1.964179277420044, "learning_rate": 1.7966586832675824e-05, "loss": 1.1154, "step": 4018 }, { "epoch": 0.23, "grad_norm": 1.7685472965240479, "learning_rate": 1.7965463900129517e-05, "loss": 1.0324, "step": 4019 }, { "epoch": 0.23, "grad_norm": 2.0564913749694824, "learning_rate": 1.7964340692717303e-05, "loss": 1.0233, "step": 4020 }, { "epoch": 0.23, "grad_norm": 1.9170676469802856, "learning_rate": 1.796321721047794e-05, "loss": 1.1237, "step": 4021 }, { "epoch": 0.23, "grad_norm": 2.1880359649658203, "learning_rate": 1.796209345345019e-05, "loss": 1.0362, "step": 4022 }, { "epoch": 0.23, "grad_norm": 1.8392949104309082, "learning_rate": 1.7960969421672837e-05, "loss": 1.0199, "step": 4023 }, { "epoch": 0.23, "grad_norm": 2.0556066036224365, "learning_rate": 1.795984511518467e-05, "loss": 0.9562, "step": 4024 }, { "epoch": 0.23, "grad_norm": 1.8696030378341675, "learning_rate": 1.7958720534024484e-05, "loss": 0.988, "step": 4025 }, { "epoch": 0.23, "grad_norm": 1.8294761180877686, "learning_rate": 1.795759567823108e-05, "loss": 1.0314, "step": 4026 }, { "epoch": 0.23, "grad_norm": 1.9035627841949463, "learning_rate": 1.7956470547843283e-05, "loss": 1.1086, "step": 4027 }, { "epoch": 0.23, "grad_norm": 1.9037648439407349, "learning_rate": 1.795534514289991e-05, "loss": 1.0568, "step": 4028 }, { "epoch": 0.23, "grad_norm": 1.7723984718322754, "learning_rate": 1.79542194634398e-05, "loss": 0.9877, "step": 4029 }, { "epoch": 0.23, "grad_norm": 1.9419113397598267, "learning_rate": 1.7953093509501794e-05, "loss": 1.0511, "step": 4030 }, { "epoch": 0.23, "grad_norm": 1.8378154039382935, "learning_rate": 1.7951967281124746e-05, "loss": 1.0863, "step": 4031 }, { "epoch": 0.23, "grad_norm": 1.854482889175415, "learning_rate": 1.7950840778347524e-05, "loss": 1.0396, "step": 4032 }, { "epoch": 0.23, "grad_norm": 1.858854055404663, "learning_rate": 1.794971400120899e-05, "loss": 1.0536, "step": 4033 }, { "epoch": 0.23, "grad_norm": 1.7688071727752686, "learning_rate": 1.7948586949748036e-05, "loss": 0.9924, "step": 4034 }, { "epoch": 0.23, "grad_norm": 1.932106852531433, "learning_rate": 1.7947459624003553e-05, "loss": 1.056, "step": 4035 }, { "epoch": 0.23, "grad_norm": 1.9195826053619385, "learning_rate": 1.7946332024014433e-05, "loss": 1.0277, "step": 4036 }, { "epoch": 0.23, "grad_norm": 1.9722931385040283, "learning_rate": 1.7945204149819596e-05, "loss": 1.0682, "step": 4037 }, { "epoch": 0.23, "grad_norm": 1.9842535257339478, "learning_rate": 1.7944076001457958e-05, "loss": 1.0162, "step": 4038 }, { "epoch": 0.23, "grad_norm": 1.8828541040420532, "learning_rate": 1.7942947578968445e-05, "loss": 1.0254, "step": 4039 }, { "epoch": 0.23, "grad_norm": 1.9666810035705566, "learning_rate": 1.7941818882390004e-05, "loss": 1.0358, "step": 4040 }, { "epoch": 0.23, "grad_norm": 1.7862322330474854, "learning_rate": 1.7940689911761574e-05, "loss": 1.0118, "step": 4041 }, { "epoch": 0.23, "grad_norm": 1.96208918094635, "learning_rate": 1.7939560667122117e-05, "loss": 1.0565, "step": 4042 }, { "epoch": 0.23, "grad_norm": 1.79078209400177, "learning_rate": 1.7938431148510597e-05, "loss": 1.0774, "step": 4043 }, { "epoch": 0.23, "grad_norm": 1.8950724601745605, "learning_rate": 1.7937301355965997e-05, "loss": 1.074, "step": 4044 }, { "epoch": 0.23, "grad_norm": 1.86475670337677, "learning_rate": 1.7936171289527296e-05, "loss": 1.0352, "step": 4045 }, { "epoch": 0.23, "grad_norm": 1.9543344974517822, "learning_rate": 1.7935040949233496e-05, "loss": 1.041, "step": 4046 }, { "epoch": 0.23, "grad_norm": 1.9720805883407593, "learning_rate": 1.79339103351236e-05, "loss": 1.0929, "step": 4047 }, { "epoch": 0.23, "grad_norm": 1.8948408365249634, "learning_rate": 1.7932779447236613e-05, "loss": 1.0214, "step": 4048 }, { "epoch": 0.23, "grad_norm": 1.233901023864746, "learning_rate": 1.7931648285611576e-05, "loss": 0.6518, "step": 4049 }, { "epoch": 0.23, "grad_norm": 1.7442704439163208, "learning_rate": 1.7930516850287506e-05, "loss": 0.9626, "step": 4050 }, { "epoch": 0.23, "grad_norm": 2.020441770553589, "learning_rate": 1.7929385141303456e-05, "loss": 1.0738, "step": 4051 }, { "epoch": 0.23, "grad_norm": 2.0234367847442627, "learning_rate": 1.7928253158698474e-05, "loss": 1.0075, "step": 4052 }, { "epoch": 0.23, "grad_norm": 1.963760495185852, "learning_rate": 1.792712090251162e-05, "loss": 1.1171, "step": 4053 }, { "epoch": 0.23, "grad_norm": 1.6826575994491577, "learning_rate": 1.792598837278197e-05, "loss": 0.8942, "step": 4054 }, { "epoch": 0.23, "grad_norm": 1.9428327083587646, "learning_rate": 1.79248555695486e-05, "loss": 1.1102, "step": 4055 }, { "epoch": 0.23, "grad_norm": 1.7691515684127808, "learning_rate": 1.7923722492850602e-05, "loss": 0.9965, "step": 4056 }, { "epoch": 0.23, "grad_norm": 1.8592867851257324, "learning_rate": 1.7922589142727074e-05, "loss": 1.1101, "step": 4057 }, { "epoch": 0.23, "grad_norm": 1.9486405849456787, "learning_rate": 1.7921455519217127e-05, "loss": 1.0463, "step": 4058 }, { "epoch": 0.23, "grad_norm": 1.7884804010391235, "learning_rate": 1.7920321622359876e-05, "loss": 1.0523, "step": 4059 }, { "epoch": 0.23, "grad_norm": 1.8796781301498413, "learning_rate": 1.7919187452194452e-05, "loss": 0.9999, "step": 4060 }, { "epoch": 0.23, "grad_norm": 1.8021658658981323, "learning_rate": 1.791805300875999e-05, "loss": 1.0585, "step": 4061 }, { "epoch": 0.23, "grad_norm": 1.9235745668411255, "learning_rate": 1.7916918292095636e-05, "loss": 1.0027, "step": 4062 }, { "epoch": 0.23, "grad_norm": 1.882026195526123, "learning_rate": 1.7915783302240548e-05, "loss": 1.0375, "step": 4063 }, { "epoch": 0.23, "grad_norm": 1.6535778045654297, "learning_rate": 1.791464803923389e-05, "loss": 1.0345, "step": 4064 }, { "epoch": 0.23, "grad_norm": 1.1898133754730225, "learning_rate": 1.791351250311484e-05, "loss": 0.6315, "step": 4065 }, { "epoch": 0.23, "grad_norm": 1.9608352184295654, "learning_rate": 1.791237669392257e-05, "loss": 1.071, "step": 4066 }, { "epoch": 0.23, "grad_norm": 1.8725255727767944, "learning_rate": 1.791124061169629e-05, "loss": 1.0111, "step": 4067 }, { "epoch": 0.23, "grad_norm": 1.2051154375076294, "learning_rate": 1.7910104256475194e-05, "loss": 0.6134, "step": 4068 }, { "epoch": 0.23, "grad_norm": 1.8643743991851807, "learning_rate": 1.7908967628298493e-05, "loss": 1.0173, "step": 4069 }, { "epoch": 0.23, "grad_norm": 1.9550732374191284, "learning_rate": 1.7907830727205416e-05, "loss": 1.035, "step": 4070 }, { "epoch": 0.23, "grad_norm": 1.9909507036209106, "learning_rate": 1.7906693553235192e-05, "loss": 1.0533, "step": 4071 }, { "epoch": 0.23, "grad_norm": 1.9447014331817627, "learning_rate": 1.7905556106427054e-05, "loss": 0.9918, "step": 4072 }, { "epoch": 0.23, "grad_norm": 1.7819938659667969, "learning_rate": 1.7904418386820262e-05, "loss": 1.0638, "step": 4073 }, { "epoch": 0.23, "grad_norm": 2.059746265411377, "learning_rate": 1.790328039445407e-05, "loss": 1.0943, "step": 4074 }, { "epoch": 0.23, "grad_norm": 1.0561931133270264, "learning_rate": 1.790214212936775e-05, "loss": 0.5938, "step": 4075 }, { "epoch": 0.23, "grad_norm": 1.9607278108596802, "learning_rate": 1.7901003591600575e-05, "loss": 1.0749, "step": 4076 }, { "epoch": 0.23, "grad_norm": 2.0960516929626465, "learning_rate": 1.7899864781191842e-05, "loss": 1.0295, "step": 4077 }, { "epoch": 0.23, "grad_norm": 1.9445511102676392, "learning_rate": 1.789872569818084e-05, "loss": 1.0216, "step": 4078 }, { "epoch": 0.23, "grad_norm": 1.8433399200439453, "learning_rate": 1.7897586342606875e-05, "loss": 1.0463, "step": 4079 }, { "epoch": 0.23, "grad_norm": 2.2118213176727295, "learning_rate": 1.7896446714509272e-05, "loss": 1.0345, "step": 4080 }, { "epoch": 0.23, "grad_norm": 1.7436401844024658, "learning_rate": 1.789530681392735e-05, "loss": 1.0291, "step": 4081 }, { "epoch": 0.23, "grad_norm": 1.8584575653076172, "learning_rate": 1.789416664090044e-05, "loss": 1.0291, "step": 4082 }, { "epoch": 0.23, "grad_norm": 1.7580806016921997, "learning_rate": 1.7893026195467897e-05, "loss": 1.0553, "step": 4083 }, { "epoch": 0.23, "grad_norm": 1.833535075187683, "learning_rate": 1.7891885477669065e-05, "loss": 0.9951, "step": 4084 }, { "epoch": 0.23, "grad_norm": 2.125997543334961, "learning_rate": 1.789074448754331e-05, "loss": 1.0531, "step": 4085 }, { "epoch": 0.23, "grad_norm": 1.6811869144439697, "learning_rate": 1.7889603225130004e-05, "loss": 1.0068, "step": 4086 }, { "epoch": 0.23, "grad_norm": 1.740148663520813, "learning_rate": 1.788846169046853e-05, "loss": 1.0026, "step": 4087 }, { "epoch": 0.23, "grad_norm": 2.566307783126831, "learning_rate": 1.7887319883598278e-05, "loss": 0.9683, "step": 4088 }, { "epoch": 0.23, "grad_norm": 1.9611505270004272, "learning_rate": 1.788617780455865e-05, "loss": 0.977, "step": 4089 }, { "epoch": 0.23, "grad_norm": 1.9645882844924927, "learning_rate": 1.7885035453389057e-05, "loss": 1.0087, "step": 4090 }, { "epoch": 0.23, "grad_norm": 1.1712177991867065, "learning_rate": 1.7883892830128915e-05, "loss": 0.6679, "step": 4091 }, { "epoch": 0.23, "grad_norm": 1.8754403591156006, "learning_rate": 1.7882749934817654e-05, "loss": 0.9969, "step": 4092 }, { "epoch": 0.23, "grad_norm": 2.131348133087158, "learning_rate": 1.7881606767494712e-05, "loss": 1.0345, "step": 4093 }, { "epoch": 0.23, "grad_norm": 2.49139142036438, "learning_rate": 1.788046332819954e-05, "loss": 1.0746, "step": 4094 }, { "epoch": 0.23, "grad_norm": 1.1379097700119019, "learning_rate": 1.7879319616971584e-05, "loss": 0.5899, "step": 4095 }, { "epoch": 0.23, "grad_norm": 1.898483395576477, "learning_rate": 1.7878175633850326e-05, "loss": 1.0469, "step": 4096 }, { "epoch": 0.23, "grad_norm": 1.8450311422348022, "learning_rate": 1.787703137887523e-05, "loss": 0.9974, "step": 4097 }, { "epoch": 0.24, "grad_norm": 1.8753880262374878, "learning_rate": 1.7875886852085785e-05, "loss": 1.028, "step": 4098 }, { "epoch": 0.24, "grad_norm": 2.1251187324523926, "learning_rate": 1.7874742053521483e-05, "loss": 1.0544, "step": 4099 }, { "epoch": 0.24, "grad_norm": 1.8532066345214844, "learning_rate": 1.7873596983221832e-05, "loss": 1.0042, "step": 4100 }, { "epoch": 0.24, "grad_norm": 1.6483250856399536, "learning_rate": 1.7872451641226345e-05, "loss": 1.0167, "step": 4101 }, { "epoch": 0.24, "grad_norm": 2.075258493423462, "learning_rate": 1.7871306027574544e-05, "loss": 1.0776, "step": 4102 }, { "epoch": 0.24, "grad_norm": 1.879873514175415, "learning_rate": 1.7870160142305954e-05, "loss": 1.1043, "step": 4103 }, { "epoch": 0.24, "grad_norm": 1.802793264389038, "learning_rate": 1.7869013985460123e-05, "loss": 1.0716, "step": 4104 }, { "epoch": 0.24, "grad_norm": 1.625813364982605, "learning_rate": 1.7867867557076604e-05, "loss": 0.9365, "step": 4105 }, { "epoch": 0.24, "grad_norm": 1.8565022945404053, "learning_rate": 1.786672085719495e-05, "loss": 1.0765, "step": 4106 }, { "epoch": 0.24, "grad_norm": 2.4801981449127197, "learning_rate": 1.7865573885854737e-05, "loss": 0.9989, "step": 4107 }, { "epoch": 0.24, "grad_norm": 2.040534257888794, "learning_rate": 1.7864426643095537e-05, "loss": 1.0585, "step": 4108 }, { "epoch": 0.24, "grad_norm": 2.120488405227661, "learning_rate": 1.7863279128956946e-05, "loss": 1.0781, "step": 4109 }, { "epoch": 0.24, "grad_norm": 1.8138446807861328, "learning_rate": 1.7862131343478556e-05, "loss": 1.039, "step": 4110 }, { "epoch": 0.24, "grad_norm": 1.99754798412323, "learning_rate": 1.7860983286699976e-05, "loss": 1.0417, "step": 4111 }, { "epoch": 0.24, "grad_norm": 1.8607620000839233, "learning_rate": 1.785983495866082e-05, "loss": 1.0624, "step": 4112 }, { "epoch": 0.24, "grad_norm": 1.9502263069152832, "learning_rate": 1.7858686359400715e-05, "loss": 0.9974, "step": 4113 }, { "epoch": 0.24, "grad_norm": 1.8580212593078613, "learning_rate": 1.7857537488959297e-05, "loss": 1.1063, "step": 4114 }, { "epoch": 0.24, "grad_norm": 2.0122132301330566, "learning_rate": 1.785638834737621e-05, "loss": 1.0533, "step": 4115 }, { "epoch": 0.24, "grad_norm": 2.0008320808410645, "learning_rate": 1.785523893469111e-05, "loss": 1.0244, "step": 4116 }, { "epoch": 0.24, "grad_norm": 1.8050484657287598, "learning_rate": 1.785408925094365e-05, "loss": 1.0728, "step": 4117 }, { "epoch": 0.24, "grad_norm": 1.7926274538040161, "learning_rate": 1.7852939296173516e-05, "loss": 1.0203, "step": 4118 }, { "epoch": 0.24, "grad_norm": 1.9154744148254395, "learning_rate": 1.785178907042038e-05, "loss": 0.9789, "step": 4119 }, { "epoch": 0.24, "grad_norm": 1.8084694147109985, "learning_rate": 1.7850638573723932e-05, "loss": 1.0608, "step": 4120 }, { "epoch": 0.24, "grad_norm": 1.920157551765442, "learning_rate": 1.7849487806123885e-05, "loss": 1.0513, "step": 4121 }, { "epoch": 0.24, "grad_norm": 1.8571562767028809, "learning_rate": 1.7848336767659934e-05, "loss": 1.0582, "step": 4122 }, { "epoch": 0.24, "grad_norm": 1.7227673530578613, "learning_rate": 1.7847185458371808e-05, "loss": 0.9684, "step": 4123 }, { "epoch": 0.24, "grad_norm": 1.8197344541549683, "learning_rate": 1.7846033878299232e-05, "loss": 1.0166, "step": 4124 }, { "epoch": 0.24, "grad_norm": 1.9192757606506348, "learning_rate": 1.7844882027481943e-05, "loss": 1.0545, "step": 4125 }, { "epoch": 0.24, "grad_norm": 1.9091987609863281, "learning_rate": 1.7843729905959687e-05, "loss": 1.0706, "step": 4126 }, { "epoch": 0.24, "grad_norm": 2.9407949447631836, "learning_rate": 1.7842577513772227e-05, "loss": 1.1138, "step": 4127 }, { "epoch": 0.24, "grad_norm": 1.8984144926071167, "learning_rate": 1.784142485095932e-05, "loss": 0.9695, "step": 4128 }, { "epoch": 0.24, "grad_norm": 1.867648959159851, "learning_rate": 1.784027191756075e-05, "loss": 1.081, "step": 4129 }, { "epoch": 0.24, "grad_norm": 2.133528709411621, "learning_rate": 1.7839118713616296e-05, "loss": 1.1055, "step": 4130 }, { "epoch": 0.24, "grad_norm": 1.9219939708709717, "learning_rate": 1.7837965239165748e-05, "loss": 1.0505, "step": 4131 }, { "epoch": 0.24, "grad_norm": 1.851528286933899, "learning_rate": 1.7836811494248917e-05, "loss": 1.0243, "step": 4132 }, { "epoch": 0.24, "grad_norm": 1.9875849485397339, "learning_rate": 1.7835657478905613e-05, "loss": 0.9717, "step": 4133 }, { "epoch": 0.24, "grad_norm": 1.8717445135116577, "learning_rate": 1.783450319317566e-05, "loss": 1.1689, "step": 4134 }, { "epoch": 0.24, "grad_norm": 1.8347394466400146, "learning_rate": 1.7833348637098883e-05, "loss": 0.9974, "step": 4135 }, { "epoch": 0.24, "grad_norm": 2.0256121158599854, "learning_rate": 1.7832193810715125e-05, "loss": 1.0943, "step": 4136 }, { "epoch": 0.24, "grad_norm": 1.8869954347610474, "learning_rate": 1.783103871406424e-05, "loss": 1.1003, "step": 4137 }, { "epoch": 0.24, "grad_norm": 1.6921499967575073, "learning_rate": 1.7829883347186086e-05, "loss": 0.9748, "step": 4138 }, { "epoch": 0.24, "grad_norm": 1.7227833271026611, "learning_rate": 1.782872771012053e-05, "loss": 1.104, "step": 4139 }, { "epoch": 0.24, "grad_norm": 1.9167836904525757, "learning_rate": 1.7827571802907443e-05, "loss": 0.9952, "step": 4140 }, { "epoch": 0.24, "grad_norm": 1.9519836902618408, "learning_rate": 1.782641562558672e-05, "loss": 1.0962, "step": 4141 }, { "epoch": 0.24, "grad_norm": 1.9016164541244507, "learning_rate": 1.7825259178198258e-05, "loss": 1.0461, "step": 4142 }, { "epoch": 0.24, "grad_norm": 1.8856735229492188, "learning_rate": 1.7824102460781962e-05, "loss": 1.0639, "step": 4143 }, { "epoch": 0.24, "grad_norm": 1.9405943155288696, "learning_rate": 1.7822945473377744e-05, "loss": 1.0286, "step": 4144 }, { "epoch": 0.24, "grad_norm": 1.7457057237625122, "learning_rate": 1.782178821602553e-05, "loss": 1.0439, "step": 4145 }, { "epoch": 0.24, "grad_norm": 1.9829154014587402, "learning_rate": 1.7820630688765253e-05, "loss": 1.0167, "step": 4146 }, { "epoch": 0.24, "grad_norm": 1.9646515846252441, "learning_rate": 1.7819472891636863e-05, "loss": 1.0232, "step": 4147 }, { "epoch": 0.24, "grad_norm": 1.9647910594940186, "learning_rate": 1.78183148246803e-05, "loss": 1.0514, "step": 4148 }, { "epoch": 0.24, "grad_norm": 1.876084327697754, "learning_rate": 1.7817156487935534e-05, "loss": 1.025, "step": 4149 }, { "epoch": 0.24, "grad_norm": 1.8375605344772339, "learning_rate": 1.781599788144253e-05, "loss": 1.037, "step": 4150 }, { "epoch": 0.24, "grad_norm": 1.8500174283981323, "learning_rate": 1.781483900524128e-05, "loss": 1.1388, "step": 4151 }, { "epoch": 0.24, "grad_norm": 1.7848176956176758, "learning_rate": 1.781367985937176e-05, "loss": 1.0623, "step": 4152 }, { "epoch": 0.24, "grad_norm": 2.0311336517333984, "learning_rate": 1.7812520443873976e-05, "loss": 1.0831, "step": 4153 }, { "epoch": 0.24, "grad_norm": 1.8714094161987305, "learning_rate": 1.7811360758787938e-05, "loss": 0.9857, "step": 4154 }, { "epoch": 0.24, "grad_norm": 1.746452808380127, "learning_rate": 1.7810200804153657e-05, "loss": 1.0028, "step": 4155 }, { "epoch": 0.24, "grad_norm": 1.739323377609253, "learning_rate": 1.780904058001116e-05, "loss": 0.9855, "step": 4156 }, { "epoch": 0.24, "grad_norm": 1.8690398931503296, "learning_rate": 1.7807880086400496e-05, "loss": 1.0177, "step": 4157 }, { "epoch": 0.24, "grad_norm": 1.8942416906356812, "learning_rate": 1.780671932336169e-05, "loss": 1.0529, "step": 4158 }, { "epoch": 0.24, "grad_norm": 1.6740044355392456, "learning_rate": 1.7805558290934814e-05, "loss": 0.9964, "step": 4159 }, { "epoch": 0.24, "grad_norm": 1.9355802536010742, "learning_rate": 1.7804396989159923e-05, "loss": 1.0431, "step": 4160 }, { "epoch": 0.24, "grad_norm": 1.9288440942764282, "learning_rate": 1.7803235418077094e-05, "loss": 1.0848, "step": 4161 }, { "epoch": 0.24, "grad_norm": 1.8997458219528198, "learning_rate": 1.7802073577726407e-05, "loss": 1.0653, "step": 4162 }, { "epoch": 0.24, "grad_norm": 1.9431718587875366, "learning_rate": 1.7800911468147955e-05, "loss": 1.0946, "step": 4163 }, { "epoch": 0.24, "grad_norm": 1.852612853050232, "learning_rate": 1.7799749089381843e-05, "loss": 1.0666, "step": 4164 }, { "epoch": 0.24, "grad_norm": 1.7472355365753174, "learning_rate": 1.7798586441468172e-05, "loss": 1.0431, "step": 4165 }, { "epoch": 0.24, "grad_norm": 1.7824941873550415, "learning_rate": 1.779742352444707e-05, "loss": 1.0488, "step": 4166 }, { "epoch": 0.24, "grad_norm": 2.048429489135742, "learning_rate": 1.7796260338358663e-05, "loss": 1.1056, "step": 4167 }, { "epoch": 0.24, "grad_norm": 1.182837963104248, "learning_rate": 1.7795096883243088e-05, "loss": 0.6198, "step": 4168 }, { "epoch": 0.24, "grad_norm": 1.9492744207382202, "learning_rate": 1.7793933159140495e-05, "loss": 1.0678, "step": 4169 }, { "epoch": 0.24, "grad_norm": 1.8401384353637695, "learning_rate": 1.779276916609104e-05, "loss": 1.1216, "step": 4170 }, { "epoch": 0.24, "grad_norm": 1.8888349533081055, "learning_rate": 1.7791604904134893e-05, "loss": 1.0545, "step": 4171 }, { "epoch": 0.24, "grad_norm": 1.8480182886123657, "learning_rate": 1.7790440373312222e-05, "loss": 1.0774, "step": 4172 }, { "epoch": 0.24, "grad_norm": 1.7159712314605713, "learning_rate": 1.7789275573663215e-05, "loss": 0.9273, "step": 4173 }, { "epoch": 0.24, "grad_norm": 1.9522992372512817, "learning_rate": 1.7788110505228072e-05, "loss": 1.1068, "step": 4174 }, { "epoch": 0.24, "grad_norm": 1.8960472345352173, "learning_rate": 1.7786945168046983e-05, "loss": 1.1205, "step": 4175 }, { "epoch": 0.24, "grad_norm": 1.9161450862884521, "learning_rate": 1.7785779562160176e-05, "loss": 1.0631, "step": 4176 }, { "epoch": 0.24, "grad_norm": 1.8746070861816406, "learning_rate": 1.778461368760786e-05, "loss": 1.1112, "step": 4177 }, { "epoch": 0.24, "grad_norm": 1.6447917222976685, "learning_rate": 1.778344754443027e-05, "loss": 1.05, "step": 4178 }, { "epoch": 0.24, "grad_norm": 2.0296075344085693, "learning_rate": 1.778228113266765e-05, "loss": 1.1127, "step": 4179 }, { "epoch": 0.24, "grad_norm": 1.9357746839523315, "learning_rate": 1.7781114452360246e-05, "loss": 1.0803, "step": 4180 }, { "epoch": 0.24, "grad_norm": 1.743965744972229, "learning_rate": 1.7779947503548318e-05, "loss": 1.0436, "step": 4181 }, { "epoch": 0.24, "grad_norm": 1.9591288566589355, "learning_rate": 1.7778780286272134e-05, "loss": 1.0628, "step": 4182 }, { "epoch": 0.24, "grad_norm": 2.173356771469116, "learning_rate": 1.777761280057197e-05, "loss": 1.0876, "step": 4183 }, { "epoch": 0.24, "grad_norm": 1.734431266784668, "learning_rate": 1.7776445046488117e-05, "loss": 1.024, "step": 4184 }, { "epoch": 0.24, "grad_norm": 1.8294124603271484, "learning_rate": 1.7775277024060868e-05, "loss": 0.9752, "step": 4185 }, { "epoch": 0.24, "grad_norm": 1.8191791772842407, "learning_rate": 1.777410873333053e-05, "loss": 1.0552, "step": 4186 }, { "epoch": 0.24, "grad_norm": 1.8683364391326904, "learning_rate": 1.7772940174337412e-05, "loss": 1.0618, "step": 4187 }, { "epoch": 0.24, "grad_norm": 1.780064344406128, "learning_rate": 1.7771771347121842e-05, "loss": 1.0393, "step": 4188 }, { "epoch": 0.24, "grad_norm": 1.722326636314392, "learning_rate": 1.7770602251724153e-05, "loss": 1.003, "step": 4189 }, { "epoch": 0.24, "grad_norm": 1.7124961614608765, "learning_rate": 1.7769432888184685e-05, "loss": 1.0586, "step": 4190 }, { "epoch": 0.24, "grad_norm": 1.8572055101394653, "learning_rate": 1.7768263256543795e-05, "loss": 1.0152, "step": 4191 }, { "epoch": 0.24, "grad_norm": 1.7632818222045898, "learning_rate": 1.7767093356841837e-05, "loss": 1.0479, "step": 4192 }, { "epoch": 0.24, "grad_norm": 1.939591646194458, "learning_rate": 1.7765923189119182e-05, "loss": 1.0995, "step": 4193 }, { "epoch": 0.24, "grad_norm": 1.8527276515960693, "learning_rate": 1.776475275341621e-05, "loss": 1.0232, "step": 4194 }, { "epoch": 0.24, "grad_norm": 1.7614784240722656, "learning_rate": 1.7763582049773317e-05, "loss": 1.0405, "step": 4195 }, { "epoch": 0.24, "grad_norm": 1.7259291410446167, "learning_rate": 1.776241107823089e-05, "loss": 1.0399, "step": 4196 }, { "epoch": 0.24, "grad_norm": 1.925403356552124, "learning_rate": 1.776123983882934e-05, "loss": 1.0503, "step": 4197 }, { "epoch": 0.24, "grad_norm": 1.7659294605255127, "learning_rate": 1.7760068331609084e-05, "loss": 1.079, "step": 4198 }, { "epoch": 0.24, "grad_norm": 1.9547407627105713, "learning_rate": 1.7758896556610547e-05, "loss": 1.0364, "step": 4199 }, { "epoch": 0.24, "grad_norm": 1.1354563236236572, "learning_rate": 1.7757724513874164e-05, "loss": 0.6551, "step": 4200 }, { "epoch": 0.24, "grad_norm": 1.8680031299591064, "learning_rate": 1.7756552203440377e-05, "loss": 1.0394, "step": 4201 }, { "epoch": 0.24, "grad_norm": 1.780556082725525, "learning_rate": 1.775537962534964e-05, "loss": 0.9578, "step": 4202 }, { "epoch": 0.24, "grad_norm": 2.08740496635437, "learning_rate": 1.7754206779642416e-05, "loss": 1.015, "step": 4203 }, { "epoch": 0.24, "grad_norm": 1.9441726207733154, "learning_rate": 1.7753033666359178e-05, "loss": 1.0404, "step": 4204 }, { "epoch": 0.24, "grad_norm": 1.9989814758300781, "learning_rate": 1.7751860285540406e-05, "loss": 0.9956, "step": 4205 }, { "epoch": 0.24, "grad_norm": 1.9172426462173462, "learning_rate": 1.7750686637226587e-05, "loss": 1.0918, "step": 4206 }, { "epoch": 0.24, "grad_norm": 1.8495709896087646, "learning_rate": 1.7749512721458225e-05, "loss": 0.9887, "step": 4207 }, { "epoch": 0.24, "grad_norm": 1.7149168252944946, "learning_rate": 1.7748338538275826e-05, "loss": 1.0178, "step": 4208 }, { "epoch": 0.24, "grad_norm": 2.0932137966156006, "learning_rate": 1.7747164087719908e-05, "loss": 1.0206, "step": 4209 }, { "epoch": 0.24, "grad_norm": 2.12624192237854, "learning_rate": 1.7745989369831e-05, "loss": 1.077, "step": 4210 }, { "epoch": 0.24, "grad_norm": 1.929797649383545, "learning_rate": 1.7744814384649633e-05, "loss": 1.0638, "step": 4211 }, { "epoch": 0.24, "grad_norm": 1.8133859634399414, "learning_rate": 1.7743639132216355e-05, "loss": 0.9956, "step": 4212 }, { "epoch": 0.24, "grad_norm": 1.9237077236175537, "learning_rate": 1.7742463612571724e-05, "loss": 1.0771, "step": 4213 }, { "epoch": 0.24, "grad_norm": 2.0847067832946777, "learning_rate": 1.7741287825756303e-05, "loss": 1.1072, "step": 4214 }, { "epoch": 0.24, "grad_norm": 1.7332103252410889, "learning_rate": 1.774011177181066e-05, "loss": 0.9776, "step": 4215 }, { "epoch": 0.24, "grad_norm": 1.8185864686965942, "learning_rate": 1.773893545077538e-05, "loss": 1.0761, "step": 4216 }, { "epoch": 0.24, "grad_norm": 1.8146651983261108, "learning_rate": 1.773775886269106e-05, "loss": 0.9719, "step": 4217 }, { "epoch": 0.24, "grad_norm": 1.1219741106033325, "learning_rate": 1.7736582007598295e-05, "loss": 0.6138, "step": 4218 }, { "epoch": 0.24, "grad_norm": 1.7386362552642822, "learning_rate": 1.7735404885537693e-05, "loss": 1.055, "step": 4219 }, { "epoch": 0.24, "grad_norm": 1.7388397455215454, "learning_rate": 1.773422749654988e-05, "loss": 1.0367, "step": 4220 }, { "epoch": 0.24, "grad_norm": 1.9700113534927368, "learning_rate": 1.773304984067548e-05, "loss": 1.0607, "step": 4221 }, { "epoch": 0.24, "grad_norm": 1.8276171684265137, "learning_rate": 1.773187191795513e-05, "loss": 1.0387, "step": 4222 }, { "epoch": 0.24, "grad_norm": 1.882305383682251, "learning_rate": 1.773069372842948e-05, "loss": 1.0452, "step": 4223 }, { "epoch": 0.24, "grad_norm": 1.8763865232467651, "learning_rate": 1.7729515272139185e-05, "loss": 1.0268, "step": 4224 }, { "epoch": 0.24, "grad_norm": 1.6875226497650146, "learning_rate": 1.7728336549124907e-05, "loss": 0.9949, "step": 4225 }, { "epoch": 0.24, "grad_norm": 1.9628195762634277, "learning_rate": 1.7727157559427322e-05, "loss": 1.0455, "step": 4226 }, { "epoch": 0.24, "grad_norm": 1.7888374328613281, "learning_rate": 1.7725978303087117e-05, "loss": 0.9878, "step": 4227 }, { "epoch": 0.24, "grad_norm": 1.9839298725128174, "learning_rate": 1.7724798780144983e-05, "loss": 0.9171, "step": 4228 }, { "epoch": 0.24, "grad_norm": 1.847846269607544, "learning_rate": 1.772361899064162e-05, "loss": 1.0838, "step": 4229 }, { "epoch": 0.24, "grad_norm": 1.6741400957107544, "learning_rate": 1.7722438934617742e-05, "loss": 0.9886, "step": 4230 }, { "epoch": 0.24, "grad_norm": 1.8983056545257568, "learning_rate": 1.7721258612114066e-05, "loss": 1.0238, "step": 4231 }, { "epoch": 0.24, "grad_norm": 1.8268014192581177, "learning_rate": 1.7720078023171325e-05, "loss": 1.0343, "step": 4232 }, { "epoch": 0.24, "grad_norm": 1.8222181797027588, "learning_rate": 1.7718897167830257e-05, "loss": 1.0102, "step": 4233 }, { "epoch": 0.24, "grad_norm": 2.109616994857788, "learning_rate": 1.771771604613161e-05, "loss": 1.0471, "step": 4234 }, { "epoch": 0.24, "grad_norm": 1.9435971975326538, "learning_rate": 1.7716534658116135e-05, "loss": 1.0223, "step": 4235 }, { "epoch": 0.24, "grad_norm": 1.8442895412445068, "learning_rate": 1.7715353003824613e-05, "loss": 1.0654, "step": 4236 }, { "epoch": 0.24, "grad_norm": 1.049138069152832, "learning_rate": 1.7714171083297804e-05, "loss": 0.5889, "step": 4237 }, { "epoch": 0.24, "grad_norm": 1.034022331237793, "learning_rate": 1.7712988896576503e-05, "loss": 0.5975, "step": 4238 }, { "epoch": 0.24, "grad_norm": 2.1796789169311523, "learning_rate": 1.77118064437015e-05, "loss": 1.071, "step": 4239 }, { "epoch": 0.24, "grad_norm": 2.094520092010498, "learning_rate": 1.77106237247136e-05, "loss": 1.0393, "step": 4240 }, { "epoch": 0.24, "grad_norm": 2.105039358139038, "learning_rate": 1.770944073965361e-05, "loss": 1.0383, "step": 4241 }, { "epoch": 0.24, "grad_norm": 1.888999342918396, "learning_rate": 1.770825748856236e-05, "loss": 1.1102, "step": 4242 }, { "epoch": 0.24, "grad_norm": 1.8211272954940796, "learning_rate": 1.7707073971480676e-05, "loss": 1.1087, "step": 4243 }, { "epoch": 0.24, "grad_norm": 1.8571031093597412, "learning_rate": 1.7705890188449396e-05, "loss": 0.996, "step": 4244 }, { "epoch": 0.24, "grad_norm": 1.8587195873260498, "learning_rate": 1.7704706139509372e-05, "loss": 1.1054, "step": 4245 }, { "epoch": 0.24, "grad_norm": 1.9014601707458496, "learning_rate": 1.770352182470146e-05, "loss": 1.0158, "step": 4246 }, { "epoch": 0.24, "grad_norm": 1.7314354181289673, "learning_rate": 1.770233724406653e-05, "loss": 1.0906, "step": 4247 }, { "epoch": 0.24, "grad_norm": 1.8266500234603882, "learning_rate": 1.770115239764546e-05, "loss": 1.0105, "step": 4248 }, { "epoch": 0.24, "grad_norm": 1.9728732109069824, "learning_rate": 1.7699967285479126e-05, "loss": 1.0199, "step": 4249 }, { "epoch": 0.24, "grad_norm": 1.8988287448883057, "learning_rate": 1.7698781907608436e-05, "loss": 0.9853, "step": 4250 }, { "epoch": 0.24, "grad_norm": 2.057539939880371, "learning_rate": 1.7697596264074285e-05, "loss": 1.0625, "step": 4251 }, { "epoch": 0.24, "grad_norm": 1.7484532594680786, "learning_rate": 1.769641035491759e-05, "loss": 1.0259, "step": 4252 }, { "epoch": 0.24, "grad_norm": 1.7445478439331055, "learning_rate": 1.7695224180179275e-05, "loss": 1.0879, "step": 4253 }, { "epoch": 0.24, "grad_norm": 1.7427477836608887, "learning_rate": 1.7694037739900266e-05, "loss": 1.0591, "step": 4254 }, { "epoch": 0.24, "grad_norm": 1.768796443939209, "learning_rate": 1.7692851034121507e-05, "loss": 1.0464, "step": 4255 }, { "epoch": 0.24, "grad_norm": 1.9365317821502686, "learning_rate": 1.769166406288395e-05, "loss": 1.0303, "step": 4256 }, { "epoch": 0.24, "grad_norm": 1.9032156467437744, "learning_rate": 1.7690476826228555e-05, "loss": 1.0552, "step": 4257 }, { "epoch": 0.24, "grad_norm": 1.9427025318145752, "learning_rate": 1.768928932419628e-05, "loss": 1.0931, "step": 4258 }, { "epoch": 0.24, "grad_norm": 1.8406506776809692, "learning_rate": 1.7688101556828113e-05, "loss": 1.0801, "step": 4259 }, { "epoch": 0.24, "grad_norm": 2.049480676651001, "learning_rate": 1.7686913524165035e-05, "loss": 1.1148, "step": 4260 }, { "epoch": 0.24, "grad_norm": 1.7732404470443726, "learning_rate": 1.7685725226248047e-05, "loss": 1.0077, "step": 4261 }, { "epoch": 0.24, "grad_norm": 1.9690828323364258, "learning_rate": 1.7684536663118152e-05, "loss": 1.0852, "step": 4262 }, { "epoch": 0.24, "grad_norm": 1.9237252473831177, "learning_rate": 1.768334783481636e-05, "loss": 0.989, "step": 4263 }, { "epoch": 0.24, "grad_norm": 1.8147753477096558, "learning_rate": 1.7682158741383697e-05, "loss": 0.9964, "step": 4264 }, { "epoch": 0.24, "grad_norm": 1.8905417919158936, "learning_rate": 1.7680969382861194e-05, "loss": 1.0317, "step": 4265 }, { "epoch": 0.24, "grad_norm": 1.7306811809539795, "learning_rate": 1.7679779759289894e-05, "loss": 1.1156, "step": 4266 }, { "epoch": 0.24, "grad_norm": 1.8386883735656738, "learning_rate": 1.767858987071085e-05, "loss": 1.087, "step": 4267 }, { "epoch": 0.24, "grad_norm": 1.775655746459961, "learning_rate": 1.7677399717165116e-05, "loss": 1.0489, "step": 4268 }, { "epoch": 0.24, "grad_norm": 1.9242711067199707, "learning_rate": 1.7676209298693765e-05, "loss": 1.0338, "step": 4269 }, { "epoch": 0.24, "grad_norm": 1.301798939704895, "learning_rate": 1.7675018615337874e-05, "loss": 0.6448, "step": 4270 }, { "epoch": 0.24, "grad_norm": 1.8412302732467651, "learning_rate": 1.767382766713853e-05, "loss": 1.0277, "step": 4271 }, { "epoch": 0.25, "grad_norm": 1.9537311792373657, "learning_rate": 1.7672636454136826e-05, "loss": 1.0361, "step": 4272 }, { "epoch": 0.25, "grad_norm": 2.0520377159118652, "learning_rate": 1.7671444976373874e-05, "loss": 0.9908, "step": 4273 }, { "epoch": 0.25, "grad_norm": 1.8259596824645996, "learning_rate": 1.767025323389078e-05, "loss": 1.0774, "step": 4274 }, { "epoch": 0.25, "grad_norm": 1.840742588043213, "learning_rate": 1.766906122672868e-05, "loss": 0.993, "step": 4275 }, { "epoch": 0.25, "grad_norm": 2.103976249694824, "learning_rate": 1.7667868954928695e-05, "loss": 1.0403, "step": 4276 }, { "epoch": 0.25, "grad_norm": 1.9355628490447998, "learning_rate": 1.7666676418531975e-05, "loss": 1.0885, "step": 4277 }, { "epoch": 0.25, "grad_norm": 1.779179334640503, "learning_rate": 1.7665483617579666e-05, "loss": 1.1026, "step": 4278 }, { "epoch": 0.25, "grad_norm": 1.8350656032562256, "learning_rate": 1.766429055211293e-05, "loss": 1.0463, "step": 4279 }, { "epoch": 0.25, "grad_norm": 1.8098876476287842, "learning_rate": 1.7663097222172936e-05, "loss": 1.1038, "step": 4280 }, { "epoch": 0.25, "grad_norm": 1.8046627044677734, "learning_rate": 1.7661903627800864e-05, "loss": 1.0161, "step": 4281 }, { "epoch": 0.25, "grad_norm": 1.1524578332901, "learning_rate": 1.76607097690379e-05, "loss": 0.6045, "step": 4282 }, { "epoch": 0.25, "grad_norm": 1.9195706844329834, "learning_rate": 1.7659515645925242e-05, "loss": 1.0875, "step": 4283 }, { "epoch": 0.25, "grad_norm": 1.8675733804702759, "learning_rate": 1.7658321258504092e-05, "loss": 1.0441, "step": 4284 }, { "epoch": 0.25, "grad_norm": 1.755473017692566, "learning_rate": 1.7657126606815672e-05, "loss": 1.064, "step": 4285 }, { "epoch": 0.25, "grad_norm": 2.0828771591186523, "learning_rate": 1.7655931690901197e-05, "loss": 1.0681, "step": 4286 }, { "epoch": 0.25, "grad_norm": 1.719041109085083, "learning_rate": 1.765473651080191e-05, "loss": 1.0533, "step": 4287 }, { "epoch": 0.25, "grad_norm": 1.7719743251800537, "learning_rate": 1.7653541066559044e-05, "loss": 0.9605, "step": 4288 }, { "epoch": 0.25, "grad_norm": 1.6929140090942383, "learning_rate": 1.765234535821386e-05, "loss": 1.0061, "step": 4289 }, { "epoch": 0.25, "grad_norm": 2.009722948074341, "learning_rate": 1.7651149385807612e-05, "loss": 1.0627, "step": 4290 }, { "epoch": 0.25, "grad_norm": 1.9348431825637817, "learning_rate": 1.7649953149381572e-05, "loss": 1.184, "step": 4291 }, { "epoch": 0.25, "grad_norm": 1.9220441579818726, "learning_rate": 1.764875664897702e-05, "loss": 1.1498, "step": 4292 }, { "epoch": 0.25, "grad_norm": 1.8753741979599, "learning_rate": 1.7647559884635238e-05, "loss": 1.0068, "step": 4293 }, { "epoch": 0.25, "grad_norm": 1.0409554243087769, "learning_rate": 1.7646362856397527e-05, "loss": 0.5888, "step": 4294 }, { "epoch": 0.25, "grad_norm": 1.7593097686767578, "learning_rate": 1.7645165564305197e-05, "loss": 0.9922, "step": 4295 }, { "epoch": 0.25, "grad_norm": 1.7983962297439575, "learning_rate": 1.7643968008399553e-05, "loss": 0.9999, "step": 4296 }, { "epoch": 0.25, "grad_norm": 1.7414953708648682, "learning_rate": 1.764277018872193e-05, "loss": 0.9985, "step": 4297 }, { "epoch": 0.25, "grad_norm": 1.8488049507141113, "learning_rate": 1.7641572105313657e-05, "loss": 1.0848, "step": 4298 }, { "epoch": 0.25, "grad_norm": 2.142622709274292, "learning_rate": 1.7640373758216075e-05, "loss": 1.0983, "step": 4299 }, { "epoch": 0.25, "grad_norm": 1.771493673324585, "learning_rate": 1.7639175147470537e-05, "loss": 1.0218, "step": 4300 }, { "epoch": 0.25, "grad_norm": 1.9123984575271606, "learning_rate": 1.7637976273118405e-05, "loss": 1.0646, "step": 4301 }, { "epoch": 0.25, "grad_norm": 1.2467190027236938, "learning_rate": 1.763677713520105e-05, "loss": 0.6547, "step": 4302 }, { "epoch": 0.25, "grad_norm": 1.8806278705596924, "learning_rate": 1.7635577733759843e-05, "loss": 1.0897, "step": 4303 }, { "epoch": 0.25, "grad_norm": 1.9061013460159302, "learning_rate": 1.763437806883618e-05, "loss": 1.0713, "step": 4304 }, { "epoch": 0.25, "grad_norm": 2.0251996517181396, "learning_rate": 1.763317814047146e-05, "loss": 1.0655, "step": 4305 }, { "epoch": 0.25, "grad_norm": 2.1427624225616455, "learning_rate": 1.763197794870708e-05, "loss": 1.0701, "step": 4306 }, { "epoch": 0.25, "grad_norm": 1.7627131938934326, "learning_rate": 1.763077749358446e-05, "loss": 0.9516, "step": 4307 }, { "epoch": 0.25, "grad_norm": 2.0266106128692627, "learning_rate": 1.7629576775145026e-05, "loss": 1.0311, "step": 4308 }, { "epoch": 0.25, "grad_norm": 1.843187689781189, "learning_rate": 1.762837579343021e-05, "loss": 0.9992, "step": 4309 }, { "epoch": 0.25, "grad_norm": 1.825524091720581, "learning_rate": 1.7627174548481455e-05, "loss": 1.0868, "step": 4310 }, { "epoch": 0.25, "grad_norm": 1.2021127939224243, "learning_rate": 1.7625973040340208e-05, "loss": 0.6306, "step": 4311 }, { "epoch": 0.25, "grad_norm": 1.9457613229751587, "learning_rate": 1.7624771269047935e-05, "loss": 1.1067, "step": 4312 }, { "epoch": 0.25, "grad_norm": 2.35219144821167, "learning_rate": 1.7623569234646108e-05, "loss": 1.1091, "step": 4313 }, { "epoch": 0.25, "grad_norm": 1.8737244606018066, "learning_rate": 1.76223669371762e-05, "loss": 1.1332, "step": 4314 }, { "epoch": 0.25, "grad_norm": 1.7767356634140015, "learning_rate": 1.7621164376679697e-05, "loss": 1.0771, "step": 4315 }, { "epoch": 0.25, "grad_norm": 1.869818091392517, "learning_rate": 1.761996155319811e-05, "loss": 0.9751, "step": 4316 }, { "epoch": 0.25, "grad_norm": 1.7541027069091797, "learning_rate": 1.7618758466772928e-05, "loss": 1.0052, "step": 4317 }, { "epoch": 0.25, "grad_norm": 1.764745831489563, "learning_rate": 1.7617555117445674e-05, "loss": 0.979, "step": 4318 }, { "epoch": 0.25, "grad_norm": 1.7584518194198608, "learning_rate": 1.7616351505257873e-05, "loss": 0.9849, "step": 4319 }, { "epoch": 0.25, "grad_norm": 1.7625293731689453, "learning_rate": 1.7615147630251055e-05, "loss": 1.0115, "step": 4320 }, { "epoch": 0.25, "grad_norm": 1.8104900121688843, "learning_rate": 1.7613943492466767e-05, "loss": 1.0396, "step": 4321 }, { "epoch": 0.25, "grad_norm": 1.8959300518035889, "learning_rate": 1.7612739091946556e-05, "loss": 1.0087, "step": 4322 }, { "epoch": 0.25, "grad_norm": 1.7578295469284058, "learning_rate": 1.7611534428731986e-05, "loss": 1.0243, "step": 4323 }, { "epoch": 0.25, "grad_norm": 1.7342609167099, "learning_rate": 1.7610329502864625e-05, "loss": 1.0253, "step": 4324 }, { "epoch": 0.25, "grad_norm": 1.8796322345733643, "learning_rate": 1.7609124314386053e-05, "loss": 1.0681, "step": 4325 }, { "epoch": 0.25, "grad_norm": 2.453695297241211, "learning_rate": 1.760791886333785e-05, "loss": 1.046, "step": 4326 }, { "epoch": 0.25, "grad_norm": 1.800392508506775, "learning_rate": 1.7606713149761626e-05, "loss": 0.965, "step": 4327 }, { "epoch": 0.25, "grad_norm": 1.8954888582229614, "learning_rate": 1.760550717369897e-05, "loss": 0.9225, "step": 4328 }, { "epoch": 0.25, "grad_norm": 1.8303425312042236, "learning_rate": 1.7604300935191517e-05, "loss": 1.0918, "step": 4329 }, { "epoch": 0.25, "grad_norm": 1.7157204151153564, "learning_rate": 1.7603094434280878e-05, "loss": 1.0535, "step": 4330 }, { "epoch": 0.25, "grad_norm": 1.8541831970214844, "learning_rate": 1.7601887671008685e-05, "loss": 1.0628, "step": 4331 }, { "epoch": 0.25, "grad_norm": 1.8427479267120361, "learning_rate": 1.7600680645416583e-05, "loss": 0.9964, "step": 4332 }, { "epoch": 0.25, "grad_norm": 2.1192212104797363, "learning_rate": 1.759947335754623e-05, "loss": 0.9555, "step": 4333 }, { "epoch": 0.25, "grad_norm": 1.7982256412506104, "learning_rate": 1.759826580743927e-05, "loss": 1.0827, "step": 4334 }, { "epoch": 0.25, "grad_norm": 1.7763760089874268, "learning_rate": 1.759705799513739e-05, "loss": 0.9659, "step": 4335 }, { "epoch": 0.25, "grad_norm": 1.8327903747558594, "learning_rate": 1.7595849920682258e-05, "loss": 1.062, "step": 4336 }, { "epoch": 0.25, "grad_norm": 2.0384023189544678, "learning_rate": 1.7594641584115565e-05, "loss": 1.0663, "step": 4337 }, { "epoch": 0.25, "grad_norm": 1.988202452659607, "learning_rate": 1.7593432985479003e-05, "loss": 1.0789, "step": 4338 }, { "epoch": 0.25, "grad_norm": 1.9371157884597778, "learning_rate": 1.759222412481428e-05, "loss": 1.0223, "step": 4339 }, { "epoch": 0.25, "grad_norm": 1.844335675239563, "learning_rate": 1.759101500216311e-05, "loss": 1.0108, "step": 4340 }, { "epoch": 0.25, "grad_norm": 1.1359611749649048, "learning_rate": 1.7589805617567218e-05, "loss": 0.6675, "step": 4341 }, { "epoch": 0.25, "grad_norm": 1.9542769193649292, "learning_rate": 1.7588595971068335e-05, "loss": 1.0133, "step": 4342 }, { "epoch": 0.25, "grad_norm": 1.9332380294799805, "learning_rate": 1.7587386062708206e-05, "loss": 0.9716, "step": 4343 }, { "epoch": 0.25, "grad_norm": 1.8213160037994385, "learning_rate": 1.7586175892528574e-05, "loss": 1.0374, "step": 4344 }, { "epoch": 0.25, "grad_norm": 1.9793041944503784, "learning_rate": 1.7584965460571207e-05, "loss": 0.9901, "step": 4345 }, { "epoch": 0.25, "grad_norm": 2.0708868503570557, "learning_rate": 1.7583754766877864e-05, "loss": 0.9619, "step": 4346 }, { "epoch": 0.25, "grad_norm": 1.815346598625183, "learning_rate": 1.7582543811490334e-05, "loss": 1.047, "step": 4347 }, { "epoch": 0.25, "grad_norm": 1.1594117879867554, "learning_rate": 1.7581332594450394e-05, "loss": 0.6383, "step": 4348 }, { "epoch": 0.25, "grad_norm": 1.9160046577453613, "learning_rate": 1.7580121115799845e-05, "loss": 1.0448, "step": 4349 }, { "epoch": 0.25, "grad_norm": 1.9320781230926514, "learning_rate": 1.7578909375580494e-05, "loss": 1.1024, "step": 4350 }, { "epoch": 0.25, "grad_norm": 3.172434091567993, "learning_rate": 1.7577697373834147e-05, "loss": 0.981, "step": 4351 }, { "epoch": 0.25, "grad_norm": 1.8760137557983398, "learning_rate": 1.7576485110602634e-05, "loss": 1.0946, "step": 4352 }, { "epoch": 0.25, "grad_norm": 1.7859572172164917, "learning_rate": 1.757527258592778e-05, "loss": 1.0843, "step": 4353 }, { "epoch": 0.25, "grad_norm": 1.8375836610794067, "learning_rate": 1.7574059799851433e-05, "loss": 1.0549, "step": 4354 }, { "epoch": 0.25, "grad_norm": 2.125635862350464, "learning_rate": 1.757284675241544e-05, "loss": 1.0184, "step": 4355 }, { "epoch": 0.25, "grad_norm": 1.682332158088684, "learning_rate": 1.7571633443661658e-05, "loss": 1.0099, "step": 4356 }, { "epoch": 0.25, "grad_norm": 1.990230679512024, "learning_rate": 1.757041987363196e-05, "loss": 1.0209, "step": 4357 }, { "epoch": 0.25, "grad_norm": 1.9476579427719116, "learning_rate": 1.7569206042368213e-05, "loss": 1.0016, "step": 4358 }, { "epoch": 0.25, "grad_norm": 1.9963138103485107, "learning_rate": 1.7567991949912313e-05, "loss": 1.0659, "step": 4359 }, { "epoch": 0.25, "grad_norm": 1.6647831201553345, "learning_rate": 1.756677759630615e-05, "loss": 1.0327, "step": 4360 }, { "epoch": 0.25, "grad_norm": 1.8915528059005737, "learning_rate": 1.7565562981591628e-05, "loss": 0.9759, "step": 4361 }, { "epoch": 0.25, "grad_norm": 1.921199917793274, "learning_rate": 1.7564348105810663e-05, "loss": 1.1693, "step": 4362 }, { "epoch": 0.25, "grad_norm": 1.7560514211654663, "learning_rate": 1.7563132969005175e-05, "loss": 1.1282, "step": 4363 }, { "epoch": 0.25, "grad_norm": 1.7801347970962524, "learning_rate": 1.7561917571217094e-05, "loss": 1.0528, "step": 4364 }, { "epoch": 0.25, "grad_norm": 1.5496139526367188, "learning_rate": 1.7560701912488362e-05, "loss": 0.9204, "step": 4365 }, { "epoch": 0.25, "grad_norm": 1.9893279075622559, "learning_rate": 1.7559485992860924e-05, "loss": 0.9709, "step": 4366 }, { "epoch": 0.25, "grad_norm": 1.764581322669983, "learning_rate": 1.7558269812376746e-05, "loss": 1.0323, "step": 4367 }, { "epoch": 0.25, "grad_norm": 1.669557809829712, "learning_rate": 1.7557053371077785e-05, "loss": 0.9808, "step": 4368 }, { "epoch": 0.25, "grad_norm": 1.8658924102783203, "learning_rate": 1.7555836669006026e-05, "loss": 1.0525, "step": 4369 }, { "epoch": 0.25, "grad_norm": 1.9508601427078247, "learning_rate": 1.755461970620345e-05, "loss": 1.0479, "step": 4370 }, { "epoch": 0.25, "grad_norm": 1.8994783163070679, "learning_rate": 1.7553402482712048e-05, "loss": 1.0795, "step": 4371 }, { "epoch": 0.25, "grad_norm": 1.6652858257293701, "learning_rate": 1.7552184998573827e-05, "loss": 0.9697, "step": 4372 }, { "epoch": 0.25, "grad_norm": 2.014883518218994, "learning_rate": 1.7550967253830795e-05, "loss": 1.049, "step": 4373 }, { "epoch": 0.25, "grad_norm": 1.7456210851669312, "learning_rate": 1.7549749248524982e-05, "loss": 1.036, "step": 4374 }, { "epoch": 0.25, "grad_norm": 1.8527990579605103, "learning_rate": 1.7548530982698405e-05, "loss": 1.1032, "step": 4375 }, { "epoch": 0.25, "grad_norm": 1.1514252424240112, "learning_rate": 1.7547312456393116e-05, "loss": 0.5883, "step": 4376 }, { "epoch": 0.25, "grad_norm": 1.9269376993179321, "learning_rate": 1.7546093669651155e-05, "loss": 1.0394, "step": 4377 }, { "epoch": 0.25, "grad_norm": 1.9862536191940308, "learning_rate": 1.7544874622514577e-05, "loss": 1.0888, "step": 4378 }, { "epoch": 0.25, "grad_norm": 2.036888599395752, "learning_rate": 1.7543655315025458e-05, "loss": 1.0216, "step": 4379 }, { "epoch": 0.25, "grad_norm": 1.854652762413025, "learning_rate": 1.7542435747225858e-05, "loss": 1.0913, "step": 4380 }, { "epoch": 0.25, "grad_norm": 1.871500849723816, "learning_rate": 1.7541215919157876e-05, "loss": 1.0197, "step": 4381 }, { "epoch": 0.25, "grad_norm": 1.7790777683258057, "learning_rate": 1.7539995830863598e-05, "loss": 1.0294, "step": 4382 }, { "epoch": 0.25, "grad_norm": 1.8373584747314453, "learning_rate": 1.753877548238512e-05, "loss": 1.0527, "step": 4383 }, { "epoch": 0.25, "grad_norm": 1.931557536125183, "learning_rate": 1.7537554873764566e-05, "loss": 0.979, "step": 4384 }, { "epoch": 0.25, "grad_norm": 1.8186033964157104, "learning_rate": 1.7536334005044044e-05, "loss": 1.0621, "step": 4385 }, { "epoch": 0.25, "grad_norm": 1.7134222984313965, "learning_rate": 1.753511287626569e-05, "loss": 0.9737, "step": 4386 }, { "epoch": 0.25, "grad_norm": 1.976021409034729, "learning_rate": 1.7533891487471636e-05, "loss": 1.0808, "step": 4387 }, { "epoch": 0.25, "grad_norm": 1.96632719039917, "learning_rate": 1.7532669838704036e-05, "loss": 1.0336, "step": 4388 }, { "epoch": 0.25, "grad_norm": 1.7563728094100952, "learning_rate": 1.753144793000504e-05, "loss": 1.023, "step": 4389 }, { "epoch": 0.25, "grad_norm": 2.106621026992798, "learning_rate": 1.7530225761416814e-05, "loss": 1.0728, "step": 4390 }, { "epoch": 0.25, "grad_norm": 2.1241793632507324, "learning_rate": 1.752900333298153e-05, "loss": 1.1316, "step": 4391 }, { "epoch": 0.25, "grad_norm": 1.9995064735412598, "learning_rate": 1.752778064474137e-05, "loss": 0.9829, "step": 4392 }, { "epoch": 0.25, "grad_norm": 1.7408829927444458, "learning_rate": 1.7526557696738536e-05, "loss": 1.0173, "step": 4393 }, { "epoch": 0.25, "grad_norm": 1.857115387916565, "learning_rate": 1.7525334489015217e-05, "loss": 0.9446, "step": 4394 }, { "epoch": 0.25, "grad_norm": 1.8432306051254272, "learning_rate": 1.7524111021613625e-05, "loss": 1.0162, "step": 4395 }, { "epoch": 0.25, "grad_norm": 1.7006149291992188, "learning_rate": 1.7522887294575978e-05, "loss": 1.0203, "step": 4396 }, { "epoch": 0.25, "grad_norm": 1.9943126440048218, "learning_rate": 1.7521663307944504e-05, "loss": 1.0712, "step": 4397 }, { "epoch": 0.25, "grad_norm": 1.8605749607086182, "learning_rate": 1.7520439061761444e-05, "loss": 1.051, "step": 4398 }, { "epoch": 0.25, "grad_norm": 1.9962127208709717, "learning_rate": 1.7519214556069037e-05, "loss": 1.076, "step": 4399 }, { "epoch": 0.25, "grad_norm": 1.9175949096679688, "learning_rate": 1.751798979090954e-05, "loss": 1.0559, "step": 4400 }, { "epoch": 0.25, "grad_norm": 1.8520915508270264, "learning_rate": 1.751676476632522e-05, "loss": 0.9975, "step": 4401 }, { "epoch": 0.25, "grad_norm": 1.9299004077911377, "learning_rate": 1.751553948235834e-05, "loss": 0.9731, "step": 4402 }, { "epoch": 0.25, "grad_norm": 1.8639037609100342, "learning_rate": 1.751431393905119e-05, "loss": 1.1013, "step": 4403 }, { "epoch": 0.25, "grad_norm": 1.8737397193908691, "learning_rate": 1.7513088136446055e-05, "loss": 1.0056, "step": 4404 }, { "epoch": 0.25, "grad_norm": 1.7145204544067383, "learning_rate": 1.7511862074585232e-05, "loss": 1.0403, "step": 4405 }, { "epoch": 0.25, "grad_norm": 1.861163854598999, "learning_rate": 1.7510635753511037e-05, "loss": 0.9912, "step": 4406 }, { "epoch": 0.25, "grad_norm": 1.879435420036316, "learning_rate": 1.750940917326578e-05, "loss": 1.0508, "step": 4407 }, { "epoch": 0.25, "grad_norm": 1.9297387599945068, "learning_rate": 1.750818233389179e-05, "loss": 1.0349, "step": 4408 }, { "epoch": 0.25, "grad_norm": 1.729915738105774, "learning_rate": 1.75069552354314e-05, "loss": 0.9792, "step": 4409 }, { "epoch": 0.25, "grad_norm": 2.050724506378174, "learning_rate": 1.7505727877926957e-05, "loss": 1.003, "step": 4410 }, { "epoch": 0.25, "grad_norm": 1.9200385808944702, "learning_rate": 1.750450026142081e-05, "loss": 1.0401, "step": 4411 }, { "epoch": 0.25, "grad_norm": 1.7514429092407227, "learning_rate": 1.750327238595532e-05, "loss": 1.0196, "step": 4412 }, { "epoch": 0.25, "grad_norm": 2.0090529918670654, "learning_rate": 1.750204425157286e-05, "loss": 1.0068, "step": 4413 }, { "epoch": 0.25, "grad_norm": 1.790141224861145, "learning_rate": 1.750081585831581e-05, "loss": 0.9904, "step": 4414 }, { "epoch": 0.25, "grad_norm": 1.7692090272903442, "learning_rate": 1.7499587206226558e-05, "loss": 1.0581, "step": 4415 }, { "epoch": 0.25, "grad_norm": 1.8720014095306396, "learning_rate": 1.74983582953475e-05, "loss": 0.9857, "step": 4416 }, { "epoch": 0.25, "grad_norm": 2.079662322998047, "learning_rate": 1.7497129125721043e-05, "loss": 1.0777, "step": 4417 }, { "epoch": 0.25, "grad_norm": 1.6887396574020386, "learning_rate": 1.7495899697389602e-05, "loss": 1.1083, "step": 4418 }, { "epoch": 0.25, "grad_norm": 1.8303890228271484, "learning_rate": 1.7494670010395603e-05, "loss": 1.0502, "step": 4419 }, { "epoch": 0.25, "grad_norm": 1.8686891794204712, "learning_rate": 1.7493440064781477e-05, "loss": 1.019, "step": 4420 }, { "epoch": 0.25, "grad_norm": 1.9353564977645874, "learning_rate": 1.7492209860589665e-05, "loss": 1.0761, "step": 4421 }, { "epoch": 0.25, "grad_norm": 1.7239558696746826, "learning_rate": 1.749097939786262e-05, "loss": 0.9892, "step": 4422 }, { "epoch": 0.25, "grad_norm": 1.9947906732559204, "learning_rate": 1.74897486766428e-05, "loss": 1.0948, "step": 4423 }, { "epoch": 0.25, "grad_norm": 2.148057460784912, "learning_rate": 1.7488517696972675e-05, "loss": 1.072, "step": 4424 }, { "epoch": 0.25, "grad_norm": 1.994001865386963, "learning_rate": 1.7487286458894725e-05, "loss": 0.9893, "step": 4425 }, { "epoch": 0.25, "grad_norm": 1.927014708518982, "learning_rate": 1.7486054962451435e-05, "loss": 1.0698, "step": 4426 }, { "epoch": 0.25, "grad_norm": 1.7765557765960693, "learning_rate": 1.7484823207685298e-05, "loss": 1.0266, "step": 4427 }, { "epoch": 0.25, "grad_norm": 1.9424831867218018, "learning_rate": 1.748359119463882e-05, "loss": 1.0077, "step": 4428 }, { "epoch": 0.25, "grad_norm": 2.102088689804077, "learning_rate": 1.748235892335451e-05, "loss": 1.0057, "step": 4429 }, { "epoch": 0.25, "grad_norm": 1.9213680028915405, "learning_rate": 1.74811263938749e-05, "loss": 1.0632, "step": 4430 }, { "epoch": 0.25, "grad_norm": 1.8730134963989258, "learning_rate": 1.7479893606242517e-05, "loss": 1.0234, "step": 4431 }, { "epoch": 0.25, "grad_norm": 1.8309963941574097, "learning_rate": 1.74786605604999e-05, "loss": 0.9764, "step": 4432 }, { "epoch": 0.25, "grad_norm": 1.8340407609939575, "learning_rate": 1.747742725668959e-05, "loss": 1.0787, "step": 4433 }, { "epoch": 0.25, "grad_norm": 1.0280392169952393, "learning_rate": 1.747619369485416e-05, "loss": 0.5216, "step": 4434 }, { "epoch": 0.25, "grad_norm": 1.7461820840835571, "learning_rate": 1.7474959875036167e-05, "loss": 1.0842, "step": 4435 }, { "epoch": 0.25, "grad_norm": 2.0159056186676025, "learning_rate": 1.747372579727819e-05, "loss": 0.9791, "step": 4436 }, { "epoch": 0.25, "grad_norm": 2.0121371746063232, "learning_rate": 1.7472491461622813e-05, "loss": 1.0148, "step": 4437 }, { "epoch": 0.25, "grad_norm": 1.8012595176696777, "learning_rate": 1.7471256868112632e-05, "loss": 1.0073, "step": 4438 }, { "epoch": 0.25, "grad_norm": 1.9510135650634766, "learning_rate": 1.747002201679024e-05, "loss": 0.9764, "step": 4439 }, { "epoch": 0.25, "grad_norm": 1.7875256538391113, "learning_rate": 1.746878690769826e-05, "loss": 1.0573, "step": 4440 }, { "epoch": 0.25, "grad_norm": 1.7916224002838135, "learning_rate": 1.7467551540879303e-05, "loss": 0.9429, "step": 4441 }, { "epoch": 0.25, "grad_norm": 1.997593641281128, "learning_rate": 1.7466315916376008e-05, "loss": 1.1146, "step": 4442 }, { "epoch": 0.25, "grad_norm": 2.287393569946289, "learning_rate": 1.7465080034231002e-05, "loss": 1.0024, "step": 4443 }, { "epoch": 0.25, "grad_norm": 2.137901782989502, "learning_rate": 1.746384389448694e-05, "loss": 1.0588, "step": 4444 }, { "epoch": 0.25, "grad_norm": 1.8191730976104736, "learning_rate": 1.7462607497186473e-05, "loss": 1.0947, "step": 4445 }, { "epoch": 0.25, "grad_norm": 1.983291745185852, "learning_rate": 1.7461370842372263e-05, "loss": 1.0369, "step": 4446 }, { "epoch": 0.26, "grad_norm": 2.1294503211975098, "learning_rate": 1.7460133930086997e-05, "loss": 1.038, "step": 4447 }, { "epoch": 0.26, "grad_norm": 1.8829761743545532, "learning_rate": 1.7458896760373337e-05, "loss": 1.0484, "step": 4448 }, { "epoch": 0.26, "grad_norm": 1.7346512079238892, "learning_rate": 1.7457659333273995e-05, "loss": 1.0111, "step": 4449 }, { "epoch": 0.26, "grad_norm": 1.8734426498413086, "learning_rate": 1.7456421648831658e-05, "loss": 1.0173, "step": 4450 }, { "epoch": 0.26, "grad_norm": 1.8326090574264526, "learning_rate": 1.745518370708904e-05, "loss": 1.0303, "step": 4451 }, { "epoch": 0.26, "grad_norm": 1.7571946382522583, "learning_rate": 1.7453945508088853e-05, "loss": 1.0763, "step": 4452 }, { "epoch": 0.26, "grad_norm": 1.0093625783920288, "learning_rate": 1.7452707051873835e-05, "loss": 0.5638, "step": 4453 }, { "epoch": 0.26, "grad_norm": 2.1001510620117188, "learning_rate": 1.745146833848671e-05, "loss": 1.037, "step": 4454 }, { "epoch": 0.26, "grad_norm": 1.7653131484985352, "learning_rate": 1.745022936797023e-05, "loss": 1.1007, "step": 4455 }, { "epoch": 0.26, "grad_norm": 1.9968433380126953, "learning_rate": 1.7448990140367143e-05, "loss": 1.0205, "step": 4456 }, { "epoch": 0.26, "grad_norm": 1.86422860622406, "learning_rate": 1.7447750655720215e-05, "loss": 1.0656, "step": 4457 }, { "epoch": 0.26, "grad_norm": 1.1134369373321533, "learning_rate": 1.7446510914072217e-05, "loss": 0.6609, "step": 4458 }, { "epoch": 0.26, "grad_norm": 1.7147307395935059, "learning_rate": 1.7445270915465927e-05, "loss": 1.0442, "step": 4459 }, { "epoch": 0.26, "grad_norm": 1.755669116973877, "learning_rate": 1.7444030659944138e-05, "loss": 0.9659, "step": 4460 }, { "epoch": 0.26, "grad_norm": 1.8571805953979492, "learning_rate": 1.7442790147549644e-05, "loss": 0.9453, "step": 4461 }, { "epoch": 0.26, "grad_norm": 3.6257529258728027, "learning_rate": 1.744154937832525e-05, "loss": 1.0123, "step": 4462 }, { "epoch": 0.26, "grad_norm": 1.9559122323989868, "learning_rate": 1.744030835231378e-05, "loss": 1.0895, "step": 4463 }, { "epoch": 0.26, "grad_norm": 1.782806634902954, "learning_rate": 1.7439067069558048e-05, "loss": 1.0253, "step": 4464 }, { "epoch": 0.26, "grad_norm": 2.1015076637268066, "learning_rate": 1.7437825530100892e-05, "loss": 1.0328, "step": 4465 }, { "epoch": 0.26, "grad_norm": 1.8133772611618042, "learning_rate": 1.7436583733985154e-05, "loss": 1.029, "step": 4466 }, { "epoch": 0.26, "grad_norm": 1.7965253591537476, "learning_rate": 1.7435341681253683e-05, "loss": 1.0085, "step": 4467 }, { "epoch": 0.26, "grad_norm": 1.7258120775222778, "learning_rate": 1.7434099371949345e-05, "loss": 0.9902, "step": 4468 }, { "epoch": 0.26, "grad_norm": 1.8406016826629639, "learning_rate": 1.7432856806114998e-05, "loss": 1.0623, "step": 4469 }, { "epoch": 0.26, "grad_norm": 1.8869602680206299, "learning_rate": 1.7431613983793528e-05, "loss": 0.9949, "step": 4470 }, { "epoch": 0.26, "grad_norm": 1.639609932899475, "learning_rate": 1.743037090502782e-05, "loss": 0.9081, "step": 4471 }, { "epoch": 0.26, "grad_norm": 1.7113995552062988, "learning_rate": 1.7429127569860768e-05, "loss": 1.0229, "step": 4472 }, { "epoch": 0.26, "grad_norm": 1.7718567848205566, "learning_rate": 1.7427883978335275e-05, "loss": 1.0297, "step": 4473 }, { "epoch": 0.26, "grad_norm": 1.9718457460403442, "learning_rate": 1.7426640130494258e-05, "loss": 0.9871, "step": 4474 }, { "epoch": 0.26, "grad_norm": 1.7503424882888794, "learning_rate": 1.742539602638063e-05, "loss": 0.9506, "step": 4475 }, { "epoch": 0.26, "grad_norm": 1.717276692390442, "learning_rate": 1.742415166603733e-05, "loss": 1.081, "step": 4476 }, { "epoch": 0.26, "grad_norm": 1.9617878198623657, "learning_rate": 1.7422907049507295e-05, "loss": 0.9606, "step": 4477 }, { "epoch": 0.26, "grad_norm": 1.8451484441757202, "learning_rate": 1.742166217683347e-05, "loss": 0.9326, "step": 4478 }, { "epoch": 0.26, "grad_norm": 1.778012990951538, "learning_rate": 1.7420417048058816e-05, "loss": 1.0367, "step": 4479 }, { "epoch": 0.26, "grad_norm": 1.862652063369751, "learning_rate": 1.74191716632263e-05, "loss": 0.9743, "step": 4480 }, { "epoch": 0.26, "grad_norm": 1.6514818668365479, "learning_rate": 1.7417926022378894e-05, "loss": 1.0858, "step": 4481 }, { "epoch": 0.26, "grad_norm": 1.0770589113235474, "learning_rate": 1.741668012555958e-05, "loss": 0.6152, "step": 4482 }, { "epoch": 0.26, "grad_norm": 1.7427926063537598, "learning_rate": 1.7415433972811356e-05, "loss": 0.993, "step": 4483 }, { "epoch": 0.26, "grad_norm": 2.0804619789123535, "learning_rate": 1.741418756417722e-05, "loss": 1.0858, "step": 4484 }, { "epoch": 0.26, "grad_norm": 1.9790469408035278, "learning_rate": 1.741294089970018e-05, "loss": 0.9608, "step": 4485 }, { "epoch": 0.26, "grad_norm": 1.8288018703460693, "learning_rate": 1.741169397942325e-05, "loss": 0.9527, "step": 4486 }, { "epoch": 0.26, "grad_norm": 1.710623860359192, "learning_rate": 1.7410446803389477e-05, "loss": 0.9809, "step": 4487 }, { "epoch": 0.26, "grad_norm": 1.9912240505218506, "learning_rate": 1.7409199371641875e-05, "loss": 1.0205, "step": 4488 }, { "epoch": 0.26, "grad_norm": 1.7927414178848267, "learning_rate": 1.7407951684223504e-05, "loss": 1.0715, "step": 4489 }, { "epoch": 0.26, "grad_norm": 1.6931726932525635, "learning_rate": 1.7406703741177416e-05, "loss": 1.0477, "step": 4490 }, { "epoch": 0.26, "grad_norm": 1.932991862297058, "learning_rate": 1.7405455542546668e-05, "loss": 1.0067, "step": 4491 }, { "epoch": 0.26, "grad_norm": 1.8996227979660034, "learning_rate": 1.7404207088374333e-05, "loss": 1.0551, "step": 4492 }, { "epoch": 0.26, "grad_norm": 1.7182364463806152, "learning_rate": 1.74029583787035e-05, "loss": 0.9863, "step": 4493 }, { "epoch": 0.26, "grad_norm": 1.776246428489685, "learning_rate": 1.7401709413577248e-05, "loss": 1.0253, "step": 4494 }, { "epoch": 0.26, "grad_norm": 2.094330072402954, "learning_rate": 1.7400460193038684e-05, "loss": 1.107, "step": 4495 }, { "epoch": 0.26, "grad_norm": 1.8532600402832031, "learning_rate": 1.739921071713091e-05, "loss": 1.1167, "step": 4496 }, { "epoch": 0.26, "grad_norm": 1.9286553859710693, "learning_rate": 1.739796098589704e-05, "loss": 1.0665, "step": 4497 }, { "epoch": 0.26, "grad_norm": 1.848468542098999, "learning_rate": 1.7396710999380205e-05, "loss": 0.9989, "step": 4498 }, { "epoch": 0.26, "grad_norm": 2.123755931854248, "learning_rate": 1.739546075762353e-05, "loss": 1.1226, "step": 4499 }, { "epoch": 0.26, "grad_norm": 1.9550516605377197, "learning_rate": 1.739421026067017e-05, "loss": 1.085, "step": 4500 }, { "epoch": 0.26, "grad_norm": 1.7448196411132812, "learning_rate": 1.7392959508563266e-05, "loss": 1.068, "step": 4501 }, { "epoch": 0.26, "grad_norm": 1.8628637790679932, "learning_rate": 1.739170850134598e-05, "loss": 1.0651, "step": 4502 }, { "epoch": 0.26, "grad_norm": 1.7664508819580078, "learning_rate": 1.739045723906148e-05, "loss": 1.1084, "step": 4503 }, { "epoch": 0.26, "grad_norm": 1.8520745038986206, "learning_rate": 1.7389205721752952e-05, "loss": 0.9457, "step": 4504 }, { "epoch": 0.26, "grad_norm": 2.015570878982544, "learning_rate": 1.7387953949463566e-05, "loss": 0.9678, "step": 4505 }, { "epoch": 0.26, "grad_norm": 1.813125729560852, "learning_rate": 1.7386701922236534e-05, "loss": 1.0742, "step": 4506 }, { "epoch": 0.26, "grad_norm": 1.7884405851364136, "learning_rate": 1.738544964011505e-05, "loss": 1.0867, "step": 4507 }, { "epoch": 0.26, "grad_norm": 1.817070484161377, "learning_rate": 1.738419710314233e-05, "loss": 0.9926, "step": 4508 }, { "epoch": 0.26, "grad_norm": 1.8422431945800781, "learning_rate": 1.7382944311361593e-05, "loss": 1.0771, "step": 4509 }, { "epoch": 0.26, "grad_norm": 1.764309048652649, "learning_rate": 1.738169126481607e-05, "loss": 0.9801, "step": 4510 }, { "epoch": 0.26, "grad_norm": 1.784175992012024, "learning_rate": 1.7380437963549005e-05, "loss": 1.0063, "step": 4511 }, { "epoch": 0.26, "grad_norm": 1.7415882349014282, "learning_rate": 1.737918440760364e-05, "loss": 1.0233, "step": 4512 }, { "epoch": 0.26, "grad_norm": 1.759074330329895, "learning_rate": 1.7377930597023235e-05, "loss": 1.0348, "step": 4513 }, { "epoch": 0.26, "grad_norm": 1.9451662302017212, "learning_rate": 1.7376676531851053e-05, "loss": 0.9744, "step": 4514 }, { "epoch": 0.26, "grad_norm": 1.8722350597381592, "learning_rate": 1.7375422212130372e-05, "loss": 1.0651, "step": 4515 }, { "epoch": 0.26, "grad_norm": 1.734866976737976, "learning_rate": 1.737416763790447e-05, "loss": 0.9875, "step": 4516 }, { "epoch": 0.26, "grad_norm": 1.1796067953109741, "learning_rate": 1.7372912809216647e-05, "loss": 0.6717, "step": 4517 }, { "epoch": 0.26, "grad_norm": 1.8314779996871948, "learning_rate": 1.7371657726110192e-05, "loss": 1.0172, "step": 4518 }, { "epoch": 0.26, "grad_norm": 1.7801262140274048, "learning_rate": 1.7370402388628424e-05, "loss": 0.9716, "step": 4519 }, { "epoch": 0.26, "grad_norm": 1.9206441640853882, "learning_rate": 1.736914679681466e-05, "loss": 1.0691, "step": 4520 }, { "epoch": 0.26, "grad_norm": 1.8735765218734741, "learning_rate": 1.7367890950712222e-05, "loss": 1.0628, "step": 4521 }, { "epoch": 0.26, "grad_norm": 2.5241646766662598, "learning_rate": 1.736663485036445e-05, "loss": 0.9981, "step": 4522 }, { "epoch": 0.26, "grad_norm": 1.891777515411377, "learning_rate": 1.7365378495814688e-05, "loss": 1.0586, "step": 4523 }, { "epoch": 0.26, "grad_norm": 2.002377986907959, "learning_rate": 1.7364121887106285e-05, "loss": 1.0557, "step": 4524 }, { "epoch": 0.26, "grad_norm": 2.0251810550689697, "learning_rate": 1.736286502428261e-05, "loss": 1.0239, "step": 4525 }, { "epoch": 0.26, "grad_norm": 1.738773226737976, "learning_rate": 1.736160790738703e-05, "loss": 0.9556, "step": 4526 }, { "epoch": 0.26, "grad_norm": 1.6899263858795166, "learning_rate": 1.7360350536462924e-05, "loss": 1.0476, "step": 4527 }, { "epoch": 0.26, "grad_norm": 1.873380422592163, "learning_rate": 1.735909291155368e-05, "loss": 0.9954, "step": 4528 }, { "epoch": 0.26, "grad_norm": 1.7814782857894897, "learning_rate": 1.7357835032702696e-05, "loss": 1.0363, "step": 4529 }, { "epoch": 0.26, "grad_norm": 1.8374149799346924, "learning_rate": 1.735657689995338e-05, "loss": 1.0731, "step": 4530 }, { "epoch": 0.26, "grad_norm": 1.6497817039489746, "learning_rate": 1.7355318513349148e-05, "loss": 0.9492, "step": 4531 }, { "epoch": 0.26, "grad_norm": 1.8671873807907104, "learning_rate": 1.7354059872933414e-05, "loss": 0.982, "step": 4532 }, { "epoch": 0.26, "grad_norm": 1.743689775466919, "learning_rate": 1.735280097874962e-05, "loss": 1.0225, "step": 4533 }, { "epoch": 0.26, "grad_norm": 1.767421841621399, "learning_rate": 1.7351541830841205e-05, "loss": 0.9663, "step": 4534 }, { "epoch": 0.26, "grad_norm": 1.8889718055725098, "learning_rate": 1.735028242925161e-05, "loss": 1.1091, "step": 4535 }, { "epoch": 0.26, "grad_norm": 1.7472302913665771, "learning_rate": 1.7349022774024307e-05, "loss": 0.93, "step": 4536 }, { "epoch": 0.26, "grad_norm": 1.9255951642990112, "learning_rate": 1.734776286520276e-05, "loss": 1.0153, "step": 4537 }, { "epoch": 0.26, "grad_norm": 1.8141483068466187, "learning_rate": 1.7346502702830436e-05, "loss": 0.9766, "step": 4538 }, { "epoch": 0.26, "grad_norm": 1.8694795370101929, "learning_rate": 1.7345242286950825e-05, "loss": 1.0406, "step": 4539 }, { "epoch": 0.26, "grad_norm": 1.8059786558151245, "learning_rate": 1.7343981617607423e-05, "loss": 0.9805, "step": 4540 }, { "epoch": 0.26, "grad_norm": 2.116225004196167, "learning_rate": 1.734272069484373e-05, "loss": 1.0873, "step": 4541 }, { "epoch": 0.26, "grad_norm": 2.0251054763793945, "learning_rate": 1.7341459518703256e-05, "loss": 1.066, "step": 4542 }, { "epoch": 0.26, "grad_norm": 1.8019906282424927, "learning_rate": 1.7340198089229522e-05, "loss": 1.0596, "step": 4543 }, { "epoch": 0.26, "grad_norm": 1.6431965827941895, "learning_rate": 1.7338936406466052e-05, "loss": 1.0467, "step": 4544 }, { "epoch": 0.26, "grad_norm": 1.8506948947906494, "learning_rate": 1.7337674470456394e-05, "loss": 1.052, "step": 4545 }, { "epoch": 0.26, "grad_norm": 1.7110610008239746, "learning_rate": 1.7336412281244085e-05, "loss": 1.0088, "step": 4546 }, { "epoch": 0.26, "grad_norm": 1.7550017833709717, "learning_rate": 1.733514983887268e-05, "loss": 1.0249, "step": 4547 }, { "epoch": 0.26, "grad_norm": 1.937481164932251, "learning_rate": 1.7333887143385742e-05, "loss": 0.9878, "step": 4548 }, { "epoch": 0.26, "grad_norm": 1.8417145013809204, "learning_rate": 1.7332624194826847e-05, "loss": 1.0479, "step": 4549 }, { "epoch": 0.26, "grad_norm": 1.8994860649108887, "learning_rate": 1.7331360993239577e-05, "loss": 1.0669, "step": 4550 }, { "epoch": 0.26, "grad_norm": 1.829964280128479, "learning_rate": 1.7330097538667513e-05, "loss": 1.0346, "step": 4551 }, { "epoch": 0.26, "grad_norm": 1.7882614135742188, "learning_rate": 1.7328833831154264e-05, "loss": 1.0568, "step": 4552 }, { "epoch": 0.26, "grad_norm": 1.729219913482666, "learning_rate": 1.7327569870743427e-05, "loss": 1.0643, "step": 4553 }, { "epoch": 0.26, "grad_norm": 1.9621493816375732, "learning_rate": 1.7326305657478626e-05, "loss": 1.0187, "step": 4554 }, { "epoch": 0.26, "grad_norm": 0.9608837962150574, "learning_rate": 1.732504119140348e-05, "loss": 0.5806, "step": 4555 }, { "epoch": 0.26, "grad_norm": 1.086506962776184, "learning_rate": 1.7323776472561625e-05, "loss": 0.6169, "step": 4556 }, { "epoch": 0.26, "grad_norm": 2.2444100379943848, "learning_rate": 1.7322511500996704e-05, "loss": 1.0632, "step": 4557 }, { "epoch": 0.26, "grad_norm": 1.7177653312683105, "learning_rate": 1.732124627675236e-05, "loss": 0.9777, "step": 4558 }, { "epoch": 0.26, "grad_norm": 1.7906980514526367, "learning_rate": 1.7319980799872266e-05, "loss": 0.9654, "step": 4559 }, { "epoch": 0.26, "grad_norm": 1.863255262374878, "learning_rate": 1.7318715070400075e-05, "loss": 1.0643, "step": 4560 }, { "epoch": 0.26, "grad_norm": 1.7572040557861328, "learning_rate": 1.7317449088379477e-05, "loss": 1.0482, "step": 4561 }, { "epoch": 0.26, "grad_norm": 1.8347163200378418, "learning_rate": 1.7316182853854147e-05, "loss": 1.0712, "step": 4562 }, { "epoch": 0.26, "grad_norm": 2.1304402351379395, "learning_rate": 1.7314916366867784e-05, "loss": 0.9882, "step": 4563 }, { "epoch": 0.26, "grad_norm": 2.151083469390869, "learning_rate": 1.731364962746409e-05, "loss": 1.0339, "step": 4564 }, { "epoch": 0.26, "grad_norm": 2.1241965293884277, "learning_rate": 1.731238263568678e-05, "loss": 1.0243, "step": 4565 }, { "epoch": 0.26, "grad_norm": 2.1738831996917725, "learning_rate": 1.731111539157957e-05, "loss": 1.0225, "step": 4566 }, { "epoch": 0.26, "grad_norm": 1.8628838062286377, "learning_rate": 1.730984789518619e-05, "loss": 1.0596, "step": 4567 }, { "epoch": 0.26, "grad_norm": 1.861246109008789, "learning_rate": 1.7308580146550382e-05, "loss": 1.038, "step": 4568 }, { "epoch": 0.26, "grad_norm": 2.05255126953125, "learning_rate": 1.7307312145715887e-05, "loss": 0.9758, "step": 4569 }, { "epoch": 0.26, "grad_norm": 2.001434564590454, "learning_rate": 1.730604389272646e-05, "loss": 1.0073, "step": 4570 }, { "epoch": 0.26, "grad_norm": 1.8627930879592896, "learning_rate": 1.7304775387625867e-05, "loss": 1.0492, "step": 4571 }, { "epoch": 0.26, "grad_norm": 1.665010690689087, "learning_rate": 1.7303506630457882e-05, "loss": 1.1345, "step": 4572 }, { "epoch": 0.26, "grad_norm": 1.3181103467941284, "learning_rate": 1.7302237621266283e-05, "loss": 0.6996, "step": 4573 }, { "epoch": 0.26, "grad_norm": 1.8638582229614258, "learning_rate": 1.7300968360094863e-05, "loss": 1.1246, "step": 4574 }, { "epoch": 0.26, "grad_norm": 1.7731599807739258, "learning_rate": 1.7299698846987422e-05, "loss": 1.0939, "step": 4575 }, { "epoch": 0.26, "grad_norm": 1.907395362854004, "learning_rate": 1.729842908198776e-05, "loss": 1.0166, "step": 4576 }, { "epoch": 0.26, "grad_norm": 1.8753777742385864, "learning_rate": 1.72971590651397e-05, "loss": 1.104, "step": 4577 }, { "epoch": 0.26, "grad_norm": 1.8011924028396606, "learning_rate": 1.729588879648706e-05, "loss": 1.0512, "step": 4578 }, { "epoch": 0.26, "grad_norm": 2.0923328399658203, "learning_rate": 1.7294618276073684e-05, "loss": 1.1141, "step": 4579 }, { "epoch": 0.26, "grad_norm": 1.905290126800537, "learning_rate": 1.729334750394341e-05, "loss": 1.0355, "step": 4580 }, { "epoch": 0.26, "grad_norm": 1.7892175912857056, "learning_rate": 1.7292076480140078e-05, "loss": 1.0007, "step": 4581 }, { "epoch": 0.26, "grad_norm": 1.9723823070526123, "learning_rate": 1.7290805204707563e-05, "loss": 1.0848, "step": 4582 }, { "epoch": 0.26, "grad_norm": 1.8557202816009521, "learning_rate": 1.7289533677689724e-05, "loss": 1.0225, "step": 4583 }, { "epoch": 0.26, "grad_norm": 1.7672028541564941, "learning_rate": 1.728826189913044e-05, "loss": 1.0743, "step": 4584 }, { "epoch": 0.26, "grad_norm": 1.8175417184829712, "learning_rate": 1.7286989869073597e-05, "loss": 1.0305, "step": 4585 }, { "epoch": 0.26, "grad_norm": 1.7689151763916016, "learning_rate": 1.728571758756309e-05, "loss": 0.9986, "step": 4586 }, { "epoch": 0.26, "grad_norm": 1.8418059349060059, "learning_rate": 1.728444505464282e-05, "loss": 1.0289, "step": 4587 }, { "epoch": 0.26, "grad_norm": 1.8006080389022827, "learning_rate": 1.7283172270356702e-05, "loss": 0.9365, "step": 4588 }, { "epoch": 0.26, "grad_norm": 1.159023404121399, "learning_rate": 1.7281899234748648e-05, "loss": 0.6608, "step": 4589 }, { "epoch": 0.26, "grad_norm": 1.9467965364456177, "learning_rate": 1.7280625947862594e-05, "loss": 1.0871, "step": 4590 }, { "epoch": 0.26, "grad_norm": 1.8746761083602905, "learning_rate": 1.727935240974248e-05, "loss": 1.0334, "step": 4591 }, { "epoch": 0.26, "grad_norm": 1.9868035316467285, "learning_rate": 1.7278078620432247e-05, "loss": 1.1233, "step": 4592 }, { "epoch": 0.26, "grad_norm": 1.8632564544677734, "learning_rate": 1.727680457997585e-05, "loss": 1.0035, "step": 4593 }, { "epoch": 0.26, "grad_norm": 1.6722391843795776, "learning_rate": 1.7275530288417256e-05, "loss": 1.0509, "step": 4594 }, { "epoch": 0.26, "grad_norm": 1.812365174293518, "learning_rate": 1.7274255745800433e-05, "loss": 0.9502, "step": 4595 }, { "epoch": 0.26, "grad_norm": 1.8872088193893433, "learning_rate": 1.7272980952169364e-05, "loss": 1.0553, "step": 4596 }, { "epoch": 0.26, "grad_norm": 1.7986476421356201, "learning_rate": 1.727170590756804e-05, "loss": 1.0079, "step": 4597 }, { "epoch": 0.26, "grad_norm": 1.8733775615692139, "learning_rate": 1.7270430612040456e-05, "loss": 1.0904, "step": 4598 }, { "epoch": 0.26, "grad_norm": 1.9841808080673218, "learning_rate": 1.7269155065630624e-05, "loss": 1.0962, "step": 4599 }, { "epoch": 0.26, "grad_norm": 1.9636262655258179, "learning_rate": 1.7267879268382556e-05, "loss": 1.0425, "step": 4600 }, { "epoch": 0.26, "grad_norm": 1.87576162815094, "learning_rate": 1.7266603220340273e-05, "loss": 0.9587, "step": 4601 }, { "epoch": 0.26, "grad_norm": 1.8474199771881104, "learning_rate": 1.7265326921547815e-05, "loss": 0.9974, "step": 4602 }, { "epoch": 0.26, "grad_norm": 2.010124921798706, "learning_rate": 1.7264050372049216e-05, "loss": 1.054, "step": 4603 }, { "epoch": 0.26, "grad_norm": 2.1257333755493164, "learning_rate": 1.726277357188853e-05, "loss": 1.0429, "step": 4604 }, { "epoch": 0.26, "grad_norm": 2.0573055744171143, "learning_rate": 1.7261496521109817e-05, "loss": 1.062, "step": 4605 }, { "epoch": 0.26, "grad_norm": 2.0497677326202393, "learning_rate": 1.7260219219757145e-05, "loss": 1.0711, "step": 4606 }, { "epoch": 0.26, "grad_norm": 1.906580924987793, "learning_rate": 1.7258941667874587e-05, "loss": 1.0824, "step": 4607 }, { "epoch": 0.26, "grad_norm": 1.9586677551269531, "learning_rate": 1.725766386550623e-05, "loss": 1.0637, "step": 4608 }, { "epoch": 0.26, "grad_norm": 1.9318772554397583, "learning_rate": 1.7256385812696165e-05, "loss": 1.0484, "step": 4609 }, { "epoch": 0.26, "grad_norm": 2.085712194442749, "learning_rate": 1.7255107509488494e-05, "loss": 1.0499, "step": 4610 }, { "epoch": 0.26, "grad_norm": 1.8690803050994873, "learning_rate": 1.725382895592733e-05, "loss": 1.0317, "step": 4611 }, { "epoch": 0.26, "grad_norm": 1.1537272930145264, "learning_rate": 1.7252550152056795e-05, "loss": 0.6377, "step": 4612 }, { "epoch": 0.26, "grad_norm": 1.1774359941482544, "learning_rate": 1.725127109792101e-05, "loss": 0.6328, "step": 4613 }, { "epoch": 0.26, "grad_norm": 2.14308762550354, "learning_rate": 1.7249991793564116e-05, "loss": 1.065, "step": 4614 }, { "epoch": 0.26, "grad_norm": 2.4846396446228027, "learning_rate": 1.7248712239030257e-05, "loss": 1.0505, "step": 4615 }, { "epoch": 0.26, "grad_norm": 1.9136874675750732, "learning_rate": 1.724743243436359e-05, "loss": 0.9939, "step": 4616 }, { "epoch": 0.26, "grad_norm": 2.1476850509643555, "learning_rate": 1.724615237960827e-05, "loss": 1.067, "step": 4617 }, { "epoch": 0.26, "grad_norm": 1.1069937944412231, "learning_rate": 1.7244872074808478e-05, "loss": 0.6178, "step": 4618 }, { "epoch": 0.26, "grad_norm": 1.8680189847946167, "learning_rate": 1.7243591520008384e-05, "loss": 0.9612, "step": 4619 }, { "epoch": 0.26, "grad_norm": 1.7486422061920166, "learning_rate": 1.724231071525218e-05, "loss": 1.1352, "step": 4620 }, { "epoch": 0.27, "grad_norm": 1.7785797119140625, "learning_rate": 1.7241029660584068e-05, "loss": 0.9916, "step": 4621 }, { "epoch": 0.27, "grad_norm": 1.8769954442977905, "learning_rate": 1.7239748356048248e-05, "loss": 0.9827, "step": 4622 }, { "epoch": 0.27, "grad_norm": 1.692273736000061, "learning_rate": 1.7238466801688934e-05, "loss": 0.9932, "step": 4623 }, { "epoch": 0.27, "grad_norm": 1.6582568883895874, "learning_rate": 1.723718499755035e-05, "loss": 1.0232, "step": 4624 }, { "epoch": 0.27, "grad_norm": 1.938091516494751, "learning_rate": 1.723590294367673e-05, "loss": 0.9657, "step": 4625 }, { "epoch": 0.27, "grad_norm": 2.0084519386291504, "learning_rate": 1.7234620640112313e-05, "loss": 1.003, "step": 4626 }, { "epoch": 0.27, "grad_norm": 1.7339975833892822, "learning_rate": 1.7233338086901342e-05, "loss": 1.0643, "step": 4627 }, { "epoch": 0.27, "grad_norm": 1.8406918048858643, "learning_rate": 1.7232055284088085e-05, "loss": 0.9802, "step": 4628 }, { "epoch": 0.27, "grad_norm": 2.019582509994507, "learning_rate": 1.72307722317168e-05, "loss": 1.147, "step": 4629 }, { "epoch": 0.27, "grad_norm": 2.017970085144043, "learning_rate": 1.7229488929831762e-05, "loss": 1.0419, "step": 4630 }, { "epoch": 0.27, "grad_norm": 1.8190929889678955, "learning_rate": 1.7228205378477258e-05, "loss": 0.9979, "step": 4631 }, { "epoch": 0.27, "grad_norm": 2.1298515796661377, "learning_rate": 1.7226921577697575e-05, "loss": 1.0833, "step": 4632 }, { "epoch": 0.27, "grad_norm": 2.0303947925567627, "learning_rate": 1.7225637527537017e-05, "loss": 1.0403, "step": 4633 }, { "epoch": 0.27, "grad_norm": 1.7437316179275513, "learning_rate": 1.7224353228039893e-05, "loss": 0.9475, "step": 4634 }, { "epoch": 0.27, "grad_norm": 1.7206364870071411, "learning_rate": 1.722306867925052e-05, "loss": 1.0109, "step": 4635 }, { "epoch": 0.27, "grad_norm": 1.8375470638275146, "learning_rate": 1.7221783881213222e-05, "loss": 1.0014, "step": 4636 }, { "epoch": 0.27, "grad_norm": 1.2050999402999878, "learning_rate": 1.7220498833972333e-05, "loss": 0.5861, "step": 4637 }, { "epoch": 0.27, "grad_norm": 1.9113013744354248, "learning_rate": 1.7219213537572203e-05, "loss": 1.0175, "step": 4638 }, { "epoch": 0.27, "grad_norm": 2.2827603816986084, "learning_rate": 1.721792799205718e-05, "loss": 1.0758, "step": 4639 }, { "epoch": 0.27, "grad_norm": 2.275219440460205, "learning_rate": 1.7216642197471626e-05, "loss": 1.11, "step": 4640 }, { "epoch": 0.27, "grad_norm": 1.7857520580291748, "learning_rate": 1.7215356153859904e-05, "loss": 1.124, "step": 4641 }, { "epoch": 0.27, "grad_norm": 1.6672146320343018, "learning_rate": 1.7214069861266398e-05, "loss": 1.0573, "step": 4642 }, { "epoch": 0.27, "grad_norm": 1.7119561433792114, "learning_rate": 1.7212783319735492e-05, "loss": 0.9232, "step": 4643 }, { "epoch": 0.27, "grad_norm": 1.6102291345596313, "learning_rate": 1.7211496529311582e-05, "loss": 1.022, "step": 4644 }, { "epoch": 0.27, "grad_norm": 1.908078908920288, "learning_rate": 1.7210209490039075e-05, "loss": 1.0198, "step": 4645 }, { "epoch": 0.27, "grad_norm": 1.9277675151824951, "learning_rate": 1.7208922201962376e-05, "loss": 1.0722, "step": 4646 }, { "epoch": 0.27, "grad_norm": 1.9552894830703735, "learning_rate": 1.7207634665125907e-05, "loss": 1.0283, "step": 4647 }, { "epoch": 0.27, "grad_norm": 1.826055645942688, "learning_rate": 1.7206346879574104e-05, "loss": 1.0395, "step": 4648 }, { "epoch": 0.27, "grad_norm": 1.8464971780776978, "learning_rate": 1.7205058845351398e-05, "loss": 0.9822, "step": 4649 }, { "epoch": 0.27, "grad_norm": 1.8195899724960327, "learning_rate": 1.7203770562502238e-05, "loss": 1.0106, "step": 4650 }, { "epoch": 0.27, "grad_norm": 1.8553869724273682, "learning_rate": 1.720248203107108e-05, "loss": 1.0796, "step": 4651 }, { "epoch": 0.27, "grad_norm": 1.1967116594314575, "learning_rate": 1.7201193251102383e-05, "loss": 0.6502, "step": 4652 }, { "epoch": 0.27, "grad_norm": 1.8553239107131958, "learning_rate": 1.7199904222640627e-05, "loss": 1.0031, "step": 4653 }, { "epoch": 0.27, "grad_norm": 1.784236192703247, "learning_rate": 1.7198614945730287e-05, "loss": 1.0453, "step": 4654 }, { "epoch": 0.27, "grad_norm": 1.9137392044067383, "learning_rate": 1.7197325420415852e-05, "loss": 1.0384, "step": 4655 }, { "epoch": 0.27, "grad_norm": 1.961923599243164, "learning_rate": 1.7196035646741824e-05, "loss": 1.1053, "step": 4656 }, { "epoch": 0.27, "grad_norm": 1.788831114768982, "learning_rate": 1.7194745624752704e-05, "loss": 1.0171, "step": 4657 }, { "epoch": 0.27, "grad_norm": 2.0162508487701416, "learning_rate": 1.7193455354493013e-05, "loss": 1.0585, "step": 4658 }, { "epoch": 0.27, "grad_norm": 1.7559564113616943, "learning_rate": 1.719216483600727e-05, "loss": 1.032, "step": 4659 }, { "epoch": 0.27, "grad_norm": 1.7875580787658691, "learning_rate": 1.7190874069340015e-05, "loss": 0.9777, "step": 4660 }, { "epoch": 0.27, "grad_norm": 1.8427859544754028, "learning_rate": 1.7189583054535775e-05, "loss": 1.0401, "step": 4661 }, { "epoch": 0.27, "grad_norm": 1.8399351835250854, "learning_rate": 1.7188291791639108e-05, "loss": 0.9954, "step": 4662 }, { "epoch": 0.27, "grad_norm": 1.886370062828064, "learning_rate": 1.7187000280694572e-05, "loss": 1.1176, "step": 4663 }, { "epoch": 0.27, "grad_norm": 1.9610744714736938, "learning_rate": 1.7185708521746734e-05, "loss": 1.0222, "step": 4664 }, { "epoch": 0.27, "grad_norm": 1.8299083709716797, "learning_rate": 1.718441651484017e-05, "loss": 1.0333, "step": 4665 }, { "epoch": 0.27, "grad_norm": 1.9010825157165527, "learning_rate": 1.7183124260019454e-05, "loss": 0.9701, "step": 4666 }, { "epoch": 0.27, "grad_norm": 1.6604851484298706, "learning_rate": 1.7181831757329187e-05, "loss": 1.0368, "step": 4667 }, { "epoch": 0.27, "grad_norm": 1.9394150972366333, "learning_rate": 1.7180539006813973e-05, "loss": 0.971, "step": 4668 }, { "epoch": 0.27, "grad_norm": 1.8684107065200806, "learning_rate": 1.717924600851841e-05, "loss": 0.9659, "step": 4669 }, { "epoch": 0.27, "grad_norm": 1.7895206212997437, "learning_rate": 1.7177952762487125e-05, "loss": 0.9975, "step": 4670 }, { "epoch": 0.27, "grad_norm": 2.070091962814331, "learning_rate": 1.717665926876474e-05, "loss": 1.039, "step": 4671 }, { "epoch": 0.27, "grad_norm": 1.7868837118148804, "learning_rate": 1.717536552739589e-05, "loss": 0.9915, "step": 4672 }, { "epoch": 0.27, "grad_norm": 1.7172470092773438, "learning_rate": 1.717407153842522e-05, "loss": 1.0347, "step": 4673 }, { "epoch": 0.27, "grad_norm": 1.7209681272506714, "learning_rate": 1.7172777301897382e-05, "loss": 1.016, "step": 4674 }, { "epoch": 0.27, "grad_norm": 1.8555841445922852, "learning_rate": 1.7171482817857034e-05, "loss": 0.9958, "step": 4675 }, { "epoch": 0.27, "grad_norm": 1.7856978178024292, "learning_rate": 1.7170188086348847e-05, "loss": 1.0656, "step": 4676 }, { "epoch": 0.27, "grad_norm": 1.6724331378936768, "learning_rate": 1.7168893107417498e-05, "loss": 1.0039, "step": 4677 }, { "epoch": 0.27, "grad_norm": 1.0794757604599, "learning_rate": 1.7167597881107673e-05, "loss": 0.6129, "step": 4678 }, { "epoch": 0.27, "grad_norm": 1.8575519323349, "learning_rate": 1.716630240746407e-05, "loss": 1.0745, "step": 4679 }, { "epoch": 0.27, "grad_norm": 2.1793243885040283, "learning_rate": 1.7165006686531387e-05, "loss": 0.9818, "step": 4680 }, { "epoch": 0.27, "grad_norm": 2.378910779953003, "learning_rate": 1.7163710718354338e-05, "loss": 1.0682, "step": 4681 }, { "epoch": 0.27, "grad_norm": 1.91934072971344, "learning_rate": 1.7162414502977643e-05, "loss": 1.009, "step": 4682 }, { "epoch": 0.27, "grad_norm": 1.720177173614502, "learning_rate": 1.716111804044603e-05, "loss": 1.0029, "step": 4683 }, { "epoch": 0.27, "grad_norm": 1.7971895933151245, "learning_rate": 1.7159821330804237e-05, "loss": 1.0726, "step": 4684 }, { "epoch": 0.27, "grad_norm": 2.093899965286255, "learning_rate": 1.715852437409701e-05, "loss": 1.0698, "step": 4685 }, { "epoch": 0.27, "grad_norm": 1.9821538925170898, "learning_rate": 1.715722717036911e-05, "loss": 1.1076, "step": 4686 }, { "epoch": 0.27, "grad_norm": 2.133533239364624, "learning_rate": 1.7155929719665286e-05, "loss": 1.0199, "step": 4687 }, { "epoch": 0.27, "grad_norm": 1.7891730070114136, "learning_rate": 1.7154632022030318e-05, "loss": 1.0255, "step": 4688 }, { "epoch": 0.27, "grad_norm": 1.8691664934158325, "learning_rate": 1.7153334077508983e-05, "loss": 0.9721, "step": 4689 }, { "epoch": 0.27, "grad_norm": 1.7908364534378052, "learning_rate": 1.7152035886146076e-05, "loss": 1.0807, "step": 4690 }, { "epoch": 0.27, "grad_norm": 1.7693136930465698, "learning_rate": 1.7150737447986384e-05, "loss": 1.0969, "step": 4691 }, { "epoch": 0.27, "grad_norm": 1.6246225833892822, "learning_rate": 1.714943876307472e-05, "loss": 1.0339, "step": 4692 }, { "epoch": 0.27, "grad_norm": 2.029156446456909, "learning_rate": 1.7148139831455896e-05, "loss": 0.967, "step": 4693 }, { "epoch": 0.27, "grad_norm": 1.7252434492111206, "learning_rate": 1.7146840653174732e-05, "loss": 0.9975, "step": 4694 }, { "epoch": 0.27, "grad_norm": 1.956209421157837, "learning_rate": 1.7145541228276063e-05, "loss": 1.0548, "step": 4695 }, { "epoch": 0.27, "grad_norm": 1.685001015663147, "learning_rate": 1.7144241556804724e-05, "loss": 0.9349, "step": 4696 }, { "epoch": 0.27, "grad_norm": 1.8997920751571655, "learning_rate": 1.7142941638805564e-05, "loss": 1.029, "step": 4697 }, { "epoch": 0.27, "grad_norm": 1.901615858078003, "learning_rate": 1.714164147432345e-05, "loss": 1.077, "step": 4698 }, { "epoch": 0.27, "grad_norm": 1.8288229703903198, "learning_rate": 1.7140341063403226e-05, "loss": 1.0495, "step": 4699 }, { "epoch": 0.27, "grad_norm": 1.5870082378387451, "learning_rate": 1.7139040406089786e-05, "loss": 1.0468, "step": 4700 }, { "epoch": 0.27, "grad_norm": 1.9229363203048706, "learning_rate": 1.7137739502428005e-05, "loss": 1.0361, "step": 4701 }, { "epoch": 0.27, "grad_norm": 1.7878434658050537, "learning_rate": 1.7136438352462764e-05, "loss": 1.0346, "step": 4702 }, { "epoch": 0.27, "grad_norm": 1.8636884689331055, "learning_rate": 1.7135136956238977e-05, "loss": 1.1064, "step": 4703 }, { "epoch": 0.27, "grad_norm": 1.6639491319656372, "learning_rate": 1.713383531380154e-05, "loss": 1.0152, "step": 4704 }, { "epoch": 0.27, "grad_norm": 1.8125758171081543, "learning_rate": 1.713253342519538e-05, "loss": 1.0325, "step": 4705 }, { "epoch": 0.27, "grad_norm": 1.9059473276138306, "learning_rate": 1.713123129046541e-05, "loss": 0.9953, "step": 4706 }, { "epoch": 0.27, "grad_norm": 1.7418614625930786, "learning_rate": 1.7129928909656573e-05, "loss": 1.0012, "step": 4707 }, { "epoch": 0.27, "grad_norm": 1.7379263639450073, "learning_rate": 1.7128626282813803e-05, "loss": 0.9819, "step": 4708 }, { "epoch": 0.27, "grad_norm": 1.8412891626358032, "learning_rate": 1.7127323409982053e-05, "loss": 1.0359, "step": 4709 }, { "epoch": 0.27, "grad_norm": 1.7066762447357178, "learning_rate": 1.712602029120628e-05, "loss": 1.0111, "step": 4710 }, { "epoch": 0.27, "grad_norm": 1.9225943088531494, "learning_rate": 1.7124716926531454e-05, "loss": 1.0073, "step": 4711 }, { "epoch": 0.27, "grad_norm": 1.9674371480941772, "learning_rate": 1.712341331600255e-05, "loss": 1.0675, "step": 4712 }, { "epoch": 0.27, "grad_norm": 1.8190809488296509, "learning_rate": 1.712210945966455e-05, "loss": 1.1216, "step": 4713 }, { "epoch": 0.27, "grad_norm": 1.9054967164993286, "learning_rate": 1.712080535756245e-05, "loss": 0.9991, "step": 4714 }, { "epoch": 0.27, "grad_norm": 1.8096667528152466, "learning_rate": 1.7119501009741245e-05, "loss": 1.0812, "step": 4715 }, { "epoch": 0.27, "grad_norm": 1.6954387426376343, "learning_rate": 1.7118196416245947e-05, "loss": 0.9051, "step": 4716 }, { "epoch": 0.27, "grad_norm": 2.1597070693969727, "learning_rate": 1.7116891577121576e-05, "loss": 1.0382, "step": 4717 }, { "epoch": 0.27, "grad_norm": 2.1272761821746826, "learning_rate": 1.711558649241316e-05, "loss": 1.0742, "step": 4718 }, { "epoch": 0.27, "grad_norm": 1.9281593561172485, "learning_rate": 1.7114281162165726e-05, "loss": 1.0587, "step": 4719 }, { "epoch": 0.27, "grad_norm": 1.7473527193069458, "learning_rate": 1.7112975586424327e-05, "loss": 1.0668, "step": 4720 }, { "epoch": 0.27, "grad_norm": 1.8329812288284302, "learning_rate": 1.7111669765234006e-05, "loss": 0.9759, "step": 4721 }, { "epoch": 0.27, "grad_norm": 1.7359579801559448, "learning_rate": 1.7110363698639826e-05, "loss": 1.0285, "step": 4722 }, { "epoch": 0.27, "grad_norm": 1.8638657331466675, "learning_rate": 1.710905738668686e-05, "loss": 1.1208, "step": 4723 }, { "epoch": 0.27, "grad_norm": 1.8270941972732544, "learning_rate": 1.7107750829420177e-05, "loss": 1.0529, "step": 4724 }, { "epoch": 0.27, "grad_norm": 1.8194390535354614, "learning_rate": 1.7106444026884873e-05, "loss": 1.0184, "step": 4725 }, { "epoch": 0.27, "grad_norm": 1.8500094413757324, "learning_rate": 1.7105136979126036e-05, "loss": 1.034, "step": 4726 }, { "epoch": 0.27, "grad_norm": 1.8671813011169434, "learning_rate": 1.7103829686188766e-05, "loss": 0.9974, "step": 4727 }, { "epoch": 0.27, "grad_norm": 1.7800992727279663, "learning_rate": 1.710252214811818e-05, "loss": 1.0502, "step": 4728 }, { "epoch": 0.27, "grad_norm": 1.939017415046692, "learning_rate": 1.7101214364959392e-05, "loss": 0.9988, "step": 4729 }, { "epoch": 0.27, "grad_norm": 2.0655815601348877, "learning_rate": 1.7099906336757533e-05, "loss": 1.1468, "step": 4730 }, { "epoch": 0.27, "grad_norm": 1.6860171556472778, "learning_rate": 1.7098598063557744e-05, "loss": 1.0033, "step": 4731 }, { "epoch": 0.27, "grad_norm": 1.7304410934448242, "learning_rate": 1.709728954540516e-05, "loss": 0.9844, "step": 4732 }, { "epoch": 0.27, "grad_norm": 1.9833543300628662, "learning_rate": 1.7095980782344942e-05, "loss": 1.1093, "step": 4733 }, { "epoch": 0.27, "grad_norm": 1.8146415948867798, "learning_rate": 1.7094671774422245e-05, "loss": 0.9643, "step": 4734 }, { "epoch": 0.27, "grad_norm": 2.025599956512451, "learning_rate": 1.709336252168225e-05, "loss": 1.0958, "step": 4735 }, { "epoch": 0.27, "grad_norm": 1.826157569885254, "learning_rate": 1.7092053024170122e-05, "loss": 0.9876, "step": 4736 }, { "epoch": 0.27, "grad_norm": 2.041050910949707, "learning_rate": 1.709074328193106e-05, "loss": 1.033, "step": 4737 }, { "epoch": 0.27, "grad_norm": 1.7011879682540894, "learning_rate": 1.7089433295010252e-05, "loss": 1.0384, "step": 4738 }, { "epoch": 0.27, "grad_norm": 1.9061089754104614, "learning_rate": 1.7088123063452905e-05, "loss": 0.9956, "step": 4739 }, { "epoch": 0.27, "grad_norm": 1.837290644645691, "learning_rate": 1.7086812587304233e-05, "loss": 1.0254, "step": 4740 }, { "epoch": 0.27, "grad_norm": 1.9794304370880127, "learning_rate": 1.7085501866609455e-05, "loss": 1.1418, "step": 4741 }, { "epoch": 0.27, "grad_norm": 1.885650634765625, "learning_rate": 1.7084190901413795e-05, "loss": 1.0039, "step": 4742 }, { "epoch": 0.27, "grad_norm": 1.779986023902893, "learning_rate": 1.70828796917625e-05, "loss": 1.0113, "step": 4743 }, { "epoch": 0.27, "grad_norm": 1.76728355884552, "learning_rate": 1.708156823770081e-05, "loss": 1.0148, "step": 4744 }, { "epoch": 0.27, "grad_norm": 1.7319813966751099, "learning_rate": 1.7080256539273984e-05, "loss": 0.9863, "step": 4745 }, { "epoch": 0.27, "grad_norm": 1.9736579656600952, "learning_rate": 1.707894459652728e-05, "loss": 1.0475, "step": 4746 }, { "epoch": 0.27, "grad_norm": 1.776868224143982, "learning_rate": 1.7077632409505974e-05, "loss": 0.9779, "step": 4747 }, { "epoch": 0.27, "grad_norm": 1.718104600906372, "learning_rate": 1.7076319978255345e-05, "loss": 1.0393, "step": 4748 }, { "epoch": 0.27, "grad_norm": 2.0262227058410645, "learning_rate": 1.707500730282068e-05, "loss": 0.9863, "step": 4749 }, { "epoch": 0.27, "grad_norm": 1.9351303577423096, "learning_rate": 1.7073694383247273e-05, "loss": 1.0606, "step": 4750 }, { "epoch": 0.27, "grad_norm": 2.0892128944396973, "learning_rate": 1.7072381219580438e-05, "loss": 1.0418, "step": 4751 }, { "epoch": 0.27, "grad_norm": 1.8069987297058105, "learning_rate": 1.7071067811865477e-05, "loss": 1.0253, "step": 4752 }, { "epoch": 0.27, "grad_norm": 1.8135216236114502, "learning_rate": 1.706975416014772e-05, "loss": 1.0444, "step": 4753 }, { "epoch": 0.27, "grad_norm": 1.8808784484863281, "learning_rate": 1.7068440264472496e-05, "loss": 1.0551, "step": 4754 }, { "epoch": 0.27, "grad_norm": 2.0072484016418457, "learning_rate": 1.7067126124885144e-05, "loss": 1.0224, "step": 4755 }, { "epoch": 0.27, "grad_norm": 1.8094242811203003, "learning_rate": 1.706581174143101e-05, "loss": 0.9853, "step": 4756 }, { "epoch": 0.27, "grad_norm": 2.072641611099243, "learning_rate": 1.706449711415545e-05, "loss": 1.0902, "step": 4757 }, { "epoch": 0.27, "grad_norm": 1.8186986446380615, "learning_rate": 1.706318224310383e-05, "loss": 1.0231, "step": 4758 }, { "epoch": 0.27, "grad_norm": 1.7402868270874023, "learning_rate": 1.7061867128321524e-05, "loss": 0.9819, "step": 4759 }, { "epoch": 0.27, "grad_norm": 1.684104561805725, "learning_rate": 1.7060551769853904e-05, "loss": 1.039, "step": 4760 }, { "epoch": 0.27, "grad_norm": 1.9414888620376587, "learning_rate": 1.7059236167746367e-05, "loss": 1.0714, "step": 4761 }, { "epoch": 0.27, "grad_norm": 1.885617971420288, "learning_rate": 1.705792032204431e-05, "loss": 1.0369, "step": 4762 }, { "epoch": 0.27, "grad_norm": 1.7878416776657104, "learning_rate": 1.7056604232793137e-05, "loss": 1.0263, "step": 4763 }, { "epoch": 0.27, "grad_norm": 1.8834227323532104, "learning_rate": 1.7055287900038264e-05, "loss": 0.9933, "step": 4764 }, { "epoch": 0.27, "grad_norm": 1.9338070154190063, "learning_rate": 1.7053971323825114e-05, "loss": 1.0804, "step": 4765 }, { "epoch": 0.27, "grad_norm": 1.8026280403137207, "learning_rate": 1.705265450419912e-05, "loss": 0.9992, "step": 4766 }, { "epoch": 0.27, "grad_norm": 1.1608020067214966, "learning_rate": 1.705133744120572e-05, "loss": 0.5857, "step": 4767 }, { "epoch": 0.27, "grad_norm": 2.1691300868988037, "learning_rate": 1.705002013489036e-05, "loss": 1.0393, "step": 4768 }, { "epoch": 0.27, "grad_norm": 1.1441478729248047, "learning_rate": 1.7048702585298493e-05, "loss": 0.5956, "step": 4769 }, { "epoch": 0.27, "grad_norm": 1.8856031894683838, "learning_rate": 1.7047384792475597e-05, "loss": 0.9926, "step": 4770 }, { "epoch": 0.27, "grad_norm": 1.7053017616271973, "learning_rate": 1.7046066756467134e-05, "loss": 1.0177, "step": 4771 }, { "epoch": 0.27, "grad_norm": 1.7317636013031006, "learning_rate": 1.7044748477318595e-05, "loss": 0.9415, "step": 4772 }, { "epoch": 0.27, "grad_norm": 1.906692624092102, "learning_rate": 1.704342995507546e-05, "loss": 1.0166, "step": 4773 }, { "epoch": 0.27, "grad_norm": 1.8884797096252441, "learning_rate": 1.704211118978323e-05, "loss": 1.0386, "step": 4774 }, { "epoch": 0.27, "grad_norm": 1.837873101234436, "learning_rate": 1.7040792181487423e-05, "loss": 1.0969, "step": 4775 }, { "epoch": 0.27, "grad_norm": 2.0613327026367188, "learning_rate": 1.703947293023354e-05, "loss": 1.0391, "step": 4776 }, { "epoch": 0.27, "grad_norm": 1.1687113046646118, "learning_rate": 1.703815343606711e-05, "loss": 0.6493, "step": 4777 }, { "epoch": 0.27, "grad_norm": 1.845878005027771, "learning_rate": 1.7036833699033665e-05, "loss": 1.0088, "step": 4778 }, { "epoch": 0.27, "grad_norm": 1.6320486068725586, "learning_rate": 1.7035513719178747e-05, "loss": 0.9654, "step": 4779 }, { "epoch": 0.27, "grad_norm": 1.8643770217895508, "learning_rate": 1.7034193496547903e-05, "loss": 1.0328, "step": 4780 }, { "epoch": 0.27, "grad_norm": 1.97599196434021, "learning_rate": 1.703287303118669e-05, "loss": 1.0173, "step": 4781 }, { "epoch": 0.27, "grad_norm": 1.8334367275238037, "learning_rate": 1.7031552323140674e-05, "loss": 1.0995, "step": 4782 }, { "epoch": 0.27, "grad_norm": 2.1179115772247314, "learning_rate": 1.703023137245543e-05, "loss": 1.1057, "step": 4783 }, { "epoch": 0.27, "grad_norm": 1.9273347854614258, "learning_rate": 1.702891017917654e-05, "loss": 1.0102, "step": 4784 }, { "epoch": 0.27, "grad_norm": 1.70667564868927, "learning_rate": 1.7027588743349596e-05, "loss": 0.9865, "step": 4785 }, { "epoch": 0.27, "grad_norm": 1.6817822456359863, "learning_rate": 1.7026267065020193e-05, "loss": 0.9957, "step": 4786 }, { "epoch": 0.27, "grad_norm": 1.9020365476608276, "learning_rate": 1.702494514423394e-05, "loss": 1.044, "step": 4787 }, { "epoch": 0.27, "grad_norm": 1.8188600540161133, "learning_rate": 1.7023622981036454e-05, "loss": 1.0867, "step": 4788 }, { "epoch": 0.27, "grad_norm": 1.997410774230957, "learning_rate": 1.7022300575473356e-05, "loss": 1.0236, "step": 4789 }, { "epoch": 0.27, "grad_norm": 2.1429548263549805, "learning_rate": 1.7020977927590286e-05, "loss": 1.075, "step": 4790 }, { "epoch": 0.27, "grad_norm": 1.8220083713531494, "learning_rate": 1.7019655037432875e-05, "loss": 1.1165, "step": 4791 }, { "epoch": 0.27, "grad_norm": 1.569589376449585, "learning_rate": 1.701833190504678e-05, "loss": 1.0598, "step": 4792 }, { "epoch": 0.27, "grad_norm": 1.8345558643341064, "learning_rate": 1.7017008530477658e-05, "loss": 1.0093, "step": 4793 }, { "epoch": 0.27, "grad_norm": 1.2149317264556885, "learning_rate": 1.701568491377117e-05, "loss": 0.6669, "step": 4794 }, { "epoch": 0.28, "grad_norm": 1.7364712953567505, "learning_rate": 1.7014361054972995e-05, "loss": 1.081, "step": 4795 }, { "epoch": 0.28, "grad_norm": 1.9525762796401978, "learning_rate": 1.701303695412881e-05, "loss": 1.1081, "step": 4796 }, { "epoch": 0.28, "grad_norm": 1.8129020929336548, "learning_rate": 1.7011712611284316e-05, "loss": 1.0315, "step": 4797 }, { "epoch": 0.28, "grad_norm": 1.9507430791854858, "learning_rate": 1.7010388026485202e-05, "loss": 1.0581, "step": 4798 }, { "epoch": 0.28, "grad_norm": 2.078479290008545, "learning_rate": 1.700906319977718e-05, "loss": 1.0721, "step": 4799 }, { "epoch": 0.28, "grad_norm": 2.0087268352508545, "learning_rate": 1.7007738131205966e-05, "loss": 1.0073, "step": 4800 }, { "epoch": 0.28, "grad_norm": 1.795243740081787, "learning_rate": 1.7006412820817288e-05, "loss": 1.0518, "step": 4801 }, { "epoch": 0.28, "grad_norm": 1.9656835794448853, "learning_rate": 1.700508726865687e-05, "loss": 1.0878, "step": 4802 }, { "epoch": 0.28, "grad_norm": 1.929245948791504, "learning_rate": 1.7003761474770462e-05, "loss": 0.9508, "step": 4803 }, { "epoch": 0.28, "grad_norm": 1.7213767766952515, "learning_rate": 1.700243543920381e-05, "loss": 0.9865, "step": 4804 }, { "epoch": 0.28, "grad_norm": 1.7619291543960571, "learning_rate": 1.7001109162002668e-05, "loss": 0.9814, "step": 4805 }, { "epoch": 0.28, "grad_norm": 1.5582529306411743, "learning_rate": 1.699978264321281e-05, "loss": 1.093, "step": 4806 }, { "epoch": 0.28, "grad_norm": 1.6498557329177856, "learning_rate": 1.6998455882880002e-05, "loss": 0.9467, "step": 4807 }, { "epoch": 0.28, "grad_norm": 1.9310967922210693, "learning_rate": 1.6997128881050028e-05, "loss": 1.049, "step": 4808 }, { "epoch": 0.28, "grad_norm": 1.8834171295166016, "learning_rate": 1.6995801637768687e-05, "loss": 1.0561, "step": 4809 }, { "epoch": 0.28, "grad_norm": 1.9829078912734985, "learning_rate": 1.6994474153081774e-05, "loss": 0.9531, "step": 4810 }, { "epoch": 0.28, "grad_norm": 1.7405204772949219, "learning_rate": 1.6993146427035093e-05, "loss": 0.9849, "step": 4811 }, { "epoch": 0.28, "grad_norm": 2.348369598388672, "learning_rate": 1.699181845967447e-05, "loss": 1.1075, "step": 4812 }, { "epoch": 0.28, "grad_norm": 1.9127343893051147, "learning_rate": 1.6990490251045717e-05, "loss": 1.0327, "step": 4813 }, { "epoch": 0.28, "grad_norm": 1.8989533185958862, "learning_rate": 1.6989161801194675e-05, "loss": 1.0079, "step": 4814 }, { "epoch": 0.28, "grad_norm": 1.642884612083435, "learning_rate": 1.698783311016718e-05, "loss": 0.9756, "step": 4815 }, { "epoch": 0.28, "grad_norm": 1.902197003364563, "learning_rate": 1.6986504178009085e-05, "loss": 0.9689, "step": 4816 }, { "epoch": 0.28, "grad_norm": 1.7135981321334839, "learning_rate": 1.698517500476625e-05, "loss": 1.0756, "step": 4817 }, { "epoch": 0.28, "grad_norm": 0.9768487215042114, "learning_rate": 1.6983845590484535e-05, "loss": 0.6036, "step": 4818 }, { "epoch": 0.28, "grad_norm": 1.9502159357070923, "learning_rate": 1.698251593520982e-05, "loss": 0.9728, "step": 4819 }, { "epoch": 0.28, "grad_norm": 1.913528561592102, "learning_rate": 1.698118603898798e-05, "loss": 1.0414, "step": 4820 }, { "epoch": 0.28, "grad_norm": 1.8836307525634766, "learning_rate": 1.6979855901864914e-05, "loss": 0.9903, "step": 4821 }, { "epoch": 0.28, "grad_norm": 1.7608457803726196, "learning_rate": 1.6978525523886515e-05, "loss": 1.0579, "step": 4822 }, { "epoch": 0.28, "grad_norm": 1.8165781497955322, "learning_rate": 1.69771949050987e-05, "loss": 0.9679, "step": 4823 }, { "epoch": 0.28, "grad_norm": 1.764281153678894, "learning_rate": 1.6975864045547373e-05, "loss": 1.0234, "step": 4824 }, { "epoch": 0.28, "grad_norm": 1.777406930923462, "learning_rate": 1.6974532945278468e-05, "loss": 1.0061, "step": 4825 }, { "epoch": 0.28, "grad_norm": 1.918281078338623, "learning_rate": 1.697320160433791e-05, "loss": 1.0516, "step": 4826 }, { "epoch": 0.28, "grad_norm": 2.0181868076324463, "learning_rate": 1.6971870022771648e-05, "loss": 1.0691, "step": 4827 }, { "epoch": 0.28, "grad_norm": 1.971143364906311, "learning_rate": 1.6970538200625622e-05, "loss": 1.1327, "step": 4828 }, { "epoch": 0.28, "grad_norm": 1.7758426666259766, "learning_rate": 1.6969206137945797e-05, "loss": 1.0397, "step": 4829 }, { "epoch": 0.28, "grad_norm": 1.9450143575668335, "learning_rate": 1.6967873834778136e-05, "loss": 1.0463, "step": 4830 }, { "epoch": 0.28, "grad_norm": 1.8333852291107178, "learning_rate": 1.6966541291168616e-05, "loss": 1.0376, "step": 4831 }, { "epoch": 0.28, "grad_norm": 2.133472442626953, "learning_rate": 1.696520850716321e-05, "loss": 1.0861, "step": 4832 }, { "epoch": 0.28, "grad_norm": 1.8106203079223633, "learning_rate": 1.6963875482807916e-05, "loss": 1.0421, "step": 4833 }, { "epoch": 0.28, "grad_norm": 1.8422704935073853, "learning_rate": 1.6962542218148735e-05, "loss": 1.0832, "step": 4834 }, { "epoch": 0.28, "grad_norm": 1.8666435480117798, "learning_rate": 1.696120871323167e-05, "loss": 1.1086, "step": 4835 }, { "epoch": 0.28, "grad_norm": 1.793318510055542, "learning_rate": 1.6959874968102736e-05, "loss": 0.9876, "step": 4836 }, { "epoch": 0.28, "grad_norm": 1.8047312498092651, "learning_rate": 1.6958540982807958e-05, "loss": 1.0864, "step": 4837 }, { "epoch": 0.28, "grad_norm": 1.8125795125961304, "learning_rate": 1.6957206757393372e-05, "loss": 0.9864, "step": 4838 }, { "epoch": 0.28, "grad_norm": 1.914389967918396, "learning_rate": 1.6955872291905014e-05, "loss": 1.0606, "step": 4839 }, { "epoch": 0.28, "grad_norm": 1.7422670125961304, "learning_rate": 1.6954537586388932e-05, "loss": 1.0353, "step": 4840 }, { "epoch": 0.28, "grad_norm": 1.8671469688415527, "learning_rate": 1.6953202640891187e-05, "loss": 1.0014, "step": 4841 }, { "epoch": 0.28, "grad_norm": 1.7410883903503418, "learning_rate": 1.695186745545784e-05, "loss": 1.0261, "step": 4842 }, { "epoch": 0.28, "grad_norm": 1.7394968271255493, "learning_rate": 1.6950532030134966e-05, "loss": 1.002, "step": 4843 }, { "epoch": 0.28, "grad_norm": 1.7510324716567993, "learning_rate": 1.6949196364968648e-05, "loss": 0.9897, "step": 4844 }, { "epoch": 0.28, "grad_norm": 1.797247052192688, "learning_rate": 1.6947860460004974e-05, "loss": 1.051, "step": 4845 }, { "epoch": 0.28, "grad_norm": 1.8486158847808838, "learning_rate": 1.6946524315290047e-05, "loss": 1.0578, "step": 4846 }, { "epoch": 0.28, "grad_norm": 1.8467079401016235, "learning_rate": 1.6945187930869967e-05, "loss": 1.0301, "step": 4847 }, { "epoch": 0.28, "grad_norm": 1.8644989728927612, "learning_rate": 1.6943851306790852e-05, "loss": 1.0498, "step": 4848 }, { "epoch": 0.28, "grad_norm": 1.7966654300689697, "learning_rate": 1.6942514443098826e-05, "loss": 1.0643, "step": 4849 }, { "epoch": 0.28, "grad_norm": 1.6879764795303345, "learning_rate": 1.694117733984002e-05, "loss": 0.9921, "step": 4850 }, { "epoch": 0.28, "grad_norm": 1.915128469467163, "learning_rate": 1.6939839997060575e-05, "loss": 1.0155, "step": 4851 }, { "epoch": 0.28, "grad_norm": 1.8353031873703003, "learning_rate": 1.6938502414806633e-05, "loss": 1.0629, "step": 4852 }, { "epoch": 0.28, "grad_norm": 1.9062198400497437, "learning_rate": 1.693716459312436e-05, "loss": 1.0046, "step": 4853 }, { "epoch": 0.28, "grad_norm": 2.0432519912719727, "learning_rate": 1.6935826532059913e-05, "loss": 1.0641, "step": 4854 }, { "epoch": 0.28, "grad_norm": 1.7728166580200195, "learning_rate": 1.6934488231659465e-05, "loss": 0.9788, "step": 4855 }, { "epoch": 0.28, "grad_norm": 1.6331583261489868, "learning_rate": 1.69331496919692e-05, "loss": 1.0028, "step": 4856 }, { "epoch": 0.28, "grad_norm": 1.9410744905471802, "learning_rate": 1.6931810913035306e-05, "loss": 0.9283, "step": 4857 }, { "epoch": 0.28, "grad_norm": 1.8948951959609985, "learning_rate": 1.693047189490398e-05, "loss": 1.0785, "step": 4858 }, { "epoch": 0.28, "grad_norm": 2.0742077827453613, "learning_rate": 1.6929132637621433e-05, "loss": 1.0692, "step": 4859 }, { "epoch": 0.28, "grad_norm": 1.7253693342208862, "learning_rate": 1.692779314123387e-05, "loss": 1.0015, "step": 4860 }, { "epoch": 0.28, "grad_norm": 1.7998379468917847, "learning_rate": 1.6926453405787518e-05, "loss": 0.9652, "step": 4861 }, { "epoch": 0.28, "grad_norm": 1.9471490383148193, "learning_rate": 1.6925113431328605e-05, "loss": 1.0556, "step": 4862 }, { "epoch": 0.28, "grad_norm": 2.0721354484558105, "learning_rate": 1.6923773217903378e-05, "loss": 1.0313, "step": 4863 }, { "epoch": 0.28, "grad_norm": 1.9251173734664917, "learning_rate": 1.692243276555807e-05, "loss": 1.0408, "step": 4864 }, { "epoch": 0.28, "grad_norm": 1.7999091148376465, "learning_rate": 1.6921092074338953e-05, "loss": 0.9218, "step": 4865 }, { "epoch": 0.28, "grad_norm": 1.8684536218643188, "learning_rate": 1.691975114429228e-05, "loss": 1.0716, "step": 4866 }, { "epoch": 0.28, "grad_norm": 1.7986088991165161, "learning_rate": 1.691840997546432e-05, "loss": 1.0215, "step": 4867 }, { "epoch": 0.28, "grad_norm": 1.8519375324249268, "learning_rate": 1.6917068567901358e-05, "loss": 1.0603, "step": 4868 }, { "epoch": 0.28, "grad_norm": 1.848416805267334, "learning_rate": 1.6915726921649685e-05, "loss": 1.0526, "step": 4869 }, { "epoch": 0.28, "grad_norm": 1.2385339736938477, "learning_rate": 1.691438503675559e-05, "loss": 0.7072, "step": 4870 }, { "epoch": 0.28, "grad_norm": 2.118136405944824, "learning_rate": 1.6913042913265388e-05, "loss": 1.058, "step": 4871 }, { "epoch": 0.28, "grad_norm": 1.0882911682128906, "learning_rate": 1.6911700551225382e-05, "loss": 0.6453, "step": 4872 }, { "epoch": 0.28, "grad_norm": 1.727582573890686, "learning_rate": 1.6910357950681898e-05, "loss": 1.0236, "step": 4873 }, { "epoch": 0.28, "grad_norm": 1.8469053506851196, "learning_rate": 1.6909015111681265e-05, "loss": 0.9702, "step": 4874 }, { "epoch": 0.28, "grad_norm": 1.9877110719680786, "learning_rate": 1.690767203426982e-05, "loss": 1.1292, "step": 4875 }, { "epoch": 0.28, "grad_norm": 1.9954696893692017, "learning_rate": 1.6906328718493906e-05, "loss": 1.1002, "step": 4876 }, { "epoch": 0.28, "grad_norm": 1.833004355430603, "learning_rate": 1.690498516439988e-05, "loss": 1.0518, "step": 4877 }, { "epoch": 0.28, "grad_norm": 1.9706647396087646, "learning_rate": 1.6903641372034107e-05, "loss": 1.0977, "step": 4878 }, { "epoch": 0.28, "grad_norm": 2.052793264389038, "learning_rate": 1.6902297341442956e-05, "loss": 1.0011, "step": 4879 }, { "epoch": 0.28, "grad_norm": 1.9225096702575684, "learning_rate": 1.6900953072672802e-05, "loss": 0.9972, "step": 4880 }, { "epoch": 0.28, "grad_norm": 1.9569402933120728, "learning_rate": 1.6899608565770035e-05, "loss": 1.0629, "step": 4881 }, { "epoch": 0.28, "grad_norm": 2.0029420852661133, "learning_rate": 1.6898263820781047e-05, "loss": 1.1177, "step": 4882 }, { "epoch": 0.28, "grad_norm": 1.7940871715545654, "learning_rate": 1.689691883775225e-05, "loss": 1.0343, "step": 4883 }, { "epoch": 0.28, "grad_norm": 1.9395021200180054, "learning_rate": 1.6895573616730046e-05, "loss": 1.0731, "step": 4884 }, { "epoch": 0.28, "grad_norm": 1.554253339767456, "learning_rate": 1.689422815776086e-05, "loss": 1.0751, "step": 4885 }, { "epoch": 0.28, "grad_norm": 1.8766225576400757, "learning_rate": 1.6892882460891118e-05, "loss": 1.0117, "step": 4886 }, { "epoch": 0.28, "grad_norm": 1.883284330368042, "learning_rate": 1.6891536526167252e-05, "loss": 1.0988, "step": 4887 }, { "epoch": 0.28, "grad_norm": 1.821362853050232, "learning_rate": 1.689019035363572e-05, "loss": 1.0654, "step": 4888 }, { "epoch": 0.28, "grad_norm": 2.2597479820251465, "learning_rate": 1.688884394334296e-05, "loss": 1.0652, "step": 4889 }, { "epoch": 0.28, "grad_norm": 1.7758064270019531, "learning_rate": 1.688749729533544e-05, "loss": 1.0065, "step": 4890 }, { "epoch": 0.28, "grad_norm": 1.6758724451065063, "learning_rate": 1.688615040965963e-05, "loss": 1.0092, "step": 4891 }, { "epoch": 0.28, "grad_norm": 2.0197389125823975, "learning_rate": 1.6884803286362e-05, "loss": 1.1238, "step": 4892 }, { "epoch": 0.28, "grad_norm": 2.1135830879211426, "learning_rate": 1.6883455925489044e-05, "loss": 1.0507, "step": 4893 }, { "epoch": 0.28, "grad_norm": 1.1909617185592651, "learning_rate": 1.6882108327087252e-05, "loss": 0.6107, "step": 4894 }, { "epoch": 0.28, "grad_norm": 1.937817931175232, "learning_rate": 1.6880760491203124e-05, "loss": 1.0924, "step": 4895 }, { "epoch": 0.28, "grad_norm": 1.8181272745132446, "learning_rate": 1.6879412417883175e-05, "loss": 0.9535, "step": 4896 }, { "epoch": 0.28, "grad_norm": 1.9131006002426147, "learning_rate": 1.687806410717392e-05, "loss": 0.9977, "step": 4897 }, { "epoch": 0.28, "grad_norm": 1.6708403825759888, "learning_rate": 1.6876715559121883e-05, "loss": 0.9352, "step": 4898 }, { "epoch": 0.28, "grad_norm": 1.8211203813552856, "learning_rate": 1.6875366773773604e-05, "loss": 0.9778, "step": 4899 }, { "epoch": 0.28, "grad_norm": 2.0896263122558594, "learning_rate": 1.6874017751175622e-05, "loss": 1.0195, "step": 4900 }, { "epoch": 0.28, "grad_norm": 2.047825336456299, "learning_rate": 1.687266849137449e-05, "loss": 1.035, "step": 4901 }, { "epoch": 0.28, "grad_norm": 2.026298761367798, "learning_rate": 1.6871318994416766e-05, "loss": 1.0087, "step": 4902 }, { "epoch": 0.28, "grad_norm": 1.9729666709899902, "learning_rate": 1.686996926034902e-05, "loss": 1.1111, "step": 4903 }, { "epoch": 0.28, "grad_norm": 1.1248681545257568, "learning_rate": 1.686861928921782e-05, "loss": 0.6334, "step": 4904 }, { "epoch": 0.28, "grad_norm": 2.1286895275115967, "learning_rate": 1.6867269081069756e-05, "loss": 1.0766, "step": 4905 }, { "epoch": 0.28, "grad_norm": 2.057871103286743, "learning_rate": 1.6865918635951425e-05, "loss": 1.0375, "step": 4906 }, { "epoch": 0.28, "grad_norm": 1.907536268234253, "learning_rate": 1.6864567953909417e-05, "loss": 1.0141, "step": 4907 }, { "epoch": 0.28, "grad_norm": 1.9625297784805298, "learning_rate": 1.6863217034990343e-05, "loss": 0.9866, "step": 4908 }, { "epoch": 0.28, "grad_norm": 1.8968021869659424, "learning_rate": 1.6861865879240822e-05, "loss": 0.9839, "step": 4909 }, { "epoch": 0.28, "grad_norm": 1.8067208528518677, "learning_rate": 1.6860514486707477e-05, "loss": 0.9844, "step": 4910 }, { "epoch": 0.28, "grad_norm": 1.87395441532135, "learning_rate": 1.6859162857436943e-05, "loss": 1.0789, "step": 4911 }, { "epoch": 0.28, "grad_norm": 1.927905559539795, "learning_rate": 1.6857810991475857e-05, "loss": 0.9781, "step": 4912 }, { "epoch": 0.28, "grad_norm": 1.0463930368423462, "learning_rate": 1.685645888887087e-05, "loss": 0.6187, "step": 4913 }, { "epoch": 0.28, "grad_norm": 1.7887893915176392, "learning_rate": 1.685510654966864e-05, "loss": 1.0948, "step": 4914 }, { "epoch": 0.28, "grad_norm": 1.7676926851272583, "learning_rate": 1.6853753973915832e-05, "loss": 1.0488, "step": 4915 }, { "epoch": 0.28, "grad_norm": 1.7640408277511597, "learning_rate": 1.6852401161659122e-05, "loss": 0.9792, "step": 4916 }, { "epoch": 0.28, "grad_norm": 1.7754625082015991, "learning_rate": 1.685104811294519e-05, "loss": 1.0557, "step": 4917 }, { "epoch": 0.28, "grad_norm": 1.764940619468689, "learning_rate": 1.6849694827820718e-05, "loss": 1.0284, "step": 4918 }, { "epoch": 0.28, "grad_norm": 1.7750033140182495, "learning_rate": 1.6848341306332418e-05, "loss": 1.0486, "step": 4919 }, { "epoch": 0.28, "grad_norm": 1.8623255491256714, "learning_rate": 1.6846987548526987e-05, "loss": 0.9688, "step": 4920 }, { "epoch": 0.28, "grad_norm": 1.7640178203582764, "learning_rate": 1.6845633554451142e-05, "loss": 1.0442, "step": 4921 }, { "epoch": 0.28, "grad_norm": 1.8016480207443237, "learning_rate": 1.6844279324151608e-05, "loss": 0.9812, "step": 4922 }, { "epoch": 0.28, "grad_norm": 1.9299976825714111, "learning_rate": 1.684292485767511e-05, "loss": 0.9732, "step": 4923 }, { "epoch": 0.28, "grad_norm": 1.8280783891677856, "learning_rate": 1.684157015506839e-05, "loss": 0.9375, "step": 4924 }, { "epoch": 0.28, "grad_norm": 1.8758163452148438, "learning_rate": 1.6840215216378198e-05, "loss": 0.9807, "step": 4925 }, { "epoch": 0.28, "grad_norm": 1.743076205253601, "learning_rate": 1.6838860041651286e-05, "loss": 0.9689, "step": 4926 }, { "epoch": 0.28, "grad_norm": 1.9167157411575317, "learning_rate": 1.6837504630934412e-05, "loss": 0.9753, "step": 4927 }, { "epoch": 0.28, "grad_norm": 1.8990880250930786, "learning_rate": 1.683614898427436e-05, "loss": 0.9734, "step": 4928 }, { "epoch": 0.28, "grad_norm": 1.7996269464492798, "learning_rate": 1.6834793101717897e-05, "loss": 1.001, "step": 4929 }, { "epoch": 0.28, "grad_norm": 2.061493396759033, "learning_rate": 1.6833436983311823e-05, "loss": 1.0002, "step": 4930 }, { "epoch": 0.28, "grad_norm": 1.6283007860183716, "learning_rate": 1.6832080629102923e-05, "loss": 1.0448, "step": 4931 }, { "epoch": 0.28, "grad_norm": 1.7978817224502563, "learning_rate": 1.6830724039138005e-05, "loss": 1.0868, "step": 4932 }, { "epoch": 0.28, "grad_norm": 1.8094483613967896, "learning_rate": 1.682936721346388e-05, "loss": 0.9893, "step": 4933 }, { "epoch": 0.28, "grad_norm": 1.836949348449707, "learning_rate": 1.682801015212737e-05, "loss": 0.9779, "step": 4934 }, { "epoch": 0.28, "grad_norm": 1.1489976644515991, "learning_rate": 1.6826652855175304e-05, "loss": 0.5886, "step": 4935 }, { "epoch": 0.28, "grad_norm": 2.0390729904174805, "learning_rate": 1.6825295322654517e-05, "loss": 1.0726, "step": 4936 }, { "epoch": 0.28, "grad_norm": 1.9677833318710327, "learning_rate": 1.6823937554611856e-05, "loss": 1.1067, "step": 4937 }, { "epoch": 0.28, "grad_norm": 1.645825743675232, "learning_rate": 1.682257955109417e-05, "loss": 1.0226, "step": 4938 }, { "epoch": 0.28, "grad_norm": 1.138099193572998, "learning_rate": 1.6821221312148322e-05, "loss": 0.6241, "step": 4939 }, { "epoch": 0.28, "grad_norm": 1.7123996019363403, "learning_rate": 1.681986283782118e-05, "loss": 1.056, "step": 4940 }, { "epoch": 0.28, "grad_norm": 1.8262661695480347, "learning_rate": 1.6818504128159628e-05, "loss": 1.1014, "step": 4941 }, { "epoch": 0.28, "grad_norm": 1.103005051612854, "learning_rate": 1.6817145183210538e-05, "loss": 0.6455, "step": 4942 }, { "epoch": 0.28, "grad_norm": 0.9934080243110657, "learning_rate": 1.6815786003020812e-05, "loss": 0.5659, "step": 4943 }, { "epoch": 0.28, "grad_norm": 1.700119137763977, "learning_rate": 1.6814426587637354e-05, "loss": 1.0563, "step": 4944 }, { "epoch": 0.28, "grad_norm": 1.7847425937652588, "learning_rate": 1.6813066937107065e-05, "loss": 1.0749, "step": 4945 }, { "epoch": 0.28, "grad_norm": 1.731642723083496, "learning_rate": 1.6811707051476868e-05, "loss": 1.0615, "step": 4946 }, { "epoch": 0.28, "grad_norm": 1.9088008403778076, "learning_rate": 1.681034693079369e-05, "loss": 0.9486, "step": 4947 }, { "epoch": 0.28, "grad_norm": 1.9439805746078491, "learning_rate": 1.6808986575104464e-05, "loss": 1.0261, "step": 4948 }, { "epoch": 0.28, "grad_norm": 1.8109700679779053, "learning_rate": 1.680762598445613e-05, "loss": 0.9845, "step": 4949 }, { "epoch": 0.28, "grad_norm": 2.028010845184326, "learning_rate": 1.6806265158895642e-05, "loss": 1.0151, "step": 4950 }, { "epoch": 0.28, "grad_norm": 1.1376056671142578, "learning_rate": 1.6804904098469955e-05, "loss": 0.6496, "step": 4951 }, { "epoch": 0.28, "grad_norm": 1.9380031824111938, "learning_rate": 1.6803542803226034e-05, "loss": 1.0977, "step": 4952 }, { "epoch": 0.28, "grad_norm": 1.9276891946792603, "learning_rate": 1.6802181273210858e-05, "loss": 1.0836, "step": 4953 }, { "epoch": 0.28, "grad_norm": 1.8618677854537964, "learning_rate": 1.6800819508471407e-05, "loss": 1.0399, "step": 4954 }, { "epoch": 0.28, "grad_norm": 1.7445107698440552, "learning_rate": 1.679945750905467e-05, "loss": 1.1631, "step": 4955 }, { "epoch": 0.28, "grad_norm": 1.7674551010131836, "learning_rate": 1.679809527500765e-05, "loss": 1.0302, "step": 4956 }, { "epoch": 0.28, "grad_norm": 1.70111083984375, "learning_rate": 1.679673280637735e-05, "loss": 0.9669, "step": 4957 }, { "epoch": 0.28, "grad_norm": 1.9820137023925781, "learning_rate": 1.679537010321079e-05, "loss": 1.0471, "step": 4958 }, { "epoch": 0.28, "grad_norm": 1.8192939758300781, "learning_rate": 1.679400716555499e-05, "loss": 1.0805, "step": 4959 }, { "epoch": 0.28, "grad_norm": 1.147865653038025, "learning_rate": 1.6792643993456978e-05, "loss": 0.646, "step": 4960 }, { "epoch": 0.28, "grad_norm": 1.6857572793960571, "learning_rate": 1.6791280586963798e-05, "loss": 1.0064, "step": 4961 }, { "epoch": 0.28, "grad_norm": 1.774776577949524, "learning_rate": 1.6789916946122494e-05, "loss": 1.0044, "step": 4962 }, { "epoch": 0.28, "grad_norm": 1.9410014152526855, "learning_rate": 1.6788553070980126e-05, "loss": 0.9876, "step": 4963 }, { "epoch": 0.28, "grad_norm": 1.7348839044570923, "learning_rate": 1.678718896158375e-05, "loss": 0.9517, "step": 4964 }, { "epoch": 0.28, "grad_norm": 1.6816319227218628, "learning_rate": 1.6785824617980446e-05, "loss": 1.0669, "step": 4965 }, { "epoch": 0.28, "grad_norm": 1.7540005445480347, "learning_rate": 1.6784460040217286e-05, "loss": 1.0939, "step": 4966 }, { "epoch": 0.28, "grad_norm": 1.8014140129089355, "learning_rate": 1.6783095228341365e-05, "loss": 0.9779, "step": 4967 }, { "epoch": 0.28, "grad_norm": 1.8831634521484375, "learning_rate": 1.6781730182399774e-05, "loss": 1.0254, "step": 4968 }, { "epoch": 0.28, "grad_norm": 1.8566021919250488, "learning_rate": 1.678036490243962e-05, "loss": 1.0221, "step": 4969 }, { "epoch": 0.29, "grad_norm": 1.9947357177734375, "learning_rate": 1.6778999388508013e-05, "loss": 1.0193, "step": 4970 }, { "epoch": 0.29, "grad_norm": 1.705660104751587, "learning_rate": 1.6777633640652072e-05, "loss": 1.0313, "step": 4971 }, { "epoch": 0.29, "grad_norm": 1.9963876008987427, "learning_rate": 1.6776267658918927e-05, "loss": 1.0854, "step": 4972 }, { "epoch": 0.29, "grad_norm": 1.702654242515564, "learning_rate": 1.6774901443355717e-05, "loss": 1.0393, "step": 4973 }, { "epoch": 0.29, "grad_norm": 1.784767746925354, "learning_rate": 1.677353499400958e-05, "loss": 1.0797, "step": 4974 }, { "epoch": 0.29, "grad_norm": 1.0459593534469604, "learning_rate": 1.6772168310927673e-05, "loss": 0.6739, "step": 4975 }, { "epoch": 0.29, "grad_norm": 1.039383053779602, "learning_rate": 1.677080139415715e-05, "loss": 0.6218, "step": 4976 }, { "epoch": 0.29, "grad_norm": 1.7655045986175537, "learning_rate": 1.676943424374519e-05, "loss": 1.0503, "step": 4977 }, { "epoch": 0.29, "grad_norm": 1.754356861114502, "learning_rate": 1.6768066859738963e-05, "loss": 0.9762, "step": 4978 }, { "epoch": 0.29, "grad_norm": 1.992211937904358, "learning_rate": 1.6766699242185653e-05, "loss": 1.0499, "step": 4979 }, { "epoch": 0.29, "grad_norm": 1.7898869514465332, "learning_rate": 1.6765331391132454e-05, "loss": 0.9944, "step": 4980 }, { "epoch": 0.29, "grad_norm": 2.0913188457489014, "learning_rate": 1.676396330662657e-05, "loss": 1.1137, "step": 4981 }, { "epoch": 0.29, "grad_norm": 1.8172208070755005, "learning_rate": 1.6762594988715204e-05, "loss": 1.0002, "step": 4982 }, { "epoch": 0.29, "grad_norm": 1.8199137449264526, "learning_rate": 1.6761226437445577e-05, "loss": 1.0955, "step": 4983 }, { "epoch": 0.29, "grad_norm": 1.869014859199524, "learning_rate": 1.675985765286491e-05, "loss": 1.0695, "step": 4984 }, { "epoch": 0.29, "grad_norm": 1.9903357028961182, "learning_rate": 1.675848863502044e-05, "loss": 1.1009, "step": 4985 }, { "epoch": 0.29, "grad_norm": 1.8338927030563354, "learning_rate": 1.6757119383959406e-05, "loss": 1.0384, "step": 4986 }, { "epoch": 0.29, "grad_norm": 1.7249780893325806, "learning_rate": 1.6755749899729056e-05, "loss": 0.9809, "step": 4987 }, { "epoch": 0.29, "grad_norm": 1.76215398311615, "learning_rate": 1.675438018237665e-05, "loss": 0.9627, "step": 4988 }, { "epoch": 0.29, "grad_norm": 1.9811723232269287, "learning_rate": 1.675301023194945e-05, "loss": 1.0637, "step": 4989 }, { "epoch": 0.29, "grad_norm": 1.747698187828064, "learning_rate": 1.6751640048494734e-05, "loss": 0.9672, "step": 4990 }, { "epoch": 0.29, "grad_norm": 2.1844778060913086, "learning_rate": 1.6750269632059776e-05, "loss": 1.117, "step": 4991 }, { "epoch": 0.29, "grad_norm": 1.948578953742981, "learning_rate": 1.674889898269187e-05, "loss": 0.9905, "step": 4992 }, { "epoch": 0.29, "grad_norm": 1.6798242330551147, "learning_rate": 1.6747528100438316e-05, "loss": 1.026, "step": 4993 }, { "epoch": 0.29, "grad_norm": 1.8882676362991333, "learning_rate": 1.6746156985346413e-05, "loss": 1.1141, "step": 4994 }, { "epoch": 0.29, "grad_norm": 1.584547758102417, "learning_rate": 1.6744785637463476e-05, "loss": 0.9406, "step": 4995 }, { "epoch": 0.29, "grad_norm": 2.051281452178955, "learning_rate": 1.6743414056836827e-05, "loss": 0.9816, "step": 4996 }, { "epoch": 0.29, "grad_norm": 1.90603768825531, "learning_rate": 1.67420422435138e-05, "loss": 0.9714, "step": 4997 }, { "epoch": 0.29, "grad_norm": 1.7681667804718018, "learning_rate": 1.6740670197541722e-05, "loss": 1.1069, "step": 4998 }, { "epoch": 0.29, "grad_norm": 2.075596570968628, "learning_rate": 1.6739297918967948e-05, "loss": 0.9976, "step": 4999 }, { "epoch": 0.29, "grad_norm": 1.8993091583251953, "learning_rate": 1.6737925407839828e-05, "loss": 1.0662, "step": 5000 }, { "epoch": 0.29, "grad_norm": 1.6743183135986328, "learning_rate": 1.6736552664204725e-05, "loss": 0.9614, "step": 5001 }, { "epoch": 0.29, "grad_norm": 1.8447792530059814, "learning_rate": 1.6735179688110004e-05, "loss": 1.0693, "step": 5002 }, { "epoch": 0.29, "grad_norm": 1.6768145561218262, "learning_rate": 1.673380647960305e-05, "loss": 0.9933, "step": 5003 }, { "epoch": 0.29, "grad_norm": 1.7443569898605347, "learning_rate": 1.6732433038731245e-05, "loss": 0.9433, "step": 5004 }, { "epoch": 0.29, "grad_norm": 1.8673535585403442, "learning_rate": 1.673105936554198e-05, "loss": 1.0735, "step": 5005 }, { "epoch": 0.29, "grad_norm": 1.9199897050857544, "learning_rate": 1.6729685460082658e-05, "loss": 1.0182, "step": 5006 }, { "epoch": 0.29, "grad_norm": 1.6692949533462524, "learning_rate": 1.6728311322400693e-05, "loss": 0.943, "step": 5007 }, { "epoch": 0.29, "grad_norm": 2.002382516860962, "learning_rate": 1.6726936952543494e-05, "loss": 1.0232, "step": 5008 }, { "epoch": 0.29, "grad_norm": 2.011814832687378, "learning_rate": 1.6725562350558494e-05, "loss": 1.0256, "step": 5009 }, { "epoch": 0.29, "grad_norm": 1.85011625289917, "learning_rate": 1.6724187516493125e-05, "loss": 0.9765, "step": 5010 }, { "epoch": 0.29, "grad_norm": 1.9433430433273315, "learning_rate": 1.6722812450394826e-05, "loss": 1.0563, "step": 5011 }, { "epoch": 0.29, "grad_norm": 1.2836203575134277, "learning_rate": 1.6721437152311052e-05, "loss": 0.6912, "step": 5012 }, { "epoch": 0.29, "grad_norm": 1.831094741821289, "learning_rate": 1.6720061622289258e-05, "loss": 1.0179, "step": 5013 }, { "epoch": 0.29, "grad_norm": 1.651019811630249, "learning_rate": 1.6718685860376903e-05, "loss": 1.0214, "step": 5014 }, { "epoch": 0.29, "grad_norm": 1.7902356386184692, "learning_rate": 1.6717309866621473e-05, "loss": 1.0352, "step": 5015 }, { "epoch": 0.29, "grad_norm": 1.7322484254837036, "learning_rate": 1.6715933641070443e-05, "loss": 1.0983, "step": 5016 }, { "epoch": 0.29, "grad_norm": 1.7539037466049194, "learning_rate": 1.67145571837713e-05, "loss": 1.0511, "step": 5017 }, { "epoch": 0.29, "grad_norm": 1.787750005722046, "learning_rate": 1.6713180494771545e-05, "loss": 0.9072, "step": 5018 }, { "epoch": 0.29, "grad_norm": 1.8441230058670044, "learning_rate": 1.6711803574118687e-05, "loss": 1.0385, "step": 5019 }, { "epoch": 0.29, "grad_norm": 1.9119000434875488, "learning_rate": 1.6710426421860236e-05, "loss": 0.9924, "step": 5020 }, { "epoch": 0.29, "grad_norm": 1.8587931394577026, "learning_rate": 1.670904903804371e-05, "loss": 0.9626, "step": 5021 }, { "epoch": 0.29, "grad_norm": 1.7519148588180542, "learning_rate": 1.6707671422716644e-05, "loss": 1.0367, "step": 5022 }, { "epoch": 0.29, "grad_norm": 2.089341402053833, "learning_rate": 1.670629357592658e-05, "loss": 1.0558, "step": 5023 }, { "epoch": 0.29, "grad_norm": 1.2060391902923584, "learning_rate": 1.670491549772105e-05, "loss": 0.6475, "step": 5024 }, { "epoch": 0.29, "grad_norm": 1.7893842458724976, "learning_rate": 1.6703537188147622e-05, "loss": 0.9636, "step": 5025 }, { "epoch": 0.29, "grad_norm": 1.8013216257095337, "learning_rate": 1.6702158647253846e-05, "loss": 1.0143, "step": 5026 }, { "epoch": 0.29, "grad_norm": 1.8165167570114136, "learning_rate": 1.6700779875087302e-05, "loss": 1.0265, "step": 5027 }, { "epoch": 0.29, "grad_norm": 1.7995188236236572, "learning_rate": 1.6699400871695556e-05, "loss": 1.043, "step": 5028 }, { "epoch": 0.29, "grad_norm": 1.6800528764724731, "learning_rate": 1.66980216371262e-05, "loss": 0.9996, "step": 5029 }, { "epoch": 0.29, "grad_norm": 1.8691574335098267, "learning_rate": 1.6696642171426834e-05, "loss": 0.9599, "step": 5030 }, { "epoch": 0.29, "grad_norm": 1.9034206867218018, "learning_rate": 1.669526247464505e-05, "loss": 1.0361, "step": 5031 }, { "epoch": 0.29, "grad_norm": 1.7886236906051636, "learning_rate": 1.6693882546828462e-05, "loss": 0.9895, "step": 5032 }, { "epoch": 0.29, "grad_norm": 1.8249578475952148, "learning_rate": 1.6692502388024684e-05, "loss": 1.0423, "step": 5033 }, { "epoch": 0.29, "grad_norm": 1.8827708959579468, "learning_rate": 1.6691121998281343e-05, "loss": 1.0048, "step": 5034 }, { "epoch": 0.29, "grad_norm": 1.9637030363082886, "learning_rate": 1.6689741377646075e-05, "loss": 1.1184, "step": 5035 }, { "epoch": 0.29, "grad_norm": 1.108392357826233, "learning_rate": 1.6688360526166514e-05, "loss": 0.5615, "step": 5036 }, { "epoch": 0.29, "grad_norm": 1.9205940961837769, "learning_rate": 1.668697944389032e-05, "loss": 1.0902, "step": 5037 }, { "epoch": 0.29, "grad_norm": 1.9189637899398804, "learning_rate": 1.6685598130865143e-05, "loss": 0.9799, "step": 5038 }, { "epoch": 0.29, "grad_norm": 1.967287302017212, "learning_rate": 1.6684216587138647e-05, "loss": 1.0766, "step": 5039 }, { "epoch": 0.29, "grad_norm": 1.9516175985336304, "learning_rate": 1.668283481275851e-05, "loss": 0.9908, "step": 5040 }, { "epoch": 0.29, "grad_norm": 1.8366619348526, "learning_rate": 1.6681452807772413e-05, "loss": 1.0563, "step": 5041 }, { "epoch": 0.29, "grad_norm": 1.0738985538482666, "learning_rate": 1.6680070572228043e-05, "loss": 0.6312, "step": 5042 }, { "epoch": 0.29, "grad_norm": 2.040541410446167, "learning_rate": 1.6678688106173097e-05, "loss": 1.0372, "step": 5043 }, { "epoch": 0.29, "grad_norm": 1.9387565851211548, "learning_rate": 1.667730540965528e-05, "loss": 1.0402, "step": 5044 }, { "epoch": 0.29, "grad_norm": 2.158446788787842, "learning_rate": 1.667592248272231e-05, "loss": 1.0442, "step": 5045 }, { "epoch": 0.29, "grad_norm": 1.1286211013793945, "learning_rate": 1.6674539325421897e-05, "loss": 0.6194, "step": 5046 }, { "epoch": 0.29, "grad_norm": 2.0675065517425537, "learning_rate": 1.667315593780178e-05, "loss": 1.079, "step": 5047 }, { "epoch": 0.29, "grad_norm": 1.8281991481781006, "learning_rate": 1.6671772319909692e-05, "loss": 0.9893, "step": 5048 }, { "epoch": 0.29, "grad_norm": 1.7437891960144043, "learning_rate": 1.6670388471793377e-05, "loss": 0.9611, "step": 5049 }, { "epoch": 0.29, "grad_norm": 1.8292734622955322, "learning_rate": 1.666900439350059e-05, "loss": 1.0034, "step": 5050 }, { "epoch": 0.29, "grad_norm": 2.0750789642333984, "learning_rate": 1.666762008507909e-05, "loss": 1.0279, "step": 5051 }, { "epoch": 0.29, "grad_norm": 1.1034513711929321, "learning_rate": 1.6666235546576648e-05, "loss": 0.6067, "step": 5052 }, { "epoch": 0.29, "grad_norm": 1.9065146446228027, "learning_rate": 1.6664850778041036e-05, "loss": 1.0109, "step": 5053 }, { "epoch": 0.29, "grad_norm": 2.070749044418335, "learning_rate": 1.6663465779520042e-05, "loss": 0.948, "step": 5054 }, { "epoch": 0.29, "grad_norm": 1.7005939483642578, "learning_rate": 1.6662080551061458e-05, "loss": 1.0125, "step": 5055 }, { "epoch": 0.29, "grad_norm": 1.9674650430679321, "learning_rate": 1.6660695092713083e-05, "loss": 0.9087, "step": 5056 }, { "epoch": 0.29, "grad_norm": 1.8961045742034912, "learning_rate": 1.6659309404522725e-05, "loss": 1.0591, "step": 5057 }, { "epoch": 0.29, "grad_norm": 1.9492416381835938, "learning_rate": 1.6657923486538203e-05, "loss": 1.1025, "step": 5058 }, { "epoch": 0.29, "grad_norm": 1.9426721334457397, "learning_rate": 1.665653733880734e-05, "loss": 1.059, "step": 5059 }, { "epoch": 0.29, "grad_norm": 1.9625918865203857, "learning_rate": 1.665515096137797e-05, "loss": 1.0059, "step": 5060 }, { "epoch": 0.29, "grad_norm": 1.9408493041992188, "learning_rate": 1.665376435429793e-05, "loss": 1.0875, "step": 5061 }, { "epoch": 0.29, "grad_norm": 1.980059266090393, "learning_rate": 1.6652377517615065e-05, "loss": 1.1142, "step": 5062 }, { "epoch": 0.29, "grad_norm": 1.8643765449523926, "learning_rate": 1.6650990451377237e-05, "loss": 1.0869, "step": 5063 }, { "epoch": 0.29, "grad_norm": 1.7100104093551636, "learning_rate": 1.6649603155632305e-05, "loss": 1.0375, "step": 5064 }, { "epoch": 0.29, "grad_norm": 2.2048871517181396, "learning_rate": 1.6648215630428146e-05, "loss": 0.9923, "step": 5065 }, { "epoch": 0.29, "grad_norm": 1.9793179035186768, "learning_rate": 1.6646827875812635e-05, "loss": 1.0599, "step": 5066 }, { "epoch": 0.29, "grad_norm": 1.914188027381897, "learning_rate": 1.664543989183366e-05, "loss": 1.0172, "step": 5067 }, { "epoch": 0.29, "grad_norm": 1.9987032413482666, "learning_rate": 1.6644051678539122e-05, "loss": 0.963, "step": 5068 }, { "epoch": 0.29, "grad_norm": 1.7849905490875244, "learning_rate": 1.6642663235976916e-05, "loss": 1.0345, "step": 5069 }, { "epoch": 0.29, "grad_norm": 1.9672876596450806, "learning_rate": 1.6641274564194956e-05, "loss": 1.0449, "step": 5070 }, { "epoch": 0.29, "grad_norm": 1.8944051265716553, "learning_rate": 1.663988566324117e-05, "loss": 0.9923, "step": 5071 }, { "epoch": 0.29, "grad_norm": 1.7426607608795166, "learning_rate": 1.663849653316347e-05, "loss": 0.9722, "step": 5072 }, { "epoch": 0.29, "grad_norm": 1.6451948881149292, "learning_rate": 1.66371071740098e-05, "loss": 0.9605, "step": 5073 }, { "epoch": 0.29, "grad_norm": 2.1996660232543945, "learning_rate": 1.6635717585828102e-05, "loss": 0.9767, "step": 5074 }, { "epoch": 0.29, "grad_norm": 1.9899603128433228, "learning_rate": 1.6634327768666328e-05, "loss": 1.0257, "step": 5075 }, { "epoch": 0.29, "grad_norm": 2.007981538772583, "learning_rate": 1.6632937722572436e-05, "loss": 1.0709, "step": 5076 }, { "epoch": 0.29, "grad_norm": 2.003831386566162, "learning_rate": 1.6631547447594388e-05, "loss": 0.9635, "step": 5077 }, { "epoch": 0.29, "grad_norm": 1.8250536918640137, "learning_rate": 1.663015694378016e-05, "loss": 1.0368, "step": 5078 }, { "epoch": 0.29, "grad_norm": 1.9970078468322754, "learning_rate": 1.6628766211177744e-05, "loss": 0.988, "step": 5079 }, { "epoch": 0.29, "grad_norm": 1.8606919050216675, "learning_rate": 1.6627375249835117e-05, "loss": 1.0887, "step": 5080 }, { "epoch": 0.29, "grad_norm": 2.050845146179199, "learning_rate": 1.6625984059800285e-05, "loss": 1.0635, "step": 5081 }, { "epoch": 0.29, "grad_norm": 1.6655102968215942, "learning_rate": 1.6624592641121252e-05, "loss": 1.055, "step": 5082 }, { "epoch": 0.29, "grad_norm": 1.8206238746643066, "learning_rate": 1.662320099384603e-05, "loss": 1.1186, "step": 5083 }, { "epoch": 0.29, "grad_norm": 1.9102427959442139, "learning_rate": 1.6621809118022646e-05, "loss": 1.0264, "step": 5084 }, { "epoch": 0.29, "grad_norm": 1.8041369915008545, "learning_rate": 1.6620417013699122e-05, "loss": 1.0616, "step": 5085 }, { "epoch": 0.29, "grad_norm": 1.8059991598129272, "learning_rate": 1.6619024680923505e-05, "loss": 0.9807, "step": 5086 }, { "epoch": 0.29, "grad_norm": 1.7358617782592773, "learning_rate": 1.6617632119743837e-05, "loss": 1.021, "step": 5087 }, { "epoch": 0.29, "grad_norm": 1.8036466836929321, "learning_rate": 1.6616239330208163e-05, "loss": 0.9799, "step": 5088 }, { "epoch": 0.29, "grad_norm": 3.068758487701416, "learning_rate": 1.661484631236456e-05, "loss": 1.1062, "step": 5089 }, { "epoch": 0.29, "grad_norm": 2.035245656967163, "learning_rate": 1.661345306626108e-05, "loss": 1.0171, "step": 5090 }, { "epoch": 0.29, "grad_norm": 2.0842854976654053, "learning_rate": 1.6612059591945815e-05, "loss": 1.0239, "step": 5091 }, { "epoch": 0.29, "grad_norm": 1.8434911966323853, "learning_rate": 1.661066588946684e-05, "loss": 1.0085, "step": 5092 }, { "epoch": 0.29, "grad_norm": 1.8452248573303223, "learning_rate": 1.660927195887225e-05, "loss": 0.9346, "step": 5093 }, { "epoch": 0.29, "grad_norm": 1.8949992656707764, "learning_rate": 1.6607877800210156e-05, "loss": 1.0137, "step": 5094 }, { "epoch": 0.29, "grad_norm": 1.9097779989242554, "learning_rate": 1.660648341352865e-05, "loss": 1.083, "step": 5095 }, { "epoch": 0.29, "grad_norm": 1.6986191272735596, "learning_rate": 1.6605088798875856e-05, "loss": 1.0189, "step": 5096 }, { "epoch": 0.29, "grad_norm": 1.8721935749053955, "learning_rate": 1.66036939562999e-05, "loss": 1.0203, "step": 5097 }, { "epoch": 0.29, "grad_norm": 1.862821102142334, "learning_rate": 1.660229888584891e-05, "loss": 1.0707, "step": 5098 }, { "epoch": 0.29, "grad_norm": 2.097388744354248, "learning_rate": 1.6600903587571028e-05, "loss": 1.0541, "step": 5099 }, { "epoch": 0.29, "grad_norm": 1.9460285902023315, "learning_rate": 1.6599508061514404e-05, "loss": 0.9935, "step": 5100 }, { "epoch": 0.29, "grad_norm": 1.9337083101272583, "learning_rate": 1.659811230772719e-05, "loss": 1.0208, "step": 5101 }, { "epoch": 0.29, "grad_norm": 1.7883825302124023, "learning_rate": 1.6596716326257552e-05, "loss": 1.001, "step": 5102 }, { "epoch": 0.29, "grad_norm": 1.8423614501953125, "learning_rate": 1.6595320117153664e-05, "loss": 1.0452, "step": 5103 }, { "epoch": 0.29, "grad_norm": 1.9980061054229736, "learning_rate": 1.6593923680463698e-05, "loss": 1.0483, "step": 5104 }, { "epoch": 0.29, "grad_norm": 1.8504562377929688, "learning_rate": 1.6592527016235848e-05, "loss": 1.0736, "step": 5105 }, { "epoch": 0.29, "grad_norm": 1.8085793256759644, "learning_rate": 1.6591130124518305e-05, "loss": 0.9559, "step": 5106 }, { "epoch": 0.29, "grad_norm": 1.8882025480270386, "learning_rate": 1.6589733005359274e-05, "loss": 0.9677, "step": 5107 }, { "epoch": 0.29, "grad_norm": 1.173730492591858, "learning_rate": 1.6588335658806964e-05, "loss": 0.6819, "step": 5108 }, { "epoch": 0.29, "grad_norm": 2.008368730545044, "learning_rate": 1.658693808490959e-05, "loss": 0.9706, "step": 5109 }, { "epoch": 0.29, "grad_norm": 2.2888143062591553, "learning_rate": 1.658554028371539e-05, "loss": 1.0056, "step": 5110 }, { "epoch": 0.29, "grad_norm": 1.8251432180404663, "learning_rate": 1.6584142255272587e-05, "loss": 0.9935, "step": 5111 }, { "epoch": 0.29, "grad_norm": 1.8009058237075806, "learning_rate": 1.6582743999629426e-05, "loss": 0.9475, "step": 5112 }, { "epoch": 0.29, "grad_norm": 2.013921022415161, "learning_rate": 1.6581345516834158e-05, "loss": 1.057, "step": 5113 }, { "epoch": 0.29, "grad_norm": 1.7876862287521362, "learning_rate": 1.657994680693504e-05, "loss": 1.0784, "step": 5114 }, { "epoch": 0.29, "grad_norm": 1.7720376253128052, "learning_rate": 1.657854786998034e-05, "loss": 1.0035, "step": 5115 }, { "epoch": 0.29, "grad_norm": 1.6997485160827637, "learning_rate": 1.657714870601833e-05, "loss": 1.0185, "step": 5116 }, { "epoch": 0.29, "grad_norm": 1.1311860084533691, "learning_rate": 1.657574931509729e-05, "loss": 0.6178, "step": 5117 }, { "epoch": 0.29, "grad_norm": 1.0236198902130127, "learning_rate": 1.6574349697265507e-05, "loss": 0.6048, "step": 5118 }, { "epoch": 0.29, "grad_norm": 2.0134334564208984, "learning_rate": 1.657294985257128e-05, "loss": 1.0408, "step": 5119 }, { "epoch": 0.29, "grad_norm": 1.226640224456787, "learning_rate": 1.6571549781062917e-05, "loss": 0.6493, "step": 5120 }, { "epoch": 0.29, "grad_norm": 1.914505958557129, "learning_rate": 1.6570149482788732e-05, "loss": 0.9974, "step": 5121 }, { "epoch": 0.29, "grad_norm": 1.8020858764648438, "learning_rate": 1.6568748957797038e-05, "loss": 1.009, "step": 5122 }, { "epoch": 0.29, "grad_norm": 1.7695931196212769, "learning_rate": 1.6567348206136165e-05, "loss": 0.9871, "step": 5123 }, { "epoch": 0.29, "grad_norm": 1.8216640949249268, "learning_rate": 1.656594722785445e-05, "loss": 1.0726, "step": 5124 }, { "epoch": 0.29, "grad_norm": 1.829126238822937, "learning_rate": 1.6564546023000237e-05, "loss": 1.0043, "step": 5125 }, { "epoch": 0.29, "grad_norm": 1.7772107124328613, "learning_rate": 1.656314459162188e-05, "loss": 0.9755, "step": 5126 }, { "epoch": 0.29, "grad_norm": 1.9471821784973145, "learning_rate": 1.6561742933767738e-05, "loss": 1.1311, "step": 5127 }, { "epoch": 0.29, "grad_norm": 1.7489556074142456, "learning_rate": 1.6560341049486176e-05, "loss": 1.0658, "step": 5128 }, { "epoch": 0.29, "grad_norm": 1.7661609649658203, "learning_rate": 1.6558938938825568e-05, "loss": 0.9817, "step": 5129 }, { "epoch": 0.29, "grad_norm": 1.6405469179153442, "learning_rate": 1.65575366018343e-05, "loss": 1.0084, "step": 5130 }, { "epoch": 0.29, "grad_norm": 1.707180380821228, "learning_rate": 1.655613403856076e-05, "loss": 1.0275, "step": 5131 }, { "epoch": 0.29, "grad_norm": 1.7378404140472412, "learning_rate": 1.6554731249053352e-05, "loss": 1.0347, "step": 5132 }, { "epoch": 0.29, "grad_norm": 1.713561773300171, "learning_rate": 1.6553328233360477e-05, "loss": 0.9501, "step": 5133 }, { "epoch": 0.29, "grad_norm": 1.6333541870117188, "learning_rate": 1.655192499153055e-05, "loss": 1.0285, "step": 5134 }, { "epoch": 0.29, "grad_norm": 1.7564549446105957, "learning_rate": 1.655052152361199e-05, "loss": 1.0502, "step": 5135 }, { "epoch": 0.29, "grad_norm": 1.903883695602417, "learning_rate": 1.6549117829653238e-05, "loss": 0.9278, "step": 5136 }, { "epoch": 0.29, "grad_norm": 1.6957582235336304, "learning_rate": 1.6547713909702716e-05, "loss": 0.9768, "step": 5137 }, { "epoch": 0.29, "grad_norm": 1.824670672416687, "learning_rate": 1.6546309763808883e-05, "loss": 0.9913, "step": 5138 }, { "epoch": 0.29, "grad_norm": 1.724562406539917, "learning_rate": 1.6544905392020182e-05, "loss": 1.0095, "step": 5139 }, { "epoch": 0.29, "grad_norm": 1.7832239866256714, "learning_rate": 1.6543500794385084e-05, "loss": 1.0758, "step": 5140 }, { "epoch": 0.29, "grad_norm": 1.7708734273910522, "learning_rate": 1.6542095970952046e-05, "loss": 0.9918, "step": 5141 }, { "epoch": 0.29, "grad_norm": 2.0592641830444336, "learning_rate": 1.6540690921769556e-05, "loss": 0.9831, "step": 5142 }, { "epoch": 0.29, "grad_norm": 1.9066232442855835, "learning_rate": 1.653928564688609e-05, "loss": 1.0807, "step": 5143 }, { "epoch": 0.3, "grad_norm": 1.8743990659713745, "learning_rate": 1.6537880146350144e-05, "loss": 1.053, "step": 5144 }, { "epoch": 0.3, "grad_norm": 1.8483422994613647, "learning_rate": 1.6536474420210215e-05, "loss": 1.0806, "step": 5145 }, { "epoch": 0.3, "grad_norm": 1.8380682468414307, "learning_rate": 1.6535068468514817e-05, "loss": 1.0396, "step": 5146 }, { "epoch": 0.3, "grad_norm": 1.8999302387237549, "learning_rate": 1.653366229131246e-05, "loss": 1.0137, "step": 5147 }, { "epoch": 0.3, "grad_norm": 1.827161192893982, "learning_rate": 1.6532255888651665e-05, "loss": 0.9832, "step": 5148 }, { "epoch": 0.3, "grad_norm": 1.8261295557022095, "learning_rate": 1.6530849260580967e-05, "loss": 1.0127, "step": 5149 }, { "epoch": 0.3, "grad_norm": 1.6067414283752441, "learning_rate": 1.6529442407148907e-05, "loss": 0.9998, "step": 5150 }, { "epoch": 0.3, "grad_norm": 1.8082988262176514, "learning_rate": 1.6528035328404026e-05, "loss": 1.064, "step": 5151 }, { "epoch": 0.3, "grad_norm": 1.8491404056549072, "learning_rate": 1.6526628024394883e-05, "loss": 1.0527, "step": 5152 }, { "epoch": 0.3, "grad_norm": 1.740983247756958, "learning_rate": 1.6525220495170037e-05, "loss": 1.1291, "step": 5153 }, { "epoch": 0.3, "grad_norm": 1.7249277830123901, "learning_rate": 1.652381274077806e-05, "loss": 1.0145, "step": 5154 }, { "epoch": 0.3, "grad_norm": 1.8582860231399536, "learning_rate": 1.652240476126753e-05, "loss": 0.9817, "step": 5155 }, { "epoch": 0.3, "grad_norm": 1.7936407327651978, "learning_rate": 1.6520996556687026e-05, "loss": 0.9869, "step": 5156 }, { "epoch": 0.3, "grad_norm": 1.7154349088668823, "learning_rate": 1.6519588127085155e-05, "loss": 1.0545, "step": 5157 }, { "epoch": 0.3, "grad_norm": 1.817953109741211, "learning_rate": 1.6518179472510506e-05, "loss": 1.0644, "step": 5158 }, { "epoch": 0.3, "grad_norm": 2.0917179584503174, "learning_rate": 1.651677059301169e-05, "loss": 1.0515, "step": 5159 }, { "epoch": 0.3, "grad_norm": 1.9930673837661743, "learning_rate": 1.6515361488637323e-05, "loss": 0.9829, "step": 5160 }, { "epoch": 0.3, "grad_norm": 1.724372148513794, "learning_rate": 1.6513952159436033e-05, "loss": 0.982, "step": 5161 }, { "epoch": 0.3, "grad_norm": 1.8292368650436401, "learning_rate": 1.651254260545645e-05, "loss": 1.1008, "step": 5162 }, { "epoch": 0.3, "grad_norm": 1.8004170656204224, "learning_rate": 1.6511132826747212e-05, "loss": 1.0003, "step": 5163 }, { "epoch": 0.3, "grad_norm": 1.8540232181549072, "learning_rate": 1.650972282335697e-05, "loss": 1.0383, "step": 5164 }, { "epoch": 0.3, "grad_norm": 2.018101215362549, "learning_rate": 1.6508312595334378e-05, "loss": 1.0866, "step": 5165 }, { "epoch": 0.3, "grad_norm": 2.314603090286255, "learning_rate": 1.65069021427281e-05, "loss": 1.0029, "step": 5166 }, { "epoch": 0.3, "grad_norm": 1.185894250869751, "learning_rate": 1.65054914655868e-05, "loss": 0.5862, "step": 5167 }, { "epoch": 0.3, "grad_norm": 1.7470319271087646, "learning_rate": 1.6504080563959167e-05, "loss": 1.0095, "step": 5168 }, { "epoch": 0.3, "grad_norm": 1.7473456859588623, "learning_rate": 1.6502669437893878e-05, "loss": 1.1083, "step": 5169 }, { "epoch": 0.3, "grad_norm": 1.9529571533203125, "learning_rate": 1.6501258087439637e-05, "loss": 0.9961, "step": 5170 }, { "epoch": 0.3, "grad_norm": 1.7644704580307007, "learning_rate": 1.6499846512645136e-05, "loss": 1.0215, "step": 5171 }, { "epoch": 0.3, "grad_norm": 1.8650532960891724, "learning_rate": 1.649843471355909e-05, "loss": 1.0283, "step": 5172 }, { "epoch": 0.3, "grad_norm": 1.7721192836761475, "learning_rate": 1.649702269023021e-05, "loss": 1.0563, "step": 5173 }, { "epoch": 0.3, "grad_norm": 1.7326829433441162, "learning_rate": 1.649561044270723e-05, "loss": 1.0945, "step": 5174 }, { "epoch": 0.3, "grad_norm": 1.8108159303665161, "learning_rate": 1.6494197971038876e-05, "loss": 0.9759, "step": 5175 }, { "epoch": 0.3, "grad_norm": 1.7008742094039917, "learning_rate": 1.649278527527389e-05, "loss": 1.0745, "step": 5176 }, { "epoch": 0.3, "grad_norm": 1.5801929235458374, "learning_rate": 1.6491372355461028e-05, "loss": 0.9273, "step": 5177 }, { "epoch": 0.3, "grad_norm": 1.6170562505722046, "learning_rate": 1.6489959211649035e-05, "loss": 1.0075, "step": 5178 }, { "epoch": 0.3, "grad_norm": 2.0312623977661133, "learning_rate": 1.6488545843886677e-05, "loss": 0.9267, "step": 5179 }, { "epoch": 0.3, "grad_norm": 1.9025565385818481, "learning_rate": 1.648713225222273e-05, "loss": 0.9545, "step": 5180 }, { "epoch": 0.3, "grad_norm": 1.9409915208816528, "learning_rate": 1.6485718436705965e-05, "loss": 1.0998, "step": 5181 }, { "epoch": 0.3, "grad_norm": 1.8895087242126465, "learning_rate": 1.6484304397385177e-05, "loss": 1.0841, "step": 5182 }, { "epoch": 0.3, "grad_norm": 1.9464365243911743, "learning_rate": 1.6482890134309156e-05, "loss": 1.0579, "step": 5183 }, { "epoch": 0.3, "grad_norm": 1.8552864789962769, "learning_rate": 1.648147564752671e-05, "loss": 1.0387, "step": 5184 }, { "epoch": 0.3, "grad_norm": 1.9646834135055542, "learning_rate": 1.648006093708664e-05, "loss": 0.9989, "step": 5185 }, { "epoch": 0.3, "grad_norm": 1.746027946472168, "learning_rate": 1.647864600303777e-05, "loss": 1.0073, "step": 5186 }, { "epoch": 0.3, "grad_norm": 1.8757826089859009, "learning_rate": 1.6477230845428925e-05, "loss": 0.9697, "step": 5187 }, { "epoch": 0.3, "grad_norm": 1.1826835870742798, "learning_rate": 1.6475815464308933e-05, "loss": 0.6374, "step": 5188 }, { "epoch": 0.3, "grad_norm": 1.7431520223617554, "learning_rate": 1.6474399859726644e-05, "loss": 1.0647, "step": 5189 }, { "epoch": 0.3, "grad_norm": 1.8256975412368774, "learning_rate": 1.64729840317309e-05, "loss": 1.0547, "step": 5190 }, { "epoch": 0.3, "grad_norm": 2.0221009254455566, "learning_rate": 1.6471567980370556e-05, "loss": 1.1086, "step": 5191 }, { "epoch": 0.3, "grad_norm": 1.819825291633606, "learning_rate": 1.6470151705694478e-05, "loss": 0.9987, "step": 5192 }, { "epoch": 0.3, "grad_norm": 1.950721025466919, "learning_rate": 1.646873520775154e-05, "loss": 1.0497, "step": 5193 }, { "epoch": 0.3, "grad_norm": 1.791812777519226, "learning_rate": 1.6467318486590623e-05, "loss": 1.0024, "step": 5194 }, { "epoch": 0.3, "grad_norm": 1.7883446216583252, "learning_rate": 1.6465901542260607e-05, "loss": 0.9573, "step": 5195 }, { "epoch": 0.3, "grad_norm": 1.8049689531326294, "learning_rate": 1.646448437481039e-05, "loss": 1.0273, "step": 5196 }, { "epoch": 0.3, "grad_norm": 1.9341366291046143, "learning_rate": 1.646306698428888e-05, "loss": 1.0573, "step": 5197 }, { "epoch": 0.3, "grad_norm": 1.6553109884262085, "learning_rate": 1.6461649370744975e-05, "loss": 0.9983, "step": 5198 }, { "epoch": 0.3, "grad_norm": 2.001251459121704, "learning_rate": 1.6460231534227603e-05, "loss": 1.0859, "step": 5199 }, { "epoch": 0.3, "grad_norm": 1.8395774364471436, "learning_rate": 1.6458813474785685e-05, "loss": 0.9716, "step": 5200 }, { "epoch": 0.3, "grad_norm": 1.6906391382217407, "learning_rate": 1.6457395192468158e-05, "loss": 0.9144, "step": 5201 }, { "epoch": 0.3, "grad_norm": 2.063892364501953, "learning_rate": 1.645597668732396e-05, "loss": 1.0668, "step": 5202 }, { "epoch": 0.3, "grad_norm": 1.8227604627609253, "learning_rate": 1.6454557959402048e-05, "loss": 1.0833, "step": 5203 }, { "epoch": 0.3, "grad_norm": 1.9198225736618042, "learning_rate": 1.645313900875136e-05, "loss": 0.9916, "step": 5204 }, { "epoch": 0.3, "grad_norm": 1.8252094984054565, "learning_rate": 1.645171983542088e-05, "loss": 1.0474, "step": 5205 }, { "epoch": 0.3, "grad_norm": 1.7386993169784546, "learning_rate": 1.6450300439459562e-05, "loss": 1.0368, "step": 5206 }, { "epoch": 0.3, "grad_norm": 1.783159613609314, "learning_rate": 1.64488808209164e-05, "loss": 1.0037, "step": 5207 }, { "epoch": 0.3, "grad_norm": 1.6322764158248901, "learning_rate": 1.6447460979840373e-05, "loss": 0.9427, "step": 5208 }, { "epoch": 0.3, "grad_norm": 1.839501142501831, "learning_rate": 1.644604091628048e-05, "loss": 1.0476, "step": 5209 }, { "epoch": 0.3, "grad_norm": 1.9075543880462646, "learning_rate": 1.6444620630285717e-05, "loss": 1.084, "step": 5210 }, { "epoch": 0.3, "grad_norm": 1.8340729475021362, "learning_rate": 1.64432001219051e-05, "loss": 1.0892, "step": 5211 }, { "epoch": 0.3, "grad_norm": 1.9423549175262451, "learning_rate": 1.6441779391187647e-05, "loss": 1.0034, "step": 5212 }, { "epoch": 0.3, "grad_norm": 1.7347936630249023, "learning_rate": 1.6440358438182383e-05, "loss": 0.9915, "step": 5213 }, { "epoch": 0.3, "grad_norm": 1.8718029260635376, "learning_rate": 1.6438937262938336e-05, "loss": 1.0649, "step": 5214 }, { "epoch": 0.3, "grad_norm": 1.8925142288208008, "learning_rate": 1.643751586550455e-05, "loss": 0.9537, "step": 5215 }, { "epoch": 0.3, "grad_norm": 1.822401523590088, "learning_rate": 1.6436094245930077e-05, "loss": 1.0798, "step": 5216 }, { "epoch": 0.3, "grad_norm": 1.9212912321090698, "learning_rate": 1.643467240426397e-05, "loss": 0.9887, "step": 5217 }, { "epoch": 0.3, "grad_norm": 1.145285725593567, "learning_rate": 1.6433250340555292e-05, "loss": 0.6171, "step": 5218 }, { "epoch": 0.3, "grad_norm": 1.8513644933700562, "learning_rate": 1.6431828054853112e-05, "loss": 1.028, "step": 5219 }, { "epoch": 0.3, "grad_norm": 1.7444010972976685, "learning_rate": 1.6430405547206518e-05, "loss": 1.1106, "step": 5220 }, { "epoch": 0.3, "grad_norm": 1.7501215934753418, "learning_rate": 1.6428982817664586e-05, "loss": 0.9859, "step": 5221 }, { "epoch": 0.3, "grad_norm": 2.007359266281128, "learning_rate": 1.642755986627642e-05, "loss": 1.0562, "step": 5222 }, { "epoch": 0.3, "grad_norm": 2.095323324203491, "learning_rate": 1.6426136693091116e-05, "loss": 1.101, "step": 5223 }, { "epoch": 0.3, "grad_norm": 1.8750954866409302, "learning_rate": 1.6424713298157784e-05, "loss": 0.9934, "step": 5224 }, { "epoch": 0.3, "grad_norm": 1.8303214311599731, "learning_rate": 1.6423289681525544e-05, "loss": 1.0742, "step": 5225 }, { "epoch": 0.3, "grad_norm": 1.825567603111267, "learning_rate": 1.6421865843243522e-05, "loss": 0.9778, "step": 5226 }, { "epoch": 0.3, "grad_norm": 1.5514206886291504, "learning_rate": 1.6420441783360845e-05, "loss": 1.051, "step": 5227 }, { "epoch": 0.3, "grad_norm": 1.6774661540985107, "learning_rate": 1.641901750192666e-05, "loss": 0.9834, "step": 5228 }, { "epoch": 0.3, "grad_norm": 1.8114384412765503, "learning_rate": 1.6417592998990107e-05, "loss": 1.029, "step": 5229 }, { "epoch": 0.3, "grad_norm": 1.9738600254058838, "learning_rate": 1.641616827460035e-05, "loss": 1.0089, "step": 5230 }, { "epoch": 0.3, "grad_norm": 1.774478793144226, "learning_rate": 1.6414743328806547e-05, "loss": 1.2079, "step": 5231 }, { "epoch": 0.3, "grad_norm": 1.8085359334945679, "learning_rate": 1.641331816165787e-05, "loss": 1.0086, "step": 5232 }, { "epoch": 0.3, "grad_norm": 1.6217718124389648, "learning_rate": 1.64118927732035e-05, "loss": 1.0355, "step": 5233 }, { "epoch": 0.3, "grad_norm": 1.6912823915481567, "learning_rate": 1.6410467163492624e-05, "loss": 1.0614, "step": 5234 }, { "epoch": 0.3, "grad_norm": 1.951919436454773, "learning_rate": 1.640904133257443e-05, "loss": 1.1116, "step": 5235 }, { "epoch": 0.3, "grad_norm": 1.8196876049041748, "learning_rate": 1.6407615280498125e-05, "loss": 1.0041, "step": 5236 }, { "epoch": 0.3, "grad_norm": 1.6498008966445923, "learning_rate": 1.640618900731291e-05, "loss": 1.0425, "step": 5237 }, { "epoch": 0.3, "grad_norm": 1.7506357431411743, "learning_rate": 1.6404762513068014e-05, "loss": 0.9513, "step": 5238 }, { "epoch": 0.3, "grad_norm": 1.8732815980911255, "learning_rate": 1.640333579781265e-05, "loss": 0.9872, "step": 5239 }, { "epoch": 0.3, "grad_norm": 1.7445695400238037, "learning_rate": 1.6401908861596054e-05, "loss": 1.1108, "step": 5240 }, { "epoch": 0.3, "grad_norm": 1.7546354532241821, "learning_rate": 1.6400481704467468e-05, "loss": 1.0896, "step": 5241 }, { "epoch": 0.3, "grad_norm": 1.8240140676498413, "learning_rate": 1.6399054326476142e-05, "loss": 0.9225, "step": 5242 }, { "epoch": 0.3, "grad_norm": 1.7127807140350342, "learning_rate": 1.639762672767132e-05, "loss": 1.0578, "step": 5243 }, { "epoch": 0.3, "grad_norm": 1.932915449142456, "learning_rate": 1.6396198908102273e-05, "loss": 1.0383, "step": 5244 }, { "epoch": 0.3, "grad_norm": 1.8805278539657593, "learning_rate": 1.6394770867818267e-05, "loss": 1.0237, "step": 5245 }, { "epoch": 0.3, "grad_norm": 1.798864722251892, "learning_rate": 1.6393342606868582e-05, "loss": 0.9055, "step": 5246 }, { "epoch": 0.3, "grad_norm": 1.7802367210388184, "learning_rate": 1.6391914125302505e-05, "loss": 1.0358, "step": 5247 }, { "epoch": 0.3, "grad_norm": 1.6692166328430176, "learning_rate": 1.6390485423169323e-05, "loss": 1.002, "step": 5248 }, { "epoch": 0.3, "grad_norm": 1.8615047931671143, "learning_rate": 1.6389056500518343e-05, "loss": 1.0055, "step": 5249 }, { "epoch": 0.3, "grad_norm": 1.7731149196624756, "learning_rate": 1.638762735739887e-05, "loss": 1.0703, "step": 5250 }, { "epoch": 0.3, "grad_norm": 1.8083915710449219, "learning_rate": 1.6386197993860218e-05, "loss": 0.9796, "step": 5251 }, { "epoch": 0.3, "grad_norm": 2.074035167694092, "learning_rate": 1.6384768409951714e-05, "loss": 1.0288, "step": 5252 }, { "epoch": 0.3, "grad_norm": 1.7210679054260254, "learning_rate": 1.6383338605722686e-05, "loss": 0.9803, "step": 5253 }, { "epoch": 0.3, "grad_norm": 1.8645868301391602, "learning_rate": 1.6381908581222477e-05, "loss": 1.0703, "step": 5254 }, { "epoch": 0.3, "grad_norm": 2.0244362354278564, "learning_rate": 1.6380478336500427e-05, "loss": 1.0403, "step": 5255 }, { "epoch": 0.3, "grad_norm": 1.9616894721984863, "learning_rate": 1.6379047871605897e-05, "loss": 0.9535, "step": 5256 }, { "epoch": 0.3, "grad_norm": 1.73922598361969, "learning_rate": 1.6377617186588236e-05, "loss": 0.9678, "step": 5257 }, { "epoch": 0.3, "grad_norm": 1.7541407346725464, "learning_rate": 1.637618628149683e-05, "loss": 1.0348, "step": 5258 }, { "epoch": 0.3, "grad_norm": 1.8831788301467896, "learning_rate": 1.637475515638104e-05, "loss": 1.073, "step": 5259 }, { "epoch": 0.3, "grad_norm": 1.8657195568084717, "learning_rate": 1.6373323811290262e-05, "loss": 0.9982, "step": 5260 }, { "epoch": 0.3, "grad_norm": 1.7818225622177124, "learning_rate": 1.6371892246273877e-05, "loss": 1.0534, "step": 5261 }, { "epoch": 0.3, "grad_norm": 1.843577265739441, "learning_rate": 1.6370460461381292e-05, "loss": 1.0181, "step": 5262 }, { "epoch": 0.3, "grad_norm": 1.842872142791748, "learning_rate": 1.636902845666191e-05, "loss": 0.9868, "step": 5263 }, { "epoch": 0.3, "grad_norm": 1.831213355064392, "learning_rate": 1.636759623216515e-05, "loss": 1.05, "step": 5264 }, { "epoch": 0.3, "grad_norm": 1.6557680368423462, "learning_rate": 1.636616378794043e-05, "loss": 0.9932, "step": 5265 }, { "epoch": 0.3, "grad_norm": 1.8199869394302368, "learning_rate": 1.636473112403718e-05, "loss": 0.9797, "step": 5266 }, { "epoch": 0.3, "grad_norm": 1.40049409866333, "learning_rate": 1.6363298240504842e-05, "loss": 0.665, "step": 5267 }, { "epoch": 0.3, "grad_norm": 1.7990132570266724, "learning_rate": 1.6361865137392855e-05, "loss": 1.0224, "step": 5268 }, { "epoch": 0.3, "grad_norm": 1.637906551361084, "learning_rate": 1.636043181475067e-05, "loss": 0.9486, "step": 5269 }, { "epoch": 0.3, "grad_norm": 1.7352979183197021, "learning_rate": 1.6358998272627754e-05, "loss": 0.9786, "step": 5270 }, { "epoch": 0.3, "grad_norm": 1.7840981483459473, "learning_rate": 1.6357564511073567e-05, "loss": 1.0571, "step": 5271 }, { "epoch": 0.3, "grad_norm": 1.6980482339859009, "learning_rate": 1.635613053013759e-05, "loss": 1.0016, "step": 5272 }, { "epoch": 0.3, "grad_norm": 1.8260037899017334, "learning_rate": 1.6354696329869307e-05, "loss": 0.959, "step": 5273 }, { "epoch": 0.3, "grad_norm": 1.8289821147918701, "learning_rate": 1.63532619103182e-05, "loss": 1.0565, "step": 5274 }, { "epoch": 0.3, "grad_norm": 2.04422926902771, "learning_rate": 1.6351827271533774e-05, "loss": 1.0866, "step": 5275 }, { "epoch": 0.3, "grad_norm": 1.857997179031372, "learning_rate": 1.635039241356553e-05, "loss": 1.0391, "step": 5276 }, { "epoch": 0.3, "grad_norm": 1.9671493768692017, "learning_rate": 1.6348957336462982e-05, "loss": 1.0893, "step": 5277 }, { "epoch": 0.3, "grad_norm": 1.1050386428833008, "learning_rate": 1.6347522040275653e-05, "loss": 0.6131, "step": 5278 }, { "epoch": 0.3, "grad_norm": 1.7517123222351074, "learning_rate": 1.6346086525053072e-05, "loss": 1.1215, "step": 5279 }, { "epoch": 0.3, "grad_norm": 2.0481855869293213, "learning_rate": 1.634465079084477e-05, "loss": 0.9805, "step": 5280 }, { "epoch": 0.3, "grad_norm": 1.6988558769226074, "learning_rate": 1.6343214837700296e-05, "loss": 1.0837, "step": 5281 }, { "epoch": 0.3, "grad_norm": 1.8934874534606934, "learning_rate": 1.634177866566919e-05, "loss": 1.0736, "step": 5282 }, { "epoch": 0.3, "grad_norm": 1.8343819379806519, "learning_rate": 1.6340342274801024e-05, "loss": 1.0241, "step": 5283 }, { "epoch": 0.3, "grad_norm": 1.8228952884674072, "learning_rate": 1.6338905665145352e-05, "loss": 0.9884, "step": 5284 }, { "epoch": 0.3, "grad_norm": 1.8130691051483154, "learning_rate": 1.6337468836751753e-05, "loss": 0.9255, "step": 5285 }, { "epoch": 0.3, "grad_norm": 1.6233998537063599, "learning_rate": 1.6336031789669808e-05, "loss": 0.9477, "step": 5286 }, { "epoch": 0.3, "grad_norm": 1.87525475025177, "learning_rate": 1.6334594523949107e-05, "loss": 1.0055, "step": 5287 }, { "epoch": 0.3, "grad_norm": 1.672725796699524, "learning_rate": 1.633315703963924e-05, "loss": 1.0006, "step": 5288 }, { "epoch": 0.3, "grad_norm": 2.157294511795044, "learning_rate": 1.6331719336789817e-05, "loss": 0.9921, "step": 5289 }, { "epoch": 0.3, "grad_norm": 1.8449910879135132, "learning_rate": 1.6330281415450446e-05, "loss": 1.077, "step": 5290 }, { "epoch": 0.3, "grad_norm": 1.9979575872421265, "learning_rate": 1.6328843275670748e-05, "loss": 0.9675, "step": 5291 }, { "epoch": 0.3, "grad_norm": 1.9981364011764526, "learning_rate": 1.6327404917500345e-05, "loss": 1.0143, "step": 5292 }, { "epoch": 0.3, "grad_norm": 1.9902487993240356, "learning_rate": 1.6325966340988877e-05, "loss": 1.0263, "step": 5293 }, { "epoch": 0.3, "grad_norm": 1.7841180562973022, "learning_rate": 1.6324527546185977e-05, "loss": 0.9924, "step": 5294 }, { "epoch": 0.3, "grad_norm": 2.185438871383667, "learning_rate": 1.6323088533141304e-05, "loss": 0.9932, "step": 5295 }, { "epoch": 0.3, "grad_norm": 1.8306366205215454, "learning_rate": 1.6321649301904505e-05, "loss": 0.9876, "step": 5296 }, { "epoch": 0.3, "grad_norm": 1.998640775680542, "learning_rate": 1.6320209852525242e-05, "loss": 1.0704, "step": 5297 }, { "epoch": 0.3, "grad_norm": 2.028083086013794, "learning_rate": 1.6318770185053197e-05, "loss": 0.9842, "step": 5298 }, { "epoch": 0.3, "grad_norm": 1.9142224788665771, "learning_rate": 1.6317330299538046e-05, "loss": 0.9059, "step": 5299 }, { "epoch": 0.3, "grad_norm": 1.9877183437347412, "learning_rate": 1.631589019602947e-05, "loss": 1.0, "step": 5300 }, { "epoch": 0.3, "grad_norm": 1.8000242710113525, "learning_rate": 1.6314449874577166e-05, "loss": 1.0743, "step": 5301 }, { "epoch": 0.3, "grad_norm": 1.8279976844787598, "learning_rate": 1.631300933523084e-05, "loss": 0.9637, "step": 5302 }, { "epoch": 0.3, "grad_norm": 1.7045879364013672, "learning_rate": 1.631156857804019e-05, "loss": 0.9535, "step": 5303 }, { "epoch": 0.3, "grad_norm": 1.7607024908065796, "learning_rate": 1.6310127603054945e-05, "loss": 1.0108, "step": 5304 }, { "epoch": 0.3, "grad_norm": 1.92744779586792, "learning_rate": 1.630868641032482e-05, "loss": 0.991, "step": 5305 }, { "epoch": 0.3, "grad_norm": 2.0643630027770996, "learning_rate": 1.6307244999899547e-05, "loss": 1.0553, "step": 5306 }, { "epoch": 0.3, "grad_norm": 1.817264437675476, "learning_rate": 1.6305803371828874e-05, "loss": 1.0755, "step": 5307 }, { "epoch": 0.3, "grad_norm": 1.9802658557891846, "learning_rate": 1.6304361526162534e-05, "loss": 0.9962, "step": 5308 }, { "epoch": 0.3, "grad_norm": 1.743973731994629, "learning_rate": 1.6302919462950294e-05, "loss": 1.056, "step": 5309 }, { "epoch": 0.3, "grad_norm": 1.9193590879440308, "learning_rate": 1.6301477182241903e-05, "loss": 1.0127, "step": 5310 }, { "epoch": 0.3, "grad_norm": 1.1546295881271362, "learning_rate": 1.6300034684087145e-05, "loss": 0.568, "step": 5311 }, { "epoch": 0.3, "grad_norm": 1.8126161098480225, "learning_rate": 1.6298591968535784e-05, "loss": 1.0318, "step": 5312 }, { "epoch": 0.3, "grad_norm": 1.7339115142822266, "learning_rate": 1.6297149035637608e-05, "loss": 0.9933, "step": 5313 }, { "epoch": 0.3, "grad_norm": 1.9690638780593872, "learning_rate": 1.6295705885442413e-05, "loss": 1.0665, "step": 5314 }, { "epoch": 0.3, "grad_norm": 1.7355304956436157, "learning_rate": 1.6294262517999994e-05, "loss": 0.9647, "step": 5315 }, { "epoch": 0.3, "grad_norm": 1.6641390323638916, "learning_rate": 1.6292818933360153e-05, "loss": 0.9512, "step": 5316 }, { "epoch": 0.3, "grad_norm": 1.1275404691696167, "learning_rate": 1.629137513157271e-05, "loss": 0.6151, "step": 5317 }, { "epoch": 0.3, "grad_norm": 1.9910144805908203, "learning_rate": 1.628993111268749e-05, "loss": 0.9756, "step": 5318 }, { "epoch": 0.31, "grad_norm": 1.8117080926895142, "learning_rate": 1.6288486876754314e-05, "loss": 1.0279, "step": 5319 }, { "epoch": 0.31, "grad_norm": 1.9032024145126343, "learning_rate": 1.628704242382302e-05, "loss": 1.0483, "step": 5320 }, { "epoch": 0.31, "grad_norm": 1.7163499593734741, "learning_rate": 1.6285597753943458e-05, "loss": 0.9742, "step": 5321 }, { "epoch": 0.31, "grad_norm": 1.8539586067199707, "learning_rate": 1.6284152867165475e-05, "loss": 1.0372, "step": 5322 }, { "epoch": 0.31, "grad_norm": 1.7598193883895874, "learning_rate": 1.628270776353893e-05, "loss": 0.9659, "step": 5323 }, { "epoch": 0.31, "grad_norm": 1.8572660684585571, "learning_rate": 1.628126244311369e-05, "loss": 1.0197, "step": 5324 }, { "epoch": 0.31, "grad_norm": 1.7211501598358154, "learning_rate": 1.6279816905939627e-05, "loss": 1.1184, "step": 5325 }, { "epoch": 0.31, "grad_norm": 1.6807236671447754, "learning_rate": 1.627837115206663e-05, "loss": 0.9651, "step": 5326 }, { "epoch": 0.31, "grad_norm": 1.1544266939163208, "learning_rate": 1.6276925181544577e-05, "loss": 0.5849, "step": 5327 }, { "epoch": 0.31, "grad_norm": 1.0740216970443726, "learning_rate": 1.6275478994423372e-05, "loss": 0.7002, "step": 5328 }, { "epoch": 0.31, "grad_norm": 2.021015167236328, "learning_rate": 1.627403259075292e-05, "loss": 1.0665, "step": 5329 }, { "epoch": 0.31, "grad_norm": 1.7597944736480713, "learning_rate": 1.6272585970583124e-05, "loss": 1.0188, "step": 5330 }, { "epoch": 0.31, "grad_norm": 1.6504372358322144, "learning_rate": 1.6271139133963906e-05, "loss": 0.9756, "step": 5331 }, { "epoch": 0.31, "grad_norm": 1.8334649801254272, "learning_rate": 1.62696920809452e-05, "loss": 1.0298, "step": 5332 }, { "epoch": 0.31, "grad_norm": 1.993582010269165, "learning_rate": 1.6268244811576932e-05, "loss": 1.0806, "step": 5333 }, { "epoch": 0.31, "grad_norm": 1.0547441244125366, "learning_rate": 1.6266797325909045e-05, "loss": 0.6393, "step": 5334 }, { "epoch": 0.31, "grad_norm": 1.827067494392395, "learning_rate": 1.626534962399149e-05, "loss": 1.0917, "step": 5335 }, { "epoch": 0.31, "grad_norm": 1.8336405754089355, "learning_rate": 1.626390170587422e-05, "loss": 0.9961, "step": 5336 }, { "epoch": 0.31, "grad_norm": 1.6455762386322021, "learning_rate": 1.6262453571607198e-05, "loss": 0.9522, "step": 5337 }, { "epoch": 0.31, "grad_norm": 1.6795203685760498, "learning_rate": 1.6261005221240394e-05, "loss": 1.0395, "step": 5338 }, { "epoch": 0.31, "grad_norm": 1.8535797595977783, "learning_rate": 1.6259556654823793e-05, "loss": 0.9948, "step": 5339 }, { "epoch": 0.31, "grad_norm": 1.8329442739486694, "learning_rate": 1.6258107872407376e-05, "loss": 1.013, "step": 5340 }, { "epoch": 0.31, "grad_norm": 1.0877106189727783, "learning_rate": 1.625665887404114e-05, "loss": 0.6125, "step": 5341 }, { "epoch": 0.31, "grad_norm": 1.8665584325790405, "learning_rate": 1.6255209659775082e-05, "loss": 1.0, "step": 5342 }, { "epoch": 0.31, "grad_norm": 1.8733494281768799, "learning_rate": 1.625376022965921e-05, "loss": 0.9807, "step": 5343 }, { "epoch": 0.31, "grad_norm": 1.891006588935852, "learning_rate": 1.6252310583743544e-05, "loss": 0.9436, "step": 5344 }, { "epoch": 0.31, "grad_norm": 1.889841079711914, "learning_rate": 1.6250860722078106e-05, "loss": 1.0553, "step": 5345 }, { "epoch": 0.31, "grad_norm": 1.9409970045089722, "learning_rate": 1.6249410644712925e-05, "loss": 1.1267, "step": 5346 }, { "epoch": 0.31, "grad_norm": 1.9099448919296265, "learning_rate": 1.624796035169804e-05, "loss": 1.0782, "step": 5347 }, { "epoch": 0.31, "grad_norm": 1.9502898454666138, "learning_rate": 1.6246509843083492e-05, "loss": 1.0599, "step": 5348 }, { "epoch": 0.31, "grad_norm": 1.7492371797561646, "learning_rate": 1.6245059118919342e-05, "loss": 1.041, "step": 5349 }, { "epoch": 0.31, "grad_norm": 1.9055966138839722, "learning_rate": 1.6243608179255645e-05, "loss": 0.9941, "step": 5350 }, { "epoch": 0.31, "grad_norm": 1.9000831842422485, "learning_rate": 1.624215702414247e-05, "loss": 0.9467, "step": 5351 }, { "epoch": 0.31, "grad_norm": 1.8919672966003418, "learning_rate": 1.6240705653629896e-05, "loss": 1.0168, "step": 5352 }, { "epoch": 0.31, "grad_norm": 1.907387614250183, "learning_rate": 1.6239254067768002e-05, "loss": 1.0353, "step": 5353 }, { "epoch": 0.31, "grad_norm": 1.7466566562652588, "learning_rate": 1.6237802266606877e-05, "loss": 0.9752, "step": 5354 }, { "epoch": 0.31, "grad_norm": 1.6579678058624268, "learning_rate": 1.623635025019662e-05, "loss": 0.951, "step": 5355 }, { "epoch": 0.31, "grad_norm": 1.9517204761505127, "learning_rate": 1.6234898018587336e-05, "loss": 0.9835, "step": 5356 }, { "epoch": 0.31, "grad_norm": 1.91655695438385, "learning_rate": 1.623344557182914e-05, "loss": 0.9505, "step": 5357 }, { "epoch": 0.31, "grad_norm": 2.124234437942505, "learning_rate": 1.623199290997215e-05, "loss": 1.0431, "step": 5358 }, { "epoch": 0.31, "grad_norm": 2.070302724838257, "learning_rate": 1.6230540033066492e-05, "loss": 0.9741, "step": 5359 }, { "epoch": 0.31, "grad_norm": 1.90458083152771, "learning_rate": 1.62290869411623e-05, "loss": 1.0118, "step": 5360 }, { "epoch": 0.31, "grad_norm": 1.8572077751159668, "learning_rate": 1.622763363430972e-05, "loss": 0.9646, "step": 5361 }, { "epoch": 0.31, "grad_norm": 1.7768325805664062, "learning_rate": 1.6226180112558897e-05, "loss": 1.0824, "step": 5362 }, { "epoch": 0.31, "grad_norm": 1.8336414098739624, "learning_rate": 1.6224726375959994e-05, "loss": 1.0104, "step": 5363 }, { "epoch": 0.31, "grad_norm": 1.1761006116867065, "learning_rate": 1.6223272424563174e-05, "loss": 0.6472, "step": 5364 }, { "epoch": 0.31, "grad_norm": 2.2218785285949707, "learning_rate": 1.62218182584186e-05, "loss": 0.9537, "step": 5365 }, { "epoch": 0.31, "grad_norm": 2.465400218963623, "learning_rate": 1.622036387757646e-05, "loss": 1.0643, "step": 5366 }, { "epoch": 0.31, "grad_norm": 1.8587623834609985, "learning_rate": 1.621890928208694e-05, "loss": 1.1061, "step": 5367 }, { "epoch": 0.31, "grad_norm": 1.9155011177062988, "learning_rate": 1.6217454472000232e-05, "loss": 1.0137, "step": 5368 }, { "epoch": 0.31, "grad_norm": 1.8350976705551147, "learning_rate": 1.621599944736654e-05, "loss": 1.1068, "step": 5369 }, { "epoch": 0.31, "grad_norm": 0.9839016795158386, "learning_rate": 1.6214544208236066e-05, "loss": 0.5773, "step": 5370 }, { "epoch": 0.31, "grad_norm": 1.5843640565872192, "learning_rate": 1.6213088754659033e-05, "loss": 1.0513, "step": 5371 }, { "epoch": 0.31, "grad_norm": 1.8846186399459839, "learning_rate": 1.6211633086685666e-05, "loss": 1.0879, "step": 5372 }, { "epoch": 0.31, "grad_norm": 1.819551944732666, "learning_rate": 1.6210177204366187e-05, "loss": 1.0244, "step": 5373 }, { "epoch": 0.31, "grad_norm": 1.8161970376968384, "learning_rate": 1.6208721107750845e-05, "loss": 0.9999, "step": 5374 }, { "epoch": 0.31, "grad_norm": 1.9262241125106812, "learning_rate": 1.6207264796889875e-05, "loss": 1.0385, "step": 5375 }, { "epoch": 0.31, "grad_norm": 2.0153942108154297, "learning_rate": 1.6205808271833542e-05, "loss": 1.0683, "step": 5376 }, { "epoch": 0.31, "grad_norm": 1.7681382894515991, "learning_rate": 1.6204351532632098e-05, "loss": 1.1153, "step": 5377 }, { "epoch": 0.31, "grad_norm": 1.666382908821106, "learning_rate": 1.6202894579335815e-05, "loss": 1.0607, "step": 5378 }, { "epoch": 0.31, "grad_norm": 1.8089468479156494, "learning_rate": 1.6201437411994967e-05, "loss": 0.9768, "step": 5379 }, { "epoch": 0.31, "grad_norm": 1.7946926355361938, "learning_rate": 1.6199980030659837e-05, "loss": 0.9943, "step": 5380 }, { "epoch": 0.31, "grad_norm": 1.8243076801300049, "learning_rate": 1.6198522435380716e-05, "loss": 1.049, "step": 5381 }, { "epoch": 0.31, "grad_norm": 1.8830714225769043, "learning_rate": 1.61970646262079e-05, "loss": 1.0789, "step": 5382 }, { "epoch": 0.31, "grad_norm": 1.8030109405517578, "learning_rate": 1.6195606603191692e-05, "loss": 1.0082, "step": 5383 }, { "epoch": 0.31, "grad_norm": 1.8486520051956177, "learning_rate": 1.619414836638241e-05, "loss": 1.0603, "step": 5384 }, { "epoch": 0.31, "grad_norm": 2.0993711948394775, "learning_rate": 1.619268991583037e-05, "loss": 1.0644, "step": 5385 }, { "epoch": 0.31, "grad_norm": 1.1216024160385132, "learning_rate": 1.61912312515859e-05, "loss": 0.6071, "step": 5386 }, { "epoch": 0.31, "grad_norm": 1.8520299196243286, "learning_rate": 1.6189772373699334e-05, "loss": 0.9751, "step": 5387 }, { "epoch": 0.31, "grad_norm": 2.023817300796509, "learning_rate": 1.6188313282221008e-05, "loss": 1.0642, "step": 5388 }, { "epoch": 0.31, "grad_norm": 2.0483896732330322, "learning_rate": 1.618685397720128e-05, "loss": 1.0392, "step": 5389 }, { "epoch": 0.31, "grad_norm": 2.0124213695526123, "learning_rate": 1.618539445869051e-05, "loss": 1.0367, "step": 5390 }, { "epoch": 0.31, "grad_norm": 1.9030776023864746, "learning_rate": 1.618393472673905e-05, "loss": 1.0302, "step": 5391 }, { "epoch": 0.31, "grad_norm": 1.8303340673446655, "learning_rate": 1.6182474781397277e-05, "loss": 0.9987, "step": 5392 }, { "epoch": 0.31, "grad_norm": 1.844299077987671, "learning_rate": 1.6181014622715568e-05, "loss": 1.0248, "step": 5393 }, { "epoch": 0.31, "grad_norm": 1.733989953994751, "learning_rate": 1.6179554250744315e-05, "loss": 1.0522, "step": 5394 }, { "epoch": 0.31, "grad_norm": 1.9461452960968018, "learning_rate": 1.6178093665533903e-05, "loss": 0.9966, "step": 5395 }, { "epoch": 0.31, "grad_norm": 1.7193036079406738, "learning_rate": 1.6176632867134738e-05, "loss": 1.0738, "step": 5396 }, { "epoch": 0.31, "grad_norm": 2.0400829315185547, "learning_rate": 1.6175171855597224e-05, "loss": 1.0613, "step": 5397 }, { "epoch": 0.31, "grad_norm": 1.936630129814148, "learning_rate": 1.617371063097178e-05, "loss": 1.0325, "step": 5398 }, { "epoch": 0.31, "grad_norm": 1.8583730459213257, "learning_rate": 1.6172249193308827e-05, "loss": 0.976, "step": 5399 }, { "epoch": 0.31, "grad_norm": 2.042346239089966, "learning_rate": 1.61707875426588e-05, "loss": 1.0281, "step": 5400 }, { "epoch": 0.31, "grad_norm": 2.2337827682495117, "learning_rate": 1.6169325679072127e-05, "loss": 1.1197, "step": 5401 }, { "epoch": 0.31, "grad_norm": 1.8800380229949951, "learning_rate": 1.6167863602599263e-05, "loss": 1.0653, "step": 5402 }, { "epoch": 0.31, "grad_norm": 1.8144276142120361, "learning_rate": 1.616640131329065e-05, "loss": 1.0001, "step": 5403 }, { "epoch": 0.31, "grad_norm": 1.9197808504104614, "learning_rate": 1.6164938811196758e-05, "loss": 0.9966, "step": 5404 }, { "epoch": 0.31, "grad_norm": 1.933760166168213, "learning_rate": 1.6163476096368046e-05, "loss": 0.9963, "step": 5405 }, { "epoch": 0.31, "grad_norm": 1.8016561269760132, "learning_rate": 1.6162013168854992e-05, "loss": 1.0201, "step": 5406 }, { "epoch": 0.31, "grad_norm": 1.9271571636199951, "learning_rate": 1.6160550028708077e-05, "loss": 1.0579, "step": 5407 }, { "epoch": 0.31, "grad_norm": 2.022021532058716, "learning_rate": 1.6159086675977785e-05, "loss": 1.0895, "step": 5408 }, { "epoch": 0.31, "grad_norm": 1.9065383672714233, "learning_rate": 1.6157623110714618e-05, "loss": 0.9586, "step": 5409 }, { "epoch": 0.31, "grad_norm": 1.9301396608352661, "learning_rate": 1.615615933296908e-05, "loss": 1.0325, "step": 5410 }, { "epoch": 0.31, "grad_norm": 1.8570078611373901, "learning_rate": 1.6154695342791682e-05, "loss": 0.9274, "step": 5411 }, { "epoch": 0.31, "grad_norm": 1.914941668510437, "learning_rate": 1.6153231140232936e-05, "loss": 1.042, "step": 5412 }, { "epoch": 0.31, "grad_norm": 2.126347303390503, "learning_rate": 1.6151766725343373e-05, "loss": 1.0154, "step": 5413 }, { "epoch": 0.31, "grad_norm": 2.1510863304138184, "learning_rate": 1.6150302098173523e-05, "loss": 1.0231, "step": 5414 }, { "epoch": 0.31, "grad_norm": 1.855362892150879, "learning_rate": 1.6148837258773934e-05, "loss": 1.0199, "step": 5415 }, { "epoch": 0.31, "grad_norm": 1.887709617614746, "learning_rate": 1.6147372207195142e-05, "loss": 1.0571, "step": 5416 }, { "epoch": 0.31, "grad_norm": 1.8217957019805908, "learning_rate": 1.6145906943487706e-05, "loss": 1.0676, "step": 5417 }, { "epoch": 0.31, "grad_norm": 1.1708956956863403, "learning_rate": 1.6144441467702194e-05, "loss": 0.6114, "step": 5418 }, { "epoch": 0.31, "grad_norm": 1.7735998630523682, "learning_rate": 1.6142975779889167e-05, "loss": 1.0294, "step": 5419 }, { "epoch": 0.31, "grad_norm": 1.9344372749328613, "learning_rate": 1.6141509880099205e-05, "loss": 0.9958, "step": 5420 }, { "epoch": 0.31, "grad_norm": 1.8749792575836182, "learning_rate": 1.6140043768382894e-05, "loss": 1.0335, "step": 5421 }, { "epoch": 0.31, "grad_norm": 1.9425380229949951, "learning_rate": 1.6138577444790826e-05, "loss": 0.9642, "step": 5422 }, { "epoch": 0.31, "grad_norm": 1.8311879634857178, "learning_rate": 1.6137110909373595e-05, "loss": 0.9392, "step": 5423 }, { "epoch": 0.31, "grad_norm": 2.1880133152008057, "learning_rate": 1.6135644162181814e-05, "loss": 1.025, "step": 5424 }, { "epoch": 0.31, "grad_norm": 1.9264405965805054, "learning_rate": 1.6134177203266087e-05, "loss": 1.0532, "step": 5425 }, { "epoch": 0.31, "grad_norm": 1.8340034484863281, "learning_rate": 1.6132710032677043e-05, "loss": 1.0363, "step": 5426 }, { "epoch": 0.31, "grad_norm": 1.6725407838821411, "learning_rate": 1.6131242650465305e-05, "loss": 1.0238, "step": 5427 }, { "epoch": 0.31, "grad_norm": 1.705064058303833, "learning_rate": 1.6129775056681515e-05, "loss": 0.9729, "step": 5428 }, { "epoch": 0.31, "grad_norm": 1.9954407215118408, "learning_rate": 1.6128307251376304e-05, "loss": 0.966, "step": 5429 }, { "epoch": 0.31, "grad_norm": 1.7256083488464355, "learning_rate": 1.612683923460033e-05, "loss": 0.9855, "step": 5430 }, { "epoch": 0.31, "grad_norm": 1.9120780229568481, "learning_rate": 1.612537100640425e-05, "loss": 1.0178, "step": 5431 }, { "epoch": 0.31, "grad_norm": 2.009671688079834, "learning_rate": 1.6123902566838726e-05, "loss": 0.9795, "step": 5432 }, { "epoch": 0.31, "grad_norm": 1.6778771877288818, "learning_rate": 1.6122433915954433e-05, "loss": 0.9789, "step": 5433 }, { "epoch": 0.31, "grad_norm": 1.7159498929977417, "learning_rate": 1.6120965053802047e-05, "loss": 1.0189, "step": 5434 }, { "epoch": 0.31, "grad_norm": 1.1774489879608154, "learning_rate": 1.6119495980432254e-05, "loss": 0.6219, "step": 5435 }, { "epoch": 0.31, "grad_norm": 1.8734923601150513, "learning_rate": 1.611802669589575e-05, "loss": 1.0445, "step": 5436 }, { "epoch": 0.31, "grad_norm": 2.0146141052246094, "learning_rate": 1.6116557200243234e-05, "loss": 1.0338, "step": 5437 }, { "epoch": 0.31, "grad_norm": 1.8196088075637817, "learning_rate": 1.6115087493525416e-05, "loss": 0.9966, "step": 5438 }, { "epoch": 0.31, "grad_norm": 1.7152007818222046, "learning_rate": 1.611361757579301e-05, "loss": 1.0772, "step": 5439 }, { "epoch": 0.31, "grad_norm": 1.9442161321640015, "learning_rate": 1.611214744709674e-05, "loss": 1.0263, "step": 5440 }, { "epoch": 0.31, "grad_norm": 1.8195992708206177, "learning_rate": 1.611067710748733e-05, "loss": 1.0616, "step": 5441 }, { "epoch": 0.31, "grad_norm": 1.8805011510849, "learning_rate": 1.610920655701553e-05, "loss": 1.0128, "step": 5442 }, { "epoch": 0.31, "grad_norm": 1.9722251892089844, "learning_rate": 1.6107735795732072e-05, "loss": 1.03, "step": 5443 }, { "epoch": 0.31, "grad_norm": 1.9770876169204712, "learning_rate": 1.6106264823687716e-05, "loss": 1.017, "step": 5444 }, { "epoch": 0.31, "grad_norm": 1.781185507774353, "learning_rate": 1.6104793640933215e-05, "loss": 1.0172, "step": 5445 }, { "epoch": 0.31, "grad_norm": 1.7444640398025513, "learning_rate": 1.6103322247519343e-05, "loss": 1.0289, "step": 5446 }, { "epoch": 0.31, "grad_norm": 1.0717390775680542, "learning_rate": 1.6101850643496865e-05, "loss": 0.6202, "step": 5447 }, { "epoch": 0.31, "grad_norm": 1.1352232694625854, "learning_rate": 1.610037882891657e-05, "loss": 0.6291, "step": 5448 }, { "epoch": 0.31, "grad_norm": 1.6499055624008179, "learning_rate": 1.6098906803829238e-05, "loss": 0.9389, "step": 5449 }, { "epoch": 0.31, "grad_norm": 2.085078716278076, "learning_rate": 1.609743456828567e-05, "loss": 0.9547, "step": 5450 }, { "epoch": 0.31, "grad_norm": 1.8660414218902588, "learning_rate": 1.609596212233667e-05, "loss": 1.01, "step": 5451 }, { "epoch": 0.31, "grad_norm": 1.7397100925445557, "learning_rate": 1.609448946603304e-05, "loss": 1.0309, "step": 5452 }, { "epoch": 0.31, "grad_norm": 1.8257865905761719, "learning_rate": 1.609301659942561e-05, "loss": 0.9469, "step": 5453 }, { "epoch": 0.31, "grad_norm": 1.9190455675125122, "learning_rate": 1.6091543522565194e-05, "loss": 1.0128, "step": 5454 }, { "epoch": 0.31, "grad_norm": 2.0373733043670654, "learning_rate": 1.6090070235502625e-05, "loss": 1.0213, "step": 5455 }, { "epoch": 0.31, "grad_norm": 1.737097144126892, "learning_rate": 1.608859673828875e-05, "loss": 1.0284, "step": 5456 }, { "epoch": 0.31, "grad_norm": 1.8663862943649292, "learning_rate": 1.6087123030974403e-05, "loss": 1.029, "step": 5457 }, { "epoch": 0.31, "grad_norm": 1.8301271200180054, "learning_rate": 1.6085649113610447e-05, "loss": 0.9813, "step": 5458 }, { "epoch": 0.31, "grad_norm": 1.8740274906158447, "learning_rate": 1.6084174986247738e-05, "loss": 1.0954, "step": 5459 }, { "epoch": 0.31, "grad_norm": 1.8680083751678467, "learning_rate": 1.6082700648937146e-05, "loss": 0.9862, "step": 5460 }, { "epoch": 0.31, "grad_norm": 2.0268795490264893, "learning_rate": 1.6081226101729547e-05, "loss": 1.0101, "step": 5461 }, { "epoch": 0.31, "grad_norm": 1.7622219324111938, "learning_rate": 1.6079751344675823e-05, "loss": 1.0289, "step": 5462 }, { "epoch": 0.31, "grad_norm": 1.8151675462722778, "learning_rate": 1.6078276377826862e-05, "loss": 1.0342, "step": 5463 }, { "epoch": 0.31, "grad_norm": 1.8670762777328491, "learning_rate": 1.6076801201233562e-05, "loss": 0.9738, "step": 5464 }, { "epoch": 0.31, "grad_norm": 1.8911508321762085, "learning_rate": 1.6075325814946828e-05, "loss": 1.0476, "step": 5465 }, { "epoch": 0.31, "grad_norm": 1.7749561071395874, "learning_rate": 1.6073850219017572e-05, "loss": 1.0473, "step": 5466 }, { "epoch": 0.31, "grad_norm": 1.8277431726455688, "learning_rate": 1.607237441349671e-05, "loss": 1.0334, "step": 5467 }, { "epoch": 0.31, "grad_norm": 1.8503179550170898, "learning_rate": 1.6070898398435167e-05, "loss": 1.0163, "step": 5468 }, { "epoch": 0.31, "grad_norm": 1.9117094278335571, "learning_rate": 1.6069422173883883e-05, "loss": 1.0444, "step": 5469 }, { "epoch": 0.31, "grad_norm": 1.813779592514038, "learning_rate": 1.606794573989379e-05, "loss": 1.0513, "step": 5470 }, { "epoch": 0.31, "grad_norm": 1.8578535318374634, "learning_rate": 1.6066469096515845e-05, "loss": 1.0688, "step": 5471 }, { "epoch": 0.31, "grad_norm": 2.020658016204834, "learning_rate": 1.6064992243800993e-05, "loss": 1.0417, "step": 5472 }, { "epoch": 0.31, "grad_norm": 1.888777732849121, "learning_rate": 1.6063515181800203e-05, "loss": 1.0996, "step": 5473 }, { "epoch": 0.31, "grad_norm": 1.8295797109603882, "learning_rate": 1.606203791056444e-05, "loss": 1.022, "step": 5474 }, { "epoch": 0.31, "grad_norm": 1.8093209266662598, "learning_rate": 1.6060560430144683e-05, "loss": 1.0136, "step": 5475 }, { "epoch": 0.31, "grad_norm": 1.977956771850586, "learning_rate": 1.6059082740591915e-05, "loss": 1.0341, "step": 5476 }, { "epoch": 0.31, "grad_norm": 1.9586621522903442, "learning_rate": 1.6057604841957127e-05, "loss": 1.054, "step": 5477 }, { "epoch": 0.31, "grad_norm": 1.8799439668655396, "learning_rate": 1.605612673429132e-05, "loss": 1.0302, "step": 5478 }, { "epoch": 0.31, "grad_norm": 1.8172889947891235, "learning_rate": 1.6054648417645493e-05, "loss": 1.0173, "step": 5479 }, { "epoch": 0.31, "grad_norm": 1.8336008787155151, "learning_rate": 1.6053169892070664e-05, "loss": 1.0112, "step": 5480 }, { "epoch": 0.31, "grad_norm": 1.9452742338180542, "learning_rate": 1.605169115761785e-05, "loss": 0.9467, "step": 5481 }, { "epoch": 0.31, "grad_norm": 1.9023585319519043, "learning_rate": 1.6050212214338076e-05, "loss": 1.0445, "step": 5482 }, { "epoch": 0.31, "grad_norm": 1.932289481163025, "learning_rate": 1.6048733062282385e-05, "loss": 1.07, "step": 5483 }, { "epoch": 0.31, "grad_norm": 1.6552865505218506, "learning_rate": 1.6047253701501807e-05, "loss": 1.0699, "step": 5484 }, { "epoch": 0.31, "grad_norm": 1.9912241697311401, "learning_rate": 1.60457741320474e-05, "loss": 1.0184, "step": 5485 }, { "epoch": 0.31, "grad_norm": 1.9628201723098755, "learning_rate": 1.6044294353970212e-05, "loss": 0.9072, "step": 5486 }, { "epoch": 0.31, "grad_norm": 1.9564534425735474, "learning_rate": 1.6042814367321313e-05, "loss": 1.0166, "step": 5487 }, { "epoch": 0.31, "grad_norm": 1.8115447759628296, "learning_rate": 1.604133417215177e-05, "loss": 0.9885, "step": 5488 }, { "epoch": 0.31, "grad_norm": 1.9882843494415283, "learning_rate": 1.603985376851266e-05, "loss": 1.0018, "step": 5489 }, { "epoch": 0.31, "grad_norm": 1.8080862760543823, "learning_rate": 1.6038373156455068e-05, "loss": 0.9718, "step": 5490 }, { "epoch": 0.31, "grad_norm": 1.4758456945419312, "learning_rate": 1.6036892336030086e-05, "loss": 0.7086, "step": 5491 }, { "epoch": 0.31, "grad_norm": 1.8091721534729004, "learning_rate": 1.6035411307288814e-05, "loss": 1.008, "step": 5492 }, { "epoch": 0.32, "grad_norm": 2.1586380004882812, "learning_rate": 1.6033930070282357e-05, "loss": 0.9716, "step": 5493 }, { "epoch": 0.32, "grad_norm": 1.7824774980545044, "learning_rate": 1.603244862506182e-05, "loss": 0.9699, "step": 5494 }, { "epoch": 0.32, "grad_norm": 1.866288185119629, "learning_rate": 1.603096697167834e-05, "loss": 0.9373, "step": 5495 }, { "epoch": 0.32, "grad_norm": 1.987775444984436, "learning_rate": 1.6029485110183037e-05, "loss": 1.0997, "step": 5496 }, { "epoch": 0.32, "grad_norm": 1.8538475036621094, "learning_rate": 1.6028003040627042e-05, "loss": 1.0204, "step": 5497 }, { "epoch": 0.32, "grad_norm": 1.847609043121338, "learning_rate": 1.6026520763061504e-05, "loss": 1.0592, "step": 5498 }, { "epoch": 0.32, "grad_norm": 1.7513768672943115, "learning_rate": 1.602503827753757e-05, "loss": 0.9833, "step": 5499 }, { "epoch": 0.32, "grad_norm": 2.0010900497436523, "learning_rate": 1.6023555584106392e-05, "loss": 1.0425, "step": 5500 }, { "epoch": 0.32, "grad_norm": 1.662872076034546, "learning_rate": 1.6022072682819138e-05, "loss": 1.1165, "step": 5501 }, { "epoch": 0.32, "grad_norm": 1.8837188482284546, "learning_rate": 1.6020589573726976e-05, "loss": 0.9901, "step": 5502 }, { "epoch": 0.32, "grad_norm": 1.7075861692428589, "learning_rate": 1.6019106256881088e-05, "loss": 0.9575, "step": 5503 }, { "epoch": 0.32, "grad_norm": 1.8860602378845215, "learning_rate": 1.6017622732332656e-05, "loss": 1.0477, "step": 5504 }, { "epoch": 0.32, "grad_norm": 1.9120173454284668, "learning_rate": 1.6016139000132873e-05, "loss": 0.9778, "step": 5505 }, { "epoch": 0.32, "grad_norm": 1.9226152896881104, "learning_rate": 1.601465506033294e-05, "loss": 1.0247, "step": 5506 }, { "epoch": 0.32, "grad_norm": 1.8007512092590332, "learning_rate": 1.601317091298406e-05, "loss": 0.9058, "step": 5507 }, { "epoch": 0.32, "grad_norm": 1.2160261869430542, "learning_rate": 1.601168655813745e-05, "loss": 0.6049, "step": 5508 }, { "epoch": 0.32, "grad_norm": 1.8075487613677979, "learning_rate": 1.6010201995844328e-05, "loss": 1.0097, "step": 5509 }, { "epoch": 0.32, "grad_norm": 1.7775397300720215, "learning_rate": 1.6008717226155925e-05, "loss": 0.9358, "step": 5510 }, { "epoch": 0.32, "grad_norm": 1.8586055040359497, "learning_rate": 1.6007232249123478e-05, "loss": 1.0363, "step": 5511 }, { "epoch": 0.32, "grad_norm": 1.7582409381866455, "learning_rate": 1.6005747064798224e-05, "loss": 0.9495, "step": 5512 }, { "epoch": 0.32, "grad_norm": 1.885279655456543, "learning_rate": 1.6004261673231414e-05, "loss": 0.9902, "step": 5513 }, { "epoch": 0.32, "grad_norm": 1.9768409729003906, "learning_rate": 1.6002776074474308e-05, "loss": 1.0861, "step": 5514 }, { "epoch": 0.32, "grad_norm": 1.7169522047042847, "learning_rate": 1.6001290268578164e-05, "loss": 0.9648, "step": 5515 }, { "epoch": 0.32, "grad_norm": 1.8215198516845703, "learning_rate": 1.5999804255594262e-05, "loss": 1.019, "step": 5516 }, { "epoch": 0.32, "grad_norm": 1.8581769466400146, "learning_rate": 1.599831803557387e-05, "loss": 1.0159, "step": 5517 }, { "epoch": 0.32, "grad_norm": 2.4309744834899902, "learning_rate": 1.599683160856828e-05, "loss": 1.0321, "step": 5518 }, { "epoch": 0.32, "grad_norm": 1.9362409114837646, "learning_rate": 1.5995344974628787e-05, "loss": 1.0545, "step": 5519 }, { "epoch": 0.32, "grad_norm": 1.8471745252609253, "learning_rate": 1.5993858133806684e-05, "loss": 1.0043, "step": 5520 }, { "epoch": 0.32, "grad_norm": 1.6273977756500244, "learning_rate": 1.5992371086153276e-05, "loss": 0.9699, "step": 5521 }, { "epoch": 0.32, "grad_norm": 1.956954002380371, "learning_rate": 1.5990883831719886e-05, "loss": 1.0649, "step": 5522 }, { "epoch": 0.32, "grad_norm": 1.799927830696106, "learning_rate": 1.5989396370557824e-05, "loss": 0.9226, "step": 5523 }, { "epoch": 0.32, "grad_norm": 1.8341554403305054, "learning_rate": 1.598790870271843e-05, "loss": 1.0271, "step": 5524 }, { "epoch": 0.32, "grad_norm": 2.3146204948425293, "learning_rate": 1.5986420828253032e-05, "loss": 0.9936, "step": 5525 }, { "epoch": 0.32, "grad_norm": 1.9320725202560425, "learning_rate": 1.598493274721297e-05, "loss": 1.1027, "step": 5526 }, { "epoch": 0.32, "grad_norm": 1.7183300256729126, "learning_rate": 1.59834444596496e-05, "loss": 1.1023, "step": 5527 }, { "epoch": 0.32, "grad_norm": 1.8065663576126099, "learning_rate": 1.5981955965614274e-05, "loss": 1.0101, "step": 5528 }, { "epoch": 0.32, "grad_norm": 1.793352723121643, "learning_rate": 1.598046726515836e-05, "loss": 0.9984, "step": 5529 }, { "epoch": 0.32, "grad_norm": 1.7683485746383667, "learning_rate": 1.5978978358333223e-05, "loss": 0.9855, "step": 5530 }, { "epoch": 0.32, "grad_norm": 1.7994049787521362, "learning_rate": 1.597748924519025e-05, "loss": 0.912, "step": 5531 }, { "epoch": 0.32, "grad_norm": 1.7815009355545044, "learning_rate": 1.5975999925780812e-05, "loss": 0.9812, "step": 5532 }, { "epoch": 0.32, "grad_norm": 1.9630521535873413, "learning_rate": 1.5974510400156316e-05, "loss": 1.0515, "step": 5533 }, { "epoch": 0.32, "grad_norm": 1.8291627168655396, "learning_rate": 1.5973020668368155e-05, "loss": 1.0555, "step": 5534 }, { "epoch": 0.32, "grad_norm": 1.7765790224075317, "learning_rate": 1.5971530730467736e-05, "loss": 0.9725, "step": 5535 }, { "epoch": 0.32, "grad_norm": 2.124052047729492, "learning_rate": 1.597004058650647e-05, "loss": 1.0564, "step": 5536 }, { "epoch": 0.32, "grad_norm": 1.6187347173690796, "learning_rate": 1.596855023653578e-05, "loss": 1.0079, "step": 5537 }, { "epoch": 0.32, "grad_norm": 1.7103477716445923, "learning_rate": 1.5967059680607097e-05, "loss": 1.019, "step": 5538 }, { "epoch": 0.32, "grad_norm": 1.8535102605819702, "learning_rate": 1.596556891877185e-05, "loss": 0.9583, "step": 5539 }, { "epoch": 0.32, "grad_norm": 1.9701313972473145, "learning_rate": 1.5964077951081484e-05, "loss": 1.0428, "step": 5540 }, { "epoch": 0.32, "grad_norm": 1.68707275390625, "learning_rate": 1.596258677758745e-05, "loss": 0.9534, "step": 5541 }, { "epoch": 0.32, "grad_norm": 1.7589253187179565, "learning_rate": 1.59610953983412e-05, "loss": 1.0687, "step": 5542 }, { "epoch": 0.32, "grad_norm": 1.7668746709823608, "learning_rate": 1.59596038133942e-05, "loss": 1.0145, "step": 5543 }, { "epoch": 0.32, "grad_norm": 2.00268292427063, "learning_rate": 1.5958112022797917e-05, "loss": 0.9909, "step": 5544 }, { "epoch": 0.32, "grad_norm": 1.097862958908081, "learning_rate": 1.5956620026603835e-05, "loss": 0.5782, "step": 5545 }, { "epoch": 0.32, "grad_norm": 1.908186674118042, "learning_rate": 1.595512782486344e-05, "loss": 1.0802, "step": 5546 }, { "epoch": 0.32, "grad_norm": 1.9989211559295654, "learning_rate": 1.5953635417628207e-05, "loss": 1.0182, "step": 5547 }, { "epoch": 0.32, "grad_norm": 1.927767038345337, "learning_rate": 1.5952142804949654e-05, "loss": 1.0438, "step": 5548 }, { "epoch": 0.32, "grad_norm": 1.8537846803665161, "learning_rate": 1.595064998687928e-05, "loss": 1.0221, "step": 5549 }, { "epoch": 0.32, "grad_norm": 1.7231436967849731, "learning_rate": 1.5949156963468593e-05, "loss": 1.08, "step": 5550 }, { "epoch": 0.32, "grad_norm": 1.7040066719055176, "learning_rate": 1.594766373476912e-05, "loss": 1.037, "step": 5551 }, { "epoch": 0.32, "grad_norm": 1.6797269582748413, "learning_rate": 1.5946170300832385e-05, "loss": 0.9258, "step": 5552 }, { "epoch": 0.32, "grad_norm": 1.7724076509475708, "learning_rate": 1.5944676661709922e-05, "loss": 0.9654, "step": 5553 }, { "epoch": 0.32, "grad_norm": 1.7064090967178345, "learning_rate": 1.5943182817453277e-05, "loss": 1.0494, "step": 5554 }, { "epoch": 0.32, "grad_norm": 1.843024730682373, "learning_rate": 1.594168876811399e-05, "loss": 1.0331, "step": 5555 }, { "epoch": 0.32, "grad_norm": 1.7479760646820068, "learning_rate": 1.5940194513743623e-05, "loss": 0.9791, "step": 5556 }, { "epoch": 0.32, "grad_norm": 1.6669811010360718, "learning_rate": 1.593870005439374e-05, "loss": 1.0223, "step": 5557 }, { "epoch": 0.32, "grad_norm": 1.663368582725525, "learning_rate": 1.5937205390115902e-05, "loss": 0.9502, "step": 5558 }, { "epoch": 0.32, "grad_norm": 1.7865049839019775, "learning_rate": 1.5935710520961693e-05, "loss": 0.9874, "step": 5559 }, { "epoch": 0.32, "grad_norm": 1.81930673122406, "learning_rate": 1.5934215446982696e-05, "loss": 1.1008, "step": 5560 }, { "epoch": 0.32, "grad_norm": 2.0533955097198486, "learning_rate": 1.5932720168230497e-05, "loss": 0.9945, "step": 5561 }, { "epoch": 0.32, "grad_norm": 1.8189257383346558, "learning_rate": 1.5931224684756698e-05, "loss": 0.9611, "step": 5562 }, { "epoch": 0.32, "grad_norm": 1.8144233226776123, "learning_rate": 1.5929728996612905e-05, "loss": 1.0139, "step": 5563 }, { "epoch": 0.32, "grad_norm": 1.8488507270812988, "learning_rate": 1.592823310385073e-05, "loss": 0.9654, "step": 5564 }, { "epoch": 0.32, "grad_norm": 1.9606572389602661, "learning_rate": 1.5926737006521787e-05, "loss": 1.1029, "step": 5565 }, { "epoch": 0.32, "grad_norm": 1.7714370489120483, "learning_rate": 1.5925240704677708e-05, "loss": 1.0483, "step": 5566 }, { "epoch": 0.32, "grad_norm": 1.8103426694869995, "learning_rate": 1.5923744198370124e-05, "loss": 1.052, "step": 5567 }, { "epoch": 0.32, "grad_norm": 1.7000718116760254, "learning_rate": 1.5922247487650674e-05, "loss": 0.9843, "step": 5568 }, { "epoch": 0.32, "grad_norm": 1.1136692762374878, "learning_rate": 1.5920750572571004e-05, "loss": 0.619, "step": 5569 }, { "epoch": 0.32, "grad_norm": 2.1499528884887695, "learning_rate": 1.5919253453182776e-05, "loss": 0.9689, "step": 5570 }, { "epoch": 0.32, "grad_norm": 1.9264980554580688, "learning_rate": 1.591775612953764e-05, "loss": 1.0648, "step": 5571 }, { "epoch": 0.32, "grad_norm": 1.8386764526367188, "learning_rate": 1.5916258601687276e-05, "loss": 0.9729, "step": 5572 }, { "epoch": 0.32, "grad_norm": 1.8946834802627563, "learning_rate": 1.591476086968335e-05, "loss": 0.9932, "step": 5573 }, { "epoch": 0.32, "grad_norm": 1.7496845722198486, "learning_rate": 1.591326293357755e-05, "loss": 1.0286, "step": 5574 }, { "epoch": 0.32, "grad_norm": 1.745600700378418, "learning_rate": 1.5911764793421563e-05, "loss": 0.9319, "step": 5575 }, { "epoch": 0.32, "grad_norm": 1.769769310951233, "learning_rate": 1.5910266449267088e-05, "loss": 0.9636, "step": 5576 }, { "epoch": 0.32, "grad_norm": 1.09451425075531, "learning_rate": 1.590876790116583e-05, "loss": 0.6322, "step": 5577 }, { "epoch": 0.32, "grad_norm": 1.9613984823226929, "learning_rate": 1.5907269149169496e-05, "loss": 0.9264, "step": 5578 }, { "epoch": 0.32, "grad_norm": 1.8515127897262573, "learning_rate": 1.5905770193329802e-05, "loss": 0.9714, "step": 5579 }, { "epoch": 0.32, "grad_norm": 1.9208461046218872, "learning_rate": 1.590427103369848e-05, "loss": 1.1082, "step": 5580 }, { "epoch": 0.32, "grad_norm": 2.0351321697235107, "learning_rate": 1.590277167032725e-05, "loss": 0.9883, "step": 5581 }, { "epoch": 0.32, "grad_norm": 1.9064292907714844, "learning_rate": 1.5901272103267865e-05, "loss": 0.9593, "step": 5582 }, { "epoch": 0.32, "grad_norm": 2.047064781188965, "learning_rate": 1.5899772332572064e-05, "loss": 1.0164, "step": 5583 }, { "epoch": 0.32, "grad_norm": 1.8514686822891235, "learning_rate": 1.58982723582916e-05, "loss": 0.9972, "step": 5584 }, { "epoch": 0.32, "grad_norm": 1.8108243942260742, "learning_rate": 1.5896772180478232e-05, "loss": 0.9882, "step": 5585 }, { "epoch": 0.32, "grad_norm": 2.162264347076416, "learning_rate": 1.5895271799183728e-05, "loss": 0.9096, "step": 5586 }, { "epoch": 0.32, "grad_norm": 2.1043074131011963, "learning_rate": 1.589377121445986e-05, "loss": 0.9905, "step": 5587 }, { "epoch": 0.32, "grad_norm": 1.933682918548584, "learning_rate": 1.5892270426358413e-05, "loss": 1.0075, "step": 5588 }, { "epoch": 0.32, "grad_norm": 1.959614872932434, "learning_rate": 1.5890769434931173e-05, "loss": 1.0735, "step": 5589 }, { "epoch": 0.32, "grad_norm": 1.91002357006073, "learning_rate": 1.5889268240229938e-05, "loss": 1.0017, "step": 5590 }, { "epoch": 0.32, "grad_norm": 2.061537265777588, "learning_rate": 1.58877668423065e-05, "loss": 1.016, "step": 5591 }, { "epoch": 0.32, "grad_norm": 1.711720585823059, "learning_rate": 1.5886265241212684e-05, "loss": 0.9937, "step": 5592 }, { "epoch": 0.32, "grad_norm": 1.6773245334625244, "learning_rate": 1.588476343700029e-05, "loss": 1.0657, "step": 5593 }, { "epoch": 0.32, "grad_norm": 1.9883131980895996, "learning_rate": 1.588326142972115e-05, "loss": 1.0662, "step": 5594 }, { "epoch": 0.32, "grad_norm": 1.8492990732192993, "learning_rate": 1.5881759219427092e-05, "loss": 1.0012, "step": 5595 }, { "epoch": 0.32, "grad_norm": 1.649088978767395, "learning_rate": 1.5880256806169954e-05, "loss": 0.9264, "step": 5596 }, { "epoch": 0.32, "grad_norm": 1.0660146474838257, "learning_rate": 1.587875419000158e-05, "loss": 0.6128, "step": 5597 }, { "epoch": 0.32, "grad_norm": 1.83261239528656, "learning_rate": 1.587725137097382e-05, "loss": 1.0174, "step": 5598 }, { "epoch": 0.32, "grad_norm": 1.8204060792922974, "learning_rate": 1.5875748349138533e-05, "loss": 0.9952, "step": 5599 }, { "epoch": 0.32, "grad_norm": 1.7471671104431152, "learning_rate": 1.5874245124547583e-05, "loss": 0.967, "step": 5600 }, { "epoch": 0.32, "grad_norm": 1.7384865283966064, "learning_rate": 1.5872741697252843e-05, "loss": 1.0371, "step": 5601 }, { "epoch": 0.32, "grad_norm": 1.831261396408081, "learning_rate": 1.5871238067306196e-05, "loss": 1.0413, "step": 5602 }, { "epoch": 0.32, "grad_norm": 1.9228148460388184, "learning_rate": 1.5869734234759516e-05, "loss": 1.0485, "step": 5603 }, { "epoch": 0.32, "grad_norm": 1.6715623140335083, "learning_rate": 1.586823019966471e-05, "loss": 0.9621, "step": 5604 }, { "epoch": 0.32, "grad_norm": 1.996870517730713, "learning_rate": 1.586672596207367e-05, "loss": 1.0174, "step": 5605 }, { "epoch": 0.32, "grad_norm": 1.8512481451034546, "learning_rate": 1.5865221522038304e-05, "loss": 0.9463, "step": 5606 }, { "epoch": 0.32, "grad_norm": 1.84958016872406, "learning_rate": 1.5863716879610528e-05, "loss": 0.982, "step": 5607 }, { "epoch": 0.32, "grad_norm": 1.824703335762024, "learning_rate": 1.5862212034842265e-05, "loss": 0.9914, "step": 5608 }, { "epoch": 0.32, "grad_norm": 1.1317521333694458, "learning_rate": 1.5860706987785437e-05, "loss": 0.6052, "step": 5609 }, { "epoch": 0.32, "grad_norm": 2.2002053260803223, "learning_rate": 1.5859201738491982e-05, "loss": 1.0076, "step": 5610 }, { "epoch": 0.32, "grad_norm": 1.8917196989059448, "learning_rate": 1.5857696287013843e-05, "loss": 0.9436, "step": 5611 }, { "epoch": 0.32, "grad_norm": 1.8004605770111084, "learning_rate": 1.585619063340297e-05, "loss": 0.9872, "step": 5612 }, { "epoch": 0.32, "grad_norm": 1.8486406803131104, "learning_rate": 1.5854684777711312e-05, "loss": 1.0223, "step": 5613 }, { "epoch": 0.32, "grad_norm": 2.403108835220337, "learning_rate": 1.5853178719990842e-05, "loss": 0.9588, "step": 5614 }, { "epoch": 0.32, "grad_norm": 1.8655530214309692, "learning_rate": 1.585167246029352e-05, "loss": 1.0219, "step": 5615 }, { "epoch": 0.32, "grad_norm": 1.7775142192840576, "learning_rate": 1.585016599867133e-05, "loss": 0.9926, "step": 5616 }, { "epoch": 0.32, "grad_norm": 1.8702837228775024, "learning_rate": 1.584865933517625e-05, "loss": 1.0295, "step": 5617 }, { "epoch": 0.32, "grad_norm": 2.155032157897949, "learning_rate": 1.5847152469860277e-05, "loss": 1.0145, "step": 5618 }, { "epoch": 0.32, "grad_norm": 1.1903616189956665, "learning_rate": 1.5845645402775404e-05, "loss": 0.5977, "step": 5619 }, { "epoch": 0.32, "grad_norm": 2.033189535140991, "learning_rate": 1.584413813397364e-05, "loss": 1.0135, "step": 5620 }, { "epoch": 0.32, "grad_norm": 2.007765054702759, "learning_rate": 1.584263066350699e-05, "loss": 1.105, "step": 5621 }, { "epoch": 0.32, "grad_norm": 1.7189234495162964, "learning_rate": 1.584112299142748e-05, "loss": 0.9226, "step": 5622 }, { "epoch": 0.32, "grad_norm": 2.0227088928222656, "learning_rate": 1.5839615117787132e-05, "loss": 1.0538, "step": 5623 }, { "epoch": 0.32, "grad_norm": 2.1617214679718018, "learning_rate": 1.5838107042637974e-05, "loss": 1.0109, "step": 5624 }, { "epoch": 0.32, "grad_norm": 1.9102849960327148, "learning_rate": 1.5836598766032055e-05, "loss": 1.0907, "step": 5625 }, { "epoch": 0.32, "grad_norm": 2.0651750564575195, "learning_rate": 1.5835090288021414e-05, "loss": 1.097, "step": 5626 }, { "epoch": 0.32, "grad_norm": 1.8451707363128662, "learning_rate": 1.5833581608658108e-05, "loss": 1.0849, "step": 5627 }, { "epoch": 0.32, "grad_norm": 1.7253296375274658, "learning_rate": 1.5832072727994193e-05, "loss": 1.0893, "step": 5628 }, { "epoch": 0.32, "grad_norm": 1.7815706729888916, "learning_rate": 1.5830563646081746e-05, "loss": 1.0086, "step": 5629 }, { "epoch": 0.32, "grad_norm": 1.816599726676941, "learning_rate": 1.582905436297283e-05, "loss": 1.0314, "step": 5630 }, { "epoch": 0.32, "grad_norm": 1.7699759006500244, "learning_rate": 1.5827544878719532e-05, "loss": 1.0011, "step": 5631 }, { "epoch": 0.32, "grad_norm": 1.9050698280334473, "learning_rate": 1.5826035193373935e-05, "loss": 1.0166, "step": 5632 }, { "epoch": 0.32, "grad_norm": 1.8246219158172607, "learning_rate": 1.5824525306988144e-05, "loss": 0.9555, "step": 5633 }, { "epoch": 0.32, "grad_norm": 1.8009560108184814, "learning_rate": 1.582301521961425e-05, "loss": 1.1471, "step": 5634 }, { "epoch": 0.32, "grad_norm": 2.0377798080444336, "learning_rate": 1.582150493130437e-05, "loss": 1.066, "step": 5635 }, { "epoch": 0.32, "grad_norm": 1.820115327835083, "learning_rate": 1.5819994442110617e-05, "loss": 1.0461, "step": 5636 }, { "epoch": 0.32, "grad_norm": 1.898526668548584, "learning_rate": 1.581848375208511e-05, "loss": 0.9808, "step": 5637 }, { "epoch": 0.32, "grad_norm": 1.8746848106384277, "learning_rate": 1.5816972861279985e-05, "loss": 1.0011, "step": 5638 }, { "epoch": 0.32, "grad_norm": 1.7512683868408203, "learning_rate": 1.5815461769747372e-05, "loss": 1.0096, "step": 5639 }, { "epoch": 0.32, "grad_norm": 1.8255621194839478, "learning_rate": 1.581395047753942e-05, "loss": 1.0531, "step": 5640 }, { "epoch": 0.32, "grad_norm": 1.998863697052002, "learning_rate": 1.581243898470828e-05, "loss": 1.0372, "step": 5641 }, { "epoch": 0.32, "grad_norm": 1.783408761024475, "learning_rate": 1.58109272913061e-05, "loss": 1.0707, "step": 5642 }, { "epoch": 0.32, "grad_norm": 1.94863760471344, "learning_rate": 1.580941539738506e-05, "loss": 1.0407, "step": 5643 }, { "epoch": 0.32, "grad_norm": 1.849313735961914, "learning_rate": 1.580790330299732e-05, "loss": 1.0509, "step": 5644 }, { "epoch": 0.32, "grad_norm": 2.3469746112823486, "learning_rate": 1.5806391008195058e-05, "loss": 1.0892, "step": 5645 }, { "epoch": 0.32, "grad_norm": 1.767053246498108, "learning_rate": 1.5804878513030463e-05, "loss": 0.9959, "step": 5646 }, { "epoch": 0.32, "grad_norm": 1.2576100826263428, "learning_rate": 1.5803365817555726e-05, "loss": 0.6746, "step": 5647 }, { "epoch": 0.32, "grad_norm": 1.7269951105117798, "learning_rate": 1.5801852921823047e-05, "loss": 0.9814, "step": 5648 }, { "epoch": 0.32, "grad_norm": 1.8020143508911133, "learning_rate": 1.580033982588463e-05, "loss": 1.0421, "step": 5649 }, { "epoch": 0.32, "grad_norm": 1.7122759819030762, "learning_rate": 1.5798826529792684e-05, "loss": 1.0338, "step": 5650 }, { "epoch": 0.32, "grad_norm": 1.742413878440857, "learning_rate": 1.579731303359944e-05, "loss": 1.0062, "step": 5651 }, { "epoch": 0.32, "grad_norm": 1.7944848537445068, "learning_rate": 1.5795799337357115e-05, "loss": 0.9278, "step": 5652 }, { "epoch": 0.32, "grad_norm": 1.805260419845581, "learning_rate": 1.579428544111794e-05, "loss": 1.0102, "step": 5653 }, { "epoch": 0.32, "grad_norm": 1.5833542346954346, "learning_rate": 1.5792771344934167e-05, "loss": 1.0237, "step": 5654 }, { "epoch": 0.32, "grad_norm": 1.7975929975509644, "learning_rate": 1.579125704885803e-05, "loss": 0.9802, "step": 5655 }, { "epoch": 0.32, "grad_norm": 1.6812245845794678, "learning_rate": 1.5789742552941794e-05, "loss": 0.91, "step": 5656 }, { "epoch": 0.32, "grad_norm": 1.7998672723770142, "learning_rate": 1.5788227857237715e-05, "loss": 1.0114, "step": 5657 }, { "epoch": 0.32, "grad_norm": 1.8616605997085571, "learning_rate": 1.578671296179806e-05, "loss": 1.007, "step": 5658 }, { "epoch": 0.32, "grad_norm": 1.7945549488067627, "learning_rate": 1.5785197866675107e-05, "loss": 1.0494, "step": 5659 }, { "epoch": 0.32, "grad_norm": 1.8168004751205444, "learning_rate": 1.5783682571921132e-05, "loss": 0.9598, "step": 5660 }, { "epoch": 0.32, "grad_norm": 1.83732008934021, "learning_rate": 1.578216707758843e-05, "loss": 0.9933, "step": 5661 }, { "epoch": 0.32, "grad_norm": 2.0592806339263916, "learning_rate": 1.5780651383729292e-05, "loss": 1.0096, "step": 5662 }, { "epoch": 0.32, "grad_norm": 1.3441640138626099, "learning_rate": 1.5779135490396025e-05, "loss": 0.6417, "step": 5663 }, { "epoch": 0.32, "grad_norm": 2.0483028888702393, "learning_rate": 1.5777619397640937e-05, "loss": 1.0512, "step": 5664 }, { "epoch": 0.32, "grad_norm": 1.7721612453460693, "learning_rate": 1.577610310551634e-05, "loss": 0.9649, "step": 5665 }, { "epoch": 0.32, "grad_norm": 1.7482903003692627, "learning_rate": 1.577458661407456e-05, "loss": 0.9648, "step": 5666 }, { "epoch": 0.33, "grad_norm": 1.9614330530166626, "learning_rate": 1.5773069923367927e-05, "loss": 1.0459, "step": 5667 }, { "epoch": 0.33, "grad_norm": 1.7487666606903076, "learning_rate": 1.5771553033448777e-05, "loss": 1.0002, "step": 5668 }, { "epoch": 0.33, "grad_norm": 1.8344194889068604, "learning_rate": 1.5770035944369456e-05, "loss": 1.037, "step": 5669 }, { "epoch": 0.33, "grad_norm": 1.8469412326812744, "learning_rate": 1.576851865618231e-05, "loss": 0.9667, "step": 5670 }, { "epoch": 0.33, "grad_norm": 1.6935985088348389, "learning_rate": 1.57670011689397e-05, "loss": 0.9824, "step": 5671 }, { "epoch": 0.33, "grad_norm": 1.026711106300354, "learning_rate": 1.5765483482693987e-05, "loss": 0.5761, "step": 5672 }, { "epoch": 0.33, "grad_norm": 1.9349530935287476, "learning_rate": 1.5763965597497547e-05, "loss": 1.0748, "step": 5673 }, { "epoch": 0.33, "grad_norm": 1.8719233274459839, "learning_rate": 1.5762447513402755e-05, "loss": 0.9511, "step": 5674 }, { "epoch": 0.33, "grad_norm": 1.9921724796295166, "learning_rate": 1.5760929230461994e-05, "loss": 1.0241, "step": 5675 }, { "epoch": 0.33, "grad_norm": 1.8035528659820557, "learning_rate": 1.5759410748727663e-05, "loss": 1.0403, "step": 5676 }, { "epoch": 0.33, "grad_norm": 0.9983935356140137, "learning_rate": 1.5757892068252148e-05, "loss": 0.6209, "step": 5677 }, { "epoch": 0.33, "grad_norm": 1.3461449146270752, "learning_rate": 1.5756373189087864e-05, "loss": 0.6136, "step": 5678 }, { "epoch": 0.33, "grad_norm": 1.7944227457046509, "learning_rate": 1.5754854111287222e-05, "loss": 1.0384, "step": 5679 }, { "epoch": 0.33, "grad_norm": 2.3363876342773438, "learning_rate": 1.5753334834902643e-05, "loss": 1.1349, "step": 5680 }, { "epoch": 0.33, "grad_norm": 1.8927228450775146, "learning_rate": 1.5751815359986548e-05, "loss": 1.0747, "step": 5681 }, { "epoch": 0.33, "grad_norm": 1.9742066860198975, "learning_rate": 1.5750295686591372e-05, "loss": 1.0283, "step": 5682 }, { "epoch": 0.33, "grad_norm": 1.8273694515228271, "learning_rate": 1.5748775814769553e-05, "loss": 0.955, "step": 5683 }, { "epoch": 0.33, "grad_norm": 1.903059482574463, "learning_rate": 1.5747255744573542e-05, "loss": 1.0214, "step": 5684 }, { "epoch": 0.33, "grad_norm": 2.3763656616210938, "learning_rate": 1.574573547605579e-05, "loss": 1.0977, "step": 5685 }, { "epoch": 0.33, "grad_norm": 2.1012463569641113, "learning_rate": 1.574421500926875e-05, "loss": 1.0923, "step": 5686 }, { "epoch": 0.33, "grad_norm": 2.0620412826538086, "learning_rate": 1.57426943442649e-05, "loss": 0.9615, "step": 5687 }, { "epoch": 0.33, "grad_norm": 1.822073221206665, "learning_rate": 1.5741173481096713e-05, "loss": 1.0159, "step": 5688 }, { "epoch": 0.33, "grad_norm": 1.6255770921707153, "learning_rate": 1.573965241981666e-05, "loss": 0.9443, "step": 5689 }, { "epoch": 0.33, "grad_norm": 1.1343984603881836, "learning_rate": 1.5738131160477242e-05, "loss": 0.6628, "step": 5690 }, { "epoch": 0.33, "grad_norm": 1.7767374515533447, "learning_rate": 1.5736609703130942e-05, "loss": 1.013, "step": 5691 }, { "epoch": 0.33, "grad_norm": 1.6196361780166626, "learning_rate": 1.573508804783027e-05, "loss": 0.9825, "step": 5692 }, { "epoch": 0.33, "grad_norm": 1.9072165489196777, "learning_rate": 1.5733566194627722e-05, "loss": 0.9395, "step": 5693 }, { "epoch": 0.33, "grad_norm": 1.7007228136062622, "learning_rate": 1.5732044143575827e-05, "loss": 0.9975, "step": 5694 }, { "epoch": 0.33, "grad_norm": 1.7965205907821655, "learning_rate": 1.5730521894727098e-05, "loss": 1.0064, "step": 5695 }, { "epoch": 0.33, "grad_norm": 1.8211930990219116, "learning_rate": 1.572899944813407e-05, "loss": 0.9962, "step": 5696 }, { "epoch": 0.33, "grad_norm": 1.8731894493103027, "learning_rate": 1.572747680384927e-05, "loss": 1.0479, "step": 5697 }, { "epoch": 0.33, "grad_norm": 1.6269744634628296, "learning_rate": 1.5725953961925245e-05, "loss": 1.0081, "step": 5698 }, { "epoch": 0.33, "grad_norm": 1.949883222579956, "learning_rate": 1.5724430922414543e-05, "loss": 1.074, "step": 5699 }, { "epoch": 0.33, "grad_norm": 1.6587811708450317, "learning_rate": 1.5722907685369724e-05, "loss": 1.0096, "step": 5700 }, { "epoch": 0.33, "grad_norm": 2.0263965129852295, "learning_rate": 1.5721384250843343e-05, "loss": 1.0261, "step": 5701 }, { "epoch": 0.33, "grad_norm": 1.9395532608032227, "learning_rate": 1.5719860618887976e-05, "loss": 1.063, "step": 5702 }, { "epoch": 0.33, "grad_norm": 1.8952118158340454, "learning_rate": 1.5718336789556195e-05, "loss": 0.9465, "step": 5703 }, { "epoch": 0.33, "grad_norm": 1.6189630031585693, "learning_rate": 1.5716812762900588e-05, "loss": 1.0284, "step": 5704 }, { "epoch": 0.33, "grad_norm": 1.797673225402832, "learning_rate": 1.571528853897374e-05, "loss": 0.9073, "step": 5705 }, { "epoch": 0.33, "grad_norm": 1.8497487306594849, "learning_rate": 1.5713764117828253e-05, "loss": 0.9876, "step": 5706 }, { "epoch": 0.33, "grad_norm": 1.8215322494506836, "learning_rate": 1.571223949951672e-05, "loss": 0.9146, "step": 5707 }, { "epoch": 0.33, "grad_norm": 1.7548412084579468, "learning_rate": 1.5710714684091764e-05, "loss": 1.0178, "step": 5708 }, { "epoch": 0.33, "grad_norm": 1.8014018535614014, "learning_rate": 1.5709189671605992e-05, "loss": 1.0698, "step": 5709 }, { "epoch": 0.33, "grad_norm": 1.776074767112732, "learning_rate": 1.5707664462112035e-05, "loss": 1.0065, "step": 5710 }, { "epoch": 0.33, "grad_norm": 1.6989144086837769, "learning_rate": 1.570613905566252e-05, "loss": 0.9914, "step": 5711 }, { "epoch": 0.33, "grad_norm": 1.8957029581069946, "learning_rate": 1.570461345231009e-05, "loss": 1.0267, "step": 5712 }, { "epoch": 0.33, "grad_norm": 1.8715628385543823, "learning_rate": 1.570308765210738e-05, "loss": 1.0299, "step": 5713 }, { "epoch": 0.33, "grad_norm": 1.807898998260498, "learning_rate": 1.5701561655107047e-05, "loss": 1.0139, "step": 5714 }, { "epoch": 0.33, "grad_norm": 1.6347683668136597, "learning_rate": 1.5700035461361748e-05, "loss": 0.9268, "step": 5715 }, { "epoch": 0.33, "grad_norm": 1.837496042251587, "learning_rate": 1.569850907092415e-05, "loss": 1.0004, "step": 5716 }, { "epoch": 0.33, "grad_norm": 1.831099033355713, "learning_rate": 1.569698248384692e-05, "loss": 0.9972, "step": 5717 }, { "epoch": 0.33, "grad_norm": 1.9904072284698486, "learning_rate": 1.569545570018274e-05, "loss": 1.0602, "step": 5718 }, { "epoch": 0.33, "grad_norm": 1.6455543041229248, "learning_rate": 1.5693928719984292e-05, "loss": 0.9568, "step": 5719 }, { "epoch": 0.33, "grad_norm": 1.7781951427459717, "learning_rate": 1.569240154330427e-05, "loss": 1.0427, "step": 5720 }, { "epoch": 0.33, "grad_norm": 1.8802788257598877, "learning_rate": 1.5690874170195368e-05, "loss": 1.0397, "step": 5721 }, { "epoch": 0.33, "grad_norm": 1.8736258745193481, "learning_rate": 1.56893466007103e-05, "loss": 1.0137, "step": 5722 }, { "epoch": 0.33, "grad_norm": 1.831794261932373, "learning_rate": 1.568781883490177e-05, "loss": 0.9285, "step": 5723 }, { "epoch": 0.33, "grad_norm": 1.732837200164795, "learning_rate": 1.5686290872822504e-05, "loss": 0.9913, "step": 5724 }, { "epoch": 0.33, "grad_norm": 1.8350539207458496, "learning_rate": 1.5684762714525222e-05, "loss": 1.0, "step": 5725 }, { "epoch": 0.33, "grad_norm": 1.713833212852478, "learning_rate": 1.568323436006266e-05, "loss": 1.032, "step": 5726 }, { "epoch": 0.33, "grad_norm": 1.7734463214874268, "learning_rate": 1.5681705809487554e-05, "loss": 0.9892, "step": 5727 }, { "epoch": 0.33, "grad_norm": 1.8690985441207886, "learning_rate": 1.568017706285265e-05, "loss": 1.0635, "step": 5728 }, { "epoch": 0.33, "grad_norm": 1.6964085102081299, "learning_rate": 1.5678648120210703e-05, "loss": 0.9062, "step": 5729 }, { "epoch": 0.33, "grad_norm": 1.8863768577575684, "learning_rate": 1.5677118981614477e-05, "loss": 1.0252, "step": 5730 }, { "epoch": 0.33, "grad_norm": 1.8796589374542236, "learning_rate": 1.567558964711673e-05, "loss": 0.9903, "step": 5731 }, { "epoch": 0.33, "grad_norm": 2.273630142211914, "learning_rate": 1.5674060116770234e-05, "loss": 1.1655, "step": 5732 }, { "epoch": 0.33, "grad_norm": 1.728611707687378, "learning_rate": 1.567253039062778e-05, "loss": 1.0736, "step": 5733 }, { "epoch": 0.33, "grad_norm": 1.8526363372802734, "learning_rate": 1.5671000468742144e-05, "loss": 1.0292, "step": 5734 }, { "epoch": 0.33, "grad_norm": 1.833158016204834, "learning_rate": 1.5669470351166125e-05, "loss": 1.0352, "step": 5735 }, { "epoch": 0.33, "grad_norm": 1.8526487350463867, "learning_rate": 1.566794003795252e-05, "loss": 1.0781, "step": 5736 }, { "epoch": 0.33, "grad_norm": 1.829857349395752, "learning_rate": 1.5666409529154138e-05, "loss": 1.0385, "step": 5737 }, { "epoch": 0.33, "grad_norm": 1.7702175378799438, "learning_rate": 1.5664878824823794e-05, "loss": 1.1065, "step": 5738 }, { "epoch": 0.33, "grad_norm": 1.8521746397018433, "learning_rate": 1.5663347925014302e-05, "loss": 0.9984, "step": 5739 }, { "epoch": 0.33, "grad_norm": 1.819317102432251, "learning_rate": 1.5661816829778493e-05, "loss": 0.9998, "step": 5740 }, { "epoch": 0.33, "grad_norm": 1.8256032466888428, "learning_rate": 1.5660285539169202e-05, "loss": 1.0779, "step": 5741 }, { "epoch": 0.33, "grad_norm": 1.978366494178772, "learning_rate": 1.5658754053239267e-05, "loss": 1.0641, "step": 5742 }, { "epoch": 0.33, "grad_norm": 2.0026516914367676, "learning_rate": 1.565722237204154e-05, "loss": 1.0453, "step": 5743 }, { "epoch": 0.33, "grad_norm": 1.9630029201507568, "learning_rate": 1.5655690495628867e-05, "loss": 1.0341, "step": 5744 }, { "epoch": 0.33, "grad_norm": 2.0062742233276367, "learning_rate": 1.565415842405412e-05, "loss": 1.0353, "step": 5745 }, { "epoch": 0.33, "grad_norm": 1.7329479455947876, "learning_rate": 1.5652626157370154e-05, "loss": 1.0637, "step": 5746 }, { "epoch": 0.33, "grad_norm": 1.720922827720642, "learning_rate": 1.5651093695629854e-05, "loss": 1.009, "step": 5747 }, { "epoch": 0.33, "grad_norm": 1.8163689374923706, "learning_rate": 1.5649561038886093e-05, "loss": 1.0117, "step": 5748 }, { "epoch": 0.33, "grad_norm": 1.862666368484497, "learning_rate": 1.5648028187191764e-05, "loss": 1.0208, "step": 5749 }, { "epoch": 0.33, "grad_norm": 1.8742403984069824, "learning_rate": 1.5646495140599758e-05, "loss": 1.0665, "step": 5750 }, { "epoch": 0.33, "grad_norm": 1.866719365119934, "learning_rate": 1.5644961899162977e-05, "loss": 1.0787, "step": 5751 }, { "epoch": 0.33, "grad_norm": 1.8796030282974243, "learning_rate": 1.564342846293433e-05, "loss": 0.959, "step": 5752 }, { "epoch": 0.33, "grad_norm": 1.8757416009902954, "learning_rate": 1.5641894831966732e-05, "loss": 1.0584, "step": 5753 }, { "epoch": 0.33, "grad_norm": 1.7255322933197021, "learning_rate": 1.5640361006313103e-05, "loss": 1.079, "step": 5754 }, { "epoch": 0.33, "grad_norm": 1.7396575212478638, "learning_rate": 1.5638826986026373e-05, "loss": 1.0705, "step": 5755 }, { "epoch": 0.33, "grad_norm": 1.8356553316116333, "learning_rate": 1.563729277115947e-05, "loss": 1.0386, "step": 5756 }, { "epoch": 0.33, "grad_norm": 1.698919653892517, "learning_rate": 1.5635758361765345e-05, "loss": 0.989, "step": 5757 }, { "epoch": 0.33, "grad_norm": 1.8141980171203613, "learning_rate": 1.5634223757896943e-05, "loss": 0.9852, "step": 5758 }, { "epoch": 0.33, "grad_norm": 1.9051424264907837, "learning_rate": 1.563268895960721e-05, "loss": 0.9707, "step": 5759 }, { "epoch": 0.33, "grad_norm": 1.9231992959976196, "learning_rate": 1.5631153966949125e-05, "loss": 1.0113, "step": 5760 }, { "epoch": 0.33, "grad_norm": 1.7726978063583374, "learning_rate": 1.562961877997564e-05, "loss": 0.969, "step": 5761 }, { "epoch": 0.33, "grad_norm": 1.743573546409607, "learning_rate": 1.562808339873974e-05, "loss": 0.9293, "step": 5762 }, { "epoch": 0.33, "grad_norm": 1.8523484468460083, "learning_rate": 1.56265478232944e-05, "loss": 1.0762, "step": 5763 }, { "epoch": 0.33, "grad_norm": 1.9541845321655273, "learning_rate": 1.5625012053692615e-05, "loss": 0.9897, "step": 5764 }, { "epoch": 0.33, "grad_norm": 1.692139744758606, "learning_rate": 1.5623476089987376e-05, "loss": 0.9145, "step": 5765 }, { "epoch": 0.33, "grad_norm": 1.643104910850525, "learning_rate": 1.5621939932231685e-05, "loss": 0.8562, "step": 5766 }, { "epoch": 0.33, "grad_norm": 1.732215404510498, "learning_rate": 1.5620403580478552e-05, "loss": 0.9352, "step": 5767 }, { "epoch": 0.33, "grad_norm": 1.6711032390594482, "learning_rate": 1.561886703478099e-05, "loss": 1.0885, "step": 5768 }, { "epoch": 0.33, "grad_norm": 1.7550568580627441, "learning_rate": 1.5617330295192025e-05, "loss": 1.0372, "step": 5769 }, { "epoch": 0.33, "grad_norm": 1.9764037132263184, "learning_rate": 1.561579336176468e-05, "loss": 1.011, "step": 5770 }, { "epoch": 0.33, "grad_norm": 1.9520660638809204, "learning_rate": 1.5614256234551995e-05, "loss": 0.9581, "step": 5771 }, { "epoch": 0.33, "grad_norm": 1.8582338094711304, "learning_rate": 1.561271891360701e-05, "loss": 0.9808, "step": 5772 }, { "epoch": 0.33, "grad_norm": 1.9764502048492432, "learning_rate": 1.561118139898277e-05, "loss": 1.0704, "step": 5773 }, { "epoch": 0.33, "grad_norm": 1.891813039779663, "learning_rate": 1.5609643690732337e-05, "loss": 0.9736, "step": 5774 }, { "epoch": 0.33, "grad_norm": 1.73569917678833, "learning_rate": 1.560810578890877e-05, "loss": 1.0572, "step": 5775 }, { "epoch": 0.33, "grad_norm": 1.7014878988265991, "learning_rate": 1.5606567693565143e-05, "loss": 1.0153, "step": 5776 }, { "epoch": 0.33, "grad_norm": 1.748089075088501, "learning_rate": 1.560502940475452e-05, "loss": 1.0455, "step": 5777 }, { "epoch": 0.33, "grad_norm": 1.8214471340179443, "learning_rate": 1.560349092252999e-05, "loss": 1.0017, "step": 5778 }, { "epoch": 0.33, "grad_norm": 1.6725749969482422, "learning_rate": 1.5601952246944642e-05, "loss": 0.9291, "step": 5779 }, { "epoch": 0.33, "grad_norm": 2.0799410343170166, "learning_rate": 1.560041337805157e-05, "loss": 1.0551, "step": 5780 }, { "epoch": 0.33, "grad_norm": 1.1333547830581665, "learning_rate": 1.5598874315903878e-05, "loss": 0.6249, "step": 5781 }, { "epoch": 0.33, "grad_norm": 1.76744544506073, "learning_rate": 1.5597335060554673e-05, "loss": 1.0528, "step": 5782 }, { "epoch": 0.33, "grad_norm": 1.71015465259552, "learning_rate": 1.5595795612057067e-05, "loss": 1.0575, "step": 5783 }, { "epoch": 0.33, "grad_norm": 1.7831852436065674, "learning_rate": 1.5594255970464192e-05, "loss": 1.021, "step": 5784 }, { "epoch": 0.33, "grad_norm": 1.828924536705017, "learning_rate": 1.5592716135829164e-05, "loss": 0.9835, "step": 5785 }, { "epoch": 0.33, "grad_norm": 1.7111735343933105, "learning_rate": 1.5591176108205127e-05, "loss": 1.0607, "step": 5786 }, { "epoch": 0.33, "grad_norm": 1.7572084665298462, "learning_rate": 1.558963588764522e-05, "loss": 0.9941, "step": 5787 }, { "epoch": 0.33, "grad_norm": 1.7194637060165405, "learning_rate": 1.5588095474202597e-05, "loss": 0.9593, "step": 5788 }, { "epoch": 0.33, "grad_norm": 1.8021361827850342, "learning_rate": 1.5586554867930404e-05, "loss": 1.0019, "step": 5789 }, { "epoch": 0.33, "grad_norm": 1.8215588331222534, "learning_rate": 1.558501406888181e-05, "loss": 1.0669, "step": 5790 }, { "epoch": 0.33, "grad_norm": 1.8258445262908936, "learning_rate": 1.558347307710998e-05, "loss": 1.0771, "step": 5791 }, { "epoch": 0.33, "grad_norm": 1.9110820293426514, "learning_rate": 1.5581931892668093e-05, "loss": 1.0837, "step": 5792 }, { "epoch": 0.33, "grad_norm": 2.0173819065093994, "learning_rate": 1.5580390515609325e-05, "loss": 1.0265, "step": 5793 }, { "epoch": 0.33, "grad_norm": 1.7407664060592651, "learning_rate": 1.5578848945986872e-05, "loss": 1.0047, "step": 5794 }, { "epoch": 0.33, "grad_norm": 1.7931828498840332, "learning_rate": 1.5577307183853925e-05, "loss": 0.9203, "step": 5795 }, { "epoch": 0.33, "grad_norm": 1.8555265665054321, "learning_rate": 1.5575765229263686e-05, "loss": 1.0104, "step": 5796 }, { "epoch": 0.33, "grad_norm": 1.7339000701904297, "learning_rate": 1.5574223082269366e-05, "loss": 1.0278, "step": 5797 }, { "epoch": 0.33, "grad_norm": 1.973382592201233, "learning_rate": 1.5572680742924178e-05, "loss": 1.0666, "step": 5798 }, { "epoch": 0.33, "grad_norm": 1.7116643190383911, "learning_rate": 1.557113821128134e-05, "loss": 0.995, "step": 5799 }, { "epoch": 0.33, "grad_norm": 2.3167226314544678, "learning_rate": 1.556959548739409e-05, "loss": 1.0367, "step": 5800 }, { "epoch": 0.33, "grad_norm": 1.818730115890503, "learning_rate": 1.556805257131566e-05, "loss": 0.9967, "step": 5801 }, { "epoch": 0.33, "grad_norm": 1.8402304649353027, "learning_rate": 1.556650946309928e-05, "loss": 0.9614, "step": 5802 }, { "epoch": 0.33, "grad_norm": 1.8830138444900513, "learning_rate": 1.5564966162798216e-05, "loss": 0.9712, "step": 5803 }, { "epoch": 0.33, "grad_norm": 1.8251796960830688, "learning_rate": 1.556342267046571e-05, "loss": 1.0023, "step": 5804 }, { "epoch": 0.33, "grad_norm": 2.0738155841827393, "learning_rate": 1.5561878986155033e-05, "loss": 1.1168, "step": 5805 }, { "epoch": 0.33, "grad_norm": 2.943463087081909, "learning_rate": 1.5560335109919445e-05, "loss": 0.9982, "step": 5806 }, { "epoch": 0.33, "grad_norm": 1.7479894161224365, "learning_rate": 1.5558791041812226e-05, "loss": 0.9737, "step": 5807 }, { "epoch": 0.33, "grad_norm": 1.7748967409133911, "learning_rate": 1.5557246781886657e-05, "loss": 0.9924, "step": 5808 }, { "epoch": 0.33, "grad_norm": 1.8681551218032837, "learning_rate": 1.5555702330196024e-05, "loss": 1.0555, "step": 5809 }, { "epoch": 0.33, "grad_norm": 1.7283658981323242, "learning_rate": 1.5554157686793623e-05, "loss": 1.0104, "step": 5810 }, { "epoch": 0.33, "grad_norm": 1.9318522214889526, "learning_rate": 1.5552612851732757e-05, "loss": 1.0521, "step": 5811 }, { "epoch": 0.33, "grad_norm": 1.8806594610214233, "learning_rate": 1.5551067825066727e-05, "loss": 0.9614, "step": 5812 }, { "epoch": 0.33, "grad_norm": 1.769594669342041, "learning_rate": 1.5549522606848855e-05, "loss": 1.057, "step": 5813 }, { "epoch": 0.33, "grad_norm": 1.7724529504776, "learning_rate": 1.5547977197132463e-05, "loss": 0.9933, "step": 5814 }, { "epoch": 0.33, "grad_norm": 2.2300639152526855, "learning_rate": 1.5546431595970873e-05, "loss": 1.0198, "step": 5815 }, { "epoch": 0.33, "grad_norm": 1.9044530391693115, "learning_rate": 1.554488580341742e-05, "loss": 1.1298, "step": 5816 }, { "epoch": 0.33, "grad_norm": 1.7599481344223022, "learning_rate": 1.5543339819525448e-05, "loss": 1.0533, "step": 5817 }, { "epoch": 0.33, "grad_norm": 1.8673388957977295, "learning_rate": 1.5541793644348305e-05, "loss": 1.0476, "step": 5818 }, { "epoch": 0.33, "grad_norm": 1.710741400718689, "learning_rate": 1.5540247277939343e-05, "loss": 1.0187, "step": 5819 }, { "epoch": 0.33, "grad_norm": 1.7325483560562134, "learning_rate": 1.5538700720351924e-05, "loss": 1.0555, "step": 5820 }, { "epoch": 0.33, "grad_norm": 1.7160881757736206, "learning_rate": 1.5537153971639414e-05, "loss": 0.9907, "step": 5821 }, { "epoch": 0.33, "grad_norm": 1.1652578115463257, "learning_rate": 1.5535607031855188e-05, "loss": 0.646, "step": 5822 }, { "epoch": 0.33, "grad_norm": 1.6653498411178589, "learning_rate": 1.5534059901052628e-05, "loss": 0.8851, "step": 5823 }, { "epoch": 0.33, "grad_norm": 1.701088547706604, "learning_rate": 1.5532512579285118e-05, "loss": 1.014, "step": 5824 }, { "epoch": 0.33, "grad_norm": 1.0857338905334473, "learning_rate": 1.5530965066606055e-05, "loss": 0.5671, "step": 5825 }, { "epoch": 0.33, "grad_norm": 1.871946096420288, "learning_rate": 1.5529417363068832e-05, "loss": 0.9477, "step": 5826 }, { "epoch": 0.33, "grad_norm": 1.908758282661438, "learning_rate": 1.5527869468726867e-05, "loss": 0.9356, "step": 5827 }, { "epoch": 0.33, "grad_norm": 1.8757386207580566, "learning_rate": 1.552632138363357e-05, "loss": 1.0541, "step": 5828 }, { "epoch": 0.33, "grad_norm": 1.8711673021316528, "learning_rate": 1.5524773107842355e-05, "loss": 1.007, "step": 5829 }, { "epoch": 0.33, "grad_norm": 1.8330109119415283, "learning_rate": 1.5523224641406653e-05, "loss": 0.9845, "step": 5830 }, { "epoch": 0.33, "grad_norm": 1.7495567798614502, "learning_rate": 1.5521675984379898e-05, "loss": 1.0381, "step": 5831 }, { "epoch": 0.33, "grad_norm": 1.7670490741729736, "learning_rate": 1.552012713681553e-05, "loss": 1.0639, "step": 5832 }, { "epoch": 0.33, "grad_norm": 1.957664132118225, "learning_rate": 1.5518578098766993e-05, "loss": 0.9583, "step": 5833 }, { "epoch": 0.33, "grad_norm": 1.9304425716400146, "learning_rate": 1.5517028870287743e-05, "loss": 1.019, "step": 5834 }, { "epoch": 0.33, "grad_norm": 1.7708600759506226, "learning_rate": 1.5515479451431237e-05, "loss": 0.9029, "step": 5835 }, { "epoch": 0.33, "grad_norm": 1.8742115497589111, "learning_rate": 1.551392984225094e-05, "loss": 1.0259, "step": 5836 }, { "epoch": 0.33, "grad_norm": 1.6818162202835083, "learning_rate": 1.551238004280033e-05, "loss": 1.0079, "step": 5837 }, { "epoch": 0.33, "grad_norm": 1.6914740800857544, "learning_rate": 1.5510830053132882e-05, "loss": 1.0007, "step": 5838 }, { "epoch": 0.33, "grad_norm": 1.7664865255355835, "learning_rate": 1.550927987330208e-05, "loss": 0.9841, "step": 5839 }, { "epoch": 0.33, "grad_norm": 1.6322523355484009, "learning_rate": 1.550772950336142e-05, "loss": 1.0121, "step": 5840 }, { "epoch": 0.33, "grad_norm": 1.6135891675949097, "learning_rate": 1.5506178943364406e-05, "loss": 1.02, "step": 5841 }, { "epoch": 0.34, "grad_norm": 1.9580302238464355, "learning_rate": 1.550462819336453e-05, "loss": 1.0152, "step": 5842 }, { "epoch": 0.34, "grad_norm": 1.9442603588104248, "learning_rate": 1.5503077253415315e-05, "loss": 1.0498, "step": 5843 }, { "epoch": 0.34, "grad_norm": 1.7674839496612549, "learning_rate": 1.5501526123570277e-05, "loss": 1.0136, "step": 5844 }, { "epoch": 0.34, "grad_norm": 1.8188444375991821, "learning_rate": 1.549997480388294e-05, "loss": 0.9813, "step": 5845 }, { "epoch": 0.34, "grad_norm": 1.9018720388412476, "learning_rate": 1.5498423294406833e-05, "loss": 1.0025, "step": 5846 }, { "epoch": 0.34, "grad_norm": 1.9595156908035278, "learning_rate": 1.54968715951955e-05, "loss": 1.0294, "step": 5847 }, { "epoch": 0.34, "grad_norm": 1.9530587196350098, "learning_rate": 1.5495319706302485e-05, "loss": 0.9885, "step": 5848 }, { "epoch": 0.34, "grad_norm": 1.96371591091156, "learning_rate": 1.5493767627781332e-05, "loss": 1.0414, "step": 5849 }, { "epoch": 0.34, "grad_norm": 1.839888334274292, "learning_rate": 1.549221535968561e-05, "loss": 1.0287, "step": 5850 }, { "epoch": 0.34, "grad_norm": 1.7183386087417603, "learning_rate": 1.5490662902068872e-05, "loss": 1.0059, "step": 5851 }, { "epoch": 0.34, "grad_norm": 1.8995938301086426, "learning_rate": 1.54891102549847e-05, "loss": 1.0452, "step": 5852 }, { "epoch": 0.34, "grad_norm": 1.6958467960357666, "learning_rate": 1.5487557418486666e-05, "loss": 1.03, "step": 5853 }, { "epoch": 0.34, "grad_norm": 1.855422854423523, "learning_rate": 1.548600439262835e-05, "loss": 0.9593, "step": 5854 }, { "epoch": 0.34, "grad_norm": 1.736997127532959, "learning_rate": 1.548445117746335e-05, "loss": 0.9598, "step": 5855 }, { "epoch": 0.34, "grad_norm": 1.8337535858154297, "learning_rate": 1.5482897773045262e-05, "loss": 1.0499, "step": 5856 }, { "epoch": 0.34, "grad_norm": 1.7215017080307007, "learning_rate": 1.5481344179427688e-05, "loss": 0.974, "step": 5857 }, { "epoch": 0.34, "grad_norm": 1.7477277517318726, "learning_rate": 1.5479790396664235e-05, "loss": 1.0548, "step": 5858 }, { "epoch": 0.34, "grad_norm": 1.9066221714019775, "learning_rate": 1.547823642480852e-05, "loss": 0.9922, "step": 5859 }, { "epoch": 0.34, "grad_norm": 1.794390320777893, "learning_rate": 1.547668226391417e-05, "loss": 1.0412, "step": 5860 }, { "epoch": 0.34, "grad_norm": 1.836539387702942, "learning_rate": 1.5475127914034816e-05, "loss": 0.9699, "step": 5861 }, { "epoch": 0.34, "grad_norm": 1.7617093324661255, "learning_rate": 1.5473573375224093e-05, "loss": 1.0219, "step": 5862 }, { "epoch": 0.34, "grad_norm": 1.9734299182891846, "learning_rate": 1.5472018647535637e-05, "loss": 1.0659, "step": 5863 }, { "epoch": 0.34, "grad_norm": 1.8893712759017944, "learning_rate": 1.5470463731023107e-05, "loss": 0.9184, "step": 5864 }, { "epoch": 0.34, "grad_norm": 2.3721625804901123, "learning_rate": 1.5468908625740157e-05, "loss": 1.0514, "step": 5865 }, { "epoch": 0.34, "grad_norm": 1.836548924446106, "learning_rate": 1.5467353331740445e-05, "loss": 0.9108, "step": 5866 }, { "epoch": 0.34, "grad_norm": 1.7330526113510132, "learning_rate": 1.5465797849077643e-05, "loss": 1.0146, "step": 5867 }, { "epoch": 0.34, "grad_norm": 1.785692572593689, "learning_rate": 1.546424217780542e-05, "loss": 0.9858, "step": 5868 }, { "epoch": 0.34, "grad_norm": 1.9129208326339722, "learning_rate": 1.546268631797747e-05, "loss": 0.9787, "step": 5869 }, { "epoch": 0.34, "grad_norm": 1.7087873220443726, "learning_rate": 1.546113026964747e-05, "loss": 0.9836, "step": 5870 }, { "epoch": 0.34, "grad_norm": 1.8789410591125488, "learning_rate": 1.5459574032869126e-05, "loss": 1.0437, "step": 5871 }, { "epoch": 0.34, "grad_norm": 1.2202550172805786, "learning_rate": 1.5458017607696124e-05, "loss": 0.6633, "step": 5872 }, { "epoch": 0.34, "grad_norm": 1.895402431488037, "learning_rate": 1.5456460994182185e-05, "loss": 1.0153, "step": 5873 }, { "epoch": 0.34, "grad_norm": 2.1224522590637207, "learning_rate": 1.545490419238102e-05, "loss": 1.0592, "step": 5874 }, { "epoch": 0.34, "grad_norm": 1.957767128944397, "learning_rate": 1.5453347202346347e-05, "loss": 0.978, "step": 5875 }, { "epoch": 0.34, "grad_norm": 1.7957892417907715, "learning_rate": 1.5451790024131897e-05, "loss": 1.0856, "step": 5876 }, { "epoch": 0.34, "grad_norm": 1.7588075399398804, "learning_rate": 1.54502326577914e-05, "loss": 0.9343, "step": 5877 }, { "epoch": 0.34, "grad_norm": 1.670821189880371, "learning_rate": 1.54486751033786e-05, "loss": 0.9639, "step": 5878 }, { "epoch": 0.34, "grad_norm": 1.8318747282028198, "learning_rate": 1.5447117360947244e-05, "loss": 0.949, "step": 5879 }, { "epoch": 0.34, "grad_norm": 2.3086538314819336, "learning_rate": 1.5445559430551083e-05, "loss": 1.0727, "step": 5880 }, { "epoch": 0.34, "grad_norm": 2.1196787357330322, "learning_rate": 1.5444001312243876e-05, "loss": 1.0706, "step": 5881 }, { "epoch": 0.34, "grad_norm": 1.7794506549835205, "learning_rate": 1.544244300607939e-05, "loss": 0.9646, "step": 5882 }, { "epoch": 0.34, "grad_norm": 1.8270851373672485, "learning_rate": 1.54408845121114e-05, "loss": 1.0972, "step": 5883 }, { "epoch": 0.34, "grad_norm": 1.6430906057357788, "learning_rate": 1.5439325830393688e-05, "loss": 1.0369, "step": 5884 }, { "epoch": 0.34, "grad_norm": 1.8952572345733643, "learning_rate": 1.543776696098003e-05, "loss": 0.933, "step": 5885 }, { "epoch": 0.34, "grad_norm": 1.7971152067184448, "learning_rate": 1.5436207903924226e-05, "loss": 1.0009, "step": 5886 }, { "epoch": 0.34, "grad_norm": 1.692742109298706, "learning_rate": 1.5434648659280072e-05, "loss": 0.9657, "step": 5887 }, { "epoch": 0.34, "grad_norm": 1.7461479902267456, "learning_rate": 1.5433089227101374e-05, "loss": 1.0204, "step": 5888 }, { "epoch": 0.34, "grad_norm": 2.0098862648010254, "learning_rate": 1.5431529607441945e-05, "loss": 1.0327, "step": 5889 }, { "epoch": 0.34, "grad_norm": 2.040712356567383, "learning_rate": 1.5429969800355602e-05, "loss": 1.1232, "step": 5890 }, { "epoch": 0.34, "grad_norm": 1.7322931289672852, "learning_rate": 1.5428409805896166e-05, "loss": 0.9897, "step": 5891 }, { "epoch": 0.34, "grad_norm": 1.6626914739608765, "learning_rate": 1.5426849624117474e-05, "loss": 0.9986, "step": 5892 }, { "epoch": 0.34, "grad_norm": 1.6629664897918701, "learning_rate": 1.542528925507336e-05, "loss": 0.9222, "step": 5893 }, { "epoch": 0.34, "grad_norm": 2.017216205596924, "learning_rate": 1.5423728698817665e-05, "loss": 1.0044, "step": 5894 }, { "epoch": 0.34, "grad_norm": 1.89260733127594, "learning_rate": 1.542216795540425e-05, "loss": 1.0251, "step": 5895 }, { "epoch": 0.34, "grad_norm": 1.8202911615371704, "learning_rate": 1.542060702488696e-05, "loss": 1.0319, "step": 5896 }, { "epoch": 0.34, "grad_norm": 1.8159570693969727, "learning_rate": 1.5419045907319666e-05, "loss": 1.0769, "step": 5897 }, { "epoch": 0.34, "grad_norm": 1.8324774503707886, "learning_rate": 1.5417484602756237e-05, "loss": 1.0931, "step": 5898 }, { "epoch": 0.34, "grad_norm": 1.7690989971160889, "learning_rate": 1.5415923111250543e-05, "loss": 0.9444, "step": 5899 }, { "epoch": 0.34, "grad_norm": 1.9186985492706299, "learning_rate": 1.5414361432856475e-05, "loss": 1.0817, "step": 5900 }, { "epoch": 0.34, "grad_norm": 1.8851598501205444, "learning_rate": 1.5412799567627915e-05, "loss": 1.058, "step": 5901 }, { "epoch": 0.34, "grad_norm": 1.834693431854248, "learning_rate": 1.5411237515618764e-05, "loss": 0.9521, "step": 5902 }, { "epoch": 0.34, "grad_norm": 1.7473396062850952, "learning_rate": 1.540967527688292e-05, "loss": 1.0651, "step": 5903 }, { "epoch": 0.34, "grad_norm": 1.8309718370437622, "learning_rate": 1.54081128514743e-05, "loss": 1.0007, "step": 5904 }, { "epoch": 0.34, "grad_norm": 2.0772550106048584, "learning_rate": 1.5406550239446808e-05, "loss": 1.0377, "step": 5905 }, { "epoch": 0.34, "grad_norm": 1.7661163806915283, "learning_rate": 1.5404987440854367e-05, "loss": 1.0147, "step": 5906 }, { "epoch": 0.34, "grad_norm": 1.7555384635925293, "learning_rate": 1.540342445575091e-05, "loss": 1.0931, "step": 5907 }, { "epoch": 0.34, "grad_norm": 1.64422607421875, "learning_rate": 1.5401861284190368e-05, "loss": 0.9206, "step": 5908 }, { "epoch": 0.34, "grad_norm": 1.7226132154464722, "learning_rate": 1.5400297926226683e-05, "loss": 0.9446, "step": 5909 }, { "epoch": 0.34, "grad_norm": 1.9372684955596924, "learning_rate": 1.5398734381913802e-05, "loss": 0.9841, "step": 5910 }, { "epoch": 0.34, "grad_norm": 1.8741693496704102, "learning_rate": 1.539717065130568e-05, "loss": 0.9853, "step": 5911 }, { "epoch": 0.34, "grad_norm": 1.889244794845581, "learning_rate": 1.5395606734456273e-05, "loss": 1.0806, "step": 5912 }, { "epoch": 0.34, "grad_norm": 1.6520521640777588, "learning_rate": 1.539404263141955e-05, "loss": 0.9568, "step": 5913 }, { "epoch": 0.34, "grad_norm": 1.7411062717437744, "learning_rate": 1.5392478342249485e-05, "loss": 0.9141, "step": 5914 }, { "epoch": 0.34, "grad_norm": 2.041492462158203, "learning_rate": 1.5390913867000056e-05, "loss": 0.9167, "step": 5915 }, { "epoch": 0.34, "grad_norm": 1.7665635347366333, "learning_rate": 1.5389349205725244e-05, "loss": 0.9472, "step": 5916 }, { "epoch": 0.34, "grad_norm": 2.006621837615967, "learning_rate": 1.5387784358479046e-05, "loss": 0.9455, "step": 5917 }, { "epoch": 0.34, "grad_norm": 1.9990004301071167, "learning_rate": 1.5386219325315465e-05, "loss": 1.054, "step": 5918 }, { "epoch": 0.34, "grad_norm": 1.6952786445617676, "learning_rate": 1.53846541062885e-05, "loss": 1.007, "step": 5919 }, { "epoch": 0.34, "grad_norm": 1.677554726600647, "learning_rate": 1.538308870145216e-05, "loss": 1.0355, "step": 5920 }, { "epoch": 0.34, "grad_norm": 1.9471287727355957, "learning_rate": 1.5381523110860466e-05, "loss": 0.9643, "step": 5921 }, { "epoch": 0.34, "grad_norm": 1.646493911743164, "learning_rate": 1.5379957334567444e-05, "loss": 1.1153, "step": 5922 }, { "epoch": 0.34, "grad_norm": 1.8899216651916504, "learning_rate": 1.537839137262712e-05, "loss": 0.9747, "step": 5923 }, { "epoch": 0.34, "grad_norm": 1.7572290897369385, "learning_rate": 1.537682522509354e-05, "loss": 1.1002, "step": 5924 }, { "epoch": 0.34, "grad_norm": 1.8194000720977783, "learning_rate": 1.5375258892020734e-05, "loss": 1.0451, "step": 5925 }, { "epoch": 0.34, "grad_norm": 1.7214579582214355, "learning_rate": 1.5373692373462762e-05, "loss": 1.0656, "step": 5926 }, { "epoch": 0.34, "grad_norm": 1.785620927810669, "learning_rate": 1.5372125669473676e-05, "loss": 0.9336, "step": 5927 }, { "epoch": 0.34, "grad_norm": 1.7861748933792114, "learning_rate": 1.537055878010754e-05, "loss": 1.0356, "step": 5928 }, { "epoch": 0.34, "grad_norm": 1.9255220890045166, "learning_rate": 1.536899170541842e-05, "loss": 1.0626, "step": 5929 }, { "epoch": 0.34, "grad_norm": 1.8526057004928589, "learning_rate": 1.53674244454604e-05, "loss": 1.0259, "step": 5930 }, { "epoch": 0.34, "grad_norm": 1.6921361684799194, "learning_rate": 1.536585700028755e-05, "loss": 1.0125, "step": 5931 }, { "epoch": 0.34, "grad_norm": 1.1956487894058228, "learning_rate": 1.5364289369953967e-05, "loss": 0.6069, "step": 5932 }, { "epoch": 0.34, "grad_norm": 1.7433998584747314, "learning_rate": 1.5362721554513743e-05, "loss": 0.9658, "step": 5933 }, { "epoch": 0.34, "grad_norm": 1.7707922458648682, "learning_rate": 1.5361153554020977e-05, "loss": 1.1592, "step": 5934 }, { "epoch": 0.34, "grad_norm": 2.015620708465576, "learning_rate": 1.5359585368529778e-05, "loss": 1.0309, "step": 5935 }, { "epoch": 0.34, "grad_norm": 1.8554457426071167, "learning_rate": 1.5358016998094255e-05, "loss": 1.0816, "step": 5936 }, { "epoch": 0.34, "grad_norm": 1.7883185148239136, "learning_rate": 1.5356448442768535e-05, "loss": 0.9611, "step": 5937 }, { "epoch": 0.34, "grad_norm": 1.8557204008102417, "learning_rate": 1.5354879702606745e-05, "loss": 1.061, "step": 5938 }, { "epoch": 0.34, "grad_norm": 1.801736831665039, "learning_rate": 1.5353310777663014e-05, "loss": 1.0063, "step": 5939 }, { "epoch": 0.34, "grad_norm": 1.6246137619018555, "learning_rate": 1.535174166799148e-05, "loss": 1.058, "step": 5940 }, { "epoch": 0.34, "grad_norm": 1.909960389137268, "learning_rate": 1.5350172373646292e-05, "loss": 0.9802, "step": 5941 }, { "epoch": 0.34, "grad_norm": 1.1383817195892334, "learning_rate": 1.53486028946816e-05, "loss": 0.6415, "step": 5942 }, { "epoch": 0.34, "grad_norm": 1.8180601596832275, "learning_rate": 1.5347033231151562e-05, "loss": 1.0727, "step": 5943 }, { "epoch": 0.34, "grad_norm": 1.9397879838943481, "learning_rate": 1.5345463383110345e-05, "loss": 0.9781, "step": 5944 }, { "epoch": 0.34, "grad_norm": 1.8618950843811035, "learning_rate": 1.534389335061212e-05, "loss": 1.0772, "step": 5945 }, { "epoch": 0.34, "grad_norm": 1.8925445079803467, "learning_rate": 1.534232313371106e-05, "loss": 1.0837, "step": 5946 }, { "epoch": 0.34, "grad_norm": 1.876947045326233, "learning_rate": 1.5340752732461352e-05, "loss": 1.0523, "step": 5947 }, { "epoch": 0.34, "grad_norm": 1.7619332075119019, "learning_rate": 1.5339182146917185e-05, "loss": 1.0467, "step": 5948 }, { "epoch": 0.34, "grad_norm": 1.6400269269943237, "learning_rate": 1.5337611377132757e-05, "loss": 1.0656, "step": 5949 }, { "epoch": 0.34, "grad_norm": 1.7686179876327515, "learning_rate": 1.533604042316227e-05, "loss": 1.101, "step": 5950 }, { "epoch": 0.34, "grad_norm": 1.8215978145599365, "learning_rate": 1.5334469285059935e-05, "loss": 1.0201, "step": 5951 }, { "epoch": 0.34, "grad_norm": 1.0492578744888306, "learning_rate": 1.533289796287997e-05, "loss": 0.6133, "step": 5952 }, { "epoch": 0.34, "grad_norm": 1.6992031335830688, "learning_rate": 1.5331326456676588e-05, "loss": 1.0827, "step": 5953 }, { "epoch": 0.34, "grad_norm": 1.8196260929107666, "learning_rate": 1.5329754766504025e-05, "loss": 0.9568, "step": 5954 }, { "epoch": 0.34, "grad_norm": 1.8492751121520996, "learning_rate": 1.532818289241651e-05, "loss": 0.9926, "step": 5955 }, { "epoch": 0.34, "grad_norm": 1.8144549131393433, "learning_rate": 1.532661083446829e-05, "loss": 1.0395, "step": 5956 }, { "epoch": 0.34, "grad_norm": 1.6317473649978638, "learning_rate": 1.532503859271361e-05, "loss": 0.8731, "step": 5957 }, { "epoch": 0.34, "grad_norm": 1.9456760883331299, "learning_rate": 1.532346616720672e-05, "loss": 1.0129, "step": 5958 }, { "epoch": 0.34, "grad_norm": 1.693047285079956, "learning_rate": 1.5321893558001884e-05, "loss": 1.0422, "step": 5959 }, { "epoch": 0.34, "grad_norm": 0.9552749395370483, "learning_rate": 1.5320320765153367e-05, "loss": 0.5617, "step": 5960 }, { "epoch": 0.34, "grad_norm": 1.862405776977539, "learning_rate": 1.5318747788715445e-05, "loss": 1.0217, "step": 5961 }, { "epoch": 0.34, "grad_norm": 1.856614112854004, "learning_rate": 1.5317174628742387e-05, "loss": 0.9888, "step": 5962 }, { "epoch": 0.34, "grad_norm": 1.8407907485961914, "learning_rate": 1.531560128528849e-05, "loss": 1.0287, "step": 5963 }, { "epoch": 0.34, "grad_norm": 1.756582260131836, "learning_rate": 1.5314027758408046e-05, "loss": 1.0105, "step": 5964 }, { "epoch": 0.34, "grad_norm": 1.8108060359954834, "learning_rate": 1.531245404815534e-05, "loss": 0.986, "step": 5965 }, { "epoch": 0.34, "grad_norm": 1.9277286529541016, "learning_rate": 1.5310880154584693e-05, "loss": 0.97, "step": 5966 }, { "epoch": 0.34, "grad_norm": 1.8403834104537964, "learning_rate": 1.5309306077750403e-05, "loss": 0.9523, "step": 5967 }, { "epoch": 0.34, "grad_norm": 1.9529494047164917, "learning_rate": 1.530773181770679e-05, "loss": 1.0312, "step": 5968 }, { "epoch": 0.34, "grad_norm": 1.7818015813827515, "learning_rate": 1.530615737450818e-05, "loss": 1.0062, "step": 5969 }, { "epoch": 0.34, "grad_norm": 2.015958786010742, "learning_rate": 1.53045827482089e-05, "loss": 1.0113, "step": 5970 }, { "epoch": 0.34, "grad_norm": 1.821475625038147, "learning_rate": 1.5303007938863287e-05, "loss": 1.0062, "step": 5971 }, { "epoch": 0.34, "grad_norm": 1.8706809282302856, "learning_rate": 1.5301432946525684e-05, "loss": 1.0421, "step": 5972 }, { "epoch": 0.34, "grad_norm": 1.6613487005233765, "learning_rate": 1.5299857771250442e-05, "loss": 0.9531, "step": 5973 }, { "epoch": 0.34, "grad_norm": 1.906445860862732, "learning_rate": 1.529828241309191e-05, "loss": 0.9952, "step": 5974 }, { "epoch": 0.34, "grad_norm": 1.9234700202941895, "learning_rate": 1.529670687210445e-05, "loss": 1.0448, "step": 5975 }, { "epoch": 0.34, "grad_norm": 1.779085397720337, "learning_rate": 1.5295131148342432e-05, "loss": 0.9388, "step": 5976 }, { "epoch": 0.34, "grad_norm": 1.7602256536483765, "learning_rate": 1.5293555241860235e-05, "loss": 0.9842, "step": 5977 }, { "epoch": 0.34, "grad_norm": 2.092806100845337, "learning_rate": 1.529197915271223e-05, "loss": 1.0015, "step": 5978 }, { "epoch": 0.34, "grad_norm": 1.7246185541152954, "learning_rate": 1.5290402880952802e-05, "loss": 0.9688, "step": 5979 }, { "epoch": 0.34, "grad_norm": 1.6515874862670898, "learning_rate": 1.5288826426636356e-05, "loss": 1.0068, "step": 5980 }, { "epoch": 0.34, "grad_norm": 1.674303412437439, "learning_rate": 1.5287249789817283e-05, "loss": 1.0054, "step": 5981 }, { "epoch": 0.34, "grad_norm": 1.8856505155563354, "learning_rate": 1.5285672970549987e-05, "loss": 0.9598, "step": 5982 }, { "epoch": 0.34, "grad_norm": 1.8281220197677612, "learning_rate": 1.528409596888888e-05, "loss": 0.9682, "step": 5983 }, { "epoch": 0.34, "grad_norm": 1.896794080734253, "learning_rate": 1.5282518784888384e-05, "loss": 1.043, "step": 5984 }, { "epoch": 0.34, "grad_norm": 1.9264347553253174, "learning_rate": 1.528094141860292e-05, "loss": 1.0371, "step": 5985 }, { "epoch": 0.34, "grad_norm": 1.7788622379302979, "learning_rate": 1.527936387008692e-05, "loss": 1.0592, "step": 5986 }, { "epoch": 0.34, "grad_norm": 2.0962557792663574, "learning_rate": 1.527778613939482e-05, "loss": 1.1051, "step": 5987 }, { "epoch": 0.34, "grad_norm": 1.7764137983322144, "learning_rate": 1.5276208226581062e-05, "loss": 1.0425, "step": 5988 }, { "epoch": 0.34, "grad_norm": 1.8246161937713623, "learning_rate": 1.5274630131700098e-05, "loss": 0.9956, "step": 5989 }, { "epoch": 0.34, "grad_norm": 1.7876452207565308, "learning_rate": 1.5273051854806383e-05, "loss": 0.9528, "step": 5990 }, { "epoch": 0.34, "grad_norm": 1.9007985591888428, "learning_rate": 1.5271473395954374e-05, "loss": 1.0857, "step": 5991 }, { "epoch": 0.34, "grad_norm": 1.715610146522522, "learning_rate": 1.5269894755198548e-05, "loss": 0.9945, "step": 5992 }, { "epoch": 0.34, "grad_norm": 1.752297282218933, "learning_rate": 1.5268315932593373e-05, "loss": 0.9845, "step": 5993 }, { "epoch": 0.34, "grad_norm": 1.76646089553833, "learning_rate": 1.5266736928193333e-05, "loss": 0.9602, "step": 5994 }, { "epoch": 0.34, "grad_norm": 1.8575209379196167, "learning_rate": 1.5265157742052914e-05, "loss": 0.8897, "step": 5995 }, { "epoch": 0.34, "grad_norm": 1.895287275314331, "learning_rate": 1.5263578374226607e-05, "loss": 1.0185, "step": 5996 }, { "epoch": 0.34, "grad_norm": 1.8003556728363037, "learning_rate": 1.526199882476891e-05, "loss": 0.8937, "step": 5997 }, { "epoch": 0.34, "grad_norm": 1.9263912439346313, "learning_rate": 1.526041909373434e-05, "loss": 1.0516, "step": 5998 }, { "epoch": 0.34, "grad_norm": 1.8928062915802002, "learning_rate": 1.5258839181177397e-05, "loss": 1.0488, "step": 5999 }, { "epoch": 0.34, "grad_norm": 1.8394184112548828, "learning_rate": 1.5257259087152606e-05, "loss": 1.0482, "step": 6000 }, { "epoch": 0.34, "grad_norm": 1.8424128293991089, "learning_rate": 1.5255678811714489e-05, "loss": 1.0242, "step": 6001 }, { "epoch": 0.34, "grad_norm": 1.7534763813018799, "learning_rate": 1.5254098354917575e-05, "loss": 1.0441, "step": 6002 }, { "epoch": 0.34, "grad_norm": 1.9388951063156128, "learning_rate": 1.5252517716816404e-05, "loss": 0.9905, "step": 6003 }, { "epoch": 0.34, "grad_norm": 1.8896952867507935, "learning_rate": 1.5250936897465521e-05, "loss": 1.0085, "step": 6004 }, { "epoch": 0.34, "grad_norm": 1.785737156867981, "learning_rate": 1.5249355896919473e-05, "loss": 1.0191, "step": 6005 }, { "epoch": 0.34, "grad_norm": 1.6935712099075317, "learning_rate": 1.5247774715232817e-05, "loss": 0.9441, "step": 6006 }, { "epoch": 0.34, "grad_norm": 1.8514976501464844, "learning_rate": 1.5246193352460112e-05, "loss": 0.9502, "step": 6007 }, { "epoch": 0.34, "grad_norm": 1.9389610290527344, "learning_rate": 1.524461180865593e-05, "loss": 0.9978, "step": 6008 }, { "epoch": 0.34, "grad_norm": 1.8652697801589966, "learning_rate": 1.5243030083874847e-05, "loss": 1.0548, "step": 6009 }, { "epoch": 0.34, "grad_norm": 1.7653182744979858, "learning_rate": 1.5241448178171442e-05, "loss": 0.9906, "step": 6010 }, { "epoch": 0.34, "grad_norm": 1.811454176902771, "learning_rate": 1.52398660916003e-05, "loss": 1.006, "step": 6011 }, { "epoch": 0.34, "grad_norm": 1.865007758140564, "learning_rate": 1.5238283824216015e-05, "loss": 1.0213, "step": 6012 }, { "epoch": 0.34, "grad_norm": 1.127016544342041, "learning_rate": 1.5236701376073188e-05, "loss": 0.6384, "step": 6013 }, { "epoch": 0.34, "grad_norm": 1.676629662513733, "learning_rate": 1.5235118747226425e-05, "loss": 0.9842, "step": 6014 }, { "epoch": 0.34, "grad_norm": 2.1022276878356934, "learning_rate": 1.5233535937730337e-05, "loss": 1.082, "step": 6015 }, { "epoch": 0.35, "grad_norm": 1.8264867067337036, "learning_rate": 1.5231952947639546e-05, "loss": 1.0443, "step": 6016 }, { "epoch": 0.35, "grad_norm": 1.7683968544006348, "learning_rate": 1.5230369777008672e-05, "loss": 1.0689, "step": 6017 }, { "epoch": 0.35, "grad_norm": 1.632810354232788, "learning_rate": 1.5228786425892348e-05, "loss": 0.948, "step": 6018 }, { "epoch": 0.35, "grad_norm": 1.8630067110061646, "learning_rate": 1.522720289434521e-05, "loss": 1.0269, "step": 6019 }, { "epoch": 0.35, "grad_norm": 1.8933451175689697, "learning_rate": 1.52256191824219e-05, "loss": 0.9956, "step": 6020 }, { "epoch": 0.35, "grad_norm": 1.631516695022583, "learning_rate": 1.5224035290177073e-05, "loss": 0.9119, "step": 6021 }, { "epoch": 0.35, "grad_norm": 1.8633639812469482, "learning_rate": 1.5222451217665376e-05, "loss": 1.0632, "step": 6022 }, { "epoch": 0.35, "grad_norm": 1.6615869998931885, "learning_rate": 1.522086696494148e-05, "loss": 1.0187, "step": 6023 }, { "epoch": 0.35, "grad_norm": 1.8441771268844604, "learning_rate": 1.5219282532060047e-05, "loss": 1.0207, "step": 6024 }, { "epoch": 0.35, "grad_norm": 1.7313274145126343, "learning_rate": 1.521769791907575e-05, "loss": 1.095, "step": 6025 }, { "epoch": 0.35, "grad_norm": 1.977745532989502, "learning_rate": 1.5216113126043279e-05, "loss": 1.0391, "step": 6026 }, { "epoch": 0.35, "grad_norm": 2.0305464267730713, "learning_rate": 1.5214528153017311e-05, "loss": 1.0074, "step": 6027 }, { "epoch": 0.35, "grad_norm": 1.7988184690475464, "learning_rate": 1.5212943000052547e-05, "loss": 1.0014, "step": 6028 }, { "epoch": 0.35, "grad_norm": 1.647784948348999, "learning_rate": 1.5211357667203674e-05, "loss": 1.0294, "step": 6029 }, { "epoch": 0.35, "grad_norm": 1.7534931898117065, "learning_rate": 1.5209772154525411e-05, "loss": 0.9793, "step": 6030 }, { "epoch": 0.35, "grad_norm": 1.926552176475525, "learning_rate": 1.5208186462072463e-05, "loss": 1.0769, "step": 6031 }, { "epoch": 0.35, "grad_norm": 1.8396424055099487, "learning_rate": 1.520660058989955e-05, "loss": 1.0784, "step": 6032 }, { "epoch": 0.35, "grad_norm": 1.7301712036132812, "learning_rate": 1.5205014538061394e-05, "loss": 0.9417, "step": 6033 }, { "epoch": 0.35, "grad_norm": 1.8157248497009277, "learning_rate": 1.5203428306612722e-05, "loss": 0.9629, "step": 6034 }, { "epoch": 0.35, "grad_norm": 1.7172398567199707, "learning_rate": 1.520184189560828e-05, "loss": 1.0255, "step": 6035 }, { "epoch": 0.35, "grad_norm": 1.0596046447753906, "learning_rate": 1.5200255305102802e-05, "loss": 0.6104, "step": 6036 }, { "epoch": 0.35, "grad_norm": 1.848392367362976, "learning_rate": 1.519866853515104e-05, "loss": 1.0094, "step": 6037 }, { "epoch": 0.35, "grad_norm": 1.6890674829483032, "learning_rate": 1.519708158580775e-05, "loss": 0.9585, "step": 6038 }, { "epoch": 0.35, "grad_norm": 1.7839975357055664, "learning_rate": 1.519549445712769e-05, "loss": 0.9934, "step": 6039 }, { "epoch": 0.35, "grad_norm": 1.98940110206604, "learning_rate": 1.519390714916563e-05, "loss": 1.0531, "step": 6040 }, { "epoch": 0.35, "grad_norm": 1.9757086038589478, "learning_rate": 1.519231966197634e-05, "loss": 1.0638, "step": 6041 }, { "epoch": 0.35, "grad_norm": 1.8217767477035522, "learning_rate": 1.5190731995614606e-05, "loss": 0.9674, "step": 6042 }, { "epoch": 0.35, "grad_norm": 2.0176331996917725, "learning_rate": 1.5189144150135211e-05, "loss": 0.9583, "step": 6043 }, { "epoch": 0.35, "grad_norm": 1.9761642217636108, "learning_rate": 1.5187556125592946e-05, "loss": 0.9362, "step": 6044 }, { "epoch": 0.35, "grad_norm": 1.6965320110321045, "learning_rate": 1.518596792204261e-05, "loss": 1.0941, "step": 6045 }, { "epoch": 0.35, "grad_norm": 1.5800977945327759, "learning_rate": 1.5184379539539007e-05, "loss": 0.9586, "step": 6046 }, { "epoch": 0.35, "grad_norm": 1.757814645767212, "learning_rate": 1.5182790978136948e-05, "loss": 1.0211, "step": 6047 }, { "epoch": 0.35, "grad_norm": 1.762105941772461, "learning_rate": 1.518120223789125e-05, "loss": 0.9928, "step": 6048 }, { "epoch": 0.35, "grad_norm": 1.616483211517334, "learning_rate": 1.5179613318856739e-05, "loss": 0.9047, "step": 6049 }, { "epoch": 0.35, "grad_norm": 1.975540280342102, "learning_rate": 1.5178024221088237e-05, "loss": 0.9895, "step": 6050 }, { "epoch": 0.35, "grad_norm": 1.921586513519287, "learning_rate": 1.5176434944640583e-05, "loss": 0.9914, "step": 6051 }, { "epoch": 0.35, "grad_norm": 1.0907998085021973, "learning_rate": 1.5174845489568622e-05, "loss": 0.5833, "step": 6052 }, { "epoch": 0.35, "grad_norm": 1.7525326013565063, "learning_rate": 1.5173255855927194e-05, "loss": 0.9782, "step": 6053 }, { "epoch": 0.35, "grad_norm": 1.8479973077774048, "learning_rate": 1.517166604377116e-05, "loss": 1.0146, "step": 6054 }, { "epoch": 0.35, "grad_norm": 1.6547385454177856, "learning_rate": 1.5170076053155378e-05, "loss": 0.9334, "step": 6055 }, { "epoch": 0.35, "grad_norm": 1.1483852863311768, "learning_rate": 1.5168485884134714e-05, "loss": 0.613, "step": 6056 }, { "epoch": 0.35, "grad_norm": 1.7013059854507446, "learning_rate": 1.5166895536764035e-05, "loss": 1.0071, "step": 6057 }, { "epoch": 0.35, "grad_norm": 1.9270102977752686, "learning_rate": 1.5165305011098228e-05, "loss": 1.0449, "step": 6058 }, { "epoch": 0.35, "grad_norm": 1.813938856124878, "learning_rate": 1.5163714307192174e-05, "loss": 0.9821, "step": 6059 }, { "epoch": 0.35, "grad_norm": 1.597678542137146, "learning_rate": 1.5162123425100764e-05, "loss": 0.9407, "step": 6060 }, { "epoch": 0.35, "grad_norm": 1.9122754335403442, "learning_rate": 1.5160532364878892e-05, "loss": 1.094, "step": 6061 }, { "epoch": 0.35, "grad_norm": 1.734326720237732, "learning_rate": 1.5158941126581466e-05, "loss": 1.0234, "step": 6062 }, { "epoch": 0.35, "grad_norm": 1.8525351285934448, "learning_rate": 1.5157349710263391e-05, "loss": 0.9608, "step": 6063 }, { "epoch": 0.35, "grad_norm": 1.7561818361282349, "learning_rate": 1.5155758115979585e-05, "loss": 0.9605, "step": 6064 }, { "epoch": 0.35, "grad_norm": 1.6374677419662476, "learning_rate": 1.515416634378497e-05, "loss": 0.9448, "step": 6065 }, { "epoch": 0.35, "grad_norm": 1.7455236911773682, "learning_rate": 1.5152574393734467e-05, "loss": 1.0512, "step": 6066 }, { "epoch": 0.35, "grad_norm": 1.7492396831512451, "learning_rate": 1.5150982265883019e-05, "loss": 0.995, "step": 6067 }, { "epoch": 0.35, "grad_norm": 1.9079885482788086, "learning_rate": 1.514938996028556e-05, "loss": 1.0407, "step": 6068 }, { "epoch": 0.35, "grad_norm": 1.7905523777008057, "learning_rate": 1.5147797476997037e-05, "loss": 0.9975, "step": 6069 }, { "epoch": 0.35, "grad_norm": 1.74009108543396, "learning_rate": 1.5146204816072402e-05, "loss": 1.0052, "step": 6070 }, { "epoch": 0.35, "grad_norm": 1.683832049369812, "learning_rate": 1.5144611977566619e-05, "loss": 0.8887, "step": 6071 }, { "epoch": 0.35, "grad_norm": 1.9672870635986328, "learning_rate": 1.5143018961534646e-05, "loss": 1.0812, "step": 6072 }, { "epoch": 0.35, "grad_norm": 1.9116899967193604, "learning_rate": 1.5141425768031452e-05, "loss": 0.9548, "step": 6073 }, { "epoch": 0.35, "grad_norm": 1.64646315574646, "learning_rate": 1.5139832397112018e-05, "loss": 0.9556, "step": 6074 }, { "epoch": 0.35, "grad_norm": 1.7728618383407593, "learning_rate": 1.5138238848831326e-05, "loss": 0.9591, "step": 6075 }, { "epoch": 0.35, "grad_norm": 1.9289491176605225, "learning_rate": 1.5136645123244366e-05, "loss": 1.0289, "step": 6076 }, { "epoch": 0.35, "grad_norm": 1.836245059967041, "learning_rate": 1.513505122040613e-05, "loss": 1.0431, "step": 6077 }, { "epoch": 0.35, "grad_norm": 2.042660713195801, "learning_rate": 1.513345714037162e-05, "loss": 0.9803, "step": 6078 }, { "epoch": 0.35, "grad_norm": 1.7818243503570557, "learning_rate": 1.5131862883195844e-05, "loss": 1.0601, "step": 6079 }, { "epoch": 0.35, "grad_norm": 1.8721898794174194, "learning_rate": 1.5130268448933815e-05, "loss": 1.0128, "step": 6080 }, { "epoch": 0.35, "grad_norm": 1.1211024522781372, "learning_rate": 1.5128673837640552e-05, "loss": 0.6676, "step": 6081 }, { "epoch": 0.35, "grad_norm": 1.8897124528884888, "learning_rate": 1.5127079049371083e-05, "loss": 1.0237, "step": 6082 }, { "epoch": 0.35, "grad_norm": 1.8602781295776367, "learning_rate": 1.5125484084180437e-05, "loss": 0.9586, "step": 6083 }, { "epoch": 0.35, "grad_norm": 2.0244290828704834, "learning_rate": 1.5123888942123652e-05, "loss": 1.0342, "step": 6084 }, { "epoch": 0.35, "grad_norm": 1.880847454071045, "learning_rate": 1.5122293623255777e-05, "loss": 0.9993, "step": 6085 }, { "epoch": 0.35, "grad_norm": 1.6572504043579102, "learning_rate": 1.5120698127631851e-05, "loss": 1.0889, "step": 6086 }, { "epoch": 0.35, "grad_norm": 1.8460131883621216, "learning_rate": 1.5119102455306943e-05, "loss": 1.0564, "step": 6087 }, { "epoch": 0.35, "grad_norm": 2.0404727458953857, "learning_rate": 1.5117506606336105e-05, "loss": 0.9933, "step": 6088 }, { "epoch": 0.35, "grad_norm": 1.6624268293380737, "learning_rate": 1.511591058077441e-05, "loss": 0.9102, "step": 6089 }, { "epoch": 0.35, "grad_norm": 1.831697702407837, "learning_rate": 1.5114314378676928e-05, "loss": 0.9825, "step": 6090 }, { "epoch": 0.35, "grad_norm": 1.6961880922317505, "learning_rate": 1.5112718000098746e-05, "loss": 1.1015, "step": 6091 }, { "epoch": 0.35, "grad_norm": 1.8591513633728027, "learning_rate": 1.5111121445094952e-05, "loss": 1.139, "step": 6092 }, { "epoch": 0.35, "grad_norm": 1.7808781862258911, "learning_rate": 1.510952471372063e-05, "loss": 1.0073, "step": 6093 }, { "epoch": 0.35, "grad_norm": 1.6450496912002563, "learning_rate": 1.5107927806030885e-05, "loss": 0.9958, "step": 6094 }, { "epoch": 0.35, "grad_norm": 1.808458924293518, "learning_rate": 1.5106330722080815e-05, "loss": 1.0669, "step": 6095 }, { "epoch": 0.35, "grad_norm": 1.9477546215057373, "learning_rate": 1.510473346192554e-05, "loss": 1.0831, "step": 6096 }, { "epoch": 0.35, "grad_norm": 1.7996560335159302, "learning_rate": 1.5103136025620173e-05, "loss": 0.9667, "step": 6097 }, { "epoch": 0.35, "grad_norm": 1.7238742113113403, "learning_rate": 1.5101538413219834e-05, "loss": 0.9596, "step": 6098 }, { "epoch": 0.35, "grad_norm": 1.7662135362625122, "learning_rate": 1.5099940624779659e-05, "loss": 1.0272, "step": 6099 }, { "epoch": 0.35, "grad_norm": 1.8242950439453125, "learning_rate": 1.5098342660354774e-05, "loss": 1.0443, "step": 6100 }, { "epoch": 0.35, "grad_norm": 1.747917890548706, "learning_rate": 1.509674452000033e-05, "loss": 0.9921, "step": 6101 }, { "epoch": 0.35, "grad_norm": 1.7819490432739258, "learning_rate": 1.5095146203771466e-05, "loss": 1.0778, "step": 6102 }, { "epoch": 0.35, "grad_norm": 1.910750389099121, "learning_rate": 1.5093547711723343e-05, "loss": 0.9859, "step": 6103 }, { "epoch": 0.35, "grad_norm": 1.8931645154953003, "learning_rate": 1.5091949043911114e-05, "loss": 0.915, "step": 6104 }, { "epoch": 0.35, "grad_norm": 1.8700298070907593, "learning_rate": 1.5090350200389949e-05, "loss": 0.9615, "step": 6105 }, { "epoch": 0.35, "grad_norm": 1.8552329540252686, "learning_rate": 1.5088751181215018e-05, "loss": 1.0613, "step": 6106 }, { "epoch": 0.35, "grad_norm": 2.0202159881591797, "learning_rate": 1.5087151986441495e-05, "loss": 1.0778, "step": 6107 }, { "epoch": 0.35, "grad_norm": 1.7837328910827637, "learning_rate": 1.508555261612457e-05, "loss": 0.9643, "step": 6108 }, { "epoch": 0.35, "grad_norm": 1.7941837310791016, "learning_rate": 1.508395307031943e-05, "loss": 1.0355, "step": 6109 }, { "epoch": 0.35, "grad_norm": 1.1752427816390991, "learning_rate": 1.5082353349081271e-05, "loss": 0.5669, "step": 6110 }, { "epoch": 0.35, "grad_norm": 2.304492235183716, "learning_rate": 1.5080753452465296e-05, "loss": 1.0545, "step": 6111 }, { "epoch": 0.35, "grad_norm": 1.0493496656417847, "learning_rate": 1.507915338052671e-05, "loss": 0.5654, "step": 6112 }, { "epoch": 0.35, "grad_norm": 1.802672266960144, "learning_rate": 1.5077553133320732e-05, "loss": 0.9976, "step": 6113 }, { "epoch": 0.35, "grad_norm": 1.9464176893234253, "learning_rate": 1.5075952710902577e-05, "loss": 1.0095, "step": 6114 }, { "epoch": 0.35, "grad_norm": 1.8905532360076904, "learning_rate": 1.507435211332747e-05, "loss": 1.0249, "step": 6115 }, { "epoch": 0.35, "grad_norm": 1.676032543182373, "learning_rate": 1.5072751340650651e-05, "loss": 0.9244, "step": 6116 }, { "epoch": 0.35, "grad_norm": 1.775314450263977, "learning_rate": 1.5071150392927351e-05, "loss": 0.9537, "step": 6117 }, { "epoch": 0.35, "grad_norm": 1.8706495761871338, "learning_rate": 1.5069549270212818e-05, "loss": 1.0171, "step": 6118 }, { "epoch": 0.35, "grad_norm": 1.7748523950576782, "learning_rate": 1.5067947972562299e-05, "loss": 0.9707, "step": 6119 }, { "epoch": 0.35, "grad_norm": 1.915094256401062, "learning_rate": 1.5066346500031053e-05, "loss": 0.9948, "step": 6120 }, { "epoch": 0.35, "grad_norm": 1.7447072267532349, "learning_rate": 1.5064744852674343e-05, "loss": 0.9228, "step": 6121 }, { "epoch": 0.35, "grad_norm": 1.743086814880371, "learning_rate": 1.5063143030547434e-05, "loss": 0.9608, "step": 6122 }, { "epoch": 0.35, "grad_norm": 1.7502856254577637, "learning_rate": 1.50615410337056e-05, "loss": 0.9502, "step": 6123 }, { "epoch": 0.35, "grad_norm": 1.9303454160690308, "learning_rate": 1.5059938862204126e-05, "loss": 1.063, "step": 6124 }, { "epoch": 0.35, "grad_norm": 1.9362964630126953, "learning_rate": 1.5058336516098298e-05, "loss": 0.9969, "step": 6125 }, { "epoch": 0.35, "grad_norm": 1.9840667247772217, "learning_rate": 1.5056733995443407e-05, "loss": 1.0826, "step": 6126 }, { "epoch": 0.35, "grad_norm": 1.814815640449524, "learning_rate": 1.505513130029475e-05, "loss": 1.016, "step": 6127 }, { "epoch": 0.35, "grad_norm": 1.8090100288391113, "learning_rate": 1.5053528430707632e-05, "loss": 1.0587, "step": 6128 }, { "epoch": 0.35, "grad_norm": 1.698258399963379, "learning_rate": 1.5051925386737365e-05, "loss": 0.9588, "step": 6129 }, { "epoch": 0.35, "grad_norm": 1.8670573234558105, "learning_rate": 1.5050322168439265e-05, "loss": 0.9804, "step": 6130 }, { "epoch": 0.35, "grad_norm": 1.8872298002243042, "learning_rate": 1.5048718775868654e-05, "loss": 1.0135, "step": 6131 }, { "epoch": 0.35, "grad_norm": 1.180208683013916, "learning_rate": 1.504711520908086e-05, "loss": 0.6059, "step": 6132 }, { "epoch": 0.35, "grad_norm": 1.8020473718643188, "learning_rate": 1.5045511468131222e-05, "loss": 1.0464, "step": 6133 }, { "epoch": 0.35, "grad_norm": 1.9412001371383667, "learning_rate": 1.5043907553075072e-05, "loss": 0.9998, "step": 6134 }, { "epoch": 0.35, "grad_norm": 1.8525619506835938, "learning_rate": 1.5042303463967767e-05, "loss": 0.9515, "step": 6135 }, { "epoch": 0.35, "grad_norm": 1.8055158853530884, "learning_rate": 1.5040699200864653e-05, "loss": 1.0043, "step": 6136 }, { "epoch": 0.35, "grad_norm": 1.8440173864364624, "learning_rate": 1.5039094763821091e-05, "loss": 1.0779, "step": 6137 }, { "epoch": 0.35, "grad_norm": 1.8687825202941895, "learning_rate": 1.5037490152892443e-05, "loss": 0.9529, "step": 6138 }, { "epoch": 0.35, "grad_norm": 1.8497471809387207, "learning_rate": 1.5035885368134082e-05, "loss": 1.0351, "step": 6139 }, { "epoch": 0.35, "grad_norm": 1.84963858127594, "learning_rate": 1.5034280409601386e-05, "loss": 0.9766, "step": 6140 }, { "epoch": 0.35, "grad_norm": 2.051640748977661, "learning_rate": 1.5032675277349733e-05, "loss": 0.9926, "step": 6141 }, { "epoch": 0.35, "grad_norm": 1.9018441438674927, "learning_rate": 1.5031069971434517e-05, "loss": 1.0332, "step": 6142 }, { "epoch": 0.35, "grad_norm": 2.1138722896575928, "learning_rate": 1.502946449191113e-05, "loss": 1.0232, "step": 6143 }, { "epoch": 0.35, "grad_norm": 1.9888110160827637, "learning_rate": 1.502785883883497e-05, "loss": 1.0505, "step": 6144 }, { "epoch": 0.35, "grad_norm": 1.9782544374465942, "learning_rate": 1.5026253012261448e-05, "loss": 0.9862, "step": 6145 }, { "epoch": 0.35, "grad_norm": 1.8075544834136963, "learning_rate": 1.5024647012245972e-05, "loss": 0.9944, "step": 6146 }, { "epoch": 0.35, "grad_norm": 1.7304812669754028, "learning_rate": 1.5023040838843966e-05, "loss": 0.9817, "step": 6147 }, { "epoch": 0.35, "grad_norm": 2.0388548374176025, "learning_rate": 1.5021434492110851e-05, "loss": 1.0171, "step": 6148 }, { "epoch": 0.35, "grad_norm": 1.899377703666687, "learning_rate": 1.501982797210206e-05, "loss": 0.9672, "step": 6149 }, { "epoch": 0.35, "grad_norm": 1.7409586906433105, "learning_rate": 1.5018221278873028e-05, "loss": 1.0767, "step": 6150 }, { "epoch": 0.35, "grad_norm": 1.828242301940918, "learning_rate": 1.5016614412479195e-05, "loss": 0.9295, "step": 6151 }, { "epoch": 0.35, "grad_norm": 1.278900146484375, "learning_rate": 1.5015007372976013e-05, "loss": 0.6004, "step": 6152 }, { "epoch": 0.35, "grad_norm": 1.1635106801986694, "learning_rate": 1.5013400160418939e-05, "loss": 0.635, "step": 6153 }, { "epoch": 0.35, "grad_norm": 1.8994755744934082, "learning_rate": 1.5011792774863425e-05, "loss": 0.9922, "step": 6154 }, { "epoch": 0.35, "grad_norm": 1.0562361478805542, "learning_rate": 1.5010185216364947e-05, "loss": 0.5855, "step": 6155 }, { "epoch": 0.35, "grad_norm": 2.134864568710327, "learning_rate": 1.5008577484978966e-05, "loss": 1.0352, "step": 6156 }, { "epoch": 0.35, "grad_norm": 1.7977651357650757, "learning_rate": 1.5006969580760973e-05, "loss": 1.0137, "step": 6157 }, { "epoch": 0.35, "grad_norm": 1.972727656364441, "learning_rate": 1.5005361503766442e-05, "loss": 1.0332, "step": 6158 }, { "epoch": 0.35, "grad_norm": 1.76993989944458, "learning_rate": 1.500375325405087e-05, "loss": 0.9736, "step": 6159 }, { "epoch": 0.35, "grad_norm": 1.9606564044952393, "learning_rate": 1.5002144831669752e-05, "loss": 1.0053, "step": 6160 }, { "epoch": 0.35, "grad_norm": 1.914209008216858, "learning_rate": 1.5000536236678583e-05, "loss": 0.9853, "step": 6161 }, { "epoch": 0.35, "grad_norm": 1.9595354795455933, "learning_rate": 1.4998927469132881e-05, "loss": 1.031, "step": 6162 }, { "epoch": 0.35, "grad_norm": 1.8119326829910278, "learning_rate": 1.4997318529088153e-05, "loss": 0.9371, "step": 6163 }, { "epoch": 0.35, "grad_norm": 2.0008444786071777, "learning_rate": 1.4995709416599927e-05, "loss": 0.9842, "step": 6164 }, { "epoch": 0.35, "grad_norm": 1.7979505062103271, "learning_rate": 1.4994100131723721e-05, "loss": 0.9776, "step": 6165 }, { "epoch": 0.35, "grad_norm": 1.8173017501831055, "learning_rate": 1.499249067451507e-05, "loss": 1.0238, "step": 6166 }, { "epoch": 0.35, "grad_norm": 1.8522803783416748, "learning_rate": 1.4990881045029512e-05, "loss": 1.0515, "step": 6167 }, { "epoch": 0.35, "grad_norm": 1.8449554443359375, "learning_rate": 1.4989271243322592e-05, "loss": 1.0575, "step": 6168 }, { "epoch": 0.35, "grad_norm": 1.9337587356567383, "learning_rate": 1.4987661269449858e-05, "loss": 1.0309, "step": 6169 }, { "epoch": 0.35, "grad_norm": 1.5684670209884644, "learning_rate": 1.4986051123466864e-05, "loss": 0.9466, "step": 6170 }, { "epoch": 0.35, "grad_norm": 1.8391071557998657, "learning_rate": 1.4984440805429175e-05, "loss": 1.0791, "step": 6171 }, { "epoch": 0.35, "grad_norm": 1.8247380256652832, "learning_rate": 1.4982830315392357e-05, "loss": 0.928, "step": 6172 }, { "epoch": 0.35, "grad_norm": 1.9018083810806274, "learning_rate": 1.4981219653411983e-05, "loss": 0.9236, "step": 6173 }, { "epoch": 0.35, "grad_norm": 1.9272807836532593, "learning_rate": 1.4979608819543635e-05, "loss": 0.9611, "step": 6174 }, { "epoch": 0.35, "grad_norm": 1.9058659076690674, "learning_rate": 1.4977997813842894e-05, "loss": 1.0078, "step": 6175 }, { "epoch": 0.35, "grad_norm": 1.9096859693527222, "learning_rate": 1.4976386636365358e-05, "loss": 0.9601, "step": 6176 }, { "epoch": 0.35, "grad_norm": 1.6966136693954468, "learning_rate": 1.4974775287166616e-05, "loss": 1.004, "step": 6177 }, { "epoch": 0.35, "grad_norm": 2.8732268810272217, "learning_rate": 1.4973163766302279e-05, "loss": 1.0294, "step": 6178 }, { "epoch": 0.35, "grad_norm": 1.6418588161468506, "learning_rate": 1.497155207382795e-05, "loss": 0.9561, "step": 6179 }, { "epoch": 0.35, "grad_norm": 1.9602961540222168, "learning_rate": 1.4969940209799248e-05, "loss": 1.0145, "step": 6180 }, { "epoch": 0.35, "grad_norm": 1.7430047988891602, "learning_rate": 1.4968328174271791e-05, "loss": 0.8779, "step": 6181 }, { "epoch": 0.35, "grad_norm": 1.8561376333236694, "learning_rate": 1.4966715967301209e-05, "loss": 1.0128, "step": 6182 }, { "epoch": 0.35, "grad_norm": 2.0214855670928955, "learning_rate": 1.4965103588943131e-05, "loss": 0.9816, "step": 6183 }, { "epoch": 0.35, "grad_norm": 1.7786353826522827, "learning_rate": 1.4963491039253198e-05, "loss": 1.0303, "step": 6184 }, { "epoch": 0.35, "grad_norm": 1.715368628501892, "learning_rate": 1.4961878318287051e-05, "loss": 1.0221, "step": 6185 }, { "epoch": 0.35, "grad_norm": 2.076876401901245, "learning_rate": 1.4960265426100348e-05, "loss": 1.0527, "step": 6186 }, { "epoch": 0.35, "grad_norm": 1.3664613962173462, "learning_rate": 1.4958652362748741e-05, "loss": 0.658, "step": 6187 }, { "epoch": 0.35, "grad_norm": 1.1998190879821777, "learning_rate": 1.4957039128287891e-05, "loss": 0.6562, "step": 6188 }, { "epoch": 0.35, "grad_norm": 2.003871440887451, "learning_rate": 1.4955425722773467e-05, "loss": 1.0703, "step": 6189 }, { "epoch": 0.36, "grad_norm": 1.8492906093597412, "learning_rate": 1.4953812146261143e-05, "loss": 0.8865, "step": 6190 }, { "epoch": 0.36, "grad_norm": 1.8066935539245605, "learning_rate": 1.4952198398806603e-05, "loss": 1.1185, "step": 6191 }, { "epoch": 0.36, "grad_norm": 1.806857943534851, "learning_rate": 1.4950584480465526e-05, "loss": 1.0256, "step": 6192 }, { "epoch": 0.36, "grad_norm": 2.044191598892212, "learning_rate": 1.494897039129361e-05, "loss": 0.947, "step": 6193 }, { "epoch": 0.36, "grad_norm": 1.9217848777770996, "learning_rate": 1.4947356131346551e-05, "loss": 0.9707, "step": 6194 }, { "epoch": 0.36, "grad_norm": 1.7288991212844849, "learning_rate": 1.4945741700680052e-05, "loss": 0.9765, "step": 6195 }, { "epoch": 0.36, "grad_norm": 2.0345983505249023, "learning_rate": 1.4944127099349821e-05, "loss": 1.0394, "step": 6196 }, { "epoch": 0.36, "grad_norm": 1.8446462154388428, "learning_rate": 1.4942512327411574e-05, "loss": 1.0694, "step": 6197 }, { "epoch": 0.36, "grad_norm": 1.843661904335022, "learning_rate": 1.4940897384921034e-05, "loss": 0.9276, "step": 6198 }, { "epoch": 0.36, "grad_norm": 1.9998055696487427, "learning_rate": 1.4939282271933926e-05, "loss": 0.9923, "step": 6199 }, { "epoch": 0.36, "grad_norm": 1.9066799879074097, "learning_rate": 1.4937666988505984e-05, "loss": 1.0521, "step": 6200 }, { "epoch": 0.36, "grad_norm": 1.9892860651016235, "learning_rate": 1.4936051534692948e-05, "loss": 1.0712, "step": 6201 }, { "epoch": 0.36, "grad_norm": 1.926393985748291, "learning_rate": 1.4934435910550562e-05, "loss": 0.9949, "step": 6202 }, { "epoch": 0.36, "grad_norm": 2.122161388397217, "learning_rate": 1.4932820116134575e-05, "loss": 0.9733, "step": 6203 }, { "epoch": 0.36, "grad_norm": 1.6975136995315552, "learning_rate": 1.4931204151500746e-05, "loss": 0.9931, "step": 6204 }, { "epoch": 0.36, "grad_norm": 1.7935056686401367, "learning_rate": 1.4929588016704837e-05, "loss": 0.9855, "step": 6205 }, { "epoch": 0.36, "grad_norm": 1.783319354057312, "learning_rate": 1.4927971711802615e-05, "loss": 1.0147, "step": 6206 }, { "epoch": 0.36, "grad_norm": 1.7814267873764038, "learning_rate": 1.4926355236849857e-05, "loss": 1.1237, "step": 6207 }, { "epoch": 0.36, "grad_norm": 1.939418077468872, "learning_rate": 1.492473859190234e-05, "loss": 1.0465, "step": 6208 }, { "epoch": 0.36, "grad_norm": 1.6745635271072388, "learning_rate": 1.492312177701585e-05, "loss": 0.9628, "step": 6209 }, { "epoch": 0.36, "grad_norm": 1.6615424156188965, "learning_rate": 1.492150479224618e-05, "loss": 0.9644, "step": 6210 }, { "epoch": 0.36, "grad_norm": 1.9777902364730835, "learning_rate": 1.4919887637649127e-05, "loss": 1.0234, "step": 6211 }, { "epoch": 0.36, "grad_norm": 1.6302640438079834, "learning_rate": 1.4918270313280494e-05, "loss": 1.0398, "step": 6212 }, { "epoch": 0.36, "grad_norm": 1.3890011310577393, "learning_rate": 1.4916652819196091e-05, "loss": 0.6534, "step": 6213 }, { "epoch": 0.36, "grad_norm": 1.8851747512817383, "learning_rate": 1.4915035155451736e-05, "loss": 0.9988, "step": 6214 }, { "epoch": 0.36, "grad_norm": 1.9260203838348389, "learning_rate": 1.4913417322103245e-05, "loss": 1.0126, "step": 6215 }, { "epoch": 0.36, "grad_norm": 1.9152077436447144, "learning_rate": 1.4911799319206449e-05, "loss": 1.0496, "step": 6216 }, { "epoch": 0.36, "grad_norm": 1.8295962810516357, "learning_rate": 1.4910181146817178e-05, "loss": 1.0143, "step": 6217 }, { "epoch": 0.36, "grad_norm": 1.916728138923645, "learning_rate": 1.490856280499127e-05, "loss": 1.0762, "step": 6218 }, { "epoch": 0.36, "grad_norm": 1.7609875202178955, "learning_rate": 1.4906944293784574e-05, "loss": 1.046, "step": 6219 }, { "epoch": 0.36, "grad_norm": 1.7399111986160278, "learning_rate": 1.4905325613252937e-05, "loss": 1.0126, "step": 6220 }, { "epoch": 0.36, "grad_norm": 1.7363574504852295, "learning_rate": 1.4903706763452214e-05, "loss": 0.9556, "step": 6221 }, { "epoch": 0.36, "grad_norm": 3.1364474296569824, "learning_rate": 1.4902087744438269e-05, "loss": 1.0122, "step": 6222 }, { "epoch": 0.36, "grad_norm": 1.840543270111084, "learning_rate": 1.490046855626697e-05, "loss": 0.9901, "step": 6223 }, { "epoch": 0.36, "grad_norm": 2.0427427291870117, "learning_rate": 1.489884919899419e-05, "loss": 1.0179, "step": 6224 }, { "epoch": 0.36, "grad_norm": 1.7339650392532349, "learning_rate": 1.4897229672675807e-05, "loss": 0.9775, "step": 6225 }, { "epoch": 0.36, "grad_norm": 1.6437357664108276, "learning_rate": 1.489560997736771e-05, "loss": 0.9673, "step": 6226 }, { "epoch": 0.36, "grad_norm": 1.935800313949585, "learning_rate": 1.4893990113125786e-05, "loss": 1.1411, "step": 6227 }, { "epoch": 0.36, "grad_norm": 1.9649231433868408, "learning_rate": 1.4892370080005936e-05, "loss": 1.037, "step": 6228 }, { "epoch": 0.36, "grad_norm": 1.8260817527770996, "learning_rate": 1.489074987806406e-05, "loss": 1.0888, "step": 6229 }, { "epoch": 0.36, "grad_norm": 1.7246772050857544, "learning_rate": 1.4889129507356068e-05, "loss": 1.0277, "step": 6230 }, { "epoch": 0.36, "grad_norm": 1.9139410257339478, "learning_rate": 1.4887508967937874e-05, "loss": 0.9334, "step": 6231 }, { "epoch": 0.36, "grad_norm": 1.7284680604934692, "learning_rate": 1.4885888259865398e-05, "loss": 0.9824, "step": 6232 }, { "epoch": 0.36, "grad_norm": 1.6977698802947998, "learning_rate": 1.4884267383194567e-05, "loss": 1.0428, "step": 6233 }, { "epoch": 0.36, "grad_norm": 0.9950957298278809, "learning_rate": 1.488264633798131e-05, "loss": 0.593, "step": 6234 }, { "epoch": 0.36, "grad_norm": 1.7299916744232178, "learning_rate": 1.488102512428157e-05, "loss": 1.019, "step": 6235 }, { "epoch": 0.36, "grad_norm": 1.940701961517334, "learning_rate": 1.4879403742151283e-05, "loss": 0.9799, "step": 6236 }, { "epoch": 0.36, "grad_norm": 1.0809202194213867, "learning_rate": 1.4877782191646408e-05, "loss": 0.612, "step": 6237 }, { "epoch": 0.36, "grad_norm": 1.9608681201934814, "learning_rate": 1.4876160472822894e-05, "loss": 0.9512, "step": 6238 }, { "epoch": 0.36, "grad_norm": 1.684124231338501, "learning_rate": 1.48745385857367e-05, "loss": 1.0196, "step": 6239 }, { "epoch": 0.36, "grad_norm": 1.9151595830917358, "learning_rate": 1.4872916530443797e-05, "loss": 0.96, "step": 6240 }, { "epoch": 0.36, "grad_norm": 1.6824043989181519, "learning_rate": 1.4871294307000158e-05, "loss": 0.9408, "step": 6241 }, { "epoch": 0.36, "grad_norm": 1.8919451236724854, "learning_rate": 1.486967191546176e-05, "loss": 1.0664, "step": 6242 }, { "epoch": 0.36, "grad_norm": 2.03244948387146, "learning_rate": 1.4868049355884586e-05, "loss": 1.1093, "step": 6243 }, { "epoch": 0.36, "grad_norm": 1.6975504159927368, "learning_rate": 1.4866426628324625e-05, "loss": 0.9314, "step": 6244 }, { "epoch": 0.36, "grad_norm": 1.744381070137024, "learning_rate": 1.4864803732837878e-05, "loss": 1.0092, "step": 6245 }, { "epoch": 0.36, "grad_norm": 1.679764986038208, "learning_rate": 1.4863180669480344e-05, "loss": 1.0801, "step": 6246 }, { "epoch": 0.36, "grad_norm": 1.6461364030838013, "learning_rate": 1.486155743830803e-05, "loss": 0.9785, "step": 6247 }, { "epoch": 0.36, "grad_norm": 1.8205231428146362, "learning_rate": 1.4859934039376947e-05, "loss": 1.0272, "step": 6248 }, { "epoch": 0.36, "grad_norm": 2.1533944606781006, "learning_rate": 1.4858310472743117e-05, "loss": 0.9756, "step": 6249 }, { "epoch": 0.36, "grad_norm": 1.6899497509002686, "learning_rate": 1.4856686738462563e-05, "loss": 0.9829, "step": 6250 }, { "epoch": 0.36, "grad_norm": 1.9039406776428223, "learning_rate": 1.4855062836591313e-05, "loss": 1.0296, "step": 6251 }, { "epoch": 0.36, "grad_norm": 1.7875765562057495, "learning_rate": 1.4853438767185411e-05, "loss": 0.8543, "step": 6252 }, { "epoch": 0.36, "grad_norm": 1.8219566345214844, "learning_rate": 1.4851814530300895e-05, "loss": 0.9051, "step": 6253 }, { "epoch": 0.36, "grad_norm": 1.7640273571014404, "learning_rate": 1.4850190125993811e-05, "loss": 1.0048, "step": 6254 }, { "epoch": 0.36, "grad_norm": 1.7041285037994385, "learning_rate": 1.484856555432021e-05, "loss": 0.982, "step": 6255 }, { "epoch": 0.36, "grad_norm": 1.8866806030273438, "learning_rate": 1.4846940815336162e-05, "loss": 1.0454, "step": 6256 }, { "epoch": 0.36, "grad_norm": 1.8135604858398438, "learning_rate": 1.4845315909097724e-05, "loss": 0.9845, "step": 6257 }, { "epoch": 0.36, "grad_norm": 1.6515693664550781, "learning_rate": 1.4843690835660968e-05, "loss": 0.9832, "step": 6258 }, { "epoch": 0.36, "grad_norm": 1.726922631263733, "learning_rate": 1.4842065595081973e-05, "loss": 0.9974, "step": 6259 }, { "epoch": 0.36, "grad_norm": 1.7245620489120483, "learning_rate": 1.484044018741682e-05, "loss": 0.9304, "step": 6260 }, { "epoch": 0.36, "grad_norm": 1.7319328784942627, "learning_rate": 1.4838814612721599e-05, "loss": 1.0461, "step": 6261 }, { "epoch": 0.36, "grad_norm": 2.7668333053588867, "learning_rate": 1.4837188871052399e-05, "loss": 1.0281, "step": 6262 }, { "epoch": 0.36, "grad_norm": 1.839624047279358, "learning_rate": 1.4835562962465323e-05, "loss": 0.9605, "step": 6263 }, { "epoch": 0.36, "grad_norm": 1.6296578645706177, "learning_rate": 1.483393688701648e-05, "loss": 1.0166, "step": 6264 }, { "epoch": 0.36, "grad_norm": 1.7272576093673706, "learning_rate": 1.4832310644761978e-05, "loss": 1.0149, "step": 6265 }, { "epoch": 0.36, "grad_norm": 1.641893982887268, "learning_rate": 1.483068423575793e-05, "loss": 0.996, "step": 6266 }, { "epoch": 0.36, "grad_norm": 1.7102335691452026, "learning_rate": 1.4829057660060464e-05, "loss": 0.9954, "step": 6267 }, { "epoch": 0.36, "grad_norm": 1.698488712310791, "learning_rate": 1.482743091772571e-05, "loss": 0.9074, "step": 6268 }, { "epoch": 0.36, "grad_norm": 1.7537356615066528, "learning_rate": 1.4825804008809799e-05, "loss": 0.9874, "step": 6269 }, { "epoch": 0.36, "grad_norm": 1.8343656063079834, "learning_rate": 1.4824176933368873e-05, "loss": 1.013, "step": 6270 }, { "epoch": 0.36, "grad_norm": 1.8277825117111206, "learning_rate": 1.4822549691459077e-05, "loss": 0.9601, "step": 6271 }, { "epoch": 0.36, "grad_norm": 1.9550387859344482, "learning_rate": 1.482092228313656e-05, "loss": 1.0608, "step": 6272 }, { "epoch": 0.36, "grad_norm": 1.8219711780548096, "learning_rate": 1.4819294708457484e-05, "loss": 1.0604, "step": 6273 }, { "epoch": 0.36, "grad_norm": 1.8420875072479248, "learning_rate": 1.4817666967478008e-05, "loss": 0.9718, "step": 6274 }, { "epoch": 0.36, "grad_norm": 2.0286264419555664, "learning_rate": 1.4816039060254304e-05, "loss": 1.0603, "step": 6275 }, { "epoch": 0.36, "grad_norm": 1.6736133098602295, "learning_rate": 1.4814410986842544e-05, "loss": 0.99, "step": 6276 }, { "epoch": 0.36, "grad_norm": 1.9076026678085327, "learning_rate": 1.4812782747298911e-05, "loss": 1.0321, "step": 6277 }, { "epoch": 0.36, "grad_norm": 2.1877388954162598, "learning_rate": 1.4811154341679585e-05, "loss": 1.0694, "step": 6278 }, { "epoch": 0.36, "grad_norm": 1.7865681648254395, "learning_rate": 1.4809525770040764e-05, "loss": 1.0604, "step": 6279 }, { "epoch": 0.36, "grad_norm": 1.7880630493164062, "learning_rate": 1.4807897032438646e-05, "loss": 0.9887, "step": 6280 }, { "epoch": 0.36, "grad_norm": 1.5941312313079834, "learning_rate": 1.4806268128929431e-05, "loss": 0.9557, "step": 6281 }, { "epoch": 0.36, "grad_norm": 1.7703405618667603, "learning_rate": 1.4804639059569327e-05, "loss": 0.9952, "step": 6282 }, { "epoch": 0.36, "grad_norm": 1.8247032165527344, "learning_rate": 1.4803009824414552e-05, "loss": 1.0362, "step": 6283 }, { "epoch": 0.36, "grad_norm": 1.6750893592834473, "learning_rate": 1.4801380423521323e-05, "loss": 1.014, "step": 6284 }, { "epoch": 0.36, "grad_norm": 1.8853751420974731, "learning_rate": 1.4799750856945869e-05, "loss": 1.0335, "step": 6285 }, { "epoch": 0.36, "grad_norm": 2.031217336654663, "learning_rate": 1.4798121124744421e-05, "loss": 0.9928, "step": 6286 }, { "epoch": 0.36, "grad_norm": 1.7678987979888916, "learning_rate": 1.4796491226973215e-05, "loss": 0.8818, "step": 6287 }, { "epoch": 0.36, "grad_norm": 1.7393865585327148, "learning_rate": 1.4794861163688495e-05, "loss": 1.0452, "step": 6288 }, { "epoch": 0.36, "grad_norm": 1.8612635135650635, "learning_rate": 1.479323093494651e-05, "loss": 1.0073, "step": 6289 }, { "epoch": 0.36, "grad_norm": 1.8555412292480469, "learning_rate": 1.4791600540803514e-05, "loss": 0.9929, "step": 6290 }, { "epoch": 0.36, "grad_norm": 1.912619709968567, "learning_rate": 1.478996998131577e-05, "loss": 1.0378, "step": 6291 }, { "epoch": 0.36, "grad_norm": 1.6263010501861572, "learning_rate": 1.4788339256539543e-05, "loss": 1.0443, "step": 6292 }, { "epoch": 0.36, "grad_norm": 2.1084511280059814, "learning_rate": 1.47867083665311e-05, "loss": 0.9555, "step": 6293 }, { "epoch": 0.36, "grad_norm": 1.8931496143341064, "learning_rate": 1.4785077311346725e-05, "loss": 1.0787, "step": 6294 }, { "epoch": 0.36, "grad_norm": 1.0524985790252686, "learning_rate": 1.4783446091042698e-05, "loss": 0.6046, "step": 6295 }, { "epoch": 0.36, "grad_norm": 1.8386329412460327, "learning_rate": 1.478181470567531e-05, "loss": 1.0903, "step": 6296 }, { "epoch": 0.36, "grad_norm": 1.8875433206558228, "learning_rate": 1.4780183155300853e-05, "loss": 1.027, "step": 6297 }, { "epoch": 0.36, "grad_norm": 1.003554344177246, "learning_rate": 1.4778551439975629e-05, "loss": 0.5215, "step": 6298 }, { "epoch": 0.36, "grad_norm": 1.7032344341278076, "learning_rate": 1.477691955975594e-05, "loss": 1.079, "step": 6299 }, { "epoch": 0.36, "grad_norm": 1.8045324087142944, "learning_rate": 1.4775287514698105e-05, "loss": 0.971, "step": 6300 }, { "epoch": 0.36, "grad_norm": 1.139028787612915, "learning_rate": 1.4773655304858434e-05, "loss": 0.6167, "step": 6301 }, { "epoch": 0.36, "grad_norm": 1.7120414972305298, "learning_rate": 1.4772022930293256e-05, "loss": 1.0214, "step": 6302 }, { "epoch": 0.36, "grad_norm": 1.8945528268814087, "learning_rate": 1.4770390391058894e-05, "loss": 1.0712, "step": 6303 }, { "epoch": 0.36, "grad_norm": 1.6438384056091309, "learning_rate": 1.4768757687211685e-05, "loss": 0.9501, "step": 6304 }, { "epoch": 0.36, "grad_norm": 1.6727845668792725, "learning_rate": 1.476712481880797e-05, "loss": 0.9553, "step": 6305 }, { "epoch": 0.36, "grad_norm": 1.8367928266525269, "learning_rate": 1.4765491785904094e-05, "loss": 0.9926, "step": 6306 }, { "epoch": 0.36, "grad_norm": 1.7160555124282837, "learning_rate": 1.476385858855641e-05, "loss": 1.0785, "step": 6307 }, { "epoch": 0.36, "grad_norm": 1.7079044580459595, "learning_rate": 1.4762225226821272e-05, "loss": 1.105, "step": 6308 }, { "epoch": 0.36, "grad_norm": 1.6129440069198608, "learning_rate": 1.4760591700755042e-05, "loss": 0.9922, "step": 6309 }, { "epoch": 0.36, "grad_norm": 1.846893310546875, "learning_rate": 1.4758958010414094e-05, "loss": 0.9897, "step": 6310 }, { "epoch": 0.36, "grad_norm": 1.9567654132843018, "learning_rate": 1.4757324155854798e-05, "loss": 1.0259, "step": 6311 }, { "epoch": 0.36, "grad_norm": 1.7855849266052246, "learning_rate": 1.4755690137133534e-05, "loss": 0.9321, "step": 6312 }, { "epoch": 0.36, "grad_norm": 1.7195664644241333, "learning_rate": 1.4754055954306687e-05, "loss": 0.9601, "step": 6313 }, { "epoch": 0.36, "grad_norm": 1.9063020944595337, "learning_rate": 1.4752421607430649e-05, "loss": 0.9526, "step": 6314 }, { "epoch": 0.36, "grad_norm": 1.7135204076766968, "learning_rate": 1.4750787096561818e-05, "loss": 0.9973, "step": 6315 }, { "epoch": 0.36, "grad_norm": 1.8679938316345215, "learning_rate": 1.4749152421756596e-05, "loss": 1.0395, "step": 6316 }, { "epoch": 0.36, "grad_norm": 1.8654956817626953, "learning_rate": 1.4747517583071386e-05, "loss": 1.0344, "step": 6317 }, { "epoch": 0.36, "grad_norm": 1.706494927406311, "learning_rate": 1.4745882580562609e-05, "loss": 0.9955, "step": 6318 }, { "epoch": 0.36, "grad_norm": 1.710939645767212, "learning_rate": 1.4744247414286681e-05, "loss": 1.0618, "step": 6319 }, { "epoch": 0.36, "grad_norm": 1.870609998703003, "learning_rate": 1.4742612084300025e-05, "loss": 0.9628, "step": 6320 }, { "epoch": 0.36, "grad_norm": 1.637471079826355, "learning_rate": 1.4740976590659075e-05, "loss": 1.0338, "step": 6321 }, { "epoch": 0.36, "grad_norm": 1.7514467239379883, "learning_rate": 1.4739340933420268e-05, "loss": 0.9147, "step": 6322 }, { "epoch": 0.36, "grad_norm": 1.685638666152954, "learning_rate": 1.4737705112640044e-05, "loss": 1.1065, "step": 6323 }, { "epoch": 0.36, "grad_norm": 1.6635195016860962, "learning_rate": 1.4736069128374851e-05, "loss": 0.9881, "step": 6324 }, { "epoch": 0.36, "grad_norm": 1.1888723373413086, "learning_rate": 1.473443298068114e-05, "loss": 0.6156, "step": 6325 }, { "epoch": 0.36, "grad_norm": 1.7495137453079224, "learning_rate": 1.4732796669615372e-05, "loss": 0.8819, "step": 6326 }, { "epoch": 0.36, "grad_norm": 1.8505098819732666, "learning_rate": 1.4731160195234013e-05, "loss": 1.0184, "step": 6327 }, { "epoch": 0.36, "grad_norm": 1.7478581666946411, "learning_rate": 1.4729523557593532e-05, "loss": 1.0741, "step": 6328 }, { "epoch": 0.36, "grad_norm": 1.7161797285079956, "learning_rate": 1.4727886756750404e-05, "loss": 0.9647, "step": 6329 }, { "epoch": 0.36, "grad_norm": 1.9601303339004517, "learning_rate": 1.472624979276111e-05, "loss": 1.0916, "step": 6330 }, { "epoch": 0.36, "grad_norm": 1.6027662754058838, "learning_rate": 1.4724612665682139e-05, "loss": 0.9793, "step": 6331 }, { "epoch": 0.36, "grad_norm": 1.978075385093689, "learning_rate": 1.4722975375569978e-05, "loss": 0.9719, "step": 6332 }, { "epoch": 0.36, "grad_norm": 1.8380613327026367, "learning_rate": 1.4721337922481135e-05, "loss": 0.9598, "step": 6333 }, { "epoch": 0.36, "grad_norm": 1.1491286754608154, "learning_rate": 1.4719700306472108e-05, "loss": 0.6358, "step": 6334 }, { "epoch": 0.36, "grad_norm": 1.7171493768692017, "learning_rate": 1.4718062527599408e-05, "loss": 0.9946, "step": 6335 }, { "epoch": 0.36, "grad_norm": 2.051431894302368, "learning_rate": 1.4716424585919548e-05, "loss": 1.1288, "step": 6336 }, { "epoch": 0.36, "grad_norm": 1.7927138805389404, "learning_rate": 1.4714786481489052e-05, "loss": 0.9239, "step": 6337 }, { "epoch": 0.36, "grad_norm": 1.7960854768753052, "learning_rate": 1.4713148214364443e-05, "loss": 0.9576, "step": 6338 }, { "epoch": 0.36, "grad_norm": 1.7240616083145142, "learning_rate": 1.4711509784602256e-05, "loss": 1.0141, "step": 6339 }, { "epoch": 0.36, "grad_norm": 1.897529125213623, "learning_rate": 1.4709871192259027e-05, "loss": 1.0801, "step": 6340 }, { "epoch": 0.36, "grad_norm": 1.0229244232177734, "learning_rate": 1.4708232437391299e-05, "loss": 0.6452, "step": 6341 }, { "epoch": 0.36, "grad_norm": 1.8683282136917114, "learning_rate": 1.4706593520055624e-05, "loss": 1.0475, "step": 6342 }, { "epoch": 0.36, "grad_norm": 1.7752506732940674, "learning_rate": 1.470495444030855e-05, "loss": 0.9797, "step": 6343 }, { "epoch": 0.36, "grad_norm": 1.7672481536865234, "learning_rate": 1.4703315198206643e-05, "loss": 1.0958, "step": 6344 }, { "epoch": 0.36, "grad_norm": 1.7327262163162231, "learning_rate": 1.4701675793806464e-05, "loss": 1.0391, "step": 6345 }, { "epoch": 0.36, "grad_norm": 1.6891984939575195, "learning_rate": 1.4700036227164592e-05, "loss": 0.9485, "step": 6346 }, { "epoch": 0.36, "grad_norm": 1.9141579866409302, "learning_rate": 1.4698396498337595e-05, "loss": 1.1333, "step": 6347 }, { "epoch": 0.36, "grad_norm": 1.8592197895050049, "learning_rate": 1.469675660738206e-05, "loss": 0.9578, "step": 6348 }, { "epoch": 0.36, "grad_norm": 2.065803289413452, "learning_rate": 1.4695116554354576e-05, "loss": 1.0292, "step": 6349 }, { "epoch": 0.36, "grad_norm": 1.8045185804367065, "learning_rate": 1.4693476339311734e-05, "loss": 0.9475, "step": 6350 }, { "epoch": 0.36, "grad_norm": 1.7326642274856567, "learning_rate": 1.4691835962310135e-05, "loss": 1.0681, "step": 6351 }, { "epoch": 0.36, "grad_norm": 1.9410340785980225, "learning_rate": 1.4690195423406381e-05, "loss": 1.0174, "step": 6352 }, { "epoch": 0.36, "grad_norm": 1.0535422563552856, "learning_rate": 1.4688554722657087e-05, "loss": 0.6152, "step": 6353 }, { "epoch": 0.36, "grad_norm": 1.753043532371521, "learning_rate": 1.4686913860118865e-05, "loss": 1.0644, "step": 6354 }, { "epoch": 0.36, "grad_norm": 2.030503511428833, "learning_rate": 1.4685272835848336e-05, "loss": 0.948, "step": 6355 }, { "epoch": 0.36, "grad_norm": 2.0237739086151123, "learning_rate": 1.4683631649902132e-05, "loss": 1.0836, "step": 6356 }, { "epoch": 0.36, "grad_norm": 1.687549114227295, "learning_rate": 1.4681990302336884e-05, "loss": 0.9991, "step": 6357 }, { "epoch": 0.36, "grad_norm": 1.7775124311447144, "learning_rate": 1.4680348793209227e-05, "loss": 0.9783, "step": 6358 }, { "epoch": 0.36, "grad_norm": 1.7740108966827393, "learning_rate": 1.4678707122575806e-05, "loss": 0.9924, "step": 6359 }, { "epoch": 0.36, "grad_norm": 1.9009690284729004, "learning_rate": 1.4677065290493273e-05, "loss": 0.9882, "step": 6360 }, { "epoch": 0.36, "grad_norm": 1.6432981491088867, "learning_rate": 1.4675423297018283e-05, "loss": 0.9709, "step": 6361 }, { "epoch": 0.36, "grad_norm": 1.709234356880188, "learning_rate": 1.4673781142207496e-05, "loss": 0.9513, "step": 6362 }, { "epoch": 0.36, "grad_norm": 1.6530418395996094, "learning_rate": 1.4672138826117576e-05, "loss": 0.9322, "step": 6363 }, { "epoch": 0.36, "grad_norm": 1.7418795824050903, "learning_rate": 1.4670496348805197e-05, "loss": 0.9852, "step": 6364 }, { "epoch": 0.37, "grad_norm": 1.8017892837524414, "learning_rate": 1.4668853710327033e-05, "loss": 1.0185, "step": 6365 }, { "epoch": 0.37, "grad_norm": 2.0400454998016357, "learning_rate": 1.466721091073977e-05, "loss": 0.9944, "step": 6366 }, { "epoch": 0.37, "grad_norm": 1.8270628452301025, "learning_rate": 1.46655679501001e-05, "loss": 1.0031, "step": 6367 }, { "epoch": 0.37, "grad_norm": 0.9862697720527649, "learning_rate": 1.4663924828464709e-05, "loss": 0.5768, "step": 6368 }, { "epoch": 0.37, "grad_norm": 1.9824302196502686, "learning_rate": 1.46622815458903e-05, "loss": 1.108, "step": 6369 }, { "epoch": 0.37, "grad_norm": 1.803748607635498, "learning_rate": 1.466063810243358e-05, "loss": 1.0048, "step": 6370 }, { "epoch": 0.37, "grad_norm": 1.897152066230774, "learning_rate": 1.4658994498151255e-05, "loss": 1.081, "step": 6371 }, { "epoch": 0.37, "grad_norm": 1.8599333763122559, "learning_rate": 1.465735073310005e-05, "loss": 1.0439, "step": 6372 }, { "epoch": 0.37, "grad_norm": 1.7976322174072266, "learning_rate": 1.4655706807336676e-05, "loss": 0.9446, "step": 6373 }, { "epoch": 0.37, "grad_norm": 1.88971745967865, "learning_rate": 1.4654062720917868e-05, "loss": 1.0468, "step": 6374 }, { "epoch": 0.37, "grad_norm": 1.8467110395431519, "learning_rate": 1.4652418473900355e-05, "loss": 1.0239, "step": 6375 }, { "epoch": 0.37, "grad_norm": 1.7547907829284668, "learning_rate": 1.4650774066340877e-05, "loss": 0.9928, "step": 6376 }, { "epoch": 0.37, "grad_norm": 1.7801387310028076, "learning_rate": 1.4649129498296175e-05, "loss": 0.9929, "step": 6377 }, { "epoch": 0.37, "grad_norm": 2.1547889709472656, "learning_rate": 1.4647484769823004e-05, "loss": 1.089, "step": 6378 }, { "epoch": 0.37, "grad_norm": 1.9286860227584839, "learning_rate": 1.4645839880978114e-05, "loss": 0.9628, "step": 6379 }, { "epoch": 0.37, "grad_norm": 1.6162844896316528, "learning_rate": 1.4644194831818268e-05, "loss": 0.9821, "step": 6380 }, { "epoch": 0.37, "grad_norm": 1.7161037921905518, "learning_rate": 1.4642549622400233e-05, "loss": 0.9686, "step": 6381 }, { "epoch": 0.37, "grad_norm": 1.887052059173584, "learning_rate": 1.4640904252780776e-05, "loss": 0.9602, "step": 6382 }, { "epoch": 0.37, "grad_norm": 1.7911127805709839, "learning_rate": 1.4639258723016676e-05, "loss": 1.0207, "step": 6383 }, { "epoch": 0.37, "grad_norm": 1.834877610206604, "learning_rate": 1.4637613033164719e-05, "loss": 1.0345, "step": 6384 }, { "epoch": 0.37, "grad_norm": 1.651520848274231, "learning_rate": 1.4635967183281692e-05, "loss": 0.9821, "step": 6385 }, { "epoch": 0.37, "grad_norm": 1.8115218877792358, "learning_rate": 1.4634321173424386e-05, "loss": 1.0714, "step": 6386 }, { "epoch": 0.37, "grad_norm": 2.091355562210083, "learning_rate": 1.4632675003649604e-05, "loss": 1.0827, "step": 6387 }, { "epoch": 0.37, "grad_norm": 2.0397772789001465, "learning_rate": 1.4631028674014143e-05, "loss": 1.1114, "step": 6388 }, { "epoch": 0.37, "grad_norm": 1.6521655321121216, "learning_rate": 1.4629382184574823e-05, "loss": 0.9815, "step": 6389 }, { "epoch": 0.37, "grad_norm": 1.8779476881027222, "learning_rate": 1.4627735535388455e-05, "loss": 1.0096, "step": 6390 }, { "epoch": 0.37, "grad_norm": 1.9257357120513916, "learning_rate": 1.462608872651186e-05, "loss": 1.0815, "step": 6391 }, { "epoch": 0.37, "grad_norm": 1.8574851751327515, "learning_rate": 1.4624441758001865e-05, "loss": 0.9644, "step": 6392 }, { "epoch": 0.37, "grad_norm": 1.8194639682769775, "learning_rate": 1.4622794629915306e-05, "loss": 0.9431, "step": 6393 }, { "epoch": 0.37, "grad_norm": 1.8634132146835327, "learning_rate": 1.4621147342309016e-05, "loss": 1.0495, "step": 6394 }, { "epoch": 0.37, "grad_norm": 1.9924507141113281, "learning_rate": 1.4619499895239839e-05, "loss": 0.9619, "step": 6395 }, { "epoch": 0.37, "grad_norm": 1.1242406368255615, "learning_rate": 1.4617852288764624e-05, "loss": 0.6205, "step": 6396 }, { "epoch": 0.37, "grad_norm": 1.799599051475525, "learning_rate": 1.4616204522940227e-05, "loss": 1.0294, "step": 6397 }, { "epoch": 0.37, "grad_norm": 1.718880534172058, "learning_rate": 1.4614556597823506e-05, "loss": 0.9918, "step": 6398 }, { "epoch": 0.37, "grad_norm": 1.833927869796753, "learning_rate": 1.4612908513471329e-05, "loss": 0.9039, "step": 6399 }, { "epoch": 0.37, "grad_norm": 1.8740451335906982, "learning_rate": 1.4611260269940563e-05, "loss": 1.0074, "step": 6400 }, { "epoch": 0.37, "grad_norm": 1.761946678161621, "learning_rate": 1.4609611867288087e-05, "loss": 0.9872, "step": 6401 }, { "epoch": 0.37, "grad_norm": 1.6574726104736328, "learning_rate": 1.4607963305570783e-05, "loss": 1.0523, "step": 6402 }, { "epoch": 0.37, "grad_norm": 1.8972047567367554, "learning_rate": 1.4606314584845536e-05, "loss": 0.952, "step": 6403 }, { "epoch": 0.37, "grad_norm": 1.1345824003219604, "learning_rate": 1.4604665705169239e-05, "loss": 0.64, "step": 6404 }, { "epoch": 0.37, "grad_norm": 1.8390077352523804, "learning_rate": 1.4603016666598793e-05, "loss": 1.0317, "step": 6405 }, { "epoch": 0.37, "grad_norm": 1.8301200866699219, "learning_rate": 1.4601367469191098e-05, "loss": 1.07, "step": 6406 }, { "epoch": 0.37, "grad_norm": 2.0173637866973877, "learning_rate": 1.4599718113003065e-05, "loss": 1.0378, "step": 6407 }, { "epoch": 0.37, "grad_norm": 1.7590110301971436, "learning_rate": 1.459806859809161e-05, "loss": 1.004, "step": 6408 }, { "epoch": 0.37, "grad_norm": 1.4076590538024902, "learning_rate": 1.4596418924513652e-05, "loss": 0.608, "step": 6409 }, { "epoch": 0.37, "grad_norm": 1.7717429399490356, "learning_rate": 1.4594769092326113e-05, "loss": 1.0398, "step": 6410 }, { "epoch": 0.37, "grad_norm": 1.9422847032546997, "learning_rate": 1.4593119101585931e-05, "loss": 0.9805, "step": 6411 }, { "epoch": 0.37, "grad_norm": 1.9014897346496582, "learning_rate": 1.4591468952350039e-05, "loss": 1.0414, "step": 6412 }, { "epoch": 0.37, "grad_norm": 1.776681661605835, "learning_rate": 1.4589818644675378e-05, "loss": 1.0561, "step": 6413 }, { "epoch": 0.37, "grad_norm": 1.7230682373046875, "learning_rate": 1.4588168178618897e-05, "loss": 1.0137, "step": 6414 }, { "epoch": 0.37, "grad_norm": 1.0297831296920776, "learning_rate": 1.4586517554237549e-05, "loss": 0.6216, "step": 6415 }, { "epoch": 0.37, "grad_norm": 2.0024008750915527, "learning_rate": 1.4584866771588294e-05, "loss": 0.9889, "step": 6416 }, { "epoch": 0.37, "grad_norm": 1.6077396869659424, "learning_rate": 1.4583215830728092e-05, "loss": 1.0507, "step": 6417 }, { "epoch": 0.37, "grad_norm": 1.6634632349014282, "learning_rate": 1.4581564731713915e-05, "loss": 1.0837, "step": 6418 }, { "epoch": 0.37, "grad_norm": 1.8909369707107544, "learning_rate": 1.4579913474602738e-05, "loss": 1.0191, "step": 6419 }, { "epoch": 0.37, "grad_norm": 1.847243070602417, "learning_rate": 1.4578262059451538e-05, "loss": 1.0072, "step": 6420 }, { "epoch": 0.37, "grad_norm": 1.7284141778945923, "learning_rate": 1.4576610486317302e-05, "loss": 1.0019, "step": 6421 }, { "epoch": 0.37, "grad_norm": 1.6591328382492065, "learning_rate": 1.4574958755257024e-05, "loss": 0.9397, "step": 6422 }, { "epoch": 0.37, "grad_norm": 1.8538094758987427, "learning_rate": 1.4573306866327702e-05, "loss": 1.0108, "step": 6423 }, { "epoch": 0.37, "grad_norm": 1.6468374729156494, "learning_rate": 1.4571654819586334e-05, "loss": 1.0046, "step": 6424 }, { "epoch": 0.37, "grad_norm": 1.8968156576156616, "learning_rate": 1.4570002615089924e-05, "loss": 1.0159, "step": 6425 }, { "epoch": 0.37, "grad_norm": 1.8063596487045288, "learning_rate": 1.4568350252895494e-05, "loss": 1.0217, "step": 6426 }, { "epoch": 0.37, "grad_norm": 2.019664764404297, "learning_rate": 1.4566697733060057e-05, "loss": 0.9998, "step": 6427 }, { "epoch": 0.37, "grad_norm": 1.7261220216751099, "learning_rate": 1.4565045055640639e-05, "loss": 0.9801, "step": 6428 }, { "epoch": 0.37, "grad_norm": 1.1193735599517822, "learning_rate": 1.4563392220694265e-05, "loss": 0.5985, "step": 6429 }, { "epoch": 0.37, "grad_norm": 1.9723857641220093, "learning_rate": 1.4561739228277976e-05, "loss": 0.9833, "step": 6430 }, { "epoch": 0.37, "grad_norm": 1.8732644319534302, "learning_rate": 1.4560086078448807e-05, "loss": 1.0046, "step": 6431 }, { "epoch": 0.37, "grad_norm": 1.885548710823059, "learning_rate": 1.4558432771263806e-05, "loss": 1.0395, "step": 6432 }, { "epoch": 0.37, "grad_norm": 1.9934170246124268, "learning_rate": 1.4556779306780024e-05, "loss": 1.0837, "step": 6433 }, { "epoch": 0.37, "grad_norm": 1.682637333869934, "learning_rate": 1.4555125685054519e-05, "loss": 0.9274, "step": 6434 }, { "epoch": 0.37, "grad_norm": 1.8780604600906372, "learning_rate": 1.4553471906144347e-05, "loss": 1.0211, "step": 6435 }, { "epoch": 0.37, "grad_norm": 1.812703251838684, "learning_rate": 1.455181797010658e-05, "loss": 0.9943, "step": 6436 }, { "epoch": 0.37, "grad_norm": 1.7835032939910889, "learning_rate": 1.4550163876998288e-05, "loss": 1.0734, "step": 6437 }, { "epoch": 0.37, "grad_norm": 1.8049976825714111, "learning_rate": 1.4548509626876554e-05, "loss": 1.0012, "step": 6438 }, { "epoch": 0.37, "grad_norm": 1.6568845510482788, "learning_rate": 1.454685521979846e-05, "loss": 1.0279, "step": 6439 }, { "epoch": 0.37, "grad_norm": 1.812314510345459, "learning_rate": 1.454520065582109e-05, "loss": 0.9825, "step": 6440 }, { "epoch": 0.37, "grad_norm": 1.9423649311065674, "learning_rate": 1.4543545935001544e-05, "loss": 1.0251, "step": 6441 }, { "epoch": 0.37, "grad_norm": 1.859851360321045, "learning_rate": 1.4541891057396917e-05, "loss": 1.012, "step": 6442 }, { "epoch": 0.37, "grad_norm": 1.8464343547821045, "learning_rate": 1.454023602306432e-05, "loss": 0.9928, "step": 6443 }, { "epoch": 0.37, "grad_norm": 1.9029483795166016, "learning_rate": 1.4538580832060861e-05, "loss": 1.025, "step": 6444 }, { "epoch": 0.37, "grad_norm": 1.5566385984420776, "learning_rate": 1.4536925484443653e-05, "loss": 0.9894, "step": 6445 }, { "epoch": 0.37, "grad_norm": 1.5982820987701416, "learning_rate": 1.4535269980269822e-05, "loss": 0.9552, "step": 6446 }, { "epoch": 0.37, "grad_norm": 1.888586401939392, "learning_rate": 1.4533614319596489e-05, "loss": 1.0156, "step": 6447 }, { "epoch": 0.37, "grad_norm": 1.7846451997756958, "learning_rate": 1.4531958502480794e-05, "loss": 0.8955, "step": 6448 }, { "epoch": 0.37, "grad_norm": 1.7921074628829956, "learning_rate": 1.4530302528979868e-05, "loss": 1.0495, "step": 6449 }, { "epoch": 0.37, "grad_norm": 1.711888313293457, "learning_rate": 1.4528646399150857e-05, "loss": 0.9183, "step": 6450 }, { "epoch": 0.37, "grad_norm": 1.621599793434143, "learning_rate": 1.452699011305091e-05, "loss": 1.0458, "step": 6451 }, { "epoch": 0.37, "grad_norm": 1.770071029663086, "learning_rate": 1.4525333670737181e-05, "loss": 1.0048, "step": 6452 }, { "epoch": 0.37, "grad_norm": 1.7624177932739258, "learning_rate": 1.4523677072266825e-05, "loss": 0.956, "step": 6453 }, { "epoch": 0.37, "grad_norm": 1.8492014408111572, "learning_rate": 1.452202031769701e-05, "loss": 1.0619, "step": 6454 }, { "epoch": 0.37, "grad_norm": 1.7370258569717407, "learning_rate": 1.4520363407084905e-05, "loss": 1.0078, "step": 6455 }, { "epoch": 0.37, "grad_norm": 1.7034357786178589, "learning_rate": 1.4518706340487689e-05, "loss": 0.9863, "step": 6456 }, { "epoch": 0.37, "grad_norm": 1.729231834411621, "learning_rate": 1.4517049117962539e-05, "loss": 1.039, "step": 6457 }, { "epoch": 0.37, "grad_norm": 1.7120835781097412, "learning_rate": 1.4515391739566642e-05, "loss": 0.9876, "step": 6458 }, { "epoch": 0.37, "grad_norm": 1.7186849117279053, "learning_rate": 1.4513734205357186e-05, "loss": 0.9244, "step": 6459 }, { "epoch": 0.37, "grad_norm": 1.9158945083618164, "learning_rate": 1.4512076515391375e-05, "loss": 0.9648, "step": 6460 }, { "epoch": 0.37, "grad_norm": 2.0482406616210938, "learning_rate": 1.4510418669726407e-05, "loss": 0.946, "step": 6461 }, { "epoch": 0.37, "grad_norm": 1.9669641256332397, "learning_rate": 1.4508760668419489e-05, "loss": 0.9763, "step": 6462 }, { "epoch": 0.37, "grad_norm": 1.8329441547393799, "learning_rate": 1.4507102511527834e-05, "loss": 1.0406, "step": 6463 }, { "epoch": 0.37, "grad_norm": 1.0751601457595825, "learning_rate": 1.4505444199108662e-05, "loss": 0.6183, "step": 6464 }, { "epoch": 0.37, "grad_norm": 1.8225510120391846, "learning_rate": 1.4503785731219195e-05, "loss": 0.9144, "step": 6465 }, { "epoch": 0.37, "grad_norm": 1.6742932796478271, "learning_rate": 1.4502127107916666e-05, "loss": 0.973, "step": 6466 }, { "epoch": 0.37, "grad_norm": 1.7082477807998657, "learning_rate": 1.4500468329258305e-05, "loss": 1.0539, "step": 6467 }, { "epoch": 0.37, "grad_norm": 1.8570054769515991, "learning_rate": 1.4498809395301356e-05, "loss": 1.0599, "step": 6468 }, { "epoch": 0.37, "grad_norm": 1.7183752059936523, "learning_rate": 1.4497150306103061e-05, "loss": 0.9962, "step": 6469 }, { "epoch": 0.37, "grad_norm": 1.8262039422988892, "learning_rate": 1.4495491061720671e-05, "loss": 0.9805, "step": 6470 }, { "epoch": 0.37, "grad_norm": 1.7520779371261597, "learning_rate": 1.4493831662211439e-05, "loss": 0.9918, "step": 6471 }, { "epoch": 0.37, "grad_norm": 1.983081579208374, "learning_rate": 1.449217210763263e-05, "loss": 1.0585, "step": 6472 }, { "epoch": 0.37, "grad_norm": 1.738904356956482, "learning_rate": 1.4490512398041515e-05, "loss": 0.9685, "step": 6473 }, { "epoch": 0.37, "grad_norm": 1.8170249462127686, "learning_rate": 1.4488852533495357e-05, "loss": 0.9681, "step": 6474 }, { "epoch": 0.37, "grad_norm": 1.8998768329620361, "learning_rate": 1.4487192514051437e-05, "loss": 1.015, "step": 6475 }, { "epoch": 0.37, "grad_norm": 1.7171269655227661, "learning_rate": 1.4485532339767036e-05, "loss": 0.9578, "step": 6476 }, { "epoch": 0.37, "grad_norm": 1.7854276895523071, "learning_rate": 1.4483872010699446e-05, "loss": 1.0138, "step": 6477 }, { "epoch": 0.37, "grad_norm": 1.6501131057739258, "learning_rate": 1.448221152690596e-05, "loss": 1.0863, "step": 6478 }, { "epoch": 0.37, "grad_norm": 1.7809715270996094, "learning_rate": 1.4480550888443871e-05, "loss": 0.9552, "step": 6479 }, { "epoch": 0.37, "grad_norm": 1.7217483520507812, "learning_rate": 1.4478890095370491e-05, "loss": 0.9768, "step": 6480 }, { "epoch": 0.37, "grad_norm": 1.8227529525756836, "learning_rate": 1.447722914774312e-05, "loss": 0.9444, "step": 6481 }, { "epoch": 0.37, "grad_norm": 1.9200953245162964, "learning_rate": 1.4475568045619084e-05, "loss": 1.1285, "step": 6482 }, { "epoch": 0.37, "grad_norm": 1.7601783275604248, "learning_rate": 1.4473906789055692e-05, "loss": 0.9523, "step": 6483 }, { "epoch": 0.37, "grad_norm": 1.842558741569519, "learning_rate": 1.4472245378110276e-05, "loss": 0.942, "step": 6484 }, { "epoch": 0.37, "grad_norm": 1.9267534017562866, "learning_rate": 1.4470583812840164e-05, "loss": 0.9987, "step": 6485 }, { "epoch": 0.37, "grad_norm": 2.05393123626709, "learning_rate": 1.4468922093302693e-05, "loss": 1.0468, "step": 6486 }, { "epoch": 0.37, "grad_norm": 1.6043117046356201, "learning_rate": 1.44672602195552e-05, "loss": 0.954, "step": 6487 }, { "epoch": 0.37, "grad_norm": 1.1881109476089478, "learning_rate": 1.4465598191655042e-05, "loss": 0.6342, "step": 6488 }, { "epoch": 0.37, "grad_norm": 1.7878910303115845, "learning_rate": 1.4463936009659563e-05, "loss": 1.0544, "step": 6489 }, { "epoch": 0.37, "grad_norm": 1.8502981662750244, "learning_rate": 1.446227367362612e-05, "loss": 0.9376, "step": 6490 }, { "epoch": 0.37, "grad_norm": 1.0396090745925903, "learning_rate": 1.4460611183612074e-05, "loss": 0.526, "step": 6491 }, { "epoch": 0.37, "grad_norm": 1.8148415088653564, "learning_rate": 1.4458948539674802e-05, "loss": 0.9569, "step": 6492 }, { "epoch": 0.37, "grad_norm": 1.768983244895935, "learning_rate": 1.445728574187167e-05, "loss": 0.9423, "step": 6493 }, { "epoch": 0.37, "grad_norm": 1.862540364265442, "learning_rate": 1.4455622790260057e-05, "loss": 1.0599, "step": 6494 }, { "epoch": 0.37, "grad_norm": 1.8864184617996216, "learning_rate": 1.445395968489735e-05, "loss": 1.0239, "step": 6495 }, { "epoch": 0.37, "grad_norm": 1.8307377099990845, "learning_rate": 1.4452296425840935e-05, "loss": 1.0407, "step": 6496 }, { "epoch": 0.37, "grad_norm": 1.954124093055725, "learning_rate": 1.4450633013148205e-05, "loss": 0.963, "step": 6497 }, { "epoch": 0.37, "grad_norm": 1.7054812908172607, "learning_rate": 1.4448969446876567e-05, "loss": 0.9572, "step": 6498 }, { "epoch": 0.37, "grad_norm": 1.7556042671203613, "learning_rate": 1.4447305727083416e-05, "loss": 1.0189, "step": 6499 }, { "epoch": 0.37, "grad_norm": 1.8345447778701782, "learning_rate": 1.4445641853826172e-05, "loss": 0.9704, "step": 6500 }, { "epoch": 0.37, "grad_norm": 2.005666732788086, "learning_rate": 1.4443977827162242e-05, "loss": 1.0405, "step": 6501 }, { "epoch": 0.37, "grad_norm": 2.002800941467285, "learning_rate": 1.4442313647149053e-05, "loss": 0.952, "step": 6502 }, { "epoch": 0.37, "grad_norm": 1.8509241342544556, "learning_rate": 1.4440649313844026e-05, "loss": 1.0527, "step": 6503 }, { "epoch": 0.37, "grad_norm": 1.7495306730270386, "learning_rate": 1.44389848273046e-05, "loss": 0.9529, "step": 6504 }, { "epoch": 0.37, "grad_norm": 1.722464919090271, "learning_rate": 1.4437320187588204e-05, "loss": 0.9982, "step": 6505 }, { "epoch": 0.37, "grad_norm": 1.8324607610702515, "learning_rate": 1.4435655394752287e-05, "loss": 0.9718, "step": 6506 }, { "epoch": 0.37, "grad_norm": 1.6986242532730103, "learning_rate": 1.4433990448854289e-05, "loss": 0.9991, "step": 6507 }, { "epoch": 0.37, "grad_norm": 1.8668314218521118, "learning_rate": 1.4432325349951668e-05, "loss": 0.9255, "step": 6508 }, { "epoch": 0.37, "grad_norm": 1.831986665725708, "learning_rate": 1.443066009810188e-05, "loss": 1.0221, "step": 6509 }, { "epoch": 0.37, "grad_norm": 2.221524715423584, "learning_rate": 1.442899469336239e-05, "loss": 1.0437, "step": 6510 }, { "epoch": 0.37, "grad_norm": 1.8463190793991089, "learning_rate": 1.4427329135790667e-05, "loss": 1.0553, "step": 6511 }, { "epoch": 0.37, "grad_norm": 1.7398473024368286, "learning_rate": 1.4425663425444179e-05, "loss": 1.0165, "step": 6512 }, { "epoch": 0.37, "grad_norm": 1.7463241815567017, "learning_rate": 1.442399756238041e-05, "loss": 1.0556, "step": 6513 }, { "epoch": 0.37, "grad_norm": 1.7856740951538086, "learning_rate": 1.4422331546656842e-05, "loss": 1.0234, "step": 6514 }, { "epoch": 0.37, "grad_norm": 1.804658055305481, "learning_rate": 1.4420665378330964e-05, "loss": 0.9906, "step": 6515 }, { "epoch": 0.37, "grad_norm": 1.9732418060302734, "learning_rate": 1.4418999057460277e-05, "loss": 1.0376, "step": 6516 }, { "epoch": 0.37, "grad_norm": 1.8557568788528442, "learning_rate": 1.4417332584102273e-05, "loss": 1.0359, "step": 6517 }, { "epoch": 0.37, "grad_norm": 2.0801892280578613, "learning_rate": 1.4415665958314465e-05, "loss": 1.0006, "step": 6518 }, { "epoch": 0.37, "grad_norm": 1.638874888420105, "learning_rate": 1.4413999180154355e-05, "loss": 1.0863, "step": 6519 }, { "epoch": 0.37, "grad_norm": 1.7250996828079224, "learning_rate": 1.4412332249679463e-05, "loss": 1.0019, "step": 6520 }, { "epoch": 0.37, "grad_norm": 1.8014070987701416, "learning_rate": 1.4410665166947312e-05, "loss": 1.0091, "step": 6521 }, { "epoch": 0.37, "grad_norm": 2.0862135887145996, "learning_rate": 1.4408997932015428e-05, "loss": 1.0418, "step": 6522 }, { "epoch": 0.37, "grad_norm": 1.7028090953826904, "learning_rate": 1.440733054494134e-05, "loss": 0.98, "step": 6523 }, { "epoch": 0.37, "grad_norm": 1.893875002861023, "learning_rate": 1.440566300578259e-05, "loss": 1.0148, "step": 6524 }, { "epoch": 0.37, "grad_norm": 1.834351658821106, "learning_rate": 1.4403995314596711e-05, "loss": 1.0467, "step": 6525 }, { "epoch": 0.37, "grad_norm": 1.9340988397598267, "learning_rate": 1.4402327471441257e-05, "loss": 1.0258, "step": 6526 }, { "epoch": 0.37, "grad_norm": 1.6812983751296997, "learning_rate": 1.4400659476373781e-05, "loss": 0.9937, "step": 6527 }, { "epoch": 0.37, "grad_norm": 2.060377359390259, "learning_rate": 1.439899132945184e-05, "loss": 1.0227, "step": 6528 }, { "epoch": 0.37, "grad_norm": 1.7195736169815063, "learning_rate": 1.4397323030732994e-05, "loss": 1.0075, "step": 6529 }, { "epoch": 0.37, "grad_norm": 1.198493242263794, "learning_rate": 1.4395654580274814e-05, "loss": 0.5961, "step": 6530 }, { "epoch": 0.37, "grad_norm": 1.8344513177871704, "learning_rate": 1.4393985978134874e-05, "loss": 1.1127, "step": 6531 }, { "epoch": 0.37, "grad_norm": 1.5952489376068115, "learning_rate": 1.4392317224370751e-05, "loss": 0.9578, "step": 6532 }, { "epoch": 0.37, "grad_norm": 1.8457087278366089, "learning_rate": 1.4390648319040032e-05, "loss": 1.0494, "step": 6533 }, { "epoch": 0.37, "grad_norm": 1.8811098337173462, "learning_rate": 1.4388979262200302e-05, "loss": 1.0539, "step": 6534 }, { "epoch": 0.37, "grad_norm": 1.7097623348236084, "learning_rate": 1.438731005390916e-05, "loss": 0.9877, "step": 6535 }, { "epoch": 0.37, "grad_norm": 1.8394737243652344, "learning_rate": 1.4385640694224203e-05, "loss": 1.0304, "step": 6536 }, { "epoch": 0.37, "grad_norm": 1.7272309064865112, "learning_rate": 1.4383971183203036e-05, "loss": 1.0012, "step": 6537 }, { "epoch": 0.37, "grad_norm": 2.0144572257995605, "learning_rate": 1.4382301520903267e-05, "loss": 0.9956, "step": 6538 }, { "epoch": 0.38, "grad_norm": 1.7602953910827637, "learning_rate": 1.4380631707382517e-05, "loss": 0.9671, "step": 6539 }, { "epoch": 0.38, "grad_norm": 1.8607141971588135, "learning_rate": 1.4378961742698403e-05, "loss": 1.0021, "step": 6540 }, { "epoch": 0.38, "grad_norm": 1.8680787086486816, "learning_rate": 1.4377291626908552e-05, "loss": 0.9664, "step": 6541 }, { "epoch": 0.38, "grad_norm": 1.6423228979110718, "learning_rate": 1.4375621360070593e-05, "loss": 0.9743, "step": 6542 }, { "epoch": 0.38, "grad_norm": 1.8883764743804932, "learning_rate": 1.4373950942242166e-05, "loss": 0.9784, "step": 6543 }, { "epoch": 0.38, "grad_norm": 1.9475061893463135, "learning_rate": 1.4372280373480907e-05, "loss": 1.0052, "step": 6544 }, { "epoch": 0.38, "grad_norm": 1.9167377948760986, "learning_rate": 1.4370609653844467e-05, "loss": 0.9683, "step": 6545 }, { "epoch": 0.38, "grad_norm": 1.782257318496704, "learning_rate": 1.4368938783390498e-05, "loss": 0.9838, "step": 6546 }, { "epoch": 0.38, "grad_norm": 1.8462469577789307, "learning_rate": 1.4367267762176655e-05, "loss": 1.0275, "step": 6547 }, { "epoch": 0.38, "grad_norm": 1.7571134567260742, "learning_rate": 1.43655965902606e-05, "loss": 0.9948, "step": 6548 }, { "epoch": 0.38, "grad_norm": 1.936461091041565, "learning_rate": 1.4363925267700003e-05, "loss": 1.046, "step": 6549 }, { "epoch": 0.38, "grad_norm": 1.8618601560592651, "learning_rate": 1.4362253794552534e-05, "loss": 1.0277, "step": 6550 }, { "epoch": 0.38, "grad_norm": 1.6052945852279663, "learning_rate": 1.4360582170875872e-05, "loss": 0.9506, "step": 6551 }, { "epoch": 0.38, "grad_norm": 1.8968398571014404, "learning_rate": 1.4358910396727701e-05, "loss": 0.9646, "step": 6552 }, { "epoch": 0.38, "grad_norm": 1.9234265089035034, "learning_rate": 1.4357238472165707e-05, "loss": 0.9882, "step": 6553 }, { "epoch": 0.38, "grad_norm": 1.7836437225341797, "learning_rate": 1.4355566397247584e-05, "loss": 0.9707, "step": 6554 }, { "epoch": 0.38, "grad_norm": 2.2310750484466553, "learning_rate": 1.4353894172031033e-05, "loss": 1.0473, "step": 6555 }, { "epoch": 0.38, "grad_norm": 2.0779621601104736, "learning_rate": 1.4352221796573758e-05, "loss": 0.9836, "step": 6556 }, { "epoch": 0.38, "grad_norm": 1.8310410976409912, "learning_rate": 1.4350549270933463e-05, "loss": 1.0107, "step": 6557 }, { "epoch": 0.38, "grad_norm": 1.7569674253463745, "learning_rate": 1.4348876595167865e-05, "loss": 0.9478, "step": 6558 }, { "epoch": 0.38, "grad_norm": 1.3535518646240234, "learning_rate": 1.4347203769334685e-05, "loss": 0.643, "step": 6559 }, { "epoch": 0.38, "grad_norm": 1.6123547554016113, "learning_rate": 1.434553079349165e-05, "loss": 1.0544, "step": 6560 }, { "epoch": 0.38, "grad_norm": 2.4126694202423096, "learning_rate": 1.4343857667696481e-05, "loss": 0.9932, "step": 6561 }, { "epoch": 0.38, "grad_norm": 1.9189002513885498, "learning_rate": 1.4342184392006922e-05, "loss": 0.9629, "step": 6562 }, { "epoch": 0.38, "grad_norm": 1.8644987344741821, "learning_rate": 1.4340510966480707e-05, "loss": 1.0123, "step": 6563 }, { "epoch": 0.38, "grad_norm": 1.769840121269226, "learning_rate": 1.4338837391175582e-05, "loss": 0.975, "step": 6564 }, { "epoch": 0.38, "grad_norm": 1.8416407108306885, "learning_rate": 1.4337163666149301e-05, "loss": 0.9625, "step": 6565 }, { "epoch": 0.38, "grad_norm": 0.9921787977218628, "learning_rate": 1.4335489791459615e-05, "loss": 0.6578, "step": 6566 }, { "epoch": 0.38, "grad_norm": 1.8825032711029053, "learning_rate": 1.433381576716429e-05, "loss": 1.0717, "step": 6567 }, { "epoch": 0.38, "grad_norm": 1.7608774900436401, "learning_rate": 1.4332141593321087e-05, "loss": 0.9692, "step": 6568 }, { "epoch": 0.38, "grad_norm": 1.8526568412780762, "learning_rate": 1.4330467269987778e-05, "loss": 1.0264, "step": 6569 }, { "epoch": 0.38, "grad_norm": 2.2145869731903076, "learning_rate": 1.4328792797222142e-05, "loss": 1.0822, "step": 6570 }, { "epoch": 0.38, "grad_norm": 1.8882297277450562, "learning_rate": 1.4327118175081957e-05, "loss": 1.0456, "step": 6571 }, { "epoch": 0.38, "grad_norm": 1.6885595321655273, "learning_rate": 1.4325443403625012e-05, "loss": 1.0059, "step": 6572 }, { "epoch": 0.38, "grad_norm": 1.6838698387145996, "learning_rate": 1.4323768482909097e-05, "loss": 0.9376, "step": 6573 }, { "epoch": 0.38, "grad_norm": 1.997868299484253, "learning_rate": 1.432209341299201e-05, "loss": 0.9534, "step": 6574 }, { "epoch": 0.38, "grad_norm": 1.8583295345306396, "learning_rate": 1.4320418193931556e-05, "loss": 1.1367, "step": 6575 }, { "epoch": 0.38, "grad_norm": 1.7825112342834473, "learning_rate": 1.4318742825785535e-05, "loss": 1.0238, "step": 6576 }, { "epoch": 0.38, "grad_norm": 1.830778956413269, "learning_rate": 1.4317067308611762e-05, "loss": 1.0095, "step": 6577 }, { "epoch": 0.38, "grad_norm": 1.7713563442230225, "learning_rate": 1.4315391642468059e-05, "loss": 1.0108, "step": 6578 }, { "epoch": 0.38, "grad_norm": 1.8108627796173096, "learning_rate": 1.4313715827412243e-05, "loss": 0.9999, "step": 6579 }, { "epoch": 0.38, "grad_norm": 1.9374496936798096, "learning_rate": 1.4312039863502145e-05, "loss": 0.9924, "step": 6580 }, { "epoch": 0.38, "grad_norm": 1.8295040130615234, "learning_rate": 1.4310363750795593e-05, "loss": 1.0622, "step": 6581 }, { "epoch": 0.38, "grad_norm": 1.069732666015625, "learning_rate": 1.430868748935043e-05, "loss": 0.596, "step": 6582 }, { "epoch": 0.38, "grad_norm": 1.973830223083496, "learning_rate": 1.4307011079224498e-05, "loss": 1.0789, "step": 6583 }, { "epoch": 0.38, "grad_norm": 1.6908007860183716, "learning_rate": 1.4305334520475647e-05, "loss": 0.8879, "step": 6584 }, { "epoch": 0.38, "grad_norm": 1.850256085395813, "learning_rate": 1.4303657813161725e-05, "loss": 1.0637, "step": 6585 }, { "epoch": 0.38, "grad_norm": 1.7185535430908203, "learning_rate": 1.4301980957340593e-05, "loss": 0.9622, "step": 6586 }, { "epoch": 0.38, "grad_norm": 1.8960317373275757, "learning_rate": 1.4300303953070118e-05, "loss": 0.9339, "step": 6587 }, { "epoch": 0.38, "grad_norm": 0.9350983500480652, "learning_rate": 1.4298626800408166e-05, "loss": 0.5962, "step": 6588 }, { "epoch": 0.38, "grad_norm": 1.9939955472946167, "learning_rate": 1.4296949499412609e-05, "loss": 1.0499, "step": 6589 }, { "epoch": 0.38, "grad_norm": 1.7241488695144653, "learning_rate": 1.4295272050141329e-05, "loss": 0.9086, "step": 6590 }, { "epoch": 0.38, "grad_norm": 1.9936388731002808, "learning_rate": 1.4293594452652212e-05, "loss": 1.0211, "step": 6591 }, { "epoch": 0.38, "grad_norm": 1.8468677997589111, "learning_rate": 1.429191670700314e-05, "loss": 1.0188, "step": 6592 }, { "epoch": 0.38, "grad_norm": 1.759409785270691, "learning_rate": 1.4290238813252011e-05, "loss": 1.0064, "step": 6593 }, { "epoch": 0.38, "grad_norm": 1.863101840019226, "learning_rate": 1.4288560771456727e-05, "loss": 1.005, "step": 6594 }, { "epoch": 0.38, "grad_norm": 1.7478598356246948, "learning_rate": 1.428688258167519e-05, "loss": 0.9722, "step": 6595 }, { "epoch": 0.38, "grad_norm": 1.8459408283233643, "learning_rate": 1.4285204243965307e-05, "loss": 1.0568, "step": 6596 }, { "epoch": 0.38, "grad_norm": 1.86139714717865, "learning_rate": 1.4283525758385e-05, "loss": 0.9188, "step": 6597 }, { "epoch": 0.38, "grad_norm": 1.7600229978561401, "learning_rate": 1.4281847124992181e-05, "loss": 0.9026, "step": 6598 }, { "epoch": 0.38, "grad_norm": 1.7581877708435059, "learning_rate": 1.428016834384478e-05, "loss": 0.9924, "step": 6599 }, { "epoch": 0.38, "grad_norm": 1.7071934938430786, "learning_rate": 1.4278489415000727e-05, "loss": 1.0168, "step": 6600 }, { "epoch": 0.38, "grad_norm": 1.8092888593673706, "learning_rate": 1.4276810338517955e-05, "loss": 1.0107, "step": 6601 }, { "epoch": 0.38, "grad_norm": 1.741211175918579, "learning_rate": 1.4275131114454405e-05, "loss": 1.008, "step": 6602 }, { "epoch": 0.38, "grad_norm": 1.9269744157791138, "learning_rate": 1.4273451742868023e-05, "loss": 1.0116, "step": 6603 }, { "epoch": 0.38, "grad_norm": 1.9591504335403442, "learning_rate": 1.4271772223816758e-05, "loss": 1.0416, "step": 6604 }, { "epoch": 0.38, "grad_norm": 2.297698736190796, "learning_rate": 1.4270092557358566e-05, "loss": 1.0133, "step": 6605 }, { "epoch": 0.38, "grad_norm": 2.0246243476867676, "learning_rate": 1.426841274355141e-05, "loss": 1.0371, "step": 6606 }, { "epoch": 0.38, "grad_norm": 1.7749741077423096, "learning_rate": 1.4266732782453252e-05, "loss": 1.0152, "step": 6607 }, { "epoch": 0.38, "grad_norm": 1.7851496934890747, "learning_rate": 1.4265052674122063e-05, "loss": 0.9268, "step": 6608 }, { "epoch": 0.38, "grad_norm": 2.0950188636779785, "learning_rate": 1.426337241861582e-05, "loss": 0.9803, "step": 6609 }, { "epoch": 0.38, "grad_norm": 1.8657783269882202, "learning_rate": 1.4261692015992505e-05, "loss": 1.0346, "step": 6610 }, { "epoch": 0.38, "grad_norm": 1.8111631870269775, "learning_rate": 1.4260011466310104e-05, "loss": 0.9339, "step": 6611 }, { "epoch": 0.38, "grad_norm": 2.068686008453369, "learning_rate": 1.4258330769626607e-05, "loss": 1.0543, "step": 6612 }, { "epoch": 0.38, "grad_norm": 1.8311845064163208, "learning_rate": 1.425664992600001e-05, "loss": 0.9377, "step": 6613 }, { "epoch": 0.38, "grad_norm": 1.7941935062408447, "learning_rate": 1.4254968935488314e-05, "loss": 0.9926, "step": 6614 }, { "epoch": 0.38, "grad_norm": 1.934528112411499, "learning_rate": 1.4253287798149526e-05, "loss": 0.9832, "step": 6615 }, { "epoch": 0.38, "grad_norm": 1.912278175354004, "learning_rate": 1.4251606514041659e-05, "loss": 1.0557, "step": 6616 }, { "epoch": 0.38, "grad_norm": 1.6515284776687622, "learning_rate": 1.4249925083222724e-05, "loss": 0.9598, "step": 6617 }, { "epoch": 0.38, "grad_norm": 1.774816870689392, "learning_rate": 1.424824350575075e-05, "loss": 1.0422, "step": 6618 }, { "epoch": 0.38, "grad_norm": 1.816463828086853, "learning_rate": 1.4246561781683754e-05, "loss": 1.0368, "step": 6619 }, { "epoch": 0.38, "grad_norm": 1.824295163154602, "learning_rate": 1.4244879911079779e-05, "loss": 0.9043, "step": 6620 }, { "epoch": 0.38, "grad_norm": 1.8551970720291138, "learning_rate": 1.4243197893996855e-05, "loss": 0.9994, "step": 6621 }, { "epoch": 0.38, "grad_norm": 1.8014181852340698, "learning_rate": 1.4241515730493021e-05, "loss": 1.0175, "step": 6622 }, { "epoch": 0.38, "grad_norm": 1.9140993356704712, "learning_rate": 1.4239833420626328e-05, "loss": 0.9517, "step": 6623 }, { "epoch": 0.38, "grad_norm": 1.9484046697616577, "learning_rate": 1.423815096445483e-05, "loss": 1.0351, "step": 6624 }, { "epoch": 0.38, "grad_norm": 1.500910758972168, "learning_rate": 1.423646836203658e-05, "loss": 0.953, "step": 6625 }, { "epoch": 0.38, "grad_norm": 1.5796787738800049, "learning_rate": 1.423478561342964e-05, "loss": 0.939, "step": 6626 }, { "epoch": 0.38, "grad_norm": 1.881302833557129, "learning_rate": 1.4233102718692078e-05, "loss": 1.0325, "step": 6627 }, { "epoch": 0.38, "grad_norm": 1.8480826616287231, "learning_rate": 1.4231419677881966e-05, "loss": 0.9877, "step": 6628 }, { "epoch": 0.38, "grad_norm": 1.8555617332458496, "learning_rate": 1.4229736491057382e-05, "loss": 1.027, "step": 6629 }, { "epoch": 0.38, "grad_norm": 1.8782488107681274, "learning_rate": 1.4228053158276407e-05, "loss": 0.9052, "step": 6630 }, { "epoch": 0.38, "grad_norm": 1.79643714427948, "learning_rate": 1.4226369679597127e-05, "loss": 0.9605, "step": 6631 }, { "epoch": 0.38, "grad_norm": 1.7273995876312256, "learning_rate": 1.4224686055077636e-05, "loss": 0.9487, "step": 6632 }, { "epoch": 0.38, "grad_norm": 1.8837653398513794, "learning_rate": 1.422300228477603e-05, "loss": 1.0666, "step": 6633 }, { "epoch": 0.38, "grad_norm": 1.7214385271072388, "learning_rate": 1.4221318368750411e-05, "loss": 0.9393, "step": 6634 }, { "epoch": 0.38, "grad_norm": 1.8205844163894653, "learning_rate": 1.4219634307058888e-05, "loss": 0.973, "step": 6635 }, { "epoch": 0.38, "grad_norm": 1.814313292503357, "learning_rate": 1.421795009975957e-05, "loss": 0.9621, "step": 6636 }, { "epoch": 0.38, "grad_norm": 1.6666721105575562, "learning_rate": 1.4216265746910579e-05, "loss": 0.9338, "step": 6637 }, { "epoch": 0.38, "grad_norm": 1.8279637098312378, "learning_rate": 1.4214581248570034e-05, "loss": 0.9916, "step": 6638 }, { "epoch": 0.38, "grad_norm": 1.8509894609451294, "learning_rate": 1.4212896604796064e-05, "loss": 1.0023, "step": 6639 }, { "epoch": 0.38, "grad_norm": 1.7838656902313232, "learning_rate": 1.42112118156468e-05, "loss": 0.9304, "step": 6640 }, { "epoch": 0.38, "grad_norm": 1.7575708627700806, "learning_rate": 1.4209526881180377e-05, "loss": 0.9876, "step": 6641 }, { "epoch": 0.38, "grad_norm": 1.1256905794143677, "learning_rate": 1.4207841801454945e-05, "loss": 0.6455, "step": 6642 }, { "epoch": 0.38, "grad_norm": 1.654044508934021, "learning_rate": 1.4206156576528643e-05, "loss": 0.9592, "step": 6643 }, { "epoch": 0.38, "grad_norm": 1.731591820716858, "learning_rate": 1.4204471206459629e-05, "loss": 0.9452, "step": 6644 }, { "epoch": 0.38, "grad_norm": 1.7719027996063232, "learning_rate": 1.4202785691306056e-05, "loss": 1.0676, "step": 6645 }, { "epoch": 0.38, "grad_norm": 1.768100619316101, "learning_rate": 1.4201100031126091e-05, "loss": 0.9503, "step": 6646 }, { "epoch": 0.38, "grad_norm": 2.136711597442627, "learning_rate": 1.4199414225977897e-05, "loss": 1.0227, "step": 6647 }, { "epoch": 0.38, "grad_norm": 1.775230884552002, "learning_rate": 1.4197728275919649e-05, "loss": 1.0366, "step": 6648 }, { "epoch": 0.38, "grad_norm": 1.9589234590530396, "learning_rate": 1.4196042181009525e-05, "loss": 1.0704, "step": 6649 }, { "epoch": 0.38, "grad_norm": 1.8622713088989258, "learning_rate": 1.4194355941305706e-05, "loss": 1.0001, "step": 6650 }, { "epoch": 0.38, "grad_norm": 1.9297192096710205, "learning_rate": 1.419266955686638e-05, "loss": 1.0856, "step": 6651 }, { "epoch": 0.38, "grad_norm": 1.7351832389831543, "learning_rate": 1.419098302774974e-05, "loss": 0.9397, "step": 6652 }, { "epoch": 0.38, "grad_norm": 1.7967897653579712, "learning_rate": 1.4189296354013982e-05, "loss": 0.9777, "step": 6653 }, { "epoch": 0.38, "grad_norm": 1.6718227863311768, "learning_rate": 1.418760953571731e-05, "loss": 1.0362, "step": 6654 }, { "epoch": 0.38, "grad_norm": 1.8721386194229126, "learning_rate": 1.4185922572917931e-05, "loss": 0.8875, "step": 6655 }, { "epoch": 0.38, "grad_norm": 1.6840529441833496, "learning_rate": 1.4184235465674055e-05, "loss": 0.9631, "step": 6656 }, { "epoch": 0.38, "grad_norm": 1.6847478151321411, "learning_rate": 1.4182548214043902e-05, "loss": 1.0208, "step": 6657 }, { "epoch": 0.38, "grad_norm": 1.7295044660568237, "learning_rate": 1.4180860818085695e-05, "loss": 0.867, "step": 6658 }, { "epoch": 0.38, "grad_norm": 2.013784408569336, "learning_rate": 1.4179173277857658e-05, "loss": 0.9899, "step": 6659 }, { "epoch": 0.38, "grad_norm": 1.6859959363937378, "learning_rate": 1.4177485593418028e-05, "loss": 0.9711, "step": 6660 }, { "epoch": 0.38, "grad_norm": 1.724387288093567, "learning_rate": 1.4175797764825036e-05, "loss": 1.0283, "step": 6661 }, { "epoch": 0.38, "grad_norm": 1.8912831544876099, "learning_rate": 1.417410979213693e-05, "loss": 1.0973, "step": 6662 }, { "epoch": 0.38, "grad_norm": 1.7722456455230713, "learning_rate": 1.4172421675411954e-05, "loss": 1.0363, "step": 6663 }, { "epoch": 0.38, "grad_norm": 1.0262619256973267, "learning_rate": 1.4170733414708363e-05, "loss": 0.6007, "step": 6664 }, { "epoch": 0.38, "grad_norm": 1.8084995746612549, "learning_rate": 1.4169045010084411e-05, "loss": 0.9921, "step": 6665 }, { "epoch": 0.38, "grad_norm": 2.0457794666290283, "learning_rate": 1.416735646159836e-05, "loss": 1.0544, "step": 6666 }, { "epoch": 0.38, "grad_norm": 1.8310672044754028, "learning_rate": 1.416566776930848e-05, "loss": 1.0378, "step": 6667 }, { "epoch": 0.38, "grad_norm": 1.8013845682144165, "learning_rate": 1.4163978933273041e-05, "loss": 0.9975, "step": 6668 }, { "epoch": 0.38, "grad_norm": 1.7868287563323975, "learning_rate": 1.4162289953550322e-05, "loss": 0.9903, "step": 6669 }, { "epoch": 0.38, "grad_norm": 1.899398922920227, "learning_rate": 1.4160600830198602e-05, "loss": 0.9949, "step": 6670 }, { "epoch": 0.38, "grad_norm": 1.8585964441299438, "learning_rate": 1.4158911563276172e-05, "loss": 0.9943, "step": 6671 }, { "epoch": 0.38, "grad_norm": 1.8636623620986938, "learning_rate": 1.415722215284132e-05, "loss": 1.0294, "step": 6672 }, { "epoch": 0.38, "grad_norm": 1.705580472946167, "learning_rate": 1.4155532598952345e-05, "loss": 0.9942, "step": 6673 }, { "epoch": 0.38, "grad_norm": 1.713517665863037, "learning_rate": 1.4153842901667543e-05, "loss": 1.0279, "step": 6674 }, { "epoch": 0.38, "grad_norm": 1.743633508682251, "learning_rate": 1.415215306104523e-05, "loss": 0.9224, "step": 6675 }, { "epoch": 0.38, "grad_norm": 1.8368037939071655, "learning_rate": 1.4150463077143712e-05, "loss": 0.995, "step": 6676 }, { "epoch": 0.38, "grad_norm": 1.9264042377471924, "learning_rate": 1.414877295002131e-05, "loss": 0.9746, "step": 6677 }, { "epoch": 0.38, "grad_norm": 1.871768832206726, "learning_rate": 1.4147082679736342e-05, "loss": 0.9933, "step": 6678 }, { "epoch": 0.38, "grad_norm": 1.6611647605895996, "learning_rate": 1.4145392266347133e-05, "loss": 1.0094, "step": 6679 }, { "epoch": 0.38, "grad_norm": 1.935665249824524, "learning_rate": 1.4143701709912017e-05, "loss": 1.0416, "step": 6680 }, { "epoch": 0.38, "grad_norm": 1.9268814325332642, "learning_rate": 1.4142011010489332e-05, "loss": 0.9669, "step": 6681 }, { "epoch": 0.38, "grad_norm": 2.011317014694214, "learning_rate": 1.4140320168137414e-05, "loss": 1.0625, "step": 6682 }, { "epoch": 0.38, "grad_norm": 1.7357138395309448, "learning_rate": 1.4138629182914617e-05, "loss": 1.0074, "step": 6683 }, { "epoch": 0.38, "grad_norm": 1.8733261823654175, "learning_rate": 1.4136938054879284e-05, "loss": 1.0088, "step": 6684 }, { "epoch": 0.38, "grad_norm": 1.8755565881729126, "learning_rate": 1.4135246784089774e-05, "loss": 0.957, "step": 6685 }, { "epoch": 0.38, "grad_norm": 1.7794195413589478, "learning_rate": 1.413355537060445e-05, "loss": 0.9682, "step": 6686 }, { "epoch": 0.38, "grad_norm": 1.5756385326385498, "learning_rate": 1.413186381448168e-05, "loss": 0.9283, "step": 6687 }, { "epoch": 0.38, "grad_norm": 1.9130840301513672, "learning_rate": 1.4130172115779828e-05, "loss": 1.1402, "step": 6688 }, { "epoch": 0.38, "grad_norm": 1.7617560625076294, "learning_rate": 1.412848027455727e-05, "loss": 0.9422, "step": 6689 }, { "epoch": 0.38, "grad_norm": 1.7759631872177124, "learning_rate": 1.4126788290872395e-05, "loss": 0.9418, "step": 6690 }, { "epoch": 0.38, "grad_norm": 1.7937480211257935, "learning_rate": 1.4125096164783586e-05, "loss": 0.9844, "step": 6691 }, { "epoch": 0.38, "grad_norm": 1.6398866176605225, "learning_rate": 1.4123403896349227e-05, "loss": 0.9924, "step": 6692 }, { "epoch": 0.38, "grad_norm": 1.7397969961166382, "learning_rate": 1.412171148562772e-05, "loss": 0.9804, "step": 6693 }, { "epoch": 0.38, "grad_norm": 3.4397199153900146, "learning_rate": 1.4120018932677461e-05, "loss": 1.0353, "step": 6694 }, { "epoch": 0.38, "grad_norm": 1.7143027782440186, "learning_rate": 1.411832623755686e-05, "loss": 0.9389, "step": 6695 }, { "epoch": 0.38, "grad_norm": 1.8062744140625, "learning_rate": 1.4116633400324325e-05, "loss": 0.9498, "step": 6696 }, { "epoch": 0.38, "grad_norm": 1.8488270044326782, "learning_rate": 1.411494042103827e-05, "loss": 0.9168, "step": 6697 }, { "epoch": 0.38, "grad_norm": 1.7461563348770142, "learning_rate": 1.4113247299757116e-05, "loss": 0.864, "step": 6698 }, { "epoch": 0.38, "grad_norm": 1.8032582998275757, "learning_rate": 1.4111554036539285e-05, "loss": 1.0144, "step": 6699 }, { "epoch": 0.38, "grad_norm": 1.9199655055999756, "learning_rate": 1.4109860631443214e-05, "loss": 1.0324, "step": 6700 }, { "epoch": 0.38, "grad_norm": 1.8765710592269897, "learning_rate": 1.4108167084527327e-05, "loss": 1.0288, "step": 6701 }, { "epoch": 0.38, "grad_norm": 1.9236133098602295, "learning_rate": 1.4106473395850073e-05, "loss": 1.003, "step": 6702 }, { "epoch": 0.38, "grad_norm": 1.9396934509277344, "learning_rate": 1.4104779565469892e-05, "loss": 1.0093, "step": 6703 }, { "epoch": 0.38, "grad_norm": 1.7690138816833496, "learning_rate": 1.4103085593445236e-05, "loss": 0.9209, "step": 6704 }, { "epoch": 0.38, "grad_norm": 1.8163502216339111, "learning_rate": 1.4101391479834558e-05, "loss": 1.0551, "step": 6705 }, { "epoch": 0.38, "grad_norm": 1.6216959953308105, "learning_rate": 1.4099697224696316e-05, "loss": 1.0234, "step": 6706 }, { "epoch": 0.38, "grad_norm": 1.9014475345611572, "learning_rate": 1.4098002828088974e-05, "loss": 1.0392, "step": 6707 }, { "epoch": 0.38, "grad_norm": 1.1175824403762817, "learning_rate": 1.4096308290071003e-05, "loss": 0.603, "step": 6708 }, { "epoch": 0.38, "grad_norm": 1.0823798179626465, "learning_rate": 1.4094613610700876e-05, "loss": 0.6117, "step": 6709 }, { "epoch": 0.38, "grad_norm": 1.9367735385894775, "learning_rate": 1.4092918790037069e-05, "loss": 1.0192, "step": 6710 }, { "epoch": 0.38, "grad_norm": 2.0108258724212646, "learning_rate": 1.4091223828138068e-05, "loss": 1.0669, "step": 6711 }, { "epoch": 0.38, "grad_norm": 1.77130126953125, "learning_rate": 1.4089528725062362e-05, "loss": 1.011, "step": 6712 }, { "epoch": 0.39, "grad_norm": 1.7122178077697754, "learning_rate": 1.4087833480868442e-05, "loss": 0.9043, "step": 6713 }, { "epoch": 0.39, "grad_norm": 1.718496322631836, "learning_rate": 1.4086138095614808e-05, "loss": 0.9639, "step": 6714 }, { "epoch": 0.39, "grad_norm": 1.8527882099151611, "learning_rate": 1.4084442569359964e-05, "loss": 0.926, "step": 6715 }, { "epoch": 0.39, "grad_norm": 1.9653902053833008, "learning_rate": 1.4082746902162414e-05, "loss": 1.0, "step": 6716 }, { "epoch": 0.39, "grad_norm": 1.9845950603485107, "learning_rate": 1.4081051094080675e-05, "loss": 0.9188, "step": 6717 }, { "epoch": 0.39, "grad_norm": 1.8779118061065674, "learning_rate": 1.407935514517326e-05, "loss": 1.0235, "step": 6718 }, { "epoch": 0.39, "grad_norm": 1.11811363697052, "learning_rate": 1.4077659055498695e-05, "loss": 0.6848, "step": 6719 }, { "epoch": 0.39, "grad_norm": 1.69560706615448, "learning_rate": 1.407596282511551e-05, "loss": 1.006, "step": 6720 }, { "epoch": 0.39, "grad_norm": 1.7870867252349854, "learning_rate": 1.407426645408223e-05, "loss": 1.003, "step": 6721 }, { "epoch": 0.39, "grad_norm": 1.717315673828125, "learning_rate": 1.4072569942457399e-05, "loss": 0.9686, "step": 6722 }, { "epoch": 0.39, "grad_norm": 2.03489351272583, "learning_rate": 1.4070873290299554e-05, "loss": 0.9558, "step": 6723 }, { "epoch": 0.39, "grad_norm": 1.8468509912490845, "learning_rate": 1.4069176497667242e-05, "loss": 1.016, "step": 6724 }, { "epoch": 0.39, "grad_norm": 1.8713451623916626, "learning_rate": 1.4067479564619018e-05, "loss": 0.9966, "step": 6725 }, { "epoch": 0.39, "grad_norm": 1.603393793106079, "learning_rate": 1.4065782491213433e-05, "loss": 0.9698, "step": 6726 }, { "epoch": 0.39, "grad_norm": 2.21260929107666, "learning_rate": 1.4064085277509055e-05, "loss": 1.0467, "step": 6727 }, { "epoch": 0.39, "grad_norm": 1.6719169616699219, "learning_rate": 1.4062387923564443e-05, "loss": 0.9636, "step": 6728 }, { "epoch": 0.39, "grad_norm": 1.8306374549865723, "learning_rate": 1.4060690429438176e-05, "loss": 0.9977, "step": 6729 }, { "epoch": 0.39, "grad_norm": 1.0939902067184448, "learning_rate": 1.4058992795188822e-05, "loss": 0.6028, "step": 6730 }, { "epoch": 0.39, "grad_norm": 0.9729236364364624, "learning_rate": 1.4057295020874966e-05, "loss": 0.5598, "step": 6731 }, { "epoch": 0.39, "grad_norm": 2.0358269214630127, "learning_rate": 1.4055597106555193e-05, "loss": 0.9921, "step": 6732 }, { "epoch": 0.39, "grad_norm": 1.9271126985549927, "learning_rate": 1.4053899052288091e-05, "loss": 1.0187, "step": 6733 }, { "epoch": 0.39, "grad_norm": 1.7927757501602173, "learning_rate": 1.405220085813226e-05, "loss": 1.0521, "step": 6734 }, { "epoch": 0.39, "grad_norm": 2.0081684589385986, "learning_rate": 1.4050502524146294e-05, "loss": 0.9678, "step": 6735 }, { "epoch": 0.39, "grad_norm": 1.77253258228302, "learning_rate": 1.4048804050388802e-05, "loss": 0.9605, "step": 6736 }, { "epoch": 0.39, "grad_norm": 1.882011890411377, "learning_rate": 1.4047105436918392e-05, "loss": 0.9201, "step": 6737 }, { "epoch": 0.39, "grad_norm": 1.9428033828735352, "learning_rate": 1.4045406683793677e-05, "loss": 0.9867, "step": 6738 }, { "epoch": 0.39, "grad_norm": 1.6767597198486328, "learning_rate": 1.4043707791073278e-05, "loss": 1.0221, "step": 6739 }, { "epoch": 0.39, "grad_norm": 1.70387601852417, "learning_rate": 1.404200875881582e-05, "loss": 1.0827, "step": 6740 }, { "epoch": 0.39, "grad_norm": 2.03206205368042, "learning_rate": 1.4040309587079928e-05, "loss": 1.0983, "step": 6741 }, { "epoch": 0.39, "grad_norm": 1.788863182067871, "learning_rate": 1.403861027592424e-05, "loss": 0.9548, "step": 6742 }, { "epoch": 0.39, "grad_norm": 1.174391269683838, "learning_rate": 1.4036910825407395e-05, "loss": 0.6276, "step": 6743 }, { "epoch": 0.39, "grad_norm": 1.7766765356063843, "learning_rate": 1.4035211235588032e-05, "loss": 0.9986, "step": 6744 }, { "epoch": 0.39, "grad_norm": 2.102076292037964, "learning_rate": 1.4033511506524802e-05, "loss": 0.9219, "step": 6745 }, { "epoch": 0.39, "grad_norm": 1.8500827550888062, "learning_rate": 1.4031811638276356e-05, "loss": 0.9636, "step": 6746 }, { "epoch": 0.39, "grad_norm": 1.9082595109939575, "learning_rate": 1.4030111630901354e-05, "loss": 0.9861, "step": 6747 }, { "epoch": 0.39, "grad_norm": 1.641597867012024, "learning_rate": 1.4028411484458456e-05, "loss": 0.9329, "step": 6748 }, { "epoch": 0.39, "grad_norm": 2.1459851264953613, "learning_rate": 1.4026711199006331e-05, "loss": 1.0106, "step": 6749 }, { "epoch": 0.39, "grad_norm": 1.8246526718139648, "learning_rate": 1.402501077460365e-05, "loss": 0.9948, "step": 6750 }, { "epoch": 0.39, "grad_norm": 1.7775335311889648, "learning_rate": 1.4023310211309092e-05, "loss": 1.0089, "step": 6751 }, { "epoch": 0.39, "grad_norm": 1.8576596975326538, "learning_rate": 1.4021609509181335e-05, "loss": 1.0497, "step": 6752 }, { "epoch": 0.39, "grad_norm": 1.9051514863967896, "learning_rate": 1.401990866827907e-05, "loss": 1.0876, "step": 6753 }, { "epoch": 0.39, "grad_norm": 1.922931432723999, "learning_rate": 1.4018207688660985e-05, "loss": 0.979, "step": 6754 }, { "epoch": 0.39, "grad_norm": 1.7118853330612183, "learning_rate": 1.4016506570385775e-05, "loss": 0.9974, "step": 6755 }, { "epoch": 0.39, "grad_norm": 1.8786288499832153, "learning_rate": 1.4014805313512146e-05, "loss": 1.0175, "step": 6756 }, { "epoch": 0.39, "grad_norm": 1.7783482074737549, "learning_rate": 1.4013103918098801e-05, "loss": 0.9528, "step": 6757 }, { "epoch": 0.39, "grad_norm": 1.9152061939239502, "learning_rate": 1.401140238420445e-05, "loss": 1.0323, "step": 6758 }, { "epoch": 0.39, "grad_norm": 1.6809674501419067, "learning_rate": 1.4009700711887806e-05, "loss": 0.9861, "step": 6759 }, { "epoch": 0.39, "grad_norm": 1.7512511014938354, "learning_rate": 1.400799890120759e-05, "loss": 1.0672, "step": 6760 }, { "epoch": 0.39, "grad_norm": 1.1490086317062378, "learning_rate": 1.400629695222253e-05, "loss": 0.6663, "step": 6761 }, { "epoch": 0.39, "grad_norm": 1.817386507987976, "learning_rate": 1.4004594864991354e-05, "loss": 0.9611, "step": 6762 }, { "epoch": 0.39, "grad_norm": 2.295001268386841, "learning_rate": 1.4002892639572795e-05, "loss": 1.0126, "step": 6763 }, { "epoch": 0.39, "grad_norm": 2.11795711517334, "learning_rate": 1.4001190276025593e-05, "loss": 1.0335, "step": 6764 }, { "epoch": 0.39, "grad_norm": 2.0941977500915527, "learning_rate": 1.3999487774408491e-05, "loss": 0.9809, "step": 6765 }, { "epoch": 0.39, "grad_norm": 1.8022984266281128, "learning_rate": 1.3997785134780239e-05, "loss": 0.9806, "step": 6766 }, { "epoch": 0.39, "grad_norm": 1.7007839679718018, "learning_rate": 1.3996082357199585e-05, "loss": 1.0436, "step": 6767 }, { "epoch": 0.39, "grad_norm": 1.8461931943893433, "learning_rate": 1.3994379441725297e-05, "loss": 0.9859, "step": 6768 }, { "epoch": 0.39, "grad_norm": 1.7574536800384521, "learning_rate": 1.3992676388416128e-05, "loss": 0.996, "step": 6769 }, { "epoch": 0.39, "grad_norm": 1.9120289087295532, "learning_rate": 1.3990973197330853e-05, "loss": 0.9505, "step": 6770 }, { "epoch": 0.39, "grad_norm": 1.7500176429748535, "learning_rate": 1.3989269868528242e-05, "loss": 0.9464, "step": 6771 }, { "epoch": 0.39, "grad_norm": 1.78048837184906, "learning_rate": 1.398756640206707e-05, "loss": 0.9325, "step": 6772 }, { "epoch": 0.39, "grad_norm": 1.755582571029663, "learning_rate": 1.398586279800612e-05, "loss": 0.9235, "step": 6773 }, { "epoch": 0.39, "grad_norm": 1.1130605936050415, "learning_rate": 1.3984159056404178e-05, "loss": 0.583, "step": 6774 }, { "epoch": 0.39, "grad_norm": 1.7887859344482422, "learning_rate": 1.3982455177320038e-05, "loss": 0.9839, "step": 6775 }, { "epoch": 0.39, "grad_norm": 1.7818692922592163, "learning_rate": 1.3980751160812491e-05, "loss": 0.9966, "step": 6776 }, { "epoch": 0.39, "grad_norm": 1.048404335975647, "learning_rate": 1.3979047006940345e-05, "loss": 0.5909, "step": 6777 }, { "epoch": 0.39, "grad_norm": 2.016364812850952, "learning_rate": 1.39773427157624e-05, "loss": 1.0495, "step": 6778 }, { "epoch": 0.39, "grad_norm": 1.6175923347473145, "learning_rate": 1.397563828733747e-05, "loss": 0.9709, "step": 6779 }, { "epoch": 0.39, "grad_norm": 2.005697727203369, "learning_rate": 1.3973933721724364e-05, "loss": 0.9522, "step": 6780 }, { "epoch": 0.39, "grad_norm": 1.1397303342819214, "learning_rate": 1.3972229018981911e-05, "loss": 0.6316, "step": 6781 }, { "epoch": 0.39, "grad_norm": 1.811553955078125, "learning_rate": 1.3970524179168927e-05, "loss": 1.0376, "step": 6782 }, { "epoch": 0.39, "grad_norm": 1.7347733974456787, "learning_rate": 1.3968819202344246e-05, "loss": 0.9579, "step": 6783 }, { "epoch": 0.39, "grad_norm": 1.750199556350708, "learning_rate": 1.39671140885667e-05, "loss": 0.9878, "step": 6784 }, { "epoch": 0.39, "grad_norm": 1.7455170154571533, "learning_rate": 1.3965408837895129e-05, "loss": 1.022, "step": 6785 }, { "epoch": 0.39, "grad_norm": 1.7671785354614258, "learning_rate": 1.3963703450388377e-05, "loss": 1.0123, "step": 6786 }, { "epoch": 0.39, "grad_norm": 1.8536680936813354, "learning_rate": 1.396199792610529e-05, "loss": 1.0056, "step": 6787 }, { "epoch": 0.39, "grad_norm": 1.8019664287567139, "learning_rate": 1.3960292265104723e-05, "loss": 0.9702, "step": 6788 }, { "epoch": 0.39, "grad_norm": 2.0354859828948975, "learning_rate": 1.3958586467445532e-05, "loss": 1.0861, "step": 6789 }, { "epoch": 0.39, "grad_norm": 1.8166725635528564, "learning_rate": 1.3956880533186582e-05, "loss": 1.005, "step": 6790 }, { "epoch": 0.39, "grad_norm": 1.6663466691970825, "learning_rate": 1.3955174462386733e-05, "loss": 1.0318, "step": 6791 }, { "epoch": 0.39, "grad_norm": 1.814225435256958, "learning_rate": 1.3953468255104865e-05, "loss": 1.0157, "step": 6792 }, { "epoch": 0.39, "grad_norm": 1.781333565711975, "learning_rate": 1.395176191139985e-05, "loss": 0.9414, "step": 6793 }, { "epoch": 0.39, "grad_norm": 1.7532334327697754, "learning_rate": 1.3950055431330568e-05, "loss": 1.0687, "step": 6794 }, { "epoch": 0.39, "grad_norm": 1.8517855405807495, "learning_rate": 1.3948348814955912e-05, "loss": 0.9925, "step": 6795 }, { "epoch": 0.39, "grad_norm": 1.7015671730041504, "learning_rate": 1.3946642062334765e-05, "loss": 1.0437, "step": 6796 }, { "epoch": 0.39, "grad_norm": 1.6779364347457886, "learning_rate": 1.3944935173526026e-05, "loss": 1.0208, "step": 6797 }, { "epoch": 0.39, "grad_norm": 1.703795075416565, "learning_rate": 1.3943228148588595e-05, "loss": 0.9557, "step": 6798 }, { "epoch": 0.39, "grad_norm": 1.821058750152588, "learning_rate": 1.3941520987581371e-05, "loss": 0.9087, "step": 6799 }, { "epoch": 0.39, "grad_norm": 2.049903154373169, "learning_rate": 1.3939813690563274e-05, "loss": 1.0012, "step": 6800 }, { "epoch": 0.39, "grad_norm": 1.5722832679748535, "learning_rate": 1.3938106257593207e-05, "loss": 0.9447, "step": 6801 }, { "epoch": 0.39, "grad_norm": 1.8461346626281738, "learning_rate": 1.3936398688730095e-05, "loss": 0.9467, "step": 6802 }, { "epoch": 0.39, "grad_norm": 1.6874440908432007, "learning_rate": 1.393469098403286e-05, "loss": 0.9443, "step": 6803 }, { "epoch": 0.39, "grad_norm": 1.957535982131958, "learning_rate": 1.3932983143560433e-05, "loss": 0.9429, "step": 6804 }, { "epoch": 0.39, "grad_norm": 1.8040337562561035, "learning_rate": 1.3931275167371743e-05, "loss": 0.927, "step": 6805 }, { "epoch": 0.39, "grad_norm": 1.7531129121780396, "learning_rate": 1.3929567055525726e-05, "loss": 1.073, "step": 6806 }, { "epoch": 0.39, "grad_norm": 1.7696815729141235, "learning_rate": 1.3927858808081327e-05, "loss": 0.9081, "step": 6807 }, { "epoch": 0.39, "grad_norm": 1.8680943250656128, "learning_rate": 1.3926150425097493e-05, "loss": 0.8907, "step": 6808 }, { "epoch": 0.39, "grad_norm": 1.846307396888733, "learning_rate": 1.3924441906633174e-05, "loss": 1.0195, "step": 6809 }, { "epoch": 0.39, "grad_norm": 1.6509976387023926, "learning_rate": 1.3922733252747332e-05, "loss": 1.0058, "step": 6810 }, { "epoch": 0.39, "grad_norm": 1.6506422758102417, "learning_rate": 1.392102446349892e-05, "loss": 0.8888, "step": 6811 }, { "epoch": 0.39, "grad_norm": 1.5828776359558105, "learning_rate": 1.3919315538946907e-05, "loss": 0.9122, "step": 6812 }, { "epoch": 0.39, "grad_norm": 1.8559921979904175, "learning_rate": 1.3917606479150263e-05, "loss": 1.0352, "step": 6813 }, { "epoch": 0.39, "grad_norm": 1.7429535388946533, "learning_rate": 1.3915897284167963e-05, "loss": 1.0226, "step": 6814 }, { "epoch": 0.39, "grad_norm": 1.1186811923980713, "learning_rate": 1.3914187954058986e-05, "loss": 0.6241, "step": 6815 }, { "epoch": 0.39, "grad_norm": 1.86961829662323, "learning_rate": 1.3912478488882317e-05, "loss": 1.0792, "step": 6816 }, { "epoch": 0.39, "grad_norm": 1.73786461353302, "learning_rate": 1.3910768888696941e-05, "loss": 0.9467, "step": 6817 }, { "epoch": 0.39, "grad_norm": 1.9818168878555298, "learning_rate": 1.3909059153561859e-05, "loss": 0.9886, "step": 6818 }, { "epoch": 0.39, "grad_norm": 1.6181373596191406, "learning_rate": 1.3907349283536065e-05, "loss": 0.9501, "step": 6819 }, { "epoch": 0.39, "grad_norm": 1.706716775894165, "learning_rate": 1.390563927867856e-05, "loss": 0.9533, "step": 6820 }, { "epoch": 0.39, "grad_norm": 2.0678229331970215, "learning_rate": 1.3903929139048353e-05, "loss": 1.0081, "step": 6821 }, { "epoch": 0.39, "grad_norm": 2.0080697536468506, "learning_rate": 1.3902218864704458e-05, "loss": 1.0186, "step": 6822 }, { "epoch": 0.39, "grad_norm": 1.6773377656936646, "learning_rate": 1.390050845570589e-05, "loss": 1.0428, "step": 6823 }, { "epoch": 0.39, "grad_norm": 1.8540889024734497, "learning_rate": 1.3898797912111673e-05, "loss": 0.9548, "step": 6824 }, { "epoch": 0.39, "grad_norm": 1.7127068042755127, "learning_rate": 1.3897087233980828e-05, "loss": 1.0449, "step": 6825 }, { "epoch": 0.39, "grad_norm": 1.7764031887054443, "learning_rate": 1.3895376421372391e-05, "loss": 0.9341, "step": 6826 }, { "epoch": 0.39, "grad_norm": 1.7259466648101807, "learning_rate": 1.3893665474345392e-05, "loss": 0.9454, "step": 6827 }, { "epoch": 0.39, "grad_norm": 1.684853196144104, "learning_rate": 1.3891954392958877e-05, "loss": 0.9527, "step": 6828 }, { "epoch": 0.39, "grad_norm": 1.7955219745635986, "learning_rate": 1.3890243177271887e-05, "loss": 1.0108, "step": 6829 }, { "epoch": 0.39, "grad_norm": 1.741733193397522, "learning_rate": 1.388853182734347e-05, "loss": 1.0072, "step": 6830 }, { "epoch": 0.39, "grad_norm": 1.7035363912582397, "learning_rate": 1.3886820343232685e-05, "loss": 0.9151, "step": 6831 }, { "epoch": 0.39, "grad_norm": 1.9676433801651, "learning_rate": 1.3885108724998583e-05, "loss": 1.0395, "step": 6832 }, { "epoch": 0.39, "grad_norm": 1.7374156713485718, "learning_rate": 1.3883396972700233e-05, "loss": 1.0525, "step": 6833 }, { "epoch": 0.39, "grad_norm": 1.7043983936309814, "learning_rate": 1.3881685086396704e-05, "loss": 1.0299, "step": 6834 }, { "epoch": 0.39, "grad_norm": 1.656599998474121, "learning_rate": 1.3879973066147063e-05, "loss": 0.9625, "step": 6835 }, { "epoch": 0.39, "grad_norm": 1.7608959674835205, "learning_rate": 1.3878260912010393e-05, "loss": 0.9978, "step": 6836 }, { "epoch": 0.39, "grad_norm": 1.8383724689483643, "learning_rate": 1.3876548624045769e-05, "loss": 1.0015, "step": 6837 }, { "epoch": 0.39, "grad_norm": 1.7328393459320068, "learning_rate": 1.3874836202312283e-05, "loss": 1.0019, "step": 6838 }, { "epoch": 0.39, "grad_norm": 2.0246083736419678, "learning_rate": 1.3873123646869022e-05, "loss": 1.0334, "step": 6839 }, { "epoch": 0.39, "grad_norm": 1.849651575088501, "learning_rate": 1.3871410957775085e-05, "loss": 1.0286, "step": 6840 }, { "epoch": 0.39, "grad_norm": 1.8208962678909302, "learning_rate": 1.3869698135089566e-05, "loss": 0.9502, "step": 6841 }, { "epoch": 0.39, "grad_norm": 1.6560593843460083, "learning_rate": 1.3867985178871579e-05, "loss": 1.0554, "step": 6842 }, { "epoch": 0.39, "grad_norm": 1.6062977313995361, "learning_rate": 1.3866272089180224e-05, "loss": 0.9123, "step": 6843 }, { "epoch": 0.39, "grad_norm": 1.8182982206344604, "learning_rate": 1.3864558866074622e-05, "loss": 1.0288, "step": 6844 }, { "epoch": 0.39, "grad_norm": 1.6653316020965576, "learning_rate": 1.386284550961389e-05, "loss": 1.029, "step": 6845 }, { "epoch": 0.39, "grad_norm": 1.941753625869751, "learning_rate": 1.3861132019857143e-05, "loss": 1.0552, "step": 6846 }, { "epoch": 0.39, "grad_norm": 1.833056092262268, "learning_rate": 1.3859418396863522e-05, "loss": 0.9556, "step": 6847 }, { "epoch": 0.39, "grad_norm": 1.657699704170227, "learning_rate": 1.385770464069215e-05, "loss": 0.9047, "step": 6848 }, { "epoch": 0.39, "grad_norm": 1.9108439683914185, "learning_rate": 1.3855990751402169e-05, "loss": 1.0864, "step": 6849 }, { "epoch": 0.39, "grad_norm": 1.7507915496826172, "learning_rate": 1.3854276729052716e-05, "loss": 0.8967, "step": 6850 }, { "epoch": 0.39, "grad_norm": 1.08841073513031, "learning_rate": 1.3852562573702941e-05, "loss": 0.6313, "step": 6851 }, { "epoch": 0.39, "grad_norm": 1.0323333740234375, "learning_rate": 1.3850848285411994e-05, "loss": 0.5743, "step": 6852 }, { "epoch": 0.39, "grad_norm": 0.9940459728240967, "learning_rate": 1.384913386423903e-05, "loss": 0.5656, "step": 6853 }, { "epoch": 0.39, "grad_norm": 1.0336462259292603, "learning_rate": 1.3847419310243209e-05, "loss": 0.5885, "step": 6854 }, { "epoch": 0.39, "grad_norm": 1.7249271869659424, "learning_rate": 1.3845704623483691e-05, "loss": 0.9527, "step": 6855 }, { "epoch": 0.39, "grad_norm": 1.7896692752838135, "learning_rate": 1.3843989804019653e-05, "loss": 1.0075, "step": 6856 }, { "epoch": 0.39, "grad_norm": 1.8599122762680054, "learning_rate": 1.3842274851910263e-05, "loss": 0.9814, "step": 6857 }, { "epoch": 0.39, "grad_norm": 1.9365019798278809, "learning_rate": 1.38405597672147e-05, "loss": 0.9887, "step": 6858 }, { "epoch": 0.39, "grad_norm": 1.6805779933929443, "learning_rate": 1.383884454999215e-05, "loss": 1.0246, "step": 6859 }, { "epoch": 0.39, "grad_norm": 1.837536096572876, "learning_rate": 1.3837129200301794e-05, "loss": 1.0547, "step": 6860 }, { "epoch": 0.39, "grad_norm": 1.8172812461853027, "learning_rate": 1.3835413718202831e-05, "loss": 0.9238, "step": 6861 }, { "epoch": 0.39, "grad_norm": 1.101880431175232, "learning_rate": 1.3833698103754454e-05, "loss": 0.5785, "step": 6862 }, { "epoch": 0.39, "grad_norm": 1.6543262004852295, "learning_rate": 1.3831982357015866e-05, "loss": 1.0067, "step": 6863 }, { "epoch": 0.39, "grad_norm": 1.8261693716049194, "learning_rate": 1.3830266478046268e-05, "loss": 1.0631, "step": 6864 }, { "epoch": 0.39, "grad_norm": 1.795305609703064, "learning_rate": 1.3828550466904876e-05, "loss": 0.883, "step": 6865 }, { "epoch": 0.39, "grad_norm": 1.6449183225631714, "learning_rate": 1.3826834323650899e-05, "loss": 0.9758, "step": 6866 }, { "epoch": 0.39, "grad_norm": 1.8044474124908447, "learning_rate": 1.3825118048343562e-05, "loss": 1.0641, "step": 6867 }, { "epoch": 0.39, "grad_norm": 1.758386492729187, "learning_rate": 1.3823401641042085e-05, "loss": 0.9946, "step": 6868 }, { "epoch": 0.39, "grad_norm": 1.7067718505859375, "learning_rate": 1.3821685101805697e-05, "loss": 0.9277, "step": 6869 }, { "epoch": 0.39, "grad_norm": 1.7688261270523071, "learning_rate": 1.3819968430693633e-05, "loss": 1.1033, "step": 6870 }, { "epoch": 0.39, "grad_norm": 1.5806578397750854, "learning_rate": 1.3818251627765129e-05, "loss": 1.0465, "step": 6871 }, { "epoch": 0.39, "grad_norm": 1.844901204109192, "learning_rate": 1.3816534693079426e-05, "loss": 0.9981, "step": 6872 }, { "epoch": 0.39, "grad_norm": 1.6712594032287598, "learning_rate": 1.3814817626695771e-05, "loss": 0.9352, "step": 6873 }, { "epoch": 0.39, "grad_norm": 1.9066039323806763, "learning_rate": 1.3813100428673419e-05, "loss": 0.9937, "step": 6874 }, { "epoch": 0.39, "grad_norm": 1.7405380010604858, "learning_rate": 1.3811383099071618e-05, "loss": 0.9602, "step": 6875 }, { "epoch": 0.39, "grad_norm": 1.8275471925735474, "learning_rate": 1.3809665637949636e-05, "loss": 0.9518, "step": 6876 }, { "epoch": 0.39, "grad_norm": 1.7132142782211304, "learning_rate": 1.3807948045366737e-05, "loss": 0.966, "step": 6877 }, { "epoch": 0.39, "grad_norm": 1.9485523700714111, "learning_rate": 1.3806230321382183e-05, "loss": 1.0468, "step": 6878 }, { "epoch": 0.39, "grad_norm": 1.7423691749572754, "learning_rate": 1.3804512466055257e-05, "loss": 0.934, "step": 6879 }, { "epoch": 0.39, "grad_norm": 1.8203736543655396, "learning_rate": 1.3802794479445232e-05, "loss": 1.032, "step": 6880 }, { "epoch": 0.39, "grad_norm": 1.8252575397491455, "learning_rate": 1.3801076361611393e-05, "loss": 1.0588, "step": 6881 }, { "epoch": 0.39, "grad_norm": 1.797673225402832, "learning_rate": 1.3799358112613026e-05, "loss": 1.0269, "step": 6882 }, { "epoch": 0.39, "grad_norm": 1.7945324182510376, "learning_rate": 1.379763973250942e-05, "loss": 1.0038, "step": 6883 }, { "epoch": 0.39, "grad_norm": 1.857056975364685, "learning_rate": 1.3795921221359877e-05, "loss": 1.0242, "step": 6884 }, { "epoch": 0.39, "grad_norm": 1.6810377836227417, "learning_rate": 1.3794202579223699e-05, "loss": 0.9972, "step": 6885 }, { "epoch": 0.39, "grad_norm": 1.825913429260254, "learning_rate": 1.3792483806160188e-05, "loss": 1.0659, "step": 6886 }, { "epoch": 0.39, "grad_norm": 1.655322551727295, "learning_rate": 1.3790764902228653e-05, "loss": 1.0265, "step": 6887 }, { "epoch": 0.4, "grad_norm": 1.776117205619812, "learning_rate": 1.3789045867488411e-05, "loss": 1.0244, "step": 6888 }, { "epoch": 0.4, "grad_norm": 1.7154232263565063, "learning_rate": 1.378732670199878e-05, "loss": 0.9343, "step": 6889 }, { "epoch": 0.4, "grad_norm": 1.900773048400879, "learning_rate": 1.3785607405819085e-05, "loss": 0.8913, "step": 6890 }, { "epoch": 0.4, "grad_norm": 1.7355782985687256, "learning_rate": 1.3783887979008652e-05, "loss": 1.0004, "step": 6891 }, { "epoch": 0.4, "grad_norm": 1.7621214389801025, "learning_rate": 1.3782168421626817e-05, "loss": 1.0453, "step": 6892 }, { "epoch": 0.4, "grad_norm": 1.6618645191192627, "learning_rate": 1.3780448733732911e-05, "loss": 0.9573, "step": 6893 }, { "epoch": 0.4, "grad_norm": 1.9396284818649292, "learning_rate": 1.3778728915386284e-05, "loss": 0.9723, "step": 6894 }, { "epoch": 0.4, "grad_norm": 1.8881758451461792, "learning_rate": 1.3777008966646275e-05, "loss": 0.995, "step": 6895 }, { "epoch": 0.4, "grad_norm": 1.7376147508621216, "learning_rate": 1.3775288887572238e-05, "loss": 1.0451, "step": 6896 }, { "epoch": 0.4, "grad_norm": 1.6251953840255737, "learning_rate": 1.3773568678223525e-05, "loss": 1.0418, "step": 6897 }, { "epoch": 0.4, "grad_norm": 1.9007586240768433, "learning_rate": 1.3771848338659502e-05, "loss": 1.0188, "step": 6898 }, { "epoch": 0.4, "grad_norm": 1.880305528640747, "learning_rate": 1.3770127868939528e-05, "loss": 0.906, "step": 6899 }, { "epoch": 0.4, "grad_norm": 1.8768893480300903, "learning_rate": 1.3768407269122968e-05, "loss": 0.9587, "step": 6900 }, { "epoch": 0.4, "grad_norm": 1.7819699048995972, "learning_rate": 1.3766686539269203e-05, "loss": 0.9231, "step": 6901 }, { "epoch": 0.4, "grad_norm": 1.6596421003341675, "learning_rate": 1.3764965679437608e-05, "loss": 1.0304, "step": 6902 }, { "epoch": 0.4, "grad_norm": 1.8156112432479858, "learning_rate": 1.3763244689687562e-05, "loss": 0.9472, "step": 6903 }, { "epoch": 0.4, "grad_norm": 1.604722261428833, "learning_rate": 1.3761523570078459e-05, "loss": 0.989, "step": 6904 }, { "epoch": 0.4, "grad_norm": 1.7487188577651978, "learning_rate": 1.3759802320669681e-05, "loss": 0.9988, "step": 6905 }, { "epoch": 0.4, "grad_norm": 1.8543919324874878, "learning_rate": 1.3758080941520628e-05, "loss": 0.9908, "step": 6906 }, { "epoch": 0.4, "grad_norm": 1.846685767173767, "learning_rate": 1.37563594326907e-05, "loss": 1.0371, "step": 6907 }, { "epoch": 0.4, "grad_norm": 1.8190960884094238, "learning_rate": 1.3754637794239303e-05, "loss": 1.0285, "step": 6908 }, { "epoch": 0.4, "grad_norm": 1.7319422960281372, "learning_rate": 1.375291602622584e-05, "loss": 0.9807, "step": 6909 }, { "epoch": 0.4, "grad_norm": 1.9516987800598145, "learning_rate": 1.3751194128709731e-05, "loss": 0.9238, "step": 6910 }, { "epoch": 0.4, "grad_norm": 1.894558072090149, "learning_rate": 1.374947210175039e-05, "loss": 1.0307, "step": 6911 }, { "epoch": 0.4, "grad_norm": 1.7581887245178223, "learning_rate": 1.3747749945407238e-05, "loss": 0.9743, "step": 6912 }, { "epoch": 0.4, "grad_norm": 1.0241750478744507, "learning_rate": 1.374602765973971e-05, "loss": 0.6493, "step": 6913 }, { "epoch": 0.4, "grad_norm": 1.7180957794189453, "learning_rate": 1.3744305244807228e-05, "loss": 0.966, "step": 6914 }, { "epoch": 0.4, "grad_norm": 1.8838133811950684, "learning_rate": 1.3742582700669229e-05, "loss": 1.0685, "step": 6915 }, { "epoch": 0.4, "grad_norm": 1.8051873445510864, "learning_rate": 1.374086002738516e-05, "loss": 0.9628, "step": 6916 }, { "epoch": 0.4, "grad_norm": 1.8534681797027588, "learning_rate": 1.3739137225014458e-05, "loss": 0.9043, "step": 6917 }, { "epoch": 0.4, "grad_norm": 1.6830724477767944, "learning_rate": 1.3737414293616575e-05, "loss": 0.9057, "step": 6918 }, { "epoch": 0.4, "grad_norm": 1.7066988945007324, "learning_rate": 1.3735691233250968e-05, "loss": 0.9748, "step": 6919 }, { "epoch": 0.4, "grad_norm": 1.8414219617843628, "learning_rate": 1.373396804397709e-05, "loss": 1.0403, "step": 6920 }, { "epoch": 0.4, "grad_norm": 0.9916975498199463, "learning_rate": 1.3732244725854407e-05, "loss": 0.5958, "step": 6921 }, { "epoch": 0.4, "grad_norm": 1.7530094385147095, "learning_rate": 1.3730521278942382e-05, "loss": 0.9549, "step": 6922 }, { "epoch": 0.4, "grad_norm": 1.7721494436264038, "learning_rate": 1.3728797703300489e-05, "loss": 1.0594, "step": 6923 }, { "epoch": 0.4, "grad_norm": 1.7973453998565674, "learning_rate": 1.3727073998988202e-05, "loss": 0.9808, "step": 6924 }, { "epoch": 0.4, "grad_norm": 1.945422887802124, "learning_rate": 1.3725350166065006e-05, "loss": 0.9587, "step": 6925 }, { "epoch": 0.4, "grad_norm": 1.1564247608184814, "learning_rate": 1.3723626204590376e-05, "loss": 0.6194, "step": 6926 }, { "epoch": 0.4, "grad_norm": 1.943947434425354, "learning_rate": 1.3721902114623812e-05, "loss": 0.9891, "step": 6927 }, { "epoch": 0.4, "grad_norm": 1.7373510599136353, "learning_rate": 1.3720177896224802e-05, "loss": 1.0493, "step": 6928 }, { "epoch": 0.4, "grad_norm": 1.6268665790557861, "learning_rate": 1.3718453549452843e-05, "loss": 0.9699, "step": 6929 }, { "epoch": 0.4, "grad_norm": 1.950081706047058, "learning_rate": 1.3716729074367443e-05, "loss": 0.9893, "step": 6930 }, { "epoch": 0.4, "grad_norm": 1.9322916269302368, "learning_rate": 1.3715004471028101e-05, "loss": 0.9537, "step": 6931 }, { "epoch": 0.4, "grad_norm": 1.6880568265914917, "learning_rate": 1.3713279739494334e-05, "loss": 0.9861, "step": 6932 }, { "epoch": 0.4, "grad_norm": 1.6641559600830078, "learning_rate": 1.3711554879825655e-05, "loss": 0.9366, "step": 6933 }, { "epoch": 0.4, "grad_norm": 1.087035059928894, "learning_rate": 1.3709829892081588e-05, "loss": 0.5984, "step": 6934 }, { "epoch": 0.4, "grad_norm": 1.8527957201004028, "learning_rate": 1.3708104776321652e-05, "loss": 0.9584, "step": 6935 }, { "epoch": 0.4, "grad_norm": 1.5877742767333984, "learning_rate": 1.3706379532605377e-05, "loss": 0.999, "step": 6936 }, { "epoch": 0.4, "grad_norm": 1.744698405265808, "learning_rate": 1.3704654160992298e-05, "loss": 0.977, "step": 6937 }, { "epoch": 0.4, "grad_norm": 1.8975149393081665, "learning_rate": 1.3702928661541955e-05, "loss": 1.0295, "step": 6938 }, { "epoch": 0.4, "grad_norm": 1.8641890287399292, "learning_rate": 1.3701203034313884e-05, "loss": 1.0936, "step": 6939 }, { "epoch": 0.4, "grad_norm": 1.7193855047225952, "learning_rate": 1.3699477279367636e-05, "loss": 0.9429, "step": 6940 }, { "epoch": 0.4, "grad_norm": 1.9784059524536133, "learning_rate": 1.3697751396762762e-05, "loss": 0.9404, "step": 6941 }, { "epoch": 0.4, "grad_norm": 1.6951725482940674, "learning_rate": 1.3696025386558817e-05, "loss": 0.9774, "step": 6942 }, { "epoch": 0.4, "grad_norm": 1.7396291494369507, "learning_rate": 1.3694299248815362e-05, "loss": 0.9084, "step": 6943 }, { "epoch": 0.4, "grad_norm": 1.7987840175628662, "learning_rate": 1.3692572983591957e-05, "loss": 1.0254, "step": 6944 }, { "epoch": 0.4, "grad_norm": 1.7492117881774902, "learning_rate": 1.3690846590948172e-05, "loss": 1.0077, "step": 6945 }, { "epoch": 0.4, "grad_norm": 1.8775699138641357, "learning_rate": 1.3689120070943584e-05, "loss": 1.074, "step": 6946 }, { "epoch": 0.4, "grad_norm": 1.823535680770874, "learning_rate": 1.3687393423637767e-05, "loss": 0.9436, "step": 6947 }, { "epoch": 0.4, "grad_norm": 1.7458266019821167, "learning_rate": 1.3685666649090301e-05, "loss": 0.9932, "step": 6948 }, { "epoch": 0.4, "grad_norm": 1.8429923057556152, "learning_rate": 1.3683939747360776e-05, "loss": 0.938, "step": 6949 }, { "epoch": 0.4, "grad_norm": 1.896311640739441, "learning_rate": 1.3682212718508781e-05, "loss": 1.007, "step": 6950 }, { "epoch": 0.4, "grad_norm": 1.826287031173706, "learning_rate": 1.3680485562593911e-05, "loss": 0.934, "step": 6951 }, { "epoch": 0.4, "grad_norm": 1.824033498764038, "learning_rate": 1.3678758279675766e-05, "loss": 0.9872, "step": 6952 }, { "epoch": 0.4, "grad_norm": 1.7327228784561157, "learning_rate": 1.3677030869813946e-05, "loss": 0.969, "step": 6953 }, { "epoch": 0.4, "grad_norm": 1.8105082511901855, "learning_rate": 1.3675303333068062e-05, "loss": 1.0242, "step": 6954 }, { "epoch": 0.4, "grad_norm": 1.9966410398483276, "learning_rate": 1.3673575669497729e-05, "loss": 0.9841, "step": 6955 }, { "epoch": 0.4, "grad_norm": 1.1057027578353882, "learning_rate": 1.3671847879162562e-05, "loss": 0.6237, "step": 6956 }, { "epoch": 0.4, "grad_norm": 1.9029927253723145, "learning_rate": 1.3670119962122182e-05, "loss": 0.9535, "step": 6957 }, { "epoch": 0.4, "grad_norm": 2.3604249954223633, "learning_rate": 1.3668391918436212e-05, "loss": 0.9971, "step": 6958 }, { "epoch": 0.4, "grad_norm": 1.9180368185043335, "learning_rate": 1.3666663748164286e-05, "loss": 1.1134, "step": 6959 }, { "epoch": 0.4, "grad_norm": 1.919784426689148, "learning_rate": 1.3664935451366035e-05, "loss": 0.9921, "step": 6960 }, { "epoch": 0.4, "grad_norm": 1.7360732555389404, "learning_rate": 1.36632070281011e-05, "loss": 1.0155, "step": 6961 }, { "epoch": 0.4, "grad_norm": 1.751959204673767, "learning_rate": 1.3661478478429123e-05, "loss": 1.0331, "step": 6962 }, { "epoch": 0.4, "grad_norm": 1.7400237321853638, "learning_rate": 1.3659749802409752e-05, "loss": 0.9868, "step": 6963 }, { "epoch": 0.4, "grad_norm": 1.7216694355010986, "learning_rate": 1.3658021000102638e-05, "loss": 0.9858, "step": 6964 }, { "epoch": 0.4, "grad_norm": 1.7637081146240234, "learning_rate": 1.3656292071567436e-05, "loss": 1.0183, "step": 6965 }, { "epoch": 0.4, "grad_norm": 1.9082213640213013, "learning_rate": 1.365456301686381e-05, "loss": 0.9342, "step": 6966 }, { "epoch": 0.4, "grad_norm": 1.762948989868164, "learning_rate": 1.365283383605142e-05, "loss": 0.9563, "step": 6967 }, { "epoch": 0.4, "grad_norm": 1.9059149026870728, "learning_rate": 1.365110452918994e-05, "loss": 1.012, "step": 6968 }, { "epoch": 0.4, "grad_norm": 1.7151412963867188, "learning_rate": 1.3649375096339044e-05, "loss": 0.9545, "step": 6969 }, { "epoch": 0.4, "grad_norm": 1.6515100002288818, "learning_rate": 1.3647645537558406e-05, "loss": 0.955, "step": 6970 }, { "epoch": 0.4, "grad_norm": 1.8940740823745728, "learning_rate": 1.3645915852907709e-05, "loss": 0.9831, "step": 6971 }, { "epoch": 0.4, "grad_norm": 1.8581762313842773, "learning_rate": 1.3644186042446641e-05, "loss": 0.9713, "step": 6972 }, { "epoch": 0.4, "grad_norm": 1.0670125484466553, "learning_rate": 1.364245610623489e-05, "loss": 0.5949, "step": 6973 }, { "epoch": 0.4, "grad_norm": 1.8109681606292725, "learning_rate": 1.3640726044332157e-05, "loss": 1.1458, "step": 6974 }, { "epoch": 0.4, "grad_norm": 1.7546714544296265, "learning_rate": 1.3638995856798138e-05, "loss": 0.9952, "step": 6975 }, { "epoch": 0.4, "grad_norm": 1.6644359827041626, "learning_rate": 1.3637265543692536e-05, "loss": 0.9806, "step": 6976 }, { "epoch": 0.4, "grad_norm": 1.7586390972137451, "learning_rate": 1.363553510507506e-05, "loss": 0.9561, "step": 6977 }, { "epoch": 0.4, "grad_norm": 1.7128090858459473, "learning_rate": 1.3633804541005423e-05, "loss": 0.9075, "step": 6978 }, { "epoch": 0.4, "grad_norm": 1.7180993556976318, "learning_rate": 1.363207385154334e-05, "loss": 1.0022, "step": 6979 }, { "epoch": 0.4, "grad_norm": 1.7044589519500732, "learning_rate": 1.3630343036748536e-05, "loss": 1.0041, "step": 6980 }, { "epoch": 0.4, "grad_norm": 1.9900785684585571, "learning_rate": 1.3628612096680738e-05, "loss": 1.0296, "step": 6981 }, { "epoch": 0.4, "grad_norm": 1.6514198780059814, "learning_rate": 1.3626881031399669e-05, "loss": 1.0069, "step": 6982 }, { "epoch": 0.4, "grad_norm": 1.9177031517028809, "learning_rate": 1.3625149840965066e-05, "loss": 0.9796, "step": 6983 }, { "epoch": 0.4, "grad_norm": 1.8580693006515503, "learning_rate": 1.3623418525436668e-05, "loss": 0.9831, "step": 6984 }, { "epoch": 0.4, "grad_norm": 1.6237668991088867, "learning_rate": 1.3621687084874222e-05, "loss": 0.9995, "step": 6985 }, { "epoch": 0.4, "grad_norm": 1.6746234893798828, "learning_rate": 1.361995551933747e-05, "loss": 0.978, "step": 6986 }, { "epoch": 0.4, "grad_norm": 1.6842522621154785, "learning_rate": 1.3618223828886165e-05, "loss": 1.0357, "step": 6987 }, { "epoch": 0.4, "grad_norm": 1.8021342754364014, "learning_rate": 1.3616492013580063e-05, "loss": 0.9742, "step": 6988 }, { "epoch": 0.4, "grad_norm": 1.9917511940002441, "learning_rate": 1.3614760073478923e-05, "loss": 0.9304, "step": 6989 }, { "epoch": 0.4, "grad_norm": 1.7161378860473633, "learning_rate": 1.3613028008642512e-05, "loss": 0.9298, "step": 6990 }, { "epoch": 0.4, "grad_norm": 1.8116116523742676, "learning_rate": 1.3611295819130597e-05, "loss": 1.0215, "step": 6991 }, { "epoch": 0.4, "grad_norm": 1.8013050556182861, "learning_rate": 1.3609563505002949e-05, "loss": 0.9854, "step": 6992 }, { "epoch": 0.4, "grad_norm": 1.9005085229873657, "learning_rate": 1.3607831066319346e-05, "loss": 1.029, "step": 6993 }, { "epoch": 0.4, "grad_norm": 1.7874274253845215, "learning_rate": 1.3606098503139573e-05, "loss": 0.9384, "step": 6994 }, { "epoch": 0.4, "grad_norm": 1.910658836364746, "learning_rate": 1.3604365815523415e-05, "loss": 1.0032, "step": 6995 }, { "epoch": 0.4, "grad_norm": 1.856735110282898, "learning_rate": 1.3602633003530658e-05, "loss": 0.9484, "step": 6996 }, { "epoch": 0.4, "grad_norm": 1.8812077045440674, "learning_rate": 1.3600900067221103e-05, "loss": 1.0087, "step": 6997 }, { "epoch": 0.4, "grad_norm": 1.8383809328079224, "learning_rate": 1.3599167006654545e-05, "loss": 0.9521, "step": 6998 }, { "epoch": 0.4, "grad_norm": 1.7604742050170898, "learning_rate": 1.3597433821890787e-05, "loss": 0.9567, "step": 6999 }, { "epoch": 0.4, "grad_norm": 1.7840937376022339, "learning_rate": 1.3595700512989635e-05, "loss": 1.0047, "step": 7000 }, { "epoch": 0.4, "grad_norm": 1.9289796352386475, "learning_rate": 1.3593967080010905e-05, "loss": 0.9753, "step": 7001 }, { "epoch": 0.4, "grad_norm": 1.7640876770019531, "learning_rate": 1.359223352301441e-05, "loss": 1.0175, "step": 7002 }, { "epoch": 0.4, "grad_norm": 1.800082802772522, "learning_rate": 1.359049984205997e-05, "loss": 0.9792, "step": 7003 }, { "epoch": 0.4, "grad_norm": 1.99664306640625, "learning_rate": 1.3588766037207411e-05, "loss": 1.1687, "step": 7004 }, { "epoch": 0.4, "grad_norm": 1.6999276876449585, "learning_rate": 1.3587032108516555e-05, "loss": 1.0019, "step": 7005 }, { "epoch": 0.4, "grad_norm": 1.9114924669265747, "learning_rate": 1.3585298056047247e-05, "loss": 0.9636, "step": 7006 }, { "epoch": 0.4, "grad_norm": 1.9935848712921143, "learning_rate": 1.3583563879859318e-05, "loss": 0.9371, "step": 7007 }, { "epoch": 0.4, "grad_norm": 1.8532017469406128, "learning_rate": 1.358182958001261e-05, "loss": 1.0441, "step": 7008 }, { "epoch": 0.4, "grad_norm": 1.7209014892578125, "learning_rate": 1.3580095156566966e-05, "loss": 1.0167, "step": 7009 }, { "epoch": 0.4, "grad_norm": 1.6647123098373413, "learning_rate": 1.3578360609582242e-05, "loss": 0.949, "step": 7010 }, { "epoch": 0.4, "grad_norm": 1.8390610218048096, "learning_rate": 1.3576625939118286e-05, "loss": 1.0074, "step": 7011 }, { "epoch": 0.4, "grad_norm": 1.6940075159072876, "learning_rate": 1.3574891145234962e-05, "loss": 0.9493, "step": 7012 }, { "epoch": 0.4, "grad_norm": 1.7103420495986938, "learning_rate": 1.357315622799213e-05, "loss": 0.9497, "step": 7013 }, { "epoch": 0.4, "grad_norm": 1.854063630104065, "learning_rate": 1.3571421187449656e-05, "loss": 1.0326, "step": 7014 }, { "epoch": 0.4, "grad_norm": 1.7468311786651611, "learning_rate": 1.3569686023667415e-05, "loss": 1.0708, "step": 7015 }, { "epoch": 0.4, "grad_norm": 1.913588523864746, "learning_rate": 1.356795073670528e-05, "loss": 1.0091, "step": 7016 }, { "epoch": 0.4, "grad_norm": 1.942193865776062, "learning_rate": 1.3566215326623131e-05, "loss": 0.9449, "step": 7017 }, { "epoch": 0.4, "grad_norm": 1.7962298393249512, "learning_rate": 1.3564479793480856e-05, "loss": 1.0253, "step": 7018 }, { "epoch": 0.4, "grad_norm": 1.058428168296814, "learning_rate": 1.3562744137338336e-05, "loss": 0.602, "step": 7019 }, { "epoch": 0.4, "grad_norm": 1.857191801071167, "learning_rate": 1.356100835825547e-05, "loss": 0.9692, "step": 7020 }, { "epoch": 0.4, "grad_norm": 1.7414098978042603, "learning_rate": 1.3559272456292153e-05, "loss": 0.9871, "step": 7021 }, { "epoch": 0.4, "grad_norm": 1.7420209646224976, "learning_rate": 1.3557536431508287e-05, "loss": 0.9514, "step": 7022 }, { "epoch": 0.4, "grad_norm": 1.7479898929595947, "learning_rate": 1.3555800283963775e-05, "loss": 0.9887, "step": 7023 }, { "epoch": 0.4, "grad_norm": 1.7601523399353027, "learning_rate": 1.3554064013718528e-05, "loss": 1.0652, "step": 7024 }, { "epoch": 0.4, "grad_norm": 1.8693417310714722, "learning_rate": 1.3552327620832461e-05, "loss": 0.9381, "step": 7025 }, { "epoch": 0.4, "grad_norm": 1.7442224025726318, "learning_rate": 1.3550591105365492e-05, "loss": 1.0031, "step": 7026 }, { "epoch": 0.4, "grad_norm": 1.6652826070785522, "learning_rate": 1.354885446737754e-05, "loss": 0.9465, "step": 7027 }, { "epoch": 0.4, "grad_norm": 1.6905291080474854, "learning_rate": 1.3547117706928532e-05, "loss": 0.9896, "step": 7028 }, { "epoch": 0.4, "grad_norm": 1.7153137922286987, "learning_rate": 1.3545380824078403e-05, "loss": 1.0494, "step": 7029 }, { "epoch": 0.4, "grad_norm": 1.7378743886947632, "learning_rate": 1.3543643818887084e-05, "loss": 0.9866, "step": 7030 }, { "epoch": 0.4, "grad_norm": 1.6543859243392944, "learning_rate": 1.3541906691414517e-05, "loss": 0.9887, "step": 7031 }, { "epoch": 0.4, "grad_norm": 1.9482614994049072, "learning_rate": 1.3540169441720641e-05, "loss": 1.0719, "step": 7032 }, { "epoch": 0.4, "grad_norm": 1.8409395217895508, "learning_rate": 1.3538432069865408e-05, "loss": 0.925, "step": 7033 }, { "epoch": 0.4, "grad_norm": 1.8252531290054321, "learning_rate": 1.353669457590877e-05, "loss": 1.0035, "step": 7034 }, { "epoch": 0.4, "grad_norm": 1.8132827281951904, "learning_rate": 1.3534956959910682e-05, "loss": 0.9581, "step": 7035 }, { "epoch": 0.4, "grad_norm": 1.6536005735397339, "learning_rate": 1.3533219221931102e-05, "loss": 0.9243, "step": 7036 }, { "epoch": 0.4, "grad_norm": 1.0901060104370117, "learning_rate": 1.3531481362029997e-05, "loss": 0.6558, "step": 7037 }, { "epoch": 0.4, "grad_norm": 1.8233550786972046, "learning_rate": 1.3529743380267335e-05, "loss": 0.9779, "step": 7038 }, { "epoch": 0.4, "grad_norm": 1.0900726318359375, "learning_rate": 1.3528005276703089e-05, "loss": 0.5776, "step": 7039 }, { "epoch": 0.4, "grad_norm": 1.9141634702682495, "learning_rate": 1.3526267051397235e-05, "loss": 1.0117, "step": 7040 }, { "epoch": 0.4, "grad_norm": 1.820834994316101, "learning_rate": 1.3524528704409759e-05, "loss": 1.0554, "step": 7041 }, { "epoch": 0.4, "grad_norm": 1.6915125846862793, "learning_rate": 1.3522790235800638e-05, "loss": 0.966, "step": 7042 }, { "epoch": 0.4, "grad_norm": 1.0108846426010132, "learning_rate": 1.3521051645629867e-05, "loss": 0.6209, "step": 7043 }, { "epoch": 0.4, "grad_norm": 1.7411608695983887, "learning_rate": 1.351931293395744e-05, "loss": 0.966, "step": 7044 }, { "epoch": 0.4, "grad_norm": 0.961833119392395, "learning_rate": 1.3517574100843356e-05, "loss": 0.4893, "step": 7045 }, { "epoch": 0.4, "grad_norm": 1.1222561597824097, "learning_rate": 1.3515835146347616e-05, "loss": 0.6678, "step": 7046 }, { "epoch": 0.4, "grad_norm": 1.9756367206573486, "learning_rate": 1.3514096070530225e-05, "loss": 0.9883, "step": 7047 }, { "epoch": 0.4, "grad_norm": 1.9438972473144531, "learning_rate": 1.3512356873451191e-05, "loss": 1.0327, "step": 7048 }, { "epoch": 0.4, "grad_norm": 1.832377552986145, "learning_rate": 1.3510617555170538e-05, "loss": 1.0228, "step": 7049 }, { "epoch": 0.4, "grad_norm": 1.6580421924591064, "learning_rate": 1.3508878115748279e-05, "loss": 0.9652, "step": 7050 }, { "epoch": 0.4, "grad_norm": 1.6248708963394165, "learning_rate": 1.3507138555244436e-05, "loss": 0.9789, "step": 7051 }, { "epoch": 0.4, "grad_norm": 1.8159586191177368, "learning_rate": 1.350539887371904e-05, "loss": 0.965, "step": 7052 }, { "epoch": 0.4, "grad_norm": 1.7084332704544067, "learning_rate": 1.350365907123212e-05, "loss": 1.0746, "step": 7053 }, { "epoch": 0.4, "grad_norm": 1.7992106676101685, "learning_rate": 1.3501919147843715e-05, "loss": 0.9973, "step": 7054 }, { "epoch": 0.4, "grad_norm": 1.803873896598816, "learning_rate": 1.350017910361386e-05, "loss": 0.926, "step": 7055 }, { "epoch": 0.4, "grad_norm": 2.045374631881714, "learning_rate": 1.3498438938602601e-05, "loss": 1.0142, "step": 7056 }, { "epoch": 0.4, "grad_norm": 1.6891943216323853, "learning_rate": 1.3496698652869985e-05, "loss": 0.9741, "step": 7057 }, { "epoch": 0.4, "grad_norm": 1.6306461095809937, "learning_rate": 1.3494958246476071e-05, "loss": 1.007, "step": 7058 }, { "epoch": 0.4, "grad_norm": 1.6677608489990234, "learning_rate": 1.3493217719480907e-05, "loss": 1.0304, "step": 7059 }, { "epoch": 0.4, "grad_norm": 1.9547065496444702, "learning_rate": 1.349147707194456e-05, "loss": 0.9932, "step": 7060 }, { "epoch": 0.4, "grad_norm": 1.8835636377334595, "learning_rate": 1.3489736303927088e-05, "loss": 1.0529, "step": 7061 }, { "epoch": 0.41, "grad_norm": 1.9034584760665894, "learning_rate": 1.3487995415488568e-05, "loss": 0.9915, "step": 7062 }, { "epoch": 0.41, "grad_norm": 1.7705930471420288, "learning_rate": 1.348625440668907e-05, "loss": 0.9218, "step": 7063 }, { "epoch": 0.41, "grad_norm": 1.8528423309326172, "learning_rate": 1.3484513277588668e-05, "loss": 0.9631, "step": 7064 }, { "epoch": 0.41, "grad_norm": 1.9423719644546509, "learning_rate": 1.3482772028247448e-05, "loss": 0.9791, "step": 7065 }, { "epoch": 0.41, "grad_norm": 1.749715805053711, "learning_rate": 1.3481030658725496e-05, "loss": 0.9833, "step": 7066 }, { "epoch": 0.41, "grad_norm": 1.7337987422943115, "learning_rate": 1.3479289169082899e-05, "loss": 0.9834, "step": 7067 }, { "epoch": 0.41, "grad_norm": 1.6208419799804688, "learning_rate": 1.3477547559379748e-05, "loss": 0.926, "step": 7068 }, { "epoch": 0.41, "grad_norm": 1.850780725479126, "learning_rate": 1.3475805829676149e-05, "loss": 0.9694, "step": 7069 }, { "epoch": 0.41, "grad_norm": 1.962349534034729, "learning_rate": 1.34740639800322e-05, "loss": 1.0264, "step": 7070 }, { "epoch": 0.41, "grad_norm": 1.781683087348938, "learning_rate": 1.3472322010508003e-05, "loss": 0.9417, "step": 7071 }, { "epoch": 0.41, "grad_norm": 1.6838470697402954, "learning_rate": 1.3470579921163675e-05, "loss": 1.0583, "step": 7072 }, { "epoch": 0.41, "grad_norm": 1.8170595169067383, "learning_rate": 1.3468837712059331e-05, "loss": 1.0239, "step": 7073 }, { "epoch": 0.41, "grad_norm": 1.7725492715835571, "learning_rate": 1.3467095383255087e-05, "loss": 1.0163, "step": 7074 }, { "epoch": 0.41, "grad_norm": 1.7359145879745483, "learning_rate": 1.3465352934811065e-05, "loss": 0.9067, "step": 7075 }, { "epoch": 0.41, "grad_norm": 2.535945177078247, "learning_rate": 1.3463610366787392e-05, "loss": 1.078, "step": 7076 }, { "epoch": 0.41, "grad_norm": 1.7793300151824951, "learning_rate": 1.3461867679244203e-05, "loss": 0.9522, "step": 7077 }, { "epoch": 0.41, "grad_norm": 1.8639154434204102, "learning_rate": 1.346012487224163e-05, "loss": 1.0918, "step": 7078 }, { "epoch": 0.41, "grad_norm": 1.718047857284546, "learning_rate": 1.3458381945839814e-05, "loss": 1.0124, "step": 7079 }, { "epoch": 0.41, "grad_norm": 1.898523211479187, "learning_rate": 1.3456638900098895e-05, "loss": 0.9628, "step": 7080 }, { "epoch": 0.41, "grad_norm": 1.6874685287475586, "learning_rate": 1.3454895735079024e-05, "loss": 0.9438, "step": 7081 }, { "epoch": 0.41, "grad_norm": 1.7769930362701416, "learning_rate": 1.3453152450840353e-05, "loss": 1.0077, "step": 7082 }, { "epoch": 0.41, "grad_norm": 1.7403358221054077, "learning_rate": 1.3451409047443036e-05, "loss": 0.9738, "step": 7083 }, { "epoch": 0.41, "grad_norm": 1.5668665170669556, "learning_rate": 1.3449665524947234e-05, "loss": 0.9835, "step": 7084 }, { "epoch": 0.41, "grad_norm": 1.709720492362976, "learning_rate": 1.3447921883413114e-05, "loss": 1.0087, "step": 7085 }, { "epoch": 0.41, "grad_norm": 1.7522914409637451, "learning_rate": 1.3446178122900837e-05, "loss": 1.0208, "step": 7086 }, { "epoch": 0.41, "grad_norm": 1.8360804319381714, "learning_rate": 1.344443424347058e-05, "loss": 1.0475, "step": 7087 }, { "epoch": 0.41, "grad_norm": 1.9595894813537598, "learning_rate": 1.3442690245182521e-05, "loss": 1.0712, "step": 7088 }, { "epoch": 0.41, "grad_norm": 1.747092604637146, "learning_rate": 1.3440946128096836e-05, "loss": 0.9465, "step": 7089 }, { "epoch": 0.41, "grad_norm": 1.1992155313491821, "learning_rate": 1.3439201892273715e-05, "loss": 0.6542, "step": 7090 }, { "epoch": 0.41, "grad_norm": 1.942929744720459, "learning_rate": 1.3437457537773341e-05, "loss": 1.0208, "step": 7091 }, { "epoch": 0.41, "grad_norm": 2.0027027130126953, "learning_rate": 1.3435713064655913e-05, "loss": 1.0236, "step": 7092 }, { "epoch": 0.41, "grad_norm": 1.7496685981750488, "learning_rate": 1.3433968472981622e-05, "loss": 0.9923, "step": 7093 }, { "epoch": 0.41, "grad_norm": 1.9361763000488281, "learning_rate": 1.3432223762810672e-05, "loss": 1.0381, "step": 7094 }, { "epoch": 0.41, "grad_norm": 1.713435173034668, "learning_rate": 1.3430478934203265e-05, "loss": 1.0205, "step": 7095 }, { "epoch": 0.41, "grad_norm": 2.0750579833984375, "learning_rate": 1.3428733987219618e-05, "loss": 1.022, "step": 7096 }, { "epoch": 0.41, "grad_norm": 1.9263370037078857, "learning_rate": 1.3426988921919934e-05, "loss": 0.9751, "step": 7097 }, { "epoch": 0.41, "grad_norm": 1.691001296043396, "learning_rate": 1.3425243738364435e-05, "loss": 0.9429, "step": 7098 }, { "epoch": 0.41, "grad_norm": 1.7930668592453003, "learning_rate": 1.3423498436613347e-05, "loss": 0.938, "step": 7099 }, { "epoch": 0.41, "grad_norm": 1.732703447341919, "learning_rate": 1.3421753016726889e-05, "loss": 0.9909, "step": 7100 }, { "epoch": 0.41, "grad_norm": 1.6890366077423096, "learning_rate": 1.3420007478765291e-05, "loss": 0.9478, "step": 7101 }, { "epoch": 0.41, "grad_norm": 1.833478569984436, "learning_rate": 1.3418261822788789e-05, "loss": 0.9757, "step": 7102 }, { "epoch": 0.41, "grad_norm": 1.6822532415390015, "learning_rate": 1.3416516048857623e-05, "loss": 0.8885, "step": 7103 }, { "epoch": 0.41, "grad_norm": 1.7534940242767334, "learning_rate": 1.3414770157032026e-05, "loss": 0.9465, "step": 7104 }, { "epoch": 0.41, "grad_norm": 1.797637701034546, "learning_rate": 1.3413024147372256e-05, "loss": 0.9854, "step": 7105 }, { "epoch": 0.41, "grad_norm": 1.754135251045227, "learning_rate": 1.3411278019938552e-05, "loss": 1.0972, "step": 7106 }, { "epoch": 0.41, "grad_norm": 1.7949976921081543, "learning_rate": 1.3409531774791175e-05, "loss": 0.9662, "step": 7107 }, { "epoch": 0.41, "grad_norm": 1.7370597124099731, "learning_rate": 1.340778541199038e-05, "loss": 0.9612, "step": 7108 }, { "epoch": 0.41, "grad_norm": 1.7453358173370361, "learning_rate": 1.340603893159643e-05, "loss": 0.9048, "step": 7109 }, { "epoch": 0.41, "grad_norm": 1.835410475730896, "learning_rate": 1.3404292333669588e-05, "loss": 0.8921, "step": 7110 }, { "epoch": 0.41, "grad_norm": 1.8565512895584106, "learning_rate": 1.3402545618270128e-05, "loss": 1.0153, "step": 7111 }, { "epoch": 0.41, "grad_norm": 1.8340508937835693, "learning_rate": 1.3400798785458326e-05, "loss": 1.0048, "step": 7112 }, { "epoch": 0.41, "grad_norm": 1.602967619895935, "learning_rate": 1.339905183529446e-05, "loss": 0.9358, "step": 7113 }, { "epoch": 0.41, "grad_norm": 1.698876142501831, "learning_rate": 1.3397304767838801e-05, "loss": 1.0939, "step": 7114 }, { "epoch": 0.41, "grad_norm": 1.7387785911560059, "learning_rate": 1.339555758315165e-05, "loss": 0.999, "step": 7115 }, { "epoch": 0.41, "grad_norm": 1.1303051710128784, "learning_rate": 1.3393810281293294e-05, "loss": 0.6442, "step": 7116 }, { "epoch": 0.41, "grad_norm": 1.8230528831481934, "learning_rate": 1.3392062862324023e-05, "loss": 0.9495, "step": 7117 }, { "epoch": 0.41, "grad_norm": 1.907477855682373, "learning_rate": 1.3390315326304138e-05, "loss": 1.0355, "step": 7118 }, { "epoch": 0.41, "grad_norm": 1.5617702007293701, "learning_rate": 1.3388567673293942e-05, "loss": 0.9178, "step": 7119 }, { "epoch": 0.41, "grad_norm": 1.7005559206008911, "learning_rate": 1.338681990335374e-05, "loss": 1.031, "step": 7120 }, { "epoch": 0.41, "grad_norm": 2.0625462532043457, "learning_rate": 1.3385072016543846e-05, "loss": 0.9813, "step": 7121 }, { "epoch": 0.41, "grad_norm": 2.018613338470459, "learning_rate": 1.3383324012924571e-05, "loss": 1.0617, "step": 7122 }, { "epoch": 0.41, "grad_norm": 1.6336359977722168, "learning_rate": 1.3381575892556236e-05, "loss": 1.0361, "step": 7123 }, { "epoch": 0.41, "grad_norm": 1.5899081230163574, "learning_rate": 1.3379827655499163e-05, "loss": 1.0524, "step": 7124 }, { "epoch": 0.41, "grad_norm": 1.9728938341140747, "learning_rate": 1.3378079301813676e-05, "loss": 1.0791, "step": 7125 }, { "epoch": 0.41, "grad_norm": 1.9653327465057373, "learning_rate": 1.3376330831560111e-05, "loss": 1.0768, "step": 7126 }, { "epoch": 0.41, "grad_norm": 1.6958613395690918, "learning_rate": 1.33745822447988e-05, "loss": 0.9449, "step": 7127 }, { "epoch": 0.41, "grad_norm": 1.6509110927581787, "learning_rate": 1.3372833541590082e-05, "loss": 1.0184, "step": 7128 }, { "epoch": 0.41, "grad_norm": 1.7942200899124146, "learning_rate": 1.33710847219943e-05, "loss": 1.0238, "step": 7129 }, { "epoch": 0.41, "grad_norm": 1.6847031116485596, "learning_rate": 1.3369335786071805e-05, "loss": 0.9873, "step": 7130 }, { "epoch": 0.41, "grad_norm": 1.8261550664901733, "learning_rate": 1.3367586733882941e-05, "loss": 1.008, "step": 7131 }, { "epoch": 0.41, "grad_norm": 1.85873544216156, "learning_rate": 1.3365837565488065e-05, "loss": 1.0458, "step": 7132 }, { "epoch": 0.41, "grad_norm": 1.6591531038284302, "learning_rate": 1.3364088280947535e-05, "loss": 0.9591, "step": 7133 }, { "epoch": 0.41, "grad_norm": 1.7034783363342285, "learning_rate": 1.336233888032172e-05, "loss": 0.9304, "step": 7134 }, { "epoch": 0.41, "grad_norm": 2.0323245525360107, "learning_rate": 1.3360589363670979e-05, "loss": 1.0623, "step": 7135 }, { "epoch": 0.41, "grad_norm": 1.8291207551956177, "learning_rate": 1.3358839731055688e-05, "loss": 0.9202, "step": 7136 }, { "epoch": 0.41, "grad_norm": 1.7845864295959473, "learning_rate": 1.3357089982536217e-05, "loss": 0.9608, "step": 7137 }, { "epoch": 0.41, "grad_norm": 1.5888057947158813, "learning_rate": 1.3355340118172953e-05, "loss": 0.964, "step": 7138 }, { "epoch": 0.41, "grad_norm": 1.9319008588790894, "learning_rate": 1.3353590138026273e-05, "loss": 1.0629, "step": 7139 }, { "epoch": 0.41, "grad_norm": 1.931626558303833, "learning_rate": 1.3351840042156565e-05, "loss": 0.9517, "step": 7140 }, { "epoch": 0.41, "grad_norm": 1.8275007009506226, "learning_rate": 1.335008983062422e-05, "loss": 0.9566, "step": 7141 }, { "epoch": 0.41, "grad_norm": 1.721826195716858, "learning_rate": 1.3348339503489634e-05, "loss": 0.9645, "step": 7142 }, { "epoch": 0.41, "grad_norm": 1.6183193922042847, "learning_rate": 1.3346589060813205e-05, "loss": 0.9714, "step": 7143 }, { "epoch": 0.41, "grad_norm": 1.7025712728500366, "learning_rate": 1.3344838502655333e-05, "loss": 1.0471, "step": 7144 }, { "epoch": 0.41, "grad_norm": 1.911240577697754, "learning_rate": 1.334308782907643e-05, "loss": 1.0072, "step": 7145 }, { "epoch": 0.41, "grad_norm": 1.7872273921966553, "learning_rate": 1.3341337040136905e-05, "loss": 0.9913, "step": 7146 }, { "epoch": 0.41, "grad_norm": 1.749800443649292, "learning_rate": 1.3339586135897168e-05, "loss": 1.0408, "step": 7147 }, { "epoch": 0.41, "grad_norm": 2.014974594116211, "learning_rate": 1.3337835116417649e-05, "loss": 0.9006, "step": 7148 }, { "epoch": 0.41, "grad_norm": 1.8732120990753174, "learning_rate": 1.3336083981758758e-05, "loss": 0.9283, "step": 7149 }, { "epoch": 0.41, "grad_norm": 1.8451341390609741, "learning_rate": 1.3334332731980933e-05, "loss": 1.004, "step": 7150 }, { "epoch": 0.41, "grad_norm": 1.6996300220489502, "learning_rate": 1.3332581367144598e-05, "loss": 0.991, "step": 7151 }, { "epoch": 0.41, "grad_norm": 1.7470327615737915, "learning_rate": 1.3330829887310186e-05, "loss": 1.0038, "step": 7152 }, { "epoch": 0.41, "grad_norm": 1.8499749898910522, "learning_rate": 1.332907829253814e-05, "loss": 0.9096, "step": 7153 }, { "epoch": 0.41, "grad_norm": 1.6973888874053955, "learning_rate": 1.3327326582888902e-05, "loss": 1.0011, "step": 7154 }, { "epoch": 0.41, "grad_norm": 1.8498083353042603, "learning_rate": 1.3325574758422919e-05, "loss": 1.0434, "step": 7155 }, { "epoch": 0.41, "grad_norm": 1.9180599451065063, "learning_rate": 1.3323822819200642e-05, "loss": 0.9956, "step": 7156 }, { "epoch": 0.41, "grad_norm": 1.7870728969573975, "learning_rate": 1.3322070765282522e-05, "loss": 0.9784, "step": 7157 }, { "epoch": 0.41, "grad_norm": 1.5605841875076294, "learning_rate": 1.332031859672902e-05, "loss": 0.9699, "step": 7158 }, { "epoch": 0.41, "grad_norm": 1.1336910724639893, "learning_rate": 1.33185663136006e-05, "loss": 0.6108, "step": 7159 }, { "epoch": 0.41, "grad_norm": 1.9131513833999634, "learning_rate": 1.3316813915957724e-05, "loss": 0.9672, "step": 7160 }, { "epoch": 0.41, "grad_norm": 1.8049116134643555, "learning_rate": 1.3315061403860868e-05, "loss": 0.9577, "step": 7161 }, { "epoch": 0.41, "grad_norm": 1.9845693111419678, "learning_rate": 1.3313308777370502e-05, "loss": 0.9679, "step": 7162 }, { "epoch": 0.41, "grad_norm": 1.634359359741211, "learning_rate": 1.3311556036547104e-05, "loss": 0.9336, "step": 7163 }, { "epoch": 0.41, "grad_norm": 1.7526347637176514, "learning_rate": 1.3309803181451155e-05, "loss": 1.0303, "step": 7164 }, { "epoch": 0.41, "grad_norm": 2.046959161758423, "learning_rate": 1.330805021214315e-05, "loss": 1.0261, "step": 7165 }, { "epoch": 0.41, "grad_norm": 1.863598108291626, "learning_rate": 1.330629712868357e-05, "loss": 1.083, "step": 7166 }, { "epoch": 0.41, "grad_norm": 2.0913314819335938, "learning_rate": 1.330454393113291e-05, "loss": 0.9598, "step": 7167 }, { "epoch": 0.41, "grad_norm": 1.0542597770690918, "learning_rate": 1.3302790619551673e-05, "loss": 0.5728, "step": 7168 }, { "epoch": 0.41, "grad_norm": 1.8831356763839722, "learning_rate": 1.3301037194000355e-05, "loss": 1.0135, "step": 7169 }, { "epoch": 0.41, "grad_norm": 1.7314691543579102, "learning_rate": 1.3299283654539467e-05, "loss": 1.0009, "step": 7170 }, { "epoch": 0.41, "grad_norm": 1.7369725704193115, "learning_rate": 1.3297530001229515e-05, "loss": 0.9467, "step": 7171 }, { "epoch": 0.41, "grad_norm": 1.62538480758667, "learning_rate": 1.3295776234131015e-05, "loss": 0.9317, "step": 7172 }, { "epoch": 0.41, "grad_norm": 2.219205141067505, "learning_rate": 1.3294022353304481e-05, "loss": 1.016, "step": 7173 }, { "epoch": 0.41, "grad_norm": 1.77105712890625, "learning_rate": 1.329226835881044e-05, "loss": 1.0153, "step": 7174 }, { "epoch": 0.41, "grad_norm": 1.790759563446045, "learning_rate": 1.3290514250709414e-05, "loss": 0.9287, "step": 7175 }, { "epoch": 0.41, "grad_norm": 1.7438914775848389, "learning_rate": 1.3288760029061929e-05, "loss": 0.9405, "step": 7176 }, { "epoch": 0.41, "grad_norm": 1.7851142883300781, "learning_rate": 1.3287005693928525e-05, "loss": 1.0582, "step": 7177 }, { "epoch": 0.41, "grad_norm": 1.8819966316223145, "learning_rate": 1.3285251245369736e-05, "loss": 0.9538, "step": 7178 }, { "epoch": 0.41, "grad_norm": 1.8589634895324707, "learning_rate": 1.3283496683446106e-05, "loss": 0.984, "step": 7179 }, { "epoch": 0.41, "grad_norm": 1.6994068622589111, "learning_rate": 1.3281742008218173e-05, "loss": 0.9692, "step": 7180 }, { "epoch": 0.41, "grad_norm": 1.7802623510360718, "learning_rate": 1.3279987219746495e-05, "loss": 1.0179, "step": 7181 }, { "epoch": 0.41, "grad_norm": 1.731086254119873, "learning_rate": 1.3278232318091618e-05, "loss": 1.0565, "step": 7182 }, { "epoch": 0.41, "grad_norm": 1.723964810371399, "learning_rate": 1.3276477303314102e-05, "loss": 0.9658, "step": 7183 }, { "epoch": 0.41, "grad_norm": 1.7675085067749023, "learning_rate": 1.3274722175474505e-05, "loss": 1.0576, "step": 7184 }, { "epoch": 0.41, "grad_norm": 1.9074996709823608, "learning_rate": 1.3272966934633396e-05, "loss": 1.117, "step": 7185 }, { "epoch": 0.41, "grad_norm": 1.7390044927597046, "learning_rate": 1.327121158085134e-05, "loss": 0.9902, "step": 7186 }, { "epoch": 0.41, "grad_norm": 1.738469123840332, "learning_rate": 1.3269456114188908e-05, "loss": 0.9622, "step": 7187 }, { "epoch": 0.41, "grad_norm": 1.7782527208328247, "learning_rate": 1.326770053470668e-05, "loss": 1.0451, "step": 7188 }, { "epoch": 0.41, "grad_norm": 1.7204747200012207, "learning_rate": 1.3265944842465236e-05, "loss": 1.0044, "step": 7189 }, { "epoch": 0.41, "grad_norm": 1.970015287399292, "learning_rate": 1.3264189037525154e-05, "loss": 1.0453, "step": 7190 }, { "epoch": 0.41, "grad_norm": 1.6674587726593018, "learning_rate": 1.3262433119947028e-05, "loss": 1.0404, "step": 7191 }, { "epoch": 0.41, "grad_norm": 1.6568924188613892, "learning_rate": 1.3260677089791449e-05, "loss": 0.9335, "step": 7192 }, { "epoch": 0.41, "grad_norm": 1.9165407419204712, "learning_rate": 1.3258920947119013e-05, "loss": 0.944, "step": 7193 }, { "epoch": 0.41, "grad_norm": 1.6607844829559326, "learning_rate": 1.3257164691990321e-05, "loss": 1.0182, "step": 7194 }, { "epoch": 0.41, "grad_norm": 1.7086931467056274, "learning_rate": 1.3255408324465971e-05, "loss": 1.005, "step": 7195 }, { "epoch": 0.41, "grad_norm": 1.8508728742599487, "learning_rate": 1.3253651844606571e-05, "loss": 1.021, "step": 7196 }, { "epoch": 0.41, "grad_norm": 1.7938482761383057, "learning_rate": 1.3251895252472738e-05, "loss": 0.9506, "step": 7197 }, { "epoch": 0.41, "grad_norm": 1.7508562803268433, "learning_rate": 1.3250138548125082e-05, "loss": 1.0347, "step": 7198 }, { "epoch": 0.41, "grad_norm": 1.7429879903793335, "learning_rate": 1.3248381731624225e-05, "loss": 0.9703, "step": 7199 }, { "epoch": 0.41, "grad_norm": 1.802351713180542, "learning_rate": 1.3246624803030787e-05, "loss": 1.0595, "step": 7200 }, { "epoch": 0.41, "grad_norm": 2.117609739303589, "learning_rate": 1.3244867762405398e-05, "loss": 1.0867, "step": 7201 }, { "epoch": 0.41, "grad_norm": 1.744667649269104, "learning_rate": 1.3243110609808685e-05, "loss": 0.8805, "step": 7202 }, { "epoch": 0.41, "grad_norm": 1.7889891862869263, "learning_rate": 1.3241353345301282e-05, "loss": 1.0372, "step": 7203 }, { "epoch": 0.41, "grad_norm": 1.8246865272521973, "learning_rate": 1.3239595968943832e-05, "loss": 1.0495, "step": 7204 }, { "epoch": 0.41, "grad_norm": 1.7911354303359985, "learning_rate": 1.3237838480796976e-05, "loss": 0.9979, "step": 7205 }, { "epoch": 0.41, "grad_norm": 1.9357727766036987, "learning_rate": 1.3236080880921355e-05, "loss": 0.9865, "step": 7206 }, { "epoch": 0.41, "grad_norm": 1.9034751653671265, "learning_rate": 1.3234323169377627e-05, "loss": 0.9316, "step": 7207 }, { "epoch": 0.41, "grad_norm": 1.8116472959518433, "learning_rate": 1.3232565346226439e-05, "loss": 0.9508, "step": 7208 }, { "epoch": 0.41, "grad_norm": 1.5838007926940918, "learning_rate": 1.323080741152845e-05, "loss": 0.9717, "step": 7209 }, { "epoch": 0.41, "grad_norm": 1.7649577856063843, "learning_rate": 1.3229049365344322e-05, "loss": 0.9972, "step": 7210 }, { "epoch": 0.41, "grad_norm": 1.8748691082000732, "learning_rate": 1.322729120773472e-05, "loss": 1.024, "step": 7211 }, { "epoch": 0.41, "grad_norm": 1.8980518579483032, "learning_rate": 1.3225532938760317e-05, "loss": 0.9375, "step": 7212 }, { "epoch": 0.41, "grad_norm": 1.8921834230422974, "learning_rate": 1.3223774558481776e-05, "loss": 0.9723, "step": 7213 }, { "epoch": 0.41, "grad_norm": 1.560067057609558, "learning_rate": 1.3222016066959786e-05, "loss": 0.8823, "step": 7214 }, { "epoch": 0.41, "grad_norm": 1.986478328704834, "learning_rate": 1.322025746425502e-05, "loss": 0.9961, "step": 7215 }, { "epoch": 0.41, "grad_norm": 1.8217487335205078, "learning_rate": 1.3218498750428164e-05, "loss": 1.0161, "step": 7216 }, { "epoch": 0.41, "grad_norm": 1.6017341613769531, "learning_rate": 1.3216739925539908e-05, "loss": 0.9567, "step": 7217 }, { "epoch": 0.41, "grad_norm": 1.0322140455245972, "learning_rate": 1.3214980989650939e-05, "loss": 0.5558, "step": 7218 }, { "epoch": 0.41, "grad_norm": 1.8201757669448853, "learning_rate": 1.3213221942821958e-05, "loss": 0.9655, "step": 7219 }, { "epoch": 0.41, "grad_norm": 2.087665557861328, "learning_rate": 1.3211462785113666e-05, "loss": 0.9508, "step": 7220 }, { "epoch": 0.41, "grad_norm": 1.868225336074829, "learning_rate": 1.3209703516586763e-05, "loss": 1.0054, "step": 7221 }, { "epoch": 0.41, "grad_norm": 1.1085224151611328, "learning_rate": 1.3207944137301958e-05, "loss": 0.6108, "step": 7222 }, { "epoch": 0.41, "grad_norm": 1.904455542564392, "learning_rate": 1.3206184647319961e-05, "loss": 1.0375, "step": 7223 }, { "epoch": 0.41, "grad_norm": 1.720018744468689, "learning_rate": 1.3204425046701487e-05, "loss": 1.0041, "step": 7224 }, { "epoch": 0.41, "grad_norm": 1.8209823369979858, "learning_rate": 1.3202665335507261e-05, "loss": 1.0151, "step": 7225 }, { "epoch": 0.41, "grad_norm": 1.7863492965698242, "learning_rate": 1.3200905513797997e-05, "loss": 1.0613, "step": 7226 }, { "epoch": 0.41, "grad_norm": 1.901233434677124, "learning_rate": 1.3199145581634425e-05, "loss": 1.0055, "step": 7227 }, { "epoch": 0.41, "grad_norm": 1.7353391647338867, "learning_rate": 1.3197385539077274e-05, "loss": 1.0249, "step": 7228 }, { "epoch": 0.41, "grad_norm": 1.7799358367919922, "learning_rate": 1.319562538618728e-05, "loss": 0.9793, "step": 7229 }, { "epoch": 0.41, "grad_norm": 1.7576563358306885, "learning_rate": 1.319386512302518e-05, "loss": 1.0232, "step": 7230 }, { "epoch": 0.41, "grad_norm": 1.70844566822052, "learning_rate": 1.3192104749651717e-05, "loss": 0.9748, "step": 7231 }, { "epoch": 0.41, "grad_norm": 1.7767360210418701, "learning_rate": 1.3190344266127639e-05, "loss": 0.9327, "step": 7232 }, { "epoch": 0.41, "grad_norm": 1.8299583196640015, "learning_rate": 1.318858367251369e-05, "loss": 1.0248, "step": 7233 }, { "epoch": 0.41, "grad_norm": 1.6567928791046143, "learning_rate": 1.3186822968870624e-05, "loss": 0.9787, "step": 7234 }, { "epoch": 0.41, "grad_norm": 1.0927796363830566, "learning_rate": 1.31850621552592e-05, "loss": 0.5708, "step": 7235 }, { "epoch": 0.41, "grad_norm": 1.6882855892181396, "learning_rate": 1.3183301231740182e-05, "loss": 1.0278, "step": 7236 }, { "epoch": 0.42, "grad_norm": 1.6057868003845215, "learning_rate": 1.3181540198374325e-05, "loss": 0.9118, "step": 7237 }, { "epoch": 0.42, "grad_norm": 1.7376574277877808, "learning_rate": 1.3179779055222407e-05, "loss": 1.0209, "step": 7238 }, { "epoch": 0.42, "grad_norm": 1.5406112670898438, "learning_rate": 1.3178017802345196e-05, "loss": 0.9771, "step": 7239 }, { "epoch": 0.42, "grad_norm": 1.91118586063385, "learning_rate": 1.3176256439803465e-05, "loss": 1.0388, "step": 7240 }, { "epoch": 0.42, "grad_norm": 1.890557050704956, "learning_rate": 1.3174494967658e-05, "loss": 0.8887, "step": 7241 }, { "epoch": 0.42, "grad_norm": 1.65822172164917, "learning_rate": 1.3172733385969579e-05, "loss": 1.0199, "step": 7242 }, { "epoch": 0.42, "grad_norm": 1.7154620885849, "learning_rate": 1.317097169479899e-05, "loss": 1.0009, "step": 7243 }, { "epoch": 0.42, "grad_norm": 1.7485640048980713, "learning_rate": 1.316920989420703e-05, "loss": 1.0079, "step": 7244 }, { "epoch": 0.42, "grad_norm": 1.6414271593093872, "learning_rate": 1.3167447984254486e-05, "loss": 1.0301, "step": 7245 }, { "epoch": 0.42, "grad_norm": 1.7783876657485962, "learning_rate": 1.3165685965002159e-05, "loss": 1.0128, "step": 7246 }, { "epoch": 0.42, "grad_norm": 2.0908946990966797, "learning_rate": 1.3163923836510854e-05, "loss": 1.0684, "step": 7247 }, { "epoch": 0.42, "grad_norm": 1.7661899328231812, "learning_rate": 1.3162161598841378e-05, "loss": 1.0287, "step": 7248 }, { "epoch": 0.42, "grad_norm": 1.7683403491973877, "learning_rate": 1.3160399252054536e-05, "loss": 0.8959, "step": 7249 }, { "epoch": 0.42, "grad_norm": 1.848463535308838, "learning_rate": 1.3158636796211143e-05, "loss": 0.95, "step": 7250 }, { "epoch": 0.42, "grad_norm": 1.726299524307251, "learning_rate": 1.3156874231372022e-05, "loss": 0.9156, "step": 7251 }, { "epoch": 0.42, "grad_norm": 1.8885399103164673, "learning_rate": 1.3155111557597987e-05, "loss": 0.9677, "step": 7252 }, { "epoch": 0.42, "grad_norm": 1.6882926225662231, "learning_rate": 1.3153348774949864e-05, "loss": 1.0393, "step": 7253 }, { "epoch": 0.42, "grad_norm": 1.8314157724380493, "learning_rate": 1.3151585883488485e-05, "loss": 1.042, "step": 7254 }, { "epoch": 0.42, "grad_norm": 1.6638668775558472, "learning_rate": 1.314982288327468e-05, "loss": 0.9388, "step": 7255 }, { "epoch": 0.42, "grad_norm": 1.8940447568893433, "learning_rate": 1.3148059774369286e-05, "loss": 1.0152, "step": 7256 }, { "epoch": 0.42, "grad_norm": 1.8125624656677246, "learning_rate": 1.314629655683314e-05, "loss": 0.9426, "step": 7257 }, { "epoch": 0.42, "grad_norm": 1.8944802284240723, "learning_rate": 1.3144533230727092e-05, "loss": 1.0074, "step": 7258 }, { "epoch": 0.42, "grad_norm": 1.7745938301086426, "learning_rate": 1.3142769796111987e-05, "loss": 0.9879, "step": 7259 }, { "epoch": 0.42, "grad_norm": 1.7447702884674072, "learning_rate": 1.3141006253048674e-05, "loss": 1.0788, "step": 7260 }, { "epoch": 0.42, "grad_norm": 1.86833918094635, "learning_rate": 1.313924260159801e-05, "loss": 1.045, "step": 7261 }, { "epoch": 0.42, "grad_norm": 1.8839757442474365, "learning_rate": 1.3137478841820853e-05, "loss": 0.9568, "step": 7262 }, { "epoch": 0.42, "grad_norm": 1.644152283668518, "learning_rate": 1.3135714973778064e-05, "loss": 0.9814, "step": 7263 }, { "epoch": 0.42, "grad_norm": 1.5099202394485474, "learning_rate": 1.3133950997530512e-05, "loss": 0.9721, "step": 7264 }, { "epoch": 0.42, "grad_norm": 1.7904218435287476, "learning_rate": 1.3132186913139064e-05, "loss": 0.9817, "step": 7265 }, { "epoch": 0.42, "grad_norm": 1.7721269130706787, "learning_rate": 1.3130422720664596e-05, "loss": 0.9944, "step": 7266 }, { "epoch": 0.42, "grad_norm": 1.7509828805923462, "learning_rate": 1.3128658420167985e-05, "loss": 0.9935, "step": 7267 }, { "epoch": 0.42, "grad_norm": 1.729807734489441, "learning_rate": 1.312689401171011e-05, "loss": 0.9767, "step": 7268 }, { "epoch": 0.42, "grad_norm": 1.6518383026123047, "learning_rate": 1.3125129495351856e-05, "loss": 1.0219, "step": 7269 }, { "epoch": 0.42, "grad_norm": 1.7479612827301025, "learning_rate": 1.3123364871154113e-05, "loss": 1.0241, "step": 7270 }, { "epoch": 0.42, "grad_norm": 1.7142798900604248, "learning_rate": 1.3121600139177777e-05, "loss": 1.0556, "step": 7271 }, { "epoch": 0.42, "grad_norm": 1.8249727487564087, "learning_rate": 1.3119835299483738e-05, "loss": 0.9209, "step": 7272 }, { "epoch": 0.42, "grad_norm": 1.7366821765899658, "learning_rate": 1.3118070352132896e-05, "loss": 1.0192, "step": 7273 }, { "epoch": 0.42, "grad_norm": 1.795507788658142, "learning_rate": 1.3116305297186159e-05, "loss": 0.9037, "step": 7274 }, { "epoch": 0.42, "grad_norm": 1.6106244325637817, "learning_rate": 1.311454013470443e-05, "loss": 0.9856, "step": 7275 }, { "epoch": 0.42, "grad_norm": 1.8018512725830078, "learning_rate": 1.311277486474862e-05, "loss": 0.9717, "step": 7276 }, { "epoch": 0.42, "grad_norm": 1.7001457214355469, "learning_rate": 1.3111009487379647e-05, "loss": 0.9706, "step": 7277 }, { "epoch": 0.42, "grad_norm": 2.211688756942749, "learning_rate": 1.3109244002658425e-05, "loss": 1.0413, "step": 7278 }, { "epoch": 0.42, "grad_norm": 1.6731610298156738, "learning_rate": 1.3107478410645875e-05, "loss": 0.9965, "step": 7279 }, { "epoch": 0.42, "grad_norm": 1.6553643941879272, "learning_rate": 1.310571271140293e-05, "loss": 0.947, "step": 7280 }, { "epoch": 0.42, "grad_norm": 1.8816719055175781, "learning_rate": 1.3103946904990515e-05, "loss": 0.9851, "step": 7281 }, { "epoch": 0.42, "grad_norm": 1.8195414543151855, "learning_rate": 1.310218099146956e-05, "loss": 1.0232, "step": 7282 }, { "epoch": 0.42, "grad_norm": 1.7219852209091187, "learning_rate": 1.3100414970901008e-05, "loss": 1.0163, "step": 7283 }, { "epoch": 0.42, "grad_norm": 1.802555799484253, "learning_rate": 1.3098648843345789e-05, "loss": 1.0537, "step": 7284 }, { "epoch": 0.42, "grad_norm": 1.5903615951538086, "learning_rate": 1.309688260886486e-05, "loss": 1.0201, "step": 7285 }, { "epoch": 0.42, "grad_norm": 1.8213194608688354, "learning_rate": 1.3095116267519163e-05, "loss": 0.9884, "step": 7286 }, { "epoch": 0.42, "grad_norm": 1.7187060117721558, "learning_rate": 1.3093349819369647e-05, "loss": 1.0325, "step": 7287 }, { "epoch": 0.42, "grad_norm": 1.052876353263855, "learning_rate": 1.3091583264477273e-05, "loss": 0.5511, "step": 7288 }, { "epoch": 0.42, "grad_norm": 1.8216989040374756, "learning_rate": 1.3089816602902993e-05, "loss": 0.9528, "step": 7289 }, { "epoch": 0.42, "grad_norm": 1.8386342525482178, "learning_rate": 1.3088049834707777e-05, "loss": 1.0003, "step": 7290 }, { "epoch": 0.42, "grad_norm": 1.0155565738677979, "learning_rate": 1.3086282959952583e-05, "loss": 0.6111, "step": 7291 }, { "epoch": 0.42, "grad_norm": 1.6320468187332153, "learning_rate": 1.3084515978698389e-05, "loss": 0.9852, "step": 7292 }, { "epoch": 0.42, "grad_norm": 1.714259147644043, "learning_rate": 1.3082748891006164e-05, "loss": 1.0311, "step": 7293 }, { "epoch": 0.42, "grad_norm": 1.7873072624206543, "learning_rate": 1.3080981696936883e-05, "loss": 0.9709, "step": 7294 }, { "epoch": 0.42, "grad_norm": 0.9880605340003967, "learning_rate": 1.3079214396551532e-05, "loss": 0.5795, "step": 7295 }, { "epoch": 0.42, "grad_norm": 1.7565131187438965, "learning_rate": 1.3077446989911092e-05, "loss": 1.033, "step": 7296 }, { "epoch": 0.42, "grad_norm": 1.7618820667266846, "learning_rate": 1.3075679477076556e-05, "loss": 0.894, "step": 7297 }, { "epoch": 0.42, "grad_norm": 1.6277841329574585, "learning_rate": 1.3073911858108911e-05, "loss": 0.9919, "step": 7298 }, { "epoch": 0.42, "grad_norm": 1.8146573305130005, "learning_rate": 1.3072144133069156e-05, "loss": 0.9427, "step": 7299 }, { "epoch": 0.42, "grad_norm": 2.079033374786377, "learning_rate": 1.3070376302018287e-05, "loss": 0.9939, "step": 7300 }, { "epoch": 0.42, "grad_norm": 1.6928759813308716, "learning_rate": 1.3068608365017308e-05, "loss": 1.0517, "step": 7301 }, { "epoch": 0.42, "grad_norm": 1.5805751085281372, "learning_rate": 1.3066840322127227e-05, "loss": 0.9607, "step": 7302 }, { "epoch": 0.42, "grad_norm": 1.7294808626174927, "learning_rate": 1.3065072173409055e-05, "loss": 1.0378, "step": 7303 }, { "epoch": 0.42, "grad_norm": 1.8863801956176758, "learning_rate": 1.3063303918923802e-05, "loss": 0.9914, "step": 7304 }, { "epoch": 0.42, "grad_norm": 2.181049346923828, "learning_rate": 1.306153555873249e-05, "loss": 1.0169, "step": 7305 }, { "epoch": 0.42, "grad_norm": 1.8362523317337036, "learning_rate": 1.3059767092896136e-05, "loss": 0.9995, "step": 7306 }, { "epoch": 0.42, "grad_norm": 1.7565770149230957, "learning_rate": 1.3057998521475768e-05, "loss": 1.0351, "step": 7307 }, { "epoch": 0.42, "grad_norm": 1.8455919027328491, "learning_rate": 1.305622984453241e-05, "loss": 1.0054, "step": 7308 }, { "epoch": 0.42, "grad_norm": 1.5867325067520142, "learning_rate": 1.3054461062127099e-05, "loss": 0.902, "step": 7309 }, { "epoch": 0.42, "grad_norm": 1.780295491218567, "learning_rate": 1.305269217432087e-05, "loss": 0.9707, "step": 7310 }, { "epoch": 0.42, "grad_norm": 1.6085394620895386, "learning_rate": 1.3050923181174762e-05, "loss": 0.937, "step": 7311 }, { "epoch": 0.42, "grad_norm": 1.6629457473754883, "learning_rate": 1.3049154082749813e-05, "loss": 1.0596, "step": 7312 }, { "epoch": 0.42, "grad_norm": 1.814444661140442, "learning_rate": 1.3047384879107079e-05, "loss": 0.9792, "step": 7313 }, { "epoch": 0.42, "grad_norm": 1.8534595966339111, "learning_rate": 1.3045615570307604e-05, "loss": 0.9752, "step": 7314 }, { "epoch": 0.42, "grad_norm": 1.9800876379013062, "learning_rate": 1.3043846156412443e-05, "loss": 0.9695, "step": 7315 }, { "epoch": 0.42, "grad_norm": 1.8575453758239746, "learning_rate": 1.3042076637482655e-05, "loss": 0.9578, "step": 7316 }, { "epoch": 0.42, "grad_norm": 1.806773066520691, "learning_rate": 1.3040307013579299e-05, "loss": 1.05, "step": 7317 }, { "epoch": 0.42, "grad_norm": 1.8021187782287598, "learning_rate": 1.3038537284763443e-05, "loss": 0.9848, "step": 7318 }, { "epoch": 0.42, "grad_norm": 1.9050631523132324, "learning_rate": 1.3036767451096148e-05, "loss": 0.9586, "step": 7319 }, { "epoch": 0.42, "grad_norm": 1.667320728302002, "learning_rate": 1.3034997512638493e-05, "loss": 0.9902, "step": 7320 }, { "epoch": 0.42, "grad_norm": 1.8604519367218018, "learning_rate": 1.3033227469451555e-05, "loss": 0.9761, "step": 7321 }, { "epoch": 0.42, "grad_norm": 1.6019165515899658, "learning_rate": 1.3031457321596409e-05, "loss": 0.9924, "step": 7322 }, { "epoch": 0.42, "grad_norm": 1.70156729221344, "learning_rate": 1.3029687069134134e-05, "loss": 0.9556, "step": 7323 }, { "epoch": 0.42, "grad_norm": 1.903174638748169, "learning_rate": 1.3027916712125825e-05, "loss": 0.966, "step": 7324 }, { "epoch": 0.42, "grad_norm": 1.7214933633804321, "learning_rate": 1.302614625063257e-05, "loss": 0.961, "step": 7325 }, { "epoch": 0.42, "grad_norm": 1.663572072982788, "learning_rate": 1.3024375684715458e-05, "loss": 0.8844, "step": 7326 }, { "epoch": 0.42, "grad_norm": 1.064779281616211, "learning_rate": 1.3022605014435591e-05, "loss": 0.5537, "step": 7327 }, { "epoch": 0.42, "grad_norm": 1.7520649433135986, "learning_rate": 1.3020834239854068e-05, "loss": 0.9155, "step": 7328 }, { "epoch": 0.42, "grad_norm": 1.7561284303665161, "learning_rate": 1.3019063361031994e-05, "loss": 1.0436, "step": 7329 }, { "epoch": 0.42, "grad_norm": 1.6289538145065308, "learning_rate": 1.3017292378030477e-05, "loss": 0.9728, "step": 7330 }, { "epoch": 0.42, "grad_norm": 1.8337544202804565, "learning_rate": 1.3015521290910628e-05, "loss": 1.0454, "step": 7331 }, { "epoch": 0.42, "grad_norm": 1.6629467010498047, "learning_rate": 1.3013750099733561e-05, "loss": 0.9906, "step": 7332 }, { "epoch": 0.42, "grad_norm": 1.8491228818893433, "learning_rate": 1.3011978804560401e-05, "loss": 0.8925, "step": 7333 }, { "epoch": 0.42, "grad_norm": 1.7411162853240967, "learning_rate": 1.3010207405452265e-05, "loss": 1.0043, "step": 7334 }, { "epoch": 0.42, "grad_norm": 1.8061507940292358, "learning_rate": 1.3008435902470276e-05, "loss": 1.0701, "step": 7335 }, { "epoch": 0.42, "grad_norm": 1.9804774522781372, "learning_rate": 1.300666429567557e-05, "loss": 0.9755, "step": 7336 }, { "epoch": 0.42, "grad_norm": 1.7164946794509888, "learning_rate": 1.3004892585129279e-05, "loss": 0.9141, "step": 7337 }, { "epoch": 0.42, "grad_norm": 1.6265473365783691, "learning_rate": 1.3003120770892536e-05, "loss": 0.9445, "step": 7338 }, { "epoch": 0.42, "grad_norm": 1.803780436515808, "learning_rate": 1.3001348853026488e-05, "loss": 0.9294, "step": 7339 }, { "epoch": 0.42, "grad_norm": 1.7544447183609009, "learning_rate": 1.2999576831592273e-05, "loss": 1.0138, "step": 7340 }, { "epoch": 0.42, "grad_norm": 1.751644492149353, "learning_rate": 1.299780470665104e-05, "loss": 1.002, "step": 7341 }, { "epoch": 0.42, "grad_norm": 1.6455060243606567, "learning_rate": 1.2996032478263943e-05, "loss": 0.9731, "step": 7342 }, { "epoch": 0.42, "grad_norm": 1.7202250957489014, "learning_rate": 1.2994260146492133e-05, "loss": 1.0097, "step": 7343 }, { "epoch": 0.42, "grad_norm": 1.7249274253845215, "learning_rate": 1.2992487711396768e-05, "loss": 0.959, "step": 7344 }, { "epoch": 0.42, "grad_norm": 1.7013154029846191, "learning_rate": 1.299071517303901e-05, "loss": 1.0827, "step": 7345 }, { "epoch": 0.42, "grad_norm": 1.958591341972351, "learning_rate": 1.2988942531480028e-05, "loss": 1.1027, "step": 7346 }, { "epoch": 0.42, "grad_norm": 1.5968042612075806, "learning_rate": 1.2987169786780988e-05, "loss": 0.9098, "step": 7347 }, { "epoch": 0.42, "grad_norm": 1.6995233297348022, "learning_rate": 1.2985396939003065e-05, "loss": 0.9614, "step": 7348 }, { "epoch": 0.42, "grad_norm": 1.8575081825256348, "learning_rate": 1.2983623988207432e-05, "loss": 1.0174, "step": 7349 }, { "epoch": 0.42, "grad_norm": 1.8969746828079224, "learning_rate": 1.2981850934455267e-05, "loss": 1.0359, "step": 7350 }, { "epoch": 0.42, "grad_norm": 1.7084649801254272, "learning_rate": 1.2980077777807755e-05, "loss": 0.9774, "step": 7351 }, { "epoch": 0.42, "grad_norm": 1.8542094230651855, "learning_rate": 1.2978304518326088e-05, "loss": 0.939, "step": 7352 }, { "epoch": 0.42, "grad_norm": 1.9423540830612183, "learning_rate": 1.297653115607145e-05, "loss": 1.062, "step": 7353 }, { "epoch": 0.42, "grad_norm": 1.897090196609497, "learning_rate": 1.2974757691105038e-05, "loss": 0.9799, "step": 7354 }, { "epoch": 0.42, "grad_norm": 1.8937289714813232, "learning_rate": 1.2972984123488045e-05, "loss": 1.013, "step": 7355 }, { "epoch": 0.42, "grad_norm": 1.8164328336715698, "learning_rate": 1.2971210453281675e-05, "loss": 0.9474, "step": 7356 }, { "epoch": 0.42, "grad_norm": 1.8939083814620972, "learning_rate": 1.2969436680547132e-05, "loss": 0.9777, "step": 7357 }, { "epoch": 0.42, "grad_norm": 1.850019931793213, "learning_rate": 1.2967662805345625e-05, "loss": 1.0043, "step": 7358 }, { "epoch": 0.42, "grad_norm": 1.152945876121521, "learning_rate": 1.2965888827738365e-05, "loss": 0.5927, "step": 7359 }, { "epoch": 0.42, "grad_norm": 1.8057949542999268, "learning_rate": 1.2964114747786564e-05, "loss": 0.995, "step": 7360 }, { "epoch": 0.42, "grad_norm": 1.6508755683898926, "learning_rate": 1.2962340565551443e-05, "loss": 0.9802, "step": 7361 }, { "epoch": 0.42, "grad_norm": 1.7664345502853394, "learning_rate": 1.2960566281094224e-05, "loss": 0.9584, "step": 7362 }, { "epoch": 0.42, "grad_norm": 2.007944107055664, "learning_rate": 1.2958791894476134e-05, "loss": 0.8857, "step": 7363 }, { "epoch": 0.42, "grad_norm": 1.7279704809188843, "learning_rate": 1.29570174057584e-05, "loss": 0.8938, "step": 7364 }, { "epoch": 0.42, "grad_norm": 1.7731008529663086, "learning_rate": 1.2955242815002258e-05, "loss": 0.9324, "step": 7365 }, { "epoch": 0.42, "grad_norm": 1.8896539211273193, "learning_rate": 1.295346812226894e-05, "loss": 0.9683, "step": 7366 }, { "epoch": 0.42, "grad_norm": 1.9950226545333862, "learning_rate": 1.2951693327619689e-05, "loss": 0.9765, "step": 7367 }, { "epoch": 0.42, "grad_norm": 1.7307549715042114, "learning_rate": 1.2949918431115742e-05, "loss": 0.9967, "step": 7368 }, { "epoch": 0.42, "grad_norm": 1.878368616104126, "learning_rate": 1.2948143432818352e-05, "loss": 0.978, "step": 7369 }, { "epoch": 0.42, "grad_norm": 1.8204724788665771, "learning_rate": 1.294636833278877e-05, "loss": 0.9809, "step": 7370 }, { "epoch": 0.42, "grad_norm": 1.819919466972351, "learning_rate": 1.2944593131088246e-05, "loss": 1.0129, "step": 7371 }, { "epoch": 0.42, "grad_norm": 1.6566475629806519, "learning_rate": 1.2942817827778037e-05, "loss": 0.9865, "step": 7372 }, { "epoch": 0.42, "grad_norm": 1.9487743377685547, "learning_rate": 1.2941042422919405e-05, "loss": 0.999, "step": 7373 }, { "epoch": 0.42, "grad_norm": 1.7331446409225464, "learning_rate": 1.2939266916573614e-05, "loss": 0.9563, "step": 7374 }, { "epoch": 0.42, "grad_norm": 1.8267320394515991, "learning_rate": 1.2937491308801936e-05, "loss": 1.0745, "step": 7375 }, { "epoch": 0.42, "grad_norm": 1.1133317947387695, "learning_rate": 1.2935715599665635e-05, "loss": 0.612, "step": 7376 }, { "epoch": 0.42, "grad_norm": 1.6918549537658691, "learning_rate": 1.293393978922599e-05, "loss": 0.8991, "step": 7377 }, { "epoch": 0.42, "grad_norm": 1.829156756401062, "learning_rate": 1.2932163877544277e-05, "loss": 0.9734, "step": 7378 }, { "epoch": 0.42, "grad_norm": 2.5627474784851074, "learning_rate": 1.293038786468178e-05, "loss": 0.9992, "step": 7379 }, { "epoch": 0.42, "grad_norm": 1.8583108186721802, "learning_rate": 1.2928611750699784e-05, "loss": 0.9215, "step": 7380 }, { "epoch": 0.42, "grad_norm": 1.8099446296691895, "learning_rate": 1.2926835535659579e-05, "loss": 0.9328, "step": 7381 }, { "epoch": 0.42, "grad_norm": 1.0165079832077026, "learning_rate": 1.2925059219622455e-05, "loss": 0.6059, "step": 7382 }, { "epoch": 0.42, "grad_norm": 1.6953763961791992, "learning_rate": 1.2923282802649708e-05, "loss": 0.9706, "step": 7383 }, { "epoch": 0.42, "grad_norm": 1.8917162418365479, "learning_rate": 1.2921506284802636e-05, "loss": 1.0104, "step": 7384 }, { "epoch": 0.42, "grad_norm": 1.0757641792297363, "learning_rate": 1.2919729666142545e-05, "loss": 0.6491, "step": 7385 }, { "epoch": 0.42, "grad_norm": 1.9731879234313965, "learning_rate": 1.2917952946730737e-05, "loss": 1.0203, "step": 7386 }, { "epoch": 0.42, "grad_norm": 1.8856728076934814, "learning_rate": 1.2916176126628527e-05, "loss": 0.9763, "step": 7387 }, { "epoch": 0.42, "grad_norm": 1.8110123872756958, "learning_rate": 1.2914399205897221e-05, "loss": 1.0556, "step": 7388 }, { "epoch": 0.42, "grad_norm": 1.7422555685043335, "learning_rate": 1.2912622184598138e-05, "loss": 0.9648, "step": 7389 }, { "epoch": 0.42, "grad_norm": 1.8223832845687866, "learning_rate": 1.2910845062792604e-05, "loss": 1.0258, "step": 7390 }, { "epoch": 0.42, "grad_norm": 1.8142021894454956, "learning_rate": 1.2909067840541935e-05, "loss": 1.0817, "step": 7391 }, { "epoch": 0.42, "grad_norm": 1.776807188987732, "learning_rate": 1.2907290517907462e-05, "loss": 1.0026, "step": 7392 }, { "epoch": 0.42, "grad_norm": 1.6979384422302246, "learning_rate": 1.2905513094950517e-05, "loss": 0.9178, "step": 7393 }, { "epoch": 0.42, "grad_norm": 1.8462640047073364, "learning_rate": 1.290373557173243e-05, "loss": 1.0767, "step": 7394 }, { "epoch": 0.42, "grad_norm": 1.8140606880187988, "learning_rate": 1.2901957948314539e-05, "loss": 1.0402, "step": 7395 }, { "epoch": 0.42, "grad_norm": 1.7951174974441528, "learning_rate": 1.2900180224758186e-05, "loss": 0.9011, "step": 7396 }, { "epoch": 0.42, "grad_norm": 1.9115961790084839, "learning_rate": 1.2898402401124713e-05, "loss": 1.0766, "step": 7397 }, { "epoch": 0.42, "grad_norm": 2.0440149307250977, "learning_rate": 1.289662447747547e-05, "loss": 1.0204, "step": 7398 }, { "epoch": 0.42, "grad_norm": 1.7560451030731201, "learning_rate": 1.289484645387181e-05, "loss": 0.9514, "step": 7399 }, { "epoch": 0.42, "grad_norm": 1.8194767236709595, "learning_rate": 1.2893068330375082e-05, "loss": 1.047, "step": 7400 }, { "epoch": 0.42, "grad_norm": 1.9434022903442383, "learning_rate": 1.2891290107046647e-05, "loss": 0.9533, "step": 7401 }, { "epoch": 0.42, "grad_norm": 1.7496813535690308, "learning_rate": 1.288951178394787e-05, "loss": 0.9679, "step": 7402 }, { "epoch": 0.42, "grad_norm": 1.859490156173706, "learning_rate": 1.288773336114011e-05, "loss": 1.0121, "step": 7403 }, { "epoch": 0.42, "grad_norm": 1.778328776359558, "learning_rate": 1.2885954838684742e-05, "loss": 0.9174, "step": 7404 }, { "epoch": 0.42, "grad_norm": 1.059141755104065, "learning_rate": 1.2884176216643132e-05, "loss": 0.5498, "step": 7405 }, { "epoch": 0.42, "grad_norm": 1.801032304763794, "learning_rate": 1.2882397495076657e-05, "loss": 1.0558, "step": 7406 }, { "epoch": 0.42, "grad_norm": 1.6838574409484863, "learning_rate": 1.28806186740467e-05, "loss": 0.9721, "step": 7407 }, { "epoch": 0.42, "grad_norm": 1.7295992374420166, "learning_rate": 1.2878839753614633e-05, "loss": 0.9707, "step": 7408 }, { "epoch": 0.42, "grad_norm": 1.8102807998657227, "learning_rate": 1.287706073384185e-05, "loss": 1.0825, "step": 7409 }, { "epoch": 0.42, "grad_norm": 1.6353942155838013, "learning_rate": 1.287528161478974e-05, "loss": 0.9245, "step": 7410 }, { "epoch": 0.43, "grad_norm": 1.7287774085998535, "learning_rate": 1.2873502396519692e-05, "loss": 1.027, "step": 7411 }, { "epoch": 0.43, "grad_norm": 1.825989007949829, "learning_rate": 1.2871723079093101e-05, "loss": 0.9144, "step": 7412 }, { "epoch": 0.43, "grad_norm": 1.811667799949646, "learning_rate": 1.2869943662571372e-05, "loss": 0.994, "step": 7413 }, { "epoch": 0.43, "grad_norm": 1.970324158668518, "learning_rate": 1.28681641470159e-05, "loss": 1.0173, "step": 7414 }, { "epoch": 0.43, "grad_norm": 1.9308664798736572, "learning_rate": 1.2866384532488098e-05, "loss": 1.0567, "step": 7415 }, { "epoch": 0.43, "grad_norm": 1.6111783981323242, "learning_rate": 1.286460481904937e-05, "loss": 0.9629, "step": 7416 }, { "epoch": 0.43, "grad_norm": 1.7394113540649414, "learning_rate": 1.2862825006761136e-05, "loss": 0.9924, "step": 7417 }, { "epoch": 0.43, "grad_norm": 1.8474863767623901, "learning_rate": 1.2861045095684805e-05, "loss": 0.9743, "step": 7418 }, { "epoch": 0.43, "grad_norm": 1.7442917823791504, "learning_rate": 1.28592650858818e-05, "loss": 1.0255, "step": 7419 }, { "epoch": 0.43, "grad_norm": 1.9154739379882812, "learning_rate": 1.2857484977413545e-05, "loss": 1.0643, "step": 7420 }, { "epoch": 0.43, "grad_norm": 1.830653429031372, "learning_rate": 1.2855704770341463e-05, "loss": 1.0451, "step": 7421 }, { "epoch": 0.43, "grad_norm": 1.7709776163101196, "learning_rate": 1.285392446472699e-05, "loss": 0.9858, "step": 7422 }, { "epoch": 0.43, "grad_norm": 1.8861711025238037, "learning_rate": 1.2852144060631556e-05, "loss": 0.8848, "step": 7423 }, { "epoch": 0.43, "grad_norm": 1.8036357164382935, "learning_rate": 1.2850363558116596e-05, "loss": 1.0782, "step": 7424 }, { "epoch": 0.43, "grad_norm": 1.729099988937378, "learning_rate": 1.2848582957243552e-05, "loss": 0.919, "step": 7425 }, { "epoch": 0.43, "grad_norm": 1.6754510402679443, "learning_rate": 1.2846802258073867e-05, "loss": 0.9521, "step": 7426 }, { "epoch": 0.43, "grad_norm": 1.7148399353027344, "learning_rate": 1.2845021460668988e-05, "loss": 0.6629, "step": 7427 }, { "epoch": 0.43, "grad_norm": 1.8017168045043945, "learning_rate": 1.2843240565090365e-05, "loss": 0.9795, "step": 7428 }, { "epoch": 0.43, "grad_norm": 1.7993075847625732, "learning_rate": 1.2841459571399453e-05, "loss": 0.9253, "step": 7429 }, { "epoch": 0.43, "grad_norm": 1.8552762269973755, "learning_rate": 1.2839678479657709e-05, "loss": 0.9809, "step": 7430 }, { "epoch": 0.43, "grad_norm": 1.77656888961792, "learning_rate": 1.2837897289926592e-05, "loss": 0.9731, "step": 7431 }, { "epoch": 0.43, "grad_norm": 1.5843607187271118, "learning_rate": 1.283611600226757e-05, "loss": 0.8335, "step": 7432 }, { "epoch": 0.43, "grad_norm": 1.7424556016921997, "learning_rate": 1.2834334616742104e-05, "loss": 0.9747, "step": 7433 }, { "epoch": 0.43, "grad_norm": 1.724855661392212, "learning_rate": 1.2832553133411666e-05, "loss": 1.0031, "step": 7434 }, { "epoch": 0.43, "grad_norm": 1.924340844154358, "learning_rate": 1.2830771552337735e-05, "loss": 1.1026, "step": 7435 }, { "epoch": 0.43, "grad_norm": 1.8070130348205566, "learning_rate": 1.2828989873581786e-05, "loss": 1.0251, "step": 7436 }, { "epoch": 0.43, "grad_norm": 1.75368070602417, "learning_rate": 1.2827208097205298e-05, "loss": 1.009, "step": 7437 }, { "epoch": 0.43, "grad_norm": 1.8113374710083008, "learning_rate": 1.2825426223269755e-05, "loss": 1.0823, "step": 7438 }, { "epoch": 0.43, "grad_norm": 1.751863956451416, "learning_rate": 1.2823644251836647e-05, "loss": 0.9411, "step": 7439 }, { "epoch": 0.43, "grad_norm": 1.8712741136550903, "learning_rate": 1.282186218296746e-05, "loss": 1.0149, "step": 7440 }, { "epoch": 0.43, "grad_norm": 1.980992078781128, "learning_rate": 1.2820080016723695e-05, "loss": 1.0026, "step": 7441 }, { "epoch": 0.43, "grad_norm": 1.9852229356765747, "learning_rate": 1.2818297753166844e-05, "loss": 1.0551, "step": 7442 }, { "epoch": 0.43, "grad_norm": 1.7283494472503662, "learning_rate": 1.2816515392358413e-05, "loss": 1.039, "step": 7443 }, { "epoch": 0.43, "grad_norm": 1.9550997018814087, "learning_rate": 1.2814732934359901e-05, "loss": 1.0094, "step": 7444 }, { "epoch": 0.43, "grad_norm": 2.0406551361083984, "learning_rate": 1.2812950379232816e-05, "loss": 1.0285, "step": 7445 }, { "epoch": 0.43, "grad_norm": 1.9118375778198242, "learning_rate": 1.2811167727038675e-05, "loss": 0.9471, "step": 7446 }, { "epoch": 0.43, "grad_norm": 1.7764334678649902, "learning_rate": 1.2809384977838988e-05, "loss": 1.0103, "step": 7447 }, { "epoch": 0.43, "grad_norm": 1.877103567123413, "learning_rate": 1.2807602131695274e-05, "loss": 0.9244, "step": 7448 }, { "epoch": 0.43, "grad_norm": 1.8031103610992432, "learning_rate": 1.2805819188669051e-05, "loss": 1.0056, "step": 7449 }, { "epoch": 0.43, "grad_norm": 1.6693018674850464, "learning_rate": 1.2804036148821846e-05, "loss": 0.9546, "step": 7450 }, { "epoch": 0.43, "grad_norm": 1.9337722063064575, "learning_rate": 1.2802253012215187e-05, "loss": 0.9952, "step": 7451 }, { "epoch": 0.43, "grad_norm": 1.594313144683838, "learning_rate": 1.2800469778910603e-05, "loss": 0.9433, "step": 7452 }, { "epoch": 0.43, "grad_norm": 1.5926663875579834, "learning_rate": 1.279868644896963e-05, "loss": 0.9507, "step": 7453 }, { "epoch": 0.43, "grad_norm": 1.8125909566879272, "learning_rate": 1.2796903022453808e-05, "loss": 0.9863, "step": 7454 }, { "epoch": 0.43, "grad_norm": 1.7210795879364014, "learning_rate": 1.279511949942467e-05, "loss": 0.9471, "step": 7455 }, { "epoch": 0.43, "grad_norm": 1.2077887058258057, "learning_rate": 1.2793335879943771e-05, "loss": 0.6219, "step": 7456 }, { "epoch": 0.43, "grad_norm": 1.1810319423675537, "learning_rate": 1.2791552164072652e-05, "loss": 0.5648, "step": 7457 }, { "epoch": 0.43, "grad_norm": 1.7424389123916626, "learning_rate": 1.2789768351872867e-05, "loss": 0.9128, "step": 7458 }, { "epoch": 0.43, "grad_norm": 1.820887565612793, "learning_rate": 1.2787984443405966e-05, "loss": 1.0088, "step": 7459 }, { "epoch": 0.43, "grad_norm": 1.6214430332183838, "learning_rate": 1.2786200438733512e-05, "loss": 0.8781, "step": 7460 }, { "epoch": 0.43, "grad_norm": 1.0633918046951294, "learning_rate": 1.2784416337917063e-05, "loss": 0.5948, "step": 7461 }, { "epoch": 0.43, "grad_norm": 1.6853479146957397, "learning_rate": 1.2782632141018185e-05, "loss": 1.0068, "step": 7462 }, { "epoch": 0.43, "grad_norm": 1.7700046300888062, "learning_rate": 1.2780847848098445e-05, "loss": 1.0108, "step": 7463 }, { "epoch": 0.43, "grad_norm": 1.879514217376709, "learning_rate": 1.2779063459219414e-05, "loss": 0.984, "step": 7464 }, { "epoch": 0.43, "grad_norm": 1.722773790359497, "learning_rate": 1.2777278974442664e-05, "loss": 1.0109, "step": 7465 }, { "epoch": 0.43, "grad_norm": 1.8056957721710205, "learning_rate": 1.2775494393829777e-05, "loss": 1.0415, "step": 7466 }, { "epoch": 0.43, "grad_norm": 1.9671504497528076, "learning_rate": 1.2773709717442326e-05, "loss": 1.0492, "step": 7467 }, { "epoch": 0.43, "grad_norm": 1.986178994178772, "learning_rate": 1.2771924945341906e-05, "loss": 0.9528, "step": 7468 }, { "epoch": 0.43, "grad_norm": 1.8241297006607056, "learning_rate": 1.2770140077590098e-05, "loss": 1.0811, "step": 7469 }, { "epoch": 0.43, "grad_norm": 1.9104567766189575, "learning_rate": 1.2768355114248493e-05, "loss": 1.0288, "step": 7470 }, { "epoch": 0.43, "grad_norm": 1.701006293296814, "learning_rate": 1.276657005537869e-05, "loss": 0.9418, "step": 7471 }, { "epoch": 0.43, "grad_norm": 1.7929165363311768, "learning_rate": 1.276478490104228e-05, "loss": 0.9289, "step": 7472 }, { "epoch": 0.43, "grad_norm": 1.7173610925674438, "learning_rate": 1.2762999651300865e-05, "loss": 0.9649, "step": 7473 }, { "epoch": 0.43, "grad_norm": 1.8171846866607666, "learning_rate": 1.2761214306216052e-05, "loss": 1.0279, "step": 7474 }, { "epoch": 0.43, "grad_norm": 2.109318256378174, "learning_rate": 1.2759428865849445e-05, "loss": 0.9936, "step": 7475 }, { "epoch": 0.43, "grad_norm": 1.917927622795105, "learning_rate": 1.2757643330262656e-05, "loss": 0.925, "step": 7476 }, { "epoch": 0.43, "grad_norm": 1.9283018112182617, "learning_rate": 1.27558576995173e-05, "loss": 0.9698, "step": 7477 }, { "epoch": 0.43, "grad_norm": 1.7373062372207642, "learning_rate": 1.275407197367499e-05, "loss": 0.9673, "step": 7478 }, { "epoch": 0.43, "grad_norm": 1.8121575117111206, "learning_rate": 1.2752286152797352e-05, "loss": 1.0262, "step": 7479 }, { "epoch": 0.43, "grad_norm": 1.773172378540039, "learning_rate": 1.2750500236946008e-05, "loss": 0.941, "step": 7480 }, { "epoch": 0.43, "grad_norm": 1.9105595350265503, "learning_rate": 1.2748714226182583e-05, "loss": 1.1366, "step": 7481 }, { "epoch": 0.43, "grad_norm": 1.6814420223236084, "learning_rate": 1.2746928120568707e-05, "loss": 1.0099, "step": 7482 }, { "epoch": 0.43, "grad_norm": 1.756831169128418, "learning_rate": 1.2745141920166016e-05, "loss": 1.0091, "step": 7483 }, { "epoch": 0.43, "grad_norm": 1.653228998184204, "learning_rate": 1.2743355625036145e-05, "loss": 1.0106, "step": 7484 }, { "epoch": 0.43, "grad_norm": 1.8476029634475708, "learning_rate": 1.2741569235240733e-05, "loss": 0.995, "step": 7485 }, { "epoch": 0.43, "grad_norm": 1.7879499197006226, "learning_rate": 1.2739782750841428e-05, "loss": 0.9372, "step": 7486 }, { "epoch": 0.43, "grad_norm": 1.9753953218460083, "learning_rate": 1.2737996171899873e-05, "loss": 1.0142, "step": 7487 }, { "epoch": 0.43, "grad_norm": 1.8585056066513062, "learning_rate": 1.2736209498477719e-05, "loss": 0.9522, "step": 7488 }, { "epoch": 0.43, "grad_norm": 1.7398544549942017, "learning_rate": 1.2734422730636617e-05, "loss": 0.9327, "step": 7489 }, { "epoch": 0.43, "grad_norm": 1.7137055397033691, "learning_rate": 1.2732635868438225e-05, "loss": 0.9418, "step": 7490 }, { "epoch": 0.43, "grad_norm": 1.5938587188720703, "learning_rate": 1.2730848911944204e-05, "loss": 1.0401, "step": 7491 }, { "epoch": 0.43, "grad_norm": 1.7662118673324585, "learning_rate": 1.2729061861216214e-05, "loss": 1.039, "step": 7492 }, { "epoch": 0.43, "grad_norm": 1.983756184577942, "learning_rate": 1.2727274716315922e-05, "loss": 0.9517, "step": 7493 }, { "epoch": 0.43, "grad_norm": 1.3117972612380981, "learning_rate": 1.2725487477304999e-05, "loss": 0.6674, "step": 7494 }, { "epoch": 0.43, "grad_norm": 1.7978792190551758, "learning_rate": 1.2723700144245115e-05, "loss": 0.9926, "step": 7495 }, { "epoch": 0.43, "grad_norm": 1.7574079036712646, "learning_rate": 1.2721912717197949e-05, "loss": 0.9103, "step": 7496 }, { "epoch": 0.43, "grad_norm": 1.6949598789215088, "learning_rate": 1.2720125196225178e-05, "loss": 1.0089, "step": 7497 }, { "epoch": 0.43, "grad_norm": 1.9450141191482544, "learning_rate": 1.2718337581388485e-05, "loss": 1.0455, "step": 7498 }, { "epoch": 0.43, "grad_norm": 1.7450401782989502, "learning_rate": 1.2716549872749555e-05, "loss": 0.9784, "step": 7499 }, { "epoch": 0.43, "grad_norm": 1.7866034507751465, "learning_rate": 1.2714762070370078e-05, "loss": 0.978, "step": 7500 }, { "epoch": 0.43, "grad_norm": 1.8792423009872437, "learning_rate": 1.2712974174311743e-05, "loss": 0.9371, "step": 7501 }, { "epoch": 0.43, "grad_norm": 1.7580718994140625, "learning_rate": 1.271118618463625e-05, "loss": 1.059, "step": 7502 }, { "epoch": 0.43, "grad_norm": 1.822844386100769, "learning_rate": 1.2709398101405296e-05, "loss": 0.9845, "step": 7503 }, { "epoch": 0.43, "grad_norm": 1.7986738681793213, "learning_rate": 1.270760992468058e-05, "loss": 0.8986, "step": 7504 }, { "epoch": 0.43, "grad_norm": 1.7601100206375122, "learning_rate": 1.2705821654523809e-05, "loss": 1.0599, "step": 7505 }, { "epoch": 0.43, "grad_norm": 1.813385009765625, "learning_rate": 1.270403329099669e-05, "loss": 0.9624, "step": 7506 }, { "epoch": 0.43, "grad_norm": 1.6762651205062866, "learning_rate": 1.2702244834160937e-05, "loss": 1.043, "step": 7507 }, { "epoch": 0.43, "grad_norm": 1.7727586030960083, "learning_rate": 1.2700456284078263e-05, "loss": 1.0071, "step": 7508 }, { "epoch": 0.43, "grad_norm": 2.211315155029297, "learning_rate": 1.2698667640810386e-05, "loss": 0.993, "step": 7509 }, { "epoch": 0.43, "grad_norm": 1.9319093227386475, "learning_rate": 1.2696878904419028e-05, "loss": 0.9659, "step": 7510 }, { "epoch": 0.43, "grad_norm": 1.8859407901763916, "learning_rate": 1.269509007496591e-05, "loss": 0.9505, "step": 7511 }, { "epoch": 0.43, "grad_norm": 1.8175667524337769, "learning_rate": 1.2693301152512765e-05, "loss": 0.9882, "step": 7512 }, { "epoch": 0.43, "grad_norm": 1.6579301357269287, "learning_rate": 1.269151213712132e-05, "loss": 0.8946, "step": 7513 }, { "epoch": 0.43, "grad_norm": 1.8423120975494385, "learning_rate": 1.268972302885331e-05, "loss": 0.9619, "step": 7514 }, { "epoch": 0.43, "grad_norm": 1.7640901803970337, "learning_rate": 1.2687933827770469e-05, "loss": 1.0234, "step": 7515 }, { "epoch": 0.43, "grad_norm": 1.6898865699768066, "learning_rate": 1.268614453393454e-05, "loss": 0.8837, "step": 7516 }, { "epoch": 0.43, "grad_norm": 1.79181969165802, "learning_rate": 1.268435514740727e-05, "loss": 0.9206, "step": 7517 }, { "epoch": 0.43, "grad_norm": 1.774214506149292, "learning_rate": 1.2682565668250401e-05, "loss": 1.0339, "step": 7518 }, { "epoch": 0.43, "grad_norm": 1.1134421825408936, "learning_rate": 1.2680776096525684e-05, "loss": 0.6506, "step": 7519 }, { "epoch": 0.43, "grad_norm": 1.8094152212142944, "learning_rate": 1.2678986432294872e-05, "loss": 0.9961, "step": 7520 }, { "epoch": 0.43, "grad_norm": 2.1506853103637695, "learning_rate": 1.267719667561972e-05, "loss": 0.9644, "step": 7521 }, { "epoch": 0.43, "grad_norm": 1.753096342086792, "learning_rate": 1.2675406826561991e-05, "loss": 1.0185, "step": 7522 }, { "epoch": 0.43, "grad_norm": 1.7113707065582275, "learning_rate": 1.2673616885183449e-05, "loss": 0.983, "step": 7523 }, { "epoch": 0.43, "grad_norm": 1.7953295707702637, "learning_rate": 1.2671826851545851e-05, "loss": 1.0852, "step": 7524 }, { "epoch": 0.43, "grad_norm": 1.8164705038070679, "learning_rate": 1.2670036725710974e-05, "loss": 0.9757, "step": 7525 }, { "epoch": 0.43, "grad_norm": 1.865089774131775, "learning_rate": 1.266824650774059e-05, "loss": 0.9623, "step": 7526 }, { "epoch": 0.43, "grad_norm": 2.0285768508911133, "learning_rate": 1.2666456197696473e-05, "loss": 0.9957, "step": 7527 }, { "epoch": 0.43, "grad_norm": 1.6920452117919922, "learning_rate": 1.2664665795640399e-05, "loss": 1.0866, "step": 7528 }, { "epoch": 0.43, "grad_norm": 1.9247956275939941, "learning_rate": 1.2662875301634152e-05, "loss": 1.0301, "step": 7529 }, { "epoch": 0.43, "grad_norm": 1.8386216163635254, "learning_rate": 1.2661084715739516e-05, "loss": 1.0137, "step": 7530 }, { "epoch": 0.43, "grad_norm": 1.1674944162368774, "learning_rate": 1.2659294038018279e-05, "loss": 0.6569, "step": 7531 }, { "epoch": 0.43, "grad_norm": 1.7139341831207275, "learning_rate": 1.2657503268532236e-05, "loss": 0.9706, "step": 7532 }, { "epoch": 0.43, "grad_norm": 1.9618979692459106, "learning_rate": 1.2655712407343175e-05, "loss": 1.0096, "step": 7533 }, { "epoch": 0.43, "grad_norm": 1.7249456644058228, "learning_rate": 1.26539214545129e-05, "loss": 0.8991, "step": 7534 }, { "epoch": 0.43, "grad_norm": 1.8893961906433105, "learning_rate": 1.265213041010321e-05, "loss": 1.0104, "step": 7535 }, { "epoch": 0.43, "grad_norm": 1.101256012916565, "learning_rate": 1.2650339274175906e-05, "loss": 0.6349, "step": 7536 }, { "epoch": 0.43, "grad_norm": 1.0688300132751465, "learning_rate": 1.26485480467928e-05, "loss": 0.5672, "step": 7537 }, { "epoch": 0.43, "grad_norm": 1.8091368675231934, "learning_rate": 1.2646756728015696e-05, "loss": 0.98, "step": 7538 }, { "epoch": 0.43, "grad_norm": 1.759499192237854, "learning_rate": 1.2644965317906413e-05, "loss": 0.9828, "step": 7539 }, { "epoch": 0.43, "grad_norm": 1.136631965637207, "learning_rate": 1.2643173816526763e-05, "loss": 0.5935, "step": 7540 }, { "epoch": 0.43, "grad_norm": 2.0222177505493164, "learning_rate": 1.264138222393857e-05, "loss": 0.9537, "step": 7541 }, { "epoch": 0.43, "grad_norm": 1.7345727682113647, "learning_rate": 1.2639590540203654e-05, "loss": 0.9587, "step": 7542 }, { "epoch": 0.43, "grad_norm": 1.874272346496582, "learning_rate": 1.2637798765383842e-05, "loss": 0.94, "step": 7543 }, { "epoch": 0.43, "grad_norm": 1.6331371068954468, "learning_rate": 1.263600689954096e-05, "loss": 1.004, "step": 7544 }, { "epoch": 0.43, "grad_norm": 1.7173949480056763, "learning_rate": 1.2634214942736847e-05, "loss": 0.9903, "step": 7545 }, { "epoch": 0.43, "grad_norm": 2.018784523010254, "learning_rate": 1.2632422895033333e-05, "loss": 1.0095, "step": 7546 }, { "epoch": 0.43, "grad_norm": 2.0410802364349365, "learning_rate": 1.263063075649226e-05, "loss": 1.0811, "step": 7547 }, { "epoch": 0.43, "grad_norm": 1.8046157360076904, "learning_rate": 1.2628838527175464e-05, "loss": 1.0396, "step": 7548 }, { "epoch": 0.43, "grad_norm": 1.1469578742980957, "learning_rate": 1.2627046207144798e-05, "loss": 0.6852, "step": 7549 }, { "epoch": 0.43, "grad_norm": 2.041348934173584, "learning_rate": 1.2625253796462104e-05, "loss": 0.9877, "step": 7550 }, { "epoch": 0.43, "grad_norm": 1.8755077123641968, "learning_rate": 1.2623461295189236e-05, "loss": 1.0647, "step": 7551 }, { "epoch": 0.43, "grad_norm": 1.6887931823730469, "learning_rate": 1.2621668703388046e-05, "loss": 0.9837, "step": 7552 }, { "epoch": 0.43, "grad_norm": 1.7082523107528687, "learning_rate": 1.2619876021120394e-05, "loss": 1.0328, "step": 7553 }, { "epoch": 0.43, "grad_norm": 1.6883245706558228, "learning_rate": 1.2618083248448137e-05, "loss": 0.9997, "step": 7554 }, { "epoch": 0.43, "grad_norm": 1.8061022758483887, "learning_rate": 1.2616290385433141e-05, "loss": 0.9801, "step": 7555 }, { "epoch": 0.43, "grad_norm": 1.6123614311218262, "learning_rate": 1.2614497432137274e-05, "loss": 0.9141, "step": 7556 }, { "epoch": 0.43, "grad_norm": 1.7140716314315796, "learning_rate": 1.26127043886224e-05, "loss": 0.9544, "step": 7557 }, { "epoch": 0.43, "grad_norm": 1.6678967475891113, "learning_rate": 1.26109112549504e-05, "loss": 0.9468, "step": 7558 }, { "epoch": 0.43, "grad_norm": 1.795954942703247, "learning_rate": 1.2609118031183144e-05, "loss": 0.9323, "step": 7559 }, { "epoch": 0.43, "grad_norm": 1.6206252574920654, "learning_rate": 1.260732471738251e-05, "loss": 0.9129, "step": 7560 }, { "epoch": 0.43, "grad_norm": 1.8071534633636475, "learning_rate": 1.2605531313610386e-05, "loss": 1.0176, "step": 7561 }, { "epoch": 0.43, "grad_norm": 1.7835968732833862, "learning_rate": 1.2603737819928656e-05, "loss": 1.0631, "step": 7562 }, { "epoch": 0.43, "grad_norm": 1.8254762887954712, "learning_rate": 1.2601944236399207e-05, "loss": 1.0146, "step": 7563 }, { "epoch": 0.43, "grad_norm": 1.772290825843811, "learning_rate": 1.2600150563083929e-05, "loss": 0.9648, "step": 7564 }, { "epoch": 0.43, "grad_norm": 1.8369179964065552, "learning_rate": 1.2598356800044717e-05, "loss": 0.8646, "step": 7565 }, { "epoch": 0.43, "grad_norm": 1.7670364379882812, "learning_rate": 1.2596562947343473e-05, "loss": 0.8997, "step": 7566 }, { "epoch": 0.43, "grad_norm": 1.8656688928604126, "learning_rate": 1.2594769005042093e-05, "loss": 1.0005, "step": 7567 }, { "epoch": 0.43, "grad_norm": 1.8663579225540161, "learning_rate": 1.2592974973202486e-05, "loss": 0.9596, "step": 7568 }, { "epoch": 0.43, "grad_norm": 1.6148930788040161, "learning_rate": 1.2591180851886554e-05, "loss": 0.9745, "step": 7569 }, { "epoch": 0.43, "grad_norm": 1.8621864318847656, "learning_rate": 1.2589386641156208e-05, "loss": 0.9315, "step": 7570 }, { "epoch": 0.43, "grad_norm": 1.6840916872024536, "learning_rate": 1.2587592341073362e-05, "loss": 0.9844, "step": 7571 }, { "epoch": 0.43, "grad_norm": 1.6677579879760742, "learning_rate": 1.2585797951699932e-05, "loss": 1.0499, "step": 7572 }, { "epoch": 0.43, "grad_norm": 1.8730182647705078, "learning_rate": 1.2584003473097837e-05, "loss": 0.9146, "step": 7573 }, { "epoch": 0.43, "grad_norm": 1.676588773727417, "learning_rate": 1.2582208905329004e-05, "loss": 0.9929, "step": 7574 }, { "epoch": 0.43, "grad_norm": 2.028895378112793, "learning_rate": 1.2580414248455352e-05, "loss": 1.0946, "step": 7575 }, { "epoch": 0.43, "grad_norm": 1.8122638463974, "learning_rate": 1.2578619502538814e-05, "loss": 1.0032, "step": 7576 }, { "epoch": 0.43, "grad_norm": 1.5693435668945312, "learning_rate": 1.2576824667641317e-05, "loss": 0.9244, "step": 7577 }, { "epoch": 0.43, "grad_norm": 1.7245745658874512, "learning_rate": 1.2575029743824803e-05, "loss": 0.933, "step": 7578 }, { "epoch": 0.43, "grad_norm": 1.736441731452942, "learning_rate": 1.2573234731151203e-05, "loss": 0.9737, "step": 7579 }, { "epoch": 0.43, "grad_norm": 1.622036337852478, "learning_rate": 1.257143962968246e-05, "loss": 0.929, "step": 7580 }, { "epoch": 0.43, "grad_norm": 1.7343754768371582, "learning_rate": 1.256964443948052e-05, "loss": 1.0073, "step": 7581 }, { "epoch": 0.43, "grad_norm": 1.7654688358306885, "learning_rate": 1.2567849160607327e-05, "loss": 0.9599, "step": 7582 }, { "epoch": 0.43, "grad_norm": 1.6042214632034302, "learning_rate": 1.2566053793124834e-05, "loss": 0.9153, "step": 7583 }, { "epoch": 0.43, "grad_norm": 1.6926013231277466, "learning_rate": 1.2564258337094994e-05, "loss": 0.9967, "step": 7584 }, { "epoch": 0.44, "grad_norm": 1.724165439605713, "learning_rate": 1.2562462792579759e-05, "loss": 0.9516, "step": 7585 }, { "epoch": 0.44, "grad_norm": 1.7788724899291992, "learning_rate": 1.2560667159641092e-05, "loss": 0.9947, "step": 7586 }, { "epoch": 0.44, "grad_norm": 1.7585062980651855, "learning_rate": 1.2558871438340951e-05, "loss": 0.9913, "step": 7587 }, { "epoch": 0.44, "grad_norm": 1.8138631582260132, "learning_rate": 1.2557075628741309e-05, "loss": 1.032, "step": 7588 }, { "epoch": 0.44, "grad_norm": 1.6059385538101196, "learning_rate": 1.2555279730904128e-05, "loss": 1.036, "step": 7589 }, { "epoch": 0.44, "grad_norm": 1.7083841562271118, "learning_rate": 1.2553483744891382e-05, "loss": 1.0165, "step": 7590 }, { "epoch": 0.44, "grad_norm": 1.8931975364685059, "learning_rate": 1.2551687670765045e-05, "loss": 0.9667, "step": 7591 }, { "epoch": 0.44, "grad_norm": 1.1866353750228882, "learning_rate": 1.2549891508587095e-05, "loss": 0.6447, "step": 7592 }, { "epoch": 0.44, "grad_norm": 1.8239480257034302, "learning_rate": 1.254809525841951e-05, "loss": 1.0698, "step": 7593 }, { "epoch": 0.44, "grad_norm": 1.6755266189575195, "learning_rate": 1.2546298920324277e-05, "loss": 1.0232, "step": 7594 }, { "epoch": 0.44, "grad_norm": 1.7988396883010864, "learning_rate": 1.2544502494363382e-05, "loss": 1.0231, "step": 7595 }, { "epoch": 0.44, "grad_norm": 1.7721139192581177, "learning_rate": 1.2542705980598813e-05, "loss": 1.022, "step": 7596 }, { "epoch": 0.44, "grad_norm": 1.8031858205795288, "learning_rate": 1.254090937909256e-05, "loss": 0.9576, "step": 7597 }, { "epoch": 0.44, "grad_norm": 1.7613509893417358, "learning_rate": 1.2539112689906627e-05, "loss": 0.9114, "step": 7598 }, { "epoch": 0.44, "grad_norm": 1.8318668603897095, "learning_rate": 1.2537315913103003e-05, "loss": 1.0007, "step": 7599 }, { "epoch": 0.44, "grad_norm": 1.8220456838607788, "learning_rate": 1.2535519048743696e-05, "loss": 1.0284, "step": 7600 }, { "epoch": 0.44, "grad_norm": 1.7607215642929077, "learning_rate": 1.2533722096890713e-05, "loss": 1.0121, "step": 7601 }, { "epoch": 0.44, "grad_norm": 1.7073352336883545, "learning_rate": 1.2531925057606053e-05, "loss": 0.9583, "step": 7602 }, { "epoch": 0.44, "grad_norm": 2.0248639583587646, "learning_rate": 1.2530127930951736e-05, "loss": 1.0115, "step": 7603 }, { "epoch": 0.44, "grad_norm": 1.9395157098770142, "learning_rate": 1.252833071698977e-05, "loss": 1.0344, "step": 7604 }, { "epoch": 0.44, "grad_norm": 1.098695158958435, "learning_rate": 1.2526533415782173e-05, "loss": 0.6311, "step": 7605 }, { "epoch": 0.44, "grad_norm": 1.638378620147705, "learning_rate": 1.2524736027390968e-05, "loss": 0.9969, "step": 7606 }, { "epoch": 0.44, "grad_norm": 1.9952657222747803, "learning_rate": 1.2522938551878171e-05, "loss": 0.9236, "step": 7607 }, { "epoch": 0.44, "grad_norm": 1.6767350435256958, "learning_rate": 1.2521140989305816e-05, "loss": 0.9621, "step": 7608 }, { "epoch": 0.44, "grad_norm": 1.055700421333313, "learning_rate": 1.2519343339735925e-05, "loss": 0.6334, "step": 7609 }, { "epoch": 0.44, "grad_norm": 1.6364747285842896, "learning_rate": 1.2517545603230534e-05, "loss": 0.9466, "step": 7610 }, { "epoch": 0.44, "grad_norm": 1.7909554243087769, "learning_rate": 1.2515747779851677e-05, "loss": 1.0274, "step": 7611 }, { "epoch": 0.44, "grad_norm": 1.9285619258880615, "learning_rate": 1.251394986966139e-05, "loss": 0.9975, "step": 7612 }, { "epoch": 0.44, "grad_norm": 1.7856172323226929, "learning_rate": 1.2512151872721718e-05, "loss": 1.0448, "step": 7613 }, { "epoch": 0.44, "grad_norm": 1.8012738227844238, "learning_rate": 1.25103537890947e-05, "loss": 0.973, "step": 7614 }, { "epoch": 0.44, "grad_norm": 1.797629475593567, "learning_rate": 1.2508555618842386e-05, "loss": 0.9628, "step": 7615 }, { "epoch": 0.44, "grad_norm": 1.8345519304275513, "learning_rate": 1.2506757362026824e-05, "loss": 0.9593, "step": 7616 }, { "epoch": 0.44, "grad_norm": 1.7342952489852905, "learning_rate": 1.250495901871007e-05, "loss": 0.9834, "step": 7617 }, { "epoch": 0.44, "grad_norm": 1.8109760284423828, "learning_rate": 1.2503160588954178e-05, "loss": 0.9337, "step": 7618 }, { "epoch": 0.44, "grad_norm": 1.911319613456726, "learning_rate": 1.2501362072821204e-05, "loss": 1.0144, "step": 7619 }, { "epoch": 0.44, "grad_norm": 2.0442311763763428, "learning_rate": 1.2499563470373213e-05, "loss": 1.0142, "step": 7620 }, { "epoch": 0.44, "grad_norm": 1.9256324768066406, "learning_rate": 1.249776478167227e-05, "loss": 1.0058, "step": 7621 }, { "epoch": 0.44, "grad_norm": 1.0462584495544434, "learning_rate": 1.249596600678044e-05, "loss": 0.5621, "step": 7622 }, { "epoch": 0.44, "grad_norm": 1.8107529878616333, "learning_rate": 1.2494167145759797e-05, "loss": 0.9807, "step": 7623 }, { "epoch": 0.44, "grad_norm": 1.7807387113571167, "learning_rate": 1.2492368198672411e-05, "loss": 0.9759, "step": 7624 }, { "epoch": 0.44, "grad_norm": 1.803370475769043, "learning_rate": 1.2490569165580363e-05, "loss": 1.0265, "step": 7625 }, { "epoch": 0.44, "grad_norm": 2.0468337535858154, "learning_rate": 1.2488770046545727e-05, "loss": 0.9558, "step": 7626 }, { "epoch": 0.44, "grad_norm": 2.0263333320617676, "learning_rate": 1.248697084163059e-05, "loss": 1.0306, "step": 7627 }, { "epoch": 0.44, "grad_norm": 1.8878659009933472, "learning_rate": 1.2485171550897037e-05, "loss": 0.9994, "step": 7628 }, { "epoch": 0.44, "grad_norm": 1.81484055519104, "learning_rate": 1.2483372174407155e-05, "loss": 0.9838, "step": 7629 }, { "epoch": 0.44, "grad_norm": 1.8727859258651733, "learning_rate": 1.2481572712223038e-05, "loss": 1.0808, "step": 7630 }, { "epoch": 0.44, "grad_norm": 1.78900945186615, "learning_rate": 1.2479773164406779e-05, "loss": 0.9959, "step": 7631 }, { "epoch": 0.44, "grad_norm": 0.9696929454803467, "learning_rate": 1.2477973531020473e-05, "loss": 0.5828, "step": 7632 }, { "epoch": 0.44, "grad_norm": 1.8541227579116821, "learning_rate": 1.2476173812126224e-05, "loss": 1.0434, "step": 7633 }, { "epoch": 0.44, "grad_norm": 1.861672043800354, "learning_rate": 1.2474374007786133e-05, "loss": 1.0057, "step": 7634 }, { "epoch": 0.44, "grad_norm": 1.8727742433547974, "learning_rate": 1.2472574118062305e-05, "loss": 0.8561, "step": 7635 }, { "epoch": 0.44, "grad_norm": 1.8147246837615967, "learning_rate": 1.2470774143016854e-05, "loss": 0.9538, "step": 7636 }, { "epoch": 0.44, "grad_norm": 1.119937539100647, "learning_rate": 1.2468974082711889e-05, "loss": 0.6745, "step": 7637 }, { "epoch": 0.44, "grad_norm": 1.9202128648757935, "learning_rate": 1.2467173937209523e-05, "loss": 1.0411, "step": 7638 }, { "epoch": 0.44, "grad_norm": 1.7603487968444824, "learning_rate": 1.2465373706571878e-05, "loss": 1.011, "step": 7639 }, { "epoch": 0.44, "grad_norm": 1.7497239112854004, "learning_rate": 1.246357339086107e-05, "loss": 1.0062, "step": 7640 }, { "epoch": 0.44, "grad_norm": 1.776878833770752, "learning_rate": 1.246177299013923e-05, "loss": 0.985, "step": 7641 }, { "epoch": 0.44, "grad_norm": 1.7239664793014526, "learning_rate": 1.2459972504468479e-05, "loss": 0.9534, "step": 7642 }, { "epoch": 0.44, "grad_norm": 1.7702629566192627, "learning_rate": 1.2458171933910946e-05, "loss": 0.932, "step": 7643 }, { "epoch": 0.44, "grad_norm": 1.8115798234939575, "learning_rate": 1.2456371278528769e-05, "loss": 1.0158, "step": 7644 }, { "epoch": 0.44, "grad_norm": 1.7451653480529785, "learning_rate": 1.2454570538384081e-05, "loss": 1.0677, "step": 7645 }, { "epoch": 0.44, "grad_norm": 1.817245364189148, "learning_rate": 1.245276971353902e-05, "loss": 0.934, "step": 7646 }, { "epoch": 0.44, "grad_norm": 1.8699016571044922, "learning_rate": 1.2450968804055728e-05, "loss": 1.0077, "step": 7647 }, { "epoch": 0.44, "grad_norm": 1.6603144407272339, "learning_rate": 1.244916780999635e-05, "loss": 0.9548, "step": 7648 }, { "epoch": 0.44, "grad_norm": 1.8910303115844727, "learning_rate": 1.2447366731423029e-05, "loss": 1.0763, "step": 7649 }, { "epoch": 0.44, "grad_norm": 1.8313363790512085, "learning_rate": 1.2445565568397921e-05, "loss": 0.9859, "step": 7650 }, { "epoch": 0.44, "grad_norm": 1.9035089015960693, "learning_rate": 1.2443764320983179e-05, "loss": 1.0293, "step": 7651 }, { "epoch": 0.44, "grad_norm": 1.6988542079925537, "learning_rate": 1.2441962989240953e-05, "loss": 1.0218, "step": 7652 }, { "epoch": 0.44, "grad_norm": 1.8387292623519897, "learning_rate": 1.2440161573233404e-05, "loss": 1.1222, "step": 7653 }, { "epoch": 0.44, "grad_norm": 1.8995107412338257, "learning_rate": 1.24383600730227e-05, "loss": 1.0028, "step": 7654 }, { "epoch": 0.44, "grad_norm": 1.0339196920394897, "learning_rate": 1.2436558488670997e-05, "loss": 0.6084, "step": 7655 }, { "epoch": 0.44, "grad_norm": 1.8126020431518555, "learning_rate": 1.243475682024047e-05, "loss": 0.9405, "step": 7656 }, { "epoch": 0.44, "grad_norm": 1.0082863569259644, "learning_rate": 1.2432955067793286e-05, "loss": 0.5183, "step": 7657 }, { "epoch": 0.44, "grad_norm": 1.778639316558838, "learning_rate": 1.2431153231391617e-05, "loss": 0.9502, "step": 7658 }, { "epoch": 0.44, "grad_norm": 1.1354997158050537, "learning_rate": 1.2429351311097643e-05, "loss": 0.5436, "step": 7659 }, { "epoch": 0.44, "grad_norm": 1.6467963457107544, "learning_rate": 1.242754930697354e-05, "loss": 1.0063, "step": 7660 }, { "epoch": 0.44, "grad_norm": 1.8586868047714233, "learning_rate": 1.2425747219081494e-05, "loss": 1.0034, "step": 7661 }, { "epoch": 0.44, "grad_norm": 1.7889364957809448, "learning_rate": 1.2423945047483686e-05, "loss": 1.0121, "step": 7662 }, { "epoch": 0.44, "grad_norm": 1.883612871170044, "learning_rate": 1.2422142792242305e-05, "loss": 1.0411, "step": 7663 }, { "epoch": 0.44, "grad_norm": 1.7482819557189941, "learning_rate": 1.2420340453419542e-05, "loss": 0.9479, "step": 7664 }, { "epoch": 0.44, "grad_norm": 1.66978120803833, "learning_rate": 1.241853803107759e-05, "loss": 0.93, "step": 7665 }, { "epoch": 0.44, "grad_norm": 1.9105827808380127, "learning_rate": 1.2416735525278648e-05, "loss": 1.0029, "step": 7666 }, { "epoch": 0.44, "grad_norm": 1.589983344078064, "learning_rate": 1.2414932936084914e-05, "loss": 0.9611, "step": 7667 }, { "epoch": 0.44, "grad_norm": 1.971234679222107, "learning_rate": 1.2413130263558588e-05, "loss": 0.9823, "step": 7668 }, { "epoch": 0.44, "grad_norm": 1.9066078662872314, "learning_rate": 1.2411327507761879e-05, "loss": 0.9584, "step": 7669 }, { "epoch": 0.44, "grad_norm": 1.8142876625061035, "learning_rate": 1.2409524668756996e-05, "loss": 0.9721, "step": 7670 }, { "epoch": 0.44, "grad_norm": 1.6707004308700562, "learning_rate": 1.2407721746606145e-05, "loss": 0.9261, "step": 7671 }, { "epoch": 0.44, "grad_norm": 1.970712661743164, "learning_rate": 1.240591874137154e-05, "loss": 1.0006, "step": 7672 }, { "epoch": 0.44, "grad_norm": 1.7521528005599976, "learning_rate": 1.2404115653115403e-05, "loss": 0.9639, "step": 7673 }, { "epoch": 0.44, "grad_norm": 1.7101728916168213, "learning_rate": 1.240231248189995e-05, "loss": 0.9361, "step": 7674 }, { "epoch": 0.44, "grad_norm": 1.7056487798690796, "learning_rate": 1.2400509227787406e-05, "loss": 0.9641, "step": 7675 }, { "epoch": 0.44, "grad_norm": 1.8381524085998535, "learning_rate": 1.2398705890839988e-05, "loss": 0.9816, "step": 7676 }, { "epoch": 0.44, "grad_norm": 2.0057384967803955, "learning_rate": 1.2396902471119934e-05, "loss": 0.9668, "step": 7677 }, { "epoch": 0.44, "grad_norm": 1.6697067022323608, "learning_rate": 1.2395098968689471e-05, "loss": 0.8417, "step": 7678 }, { "epoch": 0.44, "grad_norm": 2.0697829723358154, "learning_rate": 1.2393295383610832e-05, "loss": 1.1301, "step": 7679 }, { "epoch": 0.44, "grad_norm": 1.8120660781860352, "learning_rate": 1.2391491715946256e-05, "loss": 1.0325, "step": 7680 }, { "epoch": 0.44, "grad_norm": 1.709254264831543, "learning_rate": 1.238968796575798e-05, "loss": 0.8657, "step": 7681 }, { "epoch": 0.44, "grad_norm": 1.9467140436172485, "learning_rate": 1.238788413310825e-05, "loss": 0.9921, "step": 7682 }, { "epoch": 0.44, "grad_norm": 1.815127968788147, "learning_rate": 1.2386080218059307e-05, "loss": 0.9492, "step": 7683 }, { "epoch": 0.44, "grad_norm": 1.869968295097351, "learning_rate": 1.2384276220673401e-05, "loss": 1.0001, "step": 7684 }, { "epoch": 0.44, "grad_norm": 1.6904853582382202, "learning_rate": 1.2382472141012785e-05, "loss": 0.9237, "step": 7685 }, { "epoch": 0.44, "grad_norm": 2.066488265991211, "learning_rate": 1.2380667979139709e-05, "loss": 1.0309, "step": 7686 }, { "epoch": 0.44, "grad_norm": 1.782029390335083, "learning_rate": 1.237886373511643e-05, "loss": 0.9428, "step": 7687 }, { "epoch": 0.44, "grad_norm": 1.8218485116958618, "learning_rate": 1.237705940900521e-05, "loss": 0.9441, "step": 7688 }, { "epoch": 0.44, "grad_norm": 1.8111777305603027, "learning_rate": 1.2375255000868309e-05, "loss": 0.9998, "step": 7689 }, { "epoch": 0.44, "grad_norm": 1.9606832265853882, "learning_rate": 1.2373450510767993e-05, "loss": 0.9992, "step": 7690 }, { "epoch": 0.44, "grad_norm": 1.948456883430481, "learning_rate": 1.2371645938766532e-05, "loss": 0.9433, "step": 7691 }, { "epoch": 0.44, "grad_norm": 2.0028083324432373, "learning_rate": 1.236984128492619e-05, "loss": 0.9702, "step": 7692 }, { "epoch": 0.44, "grad_norm": 1.822094440460205, "learning_rate": 1.2368036549309248e-05, "loss": 0.976, "step": 7693 }, { "epoch": 0.44, "grad_norm": 1.6483851671218872, "learning_rate": 1.236623173197798e-05, "loss": 1.0515, "step": 7694 }, { "epoch": 0.44, "grad_norm": 1.798875093460083, "learning_rate": 1.2364426832994663e-05, "loss": 0.9366, "step": 7695 }, { "epoch": 0.44, "grad_norm": 1.9957859516143799, "learning_rate": 1.2362621852421583e-05, "loss": 1.0341, "step": 7696 }, { "epoch": 0.44, "grad_norm": 1.893798589706421, "learning_rate": 1.2360816790321023e-05, "loss": 0.9339, "step": 7697 }, { "epoch": 0.44, "grad_norm": 1.888737678527832, "learning_rate": 1.2359011646755268e-05, "loss": 0.9602, "step": 7698 }, { "epoch": 0.44, "grad_norm": 1.9217383861541748, "learning_rate": 1.2357206421786611e-05, "loss": 1.0519, "step": 7699 }, { "epoch": 0.44, "grad_norm": 1.7134835720062256, "learning_rate": 1.2355401115477347e-05, "loss": 0.9607, "step": 7700 }, { "epoch": 0.44, "grad_norm": 1.7297958135604858, "learning_rate": 1.2353595727889767e-05, "loss": 0.971, "step": 7701 }, { "epoch": 0.44, "grad_norm": 1.7277300357818604, "learning_rate": 1.2351790259086174e-05, "loss": 0.9512, "step": 7702 }, { "epoch": 0.44, "grad_norm": 1.9131211042404175, "learning_rate": 1.234998470912887e-05, "loss": 0.9907, "step": 7703 }, { "epoch": 0.44, "grad_norm": 1.914372444152832, "learning_rate": 1.2348179078080155e-05, "loss": 1.0042, "step": 7704 }, { "epoch": 0.44, "grad_norm": 1.7643848657608032, "learning_rate": 1.2346373366002342e-05, "loss": 1.0128, "step": 7705 }, { "epoch": 0.44, "grad_norm": 1.8489370346069336, "learning_rate": 1.2344567572957738e-05, "loss": 0.9128, "step": 7706 }, { "epoch": 0.44, "grad_norm": 1.8097583055496216, "learning_rate": 1.2342761699008656e-05, "loss": 1.0348, "step": 7707 }, { "epoch": 0.44, "grad_norm": 1.8438140153884888, "learning_rate": 1.2340955744217413e-05, "loss": 1.0085, "step": 7708 }, { "epoch": 0.44, "grad_norm": 1.8783791065216064, "learning_rate": 1.2339149708646323e-05, "loss": 0.9383, "step": 7709 }, { "epoch": 0.44, "grad_norm": 1.824316143989563, "learning_rate": 1.2337343592357713e-05, "loss": 0.8786, "step": 7710 }, { "epoch": 0.44, "grad_norm": 1.642930269241333, "learning_rate": 1.2335537395413906e-05, "loss": 0.9145, "step": 7711 }, { "epoch": 0.44, "grad_norm": 1.7573450803756714, "learning_rate": 1.2333731117877228e-05, "loss": 1.0059, "step": 7712 }, { "epoch": 0.44, "grad_norm": 1.7118175029754639, "learning_rate": 1.2331924759810008e-05, "loss": 0.9653, "step": 7713 }, { "epoch": 0.44, "grad_norm": 2.0859718322753906, "learning_rate": 1.2330118321274576e-05, "loss": 0.9895, "step": 7714 }, { "epoch": 0.44, "grad_norm": 1.8060108423233032, "learning_rate": 1.232831180233327e-05, "loss": 0.9569, "step": 7715 }, { "epoch": 0.44, "grad_norm": 1.9823864698410034, "learning_rate": 1.232650520304843e-05, "loss": 0.9702, "step": 7716 }, { "epoch": 0.44, "grad_norm": 1.8027502298355103, "learning_rate": 1.2324698523482393e-05, "loss": 0.9291, "step": 7717 }, { "epoch": 0.44, "grad_norm": 1.7098044157028198, "learning_rate": 1.2322891763697505e-05, "loss": 0.9706, "step": 7718 }, { "epoch": 0.44, "grad_norm": 1.840698003768921, "learning_rate": 1.2321084923756108e-05, "loss": 1.0376, "step": 7719 }, { "epoch": 0.44, "grad_norm": 1.799296498298645, "learning_rate": 1.2319278003720554e-05, "loss": 0.9955, "step": 7720 }, { "epoch": 0.44, "grad_norm": 1.6471272706985474, "learning_rate": 1.2317471003653196e-05, "loss": 0.9732, "step": 7721 }, { "epoch": 0.44, "grad_norm": 1.9313384294509888, "learning_rate": 1.2315663923616388e-05, "loss": 1.0424, "step": 7722 }, { "epoch": 0.44, "grad_norm": 1.9198538064956665, "learning_rate": 1.2313856763672486e-05, "loss": 0.9698, "step": 7723 }, { "epoch": 0.44, "grad_norm": 1.7545133829116821, "learning_rate": 1.2312049523883851e-05, "loss": 1.0192, "step": 7724 }, { "epoch": 0.44, "grad_norm": 1.8588281869888306, "learning_rate": 1.2310242204312845e-05, "loss": 1.0653, "step": 7725 }, { "epoch": 0.44, "grad_norm": 1.8679583072662354, "learning_rate": 1.2308434805021836e-05, "loss": 0.9915, "step": 7726 }, { "epoch": 0.44, "grad_norm": 1.875706672668457, "learning_rate": 1.2306627326073189e-05, "loss": 0.9695, "step": 7727 }, { "epoch": 0.44, "grad_norm": 1.8187111616134644, "learning_rate": 1.2304819767529274e-05, "loss": 0.9801, "step": 7728 }, { "epoch": 0.44, "grad_norm": 1.9930998086929321, "learning_rate": 1.2303012129452469e-05, "loss": 0.9449, "step": 7729 }, { "epoch": 0.44, "grad_norm": 1.8013794422149658, "learning_rate": 1.230120441190515e-05, "loss": 1.008, "step": 7730 }, { "epoch": 0.44, "grad_norm": 1.8797240257263184, "learning_rate": 1.2299396614949691e-05, "loss": 0.9384, "step": 7731 }, { "epoch": 0.44, "grad_norm": 1.7570873498916626, "learning_rate": 1.2297588738648481e-05, "loss": 0.983, "step": 7732 }, { "epoch": 0.44, "grad_norm": 1.6738688945770264, "learning_rate": 1.22957807830639e-05, "loss": 0.9441, "step": 7733 }, { "epoch": 0.44, "grad_norm": 1.7166593074798584, "learning_rate": 1.2293972748258338e-05, "loss": 1.0917, "step": 7734 }, { "epoch": 0.44, "grad_norm": 1.752254843711853, "learning_rate": 1.2292164634294184e-05, "loss": 0.9098, "step": 7735 }, { "epoch": 0.44, "grad_norm": 1.5763689279556274, "learning_rate": 1.2290356441233833e-05, "loss": 0.946, "step": 7736 }, { "epoch": 0.44, "grad_norm": 1.8233758211135864, "learning_rate": 1.2288548169139676e-05, "loss": 0.9967, "step": 7737 }, { "epoch": 0.44, "grad_norm": 1.7821087837219238, "learning_rate": 1.2286739818074116e-05, "loss": 0.8766, "step": 7738 }, { "epoch": 0.44, "grad_norm": 1.9034427404403687, "learning_rate": 1.2284931388099554e-05, "loss": 1.0466, "step": 7739 }, { "epoch": 0.44, "grad_norm": 1.8669012784957886, "learning_rate": 1.2283122879278393e-05, "loss": 1.0304, "step": 7740 }, { "epoch": 0.44, "grad_norm": 1.7810660600662231, "learning_rate": 1.2281314291673036e-05, "loss": 0.9702, "step": 7741 }, { "epoch": 0.44, "grad_norm": 1.9883533716201782, "learning_rate": 1.2279505625345896e-05, "loss": 0.8985, "step": 7742 }, { "epoch": 0.44, "grad_norm": 1.9422756433486938, "learning_rate": 1.2277696880359384e-05, "loss": 1.0119, "step": 7743 }, { "epoch": 0.44, "grad_norm": 1.8473113775253296, "learning_rate": 1.2275888056775918e-05, "loss": 0.9856, "step": 7744 }, { "epoch": 0.44, "grad_norm": 1.7119449377059937, "learning_rate": 1.227407915465791e-05, "loss": 0.9976, "step": 7745 }, { "epoch": 0.44, "grad_norm": 2.1289796829223633, "learning_rate": 1.2272270174067782e-05, "loss": 1.008, "step": 7746 }, { "epoch": 0.44, "grad_norm": 1.9426058530807495, "learning_rate": 1.227046111506796e-05, "loss": 0.9196, "step": 7747 }, { "epoch": 0.44, "grad_norm": 1.9121708869934082, "learning_rate": 1.2268651977720867e-05, "loss": 0.9814, "step": 7748 }, { "epoch": 0.44, "grad_norm": 1.7229068279266357, "learning_rate": 1.2266842762088932e-05, "loss": 0.9712, "step": 7749 }, { "epoch": 0.44, "grad_norm": 1.9257116317749023, "learning_rate": 1.2265033468234584e-05, "loss": 0.9506, "step": 7750 }, { "epoch": 0.44, "grad_norm": 1.7360285520553589, "learning_rate": 1.2263224096220258e-05, "loss": 1.006, "step": 7751 }, { "epoch": 0.44, "grad_norm": 1.664992094039917, "learning_rate": 1.2261414646108391e-05, "loss": 0.976, "step": 7752 }, { "epoch": 0.44, "grad_norm": 1.885969638824463, "learning_rate": 1.2259605117961422e-05, "loss": 1.0596, "step": 7753 }, { "epoch": 0.44, "grad_norm": 1.6871249675750732, "learning_rate": 1.2257795511841792e-05, "loss": 1.0256, "step": 7754 }, { "epoch": 0.44, "grad_norm": 1.1521257162094116, "learning_rate": 1.2255985827811947e-05, "loss": 0.5943, "step": 7755 }, { "epoch": 0.44, "grad_norm": 1.6556180715560913, "learning_rate": 1.2254176065934332e-05, "loss": 0.9489, "step": 7756 }, { "epoch": 0.44, "grad_norm": 1.7253577709197998, "learning_rate": 1.2252366226271398e-05, "loss": 1.0308, "step": 7757 }, { "epoch": 0.44, "grad_norm": 1.6542346477508545, "learning_rate": 1.2250556308885595e-05, "loss": 1.0086, "step": 7758 }, { "epoch": 0.44, "grad_norm": 1.724663257598877, "learning_rate": 1.224874631383938e-05, "loss": 0.9801, "step": 7759 }, { "epoch": 0.45, "grad_norm": 1.1577228307724, "learning_rate": 1.2246936241195215e-05, "loss": 0.6109, "step": 7760 }, { "epoch": 0.45, "grad_norm": 2.045353651046753, "learning_rate": 1.2245126091015556e-05, "loss": 1.0538, "step": 7761 }, { "epoch": 0.45, "grad_norm": 1.7128798961639404, "learning_rate": 1.2243315863362866e-05, "loss": 0.9856, "step": 7762 }, { "epoch": 0.45, "grad_norm": 2.0578300952911377, "learning_rate": 1.2241505558299614e-05, "loss": 0.9877, "step": 7763 }, { "epoch": 0.45, "grad_norm": 1.7511426210403442, "learning_rate": 1.2239695175888264e-05, "loss": 1.0442, "step": 7764 }, { "epoch": 0.45, "grad_norm": 1.7828105688095093, "learning_rate": 1.223788471619129e-05, "loss": 0.9138, "step": 7765 }, { "epoch": 0.45, "grad_norm": 1.2071444988250732, "learning_rate": 1.223607417927117e-05, "loss": 0.6095, "step": 7766 }, { "epoch": 0.45, "grad_norm": 2.0276472568511963, "learning_rate": 1.2234263565190372e-05, "loss": 0.9679, "step": 7767 }, { "epoch": 0.45, "grad_norm": 1.854109287261963, "learning_rate": 1.2232452874011381e-05, "loss": 1.0191, "step": 7768 }, { "epoch": 0.45, "grad_norm": 1.8650341033935547, "learning_rate": 1.2230642105796674e-05, "loss": 1.0227, "step": 7769 }, { "epoch": 0.45, "grad_norm": 0.9766743183135986, "learning_rate": 1.2228831260608745e-05, "loss": 0.5656, "step": 7770 }, { "epoch": 0.45, "grad_norm": 1.8758357763290405, "learning_rate": 1.222702033851007e-05, "loss": 0.9577, "step": 7771 }, { "epoch": 0.45, "grad_norm": 1.7652212381362915, "learning_rate": 1.2225209339563144e-05, "loss": 0.9378, "step": 7772 }, { "epoch": 0.45, "grad_norm": 1.9341726303100586, "learning_rate": 1.2223398263830463e-05, "loss": 0.9483, "step": 7773 }, { "epoch": 0.45, "grad_norm": 1.6811860799789429, "learning_rate": 1.2221587111374519e-05, "loss": 0.9361, "step": 7774 }, { "epoch": 0.45, "grad_norm": 1.9038540124893188, "learning_rate": 1.2219775882257804e-05, "loss": 0.9776, "step": 7775 }, { "epoch": 0.45, "grad_norm": 1.8591549396514893, "learning_rate": 1.2217964576542829e-05, "loss": 0.983, "step": 7776 }, { "epoch": 0.45, "grad_norm": 1.8306639194488525, "learning_rate": 1.221615319429209e-05, "loss": 1.0137, "step": 7777 }, { "epoch": 0.45, "grad_norm": 1.8915072679519653, "learning_rate": 1.2214341735568099e-05, "loss": 1.0339, "step": 7778 }, { "epoch": 0.45, "grad_norm": 1.753797173500061, "learning_rate": 1.2212530200433355e-05, "loss": 0.9676, "step": 7779 }, { "epoch": 0.45, "grad_norm": 1.785038948059082, "learning_rate": 1.2210718588950376e-05, "loss": 0.9451, "step": 7780 }, { "epoch": 0.45, "grad_norm": 1.7709153890609741, "learning_rate": 1.2208906901181675e-05, "loss": 0.9271, "step": 7781 }, { "epoch": 0.45, "grad_norm": 1.7793200016021729, "learning_rate": 1.2207095137189766e-05, "loss": 0.8301, "step": 7782 }, { "epoch": 0.45, "grad_norm": 2.197963237762451, "learning_rate": 1.2205283297037172e-05, "loss": 1.0018, "step": 7783 }, { "epoch": 0.45, "grad_norm": 1.7792103290557861, "learning_rate": 1.2203471380786407e-05, "loss": 1.0014, "step": 7784 }, { "epoch": 0.45, "grad_norm": 1.6629672050476074, "learning_rate": 1.22016593885e-05, "loss": 0.9557, "step": 7785 }, { "epoch": 0.45, "grad_norm": 1.7825459241867065, "learning_rate": 1.219984732024048e-05, "loss": 1.009, "step": 7786 }, { "epoch": 0.45, "grad_norm": 1.8265187740325928, "learning_rate": 1.2198035176070375e-05, "loss": 1.0344, "step": 7787 }, { "epoch": 0.45, "grad_norm": 1.7456082105636597, "learning_rate": 1.2196222956052215e-05, "loss": 0.9857, "step": 7788 }, { "epoch": 0.45, "grad_norm": 1.899399757385254, "learning_rate": 1.2194410660248535e-05, "loss": 1.0053, "step": 7789 }, { "epoch": 0.45, "grad_norm": 1.8614953756332397, "learning_rate": 1.2192598288721873e-05, "loss": 0.9883, "step": 7790 }, { "epoch": 0.45, "grad_norm": 1.7467591762542725, "learning_rate": 1.219078584153477e-05, "loss": 1.0712, "step": 7791 }, { "epoch": 0.45, "grad_norm": 1.6163705587387085, "learning_rate": 1.2188973318749766e-05, "loss": 0.9939, "step": 7792 }, { "epoch": 0.45, "grad_norm": 1.9156336784362793, "learning_rate": 1.2187160720429407e-05, "loss": 0.9976, "step": 7793 }, { "epoch": 0.45, "grad_norm": 1.8359031677246094, "learning_rate": 1.2185348046636243e-05, "loss": 1.0469, "step": 7794 }, { "epoch": 0.45, "grad_norm": 1.8725281953811646, "learning_rate": 1.2183535297432821e-05, "loss": 0.9965, "step": 7795 }, { "epoch": 0.45, "grad_norm": 1.815598726272583, "learning_rate": 1.2181722472881697e-05, "loss": 1.0652, "step": 7796 }, { "epoch": 0.45, "grad_norm": 1.960411787033081, "learning_rate": 1.217990957304542e-05, "loss": 0.9459, "step": 7797 }, { "epoch": 0.45, "grad_norm": 1.717895746231079, "learning_rate": 1.2178096597986557e-05, "loss": 0.958, "step": 7798 }, { "epoch": 0.45, "grad_norm": 1.7529269456863403, "learning_rate": 1.2176283547767665e-05, "loss": 0.9301, "step": 7799 }, { "epoch": 0.45, "grad_norm": 1.8150732517242432, "learning_rate": 1.2174470422451306e-05, "loss": 0.9927, "step": 7800 }, { "epoch": 0.45, "grad_norm": 1.0498924255371094, "learning_rate": 1.2172657222100047e-05, "loss": 0.6094, "step": 7801 }, { "epoch": 0.45, "grad_norm": 1.8583680391311646, "learning_rate": 1.2170843946776457e-05, "loss": 0.987, "step": 7802 }, { "epoch": 0.45, "grad_norm": 1.7384819984436035, "learning_rate": 1.2169030596543106e-05, "loss": 0.9862, "step": 7803 }, { "epoch": 0.45, "grad_norm": 1.7967960834503174, "learning_rate": 1.2167217171462566e-05, "loss": 1.0252, "step": 7804 }, { "epoch": 0.45, "grad_norm": 1.8554235696792603, "learning_rate": 1.2165403671597418e-05, "loss": 0.9671, "step": 7805 }, { "epoch": 0.45, "grad_norm": 1.789666771888733, "learning_rate": 1.2163590097010239e-05, "loss": 0.966, "step": 7806 }, { "epoch": 0.45, "grad_norm": 1.9626660346984863, "learning_rate": 1.2161776447763607e-05, "loss": 0.9962, "step": 7807 }, { "epoch": 0.45, "grad_norm": 1.7834597826004028, "learning_rate": 1.2159962723920107e-05, "loss": 0.983, "step": 7808 }, { "epoch": 0.45, "grad_norm": 1.6984755992889404, "learning_rate": 1.2158148925542328e-05, "loss": 0.8972, "step": 7809 }, { "epoch": 0.45, "grad_norm": 1.6644375324249268, "learning_rate": 1.215633505269286e-05, "loss": 1.0004, "step": 7810 }, { "epoch": 0.45, "grad_norm": 1.7690021991729736, "learning_rate": 1.2154521105434292e-05, "loss": 0.9507, "step": 7811 }, { "epoch": 0.45, "grad_norm": 2.0975890159606934, "learning_rate": 1.2152707083829218e-05, "loss": 0.9303, "step": 7812 }, { "epoch": 0.45, "grad_norm": 1.6571881771087646, "learning_rate": 1.2150892987940236e-05, "loss": 0.8909, "step": 7813 }, { "epoch": 0.45, "grad_norm": 1.0248130559921265, "learning_rate": 1.2149078817829947e-05, "loss": 0.5726, "step": 7814 }, { "epoch": 0.45, "grad_norm": 1.7024688720703125, "learning_rate": 1.214726457356095e-05, "loss": 0.9333, "step": 7815 }, { "epoch": 0.45, "grad_norm": 1.6930779218673706, "learning_rate": 1.2145450255195852e-05, "loss": 0.933, "step": 7816 }, { "epoch": 0.45, "grad_norm": 1.7007426023483276, "learning_rate": 1.2143635862797258e-05, "loss": 0.95, "step": 7817 }, { "epoch": 0.45, "grad_norm": 2.0356431007385254, "learning_rate": 1.214182139642778e-05, "loss": 0.9791, "step": 7818 }, { "epoch": 0.45, "grad_norm": 1.6855437755584717, "learning_rate": 1.2140006856150026e-05, "loss": 0.9086, "step": 7819 }, { "epoch": 0.45, "grad_norm": 1.6805263757705688, "learning_rate": 1.2138192242026613e-05, "loss": 0.9183, "step": 7820 }, { "epoch": 0.45, "grad_norm": 1.8973623514175415, "learning_rate": 1.213637755412016e-05, "loss": 0.9967, "step": 7821 }, { "epoch": 0.45, "grad_norm": 1.6158584356307983, "learning_rate": 1.2134562792493285e-05, "loss": 1.0595, "step": 7822 }, { "epoch": 0.45, "grad_norm": 1.979712724685669, "learning_rate": 1.2132747957208613e-05, "loss": 0.9056, "step": 7823 }, { "epoch": 0.45, "grad_norm": 1.799809455871582, "learning_rate": 1.2130933048328762e-05, "loss": 0.9442, "step": 7824 }, { "epoch": 0.45, "grad_norm": 2.094165563583374, "learning_rate": 1.2129118065916366e-05, "loss": 1.0416, "step": 7825 }, { "epoch": 0.45, "grad_norm": 1.7371410131454468, "learning_rate": 1.2127303010034052e-05, "loss": 0.9937, "step": 7826 }, { "epoch": 0.45, "grad_norm": 1.6301283836364746, "learning_rate": 1.2125487880744456e-05, "loss": 0.9775, "step": 7827 }, { "epoch": 0.45, "grad_norm": 1.8274223804473877, "learning_rate": 1.212367267811021e-05, "loss": 0.9874, "step": 7828 }, { "epoch": 0.45, "grad_norm": 1.8016401529312134, "learning_rate": 1.2121857402193951e-05, "loss": 0.9359, "step": 7829 }, { "epoch": 0.45, "grad_norm": 1.8613020181655884, "learning_rate": 1.212004205305832e-05, "loss": 0.9893, "step": 7830 }, { "epoch": 0.45, "grad_norm": 1.8588120937347412, "learning_rate": 1.211822663076596e-05, "loss": 0.9684, "step": 7831 }, { "epoch": 0.45, "grad_norm": 1.768686056137085, "learning_rate": 1.2116411135379517e-05, "loss": 0.9467, "step": 7832 }, { "epoch": 0.45, "grad_norm": 1.6390390396118164, "learning_rate": 1.211459556696164e-05, "loss": 0.9352, "step": 7833 }, { "epoch": 0.45, "grad_norm": 1.9199225902557373, "learning_rate": 1.2112779925574973e-05, "loss": 1.0228, "step": 7834 }, { "epoch": 0.45, "grad_norm": 1.7528667449951172, "learning_rate": 1.2110964211282175e-05, "loss": 1.0292, "step": 7835 }, { "epoch": 0.45, "grad_norm": 1.9582585096359253, "learning_rate": 1.2109148424145897e-05, "loss": 0.9846, "step": 7836 }, { "epoch": 0.45, "grad_norm": 1.739980936050415, "learning_rate": 1.2107332564228798e-05, "loss": 0.997, "step": 7837 }, { "epoch": 0.45, "grad_norm": 1.6895437240600586, "learning_rate": 1.2105516631593539e-05, "loss": 0.992, "step": 7838 }, { "epoch": 0.45, "grad_norm": 1.7470614910125732, "learning_rate": 1.2103700626302784e-05, "loss": 0.9675, "step": 7839 }, { "epoch": 0.45, "grad_norm": 2.7087090015411377, "learning_rate": 1.2101884548419196e-05, "loss": 0.9401, "step": 7840 }, { "epoch": 0.45, "grad_norm": 1.5694940090179443, "learning_rate": 1.2100068398005443e-05, "loss": 0.9315, "step": 7841 }, { "epoch": 0.45, "grad_norm": 1.8077785968780518, "learning_rate": 1.2098252175124197e-05, "loss": 0.9982, "step": 7842 }, { "epoch": 0.45, "grad_norm": 1.1268465518951416, "learning_rate": 1.2096435879838129e-05, "loss": 0.5888, "step": 7843 }, { "epoch": 0.45, "grad_norm": 1.7400039434432983, "learning_rate": 1.2094619512209915e-05, "loss": 0.9264, "step": 7844 }, { "epoch": 0.45, "grad_norm": 1.7248669862747192, "learning_rate": 1.2092803072302233e-05, "loss": 0.9711, "step": 7845 }, { "epoch": 0.45, "grad_norm": 1.725252389907837, "learning_rate": 1.2090986560177764e-05, "loss": 0.9837, "step": 7846 }, { "epoch": 0.45, "grad_norm": 1.8736921548843384, "learning_rate": 1.208916997589919e-05, "loss": 0.9879, "step": 7847 }, { "epoch": 0.45, "grad_norm": 1.5278563499450684, "learning_rate": 1.2087353319529193e-05, "loss": 0.9096, "step": 7848 }, { "epoch": 0.45, "grad_norm": 1.782125473022461, "learning_rate": 1.2085536591130467e-05, "loss": 0.929, "step": 7849 }, { "epoch": 0.45, "grad_norm": 1.8642730712890625, "learning_rate": 1.2083719790765698e-05, "loss": 1.0413, "step": 7850 }, { "epoch": 0.45, "grad_norm": 2.118824005126953, "learning_rate": 1.2081902918497577e-05, "loss": 0.9199, "step": 7851 }, { "epoch": 0.45, "grad_norm": 1.9222227334976196, "learning_rate": 1.2080085974388802e-05, "loss": 1.0483, "step": 7852 }, { "epoch": 0.45, "grad_norm": 1.1640421152114868, "learning_rate": 1.2078268958502073e-05, "loss": 0.6379, "step": 7853 }, { "epoch": 0.45, "grad_norm": 1.7426316738128662, "learning_rate": 1.2076451870900087e-05, "loss": 1.0212, "step": 7854 }, { "epoch": 0.45, "grad_norm": 1.9776769876480103, "learning_rate": 1.2074634711645548e-05, "loss": 1.0042, "step": 7855 }, { "epoch": 0.45, "grad_norm": 2.3835716247558594, "learning_rate": 1.207281748080116e-05, "loss": 0.9722, "step": 7856 }, { "epoch": 0.45, "grad_norm": 1.0638364553451538, "learning_rate": 1.207100017842963e-05, "loss": 0.5733, "step": 7857 }, { "epoch": 0.45, "grad_norm": 1.876054286956787, "learning_rate": 1.2069182804593671e-05, "loss": 0.9346, "step": 7858 }, { "epoch": 0.45, "grad_norm": 1.6998441219329834, "learning_rate": 1.2067365359355991e-05, "loss": 0.9379, "step": 7859 }, { "epoch": 0.45, "grad_norm": 1.9469122886657715, "learning_rate": 1.206554784277931e-05, "loss": 0.9101, "step": 7860 }, { "epoch": 0.45, "grad_norm": 1.681156873703003, "learning_rate": 1.206373025492634e-05, "loss": 1.0031, "step": 7861 }, { "epoch": 0.45, "grad_norm": 1.9619028568267822, "learning_rate": 1.2061912595859806e-05, "loss": 1.0183, "step": 7862 }, { "epoch": 0.45, "grad_norm": 1.8572429418563843, "learning_rate": 1.2060094865642427e-05, "loss": 0.9742, "step": 7863 }, { "epoch": 0.45, "grad_norm": 1.036673665046692, "learning_rate": 1.2058277064336928e-05, "loss": 0.6323, "step": 7864 }, { "epoch": 0.45, "grad_norm": 1.765555739402771, "learning_rate": 1.2056459192006038e-05, "loss": 0.9486, "step": 7865 }, { "epoch": 0.45, "grad_norm": 0.9937422275543213, "learning_rate": 1.2054641248712487e-05, "loss": 0.6318, "step": 7866 }, { "epoch": 0.45, "grad_norm": 0.9794669151306152, "learning_rate": 1.2052823234519004e-05, "loss": 0.5837, "step": 7867 }, { "epoch": 0.45, "grad_norm": 1.6570795774459839, "learning_rate": 1.2051005149488326e-05, "loss": 0.8826, "step": 7868 }, { "epoch": 0.45, "grad_norm": 1.8475841283798218, "learning_rate": 1.204918699368319e-05, "loss": 1.0037, "step": 7869 }, { "epoch": 0.45, "grad_norm": 2.0002281665802, "learning_rate": 1.2047368767166334e-05, "loss": 0.9915, "step": 7870 }, { "epoch": 0.45, "grad_norm": 1.937207818031311, "learning_rate": 1.2045550470000502e-05, "loss": 1.0088, "step": 7871 }, { "epoch": 0.45, "grad_norm": 1.6322695016860962, "learning_rate": 1.2043732102248437e-05, "loss": 0.9787, "step": 7872 }, { "epoch": 0.45, "grad_norm": 1.7556989192962646, "learning_rate": 1.2041913663972886e-05, "loss": 1.0925, "step": 7873 }, { "epoch": 0.45, "grad_norm": 1.6921501159667969, "learning_rate": 1.2040095155236597e-05, "loss": 0.9283, "step": 7874 }, { "epoch": 0.45, "grad_norm": 1.780142068862915, "learning_rate": 1.2038276576102324e-05, "loss": 0.8611, "step": 7875 }, { "epoch": 0.45, "grad_norm": 1.6809784173965454, "learning_rate": 1.203645792663282e-05, "loss": 0.9607, "step": 7876 }, { "epoch": 0.45, "grad_norm": 1.7665553092956543, "learning_rate": 1.2034639206890843e-05, "loss": 0.9632, "step": 7877 }, { "epoch": 0.45, "grad_norm": 0.9764895439147949, "learning_rate": 1.2032820416939148e-05, "loss": 0.5531, "step": 7878 }, { "epoch": 0.45, "grad_norm": 1.643336296081543, "learning_rate": 1.20310015568405e-05, "loss": 0.9801, "step": 7879 }, { "epoch": 0.45, "grad_norm": 1.797683835029602, "learning_rate": 1.2029182626657662e-05, "loss": 1.0349, "step": 7880 }, { "epoch": 0.45, "grad_norm": 1.8186728954315186, "learning_rate": 1.20273636264534e-05, "loss": 0.9759, "step": 7881 }, { "epoch": 0.45, "grad_norm": 1.934433937072754, "learning_rate": 1.2025544556290483e-05, "loss": 1.0516, "step": 7882 }, { "epoch": 0.45, "grad_norm": 1.9675723314285278, "learning_rate": 1.202372541623168e-05, "loss": 1.0712, "step": 7883 }, { "epoch": 0.45, "grad_norm": 1.7322179079055786, "learning_rate": 1.2021906206339766e-05, "loss": 0.9636, "step": 7884 }, { "epoch": 0.45, "grad_norm": 1.852795958518982, "learning_rate": 1.202008692667752e-05, "loss": 1.0067, "step": 7885 }, { "epoch": 0.45, "grad_norm": 1.8027853965759277, "learning_rate": 1.2018267577307714e-05, "loss": 0.9376, "step": 7886 }, { "epoch": 0.45, "grad_norm": 1.6885671615600586, "learning_rate": 1.2016448158293133e-05, "loss": 0.9805, "step": 7887 }, { "epoch": 0.45, "grad_norm": 1.6978824138641357, "learning_rate": 1.2014628669696557e-05, "loss": 0.9744, "step": 7888 }, { "epoch": 0.45, "grad_norm": 1.6917245388031006, "learning_rate": 1.2012809111580774e-05, "loss": 0.9792, "step": 7889 }, { "epoch": 0.45, "grad_norm": 1.9685620069503784, "learning_rate": 1.201098948400857e-05, "loss": 0.9795, "step": 7890 }, { "epoch": 0.45, "grad_norm": 2.0712857246398926, "learning_rate": 1.2009169787042739e-05, "loss": 0.9485, "step": 7891 }, { "epoch": 0.45, "grad_norm": 1.0722960233688354, "learning_rate": 1.2007350020746069e-05, "loss": 0.5886, "step": 7892 }, { "epoch": 0.45, "grad_norm": 1.6066468954086304, "learning_rate": 1.2005530185181358e-05, "loss": 1.0109, "step": 7893 }, { "epoch": 0.45, "grad_norm": 1.867124080657959, "learning_rate": 1.2003710280411403e-05, "loss": 0.9757, "step": 7894 }, { "epoch": 0.45, "grad_norm": 1.778659462928772, "learning_rate": 1.2001890306499003e-05, "loss": 0.9148, "step": 7895 }, { "epoch": 0.45, "grad_norm": 1.7782255411148071, "learning_rate": 1.200007026350696e-05, "loss": 0.907, "step": 7896 }, { "epoch": 0.45, "grad_norm": 1.765851616859436, "learning_rate": 1.1998250151498078e-05, "loss": 0.9826, "step": 7897 }, { "epoch": 0.45, "grad_norm": 1.8597368001937866, "learning_rate": 1.1996429970535169e-05, "loss": 0.9972, "step": 7898 }, { "epoch": 0.45, "grad_norm": 1.056516408920288, "learning_rate": 1.1994609720681036e-05, "loss": 0.6222, "step": 7899 }, { "epoch": 0.45, "grad_norm": 1.5763027667999268, "learning_rate": 1.1992789401998492e-05, "loss": 0.9862, "step": 7900 }, { "epoch": 0.45, "grad_norm": 1.8898308277130127, "learning_rate": 1.1990969014550355e-05, "loss": 1.0403, "step": 7901 }, { "epoch": 0.45, "grad_norm": 1.6723557710647583, "learning_rate": 1.1989148558399436e-05, "loss": 0.9295, "step": 7902 }, { "epoch": 0.45, "grad_norm": 1.9528573751449585, "learning_rate": 1.198732803360856e-05, "loss": 0.9839, "step": 7903 }, { "epoch": 0.45, "grad_norm": 1.119760274887085, "learning_rate": 1.1985507440240543e-05, "loss": 0.5821, "step": 7904 }, { "epoch": 0.45, "grad_norm": 1.7641072273254395, "learning_rate": 1.198368677835821e-05, "loss": 0.9693, "step": 7905 }, { "epoch": 0.45, "grad_norm": 1.8989454507827759, "learning_rate": 1.1981866048024388e-05, "loss": 0.9342, "step": 7906 }, { "epoch": 0.45, "grad_norm": 1.7934963703155518, "learning_rate": 1.1980045249301904e-05, "loss": 1.0204, "step": 7907 }, { "epoch": 0.45, "grad_norm": 1.7942888736724854, "learning_rate": 1.197822438225359e-05, "loss": 0.9444, "step": 7908 }, { "epoch": 0.45, "grad_norm": 1.915575623512268, "learning_rate": 1.197640344694228e-05, "loss": 1.032, "step": 7909 }, { "epoch": 0.45, "grad_norm": 1.8801555633544922, "learning_rate": 1.1974582443430807e-05, "loss": 0.9634, "step": 7910 }, { "epoch": 0.45, "grad_norm": 2.098273515701294, "learning_rate": 1.1972761371782008e-05, "loss": 1.0093, "step": 7911 }, { "epoch": 0.45, "grad_norm": 1.729427456855774, "learning_rate": 1.1970940232058727e-05, "loss": 1.0106, "step": 7912 }, { "epoch": 0.45, "grad_norm": 1.6927303075790405, "learning_rate": 1.1969119024323805e-05, "loss": 0.9869, "step": 7913 }, { "epoch": 0.45, "grad_norm": 1.7167794704437256, "learning_rate": 1.1967297748640085e-05, "loss": 0.9666, "step": 7914 }, { "epoch": 0.45, "grad_norm": 1.813736081123352, "learning_rate": 1.1965476405070415e-05, "loss": 1.0, "step": 7915 }, { "epoch": 0.45, "grad_norm": 1.1631368398666382, "learning_rate": 1.1963654993677645e-05, "loss": 0.6381, "step": 7916 }, { "epoch": 0.45, "grad_norm": 1.1261005401611328, "learning_rate": 1.1961833514524624e-05, "loss": 0.605, "step": 7917 }, { "epoch": 0.45, "grad_norm": 1.8264689445495605, "learning_rate": 1.196001196767421e-05, "loss": 0.9705, "step": 7918 }, { "epoch": 0.45, "grad_norm": 1.828500509262085, "learning_rate": 1.1958190353189259e-05, "loss": 0.9949, "step": 7919 }, { "epoch": 0.45, "grad_norm": 1.8491536378860474, "learning_rate": 1.1956368671132628e-05, "loss": 0.9823, "step": 7920 }, { "epoch": 0.45, "grad_norm": 1.795048475265503, "learning_rate": 1.1954546921567179e-05, "loss": 1.0092, "step": 7921 }, { "epoch": 0.45, "grad_norm": 2.0442445278167725, "learning_rate": 1.1952725104555775e-05, "loss": 1.0333, "step": 7922 }, { "epoch": 0.45, "grad_norm": 1.9495928287506104, "learning_rate": 1.1950903220161286e-05, "loss": 1.0267, "step": 7923 }, { "epoch": 0.45, "grad_norm": 1.8137692213058472, "learning_rate": 1.1949081268446573e-05, "loss": 1.0312, "step": 7924 }, { "epoch": 0.45, "grad_norm": 1.158942461013794, "learning_rate": 1.194725924947451e-05, "loss": 0.5532, "step": 7925 }, { "epoch": 0.45, "grad_norm": 1.695214033126831, "learning_rate": 1.1945437163307971e-05, "loss": 0.8582, "step": 7926 }, { "epoch": 0.45, "grad_norm": 1.821165919303894, "learning_rate": 1.1943615010009828e-05, "loss": 0.9679, "step": 7927 }, { "epoch": 0.45, "grad_norm": 1.7312781810760498, "learning_rate": 1.1941792789642963e-05, "loss": 0.9957, "step": 7928 }, { "epoch": 0.45, "grad_norm": 1.896626591682434, "learning_rate": 1.1939970502270253e-05, "loss": 1.0303, "step": 7929 }, { "epoch": 0.45, "grad_norm": 1.9577184915542603, "learning_rate": 1.1938148147954575e-05, "loss": 0.9517, "step": 7930 }, { "epoch": 0.45, "grad_norm": 1.6960945129394531, "learning_rate": 1.1936325726758822e-05, "loss": 0.9566, "step": 7931 }, { "epoch": 0.45, "grad_norm": 1.7003276348114014, "learning_rate": 1.1934503238745878e-05, "loss": 0.9321, "step": 7932 }, { "epoch": 0.45, "grad_norm": 1.6604646444320679, "learning_rate": 1.1932680683978631e-05, "loss": 1.0728, "step": 7933 }, { "epoch": 0.46, "grad_norm": 1.7263479232788086, "learning_rate": 1.193085806251997e-05, "loss": 0.9398, "step": 7934 }, { "epoch": 0.46, "grad_norm": 1.780462622642517, "learning_rate": 1.1929035374432794e-05, "loss": 1.0487, "step": 7935 }, { "epoch": 0.46, "grad_norm": 1.9222217798233032, "learning_rate": 1.1927212619779994e-05, "loss": 0.9582, "step": 7936 }, { "epoch": 0.46, "grad_norm": 1.9137370586395264, "learning_rate": 1.192538979862447e-05, "loss": 0.9669, "step": 7937 }, { "epoch": 0.46, "grad_norm": 1.7538650035858154, "learning_rate": 1.1923566911029123e-05, "loss": 0.9555, "step": 7938 }, { "epoch": 0.46, "grad_norm": 1.064792275428772, "learning_rate": 1.1921743957056854e-05, "loss": 0.5748, "step": 7939 }, { "epoch": 0.46, "grad_norm": 1.6400911808013916, "learning_rate": 1.1919920936770568e-05, "loss": 0.9217, "step": 7940 }, { "epoch": 0.46, "grad_norm": 1.7615841627120972, "learning_rate": 1.1918097850233177e-05, "loss": 0.9309, "step": 7941 }, { "epoch": 0.46, "grad_norm": 1.0309568643569946, "learning_rate": 1.1916274697507583e-05, "loss": 0.6075, "step": 7942 }, { "epoch": 0.46, "grad_norm": 1.8571319580078125, "learning_rate": 1.1914451478656708e-05, "loss": 0.9384, "step": 7943 }, { "epoch": 0.46, "grad_norm": 1.8746356964111328, "learning_rate": 1.1912628193743454e-05, "loss": 0.991, "step": 7944 }, { "epoch": 0.46, "grad_norm": 1.9811104536056519, "learning_rate": 1.1910804842830746e-05, "loss": 0.9576, "step": 7945 }, { "epoch": 0.46, "grad_norm": 1.821722149848938, "learning_rate": 1.1908981425981502e-05, "loss": 0.9125, "step": 7946 }, { "epoch": 0.46, "grad_norm": 0.9698566794395447, "learning_rate": 1.190715794325864e-05, "loss": 0.5708, "step": 7947 }, { "epoch": 0.46, "grad_norm": 1.666179895401001, "learning_rate": 1.1905334394725086e-05, "loss": 1.0343, "step": 7948 }, { "epoch": 0.46, "grad_norm": 1.7561556100845337, "learning_rate": 1.1903510780443765e-05, "loss": 1.0325, "step": 7949 }, { "epoch": 0.46, "grad_norm": 1.8011491298675537, "learning_rate": 1.1901687100477604e-05, "loss": 1.0059, "step": 7950 }, { "epoch": 0.46, "grad_norm": 2.019629955291748, "learning_rate": 1.1899863354889532e-05, "loss": 1.0272, "step": 7951 }, { "epoch": 0.46, "grad_norm": 1.8673694133758545, "learning_rate": 1.1898039543742484e-05, "loss": 0.9911, "step": 7952 }, { "epoch": 0.46, "grad_norm": 1.8260685205459595, "learning_rate": 1.189621566709939e-05, "loss": 0.9564, "step": 7953 }, { "epoch": 0.46, "grad_norm": 1.7339130640029907, "learning_rate": 1.1894391725023194e-05, "loss": 0.9645, "step": 7954 }, { "epoch": 0.46, "grad_norm": 1.8669044971466064, "learning_rate": 1.1892567717576831e-05, "loss": 1.0324, "step": 7955 }, { "epoch": 0.46, "grad_norm": 1.8711845874786377, "learning_rate": 1.1890743644823242e-05, "loss": 1.0058, "step": 7956 }, { "epoch": 0.46, "grad_norm": 1.1033128499984741, "learning_rate": 1.188891950682537e-05, "loss": 0.5578, "step": 7957 }, { "epoch": 0.46, "grad_norm": 1.7915196418762207, "learning_rate": 1.1887095303646161e-05, "loss": 0.8906, "step": 7958 }, { "epoch": 0.46, "grad_norm": 1.736484408378601, "learning_rate": 1.1885271035348569e-05, "loss": 1.0089, "step": 7959 }, { "epoch": 0.46, "grad_norm": 1.8839147090911865, "learning_rate": 1.1883446701995536e-05, "loss": 0.9998, "step": 7960 }, { "epoch": 0.46, "grad_norm": 1.742846131324768, "learning_rate": 1.1881622303650022e-05, "loss": 0.9501, "step": 7961 }, { "epoch": 0.46, "grad_norm": 1.9567749500274658, "learning_rate": 1.1879797840374976e-05, "loss": 0.9694, "step": 7962 }, { "epoch": 0.46, "grad_norm": 1.821663498878479, "learning_rate": 1.1877973312233358e-05, "loss": 1.0121, "step": 7963 }, { "epoch": 0.46, "grad_norm": 1.778533697128296, "learning_rate": 1.1876148719288128e-05, "loss": 1.0397, "step": 7964 }, { "epoch": 0.46, "grad_norm": 1.8641010522842407, "learning_rate": 1.1874324061602245e-05, "loss": 0.8945, "step": 7965 }, { "epoch": 0.46, "grad_norm": 1.8051846027374268, "learning_rate": 1.1872499339238677e-05, "loss": 1.0041, "step": 7966 }, { "epoch": 0.46, "grad_norm": 1.6744557619094849, "learning_rate": 1.1870674552260384e-05, "loss": 0.9544, "step": 7967 }, { "epoch": 0.46, "grad_norm": 1.8837062120437622, "learning_rate": 1.186884970073034e-05, "loss": 1.0154, "step": 7968 }, { "epoch": 0.46, "grad_norm": 1.777496576309204, "learning_rate": 1.186702478471151e-05, "loss": 1.0355, "step": 7969 }, { "epoch": 0.46, "grad_norm": 1.7890945672988892, "learning_rate": 1.1865199804266872e-05, "loss": 0.9163, "step": 7970 }, { "epoch": 0.46, "grad_norm": 1.7591489553451538, "learning_rate": 1.1863374759459401e-05, "loss": 0.9771, "step": 7971 }, { "epoch": 0.46, "grad_norm": 2.0123162269592285, "learning_rate": 1.1861549650352069e-05, "loss": 0.957, "step": 7972 }, { "epoch": 0.46, "grad_norm": 1.7793066501617432, "learning_rate": 1.185972447700786e-05, "loss": 0.9913, "step": 7973 }, { "epoch": 0.46, "grad_norm": 1.8957061767578125, "learning_rate": 1.1857899239489753e-05, "loss": 0.9469, "step": 7974 }, { "epoch": 0.46, "grad_norm": 1.1403964757919312, "learning_rate": 1.1856073937860735e-05, "loss": 0.6193, "step": 7975 }, { "epoch": 0.46, "grad_norm": 1.6930732727050781, "learning_rate": 1.1854248572183789e-05, "loss": 1.0251, "step": 7976 }, { "epoch": 0.46, "grad_norm": 1.6233232021331787, "learning_rate": 1.1852423142521904e-05, "loss": 0.8828, "step": 7977 }, { "epoch": 0.46, "grad_norm": 1.8118256330490112, "learning_rate": 1.1850597648938073e-05, "loss": 0.9743, "step": 7978 }, { "epoch": 0.46, "grad_norm": 1.783571481704712, "learning_rate": 1.1848772091495287e-05, "loss": 0.9379, "step": 7979 }, { "epoch": 0.46, "grad_norm": 1.7771198749542236, "learning_rate": 1.1846946470256537e-05, "loss": 0.9804, "step": 7980 }, { "epoch": 0.46, "grad_norm": 1.9490060806274414, "learning_rate": 1.1845120785284827e-05, "loss": 0.9742, "step": 7981 }, { "epoch": 0.46, "grad_norm": 1.6073194742202759, "learning_rate": 1.184329503664315e-05, "loss": 1.0533, "step": 7982 }, { "epoch": 0.46, "grad_norm": 1.8105151653289795, "learning_rate": 1.184146922439451e-05, "loss": 0.9468, "step": 7983 }, { "epoch": 0.46, "grad_norm": 1.0121195316314697, "learning_rate": 1.1839643348601912e-05, "loss": 0.5665, "step": 7984 }, { "epoch": 0.46, "grad_norm": 1.763900876045227, "learning_rate": 1.1837817409328362e-05, "loss": 1.0206, "step": 7985 }, { "epoch": 0.46, "grad_norm": 1.8150074481964111, "learning_rate": 1.1835991406636866e-05, "loss": 0.8469, "step": 7986 }, { "epoch": 0.46, "grad_norm": 2.016606330871582, "learning_rate": 1.1834165340590436e-05, "loss": 0.9691, "step": 7987 }, { "epoch": 0.46, "grad_norm": 1.6387733221054077, "learning_rate": 1.1832339211252084e-05, "loss": 0.9324, "step": 7988 }, { "epoch": 0.46, "grad_norm": 1.8975329399108887, "learning_rate": 1.1830513018684824e-05, "loss": 0.9369, "step": 7989 }, { "epoch": 0.46, "grad_norm": 1.7825971841812134, "learning_rate": 1.1828686762951674e-05, "loss": 0.9607, "step": 7990 }, { "epoch": 0.46, "grad_norm": 1.6111706495285034, "learning_rate": 1.1826860444115648e-05, "loss": 0.9725, "step": 7991 }, { "epoch": 0.46, "grad_norm": 1.7063044309616089, "learning_rate": 1.1825034062239775e-05, "loss": 1.0124, "step": 7992 }, { "epoch": 0.46, "grad_norm": 1.6753889322280884, "learning_rate": 1.1823207617387073e-05, "loss": 0.9982, "step": 7993 }, { "epoch": 0.46, "grad_norm": 1.6649279594421387, "learning_rate": 1.1821381109620571e-05, "loss": 1.0296, "step": 7994 }, { "epoch": 0.46, "grad_norm": 1.8102540969848633, "learning_rate": 1.1819554539003292e-05, "loss": 0.927, "step": 7995 }, { "epoch": 0.46, "grad_norm": 1.8892713785171509, "learning_rate": 1.1817727905598268e-05, "loss": 1.0201, "step": 7996 }, { "epoch": 0.46, "grad_norm": 1.9476550817489624, "learning_rate": 1.1815901209468535e-05, "loss": 0.9812, "step": 7997 }, { "epoch": 0.46, "grad_norm": 1.8439606428146362, "learning_rate": 1.181407445067712e-05, "loss": 1.0306, "step": 7998 }, { "epoch": 0.46, "grad_norm": 2.011625051498413, "learning_rate": 1.1812247629287065e-05, "loss": 0.9561, "step": 7999 }, { "epoch": 0.46, "grad_norm": 1.7481447458267212, "learning_rate": 1.1810420745361408e-05, "loss": 1.0133, "step": 8000 }, { "epoch": 0.46, "grad_norm": 1.8607566356658936, "learning_rate": 1.1808593798963185e-05, "loss": 0.9616, "step": 8001 }, { "epoch": 0.46, "grad_norm": 1.7358875274658203, "learning_rate": 1.1806766790155446e-05, "loss": 0.9752, "step": 8002 }, { "epoch": 0.46, "grad_norm": 1.7777528762817383, "learning_rate": 1.180493971900123e-05, "loss": 0.9703, "step": 8003 }, { "epoch": 0.46, "grad_norm": 1.813475489616394, "learning_rate": 1.1803112585563587e-05, "loss": 0.919, "step": 8004 }, { "epoch": 0.46, "grad_norm": 1.6517568826675415, "learning_rate": 1.1801285389905567e-05, "loss": 0.9347, "step": 8005 }, { "epoch": 0.46, "grad_norm": 1.7512457370758057, "learning_rate": 1.1799458132090214e-05, "loss": 1.0562, "step": 8006 }, { "epoch": 0.46, "grad_norm": 1.1174851655960083, "learning_rate": 1.1797630812180592e-05, "loss": 0.6142, "step": 8007 }, { "epoch": 0.46, "grad_norm": 1.728916883468628, "learning_rate": 1.1795803430239752e-05, "loss": 1.0521, "step": 8008 }, { "epoch": 0.46, "grad_norm": 1.625531554222107, "learning_rate": 1.179397598633075e-05, "loss": 0.9305, "step": 8009 }, { "epoch": 0.46, "grad_norm": 1.7653635740280151, "learning_rate": 1.1792148480516648e-05, "loss": 0.9921, "step": 8010 }, { "epoch": 0.46, "grad_norm": 1.6762639284133911, "learning_rate": 1.1790320912860508e-05, "loss": 0.9083, "step": 8011 }, { "epoch": 0.46, "grad_norm": 1.7834948301315308, "learning_rate": 1.1788493283425398e-05, "loss": 1.0187, "step": 8012 }, { "epoch": 0.46, "grad_norm": 1.823069453239441, "learning_rate": 1.1786665592274378e-05, "loss": 0.9573, "step": 8013 }, { "epoch": 0.46, "grad_norm": 1.6952208280563354, "learning_rate": 1.1784837839470519e-05, "loss": 0.9276, "step": 8014 }, { "epoch": 0.46, "grad_norm": 1.7396788597106934, "learning_rate": 1.1783010025076893e-05, "loss": 0.903, "step": 8015 }, { "epoch": 0.46, "grad_norm": 1.7886943817138672, "learning_rate": 1.1781182149156572e-05, "loss": 0.9444, "step": 8016 }, { "epoch": 0.46, "grad_norm": 1.8170719146728516, "learning_rate": 1.1779354211772632e-05, "loss": 1.0505, "step": 8017 }, { "epoch": 0.46, "grad_norm": 1.860854983329773, "learning_rate": 1.1777526212988144e-05, "loss": 0.9851, "step": 8018 }, { "epoch": 0.46, "grad_norm": 1.619338870048523, "learning_rate": 1.1775698152866195e-05, "loss": 0.9354, "step": 8019 }, { "epoch": 0.46, "grad_norm": 1.883421778678894, "learning_rate": 1.1773870031469863e-05, "loss": 0.9667, "step": 8020 }, { "epoch": 0.46, "grad_norm": 1.8250725269317627, "learning_rate": 1.177204184886223e-05, "loss": 0.9406, "step": 8021 }, { "epoch": 0.46, "grad_norm": 1.766154408454895, "learning_rate": 1.1770213605106384e-05, "loss": 0.9322, "step": 8022 }, { "epoch": 0.46, "grad_norm": 1.8425568342208862, "learning_rate": 1.1768385300265409e-05, "loss": 1.1135, "step": 8023 }, { "epoch": 0.46, "grad_norm": 1.6613640785217285, "learning_rate": 1.1766556934402398e-05, "loss": 0.9558, "step": 8024 }, { "epoch": 0.46, "grad_norm": 1.895185947418213, "learning_rate": 1.1764728507580442e-05, "loss": 0.8801, "step": 8025 }, { "epoch": 0.46, "grad_norm": 1.5727230310440063, "learning_rate": 1.1762900019862635e-05, "loss": 0.8939, "step": 8026 }, { "epoch": 0.46, "grad_norm": 1.8296129703521729, "learning_rate": 1.1761071471312075e-05, "loss": 0.9284, "step": 8027 }, { "epoch": 0.46, "grad_norm": 1.7631586790084839, "learning_rate": 1.1759242861991855e-05, "loss": 0.9583, "step": 8028 }, { "epoch": 0.46, "grad_norm": 1.9332904815673828, "learning_rate": 1.175741419196508e-05, "loss": 0.9527, "step": 8029 }, { "epoch": 0.46, "grad_norm": 1.8091992139816284, "learning_rate": 1.175558546129485e-05, "loss": 0.9601, "step": 8030 }, { "epoch": 0.46, "grad_norm": 1.7562631368637085, "learning_rate": 1.175375667004427e-05, "loss": 0.9975, "step": 8031 }, { "epoch": 0.46, "grad_norm": 1.8765333890914917, "learning_rate": 1.1751927818276445e-05, "loss": 1.0765, "step": 8032 }, { "epoch": 0.46, "grad_norm": 1.712401270866394, "learning_rate": 1.1750098906054485e-05, "loss": 1.0067, "step": 8033 }, { "epoch": 0.46, "grad_norm": 1.7910075187683105, "learning_rate": 1.1748269933441501e-05, "loss": 0.9828, "step": 8034 }, { "epoch": 0.46, "grad_norm": 1.8870741128921509, "learning_rate": 1.1746440900500604e-05, "loss": 0.9482, "step": 8035 }, { "epoch": 0.46, "grad_norm": 1.8458998203277588, "learning_rate": 1.174461180729491e-05, "loss": 0.9521, "step": 8036 }, { "epoch": 0.46, "grad_norm": 1.7518543004989624, "learning_rate": 1.1742782653887537e-05, "loss": 0.9304, "step": 8037 }, { "epoch": 0.46, "grad_norm": 1.9056423902511597, "learning_rate": 1.1740953440341602e-05, "loss": 0.9669, "step": 8038 }, { "epoch": 0.46, "grad_norm": 1.0244139432907104, "learning_rate": 1.1739124166720228e-05, "loss": 0.5684, "step": 8039 }, { "epoch": 0.46, "grad_norm": 1.7574100494384766, "learning_rate": 1.1737294833086537e-05, "loss": 0.9641, "step": 8040 }, { "epoch": 0.46, "grad_norm": 1.6881746053695679, "learning_rate": 1.1735465439503652e-05, "loss": 0.9955, "step": 8041 }, { "epoch": 0.46, "grad_norm": 0.9910071492195129, "learning_rate": 1.1733635986034706e-05, "loss": 0.5522, "step": 8042 }, { "epoch": 0.46, "grad_norm": 1.742068886756897, "learning_rate": 1.1731806472742823e-05, "loss": 0.9728, "step": 8043 }, { "epoch": 0.46, "grad_norm": 1.839636206626892, "learning_rate": 1.1729976899691138e-05, "loss": 1.0572, "step": 8044 }, { "epoch": 0.46, "grad_norm": 1.7851154804229736, "learning_rate": 1.172814726694278e-05, "loss": 0.9615, "step": 8045 }, { "epoch": 0.46, "grad_norm": 1.874877691268921, "learning_rate": 1.1726317574560888e-05, "loss": 0.9762, "step": 8046 }, { "epoch": 0.46, "grad_norm": 1.7921478748321533, "learning_rate": 1.1724487822608602e-05, "loss": 0.9702, "step": 8047 }, { "epoch": 0.46, "grad_norm": 1.8193144798278809, "learning_rate": 1.1722658011149055e-05, "loss": 0.9823, "step": 8048 }, { "epoch": 0.46, "grad_norm": 1.918825626373291, "learning_rate": 1.1720828140245393e-05, "loss": 0.9573, "step": 8049 }, { "epoch": 0.46, "grad_norm": 1.8105534315109253, "learning_rate": 1.1718998209960755e-05, "loss": 0.9805, "step": 8050 }, { "epoch": 0.46, "grad_norm": 1.630724549293518, "learning_rate": 1.1717168220358296e-05, "loss": 0.9217, "step": 8051 }, { "epoch": 0.46, "grad_norm": 1.7872074842453003, "learning_rate": 1.1715338171501156e-05, "loss": 0.9542, "step": 8052 }, { "epoch": 0.46, "grad_norm": 1.8341808319091797, "learning_rate": 1.1713508063452487e-05, "loss": 0.9985, "step": 8053 }, { "epoch": 0.46, "grad_norm": 1.8159358501434326, "learning_rate": 1.1711677896275444e-05, "loss": 0.969, "step": 8054 }, { "epoch": 0.46, "grad_norm": 1.7973802089691162, "learning_rate": 1.1709847670033176e-05, "loss": 1.0004, "step": 8055 }, { "epoch": 0.46, "grad_norm": 1.7079880237579346, "learning_rate": 1.1708017384788842e-05, "loss": 0.9966, "step": 8056 }, { "epoch": 0.46, "grad_norm": 1.6823166608810425, "learning_rate": 1.1706187040605598e-05, "loss": 1.0462, "step": 8057 }, { "epoch": 0.46, "grad_norm": 1.7455824613571167, "learning_rate": 1.1704356637546606e-05, "loss": 0.8762, "step": 8058 }, { "epoch": 0.46, "grad_norm": 1.7439374923706055, "learning_rate": 1.1702526175675026e-05, "loss": 0.9992, "step": 8059 }, { "epoch": 0.46, "grad_norm": 1.8739033937454224, "learning_rate": 1.1700695655054027e-05, "loss": 1.0688, "step": 8060 }, { "epoch": 0.46, "grad_norm": 1.8617148399353027, "learning_rate": 1.1698865075746768e-05, "loss": 1.0507, "step": 8061 }, { "epoch": 0.46, "grad_norm": 1.8059519529342651, "learning_rate": 1.169703443781642e-05, "loss": 1.0325, "step": 8062 }, { "epoch": 0.46, "grad_norm": 1.6627306938171387, "learning_rate": 1.1695203741326157e-05, "loss": 0.9753, "step": 8063 }, { "epoch": 0.46, "grad_norm": 1.745413064956665, "learning_rate": 1.169337298633915e-05, "loss": 0.954, "step": 8064 }, { "epoch": 0.46, "grad_norm": 1.9172461032867432, "learning_rate": 1.1691542172918566e-05, "loss": 0.9741, "step": 8065 }, { "epoch": 0.46, "grad_norm": 1.9184553623199463, "learning_rate": 1.1689711301127591e-05, "loss": 0.9493, "step": 8066 }, { "epoch": 0.46, "grad_norm": 1.7978546619415283, "learning_rate": 1.1687880371029398e-05, "loss": 1.0013, "step": 8067 }, { "epoch": 0.46, "grad_norm": 1.8455827236175537, "learning_rate": 1.1686049382687168e-05, "loss": 0.9538, "step": 8068 }, { "epoch": 0.46, "grad_norm": 1.8107343912124634, "learning_rate": 1.1684218336164083e-05, "loss": 0.9853, "step": 8069 }, { "epoch": 0.46, "grad_norm": 1.9131147861480713, "learning_rate": 1.1682387231523328e-05, "loss": 1.0223, "step": 8070 }, { "epoch": 0.46, "grad_norm": 1.8731969594955444, "learning_rate": 1.1680556068828092e-05, "loss": 0.9415, "step": 8071 }, { "epoch": 0.46, "grad_norm": 1.6293609142303467, "learning_rate": 1.1678724848141555e-05, "loss": 0.9388, "step": 8072 }, { "epoch": 0.46, "grad_norm": 1.7343569993972778, "learning_rate": 1.1676893569526918e-05, "loss": 0.9575, "step": 8073 }, { "epoch": 0.46, "grad_norm": 1.7709144353866577, "learning_rate": 1.1675062233047365e-05, "loss": 0.9331, "step": 8074 }, { "epoch": 0.46, "grad_norm": 1.6687047481536865, "learning_rate": 1.1673230838766094e-05, "loss": 1.0134, "step": 8075 }, { "epoch": 0.46, "grad_norm": 2.072532892227173, "learning_rate": 1.1671399386746301e-05, "loss": 0.9763, "step": 8076 }, { "epoch": 0.46, "grad_norm": 1.6952625513076782, "learning_rate": 1.1669567877051184e-05, "loss": 0.9338, "step": 8077 }, { "epoch": 0.46, "grad_norm": 1.8068554401397705, "learning_rate": 1.1667736309743945e-05, "loss": 0.9963, "step": 8078 }, { "epoch": 0.46, "grad_norm": 1.6265000104904175, "learning_rate": 1.1665904684887784e-05, "loss": 1.0063, "step": 8079 }, { "epoch": 0.46, "grad_norm": 1.6523091793060303, "learning_rate": 1.1664073002545903e-05, "loss": 0.9118, "step": 8080 }, { "epoch": 0.46, "grad_norm": 1.6876542568206787, "learning_rate": 1.1662241262781515e-05, "loss": 0.9032, "step": 8081 }, { "epoch": 0.46, "grad_norm": 2.1174182891845703, "learning_rate": 1.1660409465657822e-05, "loss": 1.0409, "step": 8082 }, { "epoch": 0.46, "grad_norm": 2.0477702617645264, "learning_rate": 1.1658577611238037e-05, "loss": 1.0491, "step": 8083 }, { "epoch": 0.46, "grad_norm": 1.9220936298370361, "learning_rate": 1.1656745699585373e-05, "loss": 0.949, "step": 8084 }, { "epoch": 0.46, "grad_norm": 1.7343533039093018, "learning_rate": 1.165491373076304e-05, "loss": 1.018, "step": 8085 }, { "epoch": 0.46, "grad_norm": 1.8112566471099854, "learning_rate": 1.1653081704834259e-05, "loss": 0.979, "step": 8086 }, { "epoch": 0.46, "grad_norm": 1.9316182136535645, "learning_rate": 1.1651249621862245e-05, "loss": 0.9953, "step": 8087 }, { "epoch": 0.46, "grad_norm": 1.8631106615066528, "learning_rate": 1.164941748191022e-05, "loss": 1.0681, "step": 8088 }, { "epoch": 0.46, "grad_norm": 1.7775073051452637, "learning_rate": 1.1647585285041405e-05, "loss": 1.0616, "step": 8089 }, { "epoch": 0.46, "grad_norm": 1.7869524955749512, "learning_rate": 1.1645753031319022e-05, "loss": 0.9905, "step": 8090 }, { "epoch": 0.46, "grad_norm": 1.8374818563461304, "learning_rate": 1.1643920720806304e-05, "loss": 0.9769, "step": 8091 }, { "epoch": 0.46, "grad_norm": 1.7218824625015259, "learning_rate": 1.164208835356647e-05, "loss": 0.9342, "step": 8092 }, { "epoch": 0.46, "grad_norm": 1.7000293731689453, "learning_rate": 1.1640255929662756e-05, "loss": 1.0114, "step": 8093 }, { "epoch": 0.46, "grad_norm": 2.101402759552002, "learning_rate": 1.1638423449158388e-05, "loss": 1.0469, "step": 8094 }, { "epoch": 0.46, "grad_norm": 1.9071333408355713, "learning_rate": 1.163659091211661e-05, "loss": 1.003, "step": 8095 }, { "epoch": 0.46, "grad_norm": 1.7965080738067627, "learning_rate": 1.1634758318600648e-05, "loss": 1.038, "step": 8096 }, { "epoch": 0.46, "grad_norm": 1.691931128501892, "learning_rate": 1.1632925668673743e-05, "loss": 1.0022, "step": 8097 }, { "epoch": 0.46, "grad_norm": 1.7160139083862305, "learning_rate": 1.1631092962399134e-05, "loss": 0.937, "step": 8098 }, { "epoch": 0.46, "grad_norm": 1.8190345764160156, "learning_rate": 1.1629260199840063e-05, "loss": 0.9406, "step": 8099 }, { "epoch": 0.46, "grad_norm": 1.7773182392120361, "learning_rate": 1.1627427381059773e-05, "loss": 0.9615, "step": 8100 }, { "epoch": 0.46, "grad_norm": 1.7664669752120972, "learning_rate": 1.162559450612151e-05, "loss": 1.0337, "step": 8101 }, { "epoch": 0.46, "grad_norm": 1.8930587768554688, "learning_rate": 1.162376157508852e-05, "loss": 0.9714, "step": 8102 }, { "epoch": 0.46, "grad_norm": 1.6648093461990356, "learning_rate": 1.1621928588024058e-05, "loss": 0.9915, "step": 8103 }, { "epoch": 0.46, "grad_norm": 1.9049309492111206, "learning_rate": 1.162009554499137e-05, "loss": 1.0744, "step": 8104 }, { "epoch": 0.46, "grad_norm": 1.859045386314392, "learning_rate": 1.1618262446053708e-05, "loss": 0.8913, "step": 8105 }, { "epoch": 0.46, "grad_norm": 1.735291600227356, "learning_rate": 1.1616429291274331e-05, "loss": 0.9519, "step": 8106 }, { "epoch": 0.46, "grad_norm": 1.9270039796829224, "learning_rate": 1.1614596080716493e-05, "loss": 0.9057, "step": 8107 }, { "epoch": 0.47, "grad_norm": 1.7491557598114014, "learning_rate": 1.1612762814443459e-05, "loss": 0.9619, "step": 8108 }, { "epoch": 0.47, "grad_norm": 1.7298047542572021, "learning_rate": 1.1610929492518481e-05, "loss": 1.0075, "step": 8109 }, { "epoch": 0.47, "grad_norm": 1.8623096942901611, "learning_rate": 1.1609096115004827e-05, "loss": 1.0101, "step": 8110 }, { "epoch": 0.47, "grad_norm": 1.8681354522705078, "learning_rate": 1.1607262681965763e-05, "loss": 0.9269, "step": 8111 }, { "epoch": 0.47, "grad_norm": 1.691609501838684, "learning_rate": 1.1605429193464553e-05, "loss": 0.9408, "step": 8112 }, { "epoch": 0.47, "grad_norm": 1.0658972263336182, "learning_rate": 1.1603595649564466e-05, "loss": 0.5628, "step": 8113 }, { "epoch": 0.47, "grad_norm": 1.9071061611175537, "learning_rate": 1.160176205032877e-05, "loss": 0.9006, "step": 8114 }, { "epoch": 0.47, "grad_norm": 1.7213611602783203, "learning_rate": 1.1599928395820743e-05, "loss": 0.9455, "step": 8115 }, { "epoch": 0.47, "grad_norm": 1.8260375261306763, "learning_rate": 1.1598094686103654e-05, "loss": 0.9476, "step": 8116 }, { "epoch": 0.47, "grad_norm": 1.9121737480163574, "learning_rate": 1.159626092124078e-05, "loss": 1.0094, "step": 8117 }, { "epoch": 0.47, "grad_norm": 1.0367008447647095, "learning_rate": 1.1594427101295404e-05, "loss": 0.5985, "step": 8118 }, { "epoch": 0.47, "grad_norm": 1.7241759300231934, "learning_rate": 1.1592593226330802e-05, "loss": 0.9197, "step": 8119 }, { "epoch": 0.47, "grad_norm": 1.6419388055801392, "learning_rate": 1.1590759296410256e-05, "loss": 1.0405, "step": 8120 }, { "epoch": 0.47, "grad_norm": 1.7511953115463257, "learning_rate": 1.1588925311597052e-05, "loss": 0.9967, "step": 8121 }, { "epoch": 0.47, "grad_norm": 1.6367125511169434, "learning_rate": 1.1587091271954471e-05, "loss": 0.9708, "step": 8122 }, { "epoch": 0.47, "grad_norm": 1.884300708770752, "learning_rate": 1.1585257177545805e-05, "loss": 0.9662, "step": 8123 }, { "epoch": 0.47, "grad_norm": 2.030393600463867, "learning_rate": 1.1583423028434343e-05, "loss": 0.9684, "step": 8124 }, { "epoch": 0.47, "grad_norm": 1.8546231985092163, "learning_rate": 1.1581588824683375e-05, "loss": 0.9397, "step": 8125 }, { "epoch": 0.47, "grad_norm": 1.9013009071350098, "learning_rate": 1.1579754566356195e-05, "loss": 0.9464, "step": 8126 }, { "epoch": 0.47, "grad_norm": 1.8146413564682007, "learning_rate": 1.1577920253516097e-05, "loss": 0.9242, "step": 8127 }, { "epoch": 0.47, "grad_norm": 1.724095106124878, "learning_rate": 1.1576085886226376e-05, "loss": 0.9851, "step": 8128 }, { "epoch": 0.47, "grad_norm": 1.7956159114837646, "learning_rate": 1.1574251464550337e-05, "loss": 1.0046, "step": 8129 }, { "epoch": 0.47, "grad_norm": 1.9521915912628174, "learning_rate": 1.1572416988551277e-05, "loss": 0.9898, "step": 8130 }, { "epoch": 0.47, "grad_norm": 1.0285500288009644, "learning_rate": 1.1570582458292499e-05, "loss": 0.5782, "step": 8131 }, { "epoch": 0.47, "grad_norm": 1.8780637979507446, "learning_rate": 1.1568747873837307e-05, "loss": 0.9056, "step": 8132 }, { "epoch": 0.47, "grad_norm": 1.7956031560897827, "learning_rate": 1.1566913235249008e-05, "loss": 1.0004, "step": 8133 }, { "epoch": 0.47, "grad_norm": 1.7687028646469116, "learning_rate": 1.1565078542590912e-05, "loss": 0.9725, "step": 8134 }, { "epoch": 0.47, "grad_norm": 1.656584620475769, "learning_rate": 1.1563243795926327e-05, "loss": 1.0376, "step": 8135 }, { "epoch": 0.47, "grad_norm": 1.8720474243164062, "learning_rate": 1.1561408995318565e-05, "loss": 1.0016, "step": 8136 }, { "epoch": 0.47, "grad_norm": 1.7733625173568726, "learning_rate": 1.1559574140830938e-05, "loss": 0.9423, "step": 8137 }, { "epoch": 0.47, "grad_norm": 1.8585197925567627, "learning_rate": 1.1557739232526766e-05, "loss": 0.9559, "step": 8138 }, { "epoch": 0.47, "grad_norm": 1.8304883241653442, "learning_rate": 1.1555904270469363e-05, "loss": 0.9238, "step": 8139 }, { "epoch": 0.47, "grad_norm": 1.6804660558700562, "learning_rate": 1.155406925472205e-05, "loss": 0.9287, "step": 8140 }, { "epoch": 0.47, "grad_norm": 1.6479648351669312, "learning_rate": 1.155223418534815e-05, "loss": 0.9242, "step": 8141 }, { "epoch": 0.47, "grad_norm": 1.7876489162445068, "learning_rate": 1.1550399062410984e-05, "loss": 1.0236, "step": 8142 }, { "epoch": 0.47, "grad_norm": 1.9216400384902954, "learning_rate": 1.1548563885973873e-05, "loss": 0.9277, "step": 8143 }, { "epoch": 0.47, "grad_norm": 2.0850942134857178, "learning_rate": 1.1546728656100153e-05, "loss": 1.1015, "step": 8144 }, { "epoch": 0.47, "grad_norm": 1.9480878114700317, "learning_rate": 1.1544893372853145e-05, "loss": 1.0199, "step": 8145 }, { "epoch": 0.47, "grad_norm": 1.8592286109924316, "learning_rate": 1.1543058036296185e-05, "loss": 0.9735, "step": 8146 }, { "epoch": 0.47, "grad_norm": 1.7742880582809448, "learning_rate": 1.15412226464926e-05, "loss": 0.9424, "step": 8147 }, { "epoch": 0.47, "grad_norm": 1.8639676570892334, "learning_rate": 1.1539387203505728e-05, "loss": 0.9639, "step": 8148 }, { "epoch": 0.47, "grad_norm": 1.6871354579925537, "learning_rate": 1.1537551707398904e-05, "loss": 0.8919, "step": 8149 }, { "epoch": 0.47, "grad_norm": 1.184121012687683, "learning_rate": 1.1535716158235466e-05, "loss": 0.5423, "step": 8150 }, { "epoch": 0.47, "grad_norm": 2.103196382522583, "learning_rate": 1.1533880556078751e-05, "loss": 0.933, "step": 8151 }, { "epoch": 0.47, "grad_norm": 1.9446314573287964, "learning_rate": 1.1532044900992105e-05, "loss": 0.897, "step": 8152 }, { "epoch": 0.47, "grad_norm": 1.8579375743865967, "learning_rate": 1.1530209193038868e-05, "loss": 0.9252, "step": 8153 }, { "epoch": 0.47, "grad_norm": 1.8165243864059448, "learning_rate": 1.1528373432282388e-05, "loss": 0.9217, "step": 8154 }, { "epoch": 0.47, "grad_norm": 1.8631258010864258, "learning_rate": 1.1526537618786005e-05, "loss": 0.9518, "step": 8155 }, { "epoch": 0.47, "grad_norm": 1.6529138088226318, "learning_rate": 1.1524701752613074e-05, "loss": 1.0056, "step": 8156 }, { "epoch": 0.47, "grad_norm": 1.9313029050827026, "learning_rate": 1.1522865833826948e-05, "loss": 0.9577, "step": 8157 }, { "epoch": 0.47, "grad_norm": 1.844384789466858, "learning_rate": 1.1521029862490976e-05, "loss": 0.9835, "step": 8158 }, { "epoch": 0.47, "grad_norm": 2.1799063682556152, "learning_rate": 1.1519193838668513e-05, "loss": 1.0568, "step": 8159 }, { "epoch": 0.47, "grad_norm": 2.042477607727051, "learning_rate": 1.151735776242291e-05, "loss": 1.042, "step": 8160 }, { "epoch": 0.47, "grad_norm": 2.056021213531494, "learning_rate": 1.1515521633817532e-05, "loss": 1.023, "step": 8161 }, { "epoch": 0.47, "grad_norm": 1.7760075330734253, "learning_rate": 1.1513685452915737e-05, "loss": 1.0423, "step": 8162 }, { "epoch": 0.47, "grad_norm": 1.7028388977050781, "learning_rate": 1.1511849219780883e-05, "loss": 0.9746, "step": 8163 }, { "epoch": 0.47, "grad_norm": 1.80674409866333, "learning_rate": 1.1510012934476337e-05, "loss": 1.0195, "step": 8164 }, { "epoch": 0.47, "grad_norm": 1.8487589359283447, "learning_rate": 1.1508176597065463e-05, "loss": 1.0557, "step": 8165 }, { "epoch": 0.47, "grad_norm": 1.7171343564987183, "learning_rate": 1.1506340207611626e-05, "loss": 1.0017, "step": 8166 }, { "epoch": 0.47, "grad_norm": 1.734755039215088, "learning_rate": 1.1504503766178197e-05, "loss": 0.9395, "step": 8167 }, { "epoch": 0.47, "grad_norm": 1.762942910194397, "learning_rate": 1.1502667272828545e-05, "loss": 1.0116, "step": 8168 }, { "epoch": 0.47, "grad_norm": 1.7260783910751343, "learning_rate": 1.1500830727626044e-05, "loss": 0.9647, "step": 8169 }, { "epoch": 0.47, "grad_norm": 1.949357032775879, "learning_rate": 1.1498994130634068e-05, "loss": 1.0262, "step": 8170 }, { "epoch": 0.47, "grad_norm": 1.8129600286483765, "learning_rate": 1.1497157481915987e-05, "loss": 0.9656, "step": 8171 }, { "epoch": 0.47, "grad_norm": 1.7713143825531006, "learning_rate": 1.1495320781535186e-05, "loss": 0.9101, "step": 8172 }, { "epoch": 0.47, "grad_norm": 1.7725486755371094, "learning_rate": 1.1493484029555043e-05, "loss": 0.9887, "step": 8173 }, { "epoch": 0.47, "grad_norm": 1.740011215209961, "learning_rate": 1.1491647226038938e-05, "loss": 0.9305, "step": 8174 }, { "epoch": 0.47, "grad_norm": 1.7816308736801147, "learning_rate": 1.1489810371050254e-05, "loss": 0.9035, "step": 8175 }, { "epoch": 0.47, "grad_norm": 1.7888948917388916, "learning_rate": 1.1487973464652375e-05, "loss": 1.0225, "step": 8176 }, { "epoch": 0.47, "grad_norm": 1.7568554878234863, "learning_rate": 1.1486136506908689e-05, "loss": 1.09, "step": 8177 }, { "epoch": 0.47, "grad_norm": 1.6500009298324585, "learning_rate": 1.1484299497882585e-05, "loss": 0.9712, "step": 8178 }, { "epoch": 0.47, "grad_norm": 1.6973532438278198, "learning_rate": 1.148246243763745e-05, "loss": 0.906, "step": 8179 }, { "epoch": 0.47, "grad_norm": 1.7078568935394287, "learning_rate": 1.1480625326236676e-05, "loss": 0.9455, "step": 8180 }, { "epoch": 0.47, "grad_norm": 1.6860296726226807, "learning_rate": 1.1478788163743659e-05, "loss": 0.9972, "step": 8181 }, { "epoch": 0.47, "grad_norm": 1.8339258432388306, "learning_rate": 1.1476950950221793e-05, "loss": 0.922, "step": 8182 }, { "epoch": 0.47, "grad_norm": 1.6800051927566528, "learning_rate": 1.1475113685734476e-05, "loss": 1.0284, "step": 8183 }, { "epoch": 0.47, "grad_norm": 1.6942602396011353, "learning_rate": 1.1473276370345105e-05, "loss": 0.9844, "step": 8184 }, { "epoch": 0.47, "grad_norm": 1.689346432685852, "learning_rate": 1.1471439004117082e-05, "loss": 1.1338, "step": 8185 }, { "epoch": 0.47, "grad_norm": 1.7736237049102783, "learning_rate": 1.146960158711381e-05, "loss": 0.9605, "step": 8186 }, { "epoch": 0.47, "grad_norm": 1.5671947002410889, "learning_rate": 1.1467764119398695e-05, "loss": 0.9928, "step": 8187 }, { "epoch": 0.47, "grad_norm": 1.5330764055252075, "learning_rate": 1.1465926601035137e-05, "loss": 0.9783, "step": 8188 }, { "epoch": 0.47, "grad_norm": 1.7810726165771484, "learning_rate": 1.1464089032086547e-05, "loss": 0.9475, "step": 8189 }, { "epoch": 0.47, "grad_norm": 1.6199408769607544, "learning_rate": 1.1462251412616337e-05, "loss": 0.9272, "step": 8190 }, { "epoch": 0.47, "grad_norm": 1.9038352966308594, "learning_rate": 1.1460413742687912e-05, "loss": 1.0067, "step": 8191 }, { "epoch": 0.47, "grad_norm": 1.7979843616485596, "learning_rate": 1.1458576022364692e-05, "loss": 0.9196, "step": 8192 }, { "epoch": 0.47, "grad_norm": 1.077195167541504, "learning_rate": 1.1456738251710085e-05, "loss": 0.5643, "step": 8193 }, { "epoch": 0.47, "grad_norm": 1.7150534391403198, "learning_rate": 1.1454900430787507e-05, "loss": 1.0892, "step": 8194 }, { "epoch": 0.47, "grad_norm": 1.6887699365615845, "learning_rate": 1.1453062559660384e-05, "loss": 1.0092, "step": 8195 }, { "epoch": 0.47, "grad_norm": 1.7780054807662964, "learning_rate": 1.145122463839213e-05, "loss": 0.9276, "step": 8196 }, { "epoch": 0.47, "grad_norm": 1.7952513694763184, "learning_rate": 1.1449386667046167e-05, "loss": 0.9888, "step": 8197 }, { "epoch": 0.47, "grad_norm": 1.9794857501983643, "learning_rate": 1.1447548645685919e-05, "loss": 1.032, "step": 8198 }, { "epoch": 0.47, "grad_norm": 1.6215165853500366, "learning_rate": 1.144571057437481e-05, "loss": 0.9448, "step": 8199 }, { "epoch": 0.47, "grad_norm": 1.6376278400421143, "learning_rate": 1.144387245317627e-05, "loss": 0.9151, "step": 8200 }, { "epoch": 0.47, "grad_norm": 1.8030284643173218, "learning_rate": 1.1442034282153725e-05, "loss": 0.9387, "step": 8201 }, { "epoch": 0.47, "grad_norm": 1.0381600856781006, "learning_rate": 1.1440196061370603e-05, "loss": 0.5239, "step": 8202 }, { "epoch": 0.47, "grad_norm": 1.7246286869049072, "learning_rate": 1.143835779089034e-05, "loss": 0.889, "step": 8203 }, { "epoch": 0.47, "grad_norm": 1.6741750240325928, "learning_rate": 1.1436519470776362e-05, "loss": 0.9248, "step": 8204 }, { "epoch": 0.47, "grad_norm": 1.6802411079406738, "learning_rate": 1.1434681101092116e-05, "loss": 1.0066, "step": 8205 }, { "epoch": 0.47, "grad_norm": 1.6129857301712036, "learning_rate": 1.1432842681901031e-05, "loss": 0.9193, "step": 8206 }, { "epoch": 0.47, "grad_norm": 1.931418538093567, "learning_rate": 1.1431004213266545e-05, "loss": 0.8692, "step": 8207 }, { "epoch": 0.47, "grad_norm": 1.7123961448669434, "learning_rate": 1.1429165695252105e-05, "loss": 1.0199, "step": 8208 }, { "epoch": 0.47, "grad_norm": 1.6577389240264893, "learning_rate": 1.1427327127921144e-05, "loss": 1.008, "step": 8209 }, { "epoch": 0.47, "grad_norm": 1.6065069437026978, "learning_rate": 1.1425488511337115e-05, "loss": 1.0165, "step": 8210 }, { "epoch": 0.47, "grad_norm": 1.624403715133667, "learning_rate": 1.1423649845563458e-05, "loss": 0.9704, "step": 8211 }, { "epoch": 0.47, "grad_norm": 1.7185124158859253, "learning_rate": 1.1421811130663622e-05, "loss": 0.9004, "step": 8212 }, { "epoch": 0.47, "grad_norm": 1.7191646099090576, "learning_rate": 1.1419972366701057e-05, "loss": 0.9725, "step": 8213 }, { "epoch": 0.47, "grad_norm": 0.9825719594955444, "learning_rate": 1.141813355373921e-05, "loss": 0.5585, "step": 8214 }, { "epoch": 0.47, "grad_norm": 1.939244270324707, "learning_rate": 1.1416294691841539e-05, "loss": 1.0116, "step": 8215 }, { "epoch": 0.47, "grad_norm": 1.7359364032745361, "learning_rate": 1.1414455781071489e-05, "loss": 0.9356, "step": 8216 }, { "epoch": 0.47, "grad_norm": 1.899379849433899, "learning_rate": 1.1412616821492526e-05, "loss": 0.9651, "step": 8217 }, { "epoch": 0.47, "grad_norm": 1.6946544647216797, "learning_rate": 1.1410777813168102e-05, "loss": 0.9667, "step": 8218 }, { "epoch": 0.47, "grad_norm": 1.7538397312164307, "learning_rate": 1.1408938756161675e-05, "loss": 1.01, "step": 8219 }, { "epoch": 0.47, "grad_norm": 1.6150877475738525, "learning_rate": 1.1407099650536706e-05, "loss": 0.9478, "step": 8220 }, { "epoch": 0.47, "grad_norm": 1.6541019678115845, "learning_rate": 1.1405260496356658e-05, "loss": 0.8805, "step": 8221 }, { "epoch": 0.47, "grad_norm": 1.9111180305480957, "learning_rate": 1.1403421293684997e-05, "loss": 0.9774, "step": 8222 }, { "epoch": 0.47, "grad_norm": 1.6048967838287354, "learning_rate": 1.1401582042585188e-05, "loss": 1.0198, "step": 8223 }, { "epoch": 0.47, "grad_norm": 1.737343430519104, "learning_rate": 1.1399742743120699e-05, "loss": 0.9027, "step": 8224 }, { "epoch": 0.47, "grad_norm": 1.6954782009124756, "learning_rate": 1.1397903395354996e-05, "loss": 0.9416, "step": 8225 }, { "epoch": 0.47, "grad_norm": 1.8143391609191895, "learning_rate": 1.1396063999351551e-05, "loss": 0.9159, "step": 8226 }, { "epoch": 0.47, "grad_norm": 1.825358271598816, "learning_rate": 1.1394224555173841e-05, "loss": 0.9694, "step": 8227 }, { "epoch": 0.47, "grad_norm": 1.9426848888397217, "learning_rate": 1.1392385062885334e-05, "loss": 0.9851, "step": 8228 }, { "epoch": 0.47, "grad_norm": 1.6380014419555664, "learning_rate": 1.1390545522549508e-05, "loss": 0.9515, "step": 8229 }, { "epoch": 0.47, "grad_norm": 1.8274394273757935, "learning_rate": 1.138870593422984e-05, "loss": 1.0284, "step": 8230 }, { "epoch": 0.47, "grad_norm": 1.7856947183609009, "learning_rate": 1.1386866297989809e-05, "loss": 0.8886, "step": 8231 }, { "epoch": 0.47, "grad_norm": 1.764729619026184, "learning_rate": 1.1385026613892898e-05, "loss": 0.8709, "step": 8232 }, { "epoch": 0.47, "grad_norm": 1.9077820777893066, "learning_rate": 1.1383186882002584e-05, "loss": 0.978, "step": 8233 }, { "epoch": 0.47, "grad_norm": 1.7588090896606445, "learning_rate": 1.1381347102382356e-05, "loss": 0.9351, "step": 8234 }, { "epoch": 0.47, "grad_norm": 1.8538366556167603, "learning_rate": 1.13795072750957e-05, "loss": 1.0181, "step": 8235 }, { "epoch": 0.47, "grad_norm": 1.7108629941940308, "learning_rate": 1.13776674002061e-05, "loss": 0.9766, "step": 8236 }, { "epoch": 0.47, "grad_norm": 1.7506940364837646, "learning_rate": 1.1375827477777044e-05, "loss": 0.9131, "step": 8237 }, { "epoch": 0.47, "grad_norm": 1.7736252546310425, "learning_rate": 1.1373987507872028e-05, "loss": 0.9013, "step": 8238 }, { "epoch": 0.47, "grad_norm": 1.69211745262146, "learning_rate": 1.1372147490554541e-05, "loss": 1.0076, "step": 8239 }, { "epoch": 0.47, "grad_norm": 2.0225110054016113, "learning_rate": 1.1370307425888077e-05, "loss": 0.9391, "step": 8240 }, { "epoch": 0.47, "grad_norm": 1.788693904876709, "learning_rate": 1.136846731393613e-05, "loss": 1.0511, "step": 8241 }, { "epoch": 0.47, "grad_norm": 1.6437410116195679, "learning_rate": 1.1366627154762202e-05, "loss": 0.9467, "step": 8242 }, { "epoch": 0.47, "grad_norm": 1.671946406364441, "learning_rate": 1.1364786948429788e-05, "loss": 0.9052, "step": 8243 }, { "epoch": 0.47, "grad_norm": 1.76242995262146, "learning_rate": 1.1362946695002383e-05, "loss": 1.0641, "step": 8244 }, { "epoch": 0.47, "grad_norm": 1.6688573360443115, "learning_rate": 1.1361106394543502e-05, "loss": 0.8956, "step": 8245 }, { "epoch": 0.47, "grad_norm": 1.8608148097991943, "learning_rate": 1.1359266047116636e-05, "loss": 0.9705, "step": 8246 }, { "epoch": 0.47, "grad_norm": 1.8750923871994019, "learning_rate": 1.13574256527853e-05, "loss": 1.006, "step": 8247 }, { "epoch": 0.47, "grad_norm": 1.8330316543579102, "learning_rate": 1.1355585211612992e-05, "loss": 1.048, "step": 8248 }, { "epoch": 0.47, "grad_norm": 1.8987964391708374, "learning_rate": 1.1353744723663227e-05, "loss": 1.017, "step": 8249 }, { "epoch": 0.47, "grad_norm": 2.010899305343628, "learning_rate": 1.1351904188999513e-05, "loss": 1.018, "step": 8250 }, { "epoch": 0.47, "grad_norm": 1.716174840927124, "learning_rate": 1.1350063607685364e-05, "loss": 0.9265, "step": 8251 }, { "epoch": 0.47, "grad_norm": 1.7473540306091309, "learning_rate": 1.1348222979784289e-05, "loss": 0.9861, "step": 8252 }, { "epoch": 0.47, "grad_norm": 1.9039522409439087, "learning_rate": 1.1346382305359807e-05, "loss": 1.0257, "step": 8253 }, { "epoch": 0.47, "grad_norm": 1.9177871942520142, "learning_rate": 1.1344541584475432e-05, "loss": 0.9438, "step": 8254 }, { "epoch": 0.47, "grad_norm": 1.8129955530166626, "learning_rate": 1.1342700817194681e-05, "loss": 1.0386, "step": 8255 }, { "epoch": 0.47, "grad_norm": 1.7839810848236084, "learning_rate": 1.1340860003581078e-05, "loss": 0.9758, "step": 8256 }, { "epoch": 0.47, "grad_norm": 1.7627233266830444, "learning_rate": 1.133901914369814e-05, "loss": 0.9774, "step": 8257 }, { "epoch": 0.47, "grad_norm": 1.714215636253357, "learning_rate": 1.1337178237609391e-05, "loss": 0.9081, "step": 8258 }, { "epoch": 0.47, "grad_norm": 1.6672221422195435, "learning_rate": 1.1335337285378359e-05, "loss": 0.9405, "step": 8259 }, { "epoch": 0.47, "grad_norm": 1.7233778238296509, "learning_rate": 1.1333496287068565e-05, "loss": 0.8735, "step": 8260 }, { "epoch": 0.47, "grad_norm": 2.244443655014038, "learning_rate": 1.1331655242743538e-05, "loss": 0.9356, "step": 8261 }, { "epoch": 0.47, "grad_norm": 1.7163015604019165, "learning_rate": 1.1329814152466811e-05, "loss": 0.9486, "step": 8262 }, { "epoch": 0.47, "grad_norm": 2.0177924633026123, "learning_rate": 1.1327973016301912e-05, "loss": 0.9723, "step": 8263 }, { "epoch": 0.47, "grad_norm": 1.7468641996383667, "learning_rate": 1.1326131834312372e-05, "loss": 0.9985, "step": 8264 }, { "epoch": 0.47, "grad_norm": 1.8751897811889648, "learning_rate": 1.132429060656173e-05, "loss": 0.8387, "step": 8265 }, { "epoch": 0.47, "grad_norm": 1.6528464555740356, "learning_rate": 1.1322449333113517e-05, "loss": 0.8574, "step": 8266 }, { "epoch": 0.47, "grad_norm": 1.9983199834823608, "learning_rate": 1.1320608014031272e-05, "loss": 0.9916, "step": 8267 }, { "epoch": 0.47, "grad_norm": 1.9700661897659302, "learning_rate": 1.1318766649378532e-05, "loss": 1.0186, "step": 8268 }, { "epoch": 0.47, "grad_norm": 1.9696274995803833, "learning_rate": 1.1316925239218838e-05, "loss": 1.0042, "step": 8269 }, { "epoch": 0.47, "grad_norm": 1.747190237045288, "learning_rate": 1.1315083783615734e-05, "loss": 0.9391, "step": 8270 }, { "epoch": 0.47, "grad_norm": 1.7099398374557495, "learning_rate": 1.1313242282632762e-05, "loss": 0.9736, "step": 8271 }, { "epoch": 0.47, "grad_norm": 1.8568205833435059, "learning_rate": 1.1311400736333466e-05, "loss": 0.9857, "step": 8272 }, { "epoch": 0.47, "grad_norm": 1.8906890153884888, "learning_rate": 1.1309559144781397e-05, "loss": 0.9708, "step": 8273 }, { "epoch": 0.47, "grad_norm": 2.0576772689819336, "learning_rate": 1.1307717508040099e-05, "loss": 0.9543, "step": 8274 }, { "epoch": 0.47, "grad_norm": 1.752837896347046, "learning_rate": 1.1305875826173119e-05, "loss": 1.0138, "step": 8275 }, { "epoch": 0.47, "grad_norm": 1.6970767974853516, "learning_rate": 1.1304034099244015e-05, "loss": 1.0017, "step": 8276 }, { "epoch": 0.47, "grad_norm": 1.7145463228225708, "learning_rate": 1.1302192327316338e-05, "loss": 0.9518, "step": 8277 }, { "epoch": 0.47, "grad_norm": 1.8148404359817505, "learning_rate": 1.130035051045364e-05, "loss": 0.8566, "step": 8278 }, { "epoch": 0.47, "grad_norm": 1.7286200523376465, "learning_rate": 1.129850864871948e-05, "loss": 1.036, "step": 8279 }, { "epoch": 0.47, "grad_norm": 1.6262943744659424, "learning_rate": 1.129666674217741e-05, "loss": 0.9612, "step": 8280 }, { "epoch": 0.47, "grad_norm": 1.700950264930725, "learning_rate": 1.1294824790890997e-05, "loss": 0.816, "step": 8281 }, { "epoch": 0.47, "grad_norm": 1.7229434251785278, "learning_rate": 1.1292982794923795e-05, "loss": 0.8762, "step": 8282 }, { "epoch": 0.48, "grad_norm": 1.9246916770935059, "learning_rate": 1.129114075433937e-05, "loss": 0.8443, "step": 8283 }, { "epoch": 0.48, "grad_norm": 1.68325674533844, "learning_rate": 1.1289298669201284e-05, "loss": 0.8917, "step": 8284 }, { "epoch": 0.48, "grad_norm": 2.2466845512390137, "learning_rate": 1.12874565395731e-05, "loss": 0.9945, "step": 8285 }, { "epoch": 0.48, "grad_norm": 1.8770309686660767, "learning_rate": 1.1285614365518392e-05, "loss": 0.9111, "step": 8286 }, { "epoch": 0.48, "grad_norm": 1.8043227195739746, "learning_rate": 1.1283772147100717e-05, "loss": 0.9361, "step": 8287 }, { "epoch": 0.48, "grad_norm": 1.7761905193328857, "learning_rate": 1.1281929884383655e-05, "loss": 1.0958, "step": 8288 }, { "epoch": 0.48, "grad_norm": 1.702733039855957, "learning_rate": 1.1280087577430775e-05, "loss": 0.9483, "step": 8289 }, { "epoch": 0.48, "grad_norm": 2.0956785678863525, "learning_rate": 1.1278245226305646e-05, "loss": 1.0398, "step": 8290 }, { "epoch": 0.48, "grad_norm": 1.981734275817871, "learning_rate": 1.1276402831071844e-05, "loss": 0.9186, "step": 8291 }, { "epoch": 0.48, "grad_norm": 1.828492522239685, "learning_rate": 1.1274560391792948e-05, "loss": 0.9848, "step": 8292 }, { "epoch": 0.48, "grad_norm": 1.8461949825286865, "learning_rate": 1.1272717908532533e-05, "loss": 0.9095, "step": 8293 }, { "epoch": 0.48, "grad_norm": 1.9354281425476074, "learning_rate": 1.1270875381354178e-05, "loss": 0.9926, "step": 8294 }, { "epoch": 0.48, "grad_norm": 1.7985162734985352, "learning_rate": 1.1269032810321464e-05, "loss": 1.0163, "step": 8295 }, { "epoch": 0.48, "grad_norm": 1.726866602897644, "learning_rate": 1.1267190195497973e-05, "loss": 0.9998, "step": 8296 }, { "epoch": 0.48, "grad_norm": 1.859765887260437, "learning_rate": 1.1265347536947286e-05, "loss": 0.9978, "step": 8297 }, { "epoch": 0.48, "grad_norm": 1.7985138893127441, "learning_rate": 1.1263504834732993e-05, "loss": 1.0298, "step": 8298 }, { "epoch": 0.48, "grad_norm": 1.705773949623108, "learning_rate": 1.1261662088918675e-05, "loss": 0.8854, "step": 8299 }, { "epoch": 0.48, "grad_norm": 1.581881046295166, "learning_rate": 1.1259819299567922e-05, "loss": 0.9663, "step": 8300 }, { "epoch": 0.48, "grad_norm": 1.8307021856307983, "learning_rate": 1.1257976466744326e-05, "loss": 1.0094, "step": 8301 }, { "epoch": 0.48, "grad_norm": 1.6894458532333374, "learning_rate": 1.1256133590511475e-05, "loss": 0.9881, "step": 8302 }, { "epoch": 0.48, "grad_norm": 1.8815500736236572, "learning_rate": 1.1254290670932964e-05, "loss": 0.9345, "step": 8303 }, { "epoch": 0.48, "grad_norm": 1.7606112957000732, "learning_rate": 1.1252447708072386e-05, "loss": 0.9682, "step": 8304 }, { "epoch": 0.48, "grad_norm": 1.950756549835205, "learning_rate": 1.1250604701993334e-05, "loss": 0.9814, "step": 8305 }, { "epoch": 0.48, "grad_norm": 1.8115973472595215, "learning_rate": 1.1248761652759408e-05, "loss": 0.964, "step": 8306 }, { "epoch": 0.48, "grad_norm": 1.9059438705444336, "learning_rate": 1.1246918560434206e-05, "loss": 0.966, "step": 8307 }, { "epoch": 0.48, "grad_norm": 1.974716067314148, "learning_rate": 1.1245075425081328e-05, "loss": 0.9919, "step": 8308 }, { "epoch": 0.48, "grad_norm": 1.6902050971984863, "learning_rate": 1.1243232246764376e-05, "loss": 0.9353, "step": 8309 }, { "epoch": 0.48, "grad_norm": 1.8598041534423828, "learning_rate": 1.124138902554695e-05, "loss": 0.9788, "step": 8310 }, { "epoch": 0.48, "grad_norm": 1.9292845726013184, "learning_rate": 1.1239545761492658e-05, "loss": 1.0386, "step": 8311 }, { "epoch": 0.48, "grad_norm": 1.803775668144226, "learning_rate": 1.1237702454665102e-05, "loss": 0.9999, "step": 8312 }, { "epoch": 0.48, "grad_norm": 1.9135504961013794, "learning_rate": 1.1235859105127895e-05, "loss": 0.9917, "step": 8313 }, { "epoch": 0.48, "grad_norm": 1.8946150541305542, "learning_rate": 1.1234015712944639e-05, "loss": 0.9743, "step": 8314 }, { "epoch": 0.48, "grad_norm": 1.1521081924438477, "learning_rate": 1.123217227817895e-05, "loss": 0.5679, "step": 8315 }, { "epoch": 0.48, "grad_norm": 1.7653307914733887, "learning_rate": 1.1230328800894437e-05, "loss": 0.9603, "step": 8316 }, { "epoch": 0.48, "grad_norm": 1.795918583869934, "learning_rate": 1.1228485281154713e-05, "loss": 0.9187, "step": 8317 }, { "epoch": 0.48, "grad_norm": 1.9153954982757568, "learning_rate": 1.1226641719023395e-05, "loss": 0.9938, "step": 8318 }, { "epoch": 0.48, "grad_norm": 1.774968147277832, "learning_rate": 1.1224798114564097e-05, "loss": 0.9587, "step": 8319 }, { "epoch": 0.48, "grad_norm": 2.1230523586273193, "learning_rate": 1.1222954467840439e-05, "loss": 1.0183, "step": 8320 }, { "epoch": 0.48, "grad_norm": 1.866174340248108, "learning_rate": 1.1221110778916037e-05, "loss": 0.9619, "step": 8321 }, { "epoch": 0.48, "grad_norm": 1.9503875970840454, "learning_rate": 1.1219267047854515e-05, "loss": 1.0731, "step": 8322 }, { "epoch": 0.48, "grad_norm": 1.8368667364120483, "learning_rate": 1.121742327471949e-05, "loss": 0.9705, "step": 8323 }, { "epoch": 0.48, "grad_norm": 1.6803312301635742, "learning_rate": 1.121557945957459e-05, "loss": 0.9222, "step": 8324 }, { "epoch": 0.48, "grad_norm": 1.8311927318572998, "learning_rate": 1.1213735602483439e-05, "loss": 0.9675, "step": 8325 }, { "epoch": 0.48, "grad_norm": 1.8211675882339478, "learning_rate": 1.121189170350966e-05, "loss": 0.9592, "step": 8326 }, { "epoch": 0.48, "grad_norm": 1.7949206829071045, "learning_rate": 1.1210047762716885e-05, "loss": 1.0078, "step": 8327 }, { "epoch": 0.48, "grad_norm": 1.6693055629730225, "learning_rate": 1.1208203780168743e-05, "loss": 0.8943, "step": 8328 }, { "epoch": 0.48, "grad_norm": 1.8428863286972046, "learning_rate": 1.1206359755928865e-05, "loss": 0.9754, "step": 8329 }, { "epoch": 0.48, "grad_norm": 1.856624722480774, "learning_rate": 1.1204515690060878e-05, "loss": 0.987, "step": 8330 }, { "epoch": 0.48, "grad_norm": 1.9839272499084473, "learning_rate": 1.1202671582628422e-05, "loss": 0.9798, "step": 8331 }, { "epoch": 0.48, "grad_norm": 1.8181593418121338, "learning_rate": 1.1200827433695128e-05, "loss": 0.9482, "step": 8332 }, { "epoch": 0.48, "grad_norm": 1.7232624292373657, "learning_rate": 1.1198983243324635e-05, "loss": 0.9369, "step": 8333 }, { "epoch": 0.48, "grad_norm": 1.7491872310638428, "learning_rate": 1.1197139011580578e-05, "loss": 0.997, "step": 8334 }, { "epoch": 0.48, "grad_norm": 1.760096549987793, "learning_rate": 1.1195294738526598e-05, "loss": 0.9388, "step": 8335 }, { "epoch": 0.48, "grad_norm": 1.8956553936004639, "learning_rate": 1.1193450424226333e-05, "loss": 1.008, "step": 8336 }, { "epoch": 0.48, "grad_norm": 1.9834154844284058, "learning_rate": 1.119160606874343e-05, "loss": 0.9404, "step": 8337 }, { "epoch": 0.48, "grad_norm": 1.741002082824707, "learning_rate": 1.118976167214153e-05, "loss": 1.007, "step": 8338 }, { "epoch": 0.48, "grad_norm": 1.7230005264282227, "learning_rate": 1.118791723448428e-05, "loss": 0.9516, "step": 8339 }, { "epoch": 0.48, "grad_norm": 1.6550581455230713, "learning_rate": 1.1186072755835322e-05, "loss": 0.9014, "step": 8340 }, { "epoch": 0.48, "grad_norm": 1.8168171644210815, "learning_rate": 1.1184228236258306e-05, "loss": 1.0582, "step": 8341 }, { "epoch": 0.48, "grad_norm": 1.7454965114593506, "learning_rate": 1.1182383675816884e-05, "loss": 0.99, "step": 8342 }, { "epoch": 0.48, "grad_norm": 1.8558862209320068, "learning_rate": 1.1180539074574703e-05, "loss": 1.0687, "step": 8343 }, { "epoch": 0.48, "grad_norm": 1.795082926750183, "learning_rate": 1.1178694432595415e-05, "loss": 0.9355, "step": 8344 }, { "epoch": 0.48, "grad_norm": 1.940369963645935, "learning_rate": 1.1176849749942677e-05, "loss": 1.0404, "step": 8345 }, { "epoch": 0.48, "grad_norm": 1.7173689603805542, "learning_rate": 1.1175005026680141e-05, "loss": 0.9502, "step": 8346 }, { "epoch": 0.48, "grad_norm": 1.7479103803634644, "learning_rate": 1.1173160262871465e-05, "loss": 0.9302, "step": 8347 }, { "epoch": 0.48, "grad_norm": 1.9141708612442017, "learning_rate": 1.1171315458580302e-05, "loss": 0.9349, "step": 8348 }, { "epoch": 0.48, "grad_norm": 1.8113453388214111, "learning_rate": 1.1169470613870318e-05, "loss": 0.9286, "step": 8349 }, { "epoch": 0.48, "grad_norm": 1.6964112520217896, "learning_rate": 1.1167625728805169e-05, "loss": 0.9412, "step": 8350 }, { "epoch": 0.48, "grad_norm": 2.0398805141448975, "learning_rate": 1.1165780803448516e-05, "loss": 0.9488, "step": 8351 }, { "epoch": 0.48, "grad_norm": 2.011366605758667, "learning_rate": 1.1163935837864026e-05, "loss": 1.017, "step": 8352 }, { "epoch": 0.48, "grad_norm": 1.75300931930542, "learning_rate": 1.1162090832115357e-05, "loss": 0.946, "step": 8353 }, { "epoch": 0.48, "grad_norm": 1.8279283046722412, "learning_rate": 1.1160245786266184e-05, "loss": 0.9376, "step": 8354 }, { "epoch": 0.48, "grad_norm": 1.1573725938796997, "learning_rate": 1.115840070038017e-05, "loss": 0.579, "step": 8355 }, { "epoch": 0.48, "grad_norm": 1.9647483825683594, "learning_rate": 1.1156555574520982e-05, "loss": 0.9478, "step": 8356 }, { "epoch": 0.48, "grad_norm": 2.0390822887420654, "learning_rate": 1.1154710408752294e-05, "loss": 0.9201, "step": 8357 }, { "epoch": 0.48, "grad_norm": 1.9553459882736206, "learning_rate": 1.1152865203137773e-05, "loss": 0.9865, "step": 8358 }, { "epoch": 0.48, "grad_norm": 1.8672056198120117, "learning_rate": 1.1151019957741096e-05, "loss": 1.0424, "step": 8359 }, { "epoch": 0.48, "grad_norm": 1.752069354057312, "learning_rate": 1.1149174672625933e-05, "loss": 0.9823, "step": 8360 }, { "epoch": 0.48, "grad_norm": 1.8578617572784424, "learning_rate": 1.1147329347855964e-05, "loss": 1.0587, "step": 8361 }, { "epoch": 0.48, "grad_norm": 1.7429560422897339, "learning_rate": 1.1145483983494865e-05, "loss": 0.9378, "step": 8362 }, { "epoch": 0.48, "grad_norm": 1.9389619827270508, "learning_rate": 1.1143638579606313e-05, "loss": 1.0593, "step": 8363 }, { "epoch": 0.48, "grad_norm": 1.7277865409851074, "learning_rate": 1.1141793136253987e-05, "loss": 0.9136, "step": 8364 }, { "epoch": 0.48, "grad_norm": 1.920994758605957, "learning_rate": 1.1139947653501569e-05, "loss": 1.0137, "step": 8365 }, { "epoch": 0.48, "grad_norm": 1.847996711730957, "learning_rate": 1.1138102131412742e-05, "loss": 0.9533, "step": 8366 }, { "epoch": 0.48, "grad_norm": 1.7338600158691406, "learning_rate": 1.1136256570051192e-05, "loss": 0.8973, "step": 8367 }, { "epoch": 0.48, "grad_norm": 1.866511344909668, "learning_rate": 1.1134410969480598e-05, "loss": 0.8825, "step": 8368 }, { "epoch": 0.48, "grad_norm": 1.8479801416397095, "learning_rate": 1.1132565329764651e-05, "loss": 0.9843, "step": 8369 }, { "epoch": 0.48, "grad_norm": 1.8212615251541138, "learning_rate": 1.113071965096704e-05, "loss": 1.0527, "step": 8370 }, { "epoch": 0.48, "grad_norm": 1.8662315607070923, "learning_rate": 1.1128873933151452e-05, "loss": 0.9717, "step": 8371 }, { "epoch": 0.48, "grad_norm": 1.891660451889038, "learning_rate": 1.1127028176381577e-05, "loss": 0.9898, "step": 8372 }, { "epoch": 0.48, "grad_norm": 1.72786545753479, "learning_rate": 1.1125182380721109e-05, "loss": 0.8789, "step": 8373 }, { "epoch": 0.48, "grad_norm": 1.6998101472854614, "learning_rate": 1.112333654623374e-05, "loss": 1.0874, "step": 8374 }, { "epoch": 0.48, "grad_norm": 1.7416257858276367, "learning_rate": 1.1121490672983167e-05, "loss": 0.9948, "step": 8375 }, { "epoch": 0.48, "grad_norm": 1.8130559921264648, "learning_rate": 1.1119644761033079e-05, "loss": 1.0171, "step": 8376 }, { "epoch": 0.48, "grad_norm": 1.724853515625, "learning_rate": 1.1117798810447182e-05, "loss": 0.981, "step": 8377 }, { "epoch": 0.48, "grad_norm": 1.957699179649353, "learning_rate": 1.1115952821289168e-05, "loss": 1.0227, "step": 8378 }, { "epoch": 0.48, "grad_norm": 1.802990436553955, "learning_rate": 1.1114106793622742e-05, "loss": 0.9424, "step": 8379 }, { "epoch": 0.48, "grad_norm": 1.7563058137893677, "learning_rate": 1.1112260727511597e-05, "loss": 0.945, "step": 8380 }, { "epoch": 0.48, "grad_norm": 1.8949158191680908, "learning_rate": 1.1110414623019446e-05, "loss": 0.9147, "step": 8381 }, { "epoch": 0.48, "grad_norm": 1.5763353109359741, "learning_rate": 1.1108568480209986e-05, "loss": 0.8286, "step": 8382 }, { "epoch": 0.48, "grad_norm": 1.0792378187179565, "learning_rate": 1.1106722299146926e-05, "loss": 0.6325, "step": 8383 }, { "epoch": 0.48, "grad_norm": 1.9433655738830566, "learning_rate": 1.110487607989397e-05, "loss": 1.0396, "step": 8384 }, { "epoch": 0.48, "grad_norm": 1.9687572717666626, "learning_rate": 1.1103029822514828e-05, "loss": 1.0082, "step": 8385 }, { "epoch": 0.48, "grad_norm": 1.8394485712051392, "learning_rate": 1.1101183527073207e-05, "loss": 1.0391, "step": 8386 }, { "epoch": 0.48, "grad_norm": 1.8573013544082642, "learning_rate": 1.109933719363282e-05, "loss": 0.9547, "step": 8387 }, { "epoch": 0.48, "grad_norm": 1.713350534439087, "learning_rate": 1.1097490822257377e-05, "loss": 1.0192, "step": 8388 }, { "epoch": 0.48, "grad_norm": 1.8887578248977661, "learning_rate": 1.1095644413010591e-05, "loss": 1.0308, "step": 8389 }, { "epoch": 0.48, "grad_norm": 1.717733383178711, "learning_rate": 1.1093797965956177e-05, "loss": 1.0069, "step": 8390 }, { "epoch": 0.48, "grad_norm": 2.9532909393310547, "learning_rate": 1.109195148115785e-05, "loss": 0.6081, "step": 8391 }, { "epoch": 0.48, "grad_norm": 1.7393585443496704, "learning_rate": 1.1090104958679323e-05, "loss": 0.9115, "step": 8392 }, { "epoch": 0.48, "grad_norm": 1.6626917123794556, "learning_rate": 1.1088258398584327e-05, "loss": 1.0603, "step": 8393 }, { "epoch": 0.48, "grad_norm": 1.9202543497085571, "learning_rate": 1.108641180093657e-05, "loss": 0.9076, "step": 8394 }, { "epoch": 0.48, "grad_norm": 1.6771575212478638, "learning_rate": 1.1084565165799777e-05, "loss": 0.979, "step": 8395 }, { "epoch": 0.48, "grad_norm": 2.724423885345459, "learning_rate": 1.108271849323767e-05, "loss": 0.9327, "step": 8396 }, { "epoch": 0.48, "grad_norm": 1.928701400756836, "learning_rate": 1.108087178331397e-05, "loss": 0.9942, "step": 8397 }, { "epoch": 0.48, "grad_norm": 1.933451771736145, "learning_rate": 1.1079025036092408e-05, "loss": 0.9508, "step": 8398 }, { "epoch": 0.48, "grad_norm": 1.6016159057617188, "learning_rate": 1.1077178251636702e-05, "loss": 1.053, "step": 8399 }, { "epoch": 0.48, "grad_norm": 1.8350372314453125, "learning_rate": 1.1075331430010587e-05, "loss": 0.9047, "step": 8400 }, { "epoch": 0.48, "grad_norm": 1.2960610389709473, "learning_rate": 1.1073484571277786e-05, "loss": 0.5847, "step": 8401 }, { "epoch": 0.48, "grad_norm": 1.6901441812515259, "learning_rate": 1.107163767550203e-05, "loss": 0.9589, "step": 8402 }, { "epoch": 0.48, "grad_norm": 2.028442859649658, "learning_rate": 1.1069790742747053e-05, "loss": 0.9041, "step": 8403 }, { "epoch": 0.48, "grad_norm": 1.9091041088104248, "learning_rate": 1.1067943773076585e-05, "loss": 1.0307, "step": 8404 }, { "epoch": 0.48, "grad_norm": 2.0231430530548096, "learning_rate": 1.1066096766554365e-05, "loss": 0.9905, "step": 8405 }, { "epoch": 0.48, "grad_norm": 1.8749932050704956, "learning_rate": 1.1064249723244117e-05, "loss": 1.0841, "step": 8406 }, { "epoch": 0.48, "grad_norm": 1.3131743669509888, "learning_rate": 1.1062402643209586e-05, "loss": 0.6178, "step": 8407 }, { "epoch": 0.48, "grad_norm": 1.8546671867370605, "learning_rate": 1.1060555526514508e-05, "loss": 1.0173, "step": 8408 }, { "epoch": 0.48, "grad_norm": 1.8187686204910278, "learning_rate": 1.1058708373222622e-05, "loss": 0.9755, "step": 8409 }, { "epoch": 0.48, "grad_norm": 1.7127286195755005, "learning_rate": 1.1056861183397669e-05, "loss": 0.9295, "step": 8410 }, { "epoch": 0.48, "grad_norm": 1.6071020364761353, "learning_rate": 1.1055013957103387e-05, "loss": 0.9157, "step": 8411 }, { "epoch": 0.48, "grad_norm": 1.5687869787216187, "learning_rate": 1.105316669440352e-05, "loss": 0.8657, "step": 8412 }, { "epoch": 0.48, "grad_norm": 1.7713377475738525, "learning_rate": 1.1051319395361812e-05, "loss": 1.0018, "step": 8413 }, { "epoch": 0.48, "grad_norm": 1.8914685249328613, "learning_rate": 1.104947206004201e-05, "loss": 0.974, "step": 8414 }, { "epoch": 0.48, "grad_norm": 1.1528468132019043, "learning_rate": 1.104762468850786e-05, "loss": 0.6008, "step": 8415 }, { "epoch": 0.48, "grad_norm": 1.817579746246338, "learning_rate": 1.1045777280823105e-05, "loss": 0.9958, "step": 8416 }, { "epoch": 0.48, "grad_norm": 1.805662989616394, "learning_rate": 1.10439298370515e-05, "loss": 0.9923, "step": 8417 }, { "epoch": 0.48, "grad_norm": 2.104353427886963, "learning_rate": 1.1042082357256789e-05, "loss": 1.0412, "step": 8418 }, { "epoch": 0.48, "grad_norm": 1.6318070888519287, "learning_rate": 1.1040234841502728e-05, "loss": 0.922, "step": 8419 }, { "epoch": 0.48, "grad_norm": 1.6960625648498535, "learning_rate": 1.1038387289853069e-05, "loss": 0.9692, "step": 8420 }, { "epoch": 0.48, "grad_norm": 1.8021087646484375, "learning_rate": 1.1036539702371565e-05, "loss": 0.9113, "step": 8421 }, { "epoch": 0.48, "grad_norm": 1.6216925382614136, "learning_rate": 1.1034692079121972e-05, "loss": 0.9411, "step": 8422 }, { "epoch": 0.48, "grad_norm": 1.7137560844421387, "learning_rate": 1.1032844420168045e-05, "loss": 1.0105, "step": 8423 }, { "epoch": 0.48, "grad_norm": 1.7940731048583984, "learning_rate": 1.1030996725573544e-05, "loss": 0.9095, "step": 8424 }, { "epoch": 0.48, "grad_norm": 1.6126354932785034, "learning_rate": 1.1029148995402224e-05, "loss": 0.8851, "step": 8425 }, { "epoch": 0.48, "grad_norm": 1.9155354499816895, "learning_rate": 1.1027301229717849e-05, "loss": 1.0097, "step": 8426 }, { "epoch": 0.48, "grad_norm": 1.7939229011535645, "learning_rate": 1.1025453428584176e-05, "loss": 1.014, "step": 8427 }, { "epoch": 0.48, "grad_norm": 1.815770149230957, "learning_rate": 1.102360559206497e-05, "loss": 1.0026, "step": 8428 }, { "epoch": 0.48, "grad_norm": 1.6997798681259155, "learning_rate": 1.1021757720223996e-05, "loss": 0.9044, "step": 8429 }, { "epoch": 0.48, "grad_norm": 1.8816899061203003, "learning_rate": 1.1019909813125016e-05, "loss": 0.9436, "step": 8430 }, { "epoch": 0.48, "grad_norm": 1.8161057233810425, "learning_rate": 1.1018061870831795e-05, "loss": 0.9965, "step": 8431 }, { "epoch": 0.48, "grad_norm": 1.7540086507797241, "learning_rate": 1.1016213893408105e-05, "loss": 1.0504, "step": 8432 }, { "epoch": 0.48, "grad_norm": 1.8399978876113892, "learning_rate": 1.1014365880917713e-05, "loss": 1.0044, "step": 8433 }, { "epoch": 0.48, "grad_norm": 1.6707216501235962, "learning_rate": 1.1012517833424387e-05, "loss": 1.0404, "step": 8434 }, { "epoch": 0.48, "grad_norm": 1.6970584392547607, "learning_rate": 1.1010669750991898e-05, "loss": 0.9086, "step": 8435 }, { "epoch": 0.48, "grad_norm": 1.7782795429229736, "learning_rate": 1.100882163368402e-05, "loss": 0.9556, "step": 8436 }, { "epoch": 0.48, "grad_norm": 1.909121036529541, "learning_rate": 1.1006973481564527e-05, "loss": 0.9839, "step": 8437 }, { "epoch": 0.48, "grad_norm": 1.827270746231079, "learning_rate": 1.100512529469719e-05, "loss": 0.9375, "step": 8438 }, { "epoch": 0.48, "grad_norm": 1.916403889656067, "learning_rate": 1.1003277073145788e-05, "loss": 0.9614, "step": 8439 }, { "epoch": 0.48, "grad_norm": 1.8410837650299072, "learning_rate": 1.1001428816974095e-05, "loss": 0.894, "step": 8440 }, { "epoch": 0.48, "grad_norm": 1.8386131525039673, "learning_rate": 1.0999580526245894e-05, "loss": 1.0033, "step": 8441 }, { "epoch": 0.48, "grad_norm": 2.0226311683654785, "learning_rate": 1.099773220102496e-05, "loss": 0.9071, "step": 8442 }, { "epoch": 0.48, "grad_norm": 1.64186692237854, "learning_rate": 1.0995883841375073e-05, "loss": 0.9223, "step": 8443 }, { "epoch": 0.48, "grad_norm": 1.689921498298645, "learning_rate": 1.0994035447360018e-05, "loss": 1.0059, "step": 8444 }, { "epoch": 0.48, "grad_norm": 1.7537623643875122, "learning_rate": 1.0992187019043576e-05, "loss": 0.9516, "step": 8445 }, { "epoch": 0.48, "grad_norm": 1.6835432052612305, "learning_rate": 1.0990338556489531e-05, "loss": 0.9587, "step": 8446 }, { "epoch": 0.48, "grad_norm": 1.7420859336853027, "learning_rate": 1.098849005976167e-05, "loss": 1.077, "step": 8447 }, { "epoch": 0.48, "grad_norm": 1.9376620054244995, "learning_rate": 1.0986641528923776e-05, "loss": 1.0043, "step": 8448 }, { "epoch": 0.48, "grad_norm": 1.762676477432251, "learning_rate": 1.0984792964039641e-05, "loss": 0.9822, "step": 8449 }, { "epoch": 0.48, "grad_norm": 1.7219209671020508, "learning_rate": 1.0982944365173052e-05, "loss": 0.9196, "step": 8450 }, { "epoch": 0.48, "grad_norm": 1.6521238088607788, "learning_rate": 1.0981095732387799e-05, "loss": 0.9653, "step": 8451 }, { "epoch": 0.48, "grad_norm": 1.9159728288650513, "learning_rate": 1.0979247065747672e-05, "loss": 1.0285, "step": 8452 }, { "epoch": 0.48, "grad_norm": 2.0150840282440186, "learning_rate": 1.0977398365316464e-05, "loss": 1.046, "step": 8453 }, { "epoch": 0.48, "grad_norm": 1.6001360416412354, "learning_rate": 1.0975549631157969e-05, "loss": 0.9198, "step": 8454 }, { "epoch": 0.48, "grad_norm": 1.113195776939392, "learning_rate": 1.0973700863335981e-05, "loss": 0.5876, "step": 8455 }, { "epoch": 0.48, "grad_norm": 1.7141731977462769, "learning_rate": 1.0971852061914296e-05, "loss": 0.9513, "step": 8456 }, { "epoch": 0.49, "grad_norm": 1.6028388738632202, "learning_rate": 1.0970003226956713e-05, "loss": 1.0388, "step": 8457 }, { "epoch": 0.49, "grad_norm": 1.9380491971969604, "learning_rate": 1.0968154358527024e-05, "loss": 0.994, "step": 8458 }, { "epoch": 0.49, "grad_norm": 1.7320345640182495, "learning_rate": 1.0966305456689034e-05, "loss": 0.8716, "step": 8459 }, { "epoch": 0.49, "grad_norm": 1.802369475364685, "learning_rate": 1.0964456521506545e-05, "loss": 0.9675, "step": 8460 }, { "epoch": 0.49, "grad_norm": 1.738146185874939, "learning_rate": 1.0962607553043354e-05, "loss": 0.9809, "step": 8461 }, { "epoch": 0.49, "grad_norm": 1.6915006637573242, "learning_rate": 1.0960758551363265e-05, "loss": 0.9239, "step": 8462 }, { "epoch": 0.49, "grad_norm": 1.7923160791397095, "learning_rate": 1.0958909516530082e-05, "loss": 0.9883, "step": 8463 }, { "epoch": 0.49, "grad_norm": 1.8369743824005127, "learning_rate": 1.095706044860761e-05, "loss": 1.0698, "step": 8464 }, { "epoch": 0.49, "grad_norm": 1.6356401443481445, "learning_rate": 1.0955211347659655e-05, "loss": 0.9155, "step": 8465 }, { "epoch": 0.49, "grad_norm": 1.8413978815078735, "learning_rate": 1.0953362213750027e-05, "loss": 0.9745, "step": 8466 }, { "epoch": 0.49, "grad_norm": 1.6407099962234497, "learning_rate": 1.095151304694253e-05, "loss": 1.02, "step": 8467 }, { "epoch": 0.49, "grad_norm": 1.0899373292922974, "learning_rate": 1.0949663847300976e-05, "loss": 0.6003, "step": 8468 }, { "epoch": 0.49, "grad_norm": 1.731991171836853, "learning_rate": 1.0947814614889174e-05, "loss": 0.9317, "step": 8469 }, { "epoch": 0.49, "grad_norm": 1.7802760601043701, "learning_rate": 1.094596534977094e-05, "loss": 0.97, "step": 8470 }, { "epoch": 0.49, "grad_norm": 1.6222083568572998, "learning_rate": 1.094411605201008e-05, "loss": 1.0078, "step": 8471 }, { "epoch": 0.49, "grad_norm": 1.7317465543746948, "learning_rate": 1.0942266721670418e-05, "loss": 0.9625, "step": 8472 }, { "epoch": 0.49, "grad_norm": 1.783359169960022, "learning_rate": 1.0940417358815758e-05, "loss": 0.8993, "step": 8473 }, { "epoch": 0.49, "grad_norm": 1.7969245910644531, "learning_rate": 1.0938567963509925e-05, "loss": 0.9271, "step": 8474 }, { "epoch": 0.49, "grad_norm": 1.8140931129455566, "learning_rate": 1.0936718535816733e-05, "loss": 0.9919, "step": 8475 }, { "epoch": 0.49, "grad_norm": 1.5985990762710571, "learning_rate": 1.09348690758e-05, "loss": 0.9681, "step": 8476 }, { "epoch": 0.49, "grad_norm": 1.8745919466018677, "learning_rate": 1.0933019583523549e-05, "loss": 1.0543, "step": 8477 }, { "epoch": 0.49, "grad_norm": 0.9741621017456055, "learning_rate": 1.0931170059051198e-05, "loss": 0.5679, "step": 8478 }, { "epoch": 0.49, "grad_norm": 1.719829797744751, "learning_rate": 1.0929320502446768e-05, "loss": 0.9621, "step": 8479 }, { "epoch": 0.49, "grad_norm": 1.8366910219192505, "learning_rate": 1.0927470913774085e-05, "loss": 1.014, "step": 8480 }, { "epoch": 0.49, "grad_norm": 1.743626594543457, "learning_rate": 1.0925621293096971e-05, "loss": 0.9749, "step": 8481 }, { "epoch": 0.49, "grad_norm": 1.0183520317077637, "learning_rate": 1.0923771640479251e-05, "loss": 0.5842, "step": 8482 }, { "epoch": 0.49, "grad_norm": 1.734700083732605, "learning_rate": 1.0921921955984753e-05, "loss": 1.0663, "step": 8483 }, { "epoch": 0.49, "grad_norm": 1.6550425291061401, "learning_rate": 1.0920072239677302e-05, "loss": 0.9093, "step": 8484 }, { "epoch": 0.49, "grad_norm": 1.676248550415039, "learning_rate": 1.0918222491620726e-05, "loss": 1.0079, "step": 8485 }, { "epoch": 0.49, "grad_norm": 1.7471917867660522, "learning_rate": 1.0916372711878862e-05, "loss": 0.9783, "step": 8486 }, { "epoch": 0.49, "grad_norm": 1.7320146560668945, "learning_rate": 1.0914522900515535e-05, "loss": 0.9686, "step": 8487 }, { "epoch": 0.49, "grad_norm": 1.6843491792678833, "learning_rate": 1.0912673057594574e-05, "loss": 0.9572, "step": 8488 }, { "epoch": 0.49, "grad_norm": 1.8031753301620483, "learning_rate": 1.0910823183179818e-05, "loss": 0.9828, "step": 8489 }, { "epoch": 0.49, "grad_norm": 1.805920124053955, "learning_rate": 1.0908973277335097e-05, "loss": 1.0482, "step": 8490 }, { "epoch": 0.49, "grad_norm": 1.8119913339614868, "learning_rate": 1.0907123340124248e-05, "loss": 0.9473, "step": 8491 }, { "epoch": 0.49, "grad_norm": 1.8013060092926025, "learning_rate": 1.0905273371611104e-05, "loss": 1.0078, "step": 8492 }, { "epoch": 0.49, "grad_norm": 1.6332967281341553, "learning_rate": 1.090342337185951e-05, "loss": 0.9601, "step": 8493 }, { "epoch": 0.49, "grad_norm": 1.704505205154419, "learning_rate": 1.0901573340933296e-05, "loss": 0.9334, "step": 8494 }, { "epoch": 0.49, "grad_norm": 1.7328894138336182, "learning_rate": 1.0899723278896304e-05, "loss": 0.9862, "step": 8495 }, { "epoch": 0.49, "grad_norm": 1.8468217849731445, "learning_rate": 1.0897873185812375e-05, "loss": 0.9961, "step": 8496 }, { "epoch": 0.49, "grad_norm": 1.6798930168151855, "learning_rate": 1.089602306174535e-05, "loss": 1.0261, "step": 8497 }, { "epoch": 0.49, "grad_norm": 1.6860201358795166, "learning_rate": 1.089417290675907e-05, "loss": 0.9836, "step": 8498 }, { "epoch": 0.49, "grad_norm": 1.8676429986953735, "learning_rate": 1.0892322720917386e-05, "loss": 1.0077, "step": 8499 }, { "epoch": 0.49, "grad_norm": 1.8589872121810913, "learning_rate": 1.0890472504284133e-05, "loss": 1.0459, "step": 8500 }, { "epoch": 0.49, "grad_norm": 1.7926000356674194, "learning_rate": 1.0888622256923165e-05, "loss": 1.01, "step": 8501 }, { "epoch": 0.49, "grad_norm": 1.8592532873153687, "learning_rate": 1.0886771978898321e-05, "loss": 1.0103, "step": 8502 }, { "epoch": 0.49, "grad_norm": 0.9396649599075317, "learning_rate": 1.0884921670273453e-05, "loss": 0.532, "step": 8503 }, { "epoch": 0.49, "grad_norm": 1.9368165731430054, "learning_rate": 1.0883071331112416e-05, "loss": 0.9072, "step": 8504 }, { "epoch": 0.49, "grad_norm": 2.020904064178467, "learning_rate": 1.0881220961479052e-05, "loss": 0.9232, "step": 8505 }, { "epoch": 0.49, "grad_norm": 1.7988852262496948, "learning_rate": 1.0879370561437211e-05, "loss": 0.9263, "step": 8506 }, { "epoch": 0.49, "grad_norm": 1.6805570125579834, "learning_rate": 1.0877520131050749e-05, "loss": 0.9777, "step": 8507 }, { "epoch": 0.49, "grad_norm": 1.5848116874694824, "learning_rate": 1.0875669670383521e-05, "loss": 0.8678, "step": 8508 }, { "epoch": 0.49, "grad_norm": 1.7621204853057861, "learning_rate": 1.0873819179499378e-05, "loss": 0.8752, "step": 8509 }, { "epoch": 0.49, "grad_norm": 1.67289400100708, "learning_rate": 1.0871968658462176e-05, "loss": 0.9045, "step": 8510 }, { "epoch": 0.49, "grad_norm": 1.8348678350448608, "learning_rate": 1.0870118107335772e-05, "loss": 0.9645, "step": 8511 }, { "epoch": 0.49, "grad_norm": 2.053372383117676, "learning_rate": 1.086826752618402e-05, "loss": 1.0494, "step": 8512 }, { "epoch": 0.49, "grad_norm": 1.7624744176864624, "learning_rate": 1.0866416915070781e-05, "loss": 1.0051, "step": 8513 }, { "epoch": 0.49, "grad_norm": 1.7851784229278564, "learning_rate": 1.0864566274059919e-05, "loss": 0.9694, "step": 8514 }, { "epoch": 0.49, "grad_norm": 1.9425764083862305, "learning_rate": 1.0862715603215285e-05, "loss": 0.9749, "step": 8515 }, { "epoch": 0.49, "grad_norm": 1.8974881172180176, "learning_rate": 1.0860864902600748e-05, "loss": 1.0146, "step": 8516 }, { "epoch": 0.49, "grad_norm": 1.948978066444397, "learning_rate": 1.085901417228017e-05, "loss": 0.9767, "step": 8517 }, { "epoch": 0.49, "grad_norm": 1.036729097366333, "learning_rate": 1.085716341231741e-05, "loss": 0.5687, "step": 8518 }, { "epoch": 0.49, "grad_norm": 1.75920569896698, "learning_rate": 1.0855312622776333e-05, "loss": 0.9728, "step": 8519 }, { "epoch": 0.49, "grad_norm": 1.692769169807434, "learning_rate": 1.0853461803720809e-05, "loss": 0.9564, "step": 8520 }, { "epoch": 0.49, "grad_norm": 1.9231067895889282, "learning_rate": 1.0851610955214701e-05, "loss": 0.9771, "step": 8521 }, { "epoch": 0.49, "grad_norm": 1.7166630029678345, "learning_rate": 1.084976007732188e-05, "loss": 0.8731, "step": 8522 }, { "epoch": 0.49, "grad_norm": 1.053574562072754, "learning_rate": 1.0847909170106213e-05, "loss": 0.6437, "step": 8523 }, { "epoch": 0.49, "grad_norm": 1.8035495281219482, "learning_rate": 1.0846058233631565e-05, "loss": 1.0652, "step": 8524 }, { "epoch": 0.49, "grad_norm": 1.7434369325637817, "learning_rate": 1.0844207267961813e-05, "loss": 1.0059, "step": 8525 }, { "epoch": 0.49, "grad_norm": 0.9722028374671936, "learning_rate": 1.0842356273160825e-05, "loss": 0.5816, "step": 8526 }, { "epoch": 0.49, "grad_norm": 1.784459114074707, "learning_rate": 1.0840505249292477e-05, "loss": 0.9321, "step": 8527 }, { "epoch": 0.49, "grad_norm": 1.665932536125183, "learning_rate": 1.083865419642064e-05, "loss": 1.0443, "step": 8528 }, { "epoch": 0.49, "grad_norm": 1.8101881742477417, "learning_rate": 1.083680311460919e-05, "loss": 0.9172, "step": 8529 }, { "epoch": 0.49, "grad_norm": 1.843503713607788, "learning_rate": 1.0834952003922e-05, "loss": 0.9846, "step": 8530 }, { "epoch": 0.49, "grad_norm": 2.0145201683044434, "learning_rate": 1.0833100864422952e-05, "loss": 0.9449, "step": 8531 }, { "epoch": 0.49, "grad_norm": 1.7863091230392456, "learning_rate": 1.0831249696175918e-05, "loss": 0.9597, "step": 8532 }, { "epoch": 0.49, "grad_norm": 1.0934700965881348, "learning_rate": 1.0829398499244781e-05, "loss": 0.6095, "step": 8533 }, { "epoch": 0.49, "grad_norm": 1.7591910362243652, "learning_rate": 1.0827547273693418e-05, "loss": 0.906, "step": 8534 }, { "epoch": 0.49, "grad_norm": 1.6965126991271973, "learning_rate": 1.0825696019585705e-05, "loss": 0.9477, "step": 8535 }, { "epoch": 0.49, "grad_norm": 1.7844785451889038, "learning_rate": 1.0823844736985534e-05, "loss": 0.9171, "step": 8536 }, { "epoch": 0.49, "grad_norm": 1.69741952419281, "learning_rate": 1.0821993425956782e-05, "loss": 1.0158, "step": 8537 }, { "epoch": 0.49, "grad_norm": 1.942112922668457, "learning_rate": 1.0820142086563331e-05, "loss": 0.9425, "step": 8538 }, { "epoch": 0.49, "grad_norm": 1.5140103101730347, "learning_rate": 1.0818290718869068e-05, "loss": 0.89, "step": 8539 }, { "epoch": 0.49, "grad_norm": 1.6266496181488037, "learning_rate": 1.081643932293788e-05, "loss": 0.9271, "step": 8540 }, { "epoch": 0.49, "grad_norm": 1.7963783740997314, "learning_rate": 1.0814587898833651e-05, "loss": 0.9418, "step": 8541 }, { "epoch": 0.49, "grad_norm": 1.9350452423095703, "learning_rate": 1.0812736446620269e-05, "loss": 0.933, "step": 8542 }, { "epoch": 0.49, "grad_norm": 1.8321863412857056, "learning_rate": 1.0810884966361624e-05, "loss": 0.9576, "step": 8543 }, { "epoch": 0.49, "grad_norm": 1.7120146751403809, "learning_rate": 1.0809033458121603e-05, "loss": 0.9496, "step": 8544 }, { "epoch": 0.49, "grad_norm": 1.9368250370025635, "learning_rate": 1.0807181921964096e-05, "loss": 0.9965, "step": 8545 }, { "epoch": 0.49, "grad_norm": 1.7208921909332275, "learning_rate": 1.0805330357952996e-05, "loss": 0.937, "step": 8546 }, { "epoch": 0.49, "grad_norm": 1.5980980396270752, "learning_rate": 1.0803478766152196e-05, "loss": 0.9018, "step": 8547 }, { "epoch": 0.49, "grad_norm": 1.7228633165359497, "learning_rate": 1.0801627146625588e-05, "loss": 0.9324, "step": 8548 }, { "epoch": 0.49, "grad_norm": 1.8946014642715454, "learning_rate": 1.0799775499437066e-05, "loss": 1.0265, "step": 8549 }, { "epoch": 0.49, "grad_norm": 1.8867732286453247, "learning_rate": 1.0797923824650525e-05, "loss": 0.9614, "step": 8550 }, { "epoch": 0.49, "grad_norm": 1.8067049980163574, "learning_rate": 1.0796072122329862e-05, "loss": 1.0661, "step": 8551 }, { "epoch": 0.49, "grad_norm": 1.8514765501022339, "learning_rate": 1.0794220392538972e-05, "loss": 0.9821, "step": 8552 }, { "epoch": 0.49, "grad_norm": 1.7458326816558838, "learning_rate": 1.079236863534176e-05, "loss": 0.9568, "step": 8553 }, { "epoch": 0.49, "grad_norm": 1.7729965448379517, "learning_rate": 1.0790516850802115e-05, "loss": 1.0427, "step": 8554 }, { "epoch": 0.49, "grad_norm": 1.8417407274246216, "learning_rate": 1.0788665038983942e-05, "loss": 1.0374, "step": 8555 }, { "epoch": 0.49, "grad_norm": 1.7935092449188232, "learning_rate": 1.0786813199951145e-05, "loss": 0.8952, "step": 8556 }, { "epoch": 0.49, "grad_norm": 1.9512757062911987, "learning_rate": 1.0784961333767621e-05, "loss": 0.9476, "step": 8557 }, { "epoch": 0.49, "grad_norm": 1.6605255603790283, "learning_rate": 1.0783109440497273e-05, "loss": 1.0046, "step": 8558 }, { "epoch": 0.49, "grad_norm": 1.7901972532272339, "learning_rate": 1.0781257520204004e-05, "loss": 1.0459, "step": 8559 }, { "epoch": 0.49, "grad_norm": 1.9524850845336914, "learning_rate": 1.0779405572951724e-05, "loss": 1.0617, "step": 8560 }, { "epoch": 0.49, "grad_norm": 1.7421863079071045, "learning_rate": 1.0777553598804333e-05, "loss": 1.0059, "step": 8561 }, { "epoch": 0.49, "grad_norm": 1.7468451261520386, "learning_rate": 1.0775701597825741e-05, "loss": 0.9311, "step": 8562 }, { "epoch": 0.49, "grad_norm": 0.9935927391052246, "learning_rate": 1.077384957007985e-05, "loss": 0.5852, "step": 8563 }, { "epoch": 0.49, "grad_norm": 1.6447386741638184, "learning_rate": 1.0771997515630574e-05, "loss": 0.9573, "step": 8564 }, { "epoch": 0.49, "grad_norm": 1.7000644207000732, "learning_rate": 1.077014543454182e-05, "loss": 0.9478, "step": 8565 }, { "epoch": 0.49, "grad_norm": 1.5797619819641113, "learning_rate": 1.07682933268775e-05, "loss": 1.0009, "step": 8566 }, { "epoch": 0.49, "grad_norm": 1.6906930208206177, "learning_rate": 1.0766441192701521e-05, "loss": 0.9448, "step": 8567 }, { "epoch": 0.49, "grad_norm": 1.6575133800506592, "learning_rate": 1.0764589032077799e-05, "loss": 0.9115, "step": 8568 }, { "epoch": 0.49, "grad_norm": 1.831507921218872, "learning_rate": 1.0762736845070244e-05, "loss": 0.9995, "step": 8569 }, { "epoch": 0.49, "grad_norm": 1.8337693214416504, "learning_rate": 1.0760884631742771e-05, "loss": 0.9545, "step": 8570 }, { "epoch": 0.49, "grad_norm": 1.821090817451477, "learning_rate": 1.0759032392159296e-05, "loss": 0.9029, "step": 8571 }, { "epoch": 0.49, "grad_norm": 1.9774340391159058, "learning_rate": 1.0757180126383736e-05, "loss": 1.0274, "step": 8572 }, { "epoch": 0.49, "grad_norm": 1.7982439994812012, "learning_rate": 1.0755327834480001e-05, "loss": 0.9105, "step": 8573 }, { "epoch": 0.49, "grad_norm": 1.7567111253738403, "learning_rate": 1.0753475516512015e-05, "loss": 0.9358, "step": 8574 }, { "epoch": 0.49, "grad_norm": 1.771444320678711, "learning_rate": 1.0751623172543693e-05, "loss": 1.0675, "step": 8575 }, { "epoch": 0.49, "grad_norm": 1.5541614294052124, "learning_rate": 1.0749770802638952e-05, "loss": 1.0017, "step": 8576 }, { "epoch": 0.49, "grad_norm": 1.624999761581421, "learning_rate": 1.0747918406861719e-05, "loss": 0.893, "step": 8577 }, { "epoch": 0.49, "grad_norm": 1.8379334211349487, "learning_rate": 1.0746065985275907e-05, "loss": 1.0735, "step": 8578 }, { "epoch": 0.49, "grad_norm": 1.7358770370483398, "learning_rate": 1.0744213537945444e-05, "loss": 0.9421, "step": 8579 }, { "epoch": 0.49, "grad_norm": 1.5639175176620483, "learning_rate": 1.074236106493425e-05, "loss": 0.9337, "step": 8580 }, { "epoch": 0.49, "grad_norm": 1.010817050933838, "learning_rate": 1.074050856630625e-05, "loss": 0.5791, "step": 8581 }, { "epoch": 0.49, "grad_norm": 2.046964406967163, "learning_rate": 1.0738656042125368e-05, "loss": 0.8862, "step": 8582 }, { "epoch": 0.49, "grad_norm": 1.8380920886993408, "learning_rate": 1.073680349245553e-05, "loss": 1.0147, "step": 8583 }, { "epoch": 0.49, "grad_norm": 1.7033231258392334, "learning_rate": 1.0734950917360663e-05, "loss": 0.9233, "step": 8584 }, { "epoch": 0.49, "grad_norm": 1.7934552431106567, "learning_rate": 1.073309831690469e-05, "loss": 1.034, "step": 8585 }, { "epoch": 0.49, "grad_norm": 1.7326147556304932, "learning_rate": 1.0731245691151544e-05, "loss": 0.9815, "step": 8586 }, { "epoch": 0.49, "grad_norm": 1.6948521137237549, "learning_rate": 1.072939304016515e-05, "loss": 0.9377, "step": 8587 }, { "epoch": 0.49, "grad_norm": 1.6737289428710938, "learning_rate": 1.072754036400944e-05, "loss": 0.9791, "step": 8588 }, { "epoch": 0.49, "grad_norm": 1.917029857635498, "learning_rate": 1.0725687662748345e-05, "loss": 1.0345, "step": 8589 }, { "epoch": 0.49, "grad_norm": 1.5554600954055786, "learning_rate": 1.0723834936445795e-05, "loss": 0.946, "step": 8590 }, { "epoch": 0.49, "grad_norm": 1.8516888618469238, "learning_rate": 1.0721982185165723e-05, "loss": 0.9756, "step": 8591 }, { "epoch": 0.49, "grad_norm": 1.6908961534500122, "learning_rate": 1.0720129408972063e-05, "loss": 0.9606, "step": 8592 }, { "epoch": 0.49, "grad_norm": 1.7380443811416626, "learning_rate": 1.0718276607928751e-05, "loss": 0.8969, "step": 8593 }, { "epoch": 0.49, "grad_norm": 1.7556151151657104, "learning_rate": 1.0716423782099716e-05, "loss": 0.9836, "step": 8594 }, { "epoch": 0.49, "grad_norm": 1.11409330368042, "learning_rate": 1.07145709315489e-05, "loss": 0.6102, "step": 8595 }, { "epoch": 0.49, "grad_norm": 1.0003728866577148, "learning_rate": 1.0712718056340236e-05, "loss": 0.5407, "step": 8596 }, { "epoch": 0.49, "grad_norm": 2.0980660915374756, "learning_rate": 1.0710865156537664e-05, "loss": 0.9516, "step": 8597 }, { "epoch": 0.49, "grad_norm": 1.6600666046142578, "learning_rate": 1.070901223220512e-05, "loss": 0.985, "step": 8598 }, { "epoch": 0.49, "grad_norm": 1.784712314605713, "learning_rate": 1.0707159283406546e-05, "loss": 0.9074, "step": 8599 }, { "epoch": 0.49, "grad_norm": 1.8646495342254639, "learning_rate": 1.0705306310205878e-05, "loss": 1.0179, "step": 8600 }, { "epoch": 0.49, "grad_norm": 1.7804954051971436, "learning_rate": 1.070345331266706e-05, "loss": 0.9179, "step": 8601 }, { "epoch": 0.49, "grad_norm": 1.810348629951477, "learning_rate": 1.0701600290854032e-05, "loss": 0.9391, "step": 8602 }, { "epoch": 0.49, "grad_norm": 1.7820229530334473, "learning_rate": 1.0699747244830742e-05, "loss": 0.9815, "step": 8603 }, { "epoch": 0.49, "grad_norm": 1.737383246421814, "learning_rate": 1.0697894174661128e-05, "loss": 1.0452, "step": 8604 }, { "epoch": 0.49, "grad_norm": 1.803390383720398, "learning_rate": 1.0696041080409132e-05, "loss": 1.0465, "step": 8605 }, { "epoch": 0.49, "grad_norm": 1.7518434524536133, "learning_rate": 1.0694187962138705e-05, "loss": 0.9799, "step": 8606 }, { "epoch": 0.49, "grad_norm": 1.9417475461959839, "learning_rate": 1.069233481991379e-05, "loss": 0.9524, "step": 8607 }, { "epoch": 0.49, "grad_norm": 1.729644775390625, "learning_rate": 1.0690481653798337e-05, "loss": 0.9223, "step": 8608 }, { "epoch": 0.49, "grad_norm": 1.8701280355453491, "learning_rate": 1.0688628463856287e-05, "loss": 1.0167, "step": 8609 }, { "epoch": 0.49, "grad_norm": 1.5511900186538696, "learning_rate": 1.0686775250151595e-05, "loss": 0.9707, "step": 8610 }, { "epoch": 0.49, "grad_norm": 1.8549238443374634, "learning_rate": 1.0684922012748207e-05, "loss": 1.0297, "step": 8611 }, { "epoch": 0.49, "grad_norm": 1.6823989152908325, "learning_rate": 1.0683068751710075e-05, "loss": 0.9275, "step": 8612 }, { "epoch": 0.49, "grad_norm": 1.819419264793396, "learning_rate": 1.0681215467101147e-05, "loss": 1.0072, "step": 8613 }, { "epoch": 0.49, "grad_norm": 1.7211225032806396, "learning_rate": 1.0679362158985376e-05, "loss": 0.9739, "step": 8614 }, { "epoch": 0.49, "grad_norm": 1.9419435262680054, "learning_rate": 1.0677508827426715e-05, "loss": 1.0042, "step": 8615 }, { "epoch": 0.49, "grad_norm": 1.0536625385284424, "learning_rate": 1.0675655472489117e-05, "loss": 0.6108, "step": 8616 }, { "epoch": 0.49, "grad_norm": 1.92514967918396, "learning_rate": 1.0673802094236532e-05, "loss": 0.9804, "step": 8617 }, { "epoch": 0.49, "grad_norm": 1.9182807207107544, "learning_rate": 1.0671948692732923e-05, "loss": 0.9956, "step": 8618 }, { "epoch": 0.49, "grad_norm": 0.9738763570785522, "learning_rate": 1.0670095268042242e-05, "loss": 0.6096, "step": 8619 }, { "epoch": 0.49, "grad_norm": 1.7380576133728027, "learning_rate": 1.0668241820228445e-05, "loss": 0.9563, "step": 8620 }, { "epoch": 0.49, "grad_norm": 1.790464997291565, "learning_rate": 1.0666388349355487e-05, "loss": 1.0334, "step": 8621 }, { "epoch": 0.49, "grad_norm": 1.8091809749603271, "learning_rate": 1.0664534855487331e-05, "loss": 1.034, "step": 8622 }, { "epoch": 0.49, "grad_norm": 1.6292237043380737, "learning_rate": 1.0662681338687932e-05, "loss": 1.0229, "step": 8623 }, { "epoch": 0.49, "grad_norm": 1.8130451440811157, "learning_rate": 1.0660827799021253e-05, "loss": 0.9698, "step": 8624 }, { "epoch": 0.49, "grad_norm": 1.8048807382583618, "learning_rate": 1.0658974236551252e-05, "loss": 1.0049, "step": 8625 }, { "epoch": 0.49, "grad_norm": 1.9106788635253906, "learning_rate": 1.065712065134189e-05, "loss": 0.9699, "step": 8626 }, { "epoch": 0.49, "grad_norm": 1.6256356239318848, "learning_rate": 1.065526704345713e-05, "loss": 0.9665, "step": 8627 }, { "epoch": 0.49, "grad_norm": 1.7158743143081665, "learning_rate": 1.0653413412960936e-05, "loss": 0.9371, "step": 8628 }, { "epoch": 0.49, "grad_norm": 1.830836296081543, "learning_rate": 1.0651559759917266e-05, "loss": 0.998, "step": 8629 }, { "epoch": 0.49, "grad_norm": 2.944166660308838, "learning_rate": 1.0649706084390093e-05, "loss": 0.9869, "step": 8630 }, { "epoch": 0.5, "grad_norm": 1.752475380897522, "learning_rate": 1.0647852386443375e-05, "loss": 0.9472, "step": 8631 }, { "epoch": 0.5, "grad_norm": 1.762967586517334, "learning_rate": 1.0645998666141085e-05, "loss": 0.9819, "step": 8632 }, { "epoch": 0.5, "grad_norm": 1.8161859512329102, "learning_rate": 1.0644144923547184e-05, "loss": 0.8903, "step": 8633 }, { "epoch": 0.5, "grad_norm": 1.8336495161056519, "learning_rate": 1.0642291158725638e-05, "loss": 0.9357, "step": 8634 }, { "epoch": 0.5, "grad_norm": 1.7623893022537231, "learning_rate": 1.064043737174042e-05, "loss": 0.9326, "step": 8635 }, { "epoch": 0.5, "grad_norm": 1.7667280435562134, "learning_rate": 1.0638583562655498e-05, "loss": 1.0291, "step": 8636 }, { "epoch": 0.5, "grad_norm": 1.6274546384811401, "learning_rate": 1.063672973153484e-05, "loss": 0.9588, "step": 8637 }, { "epoch": 0.5, "grad_norm": 1.7482718229293823, "learning_rate": 1.0634875878442422e-05, "loss": 1.0384, "step": 8638 }, { "epoch": 0.5, "grad_norm": 1.7935895919799805, "learning_rate": 1.0633022003442206e-05, "loss": 0.9079, "step": 8639 }, { "epoch": 0.5, "grad_norm": 1.688215732574463, "learning_rate": 1.0631168106598171e-05, "loss": 1.0355, "step": 8640 }, { "epoch": 0.5, "grad_norm": 1.7240748405456543, "learning_rate": 1.0629314187974287e-05, "loss": 1.0004, "step": 8641 }, { "epoch": 0.5, "grad_norm": 1.7775052785873413, "learning_rate": 1.0627460247634529e-05, "loss": 1.0444, "step": 8642 }, { "epoch": 0.5, "grad_norm": 1.7331091165542603, "learning_rate": 1.062560628564287e-05, "loss": 1.0176, "step": 8643 }, { "epoch": 0.5, "grad_norm": 1.6832427978515625, "learning_rate": 1.0623752302063284e-05, "loss": 0.9782, "step": 8644 }, { "epoch": 0.5, "grad_norm": 1.744986653327942, "learning_rate": 1.062189829695975e-05, "loss": 0.9356, "step": 8645 }, { "epoch": 0.5, "grad_norm": 1.8521672487258911, "learning_rate": 1.0620044270396244e-05, "loss": 0.8572, "step": 8646 }, { "epoch": 0.5, "grad_norm": 1.6382694244384766, "learning_rate": 1.0618190222436741e-05, "loss": 0.9336, "step": 8647 }, { "epoch": 0.5, "grad_norm": 1.0543371438980103, "learning_rate": 1.0616336153145221e-05, "loss": 0.6145, "step": 8648 }, { "epoch": 0.5, "grad_norm": 1.0356029272079468, "learning_rate": 1.061448206258566e-05, "loss": 0.5438, "step": 8649 }, { "epoch": 0.5, "grad_norm": 1.735732078552246, "learning_rate": 1.0612627950822044e-05, "loss": 0.9816, "step": 8650 }, { "epoch": 0.5, "grad_norm": 1.0616755485534668, "learning_rate": 1.0610773817918346e-05, "loss": 0.6212, "step": 8651 }, { "epoch": 0.5, "grad_norm": 1.567923665046692, "learning_rate": 1.0608919663938549e-05, "loss": 0.9941, "step": 8652 }, { "epoch": 0.5, "grad_norm": 1.8313571214675903, "learning_rate": 1.0607065488946635e-05, "loss": 0.8924, "step": 8653 }, { "epoch": 0.5, "grad_norm": 1.8070112466812134, "learning_rate": 1.0605211293006587e-05, "loss": 1.0449, "step": 8654 }, { "epoch": 0.5, "grad_norm": 1.7310975790023804, "learning_rate": 1.060335707618239e-05, "loss": 0.9425, "step": 8655 }, { "epoch": 0.5, "grad_norm": 1.725441813468933, "learning_rate": 1.0601502838538022e-05, "loss": 0.8793, "step": 8656 }, { "epoch": 0.5, "grad_norm": 1.7714563608169556, "learning_rate": 1.0599648580137474e-05, "loss": 0.9703, "step": 8657 }, { "epoch": 0.5, "grad_norm": 1.7605868577957153, "learning_rate": 1.0597794301044728e-05, "loss": 1.0535, "step": 8658 }, { "epoch": 0.5, "grad_norm": 1.8259702920913696, "learning_rate": 1.0595940001323771e-05, "loss": 1.0009, "step": 8659 }, { "epoch": 0.5, "grad_norm": 1.6600556373596191, "learning_rate": 1.0594085681038589e-05, "loss": 0.941, "step": 8660 }, { "epoch": 0.5, "grad_norm": 1.7088278532028198, "learning_rate": 1.059223134025317e-05, "loss": 0.9976, "step": 8661 }, { "epoch": 0.5, "grad_norm": 1.7935503721237183, "learning_rate": 1.0590376979031501e-05, "loss": 0.9267, "step": 8662 }, { "epoch": 0.5, "grad_norm": 1.655744194984436, "learning_rate": 1.0588522597437571e-05, "loss": 1.0564, "step": 8663 }, { "epoch": 0.5, "grad_norm": 1.650330901145935, "learning_rate": 1.0586668195535373e-05, "loss": 0.9354, "step": 8664 }, { "epoch": 0.5, "grad_norm": 1.7604529857635498, "learning_rate": 1.058481377338889e-05, "loss": 0.9867, "step": 8665 }, { "epoch": 0.5, "grad_norm": 1.0636042356491089, "learning_rate": 1.058295933106212e-05, "loss": 0.5901, "step": 8666 }, { "epoch": 0.5, "grad_norm": 1.598160743713379, "learning_rate": 1.058110486861905e-05, "loss": 0.9588, "step": 8667 }, { "epoch": 0.5, "grad_norm": 1.8154085874557495, "learning_rate": 1.0579250386123676e-05, "loss": 0.9962, "step": 8668 }, { "epoch": 0.5, "grad_norm": 1.0499058961868286, "learning_rate": 1.057739588363999e-05, "loss": 0.5989, "step": 8669 }, { "epoch": 0.5, "grad_norm": 1.7893593311309814, "learning_rate": 1.0575541361231984e-05, "loss": 0.9857, "step": 8670 }, { "epoch": 0.5, "grad_norm": 0.9737614989280701, "learning_rate": 1.0573686818963651e-05, "loss": 0.5495, "step": 8671 }, { "epoch": 0.5, "grad_norm": 1.6470822095870972, "learning_rate": 1.0571832256898991e-05, "loss": 0.8776, "step": 8672 }, { "epoch": 0.5, "grad_norm": 1.8030527830123901, "learning_rate": 1.0569977675101997e-05, "loss": 1.0023, "step": 8673 }, { "epoch": 0.5, "grad_norm": 1.705561637878418, "learning_rate": 1.0568123073636666e-05, "loss": 0.9378, "step": 8674 }, { "epoch": 0.5, "grad_norm": 1.7909302711486816, "learning_rate": 1.0566268452566995e-05, "loss": 0.9221, "step": 8675 }, { "epoch": 0.5, "grad_norm": 1.9130544662475586, "learning_rate": 1.056441381195698e-05, "loss": 0.9481, "step": 8676 }, { "epoch": 0.5, "grad_norm": 1.7799227237701416, "learning_rate": 1.0562559151870621e-05, "loss": 0.9503, "step": 8677 }, { "epoch": 0.5, "grad_norm": 1.95915687084198, "learning_rate": 1.0560704472371919e-05, "loss": 0.9859, "step": 8678 }, { "epoch": 0.5, "grad_norm": 1.8908278942108154, "learning_rate": 1.055884977352487e-05, "loss": 0.9141, "step": 8679 }, { "epoch": 0.5, "grad_norm": 1.6821506023406982, "learning_rate": 1.055699505539348e-05, "loss": 0.9237, "step": 8680 }, { "epoch": 0.5, "grad_norm": 1.8918871879577637, "learning_rate": 1.0555140318041743e-05, "loss": 0.9715, "step": 8681 }, { "epoch": 0.5, "grad_norm": 1.9311010837554932, "learning_rate": 1.0553285561533664e-05, "loss": 0.9606, "step": 8682 }, { "epoch": 0.5, "grad_norm": 1.8428478240966797, "learning_rate": 1.0551430785933246e-05, "loss": 0.9243, "step": 8683 }, { "epoch": 0.5, "grad_norm": 1.9872162342071533, "learning_rate": 1.0549575991304493e-05, "loss": 1.0045, "step": 8684 }, { "epoch": 0.5, "grad_norm": 1.168229341506958, "learning_rate": 1.0547721177711407e-05, "loss": 0.5894, "step": 8685 }, { "epoch": 0.5, "grad_norm": 1.6577624082565308, "learning_rate": 1.0545866345217994e-05, "loss": 0.9368, "step": 8686 }, { "epoch": 0.5, "grad_norm": 1.8130950927734375, "learning_rate": 1.0544011493888258e-05, "loss": 0.9924, "step": 8687 }, { "epoch": 0.5, "grad_norm": 1.862364411354065, "learning_rate": 1.0542156623786206e-05, "loss": 1.0334, "step": 8688 }, { "epoch": 0.5, "grad_norm": 1.1388707160949707, "learning_rate": 1.0540301734975842e-05, "loss": 0.607, "step": 8689 }, { "epoch": 0.5, "grad_norm": 0.9884395599365234, "learning_rate": 1.0538446827521174e-05, "loss": 0.5887, "step": 8690 }, { "epoch": 0.5, "grad_norm": 1.850339412689209, "learning_rate": 1.053659190148621e-05, "loss": 1.0648, "step": 8691 }, { "epoch": 0.5, "grad_norm": 1.8652937412261963, "learning_rate": 1.0534736956934962e-05, "loss": 0.9676, "step": 8692 }, { "epoch": 0.5, "grad_norm": 1.992677927017212, "learning_rate": 1.0532881993931432e-05, "loss": 1.02, "step": 8693 }, { "epoch": 0.5, "grad_norm": 1.7602018117904663, "learning_rate": 1.0531027012539632e-05, "loss": 0.9478, "step": 8694 }, { "epoch": 0.5, "grad_norm": 1.908563494682312, "learning_rate": 1.0529172012823575e-05, "loss": 0.9808, "step": 8695 }, { "epoch": 0.5, "grad_norm": 1.672378420829773, "learning_rate": 1.0527316994847268e-05, "loss": 0.9865, "step": 8696 }, { "epoch": 0.5, "grad_norm": 1.615802526473999, "learning_rate": 1.0525461958674725e-05, "loss": 1.0088, "step": 8697 }, { "epoch": 0.5, "grad_norm": 2.0042924880981445, "learning_rate": 1.0523606904369961e-05, "loss": 1.0518, "step": 8698 }, { "epoch": 0.5, "grad_norm": 1.7680364847183228, "learning_rate": 1.0521751831996983e-05, "loss": 0.9802, "step": 8699 }, { "epoch": 0.5, "grad_norm": 1.9430030584335327, "learning_rate": 1.0519896741619803e-05, "loss": 0.9559, "step": 8700 }, { "epoch": 0.5, "grad_norm": 1.7214672565460205, "learning_rate": 1.0518041633302442e-05, "loss": 1.0449, "step": 8701 }, { "epoch": 0.5, "grad_norm": 1.128002405166626, "learning_rate": 1.0516186507108915e-05, "loss": 0.6241, "step": 8702 }, { "epoch": 0.5, "grad_norm": 2.013005018234253, "learning_rate": 1.051433136310323e-05, "loss": 0.9865, "step": 8703 }, { "epoch": 0.5, "grad_norm": 1.8287417888641357, "learning_rate": 1.0512476201349407e-05, "loss": 0.9167, "step": 8704 }, { "epoch": 0.5, "grad_norm": 1.8643275499343872, "learning_rate": 1.051062102191146e-05, "loss": 0.9495, "step": 8705 }, { "epoch": 0.5, "grad_norm": 1.7194923162460327, "learning_rate": 1.0508765824853411e-05, "loss": 1.0212, "step": 8706 }, { "epoch": 0.5, "grad_norm": 1.0926965475082397, "learning_rate": 1.0506910610239274e-05, "loss": 0.5821, "step": 8707 }, { "epoch": 0.5, "grad_norm": 1.8850185871124268, "learning_rate": 1.0505055378133067e-05, "loss": 1.1141, "step": 8708 }, { "epoch": 0.5, "grad_norm": 1.7096799612045288, "learning_rate": 1.050320012859881e-05, "loss": 0.9563, "step": 8709 }, { "epoch": 0.5, "grad_norm": 2.082404613494873, "learning_rate": 1.0501344861700518e-05, "loss": 0.9406, "step": 8710 }, { "epoch": 0.5, "grad_norm": 1.7201992273330688, "learning_rate": 1.049948957750222e-05, "loss": 0.9683, "step": 8711 }, { "epoch": 0.5, "grad_norm": 1.7442508935928345, "learning_rate": 1.0497634276067932e-05, "loss": 0.9851, "step": 8712 }, { "epoch": 0.5, "grad_norm": 1.8243485689163208, "learning_rate": 1.0495778957461673e-05, "loss": 0.9937, "step": 8713 }, { "epoch": 0.5, "grad_norm": 1.6692544221878052, "learning_rate": 1.0493923621747468e-05, "loss": 0.9115, "step": 8714 }, { "epoch": 0.5, "grad_norm": 1.888960361480713, "learning_rate": 1.0492068268989339e-05, "loss": 1.0181, "step": 8715 }, { "epoch": 0.5, "grad_norm": 1.6490732431411743, "learning_rate": 1.0490212899251308e-05, "loss": 1.0199, "step": 8716 }, { "epoch": 0.5, "grad_norm": 1.7625213861465454, "learning_rate": 1.0488357512597402e-05, "loss": 1.017, "step": 8717 }, { "epoch": 0.5, "grad_norm": 1.6850634813308716, "learning_rate": 1.0486502109091639e-05, "loss": 0.9494, "step": 8718 }, { "epoch": 0.5, "grad_norm": 1.9551935195922852, "learning_rate": 1.0484646688798049e-05, "loss": 0.9527, "step": 8719 }, { "epoch": 0.5, "grad_norm": 1.760522484779358, "learning_rate": 1.0482791251780655e-05, "loss": 0.9768, "step": 8720 }, { "epoch": 0.5, "grad_norm": 1.7895591259002686, "learning_rate": 1.0480935798103485e-05, "loss": 1.0364, "step": 8721 }, { "epoch": 0.5, "grad_norm": 1.7450265884399414, "learning_rate": 1.047908032783056e-05, "loss": 0.9664, "step": 8722 }, { "epoch": 0.5, "grad_norm": 1.7582743167877197, "learning_rate": 1.0477224841025912e-05, "loss": 0.9432, "step": 8723 }, { "epoch": 0.5, "grad_norm": 1.8529181480407715, "learning_rate": 1.047536933775357e-05, "loss": 0.9086, "step": 8724 }, { "epoch": 0.5, "grad_norm": 1.6644318103790283, "learning_rate": 1.047351381807756e-05, "loss": 0.9339, "step": 8725 }, { "epoch": 0.5, "grad_norm": 1.651681900024414, "learning_rate": 1.0471658282061909e-05, "loss": 1.0104, "step": 8726 }, { "epoch": 0.5, "grad_norm": 1.7770278453826904, "learning_rate": 1.0469802729770651e-05, "loss": 0.9328, "step": 8727 }, { "epoch": 0.5, "grad_norm": 1.6897521018981934, "learning_rate": 1.0467947161267811e-05, "loss": 0.9768, "step": 8728 }, { "epoch": 0.5, "grad_norm": 1.978057861328125, "learning_rate": 1.0466091576617423e-05, "loss": 0.9959, "step": 8729 }, { "epoch": 0.5, "grad_norm": 1.6739064455032349, "learning_rate": 1.0464235975883516e-05, "loss": 0.9397, "step": 8730 }, { "epoch": 0.5, "grad_norm": 1.8200616836547852, "learning_rate": 1.0462380359130122e-05, "loss": 0.9548, "step": 8731 }, { "epoch": 0.5, "grad_norm": 1.8289320468902588, "learning_rate": 1.0460524726421275e-05, "loss": 1.0325, "step": 8732 }, { "epoch": 0.5, "grad_norm": 1.6130201816558838, "learning_rate": 1.0458669077821002e-05, "loss": 0.9099, "step": 8733 }, { "epoch": 0.5, "grad_norm": 1.781553864479065, "learning_rate": 1.0456813413393342e-05, "loss": 0.9231, "step": 8734 }, { "epoch": 0.5, "grad_norm": 1.9842435121536255, "learning_rate": 1.0454957733202329e-05, "loss": 0.9439, "step": 8735 }, { "epoch": 0.5, "grad_norm": 1.614741563796997, "learning_rate": 1.0453102037311995e-05, "loss": 0.9931, "step": 8736 }, { "epoch": 0.5, "grad_norm": 1.9577792882919312, "learning_rate": 1.0451246325786373e-05, "loss": 0.9155, "step": 8737 }, { "epoch": 0.5, "grad_norm": 1.122384786605835, "learning_rate": 1.0449390598689504e-05, "loss": 0.6086, "step": 8738 }, { "epoch": 0.5, "grad_norm": 1.666574239730835, "learning_rate": 1.044753485608542e-05, "loss": 0.9682, "step": 8739 }, { "epoch": 0.5, "grad_norm": 1.9014045000076294, "learning_rate": 1.0445679098038158e-05, "loss": 0.9088, "step": 8740 }, { "epoch": 0.5, "grad_norm": 1.6517398357391357, "learning_rate": 1.0443823324611754e-05, "loss": 0.9677, "step": 8741 }, { "epoch": 0.5, "grad_norm": 1.8512718677520752, "learning_rate": 1.0441967535870248e-05, "loss": 0.9181, "step": 8742 }, { "epoch": 0.5, "grad_norm": 1.9636318683624268, "learning_rate": 1.0440111731877678e-05, "loss": 0.951, "step": 8743 }, { "epoch": 0.5, "grad_norm": 1.8172403573989868, "learning_rate": 1.043825591269808e-05, "loss": 0.9628, "step": 8744 }, { "epoch": 0.5, "grad_norm": 1.697481632232666, "learning_rate": 1.0436400078395497e-05, "loss": 0.9462, "step": 8745 }, { "epoch": 0.5, "grad_norm": 1.811568021774292, "learning_rate": 1.0434544229033964e-05, "loss": 1.0213, "step": 8746 }, { "epoch": 0.5, "grad_norm": 1.9566123485565186, "learning_rate": 1.0432688364677523e-05, "loss": 1.0229, "step": 8747 }, { "epoch": 0.5, "grad_norm": 1.0337415933609009, "learning_rate": 1.0430832485390217e-05, "loss": 0.5674, "step": 8748 }, { "epoch": 0.5, "grad_norm": 1.7874358892440796, "learning_rate": 1.0428976591236082e-05, "loss": 0.9048, "step": 8749 }, { "epoch": 0.5, "grad_norm": 1.9547349214553833, "learning_rate": 1.0427120682279166e-05, "loss": 0.9517, "step": 8750 }, { "epoch": 0.5, "grad_norm": 1.9897099733352661, "learning_rate": 1.0425264758583509e-05, "loss": 1.064, "step": 8751 }, { "epoch": 0.5, "grad_norm": 2.0182952880859375, "learning_rate": 1.0423408820213153e-05, "loss": 0.9427, "step": 8752 }, { "epoch": 0.5, "grad_norm": 1.8943119049072266, "learning_rate": 1.0421552867232141e-05, "loss": 0.9241, "step": 8753 }, { "epoch": 0.5, "grad_norm": 1.8008861541748047, "learning_rate": 1.0419696899704517e-05, "loss": 0.9365, "step": 8754 }, { "epoch": 0.5, "grad_norm": 1.5976773500442505, "learning_rate": 1.0417840917694324e-05, "loss": 0.9511, "step": 8755 }, { "epoch": 0.5, "grad_norm": 1.7022970914840698, "learning_rate": 1.041598492126561e-05, "loss": 1.0267, "step": 8756 }, { "epoch": 0.5, "grad_norm": 1.725212812423706, "learning_rate": 1.0414128910482417e-05, "loss": 0.957, "step": 8757 }, { "epoch": 0.5, "grad_norm": 1.7276993989944458, "learning_rate": 1.0412272885408793e-05, "loss": 0.9447, "step": 8758 }, { "epoch": 0.5, "grad_norm": 1.633353352546692, "learning_rate": 1.0410416846108783e-05, "loss": 0.9868, "step": 8759 }, { "epoch": 0.5, "grad_norm": 1.7715717554092407, "learning_rate": 1.0408560792646433e-05, "loss": 0.9975, "step": 8760 }, { "epoch": 0.5, "grad_norm": 1.9322177171707153, "learning_rate": 1.0406704725085792e-05, "loss": 0.9184, "step": 8761 }, { "epoch": 0.5, "grad_norm": 1.9761765003204346, "learning_rate": 1.0404848643490908e-05, "loss": 1.0402, "step": 8762 }, { "epoch": 0.5, "grad_norm": 1.7888035774230957, "learning_rate": 1.0402992547925827e-05, "loss": 0.9688, "step": 8763 }, { "epoch": 0.5, "grad_norm": 1.7115849256515503, "learning_rate": 1.04011364384546e-05, "loss": 0.9447, "step": 8764 }, { "epoch": 0.5, "grad_norm": 1.8671318292617798, "learning_rate": 1.0399280315141275e-05, "loss": 0.9366, "step": 8765 }, { "epoch": 0.5, "grad_norm": 1.0708019733428955, "learning_rate": 1.03974241780499e-05, "loss": 0.6696, "step": 8766 }, { "epoch": 0.5, "grad_norm": 1.826255440711975, "learning_rate": 1.0395568027244527e-05, "loss": 0.9416, "step": 8767 }, { "epoch": 0.5, "grad_norm": 1.5463672876358032, "learning_rate": 1.0393711862789209e-05, "loss": 0.9357, "step": 8768 }, { "epoch": 0.5, "grad_norm": 1.9841461181640625, "learning_rate": 1.039185568474799e-05, "loss": 1.0082, "step": 8769 }, { "epoch": 0.5, "grad_norm": 1.7973681688308716, "learning_rate": 1.038999949318493e-05, "loss": 0.9111, "step": 8770 }, { "epoch": 0.5, "grad_norm": 1.6945699453353882, "learning_rate": 1.0388143288164077e-05, "loss": 0.9624, "step": 8771 }, { "epoch": 0.5, "grad_norm": 1.7679191827774048, "learning_rate": 1.038628706974948e-05, "loss": 0.8842, "step": 8772 }, { "epoch": 0.5, "grad_norm": 1.7038698196411133, "learning_rate": 1.03844308380052e-05, "loss": 0.9593, "step": 8773 }, { "epoch": 0.5, "grad_norm": 1.725569248199463, "learning_rate": 1.0382574592995283e-05, "loss": 1.021, "step": 8774 }, { "epoch": 0.5, "grad_norm": 1.8513617515563965, "learning_rate": 1.0380718334783785e-05, "loss": 1.0249, "step": 8775 }, { "epoch": 0.5, "grad_norm": 1.7253975868225098, "learning_rate": 1.0378862063434757e-05, "loss": 0.982, "step": 8776 }, { "epoch": 0.5, "grad_norm": 1.7462536096572876, "learning_rate": 1.0377005779012264e-05, "loss": 0.976, "step": 8777 }, { "epoch": 0.5, "grad_norm": 1.8043029308319092, "learning_rate": 1.0375149481580352e-05, "loss": 0.9873, "step": 8778 }, { "epoch": 0.5, "grad_norm": 1.7627674341201782, "learning_rate": 1.037329317120308e-05, "loss": 1.0279, "step": 8779 }, { "epoch": 0.5, "grad_norm": 1.6667206287384033, "learning_rate": 1.0371436847944503e-05, "loss": 0.8473, "step": 8780 }, { "epoch": 0.5, "grad_norm": 2.0618886947631836, "learning_rate": 1.0369580511868678e-05, "loss": 1.0151, "step": 8781 }, { "epoch": 0.5, "grad_norm": 1.0162452459335327, "learning_rate": 1.0367724163039663e-05, "loss": 0.5428, "step": 8782 }, { "epoch": 0.5, "grad_norm": 1.601648211479187, "learning_rate": 1.0365867801521515e-05, "loss": 0.9997, "step": 8783 }, { "epoch": 0.5, "grad_norm": 1.6492953300476074, "learning_rate": 1.036401142737829e-05, "loss": 0.9617, "step": 8784 }, { "epoch": 0.5, "grad_norm": 1.7361164093017578, "learning_rate": 1.0362155040674045e-05, "loss": 1.0498, "step": 8785 }, { "epoch": 0.5, "grad_norm": 1.9997910261154175, "learning_rate": 1.0360298641472843e-05, "loss": 0.9897, "step": 8786 }, { "epoch": 0.5, "grad_norm": 1.9723209142684937, "learning_rate": 1.0358442229838742e-05, "loss": 0.9892, "step": 8787 }, { "epoch": 0.5, "grad_norm": 1.7785959243774414, "learning_rate": 1.0356585805835796e-05, "loss": 0.9454, "step": 8788 }, { "epoch": 0.5, "grad_norm": 1.896394968032837, "learning_rate": 1.0354729369528076e-05, "loss": 0.9076, "step": 8789 }, { "epoch": 0.5, "grad_norm": 1.7003003358840942, "learning_rate": 1.0352872920979636e-05, "loss": 0.9885, "step": 8790 }, { "epoch": 0.5, "grad_norm": 1.789709210395813, "learning_rate": 1.0351016460254536e-05, "loss": 0.9294, "step": 8791 }, { "epoch": 0.5, "grad_norm": 1.708404541015625, "learning_rate": 1.0349159987416837e-05, "loss": 0.9278, "step": 8792 }, { "epoch": 0.5, "grad_norm": 1.8927345275878906, "learning_rate": 1.0347303502530605e-05, "loss": 1.0119, "step": 8793 }, { "epoch": 0.5, "grad_norm": 1.6412158012390137, "learning_rate": 1.0345447005659897e-05, "loss": 0.8636, "step": 8794 }, { "epoch": 0.5, "grad_norm": 1.8670347929000854, "learning_rate": 1.0343590496868778e-05, "loss": 0.9322, "step": 8795 }, { "epoch": 0.5, "grad_norm": 1.6055206060409546, "learning_rate": 1.0341733976221313e-05, "loss": 0.8791, "step": 8796 }, { "epoch": 0.5, "grad_norm": 1.8486522436141968, "learning_rate": 1.033987744378156e-05, "loss": 0.9328, "step": 8797 }, { "epoch": 0.5, "grad_norm": 1.7057690620422363, "learning_rate": 1.0338020899613588e-05, "loss": 1.021, "step": 8798 }, { "epoch": 0.5, "grad_norm": 1.9186482429504395, "learning_rate": 1.0336164343781457e-05, "loss": 1.0249, "step": 8799 }, { "epoch": 0.5, "grad_norm": 1.8854731321334839, "learning_rate": 1.0334307776349235e-05, "loss": 0.9576, "step": 8800 }, { "epoch": 0.5, "grad_norm": 1.795735478401184, "learning_rate": 1.0332451197380987e-05, "loss": 0.9984, "step": 8801 }, { "epoch": 0.5, "grad_norm": 1.6469866037368774, "learning_rate": 1.0330594606940773e-05, "loss": 0.9222, "step": 8802 }, { "epoch": 0.5, "grad_norm": 1.6200100183486938, "learning_rate": 1.0328738005092662e-05, "loss": 0.9542, "step": 8803 }, { "epoch": 0.5, "grad_norm": 1.627124547958374, "learning_rate": 1.0326881391900726e-05, "loss": 0.9727, "step": 8804 }, { "epoch": 0.5, "grad_norm": 1.6503161191940308, "learning_rate": 1.0325024767429022e-05, "loss": 0.9748, "step": 8805 }, { "epoch": 0.51, "grad_norm": 1.7582732439041138, "learning_rate": 1.0323168131741623e-05, "loss": 0.9398, "step": 8806 }, { "epoch": 0.51, "grad_norm": 1.88052499294281, "learning_rate": 1.0321311484902594e-05, "loss": 1.0136, "step": 8807 }, { "epoch": 0.51, "grad_norm": 2.01294207572937, "learning_rate": 1.0319454826976006e-05, "loss": 1.0337, "step": 8808 }, { "epoch": 0.51, "grad_norm": 1.7034651041030884, "learning_rate": 1.0317598158025921e-05, "loss": 0.9511, "step": 8809 }, { "epoch": 0.51, "grad_norm": 1.7480237483978271, "learning_rate": 1.0315741478116413e-05, "loss": 0.9574, "step": 8810 }, { "epoch": 0.51, "grad_norm": 1.6877936124801636, "learning_rate": 1.0313884787311545e-05, "loss": 0.9429, "step": 8811 }, { "epoch": 0.51, "grad_norm": 1.7521706819534302, "learning_rate": 1.0312028085675393e-05, "loss": 0.99, "step": 8812 }, { "epoch": 0.51, "grad_norm": 1.7182331085205078, "learning_rate": 1.0310171373272021e-05, "loss": 0.9597, "step": 8813 }, { "epoch": 0.51, "grad_norm": 1.8290330171585083, "learning_rate": 1.0308314650165505e-05, "loss": 0.9407, "step": 8814 }, { "epoch": 0.51, "grad_norm": 1.7604749202728271, "learning_rate": 1.0306457916419907e-05, "loss": 0.9294, "step": 8815 }, { "epoch": 0.51, "grad_norm": 1.5807100534439087, "learning_rate": 1.0304601172099304e-05, "loss": 1.0314, "step": 8816 }, { "epoch": 0.51, "grad_norm": 1.7661210298538208, "learning_rate": 1.0302744417267767e-05, "loss": 0.9776, "step": 8817 }, { "epoch": 0.51, "grad_norm": 1.787975549697876, "learning_rate": 1.0300887651989363e-05, "loss": 1.0192, "step": 8818 }, { "epoch": 0.51, "grad_norm": 1.6769720315933228, "learning_rate": 1.029903087632817e-05, "loss": 0.9379, "step": 8819 }, { "epoch": 0.51, "grad_norm": 1.9327137470245361, "learning_rate": 1.0297174090348257e-05, "loss": 0.8904, "step": 8820 }, { "epoch": 0.51, "grad_norm": 1.7335028648376465, "learning_rate": 1.0295317294113694e-05, "loss": 0.9949, "step": 8821 }, { "epoch": 0.51, "grad_norm": 1.7678446769714355, "learning_rate": 1.0293460487688557e-05, "loss": 1.0138, "step": 8822 }, { "epoch": 0.51, "grad_norm": 1.6729885339736938, "learning_rate": 1.0291603671136918e-05, "loss": 0.881, "step": 8823 }, { "epoch": 0.51, "grad_norm": 1.8116122484207153, "learning_rate": 1.0289746844522851e-05, "loss": 0.9003, "step": 8824 }, { "epoch": 0.51, "grad_norm": 1.689509630203247, "learning_rate": 1.028789000791043e-05, "loss": 0.9123, "step": 8825 }, { "epoch": 0.51, "grad_norm": 1.691733479499817, "learning_rate": 1.0286033161363728e-05, "loss": 0.9698, "step": 8826 }, { "epoch": 0.51, "grad_norm": 1.8427883386611938, "learning_rate": 1.0284176304946823e-05, "loss": 0.9147, "step": 8827 }, { "epoch": 0.51, "grad_norm": 1.6397954225540161, "learning_rate": 1.0282319438723783e-05, "loss": 1.0396, "step": 8828 }, { "epoch": 0.51, "grad_norm": 1.9507653713226318, "learning_rate": 1.028046256275869e-05, "loss": 0.8731, "step": 8829 }, { "epoch": 0.51, "grad_norm": 2.4024715423583984, "learning_rate": 1.0278605677115618e-05, "loss": 0.9398, "step": 8830 }, { "epoch": 0.51, "grad_norm": 1.860568881034851, "learning_rate": 1.0276748781858643e-05, "loss": 1.0037, "step": 8831 }, { "epoch": 0.51, "grad_norm": 1.1306332349777222, "learning_rate": 1.0274891877051838e-05, "loss": 0.619, "step": 8832 }, { "epoch": 0.51, "grad_norm": 1.7977855205535889, "learning_rate": 1.0273034962759286e-05, "loss": 0.9487, "step": 8833 }, { "epoch": 0.51, "grad_norm": 1.559626817703247, "learning_rate": 1.0271178039045058e-05, "loss": 0.9163, "step": 8834 }, { "epoch": 0.51, "grad_norm": 1.644394874572754, "learning_rate": 1.0269321105973233e-05, "loss": 0.9621, "step": 8835 }, { "epoch": 0.51, "grad_norm": 1.7126811742782593, "learning_rate": 1.026746416360789e-05, "loss": 0.9586, "step": 8836 }, { "epoch": 0.51, "grad_norm": 1.6614716053009033, "learning_rate": 1.0265607212013107e-05, "loss": 0.9819, "step": 8837 }, { "epoch": 0.51, "grad_norm": 1.6473829746246338, "learning_rate": 1.026375025125296e-05, "loss": 0.9725, "step": 8838 }, { "epoch": 0.51, "grad_norm": 1.7341830730438232, "learning_rate": 1.0261893281391526e-05, "loss": 0.959, "step": 8839 }, { "epoch": 0.51, "grad_norm": 1.8899998664855957, "learning_rate": 1.026003630249289e-05, "loss": 1.114, "step": 8840 }, { "epoch": 0.51, "grad_norm": 1.821725845336914, "learning_rate": 1.0258179314621125e-05, "loss": 0.955, "step": 8841 }, { "epoch": 0.51, "grad_norm": 1.7690879106521606, "learning_rate": 1.0256322317840313e-05, "loss": 0.9138, "step": 8842 }, { "epoch": 0.51, "grad_norm": 1.8985460996627808, "learning_rate": 1.0254465312214534e-05, "loss": 0.9087, "step": 8843 }, { "epoch": 0.51, "grad_norm": 1.691991925239563, "learning_rate": 1.0252608297807871e-05, "loss": 1.0119, "step": 8844 }, { "epoch": 0.51, "grad_norm": 1.9804728031158447, "learning_rate": 1.0250751274684399e-05, "loss": 0.976, "step": 8845 }, { "epoch": 0.51, "grad_norm": 2.195158004760742, "learning_rate": 1.02488942429082e-05, "loss": 1.0353, "step": 8846 }, { "epoch": 0.51, "grad_norm": 1.6387323141098022, "learning_rate": 1.0247037202543357e-05, "loss": 0.9196, "step": 8847 }, { "epoch": 0.51, "grad_norm": 1.6925315856933594, "learning_rate": 1.024518015365395e-05, "loss": 0.9938, "step": 8848 }, { "epoch": 0.51, "grad_norm": 1.6406574249267578, "learning_rate": 1.0243323096304063e-05, "loss": 0.9232, "step": 8849 }, { "epoch": 0.51, "grad_norm": 1.7291712760925293, "learning_rate": 1.0241466030557775e-05, "loss": 0.9431, "step": 8850 }, { "epoch": 0.51, "grad_norm": 1.7111749649047852, "learning_rate": 1.0239608956479165e-05, "loss": 1.0127, "step": 8851 }, { "epoch": 0.51, "grad_norm": 1.795896291732788, "learning_rate": 1.0237751874132323e-05, "loss": 0.9374, "step": 8852 }, { "epoch": 0.51, "grad_norm": 1.9535866975784302, "learning_rate": 1.0235894783581328e-05, "loss": 1.0129, "step": 8853 }, { "epoch": 0.51, "grad_norm": 1.9126861095428467, "learning_rate": 1.0234037684890258e-05, "loss": 0.9117, "step": 8854 }, { "epoch": 0.51, "grad_norm": 1.7033377885818481, "learning_rate": 1.0232180578123206e-05, "loss": 0.8874, "step": 8855 }, { "epoch": 0.51, "grad_norm": 1.0191949605941772, "learning_rate": 1.023032346334425e-05, "loss": 0.602, "step": 8856 }, { "epoch": 0.51, "grad_norm": 1.818963646888733, "learning_rate": 1.0228466340617473e-05, "loss": 0.9796, "step": 8857 }, { "epoch": 0.51, "grad_norm": 1.814379334449768, "learning_rate": 1.0226609210006963e-05, "loss": 1.0384, "step": 8858 }, { "epoch": 0.51, "grad_norm": 1.7110366821289062, "learning_rate": 1.0224752071576803e-05, "loss": 0.949, "step": 8859 }, { "epoch": 0.51, "grad_norm": 2.020367383956909, "learning_rate": 1.0222894925391074e-05, "loss": 0.9004, "step": 8860 }, { "epoch": 0.51, "grad_norm": 1.9149428606033325, "learning_rate": 1.0221037771513867e-05, "loss": 1.0647, "step": 8861 }, { "epoch": 0.51, "grad_norm": 1.6878336668014526, "learning_rate": 1.021918061000926e-05, "loss": 0.9423, "step": 8862 }, { "epoch": 0.51, "grad_norm": 1.0334285497665405, "learning_rate": 1.0217323440941345e-05, "loss": 0.5866, "step": 8863 }, { "epoch": 0.51, "grad_norm": 1.7308261394500732, "learning_rate": 1.0215466264374205e-05, "loss": 0.9525, "step": 8864 }, { "epoch": 0.51, "grad_norm": 1.5370523929595947, "learning_rate": 1.0213609080371922e-05, "loss": 0.9416, "step": 8865 }, { "epoch": 0.51, "grad_norm": 1.9030011892318726, "learning_rate": 1.0211751888998592e-05, "loss": 1.0226, "step": 8866 }, { "epoch": 0.51, "grad_norm": 1.6708587408065796, "learning_rate": 1.0209894690318293e-05, "loss": 0.9322, "step": 8867 }, { "epoch": 0.51, "grad_norm": 1.767116665840149, "learning_rate": 1.0208037484395114e-05, "loss": 0.9616, "step": 8868 }, { "epoch": 0.51, "grad_norm": 1.7106602191925049, "learning_rate": 1.0206180271293143e-05, "loss": 0.927, "step": 8869 }, { "epoch": 0.51, "grad_norm": 1.7157697677612305, "learning_rate": 1.0204323051076467e-05, "loss": 1.0525, "step": 8870 }, { "epoch": 0.51, "grad_norm": 1.6959165334701538, "learning_rate": 1.0202465823809176e-05, "loss": 0.967, "step": 8871 }, { "epoch": 0.51, "grad_norm": 1.992066502571106, "learning_rate": 1.0200608589555352e-05, "loss": 0.9559, "step": 8872 }, { "epoch": 0.51, "grad_norm": 1.719935655593872, "learning_rate": 1.0198751348379085e-05, "loss": 0.9949, "step": 8873 }, { "epoch": 0.51, "grad_norm": 1.9515131711959839, "learning_rate": 1.0196894100344467e-05, "loss": 0.9402, "step": 8874 }, { "epoch": 0.51, "grad_norm": 1.7132283449172974, "learning_rate": 1.0195036845515583e-05, "loss": 0.9366, "step": 8875 }, { "epoch": 0.51, "grad_norm": 1.781367301940918, "learning_rate": 1.0193179583956523e-05, "loss": 0.9035, "step": 8876 }, { "epoch": 0.51, "grad_norm": 1.778655767440796, "learning_rate": 1.0191322315731374e-05, "loss": 0.9026, "step": 8877 }, { "epoch": 0.51, "grad_norm": 1.7946217060089111, "learning_rate": 1.0189465040904224e-05, "loss": 1.0595, "step": 8878 }, { "epoch": 0.51, "grad_norm": 1.7899078130722046, "learning_rate": 1.0187607759539168e-05, "loss": 0.9926, "step": 8879 }, { "epoch": 0.51, "grad_norm": 1.6764757633209229, "learning_rate": 1.0185750471700293e-05, "loss": 0.9681, "step": 8880 }, { "epoch": 0.51, "grad_norm": 1.797339916229248, "learning_rate": 1.0183893177451683e-05, "loss": 0.9043, "step": 8881 }, { "epoch": 0.51, "grad_norm": 0.9926712512969971, "learning_rate": 1.0182035876857437e-05, "loss": 0.5918, "step": 8882 }, { "epoch": 0.51, "grad_norm": 1.9849005937576294, "learning_rate": 1.0180178569981641e-05, "loss": 1.024, "step": 8883 }, { "epoch": 0.51, "grad_norm": 2.123988389968872, "learning_rate": 1.0178321256888386e-05, "loss": 0.9618, "step": 8884 }, { "epoch": 0.51, "grad_norm": 1.7594813108444214, "learning_rate": 1.0176463937641763e-05, "loss": 1.0231, "step": 8885 }, { "epoch": 0.51, "grad_norm": 1.1616874933242798, "learning_rate": 1.0174606612305863e-05, "loss": 0.6022, "step": 8886 }, { "epoch": 0.51, "grad_norm": 1.6073048114776611, "learning_rate": 1.0172749280944773e-05, "loss": 0.9811, "step": 8887 }, { "epoch": 0.51, "grad_norm": 1.6909401416778564, "learning_rate": 1.0170891943622593e-05, "loss": 0.9614, "step": 8888 }, { "epoch": 0.51, "grad_norm": 1.6995933055877686, "learning_rate": 1.0169034600403404e-05, "loss": 1.0046, "step": 8889 }, { "epoch": 0.51, "grad_norm": 1.7703731060028076, "learning_rate": 1.0167177251351305e-05, "loss": 0.9076, "step": 8890 }, { "epoch": 0.51, "grad_norm": 2.0440077781677246, "learning_rate": 1.0165319896530388e-05, "loss": 0.9302, "step": 8891 }, { "epoch": 0.51, "grad_norm": 1.8103872537612915, "learning_rate": 1.0163462536004742e-05, "loss": 0.8734, "step": 8892 }, { "epoch": 0.51, "grad_norm": 1.7994543313980103, "learning_rate": 1.0161605169838459e-05, "loss": 0.9685, "step": 8893 }, { "epoch": 0.51, "grad_norm": 1.6386631727218628, "learning_rate": 1.0159747798095635e-05, "loss": 0.9588, "step": 8894 }, { "epoch": 0.51, "grad_norm": 2.087648391723633, "learning_rate": 1.015789042084036e-05, "loss": 1.0591, "step": 8895 }, { "epoch": 0.51, "grad_norm": 1.7218666076660156, "learning_rate": 1.0156033038136728e-05, "loss": 1.0145, "step": 8896 }, { "epoch": 0.51, "grad_norm": 1.7784907817840576, "learning_rate": 1.015417565004883e-05, "loss": 0.9369, "step": 8897 }, { "epoch": 0.51, "grad_norm": 1.7313958406448364, "learning_rate": 1.0152318256640761e-05, "loss": 0.922, "step": 8898 }, { "epoch": 0.51, "grad_norm": 1.7791234254837036, "learning_rate": 1.0150460857976616e-05, "loss": 1.0624, "step": 8899 }, { "epoch": 0.51, "grad_norm": 1.7113159894943237, "learning_rate": 1.0148603454120487e-05, "loss": 0.9925, "step": 8900 }, { "epoch": 0.51, "grad_norm": 1.7801169157028198, "learning_rate": 1.0146746045136468e-05, "loss": 0.9928, "step": 8901 }, { "epoch": 0.51, "grad_norm": 1.9388582706451416, "learning_rate": 1.0144888631088652e-05, "loss": 0.9619, "step": 8902 }, { "epoch": 0.51, "grad_norm": 1.879156231880188, "learning_rate": 1.0143031212041136e-05, "loss": 0.9727, "step": 8903 }, { "epoch": 0.51, "grad_norm": 2.6208269596099854, "learning_rate": 1.0141173788058012e-05, "loss": 0.9769, "step": 8904 }, { "epoch": 0.51, "grad_norm": 2.333559989929199, "learning_rate": 1.0139316359203373e-05, "loss": 1.0546, "step": 8905 }, { "epoch": 0.51, "grad_norm": 1.6685092449188232, "learning_rate": 1.0137458925541317e-05, "loss": 0.9691, "step": 8906 }, { "epoch": 0.51, "grad_norm": 1.7328840494155884, "learning_rate": 1.0135601487135937e-05, "loss": 0.9246, "step": 8907 }, { "epoch": 0.51, "grad_norm": 1.7420519590377808, "learning_rate": 1.0133744044051329e-05, "loss": 0.9754, "step": 8908 }, { "epoch": 0.51, "grad_norm": 1.6979494094848633, "learning_rate": 1.0131886596351585e-05, "loss": 1.0703, "step": 8909 }, { "epoch": 0.51, "grad_norm": 1.7054520845413208, "learning_rate": 1.0130029144100806e-05, "loss": 0.915, "step": 8910 }, { "epoch": 0.51, "grad_norm": 1.7403396368026733, "learning_rate": 1.0128171687363084e-05, "loss": 0.9798, "step": 8911 }, { "epoch": 0.51, "grad_norm": 1.8815827369689941, "learning_rate": 1.0126314226202517e-05, "loss": 0.9809, "step": 8912 }, { "epoch": 0.51, "grad_norm": 1.6624183654785156, "learning_rate": 1.0124456760683194e-05, "loss": 0.9823, "step": 8913 }, { "epoch": 0.51, "grad_norm": 1.774604082107544, "learning_rate": 1.012259929086922e-05, "loss": 0.9673, "step": 8914 }, { "epoch": 0.51, "grad_norm": 1.7927039861679077, "learning_rate": 1.0120741816824686e-05, "loss": 0.9381, "step": 8915 }, { "epoch": 0.51, "grad_norm": 1.8010519742965698, "learning_rate": 1.0118884338613688e-05, "loss": 0.9378, "step": 8916 }, { "epoch": 0.51, "grad_norm": 1.8775206804275513, "learning_rate": 1.0117026856300326e-05, "loss": 0.9342, "step": 8917 }, { "epoch": 0.51, "grad_norm": 1.7072855234146118, "learning_rate": 1.0115169369948692e-05, "loss": 1.0085, "step": 8918 }, { "epoch": 0.51, "grad_norm": 1.9570808410644531, "learning_rate": 1.0113311879622884e-05, "loss": 0.955, "step": 8919 }, { "epoch": 0.51, "grad_norm": 1.76759934425354, "learning_rate": 1.0111454385387001e-05, "loss": 0.9547, "step": 8920 }, { "epoch": 0.51, "grad_norm": 1.7058250904083252, "learning_rate": 1.0109596887305137e-05, "loss": 1.056, "step": 8921 }, { "epoch": 0.51, "grad_norm": 1.7628761529922485, "learning_rate": 1.010773938544139e-05, "loss": 0.9218, "step": 8922 }, { "epoch": 0.51, "grad_norm": 1.9185079336166382, "learning_rate": 1.0105881879859862e-05, "loss": 0.9718, "step": 8923 }, { "epoch": 0.51, "grad_norm": 1.6816920042037964, "learning_rate": 1.0104024370624644e-05, "loss": 0.8992, "step": 8924 }, { "epoch": 0.51, "grad_norm": 1.9165916442871094, "learning_rate": 1.0102166857799835e-05, "loss": 1.0513, "step": 8925 }, { "epoch": 0.51, "grad_norm": 1.7411118745803833, "learning_rate": 1.0100309341449532e-05, "loss": 0.9157, "step": 8926 }, { "epoch": 0.51, "grad_norm": 1.8978520631790161, "learning_rate": 1.0098451821637837e-05, "loss": 0.9807, "step": 8927 }, { "epoch": 0.51, "grad_norm": 1.7074702978134155, "learning_rate": 1.0096594298428841e-05, "loss": 0.9944, "step": 8928 }, { "epoch": 0.51, "grad_norm": 1.689185619354248, "learning_rate": 1.009473677188665e-05, "loss": 0.9556, "step": 8929 }, { "epoch": 0.51, "grad_norm": 1.8243353366851807, "learning_rate": 1.0092879242075352e-05, "loss": 0.9741, "step": 8930 }, { "epoch": 0.51, "grad_norm": 1.7011597156524658, "learning_rate": 1.0091021709059054e-05, "loss": 0.9364, "step": 8931 }, { "epoch": 0.51, "grad_norm": 1.8377373218536377, "learning_rate": 1.008916417290185e-05, "loss": 0.9692, "step": 8932 }, { "epoch": 0.51, "grad_norm": 1.6834347248077393, "learning_rate": 1.0087306633667842e-05, "loss": 0.9618, "step": 8933 }, { "epoch": 0.51, "grad_norm": 1.777701735496521, "learning_rate": 1.0085449091421124e-05, "loss": 0.9922, "step": 8934 }, { "epoch": 0.51, "grad_norm": 1.694658637046814, "learning_rate": 1.0083591546225794e-05, "loss": 0.8588, "step": 8935 }, { "epoch": 0.51, "grad_norm": 1.8077694177627563, "learning_rate": 1.0081733998145957e-05, "loss": 0.8945, "step": 8936 }, { "epoch": 0.51, "grad_norm": 1.8565845489501953, "learning_rate": 1.0079876447245706e-05, "loss": 0.9706, "step": 8937 }, { "epoch": 0.51, "grad_norm": 1.8217909336090088, "learning_rate": 1.0078018893589142e-05, "loss": 1.0318, "step": 8938 }, { "epoch": 0.51, "grad_norm": 2.1791296005249023, "learning_rate": 1.0076161337240366e-05, "loss": 0.9221, "step": 8939 }, { "epoch": 0.51, "grad_norm": 1.892891764640808, "learning_rate": 1.0074303778263475e-05, "loss": 1.0535, "step": 8940 }, { "epoch": 0.51, "grad_norm": 1.0799294710159302, "learning_rate": 1.0072446216722566e-05, "loss": 0.5249, "step": 8941 }, { "epoch": 0.51, "grad_norm": 1.8259260654449463, "learning_rate": 1.0070588652681743e-05, "loss": 0.9855, "step": 8942 }, { "epoch": 0.51, "grad_norm": 1.9901882410049438, "learning_rate": 1.0068731086205102e-05, "loss": 0.963, "step": 8943 }, { "epoch": 0.51, "grad_norm": 1.8897918462753296, "learning_rate": 1.0066873517356743e-05, "loss": 0.9397, "step": 8944 }, { "epoch": 0.51, "grad_norm": 1.9522923231124878, "learning_rate": 1.0065015946200765e-05, "loss": 1.0194, "step": 8945 }, { "epoch": 0.51, "grad_norm": 1.6233583688735962, "learning_rate": 1.006315837280127e-05, "loss": 0.975, "step": 8946 }, { "epoch": 0.51, "grad_norm": 1.6960440874099731, "learning_rate": 1.0061300797222351e-05, "loss": 0.9465, "step": 8947 }, { "epoch": 0.51, "grad_norm": 1.8390296697616577, "learning_rate": 1.0059443219528117e-05, "loss": 0.909, "step": 8948 }, { "epoch": 0.51, "grad_norm": 1.7989524602890015, "learning_rate": 1.0057585639782663e-05, "loss": 0.9638, "step": 8949 }, { "epoch": 0.51, "grad_norm": 1.7077968120574951, "learning_rate": 1.005572805805009e-05, "loss": 0.9626, "step": 8950 }, { "epoch": 0.51, "grad_norm": 1.977864146232605, "learning_rate": 1.0053870474394495e-05, "loss": 0.9194, "step": 8951 }, { "epoch": 0.51, "grad_norm": 1.7608526945114136, "learning_rate": 1.0052012888879982e-05, "loss": 0.9273, "step": 8952 }, { "epoch": 0.51, "grad_norm": 1.054431438446045, "learning_rate": 1.0050155301570652e-05, "loss": 0.5972, "step": 8953 }, { "epoch": 0.51, "grad_norm": 1.8544378280639648, "learning_rate": 1.0048297712530599e-05, "loss": 0.9707, "step": 8954 }, { "epoch": 0.51, "grad_norm": 1.8765212297439575, "learning_rate": 1.0046440121823928e-05, "loss": 0.9882, "step": 8955 }, { "epoch": 0.51, "grad_norm": 1.73099946975708, "learning_rate": 1.0044582529514739e-05, "loss": 0.9418, "step": 8956 }, { "epoch": 0.51, "grad_norm": 1.791661262512207, "learning_rate": 1.0042724935667132e-05, "loss": 0.9792, "step": 8957 }, { "epoch": 0.51, "grad_norm": 1.7827130556106567, "learning_rate": 1.0040867340345204e-05, "loss": 0.8902, "step": 8958 }, { "epoch": 0.51, "grad_norm": 1.8416775465011597, "learning_rate": 1.003900974361306e-05, "loss": 0.9386, "step": 8959 }, { "epoch": 0.51, "grad_norm": 1.8485275506973267, "learning_rate": 1.0037152145534797e-05, "loss": 0.9101, "step": 8960 }, { "epoch": 0.51, "grad_norm": 2.318046808242798, "learning_rate": 1.0035294546174519e-05, "loss": 1.0374, "step": 8961 }, { "epoch": 0.51, "grad_norm": 1.6909818649291992, "learning_rate": 1.0033436945596325e-05, "loss": 0.9476, "step": 8962 }, { "epoch": 0.51, "grad_norm": 1.8507784605026245, "learning_rate": 1.0031579343864316e-05, "loss": 0.9463, "step": 8963 }, { "epoch": 0.51, "grad_norm": 1.888702630996704, "learning_rate": 1.0029721741042587e-05, "loss": 0.9336, "step": 8964 }, { "epoch": 0.51, "grad_norm": 1.8669872283935547, "learning_rate": 1.0027864137195247e-05, "loss": 0.958, "step": 8965 }, { "epoch": 0.51, "grad_norm": 1.6972261667251587, "learning_rate": 1.0026006532386394e-05, "loss": 0.8913, "step": 8966 }, { "epoch": 0.51, "grad_norm": 1.8669794797897339, "learning_rate": 1.002414892668013e-05, "loss": 0.9802, "step": 8967 }, { "epoch": 0.51, "grad_norm": 1.6567277908325195, "learning_rate": 1.0022291320140552e-05, "loss": 0.9698, "step": 8968 }, { "epoch": 0.51, "grad_norm": 1.8291791677474976, "learning_rate": 1.0020433712831763e-05, "loss": 1.0483, "step": 8969 }, { "epoch": 0.51, "grad_norm": 1.7178864479064941, "learning_rate": 1.0018576104817866e-05, "loss": 1.0496, "step": 8970 }, { "epoch": 0.51, "grad_norm": 1.6637693643569946, "learning_rate": 1.0016718496162957e-05, "loss": 0.9756, "step": 8971 }, { "epoch": 0.51, "grad_norm": 1.7813897132873535, "learning_rate": 1.0014860886931138e-05, "loss": 0.941, "step": 8972 }, { "epoch": 0.51, "grad_norm": 1.6179001331329346, "learning_rate": 1.0013003277186513e-05, "loss": 0.9509, "step": 8973 }, { "epoch": 0.51, "grad_norm": 1.680076003074646, "learning_rate": 1.001114566699318e-05, "loss": 0.9895, "step": 8974 }, { "epoch": 0.51, "grad_norm": 1.8443573713302612, "learning_rate": 1.0009288056415243e-05, "loss": 1.0379, "step": 8975 }, { "epoch": 0.51, "grad_norm": 1.7588536739349365, "learning_rate": 1.00074304455168e-05, "loss": 0.9279, "step": 8976 }, { "epoch": 0.51, "grad_norm": 1.6640065908432007, "learning_rate": 1.0005572834361954e-05, "loss": 0.8448, "step": 8977 }, { "epoch": 0.51, "grad_norm": 1.8051307201385498, "learning_rate": 1.0003715223014805e-05, "loss": 0.9805, "step": 8978 }, { "epoch": 0.51, "grad_norm": 1.9108141660690308, "learning_rate": 1.0001857611539454e-05, "loss": 0.9759, "step": 8979 }, { "epoch": 0.52, "grad_norm": 2.000741958618164, "learning_rate": 1e-05, "loss": 0.9706, "step": 8980 }, { "epoch": 0.52, "grad_norm": 1.8142032623291016, "learning_rate": 9.99814238846055e-06, "loss": 1.072, "step": 8981 }, { "epoch": 0.52, "grad_norm": 1.7238868474960327, "learning_rate": 9.996284776985199e-06, "loss": 0.9755, "step": 8982 }, { "epoch": 0.52, "grad_norm": 1.7324669361114502, "learning_rate": 9.99442716563805e-06, "loss": 0.9509, "step": 8983 }, { "epoch": 0.52, "grad_norm": 1.5926146507263184, "learning_rate": 9.992569554483202e-06, "loss": 0.9385, "step": 8984 }, { "epoch": 0.52, "grad_norm": 1.5944912433624268, "learning_rate": 9.99071194358476e-06, "loss": 0.9183, "step": 8985 }, { "epoch": 0.52, "grad_norm": 1.9157428741455078, "learning_rate": 9.988854333006823e-06, "loss": 0.9524, "step": 8986 }, { "epoch": 0.52, "grad_norm": 1.0344617366790771, "learning_rate": 9.986996722813489e-06, "loss": 0.5842, "step": 8987 }, { "epoch": 0.52, "grad_norm": 2.112147569656372, "learning_rate": 9.985139113068865e-06, "loss": 0.9307, "step": 8988 }, { "epoch": 0.52, "grad_norm": 1.874718427658081, "learning_rate": 9.983281503837047e-06, "loss": 0.9547, "step": 8989 }, { "epoch": 0.52, "grad_norm": 1.783829689025879, "learning_rate": 9.981423895182139e-06, "loss": 1.0163, "step": 8990 }, { "epoch": 0.52, "grad_norm": 2.0992252826690674, "learning_rate": 9.97956628716824e-06, "loss": 0.996, "step": 8991 }, { "epoch": 0.52, "grad_norm": 1.7792052030563354, "learning_rate": 9.977708679859451e-06, "loss": 1.0038, "step": 8992 }, { "epoch": 0.52, "grad_norm": 1.7909373044967651, "learning_rate": 9.975851073319873e-06, "loss": 0.9234, "step": 8993 }, { "epoch": 0.52, "grad_norm": 1.96729576587677, "learning_rate": 9.973993467613607e-06, "loss": 1.0248, "step": 8994 }, { "epoch": 0.52, "grad_norm": 1.6747527122497559, "learning_rate": 9.972135862804755e-06, "loss": 0.8754, "step": 8995 }, { "epoch": 0.52, "grad_norm": 1.1096645593643188, "learning_rate": 9.970278258957415e-06, "loss": 0.62, "step": 8996 }, { "epoch": 0.52, "grad_norm": 1.9389084577560425, "learning_rate": 9.968420656135691e-06, "loss": 0.9249, "step": 8997 }, { "epoch": 0.52, "grad_norm": 1.842962384223938, "learning_rate": 9.96656305440368e-06, "loss": 0.9776, "step": 8998 }, { "epoch": 0.52, "grad_norm": 1.7134102582931519, "learning_rate": 9.964705453825484e-06, "loss": 0.8636, "step": 8999 }, { "epoch": 0.52, "grad_norm": 1.9879229068756104, "learning_rate": 9.962847854465206e-06, "loss": 0.9611, "step": 9000 }, { "epoch": 0.52, "grad_norm": 1.693435549736023, "learning_rate": 9.960990256386944e-06, "loss": 0.8929, "step": 9001 }, { "epoch": 0.52, "grad_norm": 1.752920389175415, "learning_rate": 9.9591326596548e-06, "loss": 0.8911, "step": 9002 }, { "epoch": 0.52, "grad_norm": 1.840528130531311, "learning_rate": 9.957275064332874e-06, "loss": 0.9188, "step": 9003 }, { "epoch": 0.52, "grad_norm": 1.6822412014007568, "learning_rate": 9.955417470485267e-06, "loss": 1.0313, "step": 9004 }, { "epoch": 0.52, "grad_norm": 1.6589536666870117, "learning_rate": 9.953559878176077e-06, "loss": 0.9199, "step": 9005 }, { "epoch": 0.52, "grad_norm": 1.7368932962417603, "learning_rate": 9.951702287469406e-06, "loss": 0.9307, "step": 9006 }, { "epoch": 0.52, "grad_norm": 1.5662494897842407, "learning_rate": 9.949844698429354e-06, "loss": 0.9266, "step": 9007 }, { "epoch": 0.52, "grad_norm": 1.9565941095352173, "learning_rate": 9.947987111120023e-06, "loss": 1.0666, "step": 9008 }, { "epoch": 0.52, "grad_norm": 1.7266814708709717, "learning_rate": 9.94612952560551e-06, "loss": 0.9671, "step": 9009 }, { "epoch": 0.52, "grad_norm": 1.889132022857666, "learning_rate": 9.944271941949916e-06, "loss": 0.9849, "step": 9010 }, { "epoch": 0.52, "grad_norm": 1.6573015451431274, "learning_rate": 9.942414360217339e-06, "loss": 0.9979, "step": 9011 }, { "epoch": 0.52, "grad_norm": 1.8467018604278564, "learning_rate": 9.940556780471886e-06, "loss": 1.0034, "step": 9012 }, { "epoch": 0.52, "grad_norm": 1.8084139823913574, "learning_rate": 9.938699202777652e-06, "loss": 0.9201, "step": 9013 }, { "epoch": 0.52, "grad_norm": 1.7212934494018555, "learning_rate": 9.936841627198734e-06, "loss": 1.0466, "step": 9014 }, { "epoch": 0.52, "grad_norm": 1.7020750045776367, "learning_rate": 9.934984053799239e-06, "loss": 0.9605, "step": 9015 }, { "epoch": 0.52, "grad_norm": 1.8461289405822754, "learning_rate": 9.933126482643259e-06, "loss": 1.0082, "step": 9016 }, { "epoch": 0.52, "grad_norm": 1.9211375713348389, "learning_rate": 9.931268913794898e-06, "loss": 1.0497, "step": 9017 }, { "epoch": 0.52, "grad_norm": 1.7913408279418945, "learning_rate": 9.929411347318257e-06, "loss": 0.9537, "step": 9018 }, { "epoch": 0.52, "grad_norm": 1.671887993812561, "learning_rate": 9.927553783277432e-06, "loss": 0.9663, "step": 9019 }, { "epoch": 0.52, "grad_norm": 1.7826820611953735, "learning_rate": 9.925696221736525e-06, "loss": 1.0174, "step": 9020 }, { "epoch": 0.52, "grad_norm": 1.9593912363052368, "learning_rate": 9.923838662759632e-06, "loss": 0.9936, "step": 9021 }, { "epoch": 0.52, "grad_norm": 1.9109236001968384, "learning_rate": 9.921981106410856e-06, "loss": 0.8992, "step": 9022 }, { "epoch": 0.52, "grad_norm": 1.737614393234253, "learning_rate": 9.920123552754294e-06, "loss": 0.9765, "step": 9023 }, { "epoch": 0.52, "grad_norm": 1.8170056343078613, "learning_rate": 9.918266001854045e-06, "loss": 0.8944, "step": 9024 }, { "epoch": 0.52, "grad_norm": 1.7842293977737427, "learning_rate": 9.916408453774207e-06, "loss": 0.9621, "step": 9025 }, { "epoch": 0.52, "grad_norm": 1.8636560440063477, "learning_rate": 9.91455090857888e-06, "loss": 0.8785, "step": 9026 }, { "epoch": 0.52, "grad_norm": 2.0510454177856445, "learning_rate": 9.912693366332161e-06, "loss": 0.886, "step": 9027 }, { "epoch": 0.52, "grad_norm": 1.9517890214920044, "learning_rate": 9.91083582709815e-06, "loss": 0.8668, "step": 9028 }, { "epoch": 0.52, "grad_norm": 1.7905247211456299, "learning_rate": 9.908978290940948e-06, "loss": 0.9611, "step": 9029 }, { "epoch": 0.52, "grad_norm": 1.6684703826904297, "learning_rate": 9.90712075792465e-06, "loss": 0.9932, "step": 9030 }, { "epoch": 0.52, "grad_norm": 1.6955595016479492, "learning_rate": 9.905263228113354e-06, "loss": 0.9546, "step": 9031 }, { "epoch": 0.52, "grad_norm": 1.7910109758377075, "learning_rate": 9.90340570157116e-06, "loss": 0.9908, "step": 9032 }, { "epoch": 0.52, "grad_norm": 1.597535252571106, "learning_rate": 9.901548178362167e-06, "loss": 0.8774, "step": 9033 }, { "epoch": 0.52, "grad_norm": 1.9226889610290527, "learning_rate": 9.89969065855047e-06, "loss": 0.9957, "step": 9034 }, { "epoch": 0.52, "grad_norm": 1.8893277645111084, "learning_rate": 9.897833142200168e-06, "loss": 0.9714, "step": 9035 }, { "epoch": 0.52, "grad_norm": 1.7162786722183228, "learning_rate": 9.89597562937536e-06, "loss": 0.9643, "step": 9036 }, { "epoch": 0.52, "grad_norm": 1.919396996498108, "learning_rate": 9.894118120140141e-06, "loss": 0.9581, "step": 9037 }, { "epoch": 0.52, "grad_norm": 1.0325645208358765, "learning_rate": 9.892260614558611e-06, "loss": 0.5513, "step": 9038 }, { "epoch": 0.52, "grad_norm": 1.783811330795288, "learning_rate": 9.890403112694867e-06, "loss": 1.0061, "step": 9039 }, { "epoch": 0.52, "grad_norm": 1.9469741582870483, "learning_rate": 9.888545614613e-06, "loss": 0.9227, "step": 9040 }, { "epoch": 0.52, "grad_norm": 1.6788498163223267, "learning_rate": 9.886688120377118e-06, "loss": 0.9669, "step": 9041 }, { "epoch": 0.52, "grad_norm": 1.9167431592941284, "learning_rate": 9.88483063005131e-06, "loss": 0.9351, "step": 9042 }, { "epoch": 0.52, "grad_norm": 1.9411070346832275, "learning_rate": 9.882973143699678e-06, "loss": 0.9683, "step": 9043 }, { "epoch": 0.52, "grad_norm": 1.7986396551132202, "learning_rate": 9.881115661386314e-06, "loss": 0.9064, "step": 9044 }, { "epoch": 0.52, "grad_norm": 1.6843129396438599, "learning_rate": 9.879258183175317e-06, "loss": 0.9591, "step": 9045 }, { "epoch": 0.52, "grad_norm": 1.621228814125061, "learning_rate": 9.877400709130784e-06, "loss": 0.904, "step": 9046 }, { "epoch": 0.52, "grad_norm": 1.8656718730926514, "learning_rate": 9.875543239316808e-06, "loss": 0.9345, "step": 9047 }, { "epoch": 0.52, "grad_norm": 1.7246835231781006, "learning_rate": 9.873685773797488e-06, "loss": 0.9726, "step": 9048 }, { "epoch": 0.52, "grad_norm": 1.9007236957550049, "learning_rate": 9.871828312636919e-06, "loss": 1.0377, "step": 9049 }, { "epoch": 0.52, "grad_norm": 2.052535057067871, "learning_rate": 9.869970855899197e-06, "loss": 1.0168, "step": 9050 }, { "epoch": 0.52, "grad_norm": 1.7416030168533325, "learning_rate": 9.868113403648416e-06, "loss": 0.9112, "step": 9051 }, { "epoch": 0.52, "grad_norm": 1.8203192949295044, "learning_rate": 9.866255955948676e-06, "loss": 1.0144, "step": 9052 }, { "epoch": 0.52, "grad_norm": 1.7618767023086548, "learning_rate": 9.864398512864065e-06, "loss": 0.9815, "step": 9053 }, { "epoch": 0.52, "grad_norm": 1.8067446947097778, "learning_rate": 9.862541074458685e-06, "loss": 0.9816, "step": 9054 }, { "epoch": 0.52, "grad_norm": 1.6581450700759888, "learning_rate": 9.860683640796629e-06, "loss": 0.9614, "step": 9055 }, { "epoch": 0.52, "grad_norm": 1.6084994077682495, "learning_rate": 9.858826211941993e-06, "loss": 0.9857, "step": 9056 }, { "epoch": 0.52, "grad_norm": 1.766340970993042, "learning_rate": 9.856968787958867e-06, "loss": 0.9352, "step": 9057 }, { "epoch": 0.52, "grad_norm": 1.782696008682251, "learning_rate": 9.85511136891135e-06, "loss": 0.9287, "step": 9058 }, { "epoch": 0.52, "grad_norm": 1.718967080116272, "learning_rate": 9.853253954863535e-06, "loss": 1.016, "step": 9059 }, { "epoch": 0.52, "grad_norm": 1.7111783027648926, "learning_rate": 9.851396545879517e-06, "loss": 1.0233, "step": 9060 }, { "epoch": 0.52, "grad_norm": 1.5144329071044922, "learning_rate": 9.849539142023386e-06, "loss": 0.9756, "step": 9061 }, { "epoch": 0.52, "grad_norm": 1.5721980333328247, "learning_rate": 9.847681743359242e-06, "loss": 1.036, "step": 9062 }, { "epoch": 0.52, "grad_norm": 1.6753511428833008, "learning_rate": 9.845824349951175e-06, "loss": 0.9393, "step": 9063 }, { "epoch": 0.52, "grad_norm": 1.693739652633667, "learning_rate": 9.843966961863279e-06, "loss": 1.024, "step": 9064 }, { "epoch": 0.52, "grad_norm": 1.6939172744750977, "learning_rate": 9.842109579159645e-06, "loss": 0.9415, "step": 9065 }, { "epoch": 0.52, "grad_norm": 1.7573553323745728, "learning_rate": 9.840252201904369e-06, "loss": 0.9214, "step": 9066 }, { "epoch": 0.52, "grad_norm": 1.852636694908142, "learning_rate": 9.838394830161546e-06, "loss": 0.9495, "step": 9067 }, { "epoch": 0.52, "grad_norm": 1.9712855815887451, "learning_rate": 9.836537463995263e-06, "loss": 0.8734, "step": 9068 }, { "epoch": 0.52, "grad_norm": 1.7290847301483154, "learning_rate": 9.834680103469617e-06, "loss": 0.9727, "step": 9069 }, { "epoch": 0.52, "grad_norm": 1.8492132425308228, "learning_rate": 9.832822748648699e-06, "loss": 0.9864, "step": 9070 }, { "epoch": 0.52, "grad_norm": 1.850852131843567, "learning_rate": 9.8309653995966e-06, "loss": 0.9453, "step": 9071 }, { "epoch": 0.52, "grad_norm": 1.6377599239349365, "learning_rate": 9.829108056377414e-06, "loss": 0.9797, "step": 9072 }, { "epoch": 0.52, "grad_norm": 1.8012235164642334, "learning_rate": 9.827250719055232e-06, "loss": 0.8401, "step": 9073 }, { "epoch": 0.52, "grad_norm": 1.924228549003601, "learning_rate": 9.825393387694144e-06, "loss": 0.9195, "step": 9074 }, { "epoch": 0.52, "grad_norm": 1.701206922531128, "learning_rate": 9.823536062358244e-06, "loss": 0.9048, "step": 9075 }, { "epoch": 0.52, "grad_norm": 1.7253954410552979, "learning_rate": 9.82167874311162e-06, "loss": 0.979, "step": 9076 }, { "epoch": 0.52, "grad_norm": 1.8000190258026123, "learning_rate": 9.81982143001836e-06, "loss": 0.9002, "step": 9077 }, { "epoch": 0.52, "grad_norm": 1.1878587007522583, "learning_rate": 9.817964123142566e-06, "loss": 0.6406, "step": 9078 }, { "epoch": 0.52, "grad_norm": 1.7436158657073975, "learning_rate": 9.816106822548319e-06, "loss": 1.0117, "step": 9079 }, { "epoch": 0.52, "grad_norm": 1.6903648376464844, "learning_rate": 9.81424952829971e-06, "loss": 0.9145, "step": 9080 }, { "epoch": 0.52, "grad_norm": 1.1014875173568726, "learning_rate": 9.812392240460833e-06, "loss": 0.6022, "step": 9081 }, { "epoch": 0.52, "grad_norm": 1.596754550933838, "learning_rate": 9.810534959095775e-06, "loss": 0.9253, "step": 9082 }, { "epoch": 0.52, "grad_norm": 1.84114408493042, "learning_rate": 9.808677684268628e-06, "loss": 1.0053, "step": 9083 }, { "epoch": 0.52, "grad_norm": 1.8379429578781128, "learning_rate": 9.806820416043478e-06, "loss": 0.9541, "step": 9084 }, { "epoch": 0.52, "grad_norm": 1.8191065788269043, "learning_rate": 9.804963154484417e-06, "loss": 1.0128, "step": 9085 }, { "epoch": 0.52, "grad_norm": 1.8007383346557617, "learning_rate": 9.803105899655533e-06, "loss": 0.9645, "step": 9086 }, { "epoch": 0.52, "grad_norm": 1.796562910079956, "learning_rate": 9.801248651620913e-06, "loss": 1.0077, "step": 9087 }, { "epoch": 0.52, "grad_norm": 1.6809942722320557, "learning_rate": 9.799391410444648e-06, "loss": 1.0116, "step": 9088 }, { "epoch": 0.52, "grad_norm": 2.0320732593536377, "learning_rate": 9.797534176190826e-06, "loss": 0.989, "step": 9089 }, { "epoch": 0.52, "grad_norm": 1.7093507051467896, "learning_rate": 9.795676948923533e-06, "loss": 0.9236, "step": 9090 }, { "epoch": 0.52, "grad_norm": 1.7944904565811157, "learning_rate": 9.793819728706859e-06, "loss": 1.089, "step": 9091 }, { "epoch": 0.52, "grad_norm": 1.809480905532837, "learning_rate": 9.791962515604887e-06, "loss": 0.9345, "step": 9092 }, { "epoch": 0.52, "grad_norm": 1.785309076309204, "learning_rate": 9.79010530968171e-06, "loss": 0.9445, "step": 9093 }, { "epoch": 0.52, "grad_norm": 1.6351906061172485, "learning_rate": 9.78824811100141e-06, "loss": 0.9112, "step": 9094 }, { "epoch": 0.52, "grad_norm": 1.8967688083648682, "learning_rate": 9.786390919628076e-06, "loss": 0.9493, "step": 9095 }, { "epoch": 0.52, "grad_norm": 1.7735562324523926, "learning_rate": 9.784533735625798e-06, "loss": 0.92, "step": 9096 }, { "epoch": 0.52, "grad_norm": 1.552429437637329, "learning_rate": 9.782676559058658e-06, "loss": 0.9485, "step": 9097 }, { "epoch": 0.52, "grad_norm": 1.8040074110031128, "learning_rate": 9.780819389990742e-06, "loss": 1.0107, "step": 9098 }, { "epoch": 0.52, "grad_norm": 1.7570675611495972, "learning_rate": 9.778962228486138e-06, "loss": 0.9501, "step": 9099 }, { "epoch": 0.52, "grad_norm": 1.812116026878357, "learning_rate": 9.777105074608927e-06, "loss": 1.0212, "step": 9100 }, { "epoch": 0.52, "grad_norm": 1.8255603313446045, "learning_rate": 9.7752479284232e-06, "loss": 0.9661, "step": 9101 }, { "epoch": 0.52, "grad_norm": 1.671401858329773, "learning_rate": 9.773390789993038e-06, "loss": 0.8711, "step": 9102 }, { "epoch": 0.52, "grad_norm": 1.9694647789001465, "learning_rate": 9.771533659382528e-06, "loss": 0.9044, "step": 9103 }, { "epoch": 0.52, "grad_norm": 1.122879147529602, "learning_rate": 9.769676536655754e-06, "loss": 0.6526, "step": 9104 }, { "epoch": 0.52, "grad_norm": 1.8121559619903564, "learning_rate": 9.767819421876798e-06, "loss": 0.9984, "step": 9105 }, { "epoch": 0.52, "grad_norm": 1.6679710149765015, "learning_rate": 9.765962315109743e-06, "loss": 0.9182, "step": 9106 }, { "epoch": 0.52, "grad_norm": 1.7525192499160767, "learning_rate": 9.764105216418675e-06, "loss": 0.9105, "step": 9107 }, { "epoch": 0.52, "grad_norm": 1.7969474792480469, "learning_rate": 9.762248125867679e-06, "loss": 0.9791, "step": 9108 }, { "epoch": 0.52, "grad_norm": 1.665952205657959, "learning_rate": 9.760391043520837e-06, "loss": 0.8695, "step": 9109 }, { "epoch": 0.52, "grad_norm": 1.9379795789718628, "learning_rate": 9.758533969442229e-06, "loss": 0.9559, "step": 9110 }, { "epoch": 0.52, "grad_norm": 1.8320332765579224, "learning_rate": 9.75667690369594e-06, "loss": 0.9557, "step": 9111 }, { "epoch": 0.52, "grad_norm": 1.6530497074127197, "learning_rate": 9.754819846346051e-06, "loss": 0.9094, "step": 9112 }, { "epoch": 0.52, "grad_norm": 1.6904035806655884, "learning_rate": 9.752962797456645e-06, "loss": 0.8795, "step": 9113 }, { "epoch": 0.52, "grad_norm": 1.5185389518737793, "learning_rate": 9.751105757091802e-06, "loss": 0.902, "step": 9114 }, { "epoch": 0.52, "grad_norm": 1.8129384517669678, "learning_rate": 9.749248725315605e-06, "loss": 0.937, "step": 9115 }, { "epoch": 0.52, "grad_norm": 1.819229245185852, "learning_rate": 9.747391702192132e-06, "loss": 0.9767, "step": 9116 }, { "epoch": 0.52, "grad_norm": 1.6527334451675415, "learning_rate": 9.745534687785467e-06, "loss": 0.931, "step": 9117 }, { "epoch": 0.52, "grad_norm": 0.9471772313117981, "learning_rate": 9.74367768215969e-06, "loss": 0.5261, "step": 9118 }, { "epoch": 0.52, "grad_norm": 1.8168249130249023, "learning_rate": 9.741820685378876e-06, "loss": 0.9973, "step": 9119 }, { "epoch": 0.52, "grad_norm": 1.9497843980789185, "learning_rate": 9.739963697507113e-06, "loss": 0.9345, "step": 9120 }, { "epoch": 0.52, "grad_norm": 1.8046777248382568, "learning_rate": 9.738106718608475e-06, "loss": 0.974, "step": 9121 }, { "epoch": 0.52, "grad_norm": 1.705181360244751, "learning_rate": 9.736249748747045e-06, "loss": 0.9757, "step": 9122 }, { "epoch": 0.52, "grad_norm": 1.7673641443252563, "learning_rate": 9.734392787986896e-06, "loss": 0.8812, "step": 9123 }, { "epoch": 0.52, "grad_norm": 1.8405790328979492, "learning_rate": 9.732535836392112e-06, "loss": 0.924, "step": 9124 }, { "epoch": 0.52, "grad_norm": 1.811075210571289, "learning_rate": 9.730678894026769e-06, "loss": 0.9168, "step": 9125 }, { "epoch": 0.52, "grad_norm": 1.8107473850250244, "learning_rate": 9.728821960954945e-06, "loss": 0.9082, "step": 9126 }, { "epoch": 0.52, "grad_norm": 1.6464771032333374, "learning_rate": 9.726965037240718e-06, "loss": 0.9954, "step": 9127 }, { "epoch": 0.52, "grad_norm": 1.6136360168457031, "learning_rate": 9.725108122948163e-06, "loss": 0.8873, "step": 9128 }, { "epoch": 0.52, "grad_norm": 1.04267156124115, "learning_rate": 9.72325121814136e-06, "loss": 0.5504, "step": 9129 }, { "epoch": 0.52, "grad_norm": 1.7047576904296875, "learning_rate": 9.721394322884386e-06, "loss": 0.9303, "step": 9130 }, { "epoch": 0.52, "grad_norm": 1.710576057434082, "learning_rate": 9.719537437241311e-06, "loss": 0.8982, "step": 9131 }, { "epoch": 0.52, "grad_norm": 1.874295711517334, "learning_rate": 9.717680561276219e-06, "loss": 0.9556, "step": 9132 }, { "epoch": 0.52, "grad_norm": 1.632091999053955, "learning_rate": 9.715823695053182e-06, "loss": 0.9892, "step": 9133 }, { "epoch": 0.52, "grad_norm": 1.6833040714263916, "learning_rate": 9.713966838636277e-06, "loss": 0.9379, "step": 9134 }, { "epoch": 0.52, "grad_norm": 1.7168601751327515, "learning_rate": 9.712109992089575e-06, "loss": 0.9189, "step": 9135 }, { "epoch": 0.52, "grad_norm": 1.6759908199310303, "learning_rate": 9.710253155477154e-06, "loss": 0.9862, "step": 9136 }, { "epoch": 0.52, "grad_norm": 1.9267933368682861, "learning_rate": 9.708396328863087e-06, "loss": 1.0107, "step": 9137 }, { "epoch": 0.52, "grad_norm": 1.8829721212387085, "learning_rate": 9.706539512311448e-06, "loss": 0.9729, "step": 9138 }, { "epoch": 0.52, "grad_norm": 1.8684232234954834, "learning_rate": 9.70468270588631e-06, "loss": 0.952, "step": 9139 }, { "epoch": 0.52, "grad_norm": 1.7286484241485596, "learning_rate": 9.702825909651748e-06, "loss": 1.0044, "step": 9140 }, { "epoch": 0.52, "grad_norm": 1.702661395072937, "learning_rate": 9.700969123671834e-06, "loss": 1.0631, "step": 9141 }, { "epoch": 0.52, "grad_norm": 1.8028011322021484, "learning_rate": 9.69911234801064e-06, "loss": 0.9915, "step": 9142 }, { "epoch": 0.52, "grad_norm": 1.9446077346801758, "learning_rate": 9.697255582732235e-06, "loss": 1.0217, "step": 9143 }, { "epoch": 0.52, "grad_norm": 1.7118337154388428, "learning_rate": 9.695398827900699e-06, "loss": 0.9008, "step": 9144 }, { "epoch": 0.52, "grad_norm": 1.6389646530151367, "learning_rate": 9.693542083580096e-06, "loss": 0.9112, "step": 9145 }, { "epoch": 0.52, "grad_norm": 1.8176825046539307, "learning_rate": 9.691685349834499e-06, "loss": 0.9228, "step": 9146 }, { "epoch": 0.52, "grad_norm": 1.8787474632263184, "learning_rate": 9.68982862672798e-06, "loss": 0.9737, "step": 9147 }, { "epoch": 0.52, "grad_norm": 1.841701865196228, "learning_rate": 9.687971914324607e-06, "loss": 0.9943, "step": 9148 }, { "epoch": 0.52, "grad_norm": 1.8081611394882202, "learning_rate": 9.686115212688455e-06, "loss": 0.9862, "step": 9149 }, { "epoch": 0.52, "grad_norm": 1.6243294477462769, "learning_rate": 9.684258521883589e-06, "loss": 1.0066, "step": 9150 }, { "epoch": 0.52, "grad_norm": 1.7476487159729004, "learning_rate": 9.68240184197408e-06, "loss": 0.948, "step": 9151 }, { "epoch": 0.52, "grad_norm": 1.863942265510559, "learning_rate": 9.680545173023996e-06, "loss": 0.9946, "step": 9152 }, { "epoch": 0.52, "grad_norm": 1.7008764743804932, "learning_rate": 9.678688515097405e-06, "loss": 1.0009, "step": 9153 }, { "epoch": 0.52, "grad_norm": 1.732004165649414, "learning_rate": 9.676831868258377e-06, "loss": 0.9444, "step": 9154 }, { "epoch": 0.53, "grad_norm": 1.787692904472351, "learning_rate": 9.674975232570978e-06, "loss": 0.8751, "step": 9155 }, { "epoch": 0.53, "grad_norm": 1.7370622158050537, "learning_rate": 9.673118608099276e-06, "loss": 0.9743, "step": 9156 }, { "epoch": 0.53, "grad_norm": 1.7364182472229004, "learning_rate": 9.671261994907337e-06, "loss": 0.9604, "step": 9157 }, { "epoch": 0.53, "grad_norm": 1.56843101978302, "learning_rate": 9.669405393059228e-06, "loss": 0.8853, "step": 9158 }, { "epoch": 0.53, "grad_norm": 1.878078818321228, "learning_rate": 9.667548802619018e-06, "loss": 1.0337, "step": 9159 }, { "epoch": 0.53, "grad_norm": 1.0043394565582275, "learning_rate": 9.665692223650765e-06, "loss": 0.5717, "step": 9160 }, { "epoch": 0.53, "grad_norm": 1.8545775413513184, "learning_rate": 9.663835656218545e-06, "loss": 1.0082, "step": 9161 }, { "epoch": 0.53, "grad_norm": 1.8088265657424927, "learning_rate": 9.661979100386414e-06, "loss": 0.9772, "step": 9162 }, { "epoch": 0.53, "grad_norm": 1.8426729440689087, "learning_rate": 9.660122556218441e-06, "loss": 0.9882, "step": 9163 }, { "epoch": 0.53, "grad_norm": 1.5606706142425537, "learning_rate": 9.658266023778689e-06, "loss": 0.9362, "step": 9164 }, { "epoch": 0.53, "grad_norm": 1.7727643251419067, "learning_rate": 9.656409503131224e-06, "loss": 0.9656, "step": 9165 }, { "epoch": 0.53, "grad_norm": 1.7324577569961548, "learning_rate": 9.654552994340104e-06, "loss": 0.9218, "step": 9166 }, { "epoch": 0.53, "grad_norm": 1.7717936038970947, "learning_rate": 9.652696497469398e-06, "loss": 0.9775, "step": 9167 }, { "epoch": 0.53, "grad_norm": 2.023068428039551, "learning_rate": 9.650840012583164e-06, "loss": 0.9217, "step": 9168 }, { "epoch": 0.53, "grad_norm": 1.6869587898254395, "learning_rate": 9.648983539745468e-06, "loss": 0.9897, "step": 9169 }, { "epoch": 0.53, "grad_norm": 1.8586260080337524, "learning_rate": 9.647127079020368e-06, "loss": 0.977, "step": 9170 }, { "epoch": 0.53, "grad_norm": 1.7261173725128174, "learning_rate": 9.645270630471927e-06, "loss": 1.013, "step": 9171 }, { "epoch": 0.53, "grad_norm": 1.6706613302230835, "learning_rate": 9.643414194164205e-06, "loss": 0.9789, "step": 9172 }, { "epoch": 0.53, "grad_norm": 1.6840660572052002, "learning_rate": 9.64155777016126e-06, "loss": 0.8984, "step": 9173 }, { "epoch": 0.53, "grad_norm": 1.7468554973602295, "learning_rate": 9.639701358527159e-06, "loss": 1.0471, "step": 9174 }, { "epoch": 0.53, "grad_norm": 1.770385980606079, "learning_rate": 9.637844959325958e-06, "loss": 0.9114, "step": 9175 }, { "epoch": 0.53, "grad_norm": 1.6317611932754517, "learning_rate": 9.635988572621716e-06, "loss": 0.9143, "step": 9176 }, { "epoch": 0.53, "grad_norm": 1.7013816833496094, "learning_rate": 9.63413219847849e-06, "loss": 0.9388, "step": 9177 }, { "epoch": 0.53, "grad_norm": 1.7648004293441772, "learning_rate": 9.632275836960339e-06, "loss": 0.9616, "step": 9178 }, { "epoch": 0.53, "grad_norm": 1.6768865585327148, "learning_rate": 9.630419488131324e-06, "loss": 0.8414, "step": 9179 }, { "epoch": 0.53, "grad_norm": 1.8724957704544067, "learning_rate": 9.628563152055499e-06, "loss": 0.9046, "step": 9180 }, { "epoch": 0.53, "grad_norm": 1.067582368850708, "learning_rate": 9.626706828796923e-06, "loss": 0.5963, "step": 9181 }, { "epoch": 0.53, "grad_norm": 1.7324275970458984, "learning_rate": 9.624850518419651e-06, "loss": 1.0006, "step": 9182 }, { "epoch": 0.53, "grad_norm": 1.6639113426208496, "learning_rate": 9.62299422098774e-06, "loss": 0.96, "step": 9183 }, { "epoch": 0.53, "grad_norm": 1.666750431060791, "learning_rate": 9.621137936565244e-06, "loss": 0.8709, "step": 9184 }, { "epoch": 0.53, "grad_norm": 1.788880705833435, "learning_rate": 9.619281665216218e-06, "loss": 0.9644, "step": 9185 }, { "epoch": 0.53, "grad_norm": 1.9265905618667603, "learning_rate": 9.61742540700472e-06, "loss": 0.9225, "step": 9186 }, { "epoch": 0.53, "grad_norm": 1.6256499290466309, "learning_rate": 9.615569161994804e-06, "loss": 1.0594, "step": 9187 }, { "epoch": 0.53, "grad_norm": 1.769329309463501, "learning_rate": 9.613712930250521e-06, "loss": 0.8919, "step": 9188 }, { "epoch": 0.53, "grad_norm": 1.7993172407150269, "learning_rate": 9.611856711835926e-06, "loss": 0.9707, "step": 9189 }, { "epoch": 0.53, "grad_norm": 1.7590276002883911, "learning_rate": 9.610000506815072e-06, "loss": 0.9534, "step": 9190 }, { "epoch": 0.53, "grad_norm": 1.7609660625457764, "learning_rate": 9.608144315252011e-06, "loss": 0.9863, "step": 9191 }, { "epoch": 0.53, "grad_norm": 1.6544277667999268, "learning_rate": 9.606288137210795e-06, "loss": 0.9516, "step": 9192 }, { "epoch": 0.53, "grad_norm": 1.1204427480697632, "learning_rate": 9.604431972755477e-06, "loss": 0.6416, "step": 9193 }, { "epoch": 0.53, "grad_norm": 1.7792524099349976, "learning_rate": 9.602575821950105e-06, "loss": 0.9991, "step": 9194 }, { "epoch": 0.53, "grad_norm": 1.770972490310669, "learning_rate": 9.60071968485873e-06, "loss": 0.9462, "step": 9195 }, { "epoch": 0.53, "grad_norm": 1.6536386013031006, "learning_rate": 9.598863561545404e-06, "loss": 0.9621, "step": 9196 }, { "epoch": 0.53, "grad_norm": 1.7517553567886353, "learning_rate": 9.597007452074175e-06, "loss": 0.9583, "step": 9197 }, { "epoch": 0.53, "grad_norm": 1.7854695320129395, "learning_rate": 9.595151356509095e-06, "loss": 0.8949, "step": 9198 }, { "epoch": 0.53, "grad_norm": 2.052421808242798, "learning_rate": 9.59329527491421e-06, "loss": 1.0236, "step": 9199 }, { "epoch": 0.53, "grad_norm": 1.8106080293655396, "learning_rate": 9.59143920735357e-06, "loss": 0.9872, "step": 9200 }, { "epoch": 0.53, "grad_norm": 1.841724157333374, "learning_rate": 9.589583153891222e-06, "loss": 1.0363, "step": 9201 }, { "epoch": 0.53, "grad_norm": 1.870548963546753, "learning_rate": 9.587727114591212e-06, "loss": 1.0104, "step": 9202 }, { "epoch": 0.53, "grad_norm": 1.8253833055496216, "learning_rate": 9.585871089517588e-06, "loss": 0.9651, "step": 9203 }, { "epoch": 0.53, "grad_norm": 1.6802380084991455, "learning_rate": 9.584015078734395e-06, "loss": 1.0274, "step": 9204 }, { "epoch": 0.53, "grad_norm": 1.7654331922531128, "learning_rate": 9.58215908230568e-06, "loss": 1.0008, "step": 9205 }, { "epoch": 0.53, "grad_norm": 2.0187466144561768, "learning_rate": 9.58030310029549e-06, "loss": 0.915, "step": 9206 }, { "epoch": 0.53, "grad_norm": 1.7666550874710083, "learning_rate": 9.578447132767866e-06, "loss": 0.9878, "step": 9207 }, { "epoch": 0.53, "grad_norm": 1.8300156593322754, "learning_rate": 9.576591179786852e-06, "loss": 1.0434, "step": 9208 }, { "epoch": 0.53, "grad_norm": 2.0231473445892334, "learning_rate": 9.574735241416495e-06, "loss": 1.0109, "step": 9209 }, { "epoch": 0.53, "grad_norm": 2.1398117542266846, "learning_rate": 9.572879317720836e-06, "loss": 0.9557, "step": 9210 }, { "epoch": 0.53, "grad_norm": 1.8345195055007935, "learning_rate": 9.57102340876392e-06, "loss": 0.8996, "step": 9211 }, { "epoch": 0.53, "grad_norm": 1.0773444175720215, "learning_rate": 9.569167514609786e-06, "loss": 0.6073, "step": 9212 }, { "epoch": 0.53, "grad_norm": 1.5026386976242065, "learning_rate": 9.567311635322479e-06, "loss": 0.851, "step": 9213 }, { "epoch": 0.53, "grad_norm": 1.7157379388809204, "learning_rate": 9.565455770966036e-06, "loss": 0.9678, "step": 9214 }, { "epoch": 0.53, "grad_norm": 1.8854520320892334, "learning_rate": 9.563599921604505e-06, "loss": 0.9765, "step": 9215 }, { "epoch": 0.53, "grad_norm": 1.846970558166504, "learning_rate": 9.56174408730192e-06, "loss": 0.9221, "step": 9216 }, { "epoch": 0.53, "grad_norm": 1.7136763334274292, "learning_rate": 9.559888268122323e-06, "loss": 0.853, "step": 9217 }, { "epoch": 0.53, "grad_norm": 1.8666157722473145, "learning_rate": 9.558032464129752e-06, "loss": 0.9567, "step": 9218 }, { "epoch": 0.53, "grad_norm": 1.773330569267273, "learning_rate": 9.556176675388245e-06, "loss": 1.0328, "step": 9219 }, { "epoch": 0.53, "grad_norm": 1.7969763278961182, "learning_rate": 9.554320901961844e-06, "loss": 0.9848, "step": 9220 }, { "epoch": 0.53, "grad_norm": 0.9880284667015076, "learning_rate": 9.55246514391458e-06, "loss": 0.5913, "step": 9221 }, { "epoch": 0.53, "grad_norm": 1.7320494651794434, "learning_rate": 9.550609401310498e-06, "loss": 0.954, "step": 9222 }, { "epoch": 0.53, "grad_norm": 1.727244257926941, "learning_rate": 9.548753674213627e-06, "loss": 0.9452, "step": 9223 }, { "epoch": 0.53, "grad_norm": 1.803238034248352, "learning_rate": 9.546897962688007e-06, "loss": 1.0112, "step": 9224 }, { "epoch": 0.53, "grad_norm": 1.8354918956756592, "learning_rate": 9.545042266797675e-06, "loss": 0.9314, "step": 9225 }, { "epoch": 0.53, "grad_norm": 1.7105672359466553, "learning_rate": 9.543186586606657e-06, "loss": 0.9461, "step": 9226 }, { "epoch": 0.53, "grad_norm": 1.7394497394561768, "learning_rate": 9.541330922178998e-06, "loss": 0.9825, "step": 9227 }, { "epoch": 0.53, "grad_norm": 1.9199997186660767, "learning_rate": 9.539475273578729e-06, "loss": 0.9236, "step": 9228 }, { "epoch": 0.53, "grad_norm": 1.910712480545044, "learning_rate": 9.53761964086988e-06, "loss": 1.0679, "step": 9229 }, { "epoch": 0.53, "grad_norm": 1.696234107017517, "learning_rate": 9.535764024116488e-06, "loss": 0.9596, "step": 9230 }, { "epoch": 0.53, "grad_norm": 1.876043677330017, "learning_rate": 9.53390842338258e-06, "loss": 0.9485, "step": 9231 }, { "epoch": 0.53, "grad_norm": 1.908756136894226, "learning_rate": 9.53205283873219e-06, "loss": 0.9829, "step": 9232 }, { "epoch": 0.53, "grad_norm": 1.6808531284332275, "learning_rate": 9.530197270229352e-06, "loss": 0.9752, "step": 9233 }, { "epoch": 0.53, "grad_norm": 1.7408236265182495, "learning_rate": 9.528341717938093e-06, "loss": 0.9499, "step": 9234 }, { "epoch": 0.53, "grad_norm": 1.89076566696167, "learning_rate": 9.526486181922443e-06, "loss": 0.9591, "step": 9235 }, { "epoch": 0.53, "grad_norm": 1.696572184562683, "learning_rate": 9.524630662246432e-06, "loss": 0.939, "step": 9236 }, { "epoch": 0.53, "grad_norm": 1.7089451551437378, "learning_rate": 9.522775158974091e-06, "loss": 0.9423, "step": 9237 }, { "epoch": 0.53, "grad_norm": 1.6499236822128296, "learning_rate": 9.520919672169444e-06, "loss": 0.8797, "step": 9238 }, { "epoch": 0.53, "grad_norm": 1.7998429536819458, "learning_rate": 9.51906420189652e-06, "loss": 0.9286, "step": 9239 }, { "epoch": 0.53, "grad_norm": 1.6627498865127563, "learning_rate": 9.517208748219347e-06, "loss": 0.9495, "step": 9240 }, { "epoch": 0.53, "grad_norm": 1.7416205406188965, "learning_rate": 9.515353311201953e-06, "loss": 0.9525, "step": 9241 }, { "epoch": 0.53, "grad_norm": 1.7147578001022339, "learning_rate": 9.513497890908363e-06, "loss": 0.9136, "step": 9242 }, { "epoch": 0.53, "grad_norm": 1.7469813823699951, "learning_rate": 9.511642487402601e-06, "loss": 0.9336, "step": 9243 }, { "epoch": 0.53, "grad_norm": 1.8452692031860352, "learning_rate": 9.509787100748693e-06, "loss": 0.9226, "step": 9244 }, { "epoch": 0.53, "grad_norm": 1.7484073638916016, "learning_rate": 9.507931731010663e-06, "loss": 0.979, "step": 9245 }, { "epoch": 0.53, "grad_norm": 1.7768722772598267, "learning_rate": 9.506076378252535e-06, "loss": 0.9941, "step": 9246 }, { "epoch": 0.53, "grad_norm": 1.7821413278579712, "learning_rate": 9.50422104253833e-06, "loss": 0.9402, "step": 9247 }, { "epoch": 0.53, "grad_norm": 1.7205201387405396, "learning_rate": 9.502365723932072e-06, "loss": 0.9296, "step": 9248 }, { "epoch": 0.53, "grad_norm": 1.6679078340530396, "learning_rate": 9.500510422497783e-06, "loss": 0.9417, "step": 9249 }, { "epoch": 0.53, "grad_norm": 1.790339469909668, "learning_rate": 9.498655138299484e-06, "loss": 0.9388, "step": 9250 }, { "epoch": 0.53, "grad_norm": 1.8243279457092285, "learning_rate": 9.496799871401195e-06, "loss": 0.9771, "step": 9251 }, { "epoch": 0.53, "grad_norm": 1.116796851158142, "learning_rate": 9.494944621866938e-06, "loss": 0.6068, "step": 9252 }, { "epoch": 0.53, "grad_norm": 1.671281337738037, "learning_rate": 9.49308938976073e-06, "loss": 0.9475, "step": 9253 }, { "epoch": 0.53, "grad_norm": 1.5734124183654785, "learning_rate": 9.491234175146592e-06, "loss": 0.9178, "step": 9254 }, { "epoch": 0.53, "grad_norm": 1.646193265914917, "learning_rate": 9.489378978088542e-06, "loss": 1.0565, "step": 9255 }, { "epoch": 0.53, "grad_norm": 1.7536954879760742, "learning_rate": 9.487523798650596e-06, "loss": 1.0017, "step": 9256 }, { "epoch": 0.53, "grad_norm": 1.7661257982254028, "learning_rate": 9.485668636896774e-06, "loss": 0.9342, "step": 9257 }, { "epoch": 0.53, "grad_norm": 1.7019902467727661, "learning_rate": 9.48381349289109e-06, "loss": 1.0087, "step": 9258 }, { "epoch": 0.53, "grad_norm": 1.738187551498413, "learning_rate": 9.48195836669756e-06, "loss": 0.961, "step": 9259 }, { "epoch": 0.53, "grad_norm": 1.7771666049957275, "learning_rate": 9.480103258380198e-06, "loss": 0.986, "step": 9260 }, { "epoch": 0.53, "grad_norm": 1.7559185028076172, "learning_rate": 9.478248168003022e-06, "loss": 1.0406, "step": 9261 }, { "epoch": 0.53, "grad_norm": 1.7301557064056396, "learning_rate": 9.476393095630046e-06, "loss": 0.9277, "step": 9262 }, { "epoch": 0.53, "grad_norm": 1.8342472314834595, "learning_rate": 9.474538041325277e-06, "loss": 0.9542, "step": 9263 }, { "epoch": 0.53, "grad_norm": 1.74834406375885, "learning_rate": 9.472683005152735e-06, "loss": 0.8808, "step": 9264 }, { "epoch": 0.53, "grad_norm": 1.6324621438980103, "learning_rate": 9.47082798717643e-06, "loss": 0.9081, "step": 9265 }, { "epoch": 0.53, "grad_norm": 1.949791431427002, "learning_rate": 9.468972987460372e-06, "loss": 0.9579, "step": 9266 }, { "epoch": 0.53, "grad_norm": 1.8759971857070923, "learning_rate": 9.467118006068575e-06, "loss": 1.0282, "step": 9267 }, { "epoch": 0.53, "grad_norm": 1.6768170595169067, "learning_rate": 9.465263043065045e-06, "loss": 0.9918, "step": 9268 }, { "epoch": 0.53, "grad_norm": 1.8442323207855225, "learning_rate": 9.463408098513794e-06, "loss": 0.9271, "step": 9269 }, { "epoch": 0.53, "grad_norm": 1.8389896154403687, "learning_rate": 9.461553172478831e-06, "loss": 1.0064, "step": 9270 }, { "epoch": 0.53, "grad_norm": 1.6300525665283203, "learning_rate": 9.459698265024164e-06, "loss": 0.9981, "step": 9271 }, { "epoch": 0.53, "grad_norm": 1.8235679864883423, "learning_rate": 9.4578433762138e-06, "loss": 0.9891, "step": 9272 }, { "epoch": 0.53, "grad_norm": 1.0574673414230347, "learning_rate": 9.455988506111747e-06, "loss": 0.6068, "step": 9273 }, { "epoch": 0.53, "grad_norm": 1.6184523105621338, "learning_rate": 9.454133654782011e-06, "loss": 0.9987, "step": 9274 }, { "epoch": 0.53, "grad_norm": 1.1272081136703491, "learning_rate": 9.452278822288597e-06, "loss": 0.6166, "step": 9275 }, { "epoch": 0.53, "grad_norm": 1.8617305755615234, "learning_rate": 9.45042400869551e-06, "loss": 1.0194, "step": 9276 }, { "epoch": 0.53, "grad_norm": 1.6669964790344238, "learning_rate": 9.448569214066757e-06, "loss": 0.9676, "step": 9277 }, { "epoch": 0.53, "grad_norm": 0.9764981865882874, "learning_rate": 9.446714438466338e-06, "loss": 0.5387, "step": 9278 }, { "epoch": 0.53, "grad_norm": 1.910941481590271, "learning_rate": 9.44485968195826e-06, "loss": 0.9878, "step": 9279 }, { "epoch": 0.53, "grad_norm": 1.9592466354370117, "learning_rate": 9.443004944606522e-06, "loss": 0.9693, "step": 9280 }, { "epoch": 0.53, "grad_norm": 1.9266902208328247, "learning_rate": 9.44115022647513e-06, "loss": 1.0737, "step": 9281 }, { "epoch": 0.53, "grad_norm": 1.9121421575546265, "learning_rate": 9.439295527628083e-06, "loss": 0.9761, "step": 9282 }, { "epoch": 0.53, "grad_norm": 1.0599453449249268, "learning_rate": 9.437440848129377e-06, "loss": 0.6047, "step": 9283 }, { "epoch": 0.53, "grad_norm": 1.684749960899353, "learning_rate": 9.43558618804302e-06, "loss": 0.9396, "step": 9284 }, { "epoch": 0.53, "grad_norm": 1.7958449125289917, "learning_rate": 9.433731547433007e-06, "loss": 0.9947, "step": 9285 }, { "epoch": 0.53, "grad_norm": 2.284531593322754, "learning_rate": 9.431876926363335e-06, "loss": 0.9178, "step": 9286 }, { "epoch": 0.53, "grad_norm": 1.710998296737671, "learning_rate": 9.430022324898003e-06, "loss": 0.9176, "step": 9287 }, { "epoch": 0.53, "grad_norm": 1.6571248769760132, "learning_rate": 9.428167743101009e-06, "loss": 0.9121, "step": 9288 }, { "epoch": 0.53, "grad_norm": 1.716275691986084, "learning_rate": 9.426313181036349e-06, "loss": 0.9716, "step": 9289 }, { "epoch": 0.53, "grad_norm": 1.6823360919952393, "learning_rate": 9.424458638768018e-06, "loss": 0.8511, "step": 9290 }, { "epoch": 0.53, "grad_norm": 1.5675513744354248, "learning_rate": 9.422604116360012e-06, "loss": 0.9822, "step": 9291 }, { "epoch": 0.53, "grad_norm": 1.7278447151184082, "learning_rate": 9.420749613876326e-06, "loss": 0.8702, "step": 9292 }, { "epoch": 0.53, "grad_norm": 1.7466652393341064, "learning_rate": 9.41889513138095e-06, "loss": 0.9253, "step": 9293 }, { "epoch": 0.53, "grad_norm": 1.6461753845214844, "learning_rate": 9.417040668937881e-06, "loss": 0.9027, "step": 9294 }, { "epoch": 0.53, "grad_norm": 1.678087592124939, "learning_rate": 9.415186226611111e-06, "loss": 0.8641, "step": 9295 }, { "epoch": 0.53, "grad_norm": 1.0825157165527344, "learning_rate": 9.41333180446463e-06, "loss": 0.5885, "step": 9296 }, { "epoch": 0.53, "grad_norm": 1.754571557044983, "learning_rate": 9.411477402562432e-06, "loss": 0.9902, "step": 9297 }, { "epoch": 0.53, "grad_norm": 1.7567294836044312, "learning_rate": 9.409623020968502e-06, "loss": 0.9742, "step": 9298 }, { "epoch": 0.53, "grad_norm": 1.7099295854568481, "learning_rate": 9.407768659746833e-06, "loss": 0.9486, "step": 9299 }, { "epoch": 0.53, "grad_norm": 1.8363503217697144, "learning_rate": 9.405914318961414e-06, "loss": 0.9886, "step": 9300 }, { "epoch": 0.53, "grad_norm": 1.8244668245315552, "learning_rate": 9.404059998676232e-06, "loss": 0.9367, "step": 9301 }, { "epoch": 0.53, "grad_norm": 1.678518533706665, "learning_rate": 9.402205698955274e-06, "loss": 0.9928, "step": 9302 }, { "epoch": 0.53, "grad_norm": 1.814306616783142, "learning_rate": 9.40035141986253e-06, "loss": 0.9596, "step": 9303 }, { "epoch": 0.53, "grad_norm": 1.6924738883972168, "learning_rate": 9.398497161461981e-06, "loss": 0.867, "step": 9304 }, { "epoch": 0.53, "grad_norm": 1.6738865375518799, "learning_rate": 9.396642923817613e-06, "loss": 1.0128, "step": 9305 }, { "epoch": 0.53, "grad_norm": 1.6957411766052246, "learning_rate": 9.394788706993414e-06, "loss": 0.8222, "step": 9306 }, { "epoch": 0.53, "grad_norm": 1.9628829956054688, "learning_rate": 9.392934511053367e-06, "loss": 0.9796, "step": 9307 }, { "epoch": 0.53, "grad_norm": 1.9452887773513794, "learning_rate": 9.391080336061454e-06, "loss": 1.0289, "step": 9308 }, { "epoch": 0.53, "grad_norm": 1.852339744567871, "learning_rate": 9.38922618208166e-06, "loss": 1.0154, "step": 9309 }, { "epoch": 0.53, "grad_norm": 1.5806230306625366, "learning_rate": 9.387372049177961e-06, "loss": 0.9014, "step": 9310 }, { "epoch": 0.53, "grad_norm": 1.6444921493530273, "learning_rate": 9.385517937414341e-06, "loss": 1.0151, "step": 9311 }, { "epoch": 0.53, "grad_norm": 1.7565041780471802, "learning_rate": 9.383663846854782e-06, "loss": 0.9015, "step": 9312 }, { "epoch": 0.53, "grad_norm": 1.6916500329971313, "learning_rate": 9.381809777563262e-06, "loss": 0.956, "step": 9313 }, { "epoch": 0.53, "grad_norm": 1.6737329959869385, "learning_rate": 9.37995572960376e-06, "loss": 1.0323, "step": 9314 }, { "epoch": 0.53, "grad_norm": 1.6440950632095337, "learning_rate": 9.378101703040254e-06, "loss": 0.9892, "step": 9315 }, { "epoch": 0.53, "grad_norm": 1.7503169775009155, "learning_rate": 9.376247697936719e-06, "loss": 0.9663, "step": 9316 }, { "epoch": 0.53, "grad_norm": 1.6544078588485718, "learning_rate": 9.374393714357132e-06, "loss": 0.9875, "step": 9317 }, { "epoch": 0.53, "grad_norm": 1.6965941190719604, "learning_rate": 9.372539752365474e-06, "loss": 0.9113, "step": 9318 }, { "epoch": 0.53, "grad_norm": 1.6590759754180908, "learning_rate": 9.370685812025716e-06, "loss": 0.8929, "step": 9319 }, { "epoch": 0.53, "grad_norm": 1.8665412664413452, "learning_rate": 9.368831893401832e-06, "loss": 0.9715, "step": 9320 }, { "epoch": 0.53, "grad_norm": 1.6794768571853638, "learning_rate": 9.366977996557797e-06, "loss": 0.9558, "step": 9321 }, { "epoch": 0.53, "grad_norm": 1.7764272689819336, "learning_rate": 9.365124121557583e-06, "loss": 0.9906, "step": 9322 }, { "epoch": 0.53, "grad_norm": 2.009660243988037, "learning_rate": 9.363270268465162e-06, "loss": 0.9514, "step": 9323 }, { "epoch": 0.53, "grad_norm": 1.7077337503433228, "learning_rate": 9.361416437344504e-06, "loss": 0.9117, "step": 9324 }, { "epoch": 0.53, "grad_norm": 1.8543405532836914, "learning_rate": 9.359562628259582e-06, "loss": 0.9906, "step": 9325 }, { "epoch": 0.53, "grad_norm": 1.6126928329467773, "learning_rate": 9.357708841274365e-06, "loss": 0.9519, "step": 9326 }, { "epoch": 0.53, "grad_norm": 2.1099298000335693, "learning_rate": 9.355855076452823e-06, "loss": 0.9479, "step": 9327 }, { "epoch": 0.53, "grad_norm": 1.7311211824417114, "learning_rate": 9.35400133385892e-06, "loss": 0.9436, "step": 9328 }, { "epoch": 0.54, "grad_norm": 1.7908272743225098, "learning_rate": 9.352147613556626e-06, "loss": 0.9988, "step": 9329 }, { "epoch": 0.54, "grad_norm": 1.7593470811843872, "learning_rate": 9.35029391560991e-06, "loss": 0.9288, "step": 9330 }, { "epoch": 0.54, "grad_norm": 1.6753427982330322, "learning_rate": 9.348440240082737e-06, "loss": 0.9635, "step": 9331 }, { "epoch": 0.54, "grad_norm": 1.61228609085083, "learning_rate": 9.34658658703907e-06, "loss": 0.9349, "step": 9332 }, { "epoch": 0.54, "grad_norm": 1.681319236755371, "learning_rate": 9.344732956542874e-06, "loss": 0.9856, "step": 9333 }, { "epoch": 0.54, "grad_norm": 1.7990999221801758, "learning_rate": 9.342879348658115e-06, "loss": 1.0457, "step": 9334 }, { "epoch": 0.54, "grad_norm": 1.905035138130188, "learning_rate": 9.341025763448753e-06, "loss": 0.9639, "step": 9335 }, { "epoch": 0.54, "grad_norm": 2.023582935333252, "learning_rate": 9.339172200978752e-06, "loss": 1.0008, "step": 9336 }, { "epoch": 0.54, "grad_norm": 1.716108798980713, "learning_rate": 9.337318661312072e-06, "loss": 0.9706, "step": 9337 }, { "epoch": 0.54, "grad_norm": 1.643920660018921, "learning_rate": 9.335465144512674e-06, "loss": 0.9526, "step": 9338 }, { "epoch": 0.54, "grad_norm": 1.7576969861984253, "learning_rate": 9.333611650644518e-06, "loss": 0.9388, "step": 9339 }, { "epoch": 0.54, "grad_norm": 1.0834410190582275, "learning_rate": 9.331758179771562e-06, "loss": 0.593, "step": 9340 }, { "epoch": 0.54, "grad_norm": 1.848089337348938, "learning_rate": 9.329904731957761e-06, "loss": 1.021, "step": 9341 }, { "epoch": 0.54, "grad_norm": 1.681815266609192, "learning_rate": 9.328051307267079e-06, "loss": 0.897, "step": 9342 }, { "epoch": 0.54, "grad_norm": 1.6038150787353516, "learning_rate": 9.32619790576347e-06, "loss": 0.965, "step": 9343 }, { "epoch": 0.54, "grad_norm": 1.6405913829803467, "learning_rate": 9.324344527510886e-06, "loss": 0.9329, "step": 9344 }, { "epoch": 0.54, "grad_norm": 1.6053853034973145, "learning_rate": 9.32249117257329e-06, "loss": 0.968, "step": 9345 }, { "epoch": 0.54, "grad_norm": 2.1093873977661133, "learning_rate": 9.320637841014625e-06, "loss": 0.9681, "step": 9346 }, { "epoch": 0.54, "grad_norm": 1.633998990058899, "learning_rate": 9.318784532898855e-06, "loss": 0.9681, "step": 9347 }, { "epoch": 0.54, "grad_norm": 1.8004143238067627, "learning_rate": 9.316931248289926e-06, "loss": 0.9307, "step": 9348 }, { "epoch": 0.54, "grad_norm": 1.7232537269592285, "learning_rate": 9.315077987251793e-06, "loss": 0.9118, "step": 9349 }, { "epoch": 0.54, "grad_norm": 1.8785570859909058, "learning_rate": 9.313224749848405e-06, "loss": 0.9349, "step": 9350 }, { "epoch": 0.54, "grad_norm": 1.798021674156189, "learning_rate": 9.311371536143713e-06, "loss": 0.949, "step": 9351 }, { "epoch": 0.54, "grad_norm": 1.9915432929992676, "learning_rate": 9.309518346201665e-06, "loss": 0.9075, "step": 9352 }, { "epoch": 0.54, "grad_norm": 1.695725440979004, "learning_rate": 9.30766518008621e-06, "loss": 0.9895, "step": 9353 }, { "epoch": 0.54, "grad_norm": 2.0562973022460938, "learning_rate": 9.305812037861296e-06, "loss": 0.9156, "step": 9354 }, { "epoch": 0.54, "grad_norm": 1.8085405826568604, "learning_rate": 9.30395891959087e-06, "loss": 0.9798, "step": 9355 }, { "epoch": 0.54, "grad_norm": 1.8736507892608643, "learning_rate": 9.302105825338876e-06, "loss": 0.9638, "step": 9356 }, { "epoch": 0.54, "grad_norm": 1.645461082458496, "learning_rate": 9.300252755169261e-06, "loss": 1.0142, "step": 9357 }, { "epoch": 0.54, "grad_norm": 1.8870527744293213, "learning_rate": 9.29839970914597e-06, "loss": 0.9952, "step": 9358 }, { "epoch": 0.54, "grad_norm": 1.7292526960372925, "learning_rate": 9.296546687332941e-06, "loss": 0.8701, "step": 9359 }, { "epoch": 0.54, "grad_norm": 1.8257315158843994, "learning_rate": 9.294693689794123e-06, "loss": 0.988, "step": 9360 }, { "epoch": 0.54, "grad_norm": 1.8182116746902466, "learning_rate": 9.292840716593458e-06, "loss": 0.9103, "step": 9361 }, { "epoch": 0.54, "grad_norm": 1.8039677143096924, "learning_rate": 9.290987767794883e-06, "loss": 0.9649, "step": 9362 }, { "epoch": 0.54, "grad_norm": 1.6090418100357056, "learning_rate": 9.28913484346234e-06, "loss": 0.9163, "step": 9363 }, { "epoch": 0.54, "grad_norm": 1.694377064704895, "learning_rate": 9.287281943659767e-06, "loss": 0.9383, "step": 9364 }, { "epoch": 0.54, "grad_norm": 1.7505521774291992, "learning_rate": 9.285429068451103e-06, "loss": 1.0301, "step": 9365 }, { "epoch": 0.54, "grad_norm": 1.6647368669509888, "learning_rate": 9.283576217900286e-06, "loss": 0.9736, "step": 9366 }, { "epoch": 0.54, "grad_norm": 1.736624002456665, "learning_rate": 9.281723392071254e-06, "loss": 0.9525, "step": 9367 }, { "epoch": 0.54, "grad_norm": 1.7116786241531372, "learning_rate": 9.279870591027939e-06, "loss": 0.9935, "step": 9368 }, { "epoch": 0.54, "grad_norm": 1.7518374919891357, "learning_rate": 9.27801781483428e-06, "loss": 0.9534, "step": 9369 }, { "epoch": 0.54, "grad_norm": 1.7609783411026, "learning_rate": 9.27616506355421e-06, "loss": 0.9716, "step": 9370 }, { "epoch": 0.54, "grad_norm": 1.8115838766098022, "learning_rate": 9.274312337251658e-06, "loss": 0.9897, "step": 9371 }, { "epoch": 0.54, "grad_norm": 1.8622715473175049, "learning_rate": 9.272459635990563e-06, "loss": 0.9372, "step": 9372 }, { "epoch": 0.54, "grad_norm": 1.8954293727874756, "learning_rate": 9.270606959834853e-06, "loss": 0.9923, "step": 9373 }, { "epoch": 0.54, "grad_norm": 1.6513603925704956, "learning_rate": 9.26875430884846e-06, "loss": 0.8766, "step": 9374 }, { "epoch": 0.54, "grad_norm": 1.7509721517562866, "learning_rate": 9.266901683095313e-06, "loss": 0.9584, "step": 9375 }, { "epoch": 0.54, "grad_norm": 1.7485777139663696, "learning_rate": 9.26504908263934e-06, "loss": 0.967, "step": 9376 }, { "epoch": 0.54, "grad_norm": 1.8830461502075195, "learning_rate": 9.263196507544472e-06, "loss": 0.9521, "step": 9377 }, { "epoch": 0.54, "grad_norm": 1.940224051475525, "learning_rate": 9.261343957874633e-06, "loss": 0.9947, "step": 9378 }, { "epoch": 0.54, "grad_norm": 1.9655793905258179, "learning_rate": 9.259491433693751e-06, "loss": 1.0147, "step": 9379 }, { "epoch": 0.54, "grad_norm": 1.828822135925293, "learning_rate": 9.257638935065752e-06, "loss": 1.0006, "step": 9380 }, { "epoch": 0.54, "grad_norm": 1.077743649482727, "learning_rate": 9.255786462054559e-06, "loss": 0.6011, "step": 9381 }, { "epoch": 0.54, "grad_norm": 1.8199617862701416, "learning_rate": 9.253934014724097e-06, "loss": 0.9852, "step": 9382 }, { "epoch": 0.54, "grad_norm": 1.846769094467163, "learning_rate": 9.252081593138284e-06, "loss": 0.926, "step": 9383 }, { "epoch": 0.54, "grad_norm": 1.7593907117843628, "learning_rate": 9.25022919736105e-06, "loss": 0.8686, "step": 9384 }, { "epoch": 0.54, "grad_norm": 1.9020034074783325, "learning_rate": 9.248376827456312e-06, "loss": 0.9573, "step": 9385 }, { "epoch": 0.54, "grad_norm": 1.859734296798706, "learning_rate": 9.246524483487988e-06, "loss": 0.9477, "step": 9386 }, { "epoch": 0.54, "grad_norm": 1.704759120941162, "learning_rate": 9.24467216552e-06, "loss": 0.9924, "step": 9387 }, { "epoch": 0.54, "grad_norm": 1.8651659488677979, "learning_rate": 9.242819873616268e-06, "loss": 1.0056, "step": 9388 }, { "epoch": 0.54, "grad_norm": 1.623112678527832, "learning_rate": 9.240967607840706e-06, "loss": 0.9713, "step": 9389 }, { "epoch": 0.54, "grad_norm": 1.8262887001037598, "learning_rate": 9.23911536825723e-06, "loss": 1.0327, "step": 9390 }, { "epoch": 0.54, "grad_norm": 1.5998939275741577, "learning_rate": 9.237263154929759e-06, "loss": 0.9604, "step": 9391 }, { "epoch": 0.54, "grad_norm": 1.9035791158676147, "learning_rate": 9.235410967922205e-06, "loss": 0.9565, "step": 9392 }, { "epoch": 0.54, "grad_norm": 1.736545205116272, "learning_rate": 9.233558807298484e-06, "loss": 1.0426, "step": 9393 }, { "epoch": 0.54, "grad_norm": 1.8222191333770752, "learning_rate": 9.231706673122504e-06, "loss": 0.913, "step": 9394 }, { "epoch": 0.54, "grad_norm": 1.8094574213027954, "learning_rate": 9.229854565458181e-06, "loss": 1.0163, "step": 9395 }, { "epoch": 0.54, "grad_norm": 1.698534607887268, "learning_rate": 9.228002484369429e-06, "loss": 0.9728, "step": 9396 }, { "epoch": 0.54, "grad_norm": 1.6703506708145142, "learning_rate": 9.226150429920153e-06, "loss": 0.8821, "step": 9397 }, { "epoch": 0.54, "grad_norm": 1.7057803869247437, "learning_rate": 9.224298402174264e-06, "loss": 0.9655, "step": 9398 }, { "epoch": 0.54, "grad_norm": 1.8563436269760132, "learning_rate": 9.222446401195672e-06, "loss": 0.9388, "step": 9399 }, { "epoch": 0.54, "grad_norm": 1.850363850593567, "learning_rate": 9.22059442704828e-06, "loss": 0.9431, "step": 9400 }, { "epoch": 0.54, "grad_norm": 1.6936535835266113, "learning_rate": 9.218742479796e-06, "loss": 0.9031, "step": 9401 }, { "epoch": 0.54, "grad_norm": 1.9065234661102295, "learning_rate": 9.216890559502732e-06, "loss": 0.9411, "step": 9402 }, { "epoch": 0.54, "grad_norm": 1.7509254217147827, "learning_rate": 9.215038666232385e-06, "loss": 1.0015, "step": 9403 }, { "epoch": 0.54, "grad_norm": 1.7968378067016602, "learning_rate": 9.213186800048862e-06, "loss": 0.9325, "step": 9404 }, { "epoch": 0.54, "grad_norm": 1.8421032428741455, "learning_rate": 9.211334961016063e-06, "loss": 0.9295, "step": 9405 }, { "epoch": 0.54, "grad_norm": 1.9317408800125122, "learning_rate": 9.20948314919789e-06, "loss": 1.0458, "step": 9406 }, { "epoch": 0.54, "grad_norm": 1.9427756071090698, "learning_rate": 9.207631364658244e-06, "loss": 0.9244, "step": 9407 }, { "epoch": 0.54, "grad_norm": 1.1277761459350586, "learning_rate": 9.20577960746103e-06, "loss": 0.6026, "step": 9408 }, { "epoch": 0.54, "grad_norm": 1.7018071413040161, "learning_rate": 9.203927877670143e-06, "loss": 0.9916, "step": 9409 }, { "epoch": 0.54, "grad_norm": 1.9066749811172485, "learning_rate": 9.202076175349477e-06, "loss": 0.9313, "step": 9410 }, { "epoch": 0.54, "grad_norm": 1.7801448106765747, "learning_rate": 9.200224500562937e-06, "loss": 0.908, "step": 9411 }, { "epoch": 0.54, "grad_norm": 1.7181084156036377, "learning_rate": 9.198372853374415e-06, "loss": 0.9213, "step": 9412 }, { "epoch": 0.54, "grad_norm": 1.7172428369522095, "learning_rate": 9.196521233847806e-06, "loss": 0.892, "step": 9413 }, { "epoch": 0.54, "grad_norm": 1.706556797027588, "learning_rate": 9.194669642047004e-06, "loss": 0.9235, "step": 9414 }, { "epoch": 0.54, "grad_norm": 1.6751763820648193, "learning_rate": 9.192818078035904e-06, "loss": 0.9211, "step": 9415 }, { "epoch": 0.54, "grad_norm": 2.077202558517456, "learning_rate": 9.190966541878399e-06, "loss": 0.8807, "step": 9416 }, { "epoch": 0.54, "grad_norm": 1.7662979364395142, "learning_rate": 9.189115033638378e-06, "loss": 0.9822, "step": 9417 }, { "epoch": 0.54, "grad_norm": 1.911409854888916, "learning_rate": 9.187263553379731e-06, "loss": 0.9367, "step": 9418 }, { "epoch": 0.54, "grad_norm": 1.8815838098526, "learning_rate": 9.185412101166349e-06, "loss": 1.0048, "step": 9419 }, { "epoch": 0.54, "grad_norm": 1.6018904447555542, "learning_rate": 9.18356067706212e-06, "loss": 0.943, "step": 9420 }, { "epoch": 0.54, "grad_norm": 1.7018402814865112, "learning_rate": 9.181709281130932e-06, "loss": 0.9695, "step": 9421 }, { "epoch": 0.54, "grad_norm": 1.0471857786178589, "learning_rate": 9.17985791343667e-06, "loss": 0.621, "step": 9422 }, { "epoch": 0.54, "grad_norm": 1.5961575508117676, "learning_rate": 9.178006574043221e-06, "loss": 0.9629, "step": 9423 }, { "epoch": 0.54, "grad_norm": 1.9312336444854736, "learning_rate": 9.17615526301447e-06, "loss": 0.9628, "step": 9424 }, { "epoch": 0.54, "grad_norm": 1.6334311962127686, "learning_rate": 9.174303980414295e-06, "loss": 0.9408, "step": 9425 }, { "epoch": 0.54, "grad_norm": 1.6464003324508667, "learning_rate": 9.172452726306586e-06, "loss": 0.9167, "step": 9426 }, { "epoch": 0.54, "grad_norm": 1.7353788614273071, "learning_rate": 9.170601500755224e-06, "loss": 1.0614, "step": 9427 }, { "epoch": 0.54, "grad_norm": 1.622787594795227, "learning_rate": 9.168750303824085e-06, "loss": 0.9679, "step": 9428 }, { "epoch": 0.54, "grad_norm": 1.8990715742111206, "learning_rate": 9.166899135577052e-06, "loss": 1.0443, "step": 9429 }, { "epoch": 0.54, "grad_norm": 1.7969086170196533, "learning_rate": 9.165047996078001e-06, "loss": 0.9661, "step": 9430 }, { "epoch": 0.54, "grad_norm": 1.7733854055404663, "learning_rate": 9.163196885390812e-06, "loss": 0.9886, "step": 9431 }, { "epoch": 0.54, "grad_norm": 1.5882633924484253, "learning_rate": 9.161345803579362e-06, "loss": 0.9392, "step": 9432 }, { "epoch": 0.54, "grad_norm": 1.7996106147766113, "learning_rate": 9.159494750707527e-06, "loss": 0.9823, "step": 9433 }, { "epoch": 0.54, "grad_norm": 1.6894115209579468, "learning_rate": 9.157643726839177e-06, "loss": 0.9926, "step": 9434 }, { "epoch": 0.54, "grad_norm": 1.8034989833831787, "learning_rate": 9.155792732038192e-06, "loss": 0.9992, "step": 9435 }, { "epoch": 0.54, "grad_norm": 1.6855391263961792, "learning_rate": 9.153941766368439e-06, "loss": 0.9735, "step": 9436 }, { "epoch": 0.54, "grad_norm": 1.645350694656372, "learning_rate": 9.152090829893792e-06, "loss": 0.9966, "step": 9437 }, { "epoch": 0.54, "grad_norm": 1.6695024967193604, "learning_rate": 9.150239922678122e-06, "loss": 0.9534, "step": 9438 }, { "epoch": 0.54, "grad_norm": 1.7206846475601196, "learning_rate": 9.1483890447853e-06, "loss": 0.9488, "step": 9439 }, { "epoch": 0.54, "grad_norm": 1.875939130783081, "learning_rate": 9.146538196279193e-06, "loss": 0.9287, "step": 9440 }, { "epoch": 0.54, "grad_norm": 1.6875778436660767, "learning_rate": 9.144687377223669e-06, "loss": 0.9785, "step": 9441 }, { "epoch": 0.54, "grad_norm": 1.7431803941726685, "learning_rate": 9.142836587682594e-06, "loss": 0.9611, "step": 9442 }, { "epoch": 0.54, "grad_norm": 1.6714915037155151, "learning_rate": 9.140985827719835e-06, "loss": 0.8847, "step": 9443 }, { "epoch": 0.54, "grad_norm": 1.6376293897628784, "learning_rate": 9.139135097399254e-06, "loss": 0.9481, "step": 9444 }, { "epoch": 0.54, "grad_norm": 1.180797815322876, "learning_rate": 9.137284396784716e-06, "loss": 0.6542, "step": 9445 }, { "epoch": 0.54, "grad_norm": 1.837324619293213, "learning_rate": 9.135433725940086e-06, "loss": 0.9308, "step": 9446 }, { "epoch": 0.54, "grad_norm": 1.7160722017288208, "learning_rate": 9.13358308492922e-06, "loss": 0.9645, "step": 9447 }, { "epoch": 0.54, "grad_norm": 1.8531137704849243, "learning_rate": 9.131732473815984e-06, "loss": 0.9529, "step": 9448 }, { "epoch": 0.54, "grad_norm": 1.7481144666671753, "learning_rate": 9.129881892664232e-06, "loss": 0.9322, "step": 9449 }, { "epoch": 0.54, "grad_norm": 1.6420543193817139, "learning_rate": 9.128031341537826e-06, "loss": 0.9179, "step": 9450 }, { "epoch": 0.54, "grad_norm": 1.7773021459579468, "learning_rate": 9.126180820500624e-06, "loss": 1.0342, "step": 9451 }, { "epoch": 0.54, "grad_norm": 1.8343827724456787, "learning_rate": 9.124330329616482e-06, "loss": 0.9579, "step": 9452 }, { "epoch": 0.54, "grad_norm": 1.7513643503189087, "learning_rate": 9.122479868949253e-06, "loss": 0.9919, "step": 9453 }, { "epoch": 0.54, "grad_norm": 1.779710292816162, "learning_rate": 9.12062943856279e-06, "loss": 0.8962, "step": 9454 }, { "epoch": 0.54, "grad_norm": 1.8562480211257935, "learning_rate": 9.118779038520953e-06, "loss": 0.9752, "step": 9455 }, { "epoch": 0.54, "grad_norm": 1.8598366975784302, "learning_rate": 9.116928668887587e-06, "loss": 0.9388, "step": 9456 }, { "epoch": 0.54, "grad_norm": 1.6017711162567139, "learning_rate": 9.115078329726548e-06, "loss": 0.8695, "step": 9457 }, { "epoch": 0.54, "grad_norm": 1.767075538635254, "learning_rate": 9.113228021101682e-06, "loss": 0.9458, "step": 9458 }, { "epoch": 0.54, "grad_norm": 1.7146070003509521, "learning_rate": 9.111377743076842e-06, "loss": 0.9633, "step": 9459 }, { "epoch": 0.54, "grad_norm": 2.0590147972106934, "learning_rate": 9.109527495715872e-06, "loss": 1.0209, "step": 9460 }, { "epoch": 0.54, "grad_norm": 1.7618181705474854, "learning_rate": 9.107677279082619e-06, "loss": 0.8306, "step": 9461 }, { "epoch": 0.54, "grad_norm": 1.7003214359283447, "learning_rate": 9.105827093240932e-06, "loss": 0.9516, "step": 9462 }, { "epoch": 0.54, "grad_norm": 1.6746435165405273, "learning_rate": 9.103976938254656e-06, "loss": 0.9773, "step": 9463 }, { "epoch": 0.54, "grad_norm": 1.7741769552230835, "learning_rate": 9.10212681418763e-06, "loss": 0.9066, "step": 9464 }, { "epoch": 0.54, "grad_norm": 1.8328062295913696, "learning_rate": 9.100276721103703e-06, "loss": 0.98, "step": 9465 }, { "epoch": 0.54, "grad_norm": 1.7269577980041504, "learning_rate": 9.098426659066711e-06, "loss": 0.9496, "step": 9466 }, { "epoch": 0.54, "grad_norm": 1.7527824640274048, "learning_rate": 9.096576628140497e-06, "loss": 0.9444, "step": 9467 }, { "epoch": 0.54, "grad_norm": 1.8261911869049072, "learning_rate": 9.094726628388899e-06, "loss": 1.012, "step": 9468 }, { "epoch": 0.54, "grad_norm": 1.655664324760437, "learning_rate": 9.092876659875757e-06, "loss": 1.013, "step": 9469 }, { "epoch": 0.54, "grad_norm": 1.7257881164550781, "learning_rate": 9.091026722664908e-06, "loss": 0.8881, "step": 9470 }, { "epoch": 0.54, "grad_norm": 1.8430830240249634, "learning_rate": 9.089176816820187e-06, "loss": 1.0273, "step": 9471 }, { "epoch": 0.54, "grad_norm": 2.2765910625457764, "learning_rate": 9.08732694240543e-06, "loss": 1.0921, "step": 9472 }, { "epoch": 0.54, "grad_norm": 1.716312289237976, "learning_rate": 9.08547709948447e-06, "loss": 0.9452, "step": 9473 }, { "epoch": 0.54, "grad_norm": 1.932315707206726, "learning_rate": 9.083627288121141e-06, "loss": 1.0633, "step": 9474 }, { "epoch": 0.54, "grad_norm": 1.8637512922286987, "learning_rate": 9.081777508379275e-06, "loss": 1.0597, "step": 9475 }, { "epoch": 0.54, "grad_norm": 1.9153664112091064, "learning_rate": 9.0799277603227e-06, "loss": 0.8944, "step": 9476 }, { "epoch": 0.54, "grad_norm": 1.8246362209320068, "learning_rate": 9.07807804401525e-06, "loss": 0.9529, "step": 9477 }, { "epoch": 0.54, "grad_norm": 1.9576621055603027, "learning_rate": 9.076228359520752e-06, "loss": 0.972, "step": 9478 }, { "epoch": 0.54, "grad_norm": 2.001964807510376, "learning_rate": 9.074378706903029e-06, "loss": 0.9575, "step": 9479 }, { "epoch": 0.54, "grad_norm": 1.8684083223342896, "learning_rate": 9.072529086225917e-06, "loss": 0.8825, "step": 9480 }, { "epoch": 0.54, "grad_norm": 1.8071510791778564, "learning_rate": 9.070679497553232e-06, "loss": 1.0095, "step": 9481 }, { "epoch": 0.54, "grad_norm": 1.7188029289245605, "learning_rate": 9.068829940948802e-06, "loss": 0.9754, "step": 9482 }, { "epoch": 0.54, "grad_norm": 1.7659193277359009, "learning_rate": 9.06698041647645e-06, "loss": 0.9619, "step": 9483 }, { "epoch": 0.54, "grad_norm": 1.6424344778060913, "learning_rate": 9.065130924199998e-06, "loss": 0.9424, "step": 9484 }, { "epoch": 0.54, "grad_norm": 1.927290678024292, "learning_rate": 9.063281464183267e-06, "loss": 0.9697, "step": 9485 }, { "epoch": 0.54, "grad_norm": 1.84980046749115, "learning_rate": 9.061432036490076e-06, "loss": 0.939, "step": 9486 }, { "epoch": 0.54, "grad_norm": 1.702980637550354, "learning_rate": 9.059582641184242e-06, "loss": 1.0235, "step": 9487 }, { "epoch": 0.54, "grad_norm": 1.7925997972488403, "learning_rate": 9.057733278329585e-06, "loss": 0.9668, "step": 9488 }, { "epoch": 0.54, "grad_norm": 1.7005348205566406, "learning_rate": 9.055883947989921e-06, "loss": 0.9953, "step": 9489 }, { "epoch": 0.54, "grad_norm": 1.7919210195541382, "learning_rate": 9.054034650229065e-06, "loss": 0.9414, "step": 9490 }, { "epoch": 0.54, "grad_norm": 1.9344539642333984, "learning_rate": 9.052185385110826e-06, "loss": 0.9659, "step": 9491 }, { "epoch": 0.54, "grad_norm": 1.8405370712280273, "learning_rate": 9.050336152699026e-06, "loss": 0.9985, "step": 9492 }, { "epoch": 0.54, "grad_norm": 1.7546119689941406, "learning_rate": 9.048486953057472e-06, "loss": 0.9996, "step": 9493 }, { "epoch": 0.54, "grad_norm": 1.9716124534606934, "learning_rate": 9.046637786249977e-06, "loss": 0.9922, "step": 9494 }, { "epoch": 0.54, "grad_norm": 1.7934556007385254, "learning_rate": 9.044788652340346e-06, "loss": 0.9561, "step": 9495 }, { "epoch": 0.54, "grad_norm": 1.8514511585235596, "learning_rate": 9.042939551392392e-06, "loss": 0.9761, "step": 9496 }, { "epoch": 0.54, "grad_norm": 1.8618247509002686, "learning_rate": 9.041090483469921e-06, "loss": 0.9264, "step": 9497 }, { "epoch": 0.54, "grad_norm": 1.636122465133667, "learning_rate": 9.039241448636739e-06, "loss": 0.9431, "step": 9498 }, { "epoch": 0.54, "grad_norm": 0.9527537822723389, "learning_rate": 9.03739244695665e-06, "loss": 0.5931, "step": 9499 }, { "epoch": 0.54, "grad_norm": 1.0867558717727661, "learning_rate": 9.035543478493458e-06, "loss": 0.6989, "step": 9500 }, { "epoch": 0.54, "grad_norm": 1.7727773189544678, "learning_rate": 9.033694543310968e-06, "loss": 1.0118, "step": 9501 }, { "epoch": 0.54, "grad_norm": 1.7828236818313599, "learning_rate": 9.031845641472978e-06, "loss": 0.9518, "step": 9502 }, { "epoch": 0.55, "grad_norm": 1.7448800802230835, "learning_rate": 9.02999677304329e-06, "loss": 1.0723, "step": 9503 }, { "epoch": 0.55, "grad_norm": 1.7413157224655151, "learning_rate": 9.028147938085705e-06, "loss": 0.9257, "step": 9504 }, { "epoch": 0.55, "grad_norm": 1.9028218984603882, "learning_rate": 9.02629913666402e-06, "loss": 0.9451, "step": 9505 }, { "epoch": 0.55, "grad_norm": 1.788261890411377, "learning_rate": 9.024450368842033e-06, "loss": 0.9795, "step": 9506 }, { "epoch": 0.55, "grad_norm": 1.7059335708618164, "learning_rate": 9.022601634683539e-06, "loss": 0.9401, "step": 9507 }, { "epoch": 0.55, "grad_norm": 2.094498634338379, "learning_rate": 9.02075293425233e-06, "loss": 1.0031, "step": 9508 }, { "epoch": 0.55, "grad_norm": 1.8183048963546753, "learning_rate": 9.018904267612205e-06, "loss": 0.9388, "step": 9509 }, { "epoch": 0.55, "grad_norm": 1.0388621091842651, "learning_rate": 9.01705563482695e-06, "loss": 0.5598, "step": 9510 }, { "epoch": 0.55, "grad_norm": 1.7996293306350708, "learning_rate": 9.01520703596036e-06, "loss": 0.9998, "step": 9511 }, { "epoch": 0.55, "grad_norm": 1.6867812871932983, "learning_rate": 9.013358471076226e-06, "loss": 1.0428, "step": 9512 }, { "epoch": 0.55, "grad_norm": 1.6633232831954956, "learning_rate": 9.011509940238335e-06, "loss": 0.9994, "step": 9513 }, { "epoch": 0.55, "grad_norm": 1.6506688594818115, "learning_rate": 9.009661443510472e-06, "loss": 0.9509, "step": 9514 }, { "epoch": 0.55, "grad_norm": 1.8423984050750732, "learning_rate": 9.007812980956427e-06, "loss": 0.9095, "step": 9515 }, { "epoch": 0.55, "grad_norm": 1.9332531690597534, "learning_rate": 9.005964552639983e-06, "loss": 0.9607, "step": 9516 }, { "epoch": 0.55, "grad_norm": 1.8392046689987183, "learning_rate": 9.004116158624928e-06, "loss": 0.9435, "step": 9517 }, { "epoch": 0.55, "grad_norm": 1.7351148128509521, "learning_rate": 9.002267798975044e-06, "loss": 0.8928, "step": 9518 }, { "epoch": 0.55, "grad_norm": 1.748577356338501, "learning_rate": 9.00041947375411e-06, "loss": 0.973, "step": 9519 }, { "epoch": 0.55, "grad_norm": 1.611171841621399, "learning_rate": 8.998571183025906e-06, "loss": 0.9533, "step": 9520 }, { "epoch": 0.55, "grad_norm": 1.6906951665878296, "learning_rate": 8.996722926854215e-06, "loss": 0.9997, "step": 9521 }, { "epoch": 0.55, "grad_norm": 1.8013076782226562, "learning_rate": 8.994874705302814e-06, "loss": 0.8942, "step": 9522 }, { "epoch": 0.55, "grad_norm": 1.580102801322937, "learning_rate": 8.993026518435477e-06, "loss": 1.0187, "step": 9523 }, { "epoch": 0.55, "grad_norm": 1.818587303161621, "learning_rate": 8.991178366315982e-06, "loss": 0.9452, "step": 9524 }, { "epoch": 0.55, "grad_norm": 1.6145761013031006, "learning_rate": 8.989330249008106e-06, "loss": 0.8558, "step": 9525 }, { "epoch": 0.55, "grad_norm": 1.8406963348388672, "learning_rate": 8.987482166575618e-06, "loss": 0.9711, "step": 9526 }, { "epoch": 0.55, "grad_norm": 1.7605501413345337, "learning_rate": 8.985634119082289e-06, "loss": 0.9203, "step": 9527 }, { "epoch": 0.55, "grad_norm": 1.6978834867477417, "learning_rate": 8.983786106591897e-06, "loss": 1.0072, "step": 9528 }, { "epoch": 0.55, "grad_norm": 1.6757129430770874, "learning_rate": 8.981938129168208e-06, "loss": 0.8962, "step": 9529 }, { "epoch": 0.55, "grad_norm": 1.9993942975997925, "learning_rate": 8.980090186874989e-06, "loss": 0.9645, "step": 9530 }, { "epoch": 0.55, "grad_norm": 1.742680311203003, "learning_rate": 8.978242279776009e-06, "loss": 0.8854, "step": 9531 }, { "epoch": 0.55, "grad_norm": 1.7286977767944336, "learning_rate": 8.976394407935034e-06, "loss": 0.9304, "step": 9532 }, { "epoch": 0.55, "grad_norm": 1.7718924283981323, "learning_rate": 8.974546571415829e-06, "loss": 0.9673, "step": 9533 }, { "epoch": 0.55, "grad_norm": 1.8872159719467163, "learning_rate": 8.972698770282156e-06, "loss": 0.8919, "step": 9534 }, { "epoch": 0.55, "grad_norm": 1.7218289375305176, "learning_rate": 8.97085100459778e-06, "loss": 0.9493, "step": 9535 }, { "epoch": 0.55, "grad_norm": 1.7465647459030151, "learning_rate": 8.96900327442646e-06, "loss": 0.9487, "step": 9536 }, { "epoch": 0.55, "grad_norm": 1.8240687847137451, "learning_rate": 8.967155579831959e-06, "loss": 0.9612, "step": 9537 }, { "epoch": 0.55, "grad_norm": 1.7849608659744263, "learning_rate": 8.965307920878033e-06, "loss": 0.9452, "step": 9538 }, { "epoch": 0.55, "grad_norm": 1.7726281881332397, "learning_rate": 8.963460297628437e-06, "loss": 0.9387, "step": 9539 }, { "epoch": 0.55, "grad_norm": 1.056203842163086, "learning_rate": 8.961612710146934e-06, "loss": 0.6264, "step": 9540 }, { "epoch": 0.55, "grad_norm": 1.0727276802062988, "learning_rate": 8.959765158497275e-06, "loss": 0.6305, "step": 9541 }, { "epoch": 0.55, "grad_norm": 1.7222744226455688, "learning_rate": 8.957917642743214e-06, "loss": 0.9104, "step": 9542 }, { "epoch": 0.55, "grad_norm": 1.7554842233657837, "learning_rate": 8.956070162948505e-06, "loss": 0.9713, "step": 9543 }, { "epoch": 0.55, "grad_norm": 1.740689754486084, "learning_rate": 8.954222719176898e-06, "loss": 0.9424, "step": 9544 }, { "epoch": 0.55, "grad_norm": 1.7328921556472778, "learning_rate": 8.952375311492142e-06, "loss": 1.0324, "step": 9545 }, { "epoch": 0.55, "grad_norm": 1.779259204864502, "learning_rate": 8.95052793995799e-06, "loss": 0.9891, "step": 9546 }, { "epoch": 0.55, "grad_norm": 1.7910897731781006, "learning_rate": 8.948680604638188e-06, "loss": 0.8713, "step": 9547 }, { "epoch": 0.55, "grad_norm": 1.660340428352356, "learning_rate": 8.946833305596481e-06, "loss": 0.8751, "step": 9548 }, { "epoch": 0.55, "grad_norm": 1.8062788248062134, "learning_rate": 8.944986042896615e-06, "loss": 0.9456, "step": 9549 }, { "epoch": 0.55, "grad_norm": 1.8999450206756592, "learning_rate": 8.943138816602333e-06, "loss": 0.977, "step": 9550 }, { "epoch": 0.55, "grad_norm": 1.8570343255996704, "learning_rate": 8.941291626777378e-06, "loss": 0.9803, "step": 9551 }, { "epoch": 0.55, "grad_norm": 1.8765219449996948, "learning_rate": 8.939444473485492e-06, "loss": 1.0303, "step": 9552 }, { "epoch": 0.55, "grad_norm": 1.908486008644104, "learning_rate": 8.937597356790414e-06, "loss": 0.8866, "step": 9553 }, { "epoch": 0.55, "grad_norm": 1.767295241355896, "learning_rate": 8.935750276755884e-06, "loss": 1.0025, "step": 9554 }, { "epoch": 0.55, "grad_norm": 1.754777193069458, "learning_rate": 8.93390323344564e-06, "loss": 0.909, "step": 9555 }, { "epoch": 0.55, "grad_norm": 1.950538992881775, "learning_rate": 8.932056226923416e-06, "loss": 0.9791, "step": 9556 }, { "epoch": 0.55, "grad_norm": 1.0358662605285645, "learning_rate": 8.930209257252948e-06, "loss": 0.6559, "step": 9557 }, { "epoch": 0.55, "grad_norm": 1.9117839336395264, "learning_rate": 8.92836232449797e-06, "loss": 0.9522, "step": 9558 }, { "epoch": 0.55, "grad_norm": 1.0651201009750366, "learning_rate": 8.926515428722217e-06, "loss": 0.5732, "step": 9559 }, { "epoch": 0.55, "grad_norm": 1.6351854801177979, "learning_rate": 8.924668569989416e-06, "loss": 0.9464, "step": 9560 }, { "epoch": 0.55, "grad_norm": 1.02516508102417, "learning_rate": 8.9228217483633e-06, "loss": 0.5854, "step": 9561 }, { "epoch": 0.55, "grad_norm": 1.8900099992752075, "learning_rate": 8.920974963907596e-06, "loss": 1.0156, "step": 9562 }, { "epoch": 0.55, "grad_norm": 1.7059491872787476, "learning_rate": 8.919128216686033e-06, "loss": 0.8825, "step": 9563 }, { "epoch": 0.55, "grad_norm": 1.8137855529785156, "learning_rate": 8.917281506762335e-06, "loss": 1.0537, "step": 9564 }, { "epoch": 0.55, "grad_norm": 1.7275257110595703, "learning_rate": 8.915434834200228e-06, "loss": 0.9196, "step": 9565 }, { "epoch": 0.55, "grad_norm": 1.9669073820114136, "learning_rate": 8.913588199063435e-06, "loss": 0.8261, "step": 9566 }, { "epoch": 0.55, "grad_norm": 1.7783315181732178, "learning_rate": 8.911741601415678e-06, "loss": 0.9535, "step": 9567 }, { "epoch": 0.55, "grad_norm": 1.639341115951538, "learning_rate": 8.909895041320678e-06, "loss": 0.8925, "step": 9568 }, { "epoch": 0.55, "grad_norm": 1.6668494939804077, "learning_rate": 8.908048518842154e-06, "loss": 0.9068, "step": 9569 }, { "epoch": 0.55, "grad_norm": 1.7862534523010254, "learning_rate": 8.906202034043828e-06, "loss": 0.952, "step": 9570 }, { "epoch": 0.55, "grad_norm": 1.742661952972412, "learning_rate": 8.904355586989414e-06, "loss": 0.8981, "step": 9571 }, { "epoch": 0.55, "grad_norm": 1.7427294254302979, "learning_rate": 8.902509177742626e-06, "loss": 0.9565, "step": 9572 }, { "epoch": 0.55, "grad_norm": 1.6680155992507935, "learning_rate": 8.900662806367182e-06, "loss": 1.0055, "step": 9573 }, { "epoch": 0.55, "grad_norm": 1.8078573942184448, "learning_rate": 8.898816472926795e-06, "loss": 0.8967, "step": 9574 }, { "epoch": 0.55, "grad_norm": 1.1516464948654175, "learning_rate": 8.896970177485174e-06, "loss": 0.5923, "step": 9575 }, { "epoch": 0.55, "grad_norm": 1.8302619457244873, "learning_rate": 8.895123920106033e-06, "loss": 0.9869, "step": 9576 }, { "epoch": 0.55, "grad_norm": 1.7211086750030518, "learning_rate": 8.893277700853077e-06, "loss": 0.9434, "step": 9577 }, { "epoch": 0.55, "grad_norm": 1.9084489345550537, "learning_rate": 8.891431519790017e-06, "loss": 0.9876, "step": 9578 }, { "epoch": 0.55, "grad_norm": 1.7240585088729858, "learning_rate": 8.889585376980557e-06, "loss": 1.0356, "step": 9579 }, { "epoch": 0.55, "grad_norm": 1.8124111890792847, "learning_rate": 8.887739272488407e-06, "loss": 1.017, "step": 9580 }, { "epoch": 0.55, "grad_norm": 1.6830075979232788, "learning_rate": 8.885893206377263e-06, "loss": 0.9794, "step": 9581 }, { "epoch": 0.55, "grad_norm": 1.749099612236023, "learning_rate": 8.884047178710835e-06, "loss": 1.0162, "step": 9582 }, { "epoch": 0.55, "grad_norm": 1.7007075548171997, "learning_rate": 8.882201189552821e-06, "loss": 0.93, "step": 9583 }, { "epoch": 0.55, "grad_norm": 1.797532558441162, "learning_rate": 8.880355238966923e-06, "loss": 0.9382, "step": 9584 }, { "epoch": 0.55, "grad_norm": 1.7762314081192017, "learning_rate": 8.878509327016838e-06, "loss": 0.9286, "step": 9585 }, { "epoch": 0.55, "grad_norm": 1.6819589138031006, "learning_rate": 8.876663453766263e-06, "loss": 0.8806, "step": 9586 }, { "epoch": 0.55, "grad_norm": 1.7350964546203613, "learning_rate": 8.874817619278893e-06, "loss": 0.991, "step": 9587 }, { "epoch": 0.55, "grad_norm": 1.0609499216079712, "learning_rate": 8.872971823618424e-06, "loss": 0.6276, "step": 9588 }, { "epoch": 0.55, "grad_norm": 1.787153959274292, "learning_rate": 8.871126066848552e-06, "loss": 0.9548, "step": 9589 }, { "epoch": 0.55, "grad_norm": 2.0259146690368652, "learning_rate": 8.869280349032962e-06, "loss": 0.9922, "step": 9590 }, { "epoch": 0.55, "grad_norm": 1.8086462020874023, "learning_rate": 8.867434670235352e-06, "loss": 0.9891, "step": 9591 }, { "epoch": 0.55, "grad_norm": 1.8335354328155518, "learning_rate": 8.865589030519405e-06, "loss": 1.0128, "step": 9592 }, { "epoch": 0.55, "grad_norm": 1.828158974647522, "learning_rate": 8.863743429948812e-06, "loss": 0.9048, "step": 9593 }, { "epoch": 0.55, "grad_norm": 1.8213618993759155, "learning_rate": 8.861897868587262e-06, "loss": 1.0193, "step": 9594 }, { "epoch": 0.55, "grad_norm": 1.7731385231018066, "learning_rate": 8.860052346498435e-06, "loss": 0.9123, "step": 9595 }, { "epoch": 0.55, "grad_norm": 1.6486048698425293, "learning_rate": 8.858206863746018e-06, "loss": 0.907, "step": 9596 }, { "epoch": 0.55, "grad_norm": 1.0992177724838257, "learning_rate": 8.856361420393694e-06, "loss": 0.7051, "step": 9597 }, { "epoch": 0.55, "grad_norm": 1.6694741249084473, "learning_rate": 8.85451601650514e-06, "loss": 0.8699, "step": 9598 }, { "epoch": 0.55, "grad_norm": 1.7299197912216187, "learning_rate": 8.85267065214404e-06, "loss": 0.9088, "step": 9599 }, { "epoch": 0.55, "grad_norm": 1.8383482694625854, "learning_rate": 8.85082532737407e-06, "loss": 0.9431, "step": 9600 }, { "epoch": 0.55, "grad_norm": 1.8750901222229004, "learning_rate": 8.84898004225891e-06, "loss": 1.0146, "step": 9601 }, { "epoch": 0.55, "grad_norm": 1.8442833423614502, "learning_rate": 8.847134796862232e-06, "loss": 0.9215, "step": 9602 }, { "epoch": 0.55, "grad_norm": 1.9772388935089111, "learning_rate": 8.845289591247713e-06, "loss": 0.9364, "step": 9603 }, { "epoch": 0.55, "grad_norm": 0.9751039743423462, "learning_rate": 8.843444425479023e-06, "loss": 0.4999, "step": 9604 }, { "epoch": 0.55, "grad_norm": 1.7800713777542114, "learning_rate": 8.841599299619834e-06, "loss": 0.9667, "step": 9605 }, { "epoch": 0.55, "grad_norm": 1.5958842039108276, "learning_rate": 8.839754213733818e-06, "loss": 0.9445, "step": 9606 }, { "epoch": 0.55, "grad_norm": 1.7308917045593262, "learning_rate": 8.837909167884646e-06, "loss": 1.0054, "step": 9607 }, { "epoch": 0.55, "grad_norm": 1.9478790760040283, "learning_rate": 8.836064162135977e-06, "loss": 1.0303, "step": 9608 }, { "epoch": 0.55, "grad_norm": 1.7335511445999146, "learning_rate": 8.834219196551486e-06, "loss": 0.9714, "step": 9609 }, { "epoch": 0.55, "grad_norm": 1.6473861932754517, "learning_rate": 8.832374271194834e-06, "loss": 0.9467, "step": 9610 }, { "epoch": 0.55, "grad_norm": 1.9433598518371582, "learning_rate": 8.830529386129683e-06, "loss": 1.0636, "step": 9611 }, { "epoch": 0.55, "grad_norm": 1.9085807800292969, "learning_rate": 8.828684541419696e-06, "loss": 0.8839, "step": 9612 }, { "epoch": 0.55, "grad_norm": 1.8517625331878662, "learning_rate": 8.826839737128537e-06, "loss": 0.9621, "step": 9613 }, { "epoch": 0.55, "grad_norm": 1.8316911458969116, "learning_rate": 8.824994973319859e-06, "loss": 0.9305, "step": 9614 }, { "epoch": 0.55, "grad_norm": 1.7378853559494019, "learning_rate": 8.823150250057323e-06, "loss": 0.9346, "step": 9615 }, { "epoch": 0.55, "grad_norm": 1.7135299444198608, "learning_rate": 8.821305567404583e-06, "loss": 1.0465, "step": 9616 }, { "epoch": 0.55, "grad_norm": 1.796515941619873, "learning_rate": 8.819460925425297e-06, "loss": 0.996, "step": 9617 }, { "epoch": 0.55, "grad_norm": 1.7214481830596924, "learning_rate": 8.817616324183116e-06, "loss": 0.89, "step": 9618 }, { "epoch": 0.55, "grad_norm": 1.6071430444717407, "learning_rate": 8.815771763741694e-06, "loss": 0.9728, "step": 9619 }, { "epoch": 0.55, "grad_norm": 1.9085267782211304, "learning_rate": 8.81392724416468e-06, "loss": 0.9682, "step": 9620 }, { "epoch": 0.55, "grad_norm": 1.8057479858398438, "learning_rate": 8.812082765515722e-06, "loss": 0.947, "step": 9621 }, { "epoch": 0.55, "grad_norm": 1.8132497072219849, "learning_rate": 8.810238327858471e-06, "loss": 0.9344, "step": 9622 }, { "epoch": 0.55, "grad_norm": 1.8121356964111328, "learning_rate": 8.80839393125657e-06, "loss": 1.0095, "step": 9623 }, { "epoch": 0.55, "grad_norm": 1.6763031482696533, "learning_rate": 8.806549575773667e-06, "loss": 0.9563, "step": 9624 }, { "epoch": 0.55, "grad_norm": 1.7980499267578125, "learning_rate": 8.804705261473405e-06, "loss": 0.9171, "step": 9625 }, { "epoch": 0.55, "grad_norm": 1.6353436708450317, "learning_rate": 8.802860988419427e-06, "loss": 1.0482, "step": 9626 }, { "epoch": 0.55, "grad_norm": 1.6753578186035156, "learning_rate": 8.801016756675368e-06, "loss": 0.9315, "step": 9627 }, { "epoch": 0.55, "grad_norm": 1.7558215856552124, "learning_rate": 8.799172566304874e-06, "loss": 0.9647, "step": 9628 }, { "epoch": 0.55, "grad_norm": 2.0095503330230713, "learning_rate": 8.797328417371581e-06, "loss": 1.0234, "step": 9629 }, { "epoch": 0.55, "grad_norm": 1.7538678646087646, "learning_rate": 8.795484309939124e-06, "loss": 1.0021, "step": 9630 }, { "epoch": 0.55, "grad_norm": 1.9550825357437134, "learning_rate": 8.793640244071139e-06, "loss": 0.9885, "step": 9631 }, { "epoch": 0.55, "grad_norm": 1.7170133590698242, "learning_rate": 8.791796219831259e-06, "loss": 0.9583, "step": 9632 }, { "epoch": 0.55, "grad_norm": 1.7709131240844727, "learning_rate": 8.789952237283117e-06, "loss": 0.9078, "step": 9633 }, { "epoch": 0.55, "grad_norm": 1.7496020793914795, "learning_rate": 8.788108296490343e-06, "loss": 1.0123, "step": 9634 }, { "epoch": 0.55, "grad_norm": 1.831620454788208, "learning_rate": 8.786264397516564e-06, "loss": 0.9095, "step": 9635 }, { "epoch": 0.55, "grad_norm": 1.6673592329025269, "learning_rate": 8.784420540425413e-06, "loss": 0.9155, "step": 9636 }, { "epoch": 0.55, "grad_norm": 1.6360702514648438, "learning_rate": 8.782576725280513e-06, "loss": 0.9543, "step": 9637 }, { "epoch": 0.55, "grad_norm": 1.646201729774475, "learning_rate": 8.78073295214549e-06, "loss": 0.8538, "step": 9638 }, { "epoch": 0.55, "grad_norm": 1.7288867235183716, "learning_rate": 8.778889221083966e-06, "loss": 0.9037, "step": 9639 }, { "epoch": 0.55, "grad_norm": 1.034565806388855, "learning_rate": 8.777045532159564e-06, "loss": 0.6406, "step": 9640 }, { "epoch": 0.55, "grad_norm": 2.004770278930664, "learning_rate": 8.775201885435906e-06, "loss": 1.0462, "step": 9641 }, { "epoch": 0.55, "grad_norm": 1.7726222276687622, "learning_rate": 8.773358280976607e-06, "loss": 1.0457, "step": 9642 }, { "epoch": 0.55, "grad_norm": 1.6694080829620361, "learning_rate": 8.77151471884529e-06, "loss": 0.9629, "step": 9643 }, { "epoch": 0.55, "grad_norm": 1.769039273262024, "learning_rate": 8.769671199105566e-06, "loss": 0.9704, "step": 9644 }, { "epoch": 0.55, "grad_norm": 1.746877908706665, "learning_rate": 8.767827721821054e-06, "loss": 0.9845, "step": 9645 }, { "epoch": 0.55, "grad_norm": 1.6474612951278687, "learning_rate": 8.765984287055364e-06, "loss": 0.9397, "step": 9646 }, { "epoch": 0.55, "grad_norm": 1.8083648681640625, "learning_rate": 8.764140894872108e-06, "loss": 1.0045, "step": 9647 }, { "epoch": 0.55, "grad_norm": 1.8130719661712646, "learning_rate": 8.7622975453349e-06, "loss": 0.9449, "step": 9648 }, { "epoch": 0.55, "grad_norm": 1.750686526298523, "learning_rate": 8.760454238507345e-06, "loss": 0.9287, "step": 9649 }, { "epoch": 0.55, "grad_norm": 1.8263676166534424, "learning_rate": 8.758610974453052e-06, "loss": 0.9459, "step": 9650 }, { "epoch": 0.55, "grad_norm": 1.8185365200042725, "learning_rate": 8.756767753235628e-06, "loss": 0.9605, "step": 9651 }, { "epoch": 0.55, "grad_norm": 1.7819435596466064, "learning_rate": 8.754924574918675e-06, "loss": 0.9143, "step": 9652 }, { "epoch": 0.55, "grad_norm": 1.625867486000061, "learning_rate": 8.753081439565795e-06, "loss": 0.96, "step": 9653 }, { "epoch": 0.55, "grad_norm": 1.9162887334823608, "learning_rate": 8.751238347240595e-06, "loss": 0.9645, "step": 9654 }, { "epoch": 0.55, "grad_norm": 1.726010799407959, "learning_rate": 8.749395298006668e-06, "loss": 0.929, "step": 9655 }, { "epoch": 0.55, "grad_norm": 1.6468002796173096, "learning_rate": 8.74755229192762e-06, "loss": 0.8925, "step": 9656 }, { "epoch": 0.55, "grad_norm": 1.6392147541046143, "learning_rate": 8.74570932906704e-06, "loss": 0.9376, "step": 9657 }, { "epoch": 0.55, "grad_norm": 1.7753158807754517, "learning_rate": 8.743866409488529e-06, "loss": 1.0555, "step": 9658 }, { "epoch": 0.55, "grad_norm": 1.8286948204040527, "learning_rate": 8.742023533255677e-06, "loss": 0.8981, "step": 9659 }, { "epoch": 0.55, "grad_norm": 1.6443310976028442, "learning_rate": 8.74018070043208e-06, "loss": 0.9228, "step": 9660 }, { "epoch": 0.55, "grad_norm": 1.683529257774353, "learning_rate": 8.738337911081329e-06, "loss": 0.9325, "step": 9661 }, { "epoch": 0.55, "grad_norm": 1.0657858848571777, "learning_rate": 8.736495165267012e-06, "loss": 0.5665, "step": 9662 }, { "epoch": 0.55, "grad_norm": 1.7507518529891968, "learning_rate": 8.734652463052717e-06, "loss": 1.0048, "step": 9663 }, { "epoch": 0.55, "grad_norm": 2.0817203521728516, "learning_rate": 8.732809804502032e-06, "loss": 1.0015, "step": 9664 }, { "epoch": 0.55, "grad_norm": 1.75890052318573, "learning_rate": 8.73096718967854e-06, "loss": 0.9469, "step": 9665 }, { "epoch": 0.55, "grad_norm": 1.9110064506530762, "learning_rate": 8.729124618645827e-06, "loss": 0.9162, "step": 9666 }, { "epoch": 0.55, "grad_norm": 1.8386422395706177, "learning_rate": 8.727282091467472e-06, "loss": 0.9129, "step": 9667 }, { "epoch": 0.55, "grad_norm": 1.1576179265975952, "learning_rate": 8.725439608207056e-06, "loss": 0.6172, "step": 9668 }, { "epoch": 0.55, "grad_norm": 1.7457475662231445, "learning_rate": 8.723597168928159e-06, "loss": 0.9466, "step": 9669 }, { "epoch": 0.55, "grad_norm": 1.8802131414413452, "learning_rate": 8.72175477369436e-06, "loss": 0.9078, "step": 9670 }, { "epoch": 0.55, "grad_norm": 1.935111165046692, "learning_rate": 8.719912422569232e-06, "loss": 1.0129, "step": 9671 }, { "epoch": 0.55, "grad_norm": 1.6842056512832642, "learning_rate": 8.718070115616348e-06, "loss": 0.9663, "step": 9672 }, { "epoch": 0.55, "grad_norm": 1.790739893913269, "learning_rate": 8.716227852899286e-06, "loss": 0.8655, "step": 9673 }, { "epoch": 0.55, "grad_norm": 1.7785013914108276, "learning_rate": 8.714385634481613e-06, "loss": 0.9837, "step": 9674 }, { "epoch": 0.55, "grad_norm": 1.8138951063156128, "learning_rate": 8.712543460426901e-06, "loss": 0.9868, "step": 9675 }, { "epoch": 0.55, "grad_norm": 1.9091839790344238, "learning_rate": 8.71070133079872e-06, "loss": 0.9166, "step": 9676 }, { "epoch": 0.55, "grad_norm": 1.2488070726394653, "learning_rate": 8.70885924566063e-06, "loss": 0.6598, "step": 9677 }, { "epoch": 0.56, "grad_norm": 1.8477082252502441, "learning_rate": 8.707017205076205e-06, "loss": 0.9076, "step": 9678 }, { "epoch": 0.56, "grad_norm": 1.7101072072982788, "learning_rate": 8.705175209109003e-06, "loss": 1.0161, "step": 9679 }, { "epoch": 0.56, "grad_norm": 1.840926170349121, "learning_rate": 8.70333325782259e-06, "loss": 0.9264, "step": 9680 }, { "epoch": 0.56, "grad_norm": 1.7042949199676514, "learning_rate": 8.701491351280521e-06, "loss": 0.924, "step": 9681 }, { "epoch": 0.56, "grad_norm": 1.8257490396499634, "learning_rate": 8.69964948954636e-06, "loss": 0.9654, "step": 9682 }, { "epoch": 0.56, "grad_norm": 1.7320343255996704, "learning_rate": 8.697807672683662e-06, "loss": 0.9265, "step": 9683 }, { "epoch": 0.56, "grad_norm": 1.6063027381896973, "learning_rate": 8.695965900755985e-06, "loss": 0.9934, "step": 9684 }, { "epoch": 0.56, "grad_norm": 1.703420877456665, "learning_rate": 8.694124173826881e-06, "loss": 0.899, "step": 9685 }, { "epoch": 0.56, "grad_norm": 1.5832927227020264, "learning_rate": 8.692282491959904e-06, "loss": 0.9626, "step": 9686 }, { "epoch": 0.56, "grad_norm": 1.7099549770355225, "learning_rate": 8.690440855218606e-06, "loss": 0.9516, "step": 9687 }, { "epoch": 0.56, "grad_norm": 1.8984639644622803, "learning_rate": 8.688599263666536e-06, "loss": 0.9517, "step": 9688 }, { "epoch": 0.56, "grad_norm": 1.898252248764038, "learning_rate": 8.68675771736724e-06, "loss": 0.934, "step": 9689 }, { "epoch": 0.56, "grad_norm": 1.888367772102356, "learning_rate": 8.684916216384268e-06, "loss": 0.9491, "step": 9690 }, { "epoch": 0.56, "grad_norm": 1.8562384843826294, "learning_rate": 8.683074760781163e-06, "loss": 1.0361, "step": 9691 }, { "epoch": 0.56, "grad_norm": 1.880313754081726, "learning_rate": 8.681233350621472e-06, "loss": 0.9818, "step": 9692 }, { "epoch": 0.56, "grad_norm": 1.7483007907867432, "learning_rate": 8.679391985968732e-06, "loss": 1.0765, "step": 9693 }, { "epoch": 0.56, "grad_norm": 1.6772500276565552, "learning_rate": 8.677550666886486e-06, "loss": 0.9398, "step": 9694 }, { "epoch": 0.56, "grad_norm": 1.8419322967529297, "learning_rate": 8.675709393438273e-06, "loss": 0.953, "step": 9695 }, { "epoch": 0.56, "grad_norm": 1.7664134502410889, "learning_rate": 8.67386816568763e-06, "loss": 0.968, "step": 9696 }, { "epoch": 0.56, "grad_norm": 1.7157539129257202, "learning_rate": 8.67202698369809e-06, "loss": 0.9796, "step": 9697 }, { "epoch": 0.56, "grad_norm": 1.7701255083084106, "learning_rate": 8.67018584753319e-06, "loss": 0.9877, "step": 9698 }, { "epoch": 0.56, "grad_norm": 1.855797290802002, "learning_rate": 8.668344757256464e-06, "loss": 0.9418, "step": 9699 }, { "epoch": 0.56, "grad_norm": 1.8157871961593628, "learning_rate": 8.666503712931439e-06, "loss": 0.9642, "step": 9700 }, { "epoch": 0.56, "grad_norm": 1.7075386047363281, "learning_rate": 8.664662714621643e-06, "loss": 0.9357, "step": 9701 }, { "epoch": 0.56, "grad_norm": 1.830863118171692, "learning_rate": 8.66282176239061e-06, "loss": 0.8789, "step": 9702 }, { "epoch": 0.56, "grad_norm": 1.7107125520706177, "learning_rate": 8.660980856301862e-06, "loss": 0.9236, "step": 9703 }, { "epoch": 0.56, "grad_norm": 1.0277130603790283, "learning_rate": 8.659139996418925e-06, "loss": 0.5594, "step": 9704 }, { "epoch": 0.56, "grad_norm": 1.6675851345062256, "learning_rate": 8.65729918280532e-06, "loss": 0.9527, "step": 9705 }, { "epoch": 0.56, "grad_norm": 1.654502511024475, "learning_rate": 8.655458415524571e-06, "loss": 0.9099, "step": 9706 }, { "epoch": 0.56, "grad_norm": 1.9035966396331787, "learning_rate": 8.653617694640196e-06, "loss": 0.9928, "step": 9707 }, { "epoch": 0.56, "grad_norm": 1.6585253477096558, "learning_rate": 8.651777020215713e-06, "loss": 0.9427, "step": 9708 }, { "epoch": 0.56, "grad_norm": 1.7313233613967896, "learning_rate": 8.649936392314638e-06, "loss": 0.9722, "step": 9709 }, { "epoch": 0.56, "grad_norm": 1.5806974172592163, "learning_rate": 8.648095811000488e-06, "loss": 0.8853, "step": 9710 }, { "epoch": 0.56, "grad_norm": 1.775068998336792, "learning_rate": 8.646255276336775e-06, "loss": 0.9058, "step": 9711 }, { "epoch": 0.56, "grad_norm": 1.642682671546936, "learning_rate": 8.644414788387012e-06, "loss": 0.9213, "step": 9712 }, { "epoch": 0.56, "grad_norm": 1.8018274307250977, "learning_rate": 8.642574347214702e-06, "loss": 0.9542, "step": 9713 }, { "epoch": 0.56, "grad_norm": 1.6730376482009888, "learning_rate": 8.640733952883365e-06, "loss": 0.9341, "step": 9714 }, { "epoch": 0.56, "grad_norm": 1.6986688375473022, "learning_rate": 8.638893605456502e-06, "loss": 0.9835, "step": 9715 }, { "epoch": 0.56, "grad_norm": 1.712127447128296, "learning_rate": 8.637053304997618e-06, "loss": 1.0183, "step": 9716 }, { "epoch": 0.56, "grad_norm": 1.7850602865219116, "learning_rate": 8.635213051570217e-06, "loss": 1.0927, "step": 9717 }, { "epoch": 0.56, "grad_norm": 1.6969475746154785, "learning_rate": 8.633372845237803e-06, "loss": 0.921, "step": 9718 }, { "epoch": 0.56, "grad_norm": 1.6289851665496826, "learning_rate": 8.631532686063871e-06, "loss": 0.9846, "step": 9719 }, { "epoch": 0.56, "grad_norm": 1.767514705657959, "learning_rate": 8.629692574111926e-06, "loss": 1.0, "step": 9720 }, { "epoch": 0.56, "grad_norm": 1.7698767185211182, "learning_rate": 8.627852509445462e-06, "loss": 0.9593, "step": 9721 }, { "epoch": 0.56, "grad_norm": 1.8133702278137207, "learning_rate": 8.626012492127975e-06, "loss": 0.9646, "step": 9722 }, { "epoch": 0.56, "grad_norm": 1.889578104019165, "learning_rate": 8.624172522222959e-06, "loss": 0.9346, "step": 9723 }, { "epoch": 0.56, "grad_norm": 1.0171505212783813, "learning_rate": 8.622332599793906e-06, "loss": 0.5231, "step": 9724 }, { "epoch": 0.56, "grad_norm": 1.6942979097366333, "learning_rate": 8.620492724904304e-06, "loss": 0.9819, "step": 9725 }, { "epoch": 0.56, "grad_norm": 1.6233772039413452, "learning_rate": 8.618652897617646e-06, "loss": 0.9597, "step": 9726 }, { "epoch": 0.56, "grad_norm": 1.7178518772125244, "learning_rate": 8.61681311799742e-06, "loss": 1.038, "step": 9727 }, { "epoch": 0.56, "grad_norm": 1.79873788356781, "learning_rate": 8.614973386107107e-06, "loss": 1.0083, "step": 9728 }, { "epoch": 0.56, "grad_norm": 1.760603666305542, "learning_rate": 8.613133702010196e-06, "loss": 1.0058, "step": 9729 }, { "epoch": 0.56, "grad_norm": 1.648322343826294, "learning_rate": 8.611294065770166e-06, "loss": 0.9731, "step": 9730 }, { "epoch": 0.56, "grad_norm": 1.6967090368270874, "learning_rate": 8.609454477450497e-06, "loss": 0.971, "step": 9731 }, { "epoch": 0.56, "grad_norm": 1.753484845161438, "learning_rate": 8.607614937114671e-06, "loss": 1.0044, "step": 9732 }, { "epoch": 0.56, "grad_norm": 1.8227661848068237, "learning_rate": 8.605775444826164e-06, "loss": 0.9016, "step": 9733 }, { "epoch": 0.56, "grad_norm": 1.7095043659210205, "learning_rate": 8.603936000648452e-06, "loss": 0.8888, "step": 9734 }, { "epoch": 0.56, "grad_norm": 1.6874574422836304, "learning_rate": 8.602096604645009e-06, "loss": 0.9798, "step": 9735 }, { "epoch": 0.56, "grad_norm": 1.68003249168396, "learning_rate": 8.600257256879306e-06, "loss": 0.9337, "step": 9736 }, { "epoch": 0.56, "grad_norm": 1.9060606956481934, "learning_rate": 8.598417957414817e-06, "loss": 0.962, "step": 9737 }, { "epoch": 0.56, "grad_norm": 1.7026256322860718, "learning_rate": 8.596578706315006e-06, "loss": 0.9992, "step": 9738 }, { "epoch": 0.56, "grad_norm": 1.8191571235656738, "learning_rate": 8.594739503643345e-06, "loss": 0.9129, "step": 9739 }, { "epoch": 0.56, "grad_norm": 1.7354254722595215, "learning_rate": 8.592900349463297e-06, "loss": 0.8972, "step": 9740 }, { "epoch": 0.56, "grad_norm": 1.7470617294311523, "learning_rate": 8.59106124383833e-06, "loss": 0.9997, "step": 9741 }, { "epoch": 0.56, "grad_norm": 1.7821121215820312, "learning_rate": 8.589222186831903e-06, "loss": 1.0658, "step": 9742 }, { "epoch": 0.56, "grad_norm": 1.844748854637146, "learning_rate": 8.587383178507474e-06, "loss": 0.9313, "step": 9743 }, { "epoch": 0.56, "grad_norm": 1.834816336631775, "learning_rate": 8.58554421892851e-06, "loss": 0.8901, "step": 9744 }, { "epoch": 0.56, "grad_norm": 1.7154957056045532, "learning_rate": 8.583705308158463e-06, "loss": 0.9643, "step": 9745 }, { "epoch": 0.56, "grad_norm": 1.7639740705490112, "learning_rate": 8.581866446260789e-06, "loss": 0.9867, "step": 9746 }, { "epoch": 0.56, "grad_norm": 1.6491105556488037, "learning_rate": 8.580027633298945e-06, "loss": 0.9272, "step": 9747 }, { "epoch": 0.56, "grad_norm": 1.973211646080017, "learning_rate": 8.578188869336378e-06, "loss": 1.006, "step": 9748 }, { "epoch": 0.56, "grad_norm": 1.7114208936691284, "learning_rate": 8.576350154436542e-06, "loss": 0.909, "step": 9749 }, { "epoch": 0.56, "grad_norm": 1.7240196466445923, "learning_rate": 8.574511488662886e-06, "loss": 0.9678, "step": 9750 }, { "epoch": 0.56, "grad_norm": 1.8778692483901978, "learning_rate": 8.572672872078856e-06, "loss": 0.9763, "step": 9751 }, { "epoch": 0.56, "grad_norm": 1.8747127056121826, "learning_rate": 8.570834304747898e-06, "loss": 1.0946, "step": 9752 }, { "epoch": 0.56, "grad_norm": 1.891045331954956, "learning_rate": 8.568995786733456e-06, "loss": 0.9068, "step": 9753 }, { "epoch": 0.56, "grad_norm": 1.7993545532226562, "learning_rate": 8.567157318098974e-06, "loss": 0.9032, "step": 9754 }, { "epoch": 0.56, "grad_norm": 1.6323741674423218, "learning_rate": 8.565318898907886e-06, "loss": 0.9372, "step": 9755 }, { "epoch": 0.56, "grad_norm": 1.9460618495941162, "learning_rate": 8.563480529223638e-06, "loss": 1.0282, "step": 9756 }, { "epoch": 0.56, "grad_norm": 1.7004189491271973, "learning_rate": 8.561642209109664e-06, "loss": 0.9513, "step": 9757 }, { "epoch": 0.56, "grad_norm": 1.8786660432815552, "learning_rate": 8.5598039386294e-06, "loss": 1.0091, "step": 9758 }, { "epoch": 0.56, "grad_norm": 1.1535083055496216, "learning_rate": 8.557965717846278e-06, "loss": 0.6237, "step": 9759 }, { "epoch": 0.56, "grad_norm": 1.7741538286209106, "learning_rate": 8.556127546823732e-06, "loss": 0.9286, "step": 9760 }, { "epoch": 0.56, "grad_norm": 1.7362631559371948, "learning_rate": 8.554289425625191e-06, "loss": 1.0297, "step": 9761 }, { "epoch": 0.56, "grad_norm": 1.7978144884109497, "learning_rate": 8.552451354314083e-06, "loss": 0.9479, "step": 9762 }, { "epoch": 0.56, "grad_norm": 1.6696869134902954, "learning_rate": 8.550613332953835e-06, "loss": 0.9023, "step": 9763 }, { "epoch": 0.56, "grad_norm": 1.9857523441314697, "learning_rate": 8.548775361607872e-06, "loss": 1.014, "step": 9764 }, { "epoch": 0.56, "grad_norm": 1.7713627815246582, "learning_rate": 8.54693744033962e-06, "loss": 0.8942, "step": 9765 }, { "epoch": 0.56, "grad_norm": 1.7986689805984497, "learning_rate": 8.545099569212496e-06, "loss": 0.9259, "step": 9766 }, { "epoch": 0.56, "grad_norm": 1.7333914041519165, "learning_rate": 8.543261748289919e-06, "loss": 0.8948, "step": 9767 }, { "epoch": 0.56, "grad_norm": 1.8085017204284668, "learning_rate": 8.541423977635313e-06, "loss": 0.9572, "step": 9768 }, { "epoch": 0.56, "grad_norm": 1.556166410446167, "learning_rate": 8.539586257312091e-06, "loss": 0.945, "step": 9769 }, { "epoch": 0.56, "grad_norm": 1.851192831993103, "learning_rate": 8.537748587383667e-06, "loss": 0.9588, "step": 9770 }, { "epoch": 0.56, "grad_norm": 1.839781641960144, "learning_rate": 8.535910967913454e-06, "loss": 0.9402, "step": 9771 }, { "epoch": 0.56, "grad_norm": 1.8548117876052856, "learning_rate": 8.534073398964866e-06, "loss": 0.9723, "step": 9772 }, { "epoch": 0.56, "grad_norm": 2.003147602081299, "learning_rate": 8.532235880601309e-06, "loss": 1.0372, "step": 9773 }, { "epoch": 0.56, "grad_norm": 1.7658926248550415, "learning_rate": 8.530398412886192e-06, "loss": 0.8876, "step": 9774 }, { "epoch": 0.56, "grad_norm": 1.7920811176300049, "learning_rate": 8.52856099588292e-06, "loss": 0.8905, "step": 9775 }, { "epoch": 0.56, "grad_norm": 1.8868311643600464, "learning_rate": 8.526723629654898e-06, "loss": 0.9874, "step": 9776 }, { "epoch": 0.56, "grad_norm": 1.6354767084121704, "learning_rate": 8.524886314265527e-06, "loss": 1.015, "step": 9777 }, { "epoch": 0.56, "grad_norm": 1.606435775756836, "learning_rate": 8.523049049778212e-06, "loss": 0.8059, "step": 9778 }, { "epoch": 0.56, "grad_norm": 1.6480079889297485, "learning_rate": 8.521211836256343e-06, "loss": 0.9127, "step": 9779 }, { "epoch": 0.56, "grad_norm": 1.6213204860687256, "learning_rate": 8.519374673763326e-06, "loss": 0.9295, "step": 9780 }, { "epoch": 0.56, "grad_norm": 1.8984014987945557, "learning_rate": 8.517537562362554e-06, "loss": 0.944, "step": 9781 }, { "epoch": 0.56, "grad_norm": 1.905485987663269, "learning_rate": 8.515700502117418e-06, "loss": 0.9928, "step": 9782 }, { "epoch": 0.56, "grad_norm": 1.5779186487197876, "learning_rate": 8.513863493091313e-06, "loss": 0.9735, "step": 9783 }, { "epoch": 0.56, "grad_norm": 1.8514306545257568, "learning_rate": 8.512026535347627e-06, "loss": 0.9884, "step": 9784 }, { "epoch": 0.56, "grad_norm": 1.719305396080017, "learning_rate": 8.51018962894975e-06, "loss": 0.9532, "step": 9785 }, { "epoch": 0.56, "grad_norm": 1.6308788061141968, "learning_rate": 8.508352773961063e-06, "loss": 0.9655, "step": 9786 }, { "epoch": 0.56, "grad_norm": 1.6522870063781738, "learning_rate": 8.50651597044496e-06, "loss": 0.8783, "step": 9787 }, { "epoch": 0.56, "grad_norm": 1.7631512880325317, "learning_rate": 8.504679218464816e-06, "loss": 0.9546, "step": 9788 }, { "epoch": 0.56, "grad_norm": 1.6883199214935303, "learning_rate": 8.502842518084015e-06, "loss": 0.9983, "step": 9789 }, { "epoch": 0.56, "grad_norm": 1.8403781652450562, "learning_rate": 8.501005869365939e-06, "loss": 0.9601, "step": 9790 }, { "epoch": 0.56, "grad_norm": 1.1367347240447998, "learning_rate": 8.499169272373961e-06, "loss": 0.6541, "step": 9791 }, { "epoch": 0.56, "grad_norm": 1.7039514780044556, "learning_rate": 8.497332727171458e-06, "loss": 0.8586, "step": 9792 }, { "epoch": 0.56, "grad_norm": 1.794058918952942, "learning_rate": 8.495496233821808e-06, "loss": 1.0559, "step": 9793 }, { "epoch": 0.56, "grad_norm": 1.6120576858520508, "learning_rate": 8.493659792388378e-06, "loss": 1.0517, "step": 9794 }, { "epoch": 0.56, "grad_norm": 1.5673608779907227, "learning_rate": 8.491823402934542e-06, "loss": 0.8646, "step": 9795 }, { "epoch": 0.56, "grad_norm": 1.6555083990097046, "learning_rate": 8.489987065523668e-06, "loss": 0.9611, "step": 9796 }, { "epoch": 0.56, "grad_norm": 1.9197218418121338, "learning_rate": 8.488150780219122e-06, "loss": 0.9224, "step": 9797 }, { "epoch": 0.56, "grad_norm": 2.3184118270874023, "learning_rate": 8.48631454708427e-06, "loss": 1.0038, "step": 9798 }, { "epoch": 0.56, "grad_norm": 1.0872104167938232, "learning_rate": 8.484478366182472e-06, "loss": 0.5469, "step": 9799 }, { "epoch": 0.56, "grad_norm": 1.7852264642715454, "learning_rate": 8.482642237577094e-06, "loss": 0.9636, "step": 9800 }, { "epoch": 0.56, "grad_norm": 1.748695731163025, "learning_rate": 8.480806161331494e-06, "loss": 0.9493, "step": 9801 }, { "epoch": 0.56, "grad_norm": 1.024226427078247, "learning_rate": 8.478970137509029e-06, "loss": 0.6269, "step": 9802 }, { "epoch": 0.56, "grad_norm": 1.771871566772461, "learning_rate": 8.477134166173057e-06, "loss": 0.9976, "step": 9803 }, { "epoch": 0.56, "grad_norm": 1.6688435077667236, "learning_rate": 8.475298247386927e-06, "loss": 1.072, "step": 9804 }, { "epoch": 0.56, "grad_norm": 1.708177924156189, "learning_rate": 8.473462381213999e-06, "loss": 1.0109, "step": 9805 }, { "epoch": 0.56, "grad_norm": 1.9090077877044678, "learning_rate": 8.471626567717617e-06, "loss": 0.9891, "step": 9806 }, { "epoch": 0.56, "grad_norm": 1.7106572389602661, "learning_rate": 8.469790806961136e-06, "loss": 0.9655, "step": 9807 }, { "epoch": 0.56, "grad_norm": 1.7648698091506958, "learning_rate": 8.467955099007899e-06, "loss": 0.9127, "step": 9808 }, { "epoch": 0.56, "grad_norm": 1.8491175174713135, "learning_rate": 8.466119443921249e-06, "loss": 0.9318, "step": 9809 }, { "epoch": 0.56, "grad_norm": 1.848143458366394, "learning_rate": 8.464283841764536e-06, "loss": 0.9889, "step": 9810 }, { "epoch": 0.56, "grad_norm": 1.8523794412612915, "learning_rate": 8.462448292601096e-06, "loss": 0.981, "step": 9811 }, { "epoch": 0.56, "grad_norm": 1.6393706798553467, "learning_rate": 8.460612796494272e-06, "loss": 0.8938, "step": 9812 }, { "epoch": 0.56, "grad_norm": 1.6241719722747803, "learning_rate": 8.4587773535074e-06, "loss": 0.9579, "step": 9813 }, { "epoch": 0.56, "grad_norm": 1.8510385751724243, "learning_rate": 8.456941963703817e-06, "loss": 1.04, "step": 9814 }, { "epoch": 0.56, "grad_norm": 1.6346877813339233, "learning_rate": 8.455106627146855e-06, "loss": 0.868, "step": 9815 }, { "epoch": 0.56, "grad_norm": 1.8974014520645142, "learning_rate": 8.453271343899849e-06, "loss": 0.9612, "step": 9816 }, { "epoch": 0.56, "grad_norm": 1.6034828424453735, "learning_rate": 8.451436114026127e-06, "loss": 0.9871, "step": 9817 }, { "epoch": 0.56, "grad_norm": 1.7454249858856201, "learning_rate": 8.449600937589019e-06, "loss": 0.9252, "step": 9818 }, { "epoch": 0.56, "grad_norm": 1.6624367237091064, "learning_rate": 8.447765814651853e-06, "loss": 0.9011, "step": 9819 }, { "epoch": 0.56, "grad_norm": 1.7782756090164185, "learning_rate": 8.445930745277953e-06, "loss": 1.0924, "step": 9820 }, { "epoch": 0.56, "grad_norm": 1.7412757873535156, "learning_rate": 8.444095729530638e-06, "loss": 0.9848, "step": 9821 }, { "epoch": 0.56, "grad_norm": 1.0630688667297363, "learning_rate": 8.442260767473236e-06, "loss": 0.5285, "step": 9822 }, { "epoch": 0.56, "grad_norm": 1.6591832637786865, "learning_rate": 8.440425859169064e-06, "loss": 0.9191, "step": 9823 }, { "epoch": 0.56, "grad_norm": 1.804245948791504, "learning_rate": 8.438591004681439e-06, "loss": 0.9802, "step": 9824 }, { "epoch": 0.56, "grad_norm": 1.0639244318008423, "learning_rate": 8.436756204073676e-06, "loss": 0.7007, "step": 9825 }, { "epoch": 0.56, "grad_norm": 1.6975889205932617, "learning_rate": 8.434921457409091e-06, "loss": 0.9165, "step": 9826 }, { "epoch": 0.56, "grad_norm": 1.0042953491210938, "learning_rate": 8.433086764750993e-06, "loss": 0.5333, "step": 9827 }, { "epoch": 0.56, "grad_norm": 1.662895917892456, "learning_rate": 8.431252126162695e-06, "loss": 0.9927, "step": 9828 }, { "epoch": 0.56, "grad_norm": 1.726589322090149, "learning_rate": 8.429417541707505e-06, "loss": 0.9874, "step": 9829 }, { "epoch": 0.56, "grad_norm": 1.6898778676986694, "learning_rate": 8.427583011448725e-06, "loss": 0.8727, "step": 9830 }, { "epoch": 0.56, "grad_norm": 1.6102042198181152, "learning_rate": 8.425748535449666e-06, "loss": 0.9873, "step": 9831 }, { "epoch": 0.56, "grad_norm": 1.4961017370224, "learning_rate": 8.423914113773627e-06, "loss": 1.0199, "step": 9832 }, { "epoch": 0.56, "grad_norm": 1.7345589399337769, "learning_rate": 8.422079746483907e-06, "loss": 0.8976, "step": 9833 }, { "epoch": 0.56, "grad_norm": 1.8311835527420044, "learning_rate": 8.420245433643807e-06, "loss": 1.0025, "step": 9834 }, { "epoch": 0.56, "grad_norm": 1.7003817558288574, "learning_rate": 8.418411175316627e-06, "loss": 1.0129, "step": 9835 }, { "epoch": 0.56, "grad_norm": 1.6672784090042114, "learning_rate": 8.41657697156566e-06, "loss": 0.938, "step": 9836 }, { "epoch": 0.56, "grad_norm": 1.7642420530319214, "learning_rate": 8.414742822454197e-06, "loss": 0.9119, "step": 9837 }, { "epoch": 0.56, "grad_norm": 1.7299810647964478, "learning_rate": 8.41290872804553e-06, "loss": 0.9908, "step": 9838 }, { "epoch": 0.56, "grad_norm": 1.5857008695602417, "learning_rate": 8.411074688402952e-06, "loss": 0.9169, "step": 9839 }, { "epoch": 0.56, "grad_norm": 1.5904239416122437, "learning_rate": 8.409240703589746e-06, "loss": 0.9951, "step": 9840 }, { "epoch": 0.56, "grad_norm": 1.7446154356002808, "learning_rate": 8.4074067736692e-06, "loss": 0.9103, "step": 9841 }, { "epoch": 0.56, "grad_norm": 1.9121451377868652, "learning_rate": 8.405572898704598e-06, "loss": 0.9484, "step": 9842 }, { "epoch": 0.56, "grad_norm": 1.6011571884155273, "learning_rate": 8.403739078759221e-06, "loss": 0.8354, "step": 9843 }, { "epoch": 0.56, "grad_norm": 1.7281208038330078, "learning_rate": 8.40190531389635e-06, "loss": 0.9697, "step": 9844 }, { "epoch": 0.56, "grad_norm": 1.7923554182052612, "learning_rate": 8.40007160417926e-06, "loss": 1.0145, "step": 9845 }, { "epoch": 0.56, "grad_norm": 1.9364678859710693, "learning_rate": 8.398237949671231e-06, "loss": 1.012, "step": 9846 }, { "epoch": 0.56, "grad_norm": 1.9073313474655151, "learning_rate": 8.396404350435539e-06, "loss": 0.9566, "step": 9847 }, { "epoch": 0.56, "grad_norm": 1.9555952548980713, "learning_rate": 8.39457080653545e-06, "loss": 0.9763, "step": 9848 }, { "epoch": 0.56, "grad_norm": 1.8375389575958252, "learning_rate": 8.392737318034239e-06, "loss": 1.0071, "step": 9849 }, { "epoch": 0.56, "grad_norm": 1.7012312412261963, "learning_rate": 8.390903884995174e-06, "loss": 0.9439, "step": 9850 }, { "epoch": 0.56, "grad_norm": 1.7312678098678589, "learning_rate": 8.389070507481522e-06, "loss": 0.8852, "step": 9851 }, { "epoch": 0.57, "grad_norm": 1.7921741008758545, "learning_rate": 8.387237185556544e-06, "loss": 1.0518, "step": 9852 }, { "epoch": 0.57, "grad_norm": 1.680347204208374, "learning_rate": 8.385403919283508e-06, "loss": 0.9043, "step": 9853 }, { "epoch": 0.57, "grad_norm": 1.8480693101882935, "learning_rate": 8.383570708725672e-06, "loss": 0.9366, "step": 9854 }, { "epoch": 0.57, "grad_norm": 1.5710793733596802, "learning_rate": 8.381737553946296e-06, "loss": 0.9025, "step": 9855 }, { "epoch": 0.57, "grad_norm": 1.7552309036254883, "learning_rate": 8.379904455008635e-06, "loss": 0.9226, "step": 9856 }, { "epoch": 0.57, "grad_norm": 1.6855357885360718, "learning_rate": 8.378071411975947e-06, "loss": 0.9774, "step": 9857 }, { "epoch": 0.57, "grad_norm": 1.665492296218872, "learning_rate": 8.376238424911481e-06, "loss": 0.9258, "step": 9858 }, { "epoch": 0.57, "grad_norm": 1.7533986568450928, "learning_rate": 8.374405493878494e-06, "loss": 0.9493, "step": 9859 }, { "epoch": 0.57, "grad_norm": 1.9120346307754517, "learning_rate": 8.372572618940232e-06, "loss": 0.9401, "step": 9860 }, { "epoch": 0.57, "grad_norm": 1.6676182746887207, "learning_rate": 8.370739800159944e-06, "loss": 1.0289, "step": 9861 }, { "epoch": 0.57, "grad_norm": 1.775700330734253, "learning_rate": 8.368907037600873e-06, "loss": 0.9437, "step": 9862 }, { "epoch": 0.57, "grad_norm": 1.6866471767425537, "learning_rate": 8.367074331326264e-06, "loss": 0.9376, "step": 9863 }, { "epoch": 0.57, "grad_norm": 1.5305966138839722, "learning_rate": 8.365241681399359e-06, "loss": 0.9331, "step": 9864 }, { "epoch": 0.57, "grad_norm": 1.730548620223999, "learning_rate": 8.363409087883397e-06, "loss": 0.8898, "step": 9865 }, { "epoch": 0.57, "grad_norm": 1.7145718336105347, "learning_rate": 8.361576550841615e-06, "loss": 0.8388, "step": 9866 }, { "epoch": 0.57, "grad_norm": 1.746213436126709, "learning_rate": 8.35974407033725e-06, "loss": 0.9281, "step": 9867 }, { "epoch": 0.57, "grad_norm": 1.6540430784225464, "learning_rate": 8.357911646433534e-06, "loss": 0.9637, "step": 9868 }, { "epoch": 0.57, "grad_norm": 1.9239869117736816, "learning_rate": 8.356079279193703e-06, "loss": 0.9915, "step": 9869 }, { "epoch": 0.57, "grad_norm": 1.7954456806182861, "learning_rate": 8.35424696868098e-06, "loss": 0.9518, "step": 9870 }, { "epoch": 0.57, "grad_norm": 1.0025910139083862, "learning_rate": 8.3524147149586e-06, "loss": 0.6069, "step": 9871 }, { "epoch": 0.57, "grad_norm": 1.7625956535339355, "learning_rate": 8.350582518089781e-06, "loss": 0.9031, "step": 9872 }, { "epoch": 0.57, "grad_norm": 1.857361912727356, "learning_rate": 8.348750378137756e-06, "loss": 0.9891, "step": 9873 }, { "epoch": 0.57, "grad_norm": 1.7598695755004883, "learning_rate": 8.346918295165743e-06, "loss": 0.894, "step": 9874 }, { "epoch": 0.57, "grad_norm": 1.6191054582595825, "learning_rate": 8.34508626923696e-06, "loss": 0.901, "step": 9875 }, { "epoch": 0.57, "grad_norm": 1.8950486183166504, "learning_rate": 8.343254300414629e-06, "loss": 0.9912, "step": 9876 }, { "epoch": 0.57, "grad_norm": 1.622578740119934, "learning_rate": 8.341422388761964e-06, "loss": 0.948, "step": 9877 }, { "epoch": 0.57, "grad_norm": 1.1127142906188965, "learning_rate": 8.33959053434218e-06, "loss": 0.588, "step": 9878 }, { "epoch": 0.57, "grad_norm": 1.8276445865631104, "learning_rate": 8.337758737218487e-06, "loss": 0.9216, "step": 9879 }, { "epoch": 0.57, "grad_norm": 1.5599991083145142, "learning_rate": 8.335926997454097e-06, "loss": 0.9535, "step": 9880 }, { "epoch": 0.57, "grad_norm": 1.8944109678268433, "learning_rate": 8.334095315112218e-06, "loss": 0.9727, "step": 9881 }, { "epoch": 0.57, "grad_norm": 1.729617714881897, "learning_rate": 8.332263690256056e-06, "loss": 0.9828, "step": 9882 }, { "epoch": 0.57, "grad_norm": 1.883064866065979, "learning_rate": 8.330432122948816e-06, "loss": 0.9903, "step": 9883 }, { "epoch": 0.57, "grad_norm": 1.6110795736312866, "learning_rate": 8.3286006132537e-06, "loss": 0.9622, "step": 9884 }, { "epoch": 0.57, "grad_norm": 1.9348753690719604, "learning_rate": 8.326769161233907e-06, "loss": 0.9983, "step": 9885 }, { "epoch": 0.57, "grad_norm": 1.8125224113464355, "learning_rate": 8.324937766952638e-06, "loss": 0.9462, "step": 9886 }, { "epoch": 0.57, "grad_norm": 2.894012928009033, "learning_rate": 8.323106430473084e-06, "loss": 0.9146, "step": 9887 }, { "epoch": 0.57, "grad_norm": 1.705432653427124, "learning_rate": 8.321275151858445e-06, "loss": 0.9496, "step": 9888 }, { "epoch": 0.57, "grad_norm": 2.2724661827087402, "learning_rate": 8.319443931171911e-06, "loss": 0.984, "step": 9889 }, { "epoch": 0.57, "grad_norm": 1.600510597229004, "learning_rate": 8.317612768476673e-06, "loss": 0.9081, "step": 9890 }, { "epoch": 0.57, "grad_norm": 1.8133643865585327, "learning_rate": 8.315781663835918e-06, "loss": 0.9857, "step": 9891 }, { "epoch": 0.57, "grad_norm": 1.8171542882919312, "learning_rate": 8.313950617312835e-06, "loss": 0.9706, "step": 9892 }, { "epoch": 0.57, "grad_norm": 1.671372413635254, "learning_rate": 8.312119628970605e-06, "loss": 0.9608, "step": 9893 }, { "epoch": 0.57, "grad_norm": 1.6408926248550415, "learning_rate": 8.310288698872412e-06, "loss": 0.8968, "step": 9894 }, { "epoch": 0.57, "grad_norm": 1.8124191761016846, "learning_rate": 8.308457827081436e-06, "loss": 0.9211, "step": 9895 }, { "epoch": 0.57, "grad_norm": 1.907081127166748, "learning_rate": 8.306627013660856e-06, "loss": 0.9142, "step": 9896 }, { "epoch": 0.57, "grad_norm": 1.7713135480880737, "learning_rate": 8.304796258673845e-06, "loss": 0.9305, "step": 9897 }, { "epoch": 0.57, "grad_norm": 1.721226692199707, "learning_rate": 8.302965562183583e-06, "loss": 0.8517, "step": 9898 }, { "epoch": 0.57, "grad_norm": 1.7855409383773804, "learning_rate": 8.301134924253233e-06, "loss": 0.9, "step": 9899 }, { "epoch": 0.57, "grad_norm": 1.7462619543075562, "learning_rate": 8.299304344945977e-06, "loss": 0.9325, "step": 9900 }, { "epoch": 0.57, "grad_norm": 1.9759937524795532, "learning_rate": 8.297473824324976e-06, "loss": 0.9968, "step": 9901 }, { "epoch": 0.57, "grad_norm": 1.7004410028457642, "learning_rate": 8.295643362453397e-06, "loss": 0.9483, "step": 9902 }, { "epoch": 0.57, "grad_norm": 1.7353190183639526, "learning_rate": 8.293812959394405e-06, "loss": 0.9268, "step": 9903 }, { "epoch": 0.57, "grad_norm": 1.8573025465011597, "learning_rate": 8.291982615211163e-06, "loss": 0.9673, "step": 9904 }, { "epoch": 0.57, "grad_norm": 1.6955972909927368, "learning_rate": 8.290152329966827e-06, "loss": 0.958, "step": 9905 }, { "epoch": 0.57, "grad_norm": 1.865373969078064, "learning_rate": 8.28832210372456e-06, "loss": 0.9732, "step": 9906 }, { "epoch": 0.57, "grad_norm": 1.7247673273086548, "learning_rate": 8.286491936547514e-06, "loss": 0.9136, "step": 9907 }, { "epoch": 0.57, "grad_norm": 1.706994891166687, "learning_rate": 8.284661828498847e-06, "loss": 0.9503, "step": 9908 }, { "epoch": 0.57, "grad_norm": 1.6628011465072632, "learning_rate": 8.282831779641708e-06, "loss": 0.9114, "step": 9909 }, { "epoch": 0.57, "grad_norm": 1.8326587677001953, "learning_rate": 8.281001790039246e-06, "loss": 0.9767, "step": 9910 }, { "epoch": 0.57, "grad_norm": 1.6019359827041626, "learning_rate": 8.27917185975461e-06, "loss": 0.8618, "step": 9911 }, { "epoch": 0.57, "grad_norm": 1.680530309677124, "learning_rate": 8.277341988850949e-06, "loss": 0.9544, "step": 9912 }, { "epoch": 0.57, "grad_norm": 1.7658640146255493, "learning_rate": 8.275512177391403e-06, "loss": 0.929, "step": 9913 }, { "epoch": 0.57, "grad_norm": 1.848766565322876, "learning_rate": 8.273682425439114e-06, "loss": 0.9147, "step": 9914 }, { "epoch": 0.57, "grad_norm": 1.8004672527313232, "learning_rate": 8.271852733057222e-06, "loss": 0.9916, "step": 9915 }, { "epoch": 0.57, "grad_norm": 1.755155086517334, "learning_rate": 8.270023100308865e-06, "loss": 0.9459, "step": 9916 }, { "epoch": 0.57, "grad_norm": 1.9252840280532837, "learning_rate": 8.26819352725718e-06, "loss": 0.9535, "step": 9917 }, { "epoch": 0.57, "grad_norm": 0.923312783241272, "learning_rate": 8.266364013965297e-06, "loss": 0.5343, "step": 9918 }, { "epoch": 0.57, "grad_norm": 1.6745131015777588, "learning_rate": 8.26453456049635e-06, "loss": 0.9126, "step": 9919 }, { "epoch": 0.57, "grad_norm": 1.7967758178710938, "learning_rate": 8.262705166913467e-06, "loss": 0.8683, "step": 9920 }, { "epoch": 0.57, "grad_norm": 1.6511890888214111, "learning_rate": 8.260875833279776e-06, "loss": 0.8783, "step": 9921 }, { "epoch": 0.57, "grad_norm": 1.5698761940002441, "learning_rate": 8.259046559658401e-06, "loss": 0.8749, "step": 9922 }, { "epoch": 0.57, "grad_norm": 1.8676340579986572, "learning_rate": 8.257217346112468e-06, "loss": 1.0066, "step": 9923 }, { "epoch": 0.57, "grad_norm": 1.8737683296203613, "learning_rate": 8.255388192705092e-06, "loss": 0.9175, "step": 9924 }, { "epoch": 0.57, "grad_norm": 1.8011157512664795, "learning_rate": 8.2535590994994e-06, "loss": 0.9462, "step": 9925 }, { "epoch": 0.57, "grad_norm": 1.796507716178894, "learning_rate": 8.251730066558504e-06, "loss": 0.9056, "step": 9926 }, { "epoch": 0.57, "grad_norm": 1.9044804573059082, "learning_rate": 8.24990109394552e-06, "loss": 0.9592, "step": 9927 }, { "epoch": 0.57, "grad_norm": 1.7152119874954224, "learning_rate": 8.24807218172356e-06, "loss": 0.85, "step": 9928 }, { "epoch": 0.57, "grad_norm": 1.0453922748565674, "learning_rate": 8.246243329955735e-06, "loss": 0.5773, "step": 9929 }, { "epoch": 0.57, "grad_norm": 1.6330358982086182, "learning_rate": 8.244414538705155e-06, "loss": 0.9323, "step": 9930 }, { "epoch": 0.57, "grad_norm": 1.782004714012146, "learning_rate": 8.242585808034924e-06, "loss": 0.908, "step": 9931 }, { "epoch": 0.57, "grad_norm": 1.7026286125183105, "learning_rate": 8.240757138008149e-06, "loss": 0.9192, "step": 9932 }, { "epoch": 0.57, "grad_norm": 1.8290990591049194, "learning_rate": 8.23892852868793e-06, "loss": 1.0071, "step": 9933 }, { "epoch": 0.57, "grad_norm": 1.7360644340515137, "learning_rate": 8.237099980137368e-06, "loss": 0.9037, "step": 9934 }, { "epoch": 0.57, "grad_norm": 1.7625229358673096, "learning_rate": 8.235271492419563e-06, "loss": 0.9103, "step": 9935 }, { "epoch": 0.57, "grad_norm": 2.077831983566284, "learning_rate": 8.233443065597605e-06, "loss": 0.9098, "step": 9936 }, { "epoch": 0.57, "grad_norm": 1.783826231956482, "learning_rate": 8.231614699734595e-06, "loss": 0.9188, "step": 9937 }, { "epoch": 0.57, "grad_norm": 1.857846736907959, "learning_rate": 8.22978639489362e-06, "loss": 0.984, "step": 9938 }, { "epoch": 0.57, "grad_norm": 1.8796751499176025, "learning_rate": 8.227958151137773e-06, "loss": 0.9687, "step": 9939 }, { "epoch": 0.57, "grad_norm": 1.7597626447677612, "learning_rate": 8.22612996853014e-06, "loss": 0.9487, "step": 9940 }, { "epoch": 0.57, "grad_norm": 1.6525269746780396, "learning_rate": 8.224301847133805e-06, "loss": 0.899, "step": 9941 }, { "epoch": 0.57, "grad_norm": 1.6960855722427368, "learning_rate": 8.222473787011855e-06, "loss": 0.9641, "step": 9942 }, { "epoch": 0.57, "grad_norm": 1.9196513891220093, "learning_rate": 8.22064578822737e-06, "loss": 0.9329, "step": 9943 }, { "epoch": 0.57, "grad_norm": 1.6569267511367798, "learning_rate": 8.218817850843428e-06, "loss": 0.9485, "step": 9944 }, { "epoch": 0.57, "grad_norm": 1.8788106441497803, "learning_rate": 8.216989974923107e-06, "loss": 0.9351, "step": 9945 }, { "epoch": 0.57, "grad_norm": 1.7779889106750488, "learning_rate": 8.21516216052948e-06, "loss": 0.9016, "step": 9946 }, { "epoch": 0.57, "grad_norm": 1.7726404666900635, "learning_rate": 8.213334407725622e-06, "loss": 0.9259, "step": 9947 }, { "epoch": 0.57, "grad_norm": 1.700067162513733, "learning_rate": 8.211506716574604e-06, "loss": 0.9279, "step": 9948 }, { "epoch": 0.57, "grad_norm": 1.6555423736572266, "learning_rate": 8.209679087139491e-06, "loss": 0.9613, "step": 9949 }, { "epoch": 0.57, "grad_norm": 1.6706092357635498, "learning_rate": 8.207851519483352e-06, "loss": 0.9813, "step": 9950 }, { "epoch": 0.57, "grad_norm": 1.842773199081421, "learning_rate": 8.206024013669253e-06, "loss": 0.9573, "step": 9951 }, { "epoch": 0.57, "grad_norm": 1.7060863971710205, "learning_rate": 8.204196569760252e-06, "loss": 0.895, "step": 9952 }, { "epoch": 0.57, "grad_norm": 1.686597228050232, "learning_rate": 8.20236918781941e-06, "loss": 0.9311, "step": 9953 }, { "epoch": 0.57, "grad_norm": 1.580003261566162, "learning_rate": 8.200541867909786e-06, "loss": 0.9929, "step": 9954 }, { "epoch": 0.57, "grad_norm": 2.1099209785461426, "learning_rate": 8.198714610094438e-06, "loss": 0.9683, "step": 9955 }, { "epoch": 0.57, "grad_norm": 1.8280285596847534, "learning_rate": 8.196887414436416e-06, "loss": 0.9237, "step": 9956 }, { "epoch": 0.57, "grad_norm": 1.651206374168396, "learning_rate": 8.195060280998772e-06, "loss": 0.8837, "step": 9957 }, { "epoch": 0.57, "grad_norm": 1.8081951141357422, "learning_rate": 8.193233209844557e-06, "loss": 0.9543, "step": 9958 }, { "epoch": 0.57, "grad_norm": 1.7081942558288574, "learning_rate": 8.191406201036816e-06, "loss": 0.9403, "step": 9959 }, { "epoch": 0.57, "grad_norm": 1.6036102771759033, "learning_rate": 8.189579254638595e-06, "loss": 0.9286, "step": 9960 }, { "epoch": 0.57, "grad_norm": 1.5856201648712158, "learning_rate": 8.187752370712936e-06, "loss": 0.8996, "step": 9961 }, { "epoch": 0.57, "grad_norm": 1.8253306150436401, "learning_rate": 8.185925549322883e-06, "loss": 1.033, "step": 9962 }, { "epoch": 0.57, "grad_norm": 1.6382275819778442, "learning_rate": 8.18409879053147e-06, "loss": 0.9808, "step": 9963 }, { "epoch": 0.57, "grad_norm": 1.7461841106414795, "learning_rate": 8.182272094401735e-06, "loss": 0.9837, "step": 9964 }, { "epoch": 0.57, "grad_norm": 1.797199010848999, "learning_rate": 8.180445460996711e-06, "loss": 0.9297, "step": 9965 }, { "epoch": 0.57, "grad_norm": 1.779309868812561, "learning_rate": 8.178618890379432e-06, "loss": 0.9404, "step": 9966 }, { "epoch": 0.57, "grad_norm": 1.7046159505844116, "learning_rate": 8.17679238261293e-06, "loss": 0.8942, "step": 9967 }, { "epoch": 0.57, "grad_norm": 1.6449289321899414, "learning_rate": 8.174965937760228e-06, "loss": 1.0307, "step": 9968 }, { "epoch": 0.57, "grad_norm": 1.8228262662887573, "learning_rate": 8.173139555884353e-06, "loss": 0.9957, "step": 9969 }, { "epoch": 0.57, "grad_norm": 1.7197256088256836, "learning_rate": 8.171313237048331e-06, "loss": 0.9318, "step": 9970 }, { "epoch": 0.57, "grad_norm": 1.8620012998580933, "learning_rate": 8.16948698131518e-06, "loss": 0.9437, "step": 9971 }, { "epoch": 0.57, "grad_norm": 1.6902499198913574, "learning_rate": 8.16766078874792e-06, "loss": 0.8836, "step": 9972 }, { "epoch": 0.57, "grad_norm": 1.9670344591140747, "learning_rate": 8.165834659409566e-06, "loss": 0.9847, "step": 9973 }, { "epoch": 0.57, "grad_norm": 1.9216474294662476, "learning_rate": 8.164008593363136e-06, "loss": 0.9186, "step": 9974 }, { "epoch": 0.57, "grad_norm": 1.835475206375122, "learning_rate": 8.16218259067164e-06, "loss": 1.0679, "step": 9975 }, { "epoch": 0.57, "grad_norm": 1.7477036714553833, "learning_rate": 8.16035665139809e-06, "loss": 0.8871, "step": 9976 }, { "epoch": 0.57, "grad_norm": 1.9078035354614258, "learning_rate": 8.158530775605493e-06, "loss": 0.9443, "step": 9977 }, { "epoch": 0.57, "grad_norm": 2.0806448459625244, "learning_rate": 8.156704963356851e-06, "loss": 0.9899, "step": 9978 }, { "epoch": 0.57, "grad_norm": 1.7123740911483765, "learning_rate": 8.154879214715176e-06, "loss": 0.9562, "step": 9979 }, { "epoch": 0.57, "grad_norm": 1.0956957340240479, "learning_rate": 8.153053529743465e-06, "loss": 0.598, "step": 9980 }, { "epoch": 0.57, "grad_norm": 0.9688538312911987, "learning_rate": 8.151227908504718e-06, "loss": 0.544, "step": 9981 }, { "epoch": 0.57, "grad_norm": 1.7711527347564697, "learning_rate": 8.14940235106193e-06, "loss": 0.9526, "step": 9982 }, { "epoch": 0.57, "grad_norm": 1.7858059406280518, "learning_rate": 8.147576857478098e-06, "loss": 0.9197, "step": 9983 }, { "epoch": 0.57, "grad_norm": 1.8245915174484253, "learning_rate": 8.145751427816215e-06, "loss": 0.9208, "step": 9984 }, { "epoch": 0.57, "grad_norm": 1.8225047588348389, "learning_rate": 8.143926062139268e-06, "loss": 0.8478, "step": 9985 }, { "epoch": 0.57, "grad_norm": 1.947296380996704, "learning_rate": 8.142100760510249e-06, "loss": 0.9458, "step": 9986 }, { "epoch": 0.57, "grad_norm": 1.6904819011688232, "learning_rate": 8.140275522992146e-06, "loss": 0.9517, "step": 9987 }, { "epoch": 0.57, "grad_norm": 1.6851528882980347, "learning_rate": 8.138450349647936e-06, "loss": 0.9918, "step": 9988 }, { "epoch": 0.57, "grad_norm": 1.8243478536605835, "learning_rate": 8.136625240540605e-06, "loss": 1.0168, "step": 9989 }, { "epoch": 0.57, "grad_norm": 1.660402536392212, "learning_rate": 8.13480019573313e-06, "loss": 0.9552, "step": 9990 }, { "epoch": 0.57, "grad_norm": 1.6985617876052856, "learning_rate": 8.132975215288494e-06, "loss": 0.936, "step": 9991 }, { "epoch": 0.57, "grad_norm": 1.9087896347045898, "learning_rate": 8.131150299269665e-06, "loss": 0.9941, "step": 9992 }, { "epoch": 0.57, "grad_norm": 1.7848143577575684, "learning_rate": 8.12932544773962e-06, "loss": 1.0171, "step": 9993 }, { "epoch": 0.57, "grad_norm": 2.4789185523986816, "learning_rate": 8.12750066076133e-06, "loss": 0.925, "step": 9994 }, { "epoch": 0.57, "grad_norm": 1.866515874862671, "learning_rate": 8.125675938397759e-06, "loss": 0.8677, "step": 9995 }, { "epoch": 0.57, "grad_norm": 1.5574911832809448, "learning_rate": 8.123851280711877e-06, "loss": 0.9505, "step": 9996 }, { "epoch": 0.57, "grad_norm": 1.8167750835418701, "learning_rate": 8.122026687766647e-06, "loss": 0.9371, "step": 9997 }, { "epoch": 0.57, "grad_norm": 0.9497883319854736, "learning_rate": 8.120202159625029e-06, "loss": 0.5246, "step": 9998 }, { "epoch": 0.57, "grad_norm": 1.7061885595321655, "learning_rate": 8.118377696349984e-06, "loss": 0.9318, "step": 9999 }, { "epoch": 0.57, "grad_norm": 1.7085875272750854, "learning_rate": 8.116553298004467e-06, "loss": 0.9913, "step": 10000 }, { "epoch": 0.57, "grad_norm": 2.0322234630584717, "learning_rate": 8.114728964651438e-06, "loss": 0.9057, "step": 10001 }, { "epoch": 0.57, "grad_norm": 1.708474040031433, "learning_rate": 8.11290469635384e-06, "loss": 0.9604, "step": 10002 }, { "epoch": 0.57, "grad_norm": 1.6908295154571533, "learning_rate": 8.111080493174635e-06, "loss": 0.931, "step": 10003 }, { "epoch": 0.57, "grad_norm": 1.865363597869873, "learning_rate": 8.109256355176761e-06, "loss": 0.9613, "step": 10004 }, { "epoch": 0.57, "grad_norm": 1.6232551336288452, "learning_rate": 8.107432282423172e-06, "loss": 0.8894, "step": 10005 }, { "epoch": 0.57, "grad_norm": 1.6600151062011719, "learning_rate": 8.105608274976808e-06, "loss": 1.0031, "step": 10006 }, { "epoch": 0.57, "grad_norm": 1.7602753639221191, "learning_rate": 8.10378433290061e-06, "loss": 0.9707, "step": 10007 }, { "epoch": 0.57, "grad_norm": 1.9686120748519897, "learning_rate": 8.101960456257518e-06, "loss": 0.9289, "step": 10008 }, { "epoch": 0.57, "grad_norm": 1.7205287218093872, "learning_rate": 8.10013664511047e-06, "loss": 0.9626, "step": 10009 }, { "epoch": 0.57, "grad_norm": 1.6464554071426392, "learning_rate": 8.098312899522398e-06, "loss": 0.8431, "step": 10010 }, { "epoch": 0.57, "grad_norm": 1.621382236480713, "learning_rate": 8.096489219556237e-06, "loss": 0.9092, "step": 10011 }, { "epoch": 0.57, "grad_norm": 1.6266937255859375, "learning_rate": 8.094665605274914e-06, "loss": 0.952, "step": 10012 }, { "epoch": 0.57, "grad_norm": 1.698685884475708, "learning_rate": 8.09284205674136e-06, "loss": 0.9421, "step": 10013 }, { "epoch": 0.57, "grad_norm": 1.6341575384140015, "learning_rate": 8.091018574018499e-06, "loss": 0.8872, "step": 10014 }, { "epoch": 0.57, "grad_norm": 1.7809375524520874, "learning_rate": 8.089195157169254e-06, "loss": 0.8652, "step": 10015 }, { "epoch": 0.57, "grad_norm": 1.8846242427825928, "learning_rate": 8.087371806256548e-06, "loss": 0.8921, "step": 10016 }, { "epoch": 0.57, "grad_norm": 1.857582688331604, "learning_rate": 8.085548521343296e-06, "loss": 0.936, "step": 10017 }, { "epoch": 0.57, "grad_norm": 1.7509684562683105, "learning_rate": 8.083725302492418e-06, "loss": 0.9823, "step": 10018 }, { "epoch": 0.57, "grad_norm": 1.935383677482605, "learning_rate": 8.081902149766825e-06, "loss": 0.9181, "step": 10019 }, { "epoch": 0.57, "grad_norm": 1.7712290287017822, "learning_rate": 8.080079063229432e-06, "loss": 0.9885, "step": 10020 }, { "epoch": 0.57, "grad_norm": 1.823823094367981, "learning_rate": 8.078256042943149e-06, "loss": 0.9332, "step": 10021 }, { "epoch": 0.57, "grad_norm": 1.6421974897384644, "learning_rate": 8.07643308897088e-06, "loss": 0.8633, "step": 10022 }, { "epoch": 0.57, "grad_norm": 1.8431400060653687, "learning_rate": 8.074610201375532e-06, "loss": 0.9373, "step": 10023 }, { "epoch": 0.57, "grad_norm": 1.8571799993515015, "learning_rate": 8.07278738022001e-06, "loss": 0.9569, "step": 10024 }, { "epoch": 0.57, "grad_norm": 1.780059576034546, "learning_rate": 8.070964625567209e-06, "loss": 0.9423, "step": 10025 }, { "epoch": 0.58, "grad_norm": 1.074397087097168, "learning_rate": 8.069141937480031e-06, "loss": 0.6162, "step": 10026 }, { "epoch": 0.58, "grad_norm": 1.8878297805786133, "learning_rate": 8.067319316021372e-06, "loss": 0.9086, "step": 10027 }, { "epoch": 0.58, "grad_norm": 1.7411837577819824, "learning_rate": 8.065496761254126e-06, "loss": 0.9747, "step": 10028 }, { "epoch": 0.58, "grad_norm": 1.7861181497573853, "learning_rate": 8.06367427324118e-06, "loss": 0.9468, "step": 10029 }, { "epoch": 0.58, "grad_norm": 1.9160903692245483, "learning_rate": 8.061851852045428e-06, "loss": 0.9071, "step": 10030 }, { "epoch": 0.58, "grad_norm": 1.8891775608062744, "learning_rate": 8.060029497729752e-06, "loss": 0.937, "step": 10031 }, { "epoch": 0.58, "grad_norm": 1.7994674444198608, "learning_rate": 8.05820721035704e-06, "loss": 0.9338, "step": 10032 }, { "epoch": 0.58, "grad_norm": 1.7510249614715576, "learning_rate": 8.056384989990173e-06, "loss": 0.8641, "step": 10033 }, { "epoch": 0.58, "grad_norm": 1.7696845531463623, "learning_rate": 8.054562836692032e-06, "loss": 0.9854, "step": 10034 }, { "epoch": 0.58, "grad_norm": 1.7008767127990723, "learning_rate": 8.052740750525492e-06, "loss": 0.9997, "step": 10035 }, { "epoch": 0.58, "grad_norm": 1.846603274345398, "learning_rate": 8.05091873155343e-06, "loss": 0.8249, "step": 10036 }, { "epoch": 0.58, "grad_norm": 1.8892041444778442, "learning_rate": 8.04909677983872e-06, "loss": 0.9787, "step": 10037 }, { "epoch": 0.58, "grad_norm": 1.979797601699829, "learning_rate": 8.047274895444227e-06, "loss": 0.9783, "step": 10038 }, { "epoch": 0.58, "grad_norm": 1.7236649990081787, "learning_rate": 8.045453078432824e-06, "loss": 0.9483, "step": 10039 }, { "epoch": 0.58, "grad_norm": 1.763467788696289, "learning_rate": 8.043631328867376e-06, "loss": 0.9365, "step": 10040 }, { "epoch": 0.58, "grad_norm": 1.706706166267395, "learning_rate": 8.041809646810745e-06, "loss": 0.9042, "step": 10041 }, { "epoch": 0.58, "grad_norm": 1.8034722805023193, "learning_rate": 8.039988032325794e-06, "loss": 0.9613, "step": 10042 }, { "epoch": 0.58, "grad_norm": 1.8166176080703735, "learning_rate": 8.038166485475381e-06, "loss": 0.9866, "step": 10043 }, { "epoch": 0.58, "grad_norm": 1.7812343835830688, "learning_rate": 8.036345006322358e-06, "loss": 0.9458, "step": 10044 }, { "epoch": 0.58, "grad_norm": 1.5142858028411865, "learning_rate": 8.034523594929588e-06, "loss": 0.8928, "step": 10045 }, { "epoch": 0.58, "grad_norm": 0.9605401158332825, "learning_rate": 8.032702251359918e-06, "loss": 0.5909, "step": 10046 }, { "epoch": 0.58, "grad_norm": 1.8225682973861694, "learning_rate": 8.030880975676198e-06, "loss": 0.8881, "step": 10047 }, { "epoch": 0.58, "grad_norm": 2.013690233230591, "learning_rate": 8.029059767941275e-06, "loss": 1.1004, "step": 10048 }, { "epoch": 0.58, "grad_norm": 1.7007765769958496, "learning_rate": 8.027238628217993e-06, "loss": 0.9715, "step": 10049 }, { "epoch": 0.58, "grad_norm": 1.8556448221206665, "learning_rate": 8.025417556569196e-06, "loss": 0.9892, "step": 10050 }, { "epoch": 0.58, "grad_norm": 1.7061582803726196, "learning_rate": 8.023596553057723e-06, "loss": 0.9882, "step": 10051 }, { "epoch": 0.58, "grad_norm": 1.878564715385437, "learning_rate": 8.021775617746412e-06, "loss": 1.0291, "step": 10052 }, { "epoch": 0.58, "grad_norm": 1.933752417564392, "learning_rate": 8.0199547506981e-06, "loss": 0.9178, "step": 10053 }, { "epoch": 0.58, "grad_norm": 1.8793507814407349, "learning_rate": 8.018133951975617e-06, "loss": 0.9142, "step": 10054 }, { "epoch": 0.58, "grad_norm": 1.7183701992034912, "learning_rate": 8.016313221641795e-06, "loss": 0.9182, "step": 10055 }, { "epoch": 0.58, "grad_norm": 2.3512232303619385, "learning_rate": 8.01449255975946e-06, "loss": 1.0324, "step": 10056 }, { "epoch": 0.58, "grad_norm": 1.810836672782898, "learning_rate": 8.012671966391444e-06, "loss": 0.8897, "step": 10057 }, { "epoch": 0.58, "grad_norm": 1.7067466974258423, "learning_rate": 8.010851441600567e-06, "loss": 0.8918, "step": 10058 }, { "epoch": 0.58, "grad_norm": 1.859833002090454, "learning_rate": 8.00903098544965e-06, "loss": 0.9134, "step": 10059 }, { "epoch": 0.58, "grad_norm": 1.0736366510391235, "learning_rate": 8.007210598001511e-06, "loss": 0.5943, "step": 10060 }, { "epoch": 0.58, "grad_norm": 1.6433991193771362, "learning_rate": 8.00539027931897e-06, "loss": 0.9065, "step": 10061 }, { "epoch": 0.58, "grad_norm": 1.650827169418335, "learning_rate": 8.003570029464836e-06, "loss": 0.9097, "step": 10062 }, { "epoch": 0.58, "grad_norm": 1.7230761051177979, "learning_rate": 8.001749848501925e-06, "loss": 0.9745, "step": 10063 }, { "epoch": 0.58, "grad_norm": 1.7422531843185425, "learning_rate": 7.999929736493046e-06, "loss": 1.0152, "step": 10064 }, { "epoch": 0.58, "grad_norm": 1.9442970752716064, "learning_rate": 7.998109693501002e-06, "loss": 0.927, "step": 10065 }, { "epoch": 0.58, "grad_norm": 1.6801656484603882, "learning_rate": 7.996289719588604e-06, "loss": 0.9136, "step": 10066 }, { "epoch": 0.58, "grad_norm": 1.7054427862167358, "learning_rate": 7.994469814818647e-06, "loss": 0.994, "step": 10067 }, { "epoch": 0.58, "grad_norm": 1.7884531021118164, "learning_rate": 7.992649979253934e-06, "loss": 1.0502, "step": 10068 }, { "epoch": 0.58, "grad_norm": 1.7164208889007568, "learning_rate": 7.990830212957266e-06, "loss": 0.9148, "step": 10069 }, { "epoch": 0.58, "grad_norm": 1.9825434684753418, "learning_rate": 7.989010515991433e-06, "loss": 0.9415, "step": 10070 }, { "epoch": 0.58, "grad_norm": 1.7256280183792114, "learning_rate": 7.987190888419229e-06, "loss": 0.9046, "step": 10071 }, { "epoch": 0.58, "grad_norm": 1.1246126890182495, "learning_rate": 7.985371330303446e-06, "loss": 0.5958, "step": 10072 }, { "epoch": 0.58, "grad_norm": 1.8341796398162842, "learning_rate": 7.983551841706869e-06, "loss": 1.0017, "step": 10073 }, { "epoch": 0.58, "grad_norm": 1.876781940460205, "learning_rate": 7.981732422692288e-06, "loss": 0.9257, "step": 10074 }, { "epoch": 0.58, "grad_norm": 1.823730707168579, "learning_rate": 7.979913073322482e-06, "loss": 1.043, "step": 10075 }, { "epoch": 0.58, "grad_norm": 1.6162852048873901, "learning_rate": 7.978093793660234e-06, "loss": 0.9329, "step": 10076 }, { "epoch": 0.58, "grad_norm": 1.8555052280426025, "learning_rate": 7.976274583768322e-06, "loss": 0.9239, "step": 10077 }, { "epoch": 0.58, "grad_norm": 1.7418303489685059, "learning_rate": 7.974455443709519e-06, "loss": 0.9221, "step": 10078 }, { "epoch": 0.58, "grad_norm": 1.0510542392730713, "learning_rate": 7.9726363735466e-06, "loss": 0.5679, "step": 10079 }, { "epoch": 0.58, "grad_norm": 1.6530357599258423, "learning_rate": 7.97081737334234e-06, "loss": 0.9626, "step": 10080 }, { "epoch": 0.58, "grad_norm": 1.9245362281799316, "learning_rate": 7.968998443159502e-06, "loss": 1.0087, "step": 10081 }, { "epoch": 0.58, "grad_norm": 1.8035534620285034, "learning_rate": 7.967179583060853e-06, "loss": 1.0413, "step": 10082 }, { "epoch": 0.58, "grad_norm": 1.6676276922225952, "learning_rate": 7.96536079310916e-06, "loss": 1.0009, "step": 10083 }, { "epoch": 0.58, "grad_norm": 1.9292770624160767, "learning_rate": 7.963542073367183e-06, "loss": 0.8833, "step": 10084 }, { "epoch": 0.58, "grad_norm": 1.8010612726211548, "learning_rate": 7.961723423897676e-06, "loss": 1.0337, "step": 10085 }, { "epoch": 0.58, "grad_norm": 1.8244218826293945, "learning_rate": 7.959904844763405e-06, "loss": 0.9809, "step": 10086 }, { "epoch": 0.58, "grad_norm": 1.7090595960617065, "learning_rate": 7.958086336027116e-06, "loss": 1.0106, "step": 10087 }, { "epoch": 0.58, "grad_norm": 1.880307674407959, "learning_rate": 7.956267897751566e-06, "loss": 0.964, "step": 10088 }, { "epoch": 0.58, "grad_norm": 1.741143822669983, "learning_rate": 7.954449529999501e-06, "loss": 0.9734, "step": 10089 }, { "epoch": 0.58, "grad_norm": 1.65348219871521, "learning_rate": 7.952631232833669e-06, "loss": 0.8861, "step": 10090 }, { "epoch": 0.58, "grad_norm": 1.9793020486831665, "learning_rate": 7.950813006316813e-06, "loss": 1.0093, "step": 10091 }, { "epoch": 0.58, "grad_norm": 1.5826938152313232, "learning_rate": 7.948994850511678e-06, "loss": 0.932, "step": 10092 }, { "epoch": 0.58, "grad_norm": 1.7913683652877808, "learning_rate": 7.947176765481e-06, "loss": 0.8892, "step": 10093 }, { "epoch": 0.58, "grad_norm": 1.6705474853515625, "learning_rate": 7.945358751287518e-06, "loss": 0.9189, "step": 10094 }, { "epoch": 0.58, "grad_norm": 1.7742568254470825, "learning_rate": 7.943540807993965e-06, "loss": 0.9934, "step": 10095 }, { "epoch": 0.58, "grad_norm": 1.6560449600219727, "learning_rate": 7.941722935663076e-06, "loss": 0.933, "step": 10096 }, { "epoch": 0.58, "grad_norm": 1.8294296264648438, "learning_rate": 7.939905134357574e-06, "loss": 0.9435, "step": 10097 }, { "epoch": 0.58, "grad_norm": 1.8178365230560303, "learning_rate": 7.938087404140196e-06, "loss": 0.9668, "step": 10098 }, { "epoch": 0.58, "grad_norm": 1.7293378114700317, "learning_rate": 7.936269745073661e-06, "loss": 0.8747, "step": 10099 }, { "epoch": 0.58, "grad_norm": 1.6465486288070679, "learning_rate": 7.934452157220693e-06, "loss": 0.8598, "step": 10100 }, { "epoch": 0.58, "grad_norm": 1.574589729309082, "learning_rate": 7.93263464064401e-06, "loss": 1.0007, "step": 10101 }, { "epoch": 0.58, "grad_norm": 1.5112907886505127, "learning_rate": 7.930817195406332e-06, "loss": 0.8726, "step": 10102 }, { "epoch": 0.58, "grad_norm": 1.0750426054000854, "learning_rate": 7.928999821570372e-06, "loss": 0.576, "step": 10103 }, { "epoch": 0.58, "grad_norm": 1.7453200817108154, "learning_rate": 7.927182519198843e-06, "loss": 0.9616, "step": 10104 }, { "epoch": 0.58, "grad_norm": 1.7644621133804321, "learning_rate": 7.925365288354453e-06, "loss": 1.017, "step": 10105 }, { "epoch": 0.58, "grad_norm": 1.6096513271331787, "learning_rate": 7.923548129099914e-06, "loss": 0.9792, "step": 10106 }, { "epoch": 0.58, "grad_norm": 1.7948856353759766, "learning_rate": 7.921731041497928e-06, "loss": 1.0133, "step": 10107 }, { "epoch": 0.58, "grad_norm": 1.5832029581069946, "learning_rate": 7.9199140256112e-06, "loss": 0.9992, "step": 10108 }, { "epoch": 0.58, "grad_norm": 1.7737842798233032, "learning_rate": 7.918097081502426e-06, "loss": 0.9635, "step": 10109 }, { "epoch": 0.58, "grad_norm": 1.668912410736084, "learning_rate": 7.916280209234307e-06, "loss": 0.9632, "step": 10110 }, { "epoch": 0.58, "grad_norm": 1.6767700910568237, "learning_rate": 7.914463408869537e-06, "loss": 0.8997, "step": 10111 }, { "epoch": 0.58, "grad_norm": 1.7865148782730103, "learning_rate": 7.91264668047081e-06, "loss": 0.9156, "step": 10112 }, { "epoch": 0.58, "grad_norm": 1.7553844451904297, "learning_rate": 7.910830024100816e-06, "loss": 0.9804, "step": 10113 }, { "epoch": 0.58, "grad_norm": 1.698000192642212, "learning_rate": 7.90901343982224e-06, "loss": 1.0, "step": 10114 }, { "epoch": 0.58, "grad_norm": 1.5750150680541992, "learning_rate": 7.90719692769777e-06, "loss": 0.9558, "step": 10115 }, { "epoch": 0.58, "grad_norm": 1.8725422620773315, "learning_rate": 7.905380487790088e-06, "loss": 0.9985, "step": 10116 }, { "epoch": 0.58, "grad_norm": 1.736026406288147, "learning_rate": 7.903564120161876e-06, "loss": 1.009, "step": 10117 }, { "epoch": 0.58, "grad_norm": 1.7956511974334717, "learning_rate": 7.901747824875807e-06, "loss": 0.9785, "step": 10118 }, { "epoch": 0.58, "grad_norm": 1.9132428169250488, "learning_rate": 7.89993160199456e-06, "loss": 1.0134, "step": 10119 }, { "epoch": 0.58, "grad_norm": 1.8169267177581787, "learning_rate": 7.898115451580809e-06, "loss": 0.9388, "step": 10120 }, { "epoch": 0.58, "grad_norm": 1.7750738859176636, "learning_rate": 7.896299373697221e-06, "loss": 0.998, "step": 10121 }, { "epoch": 0.58, "grad_norm": 1.8177005052566528, "learning_rate": 7.894483368406464e-06, "loss": 0.8983, "step": 10122 }, { "epoch": 0.58, "grad_norm": 1.6939191818237305, "learning_rate": 7.892667435771207e-06, "loss": 0.9132, "step": 10123 }, { "epoch": 0.58, "grad_norm": 1.83375883102417, "learning_rate": 7.890851575854108e-06, "loss": 0.9686, "step": 10124 }, { "epoch": 0.58, "grad_norm": 1.798264503479004, "learning_rate": 7.88903578871783e-06, "loss": 0.9657, "step": 10125 }, { "epoch": 0.58, "grad_norm": 1.6605677604675293, "learning_rate": 7.887220074425032e-06, "loss": 0.9687, "step": 10126 }, { "epoch": 0.58, "grad_norm": 1.6503233909606934, "learning_rate": 7.885404433038366e-06, "loss": 0.9301, "step": 10127 }, { "epoch": 0.58, "grad_norm": 1.7085896730422974, "learning_rate": 7.883588864620486e-06, "loss": 0.9524, "step": 10128 }, { "epoch": 0.58, "grad_norm": 1.6330724954605103, "learning_rate": 7.881773369234043e-06, "loss": 0.9499, "step": 10129 }, { "epoch": 0.58, "grad_norm": 1.8343591690063477, "learning_rate": 7.879957946941683e-06, "loss": 1.0077, "step": 10130 }, { "epoch": 0.58, "grad_norm": 1.7827332019805908, "learning_rate": 7.878142597806054e-06, "loss": 0.927, "step": 10131 }, { "epoch": 0.58, "grad_norm": 1.814094066619873, "learning_rate": 7.876327321889794e-06, "loss": 0.9986, "step": 10132 }, { "epoch": 0.58, "grad_norm": 1.7959904670715332, "learning_rate": 7.87451211925555e-06, "loss": 0.9828, "step": 10133 }, { "epoch": 0.58, "grad_norm": 1.809969425201416, "learning_rate": 7.87269698996595e-06, "loss": 0.8209, "step": 10134 }, { "epoch": 0.58, "grad_norm": 1.641908884048462, "learning_rate": 7.870881934083637e-06, "loss": 0.9187, "step": 10135 }, { "epoch": 0.58, "grad_norm": 1.74166738986969, "learning_rate": 7.869066951671241e-06, "loss": 0.8763, "step": 10136 }, { "epoch": 0.58, "grad_norm": 1.7387738227844238, "learning_rate": 7.867252042791392e-06, "loss": 0.8779, "step": 10137 }, { "epoch": 0.58, "grad_norm": 1.7424275875091553, "learning_rate": 7.865437207506716e-06, "loss": 0.9328, "step": 10138 }, { "epoch": 0.58, "grad_norm": 1.7437899112701416, "learning_rate": 7.86362244587984e-06, "loss": 0.8659, "step": 10139 }, { "epoch": 0.58, "grad_norm": 1.9675076007843018, "learning_rate": 7.861807757973386e-06, "loss": 0.9703, "step": 10140 }, { "epoch": 0.58, "grad_norm": 1.6945221424102783, "learning_rate": 7.859993143849976e-06, "loss": 0.9291, "step": 10141 }, { "epoch": 0.58, "grad_norm": 1.7178763151168823, "learning_rate": 7.858178603572222e-06, "loss": 0.943, "step": 10142 }, { "epoch": 0.58, "grad_norm": 1.7484416961669922, "learning_rate": 7.856364137202742e-06, "loss": 0.9301, "step": 10143 }, { "epoch": 0.58, "grad_norm": 1.7419393062591553, "learning_rate": 7.85454974480415e-06, "loss": 0.8959, "step": 10144 }, { "epoch": 0.58, "grad_norm": 1.6358963251113892, "learning_rate": 7.85273542643905e-06, "loss": 0.9806, "step": 10145 }, { "epoch": 0.58, "grad_norm": 1.0152032375335693, "learning_rate": 7.850921182170053e-06, "loss": 0.5685, "step": 10146 }, { "epoch": 0.58, "grad_norm": 1.6213083267211914, "learning_rate": 7.849107012059765e-06, "loss": 0.8766, "step": 10147 }, { "epoch": 0.58, "grad_norm": 2.02239727973938, "learning_rate": 7.847292916170783e-06, "loss": 0.9914, "step": 10148 }, { "epoch": 0.58, "grad_norm": 1.9953396320343018, "learning_rate": 7.84547889456571e-06, "loss": 1.0276, "step": 10149 }, { "epoch": 0.58, "grad_norm": 1.780761957168579, "learning_rate": 7.843664947307143e-06, "loss": 0.9116, "step": 10150 }, { "epoch": 0.58, "grad_norm": 1.7339656352996826, "learning_rate": 7.841851074457672e-06, "loss": 0.8628, "step": 10151 }, { "epoch": 0.58, "grad_norm": 1.8337950706481934, "learning_rate": 7.840037276079895e-06, "loss": 1.0312, "step": 10152 }, { "epoch": 0.58, "grad_norm": 1.6772739887237549, "learning_rate": 7.838223552236396e-06, "loss": 0.9894, "step": 10153 }, { "epoch": 0.58, "grad_norm": 1.0506101846694946, "learning_rate": 7.836409902989766e-06, "loss": 0.6064, "step": 10154 }, { "epoch": 0.58, "grad_norm": 1.6640840768814087, "learning_rate": 7.834596328402585e-06, "loss": 0.9619, "step": 10155 }, { "epoch": 0.58, "grad_norm": 1.6776657104492188, "learning_rate": 7.832782828537437e-06, "loss": 0.8892, "step": 10156 }, { "epoch": 0.58, "grad_norm": 1.7512624263763428, "learning_rate": 7.830969403456899e-06, "loss": 0.8953, "step": 10157 }, { "epoch": 0.58, "grad_norm": 1.7777304649353027, "learning_rate": 7.829156053223546e-06, "loss": 1.0109, "step": 10158 }, { "epoch": 0.58, "grad_norm": 1.8017364740371704, "learning_rate": 7.827342777899956e-06, "loss": 1.0039, "step": 10159 }, { "epoch": 0.58, "grad_norm": 1.9505505561828613, "learning_rate": 7.825529577548698e-06, "loss": 0.9583, "step": 10160 }, { "epoch": 0.58, "grad_norm": 1.7678524255752563, "learning_rate": 7.823716452232339e-06, "loss": 0.863, "step": 10161 }, { "epoch": 0.58, "grad_norm": 1.674464225769043, "learning_rate": 7.821903402013447e-06, "loss": 0.9751, "step": 10162 }, { "epoch": 0.58, "grad_norm": 1.6768087148666382, "learning_rate": 7.820090426954583e-06, "loss": 0.8927, "step": 10163 }, { "epoch": 0.58, "grad_norm": 1.72789466381073, "learning_rate": 7.818277527118308e-06, "loss": 0.9972, "step": 10164 }, { "epoch": 0.58, "grad_norm": 1.6604609489440918, "learning_rate": 7.816464702567182e-06, "loss": 0.9636, "step": 10165 }, { "epoch": 0.58, "grad_norm": 0.9407632350921631, "learning_rate": 7.81465195336376e-06, "loss": 0.5517, "step": 10166 }, { "epoch": 0.58, "grad_norm": 1.732412338256836, "learning_rate": 7.812839279570596e-06, "loss": 0.8963, "step": 10167 }, { "epoch": 0.58, "grad_norm": 1.7932050228118896, "learning_rate": 7.811026681250237e-06, "loss": 0.9524, "step": 10168 }, { "epoch": 0.58, "grad_norm": 1.8364158868789673, "learning_rate": 7.809214158465234e-06, "loss": 0.9195, "step": 10169 }, { "epoch": 0.58, "grad_norm": 1.795823574066162, "learning_rate": 7.807401711278132e-06, "loss": 0.9191, "step": 10170 }, { "epoch": 0.58, "grad_norm": 1.636948585510254, "learning_rate": 7.80558933975147e-06, "loss": 1.0192, "step": 10171 }, { "epoch": 0.58, "grad_norm": 1.7334684133529663, "learning_rate": 7.80377704394779e-06, "loss": 0.9352, "step": 10172 }, { "epoch": 0.58, "grad_norm": 1.6473677158355713, "learning_rate": 7.801964823929628e-06, "loss": 0.984, "step": 10173 }, { "epoch": 0.58, "grad_norm": 1.5900925397872925, "learning_rate": 7.800152679759523e-06, "loss": 1.0016, "step": 10174 }, { "epoch": 0.58, "grad_norm": 1.7851715087890625, "learning_rate": 7.798340611500002e-06, "loss": 0.9217, "step": 10175 }, { "epoch": 0.58, "grad_norm": 1.6785316467285156, "learning_rate": 7.796528619213594e-06, "loss": 0.8743, "step": 10176 }, { "epoch": 0.58, "grad_norm": 1.8954955339431763, "learning_rate": 7.794716702962832e-06, "loss": 0.9065, "step": 10177 }, { "epoch": 0.58, "grad_norm": 1.8364949226379395, "learning_rate": 7.792904862810236e-06, "loss": 0.9438, "step": 10178 }, { "epoch": 0.58, "grad_norm": 1.845595359802246, "learning_rate": 7.791093098818328e-06, "loss": 0.8886, "step": 10179 }, { "epoch": 0.58, "grad_norm": 1.7551746368408203, "learning_rate": 7.789281411049626e-06, "loss": 0.9246, "step": 10180 }, { "epoch": 0.58, "grad_norm": 1.775098204612732, "learning_rate": 7.787469799566647e-06, "loss": 1.0187, "step": 10181 }, { "epoch": 0.58, "grad_norm": 1.5932780504226685, "learning_rate": 7.785658264431906e-06, "loss": 0.9131, "step": 10182 }, { "epoch": 0.58, "grad_norm": 1.742674708366394, "learning_rate": 7.783846805707911e-06, "loss": 0.882, "step": 10183 }, { "epoch": 0.58, "grad_norm": 1.6689084768295288, "learning_rate": 7.782035423457173e-06, "loss": 0.959, "step": 10184 }, { "epoch": 0.58, "grad_norm": 1.8702350854873657, "learning_rate": 7.780224117742197e-06, "loss": 1.0103, "step": 10185 }, { "epoch": 0.58, "grad_norm": 1.9909065961837769, "learning_rate": 7.778412888625486e-06, "loss": 1.028, "step": 10186 }, { "epoch": 0.58, "grad_norm": 1.704916000366211, "learning_rate": 7.776601736169542e-06, "loss": 0.9748, "step": 10187 }, { "epoch": 0.58, "grad_norm": 1.9401311874389648, "learning_rate": 7.774790660436857e-06, "loss": 0.9973, "step": 10188 }, { "epoch": 0.58, "grad_norm": 1.815200686454773, "learning_rate": 7.772979661489934e-06, "loss": 0.9165, "step": 10189 }, { "epoch": 0.58, "grad_norm": 1.7874748706817627, "learning_rate": 7.77116873939126e-06, "loss": 0.9812, "step": 10190 }, { "epoch": 0.58, "grad_norm": 1.7537847757339478, "learning_rate": 7.769357894203329e-06, "loss": 0.9927, "step": 10191 }, { "epoch": 0.58, "grad_norm": 1.6697522401809692, "learning_rate": 7.767547125988624e-06, "loss": 0.9763, "step": 10192 }, { "epoch": 0.58, "grad_norm": 1.7794636487960815, "learning_rate": 7.765736434809633e-06, "loss": 0.9326, "step": 10193 }, { "epoch": 0.58, "grad_norm": 1.832753300666809, "learning_rate": 7.763925820728838e-06, "loss": 0.9674, "step": 10194 }, { "epoch": 0.58, "grad_norm": 1.7453020811080933, "learning_rate": 7.762115283808713e-06, "loss": 0.9168, "step": 10195 }, { "epoch": 0.58, "grad_norm": 1.8311960697174072, "learning_rate": 7.760304824111741e-06, "loss": 0.8921, "step": 10196 }, { "epoch": 0.58, "grad_norm": 1.859073519706726, "learning_rate": 7.758494441700391e-06, "loss": 0.9462, "step": 10197 }, { "epoch": 0.58, "grad_norm": 1.7316911220550537, "learning_rate": 7.756684136637139e-06, "loss": 0.9028, "step": 10198 }, { "epoch": 0.58, "grad_norm": 1.588106632232666, "learning_rate": 7.75487390898445e-06, "loss": 0.973, "step": 10199 }, { "epoch": 0.58, "grad_norm": 1.7877694368362427, "learning_rate": 7.753063758804787e-06, "loss": 0.9899, "step": 10200 }, { "epoch": 0.59, "grad_norm": 2.2713561058044434, "learning_rate": 7.751253686160621e-06, "loss": 0.907, "step": 10201 }, { "epoch": 0.59, "grad_norm": 1.5533236265182495, "learning_rate": 7.749443691114409e-06, "loss": 0.9074, "step": 10202 }, { "epoch": 0.59, "grad_norm": 1.7497491836547852, "learning_rate": 7.747633773728606e-06, "loss": 0.9786, "step": 10203 }, { "epoch": 0.59, "grad_norm": 1.9755339622497559, "learning_rate": 7.745823934065672e-06, "loss": 0.9897, "step": 10204 }, { "epoch": 0.59, "grad_norm": 1.7884821891784668, "learning_rate": 7.744014172188055e-06, "loss": 0.9738, "step": 10205 }, { "epoch": 0.59, "grad_norm": 1.7109590768814087, "learning_rate": 7.742204488158207e-06, "loss": 0.8953, "step": 10206 }, { "epoch": 0.59, "grad_norm": 1.713253378868103, "learning_rate": 7.740394882038578e-06, "loss": 0.9875, "step": 10207 }, { "epoch": 0.59, "grad_norm": 1.8571285009384155, "learning_rate": 7.738585353891609e-06, "loss": 0.9634, "step": 10208 }, { "epoch": 0.59, "grad_norm": 1.8915430307388306, "learning_rate": 7.736775903779744e-06, "loss": 0.8547, "step": 10209 }, { "epoch": 0.59, "grad_norm": 1.8626590967178345, "learning_rate": 7.734966531765417e-06, "loss": 0.9033, "step": 10210 }, { "epoch": 0.59, "grad_norm": 1.676995873451233, "learning_rate": 7.73315723791107e-06, "loss": 0.9495, "step": 10211 }, { "epoch": 0.59, "grad_norm": 1.7670599222183228, "learning_rate": 7.731348022279135e-06, "loss": 0.9953, "step": 10212 }, { "epoch": 0.59, "grad_norm": 1.7109105587005615, "learning_rate": 7.72953888493204e-06, "loss": 0.8834, "step": 10213 }, { "epoch": 0.59, "grad_norm": 1.0694222450256348, "learning_rate": 7.727729825932218e-06, "loss": 0.6004, "step": 10214 }, { "epoch": 0.59, "grad_norm": 1.8354840278625488, "learning_rate": 7.725920845342091e-06, "loss": 0.9872, "step": 10215 }, { "epoch": 0.59, "grad_norm": 1.745019793510437, "learning_rate": 7.724111943224085e-06, "loss": 0.8719, "step": 10216 }, { "epoch": 0.59, "grad_norm": 1.8354718685150146, "learning_rate": 7.722303119640616e-06, "loss": 0.8687, "step": 10217 }, { "epoch": 0.59, "grad_norm": 1.8223716020584106, "learning_rate": 7.720494374654104e-06, "loss": 0.9698, "step": 10218 }, { "epoch": 0.59, "grad_norm": 1.8163396120071411, "learning_rate": 7.718685708326965e-06, "loss": 0.9147, "step": 10219 }, { "epoch": 0.59, "grad_norm": 1.718186616897583, "learning_rate": 7.716877120721612e-06, "loss": 1.0772, "step": 10220 }, { "epoch": 0.59, "grad_norm": 1.687212347984314, "learning_rate": 7.71506861190045e-06, "loss": 0.9258, "step": 10221 }, { "epoch": 0.59, "grad_norm": 1.6812412738800049, "learning_rate": 7.713260181925886e-06, "loss": 0.9733, "step": 10222 }, { "epoch": 0.59, "grad_norm": 1.6774266958236694, "learning_rate": 7.711451830860325e-06, "loss": 0.9446, "step": 10223 }, { "epoch": 0.59, "grad_norm": 1.5561604499816895, "learning_rate": 7.70964355876617e-06, "loss": 0.8825, "step": 10224 }, { "epoch": 0.59, "grad_norm": 1.8358871936798096, "learning_rate": 7.70783536570582e-06, "loss": 0.9304, "step": 10225 }, { "epoch": 0.59, "grad_norm": 1.658037781715393, "learning_rate": 7.706027251741666e-06, "loss": 0.9222, "step": 10226 }, { "epoch": 0.59, "grad_norm": 1.6748814582824707, "learning_rate": 7.704219216936104e-06, "loss": 0.9108, "step": 10227 }, { "epoch": 0.59, "grad_norm": 2.3357272148132324, "learning_rate": 7.702411261351524e-06, "loss": 0.8942, "step": 10228 }, { "epoch": 0.59, "grad_norm": 1.8117284774780273, "learning_rate": 7.700603385050312e-06, "loss": 0.9566, "step": 10229 }, { "epoch": 0.59, "grad_norm": 1.9078834056854248, "learning_rate": 7.698795588094855e-06, "loss": 0.9866, "step": 10230 }, { "epoch": 0.59, "grad_norm": 1.9172916412353516, "learning_rate": 7.696987870547533e-06, "loss": 1.0027, "step": 10231 }, { "epoch": 0.59, "grad_norm": 1.7475751638412476, "learning_rate": 7.695180232470727e-06, "loss": 0.9186, "step": 10232 }, { "epoch": 0.59, "grad_norm": 2.149897575378418, "learning_rate": 7.693372673926814e-06, "loss": 0.9182, "step": 10233 }, { "epoch": 0.59, "grad_norm": 1.6616199016571045, "learning_rate": 7.691565194978167e-06, "loss": 1.0003, "step": 10234 }, { "epoch": 0.59, "grad_norm": 1.7549474239349365, "learning_rate": 7.689757795687156e-06, "loss": 0.9944, "step": 10235 }, { "epoch": 0.59, "grad_norm": 1.0900800228118896, "learning_rate": 7.68795047611615e-06, "loss": 0.6007, "step": 10236 }, { "epoch": 0.59, "grad_norm": 1.0690689086914062, "learning_rate": 7.686143236327515e-06, "loss": 0.5851, "step": 10237 }, { "epoch": 0.59, "grad_norm": 1.01665461063385, "learning_rate": 7.684336076383614e-06, "loss": 0.5323, "step": 10238 }, { "epoch": 0.59, "grad_norm": 1.686279535293579, "learning_rate": 7.682528996346805e-06, "loss": 0.9305, "step": 10239 }, { "epoch": 0.59, "grad_norm": 1.7071090936660767, "learning_rate": 7.680721996279448e-06, "loss": 0.9139, "step": 10240 }, { "epoch": 0.59, "grad_norm": 1.8833144903182983, "learning_rate": 7.678915076243895e-06, "loss": 0.9461, "step": 10241 }, { "epoch": 0.59, "grad_norm": 1.81498384475708, "learning_rate": 7.677108236302499e-06, "loss": 0.9373, "step": 10242 }, { "epoch": 0.59, "grad_norm": 1.7244994640350342, "learning_rate": 7.675301476517609e-06, "loss": 0.8794, "step": 10243 }, { "epoch": 0.59, "grad_norm": 1.7051539421081543, "learning_rate": 7.673494796951573e-06, "loss": 0.9557, "step": 10244 }, { "epoch": 0.59, "grad_norm": 1.7417227029800415, "learning_rate": 7.671688197666731e-06, "loss": 1.0163, "step": 10245 }, { "epoch": 0.59, "grad_norm": 1.7964205741882324, "learning_rate": 7.669881678725426e-06, "loss": 0.9567, "step": 10246 }, { "epoch": 0.59, "grad_norm": 1.6077373027801514, "learning_rate": 7.668075240189996e-06, "loss": 0.8828, "step": 10247 }, { "epoch": 0.59, "grad_norm": 1.931624174118042, "learning_rate": 7.666268882122775e-06, "loss": 0.9854, "step": 10248 }, { "epoch": 0.59, "grad_norm": 1.9547218084335327, "learning_rate": 7.664462604586095e-06, "loss": 1.0109, "step": 10249 }, { "epoch": 0.59, "grad_norm": 1.7365643978118896, "learning_rate": 7.662656407642288e-06, "loss": 0.9014, "step": 10250 }, { "epoch": 0.59, "grad_norm": 1.807495355606079, "learning_rate": 7.660850291353679e-06, "loss": 0.9813, "step": 10251 }, { "epoch": 0.59, "grad_norm": 1.787226915359497, "learning_rate": 7.659044255782592e-06, "loss": 0.9302, "step": 10252 }, { "epoch": 0.59, "grad_norm": 1.7445251941680908, "learning_rate": 7.65723830099135e-06, "loss": 0.9756, "step": 10253 }, { "epoch": 0.59, "grad_norm": 1.7434368133544922, "learning_rate": 7.655432427042266e-06, "loss": 0.9532, "step": 10254 }, { "epoch": 0.59, "grad_norm": 1.8607277870178223, "learning_rate": 7.653626633997661e-06, "loss": 0.924, "step": 10255 }, { "epoch": 0.59, "grad_norm": 1.94423246383667, "learning_rate": 7.651820921919848e-06, "loss": 0.9836, "step": 10256 }, { "epoch": 0.59, "grad_norm": 1.7074990272521973, "learning_rate": 7.650015290871135e-06, "loss": 0.9772, "step": 10257 }, { "epoch": 0.59, "grad_norm": 1.7803990840911865, "learning_rate": 7.648209740913831e-06, "loss": 0.986, "step": 10258 }, { "epoch": 0.59, "grad_norm": 1.8392260074615479, "learning_rate": 7.646404272110238e-06, "loss": 0.9683, "step": 10259 }, { "epoch": 0.59, "grad_norm": 1.81463623046875, "learning_rate": 7.644598884522659e-06, "loss": 0.9455, "step": 10260 }, { "epoch": 0.59, "grad_norm": 1.7497608661651611, "learning_rate": 7.642793578213394e-06, "loss": 0.9348, "step": 10261 }, { "epoch": 0.59, "grad_norm": 1.675525188446045, "learning_rate": 7.640988353244739e-06, "loss": 0.9311, "step": 10262 }, { "epoch": 0.59, "grad_norm": 1.8487236499786377, "learning_rate": 7.639183209678984e-06, "loss": 0.97, "step": 10263 }, { "epoch": 0.59, "grad_norm": 1.765504240989685, "learning_rate": 7.637378147578422e-06, "loss": 0.942, "step": 10264 }, { "epoch": 0.59, "grad_norm": 1.772786259651184, "learning_rate": 7.63557316700534e-06, "loss": 0.9335, "step": 10265 }, { "epoch": 0.59, "grad_norm": 1.7995020151138306, "learning_rate": 7.633768268022023e-06, "loss": 0.9533, "step": 10266 }, { "epoch": 0.59, "grad_norm": 1.7972110509872437, "learning_rate": 7.631963450690755e-06, "loss": 0.9128, "step": 10267 }, { "epoch": 0.59, "grad_norm": 1.8026013374328613, "learning_rate": 7.630158715073813e-06, "loss": 0.9882, "step": 10268 }, { "epoch": 0.59, "grad_norm": 1.8926401138305664, "learning_rate": 7.628354061233472e-06, "loss": 0.8931, "step": 10269 }, { "epoch": 0.59, "grad_norm": 1.671213150024414, "learning_rate": 7.626549489232009e-06, "loss": 0.9555, "step": 10270 }, { "epoch": 0.59, "grad_norm": 1.1038405895233154, "learning_rate": 7.624744999131691e-06, "loss": 0.6132, "step": 10271 }, { "epoch": 0.59, "grad_norm": 1.7962102890014648, "learning_rate": 7.6229405909947915e-06, "loss": 0.9754, "step": 10272 }, { "epoch": 0.59, "grad_norm": 1.042463779449463, "learning_rate": 7.621136264883571e-06, "loss": 0.5654, "step": 10273 }, { "epoch": 0.59, "grad_norm": 1.761847972869873, "learning_rate": 7.619332020860293e-06, "loss": 1.0196, "step": 10274 }, { "epoch": 0.59, "grad_norm": 1.72986900806427, "learning_rate": 7.617527858987217e-06, "loss": 1.0209, "step": 10275 }, { "epoch": 0.59, "grad_norm": 1.6604474782943726, "learning_rate": 7.6157237793265996e-06, "loss": 0.9374, "step": 10276 }, { "epoch": 0.59, "grad_norm": 1.5567196607589722, "learning_rate": 7.613919781940694e-06, "loss": 1.004, "step": 10277 }, { "epoch": 0.59, "grad_norm": 1.776759147644043, "learning_rate": 7.612115866891751e-06, "loss": 0.9036, "step": 10278 }, { "epoch": 0.59, "grad_norm": 1.7751779556274414, "learning_rate": 7.61031203424202e-06, "loss": 0.9085, "step": 10279 }, { "epoch": 0.59, "grad_norm": 1.7614938020706177, "learning_rate": 7.608508284053746e-06, "loss": 0.9481, "step": 10280 }, { "epoch": 0.59, "grad_norm": 1.625900387763977, "learning_rate": 7.606704616389169e-06, "loss": 0.8383, "step": 10281 }, { "epoch": 0.59, "grad_norm": 1.795332431793213, "learning_rate": 7.604901031310532e-06, "loss": 0.9474, "step": 10282 }, { "epoch": 0.59, "grad_norm": 1.7623851299285889, "learning_rate": 7.603097528880067e-06, "loss": 0.9397, "step": 10283 }, { "epoch": 0.59, "grad_norm": 1.5453157424926758, "learning_rate": 7.601294109160012e-06, "loss": 0.9392, "step": 10284 }, { "epoch": 0.59, "grad_norm": 1.791609287261963, "learning_rate": 7.599490772212599e-06, "loss": 0.9385, "step": 10285 }, { "epoch": 0.59, "grad_norm": 1.7089269161224365, "learning_rate": 7.597687518100052e-06, "loss": 0.9342, "step": 10286 }, { "epoch": 0.59, "grad_norm": 2.1518092155456543, "learning_rate": 7.595884346884599e-06, "loss": 0.9135, "step": 10287 }, { "epoch": 0.59, "grad_norm": 1.794111728668213, "learning_rate": 7.594081258628461e-06, "loss": 0.918, "step": 10288 }, { "epoch": 0.59, "grad_norm": 1.7815576791763306, "learning_rate": 7.592278253393859e-06, "loss": 0.9136, "step": 10289 }, { "epoch": 0.59, "grad_norm": 1.6652026176452637, "learning_rate": 7.590475331243008e-06, "loss": 0.9964, "step": 10290 }, { "epoch": 0.59, "grad_norm": 0.9116684794425964, "learning_rate": 7.588672492238123e-06, "loss": 0.526, "step": 10291 }, { "epoch": 0.59, "grad_norm": 1.0427523851394653, "learning_rate": 7.586869736441413e-06, "loss": 0.5481, "step": 10292 }, { "epoch": 0.59, "grad_norm": 1.7694807052612305, "learning_rate": 7.5850670639150904e-06, "loss": 0.8908, "step": 10293 }, { "epoch": 0.59, "grad_norm": 1.902209997177124, "learning_rate": 7.583264474721356e-06, "loss": 0.9052, "step": 10294 }, { "epoch": 0.59, "grad_norm": 1.9816991090774536, "learning_rate": 7.581461968922413e-06, "loss": 0.9873, "step": 10295 }, { "epoch": 0.59, "grad_norm": 1.9553471803665161, "learning_rate": 7.5796595465804616e-06, "loss": 0.9211, "step": 10296 }, { "epoch": 0.59, "grad_norm": 1.6685162782669067, "learning_rate": 7.577857207757698e-06, "loss": 0.924, "step": 10297 }, { "epoch": 0.59, "grad_norm": 1.6475584506988525, "learning_rate": 7.576054952516318e-06, "loss": 0.8465, "step": 10298 }, { "epoch": 0.59, "grad_norm": 1.8763052225112915, "learning_rate": 7.57425278091851e-06, "loss": 0.9843, "step": 10299 }, { "epoch": 0.59, "grad_norm": 1.7583242654800415, "learning_rate": 7.572450693026462e-06, "loss": 0.9446, "step": 10300 }, { "epoch": 0.59, "grad_norm": 1.6700351238250732, "learning_rate": 7.57064868890236e-06, "loss": 0.9628, "step": 10301 }, { "epoch": 0.59, "grad_norm": 1.8223263025283813, "learning_rate": 7.5688467686083845e-06, "loss": 0.9452, "step": 10302 }, { "epoch": 0.59, "grad_norm": 1.9018670320510864, "learning_rate": 7.567044932206717e-06, "loss": 0.9898, "step": 10303 }, { "epoch": 0.59, "grad_norm": 1.9095746278762817, "learning_rate": 7.565243179759533e-06, "loss": 0.9818, "step": 10304 }, { "epoch": 0.59, "grad_norm": 1.5821911096572876, "learning_rate": 7.563441511329005e-06, "loss": 0.843, "step": 10305 }, { "epoch": 0.59, "grad_norm": 1.8519865274429321, "learning_rate": 7.561639926977304e-06, "loss": 1.0019, "step": 10306 }, { "epoch": 0.59, "grad_norm": 1.8512591123580933, "learning_rate": 7.559838426766598e-06, "loss": 0.9718, "step": 10307 }, { "epoch": 0.59, "grad_norm": 1.7307250499725342, "learning_rate": 7.55803701075905e-06, "loss": 0.9418, "step": 10308 }, { "epoch": 0.59, "grad_norm": 1.7194267511367798, "learning_rate": 7.5562356790168256e-06, "loss": 0.9845, "step": 10309 }, { "epoch": 0.59, "grad_norm": 1.724992036819458, "learning_rate": 7.5544344316020804e-06, "loss": 0.8839, "step": 10310 }, { "epoch": 0.59, "grad_norm": 1.851946234703064, "learning_rate": 7.552633268576972e-06, "loss": 0.969, "step": 10311 }, { "epoch": 0.59, "grad_norm": 1.7737661600112915, "learning_rate": 7.550832190003654e-06, "loss": 0.9666, "step": 10312 }, { "epoch": 0.59, "grad_norm": 1.7596232891082764, "learning_rate": 7.549031195944274e-06, "loss": 0.9188, "step": 10313 }, { "epoch": 0.59, "grad_norm": 1.709008812904358, "learning_rate": 7.547230286460983e-06, "loss": 0.8701, "step": 10314 }, { "epoch": 0.59, "grad_norm": 1.0186187028884888, "learning_rate": 7.5454294616159215e-06, "loss": 0.5712, "step": 10315 }, { "epoch": 0.59, "grad_norm": 1.8753888607025146, "learning_rate": 7.543628721471234e-06, "loss": 0.9725, "step": 10316 }, { "epoch": 0.59, "grad_norm": 1.641258716583252, "learning_rate": 7.5418280660890565e-06, "loss": 0.9877, "step": 10317 }, { "epoch": 0.59, "grad_norm": 1.8166675567626953, "learning_rate": 7.540027495531527e-06, "loss": 0.9519, "step": 10318 }, { "epoch": 0.59, "grad_norm": 1.8743252754211426, "learning_rate": 7.538227009860775e-06, "loss": 1.0089, "step": 10319 }, { "epoch": 0.59, "grad_norm": 1.784569501876831, "learning_rate": 7.536426609138933e-06, "loss": 0.8955, "step": 10320 }, { "epoch": 0.59, "grad_norm": 1.7723467350006104, "learning_rate": 7.534626293428127e-06, "loss": 1.041, "step": 10321 }, { "epoch": 0.59, "grad_norm": 1.9346673488616943, "learning_rate": 7.532826062790482e-06, "loss": 0.9564, "step": 10322 }, { "epoch": 0.59, "grad_norm": 1.6984739303588867, "learning_rate": 7.531025917288116e-06, "loss": 0.9992, "step": 10323 }, { "epoch": 0.59, "grad_norm": 1.8053065538406372, "learning_rate": 7.529225856983151e-06, "loss": 0.9708, "step": 10324 }, { "epoch": 0.59, "grad_norm": 1.8159064054489136, "learning_rate": 7.527425881937699e-06, "loss": 0.9246, "step": 10325 }, { "epoch": 0.59, "grad_norm": 1.5712523460388184, "learning_rate": 7.525625992213872e-06, "loss": 0.9322, "step": 10326 }, { "epoch": 0.59, "grad_norm": 1.7395873069763184, "learning_rate": 7.5238261878737815e-06, "loss": 1.0413, "step": 10327 }, { "epoch": 0.59, "grad_norm": 1.6705348491668701, "learning_rate": 7.522026468979532e-06, "loss": 0.9279, "step": 10328 }, { "epoch": 0.59, "grad_norm": 1.8817106485366821, "learning_rate": 7.520226835593226e-06, "loss": 1.0056, "step": 10329 }, { "epoch": 0.59, "grad_norm": 1.8142273426055908, "learning_rate": 7.518427287776966e-06, "loss": 0.9287, "step": 10330 }, { "epoch": 0.59, "grad_norm": 1.9435560703277588, "learning_rate": 7.516627825592848e-06, "loss": 0.8942, "step": 10331 }, { "epoch": 0.59, "grad_norm": 1.6227105855941772, "learning_rate": 7.514828449102965e-06, "loss": 0.9093, "step": 10332 }, { "epoch": 0.59, "grad_norm": 1.5616214275360107, "learning_rate": 7.513029158369412e-06, "loss": 0.9424, "step": 10333 }, { "epoch": 0.59, "grad_norm": 1.938692569732666, "learning_rate": 7.511229953454276e-06, "loss": 0.9645, "step": 10334 }, { "epoch": 0.59, "grad_norm": 1.7903739213943481, "learning_rate": 7.50943083441964e-06, "loss": 0.909, "step": 10335 }, { "epoch": 0.59, "grad_norm": 1.7871263027191162, "learning_rate": 7.50763180132759e-06, "loss": 0.8656, "step": 10336 }, { "epoch": 0.59, "grad_norm": 1.8659932613372803, "learning_rate": 7.5058328542402035e-06, "loss": 0.9399, "step": 10337 }, { "epoch": 0.59, "grad_norm": 1.9322506189346313, "learning_rate": 7.504033993219559e-06, "loss": 0.9597, "step": 10338 }, { "epoch": 0.59, "grad_norm": 1.6774568557739258, "learning_rate": 7.50223521832773e-06, "loss": 1.0672, "step": 10339 }, { "epoch": 0.59, "grad_norm": 1.6441165208816528, "learning_rate": 7.500436529626787e-06, "loss": 0.9803, "step": 10340 }, { "epoch": 0.59, "grad_norm": 1.534072995185852, "learning_rate": 7.498637927178796e-06, "loss": 0.8899, "step": 10341 }, { "epoch": 0.59, "grad_norm": 1.7665263414382935, "learning_rate": 7.496839411045824e-06, "loss": 0.9251, "step": 10342 }, { "epoch": 0.59, "grad_norm": 1.7371271848678589, "learning_rate": 7.495040981289931e-06, "loss": 0.9546, "step": 10343 }, { "epoch": 0.59, "grad_norm": 1.6054104566574097, "learning_rate": 7.493242637973175e-06, "loss": 0.9281, "step": 10344 }, { "epoch": 0.59, "grad_norm": 1.692980408668518, "learning_rate": 7.491444381157616e-06, "loss": 0.9072, "step": 10345 }, { "epoch": 0.59, "grad_norm": 1.8429760932922363, "learning_rate": 7.489646210905301e-06, "loss": 0.9423, "step": 10346 }, { "epoch": 0.59, "grad_norm": 1.8078008890151978, "learning_rate": 7.487848127278285e-06, "loss": 0.9181, "step": 10347 }, { "epoch": 0.59, "grad_norm": 1.8376256227493286, "learning_rate": 7.486050130338611e-06, "loss": 0.9126, "step": 10348 }, { "epoch": 0.59, "grad_norm": 2.14129376411438, "learning_rate": 7.484252220148327e-06, "loss": 0.9624, "step": 10349 }, { "epoch": 0.59, "grad_norm": 1.7582612037658691, "learning_rate": 7.482454396769468e-06, "loss": 1.0191, "step": 10350 }, { "epoch": 0.59, "grad_norm": 1.6357886791229248, "learning_rate": 7.480656660264076e-06, "loss": 0.9612, "step": 10351 }, { "epoch": 0.59, "grad_norm": 1.7550610303878784, "learning_rate": 7.478859010694187e-06, "loss": 0.9674, "step": 10352 }, { "epoch": 0.59, "grad_norm": 1.855189323425293, "learning_rate": 7.477061448121832e-06, "loss": 0.9267, "step": 10353 }, { "epoch": 0.59, "grad_norm": 1.7542481422424316, "learning_rate": 7.4752639726090374e-06, "loss": 0.953, "step": 10354 }, { "epoch": 0.59, "grad_norm": 1.6325795650482178, "learning_rate": 7.47346658421783e-06, "loss": 0.9645, "step": 10355 }, { "epoch": 0.59, "grad_norm": 1.7339776754379272, "learning_rate": 7.4716692830102335e-06, "loss": 0.8925, "step": 10356 }, { "epoch": 0.59, "grad_norm": 1.6789088249206543, "learning_rate": 7.469872069048267e-06, "loss": 0.967, "step": 10357 }, { "epoch": 0.59, "grad_norm": 1.7512871026992798, "learning_rate": 7.468074942393949e-06, "loss": 0.9476, "step": 10358 }, { "epoch": 0.59, "grad_norm": 1.722955584526062, "learning_rate": 7.466277903109291e-06, "loss": 0.9465, "step": 10359 }, { "epoch": 0.59, "grad_norm": 1.7761191129684448, "learning_rate": 7.464480951256306e-06, "loss": 0.9638, "step": 10360 }, { "epoch": 0.59, "grad_norm": 1.7436084747314453, "learning_rate": 7.462684086897001e-06, "loss": 0.9487, "step": 10361 }, { "epoch": 0.59, "grad_norm": 1.6918977499008179, "learning_rate": 7.460887310093377e-06, "loss": 0.9617, "step": 10362 }, { "epoch": 0.59, "grad_norm": 2.652294874191284, "learning_rate": 7.459090620907441e-06, "loss": 0.9451, "step": 10363 }, { "epoch": 0.59, "grad_norm": 1.7144298553466797, "learning_rate": 7.457294019401191e-06, "loss": 0.9358, "step": 10364 }, { "epoch": 0.59, "grad_norm": 1.8511468172073364, "learning_rate": 7.455497505636622e-06, "loss": 1.0208, "step": 10365 }, { "epoch": 0.59, "grad_norm": 1.8542863130569458, "learning_rate": 7.4537010796757244e-06, "loss": 0.9762, "step": 10366 }, { "epoch": 0.59, "grad_norm": 1.6627072095870972, "learning_rate": 7.451904741580491e-06, "loss": 0.9919, "step": 10367 }, { "epoch": 0.59, "grad_norm": 1.6468545198440552, "learning_rate": 7.450108491412909e-06, "loss": 0.9141, "step": 10368 }, { "epoch": 0.59, "grad_norm": 1.729580044746399, "learning_rate": 7.448312329234957e-06, "loss": 0.9566, "step": 10369 }, { "epoch": 0.59, "grad_norm": 1.9567667245864868, "learning_rate": 7.44651625510862e-06, "loss": 0.9225, "step": 10370 }, { "epoch": 0.59, "grad_norm": 1.0888370275497437, "learning_rate": 7.444720269095875e-06, "loss": 0.5679, "step": 10371 }, { "epoch": 0.59, "grad_norm": 1.6800328493118286, "learning_rate": 7.442924371258694e-06, "loss": 0.8681, "step": 10372 }, { "epoch": 0.59, "grad_norm": 1.8042755126953125, "learning_rate": 7.4411285616590505e-06, "loss": 0.9504, "step": 10373 }, { "epoch": 0.59, "grad_norm": 1.6325427293777466, "learning_rate": 7.4393328403589105e-06, "loss": 0.8894, "step": 10374 }, { "epoch": 0.6, "grad_norm": 1.6852833032608032, "learning_rate": 7.437537207420243e-06, "loss": 0.9377, "step": 10375 }, { "epoch": 0.6, "grad_norm": 1.6984894275665283, "learning_rate": 7.435741662905009e-06, "loss": 0.9038, "step": 10376 }, { "epoch": 0.6, "grad_norm": 1.5487910509109497, "learning_rate": 7.433946206875167e-06, "loss": 0.9157, "step": 10377 }, { "epoch": 0.6, "grad_norm": 1.7332934141159058, "learning_rate": 7.432150839392674e-06, "loss": 0.9208, "step": 10378 }, { "epoch": 0.6, "grad_norm": 1.8254334926605225, "learning_rate": 7.4303555605194825e-06, "loss": 0.9517, "step": 10379 }, { "epoch": 0.6, "grad_norm": 1.7641140222549438, "learning_rate": 7.428560370317542e-06, "loss": 0.9257, "step": 10380 }, { "epoch": 0.6, "grad_norm": 1.8517814874649048, "learning_rate": 7.426765268848801e-06, "loss": 0.9889, "step": 10381 }, { "epoch": 0.6, "grad_norm": 1.831356406211853, "learning_rate": 7.424970256175201e-06, "loss": 0.9247, "step": 10382 }, { "epoch": 0.6, "grad_norm": 1.7396743297576904, "learning_rate": 7.423175332358686e-06, "loss": 1.0379, "step": 10383 }, { "epoch": 0.6, "grad_norm": 1.6547646522521973, "learning_rate": 7.421380497461191e-06, "loss": 0.9259, "step": 10384 }, { "epoch": 0.6, "grad_norm": 1.8959203958511353, "learning_rate": 7.419585751544654e-06, "loss": 0.9567, "step": 10385 }, { "epoch": 0.6, "grad_norm": 1.6283398866653442, "learning_rate": 7.417791094671e-06, "loss": 0.984, "step": 10386 }, { "epoch": 0.6, "grad_norm": 1.9545018672943115, "learning_rate": 7.415996526902165e-06, "loss": 0.9362, "step": 10387 }, { "epoch": 0.6, "grad_norm": 1.826254963874817, "learning_rate": 7.414202048300072e-06, "loss": 0.9179, "step": 10388 }, { "epoch": 0.6, "grad_norm": 1.9001611471176147, "learning_rate": 7.412407658926644e-06, "loss": 0.9545, "step": 10389 }, { "epoch": 0.6, "grad_norm": 1.6383544206619263, "learning_rate": 7.4106133588437975e-06, "loss": 0.9945, "step": 10390 }, { "epoch": 0.6, "grad_norm": 1.791081428527832, "learning_rate": 7.408819148113453e-06, "loss": 0.9062, "step": 10391 }, { "epoch": 0.6, "grad_norm": 1.7783887386322021, "learning_rate": 7.407025026797521e-06, "loss": 0.9097, "step": 10392 }, { "epoch": 0.6, "grad_norm": 1.778610348701477, "learning_rate": 7.405230994957911e-06, "loss": 0.9445, "step": 10393 }, { "epoch": 0.6, "grad_norm": 1.7498493194580078, "learning_rate": 7.403437052656531e-06, "loss": 0.9177, "step": 10394 }, { "epoch": 0.6, "grad_norm": 1.6312103271484375, "learning_rate": 7.401643199955286e-06, "loss": 0.8852, "step": 10395 }, { "epoch": 0.6, "grad_norm": 1.6708095073699951, "learning_rate": 7.399849436916076e-06, "loss": 0.8823, "step": 10396 }, { "epoch": 0.6, "grad_norm": 1.8595303297042847, "learning_rate": 7.3980557636008e-06, "loss": 1.0069, "step": 10397 }, { "epoch": 0.6, "grad_norm": 1.739510178565979, "learning_rate": 7.3962621800713475e-06, "loss": 0.9061, "step": 10398 }, { "epoch": 0.6, "grad_norm": 1.7827552556991577, "learning_rate": 7.394468686389615e-06, "loss": 0.9607, "step": 10399 }, { "epoch": 0.6, "grad_norm": 1.6982142925262451, "learning_rate": 7.3926752826174916e-06, "loss": 0.9281, "step": 10400 }, { "epoch": 0.6, "grad_norm": 1.8110274076461792, "learning_rate": 7.390881968816859e-06, "loss": 0.8984, "step": 10401 }, { "epoch": 0.6, "grad_norm": 1.720268726348877, "learning_rate": 7.389088745049604e-06, "loss": 1.0069, "step": 10402 }, { "epoch": 0.6, "grad_norm": 1.7948675155639648, "learning_rate": 7.387295611377599e-06, "loss": 1.0496, "step": 10403 }, { "epoch": 0.6, "grad_norm": 1.751927375793457, "learning_rate": 7.385502567862728e-06, "loss": 0.9373, "step": 10404 }, { "epoch": 0.6, "grad_norm": 1.6295148134231567, "learning_rate": 7.383709614566859e-06, "loss": 0.9132, "step": 10405 }, { "epoch": 0.6, "grad_norm": 1.6818876266479492, "learning_rate": 7.381916751551863e-06, "loss": 1.029, "step": 10406 }, { "epoch": 0.6, "grad_norm": 1.627815842628479, "learning_rate": 7.3801239788796075e-06, "loss": 0.9505, "step": 10407 }, { "epoch": 0.6, "grad_norm": 1.1063754558563232, "learning_rate": 7.3783312966119535e-06, "loss": 0.5933, "step": 10408 }, { "epoch": 0.6, "grad_norm": 1.7942789793014526, "learning_rate": 7.376538704810765e-06, "loss": 1.0138, "step": 10409 }, { "epoch": 0.6, "grad_norm": 1.8698800802230835, "learning_rate": 7.374746203537897e-06, "loss": 0.9455, "step": 10410 }, { "epoch": 0.6, "grad_norm": 1.7118797302246094, "learning_rate": 7.372953792855203e-06, "loss": 0.9469, "step": 10411 }, { "epoch": 0.6, "grad_norm": 1.63349449634552, "learning_rate": 7.3711614728245364e-06, "loss": 0.9426, "step": 10412 }, { "epoch": 0.6, "grad_norm": 1.7269929647445679, "learning_rate": 7.3693692435077425e-06, "loss": 0.9628, "step": 10413 }, { "epoch": 0.6, "grad_norm": 1.8332700729370117, "learning_rate": 7.36757710496667e-06, "loss": 0.8703, "step": 10414 }, { "epoch": 0.6, "grad_norm": 1.7432461977005005, "learning_rate": 7.365785057263156e-06, "loss": 0.9429, "step": 10415 }, { "epoch": 0.6, "grad_norm": 1.6571274995803833, "learning_rate": 7.36399310045904e-06, "loss": 0.9949, "step": 10416 }, { "epoch": 0.6, "grad_norm": 1.7190614938735962, "learning_rate": 7.362201234616162e-06, "loss": 1.0038, "step": 10417 }, { "epoch": 0.6, "grad_norm": 1.7729727029800415, "learning_rate": 7.3604094597963494e-06, "loss": 0.9989, "step": 10418 }, { "epoch": 0.6, "grad_norm": 1.938710331916809, "learning_rate": 7.358617776061434e-06, "loss": 0.8579, "step": 10419 }, { "epoch": 0.6, "grad_norm": 1.6284576654434204, "learning_rate": 7.35682618347324e-06, "loss": 0.9837, "step": 10420 }, { "epoch": 0.6, "grad_norm": 1.7897228002548218, "learning_rate": 7.355034682093591e-06, "loss": 0.9078, "step": 10421 }, { "epoch": 0.6, "grad_norm": 1.685447096824646, "learning_rate": 7.3532432719843075e-06, "loss": 0.9719, "step": 10422 }, { "epoch": 0.6, "grad_norm": 1.5817539691925049, "learning_rate": 7.351451953207205e-06, "loss": 0.8721, "step": 10423 }, { "epoch": 0.6, "grad_norm": 1.7040961980819702, "learning_rate": 7.349660725824097e-06, "loss": 0.9954, "step": 10424 }, { "epoch": 0.6, "grad_norm": 1.8188546895980835, "learning_rate": 7.347869589896794e-06, "loss": 0.9368, "step": 10425 }, { "epoch": 0.6, "grad_norm": 1.659582257270813, "learning_rate": 7.346078545487102e-06, "loss": 0.947, "step": 10426 }, { "epoch": 0.6, "grad_norm": 1.7515660524368286, "learning_rate": 7.344287592656827e-06, "loss": 0.9685, "step": 10427 }, { "epoch": 0.6, "grad_norm": 1.727772831916809, "learning_rate": 7.342496731467766e-06, "loss": 0.9036, "step": 10428 }, { "epoch": 0.6, "grad_norm": 1.9014296531677246, "learning_rate": 7.340705961981722e-06, "loss": 1.0327, "step": 10429 }, { "epoch": 0.6, "grad_norm": 1.9315305948257446, "learning_rate": 7.338915284260487e-06, "loss": 0.8965, "step": 10430 }, { "epoch": 0.6, "grad_norm": 1.7743418216705322, "learning_rate": 7.337124698365851e-06, "loss": 0.9831, "step": 10431 }, { "epoch": 0.6, "grad_norm": 1.7623370885849, "learning_rate": 7.335334204359605e-06, "loss": 1.061, "step": 10432 }, { "epoch": 0.6, "grad_norm": 1.877609133720398, "learning_rate": 7.333543802303531e-06, "loss": 1.0535, "step": 10433 }, { "epoch": 0.6, "grad_norm": 1.8454853296279907, "learning_rate": 7.331753492259412e-06, "loss": 0.9605, "step": 10434 }, { "epoch": 0.6, "grad_norm": 1.6689057350158691, "learning_rate": 7.329963274289027e-06, "loss": 0.9097, "step": 10435 }, { "epoch": 0.6, "grad_norm": 1.8124525547027588, "learning_rate": 7.328173148454151e-06, "loss": 0.9679, "step": 10436 }, { "epoch": 0.6, "grad_norm": 1.8154628276824951, "learning_rate": 7.326383114816555e-06, "loss": 1.0283, "step": 10437 }, { "epoch": 0.6, "grad_norm": 1.8383556604385376, "learning_rate": 7.324593173438011e-06, "loss": 0.8785, "step": 10438 }, { "epoch": 0.6, "grad_norm": 1.8645848035812378, "learning_rate": 7.322803324380282e-06, "loss": 0.8606, "step": 10439 }, { "epoch": 0.6, "grad_norm": 1.590856671333313, "learning_rate": 7.321013567705131e-06, "loss": 0.9486, "step": 10440 }, { "epoch": 0.6, "grad_norm": 1.7762575149536133, "learning_rate": 7.319223903474318e-06, "loss": 0.9583, "step": 10441 }, { "epoch": 0.6, "grad_norm": 1.6695590019226074, "learning_rate": 7.317434331749602e-06, "loss": 0.9919, "step": 10442 }, { "epoch": 0.6, "grad_norm": 1.7676153182983398, "learning_rate": 7.315644852592733e-06, "loss": 0.9094, "step": 10443 }, { "epoch": 0.6, "grad_norm": 1.873572826385498, "learning_rate": 7.31385546606546e-06, "loss": 1.0042, "step": 10444 }, { "epoch": 0.6, "grad_norm": 1.0585534572601318, "learning_rate": 7.312066172229534e-06, "loss": 0.65, "step": 10445 }, { "epoch": 0.6, "grad_norm": 1.7065507173538208, "learning_rate": 7.310276971146695e-06, "loss": 0.9397, "step": 10446 }, { "epoch": 0.6, "grad_norm": 1.8405537605285645, "learning_rate": 7.308487862878684e-06, "loss": 0.934, "step": 10447 }, { "epoch": 0.6, "grad_norm": 1.7185841798782349, "learning_rate": 7.306698847487239e-06, "loss": 0.8914, "step": 10448 }, { "epoch": 0.6, "grad_norm": 1.8095675706863403, "learning_rate": 7.304909925034093e-06, "loss": 0.9432, "step": 10449 }, { "epoch": 0.6, "grad_norm": 1.1023943424224854, "learning_rate": 7.303121095580976e-06, "loss": 0.6007, "step": 10450 }, { "epoch": 0.6, "grad_norm": 1.740768313407898, "learning_rate": 7.301332359189618e-06, "loss": 0.9509, "step": 10451 }, { "epoch": 0.6, "grad_norm": 2.058027505874634, "learning_rate": 7.29954371592174e-06, "loss": 0.9772, "step": 10452 }, { "epoch": 0.6, "grad_norm": 1.7246119976043701, "learning_rate": 7.297755165839066e-06, "loss": 0.9351, "step": 10453 }, { "epoch": 0.6, "grad_norm": 2.024247169494629, "learning_rate": 7.295966709003312e-06, "loss": 0.9063, "step": 10454 }, { "epoch": 0.6, "grad_norm": 1.9063416719436646, "learning_rate": 7.294178345476195e-06, "loss": 1.0634, "step": 10455 }, { "epoch": 0.6, "grad_norm": 1.8643913269042969, "learning_rate": 7.292390075319426e-06, "loss": 0.919, "step": 10456 }, { "epoch": 0.6, "grad_norm": 1.9154936075210571, "learning_rate": 7.2906018985947095e-06, "loss": 1.0358, "step": 10457 }, { "epoch": 0.6, "grad_norm": 1.7114940881729126, "learning_rate": 7.288813815363754e-06, "loss": 1.0048, "step": 10458 }, { "epoch": 0.6, "grad_norm": 1.8005897998809814, "learning_rate": 7.287025825688261e-06, "loss": 0.9033, "step": 10459 }, { "epoch": 0.6, "grad_norm": 1.8557074069976807, "learning_rate": 7.285237929629928e-06, "loss": 1.0136, "step": 10460 }, { "epoch": 0.6, "grad_norm": 1.6670433282852173, "learning_rate": 7.283450127250451e-06, "loss": 0.9328, "step": 10461 }, { "epoch": 0.6, "grad_norm": 1.7676392793655396, "learning_rate": 7.281662418611521e-06, "loss": 0.9511, "step": 10462 }, { "epoch": 0.6, "grad_norm": 2.0404930114746094, "learning_rate": 7.279874803774828e-06, "loss": 0.9822, "step": 10463 }, { "epoch": 0.6, "grad_norm": 1.8812988996505737, "learning_rate": 7.2780872828020556e-06, "loss": 0.8572, "step": 10464 }, { "epoch": 0.6, "grad_norm": 1.6840405464172363, "learning_rate": 7.2762998557548894e-06, "loss": 0.9332, "step": 10465 }, { "epoch": 0.6, "grad_norm": 1.7049657106399536, "learning_rate": 7.274512522695006e-06, "loss": 0.8929, "step": 10466 }, { "epoch": 0.6, "grad_norm": 1.7308307886123657, "learning_rate": 7.27272528368408e-06, "loss": 0.8621, "step": 10467 }, { "epoch": 0.6, "grad_norm": 1.9550319910049438, "learning_rate": 7.2709381387837894e-06, "loss": 0.8522, "step": 10468 }, { "epoch": 0.6, "grad_norm": 1.7165430784225464, "learning_rate": 7.269151088055799e-06, "loss": 0.9454, "step": 10469 }, { "epoch": 0.6, "grad_norm": 1.5933412313461304, "learning_rate": 7.267364131561775e-06, "loss": 0.9117, "step": 10470 }, { "epoch": 0.6, "grad_norm": 1.8131766319274902, "learning_rate": 7.265577269363384e-06, "loss": 0.9682, "step": 10471 }, { "epoch": 0.6, "grad_norm": 1.0049012899398804, "learning_rate": 7.263790501522282e-06, "loss": 0.5249, "step": 10472 }, { "epoch": 0.6, "grad_norm": 1.990783929824829, "learning_rate": 7.262003828100127e-06, "loss": 0.8583, "step": 10473 }, { "epoch": 0.6, "grad_norm": 1.7652831077575684, "learning_rate": 7.260217249158572e-06, "loss": 0.8636, "step": 10474 }, { "epoch": 0.6, "grad_norm": 1.781686544418335, "learning_rate": 7.258430764759266e-06, "loss": 0.8908, "step": 10475 }, { "epoch": 0.6, "grad_norm": 1.7794626951217651, "learning_rate": 7.256644374963857e-06, "loss": 0.9199, "step": 10476 }, { "epoch": 0.6, "grad_norm": 1.717576503753662, "learning_rate": 7.254858079833986e-06, "loss": 0.9136, "step": 10477 }, { "epoch": 0.6, "grad_norm": 1.561353087425232, "learning_rate": 7.253071879431295e-06, "loss": 0.956, "step": 10478 }, { "epoch": 0.6, "grad_norm": 1.6979806423187256, "learning_rate": 7.25128577381742e-06, "loss": 0.9309, "step": 10479 }, { "epoch": 0.6, "grad_norm": 1.685271978378296, "learning_rate": 7.249499763053996e-06, "loss": 0.9505, "step": 10480 }, { "epoch": 0.6, "grad_norm": 1.6365466117858887, "learning_rate": 7.24771384720265e-06, "loss": 0.9827, "step": 10481 }, { "epoch": 0.6, "grad_norm": 1.0076375007629395, "learning_rate": 7.24592802632501e-06, "loss": 0.5142, "step": 10482 }, { "epoch": 0.6, "grad_norm": 1.7766143083572388, "learning_rate": 7.2441423004827016e-06, "loss": 0.9976, "step": 10483 }, { "epoch": 0.6, "grad_norm": 0.996250569820404, "learning_rate": 7.2423566697373445e-06, "loss": 0.5652, "step": 10484 }, { "epoch": 0.6, "grad_norm": 1.6844693422317505, "learning_rate": 7.240571134150558e-06, "loss": 0.8965, "step": 10485 }, { "epoch": 0.6, "grad_norm": 1.5797817707061768, "learning_rate": 7.238785693783951e-06, "loss": 0.9805, "step": 10486 }, { "epoch": 0.6, "grad_norm": 1.8870216608047485, "learning_rate": 7.237000348699137e-06, "loss": 0.8402, "step": 10487 }, { "epoch": 0.6, "grad_norm": 1.679835319519043, "learning_rate": 7.235215098957723e-06, "loss": 0.9737, "step": 10488 }, { "epoch": 0.6, "grad_norm": 1.6840558052062988, "learning_rate": 7.233429944621313e-06, "loss": 0.9124, "step": 10489 }, { "epoch": 0.6, "grad_norm": 1.4455772638320923, "learning_rate": 7.2316448857515076e-06, "loss": 0.542, "step": 10490 }, { "epoch": 0.6, "grad_norm": 1.719997525215149, "learning_rate": 7.229859922409903e-06, "loss": 0.9594, "step": 10491 }, { "epoch": 0.6, "grad_norm": 1.718862771987915, "learning_rate": 7.228075054658096e-06, "loss": 0.8893, "step": 10492 }, { "epoch": 0.6, "grad_norm": 1.6912163496017456, "learning_rate": 7.226290282557675e-06, "loss": 0.8767, "step": 10493 }, { "epoch": 0.6, "grad_norm": 1.9682996273040771, "learning_rate": 7.224505606170227e-06, "loss": 0.9355, "step": 10494 }, { "epoch": 0.6, "grad_norm": 1.0403172969818115, "learning_rate": 7.222721025557337e-06, "loss": 0.4955, "step": 10495 }, { "epoch": 0.6, "grad_norm": 1.7146739959716797, "learning_rate": 7.22093654078059e-06, "loss": 0.8837, "step": 10496 }, { "epoch": 0.6, "grad_norm": 2.006544351577759, "learning_rate": 7.219152151901558e-06, "loss": 0.8625, "step": 10497 }, { "epoch": 0.6, "grad_norm": 1.672311782836914, "learning_rate": 7.217367858981818e-06, "loss": 0.8896, "step": 10498 }, { "epoch": 0.6, "grad_norm": 2.8284478187561035, "learning_rate": 7.215583662082939e-06, "loss": 0.9508, "step": 10499 }, { "epoch": 0.6, "grad_norm": 1.8492563962936401, "learning_rate": 7.21379956126649e-06, "loss": 0.9396, "step": 10500 }, { "epoch": 0.6, "grad_norm": 1.769406795501709, "learning_rate": 7.212015556594037e-06, "loss": 0.9696, "step": 10501 }, { "epoch": 0.6, "grad_norm": 1.7644988298416138, "learning_rate": 7.2102316481271376e-06, "loss": 0.8542, "step": 10502 }, { "epoch": 0.6, "grad_norm": 1.5977485179901123, "learning_rate": 7.2084478359273514e-06, "loss": 0.8127, "step": 10503 }, { "epoch": 0.6, "grad_norm": 1.9385881423950195, "learning_rate": 7.206664120056232e-06, "loss": 1.0041, "step": 10504 }, { "epoch": 0.6, "grad_norm": 1.7143248319625854, "learning_rate": 7.204880500575333e-06, "loss": 0.914, "step": 10505 }, { "epoch": 0.6, "grad_norm": 1.7707990407943726, "learning_rate": 7.203096977546196e-06, "loss": 0.874, "step": 10506 }, { "epoch": 0.6, "grad_norm": 1.6297681331634521, "learning_rate": 7.201313551030373e-06, "loss": 0.944, "step": 10507 }, { "epoch": 0.6, "grad_norm": 1.7276585102081299, "learning_rate": 7.199530221089399e-06, "loss": 0.9402, "step": 10508 }, { "epoch": 0.6, "grad_norm": 1.7412209510803223, "learning_rate": 7.1977469877848175e-06, "loss": 0.965, "step": 10509 }, { "epoch": 0.6, "grad_norm": 1.6354182958602905, "learning_rate": 7.195963851178157e-06, "loss": 0.9107, "step": 10510 }, { "epoch": 0.6, "grad_norm": 1.6675649881362915, "learning_rate": 7.194180811330953e-06, "loss": 0.9716, "step": 10511 }, { "epoch": 0.6, "grad_norm": 1.645922303199768, "learning_rate": 7.1923978683047305e-06, "loss": 0.9131, "step": 10512 }, { "epoch": 0.6, "grad_norm": 1.6072906255722046, "learning_rate": 7.190615022161015e-06, "loss": 0.9417, "step": 10513 }, { "epoch": 0.6, "grad_norm": 1.8283355236053467, "learning_rate": 7.188832272961328e-06, "loss": 0.9146, "step": 10514 }, { "epoch": 0.6, "grad_norm": 1.786779522895813, "learning_rate": 7.187049620767186e-06, "loss": 0.9453, "step": 10515 }, { "epoch": 0.6, "grad_norm": 1.7378132343292236, "learning_rate": 7.1852670656401036e-06, "loss": 0.9004, "step": 10516 }, { "epoch": 0.6, "grad_norm": 1.7554913759231567, "learning_rate": 7.183484607641593e-06, "loss": 0.95, "step": 10517 }, { "epoch": 0.6, "grad_norm": 2.108429431915283, "learning_rate": 7.181702246833158e-06, "loss": 0.9026, "step": 10518 }, { "epoch": 0.6, "grad_norm": 1.702378511428833, "learning_rate": 7.179919983276309e-06, "loss": 0.9425, "step": 10519 }, { "epoch": 0.6, "grad_norm": 1.7587751150131226, "learning_rate": 7.178137817032542e-06, "loss": 1.0535, "step": 10520 }, { "epoch": 0.6, "grad_norm": 1.0380104780197144, "learning_rate": 7.176355748163358e-06, "loss": 0.6182, "step": 10521 }, { "epoch": 0.6, "grad_norm": 1.6112908124923706, "learning_rate": 7.17457377673025e-06, "loss": 0.8, "step": 10522 }, { "epoch": 0.6, "grad_norm": 1.8520015478134155, "learning_rate": 7.1727919027947064e-06, "loss": 0.9574, "step": 10523 }, { "epoch": 0.6, "grad_norm": 1.7646820545196533, "learning_rate": 7.171010126418218e-06, "loss": 1.0021, "step": 10524 }, { "epoch": 0.6, "grad_norm": 1.8538589477539062, "learning_rate": 7.169228447662269e-06, "loss": 0.9343, "step": 10525 }, { "epoch": 0.6, "grad_norm": 1.6463186740875244, "learning_rate": 7.167446866588337e-06, "loss": 0.9269, "step": 10526 }, { "epoch": 0.6, "grad_norm": 1.7062673568725586, "learning_rate": 7.165665383257902e-06, "loss": 0.9435, "step": 10527 }, { "epoch": 0.6, "grad_norm": 1.6561214923858643, "learning_rate": 7.1638839977324374e-06, "loss": 0.8998, "step": 10528 }, { "epoch": 0.6, "grad_norm": 1.766083836555481, "learning_rate": 7.162102710073413e-06, "loss": 0.9265, "step": 10529 }, { "epoch": 0.6, "grad_norm": 2.144442319869995, "learning_rate": 7.1603215203422945e-06, "loss": 0.9792, "step": 10530 }, { "epoch": 0.6, "grad_norm": 1.583807110786438, "learning_rate": 7.158540428600551e-06, "loss": 0.9106, "step": 10531 }, { "epoch": 0.6, "grad_norm": 1.0937477350234985, "learning_rate": 7.1567594349096395e-06, "loss": 0.5903, "step": 10532 }, { "epoch": 0.6, "grad_norm": 1.7128753662109375, "learning_rate": 7.154978539331015e-06, "loss": 0.9245, "step": 10533 }, { "epoch": 0.6, "grad_norm": 1.5351214408874512, "learning_rate": 7.153197741926137e-06, "loss": 0.8612, "step": 10534 }, { "epoch": 0.6, "grad_norm": 1.7781466245651245, "learning_rate": 7.1514170427564525e-06, "loss": 0.9275, "step": 10535 }, { "epoch": 0.6, "grad_norm": 1.757417917251587, "learning_rate": 7.149636441883405e-06, "loss": 0.8927, "step": 10536 }, { "epoch": 0.6, "grad_norm": 1.8818066120147705, "learning_rate": 7.147855939368445e-06, "loss": 0.9756, "step": 10537 }, { "epoch": 0.6, "grad_norm": 1.6473575830459595, "learning_rate": 7.14607553527301e-06, "loss": 0.9219, "step": 10538 }, { "epoch": 0.6, "grad_norm": 1.020093321800232, "learning_rate": 7.144295229658536e-06, "loss": 0.5469, "step": 10539 }, { "epoch": 0.6, "grad_norm": 1.7127479314804077, "learning_rate": 7.142515022586456e-06, "loss": 0.8661, "step": 10540 }, { "epoch": 0.6, "grad_norm": 1.7459585666656494, "learning_rate": 7.1407349141182e-06, "loss": 0.9061, "step": 10541 }, { "epoch": 0.6, "grad_norm": 1.7850323915481567, "learning_rate": 7.138954904315196e-06, "loss": 0.9114, "step": 10542 }, { "epoch": 0.6, "grad_norm": 1.9461034536361694, "learning_rate": 7.137174993238865e-06, "loss": 1.0089, "step": 10543 }, { "epoch": 0.6, "grad_norm": 1.715523362159729, "learning_rate": 7.13539518095063e-06, "loss": 0.9641, "step": 10544 }, { "epoch": 0.6, "grad_norm": 1.6246423721313477, "learning_rate": 7.1336154675119044e-06, "loss": 0.936, "step": 10545 }, { "epoch": 0.6, "grad_norm": 1.7631171941757202, "learning_rate": 7.131835852984102e-06, "loss": 0.9249, "step": 10546 }, { "epoch": 0.6, "grad_norm": 1.6301947832107544, "learning_rate": 7.130056337428633e-06, "loss": 0.9562, "step": 10547 }, { "epoch": 0.6, "grad_norm": 1.62639319896698, "learning_rate": 7.1282769209069005e-06, "loss": 0.9001, "step": 10548 }, { "epoch": 0.61, "grad_norm": 1.9143786430358887, "learning_rate": 7.126497603480311e-06, "loss": 0.8719, "step": 10549 }, { "epoch": 0.61, "grad_norm": 2.0039377212524414, "learning_rate": 7.124718385210263e-06, "loss": 0.9298, "step": 10550 }, { "epoch": 0.61, "grad_norm": 1.73677396774292, "learning_rate": 7.122939266158151e-06, "loss": 0.8826, "step": 10551 }, { "epoch": 0.61, "grad_norm": 1.623577356338501, "learning_rate": 7.121160246385369e-06, "loss": 0.9616, "step": 10552 }, { "epoch": 0.61, "grad_norm": 2.02581524848938, "learning_rate": 7.119381325953305e-06, "loss": 0.915, "step": 10553 }, { "epoch": 0.61, "grad_norm": 1.5755760669708252, "learning_rate": 7.117602504923345e-06, "loss": 0.9115, "step": 10554 }, { "epoch": 0.61, "grad_norm": 1.8273218870162964, "learning_rate": 7.11582378335687e-06, "loss": 0.8953, "step": 10555 }, { "epoch": 0.61, "grad_norm": 1.772381067276001, "learning_rate": 7.11404516131526e-06, "loss": 0.9046, "step": 10556 }, { "epoch": 0.61, "grad_norm": 1.6744062900543213, "learning_rate": 7.11226663885989e-06, "loss": 0.8696, "step": 10557 }, { "epoch": 0.61, "grad_norm": 1.6818684339523315, "learning_rate": 7.110488216052133e-06, "loss": 0.9822, "step": 10558 }, { "epoch": 0.61, "grad_norm": 1.8032218217849731, "learning_rate": 7.108709892953355e-06, "loss": 0.9339, "step": 10559 }, { "epoch": 0.61, "grad_norm": 1.751014232635498, "learning_rate": 7.106931669624919e-06, "loss": 0.8842, "step": 10560 }, { "epoch": 0.61, "grad_norm": 1.7469888925552368, "learning_rate": 7.105153546128194e-06, "loss": 0.9295, "step": 10561 }, { "epoch": 0.61, "grad_norm": 1.7836576700210571, "learning_rate": 7.1033755225245315e-06, "loss": 0.889, "step": 10562 }, { "epoch": 0.61, "grad_norm": 1.7243170738220215, "learning_rate": 7.10159759887529e-06, "loss": 0.9444, "step": 10563 }, { "epoch": 0.61, "grad_norm": 1.9207457304000854, "learning_rate": 7.099819775241818e-06, "loss": 0.9771, "step": 10564 }, { "epoch": 0.61, "grad_norm": 1.876839280128479, "learning_rate": 7.0980420516854655e-06, "loss": 0.9412, "step": 10565 }, { "epoch": 0.61, "grad_norm": 1.0860122442245483, "learning_rate": 7.096264428267574e-06, "loss": 0.5333, "step": 10566 }, { "epoch": 0.61, "grad_norm": 1.6661583185195923, "learning_rate": 7.094486905049487e-06, "loss": 1.0143, "step": 10567 }, { "epoch": 0.61, "grad_norm": 1.8619788885116577, "learning_rate": 7.092709482092539e-06, "loss": 0.9895, "step": 10568 }, { "epoch": 0.61, "grad_norm": 1.5746023654937744, "learning_rate": 7.090932159458067e-06, "loss": 0.9401, "step": 10569 }, { "epoch": 0.61, "grad_norm": 1.7725682258605957, "learning_rate": 7.0891549372073996e-06, "loss": 0.99, "step": 10570 }, { "epoch": 0.61, "grad_norm": 1.750618815422058, "learning_rate": 7.0873778154018636e-06, "loss": 0.8965, "step": 10571 }, { "epoch": 0.61, "grad_norm": 1.7544050216674805, "learning_rate": 7.085600794102783e-06, "loss": 0.9502, "step": 10572 }, { "epoch": 0.61, "grad_norm": 1.8539271354675293, "learning_rate": 7.0838238733714785e-06, "loss": 0.9348, "step": 10573 }, { "epoch": 0.61, "grad_norm": 1.5890002250671387, "learning_rate": 7.0820470532692654e-06, "loss": 0.8919, "step": 10574 }, { "epoch": 0.61, "grad_norm": 1.7070835828781128, "learning_rate": 7.080270333857459e-06, "loss": 0.8832, "step": 10575 }, { "epoch": 0.61, "grad_norm": 1.6903101205825806, "learning_rate": 7.0784937151973666e-06, "loss": 0.9332, "step": 10576 }, { "epoch": 0.61, "grad_norm": 1.779494047164917, "learning_rate": 7.0767171973502955e-06, "loss": 0.9111, "step": 10577 }, { "epoch": 0.61, "grad_norm": 1.853913426399231, "learning_rate": 7.074940780377548e-06, "loss": 0.9215, "step": 10578 }, { "epoch": 0.61, "grad_norm": 1.840926170349121, "learning_rate": 7.073164464340423e-06, "loss": 0.8718, "step": 10579 }, { "epoch": 0.61, "grad_norm": 1.883320689201355, "learning_rate": 7.071388249300217e-06, "loss": 0.949, "step": 10580 }, { "epoch": 0.61, "grad_norm": 1.708052158355713, "learning_rate": 7.069612135318222e-06, "loss": 0.9655, "step": 10581 }, { "epoch": 0.61, "grad_norm": 1.7948048114776611, "learning_rate": 7.0678361224557265e-06, "loss": 0.8808, "step": 10582 }, { "epoch": 0.61, "grad_norm": 1.682698130607605, "learning_rate": 7.066060210774015e-06, "loss": 0.9309, "step": 10583 }, { "epoch": 0.61, "grad_norm": 1.7539793252944946, "learning_rate": 7.064284400334369e-06, "loss": 0.9424, "step": 10584 }, { "epoch": 0.61, "grad_norm": 1.015618085861206, "learning_rate": 7.0625086911980685e-06, "loss": 0.6048, "step": 10585 }, { "epoch": 0.61, "grad_norm": 1.7441778182983398, "learning_rate": 7.060733083426389e-06, "loss": 0.894, "step": 10586 }, { "epoch": 0.61, "grad_norm": 1.7858424186706543, "learning_rate": 7.058957577080599e-06, "loss": 0.8866, "step": 10587 }, { "epoch": 0.61, "grad_norm": 1.6261181831359863, "learning_rate": 7.057182172221968e-06, "loss": 0.8644, "step": 10588 }, { "epoch": 0.61, "grad_norm": 1.6882188320159912, "learning_rate": 7.055406868911761e-06, "loss": 0.9653, "step": 10589 }, { "epoch": 0.61, "grad_norm": 1.735335350036621, "learning_rate": 7.053631667211236e-06, "loss": 0.8811, "step": 10590 }, { "epoch": 0.61, "grad_norm": 1.9600070714950562, "learning_rate": 7.051856567181652e-06, "loss": 0.9558, "step": 10591 }, { "epoch": 0.61, "grad_norm": 1.7807068824768066, "learning_rate": 7.0500815688842614e-06, "loss": 0.9736, "step": 10592 }, { "epoch": 0.61, "grad_norm": 1.8206084966659546, "learning_rate": 7.048306672380318e-06, "loss": 0.946, "step": 10593 }, { "epoch": 0.61, "grad_norm": 1.8796347379684448, "learning_rate": 7.046531877731065e-06, "loss": 1.0012, "step": 10594 }, { "epoch": 0.61, "grad_norm": 1.684680461883545, "learning_rate": 7.044757184997747e-06, "loss": 0.8911, "step": 10595 }, { "epoch": 0.61, "grad_norm": 1.6220518350601196, "learning_rate": 7.0429825942416e-06, "loss": 0.9426, "step": 10596 }, { "epoch": 0.61, "grad_norm": 1.8553264141082764, "learning_rate": 7.0412081055238675e-06, "loss": 0.9212, "step": 10597 }, { "epoch": 0.61, "grad_norm": 1.7969778776168823, "learning_rate": 7.039433718905777e-06, "loss": 0.8835, "step": 10598 }, { "epoch": 0.61, "grad_norm": 1.9352818727493286, "learning_rate": 7.0376594344485586e-06, "loss": 0.9461, "step": 10599 }, { "epoch": 0.61, "grad_norm": 1.75458824634552, "learning_rate": 7.035885252213439e-06, "loss": 1.0264, "step": 10600 }, { "epoch": 0.61, "grad_norm": 1.6329597234725952, "learning_rate": 7.03411117226164e-06, "loss": 0.9338, "step": 10601 }, { "epoch": 0.61, "grad_norm": 1.737112283706665, "learning_rate": 7.032337194654375e-06, "loss": 0.9172, "step": 10602 }, { "epoch": 0.61, "grad_norm": 1.7277860641479492, "learning_rate": 7.0305633194528675e-06, "loss": 0.9527, "step": 10603 }, { "epoch": 0.61, "grad_norm": 1.8295855522155762, "learning_rate": 7.028789546718327e-06, "loss": 1.051, "step": 10604 }, { "epoch": 0.61, "grad_norm": 1.8651982545852661, "learning_rate": 7.027015876511955e-06, "loss": 0.9216, "step": 10605 }, { "epoch": 0.61, "grad_norm": 1.8102905750274658, "learning_rate": 7.025242308894964e-06, "loss": 0.9481, "step": 10606 }, { "epoch": 0.61, "grad_norm": 1.8126261234283447, "learning_rate": 7.02346884392855e-06, "loss": 0.8496, "step": 10607 }, { "epoch": 0.61, "grad_norm": 1.8048622608184814, "learning_rate": 7.021695481673912e-06, "loss": 0.9313, "step": 10608 }, { "epoch": 0.61, "grad_norm": 1.9112160205841064, "learning_rate": 7.019922222192243e-06, "loss": 0.9956, "step": 10609 }, { "epoch": 0.61, "grad_norm": 1.7338054180145264, "learning_rate": 7.018149065544735e-06, "loss": 1.0393, "step": 10610 }, { "epoch": 0.61, "grad_norm": 2.0718231201171875, "learning_rate": 7.016376011792572e-06, "loss": 0.8995, "step": 10611 }, { "epoch": 0.61, "grad_norm": 1.6575974225997925, "learning_rate": 7.0146030609969385e-06, "loss": 0.8795, "step": 10612 }, { "epoch": 0.61, "grad_norm": 1.819928526878357, "learning_rate": 7.012830213219013e-06, "loss": 0.8589, "step": 10613 }, { "epoch": 0.61, "grad_norm": 1.666174054145813, "learning_rate": 7.011057468519973e-06, "loss": 0.9066, "step": 10614 }, { "epoch": 0.61, "grad_norm": 1.8539707660675049, "learning_rate": 7.009284826960989e-06, "loss": 0.8985, "step": 10615 }, { "epoch": 0.61, "grad_norm": 1.9939125776290894, "learning_rate": 7.007512288603234e-06, "loss": 0.9897, "step": 10616 }, { "epoch": 0.61, "grad_norm": 1.7586653232574463, "learning_rate": 7.005739853507871e-06, "loss": 1.0008, "step": 10617 }, { "epoch": 0.61, "grad_norm": 2.0530898571014404, "learning_rate": 7.00396752173606e-06, "loss": 0.9621, "step": 10618 }, { "epoch": 0.61, "grad_norm": 1.9518625736236572, "learning_rate": 7.002195293348961e-06, "loss": 0.9535, "step": 10619 }, { "epoch": 0.61, "grad_norm": 1.890276551246643, "learning_rate": 7.00042316840773e-06, "loss": 0.9704, "step": 10620 }, { "epoch": 0.61, "grad_norm": 1.9246728420257568, "learning_rate": 6.9986511469735145e-06, "loss": 0.8925, "step": 10621 }, { "epoch": 0.61, "grad_norm": 1.808403491973877, "learning_rate": 6.9968792291074646e-06, "loss": 0.9285, "step": 10622 }, { "epoch": 0.61, "grad_norm": 1.9037803411483765, "learning_rate": 6.995107414870725e-06, "loss": 0.9468, "step": 10623 }, { "epoch": 0.61, "grad_norm": 1.7801018953323364, "learning_rate": 6.9933357043244335e-06, "loss": 0.8925, "step": 10624 }, { "epoch": 0.61, "grad_norm": 1.5667699575424194, "learning_rate": 6.991564097529727e-06, "loss": 0.9389, "step": 10625 }, { "epoch": 0.61, "grad_norm": 1.8979694843292236, "learning_rate": 6.989792594547739e-06, "loss": 1.0365, "step": 10626 }, { "epoch": 0.61, "grad_norm": 1.711273193359375, "learning_rate": 6.988021195439603e-06, "loss": 0.8901, "step": 10627 }, { "epoch": 0.61, "grad_norm": 1.8914388418197632, "learning_rate": 6.98624990026644e-06, "loss": 0.8564, "step": 10628 }, { "epoch": 0.61, "grad_norm": 1.753403663635254, "learning_rate": 6.984478709089375e-06, "loss": 0.9645, "step": 10629 }, { "epoch": 0.61, "grad_norm": 1.5992937088012695, "learning_rate": 6.9827076219695254e-06, "loss": 0.9076, "step": 10630 }, { "epoch": 0.61, "grad_norm": 2.0181591510772705, "learning_rate": 6.9809366389680075e-06, "loss": 0.9307, "step": 10631 }, { "epoch": 0.61, "grad_norm": 1.8274506330490112, "learning_rate": 6.979165760145934e-06, "loss": 1.029, "step": 10632 }, { "epoch": 0.61, "grad_norm": 1.6901062726974487, "learning_rate": 6.977394985564412e-06, "loss": 0.9327, "step": 10633 }, { "epoch": 0.61, "grad_norm": 1.8407485485076904, "learning_rate": 6.975624315284544e-06, "loss": 0.9415, "step": 10634 }, { "epoch": 0.61, "grad_norm": 1.6040914058685303, "learning_rate": 6.973853749367434e-06, "loss": 0.9332, "step": 10635 }, { "epoch": 0.61, "grad_norm": 1.1123594045639038, "learning_rate": 6.9720832878741776e-06, "loss": 0.5664, "step": 10636 }, { "epoch": 0.61, "grad_norm": 1.6647958755493164, "learning_rate": 6.970312930865868e-06, "loss": 0.9076, "step": 10637 }, { "epoch": 0.61, "grad_norm": 1.694380283355713, "learning_rate": 6.968542678403596e-06, "loss": 0.8781, "step": 10638 }, { "epoch": 0.61, "grad_norm": 1.7860279083251953, "learning_rate": 6.966772530548448e-06, "loss": 1.0284, "step": 10639 }, { "epoch": 0.61, "grad_norm": 1.7016037702560425, "learning_rate": 6.965002487361507e-06, "loss": 0.9512, "step": 10640 }, { "epoch": 0.61, "grad_norm": 1.9999542236328125, "learning_rate": 6.963232548903853e-06, "loss": 0.9682, "step": 10641 }, { "epoch": 0.61, "grad_norm": 1.8749598264694214, "learning_rate": 6.9614627152365625e-06, "loss": 0.9298, "step": 10642 }, { "epoch": 0.61, "grad_norm": 1.0003541707992554, "learning_rate": 6.959692986420703e-06, "loss": 0.5844, "step": 10643 }, { "epoch": 0.61, "grad_norm": 0.9773035049438477, "learning_rate": 6.957923362517348e-06, "loss": 0.5531, "step": 10644 }, { "epoch": 0.61, "grad_norm": 1.7271779775619507, "learning_rate": 6.956153843587559e-06, "loss": 0.9272, "step": 10645 }, { "epoch": 0.61, "grad_norm": 1.640600562095642, "learning_rate": 6.954384429692398e-06, "loss": 0.929, "step": 10646 }, { "epoch": 0.61, "grad_norm": 1.7854920625686646, "learning_rate": 6.9526151208929234e-06, "loss": 0.9038, "step": 10647 }, { "epoch": 0.61, "grad_norm": 1.6040844917297363, "learning_rate": 6.950845917250188e-06, "loss": 0.9255, "step": 10648 }, { "epoch": 0.61, "grad_norm": 1.7694549560546875, "learning_rate": 6.9490768188252435e-06, "loss": 1.0054, "step": 10649 }, { "epoch": 0.61, "grad_norm": 1.6183125972747803, "learning_rate": 6.947307825679133e-06, "loss": 0.9455, "step": 10650 }, { "epoch": 0.61, "grad_norm": 1.8158743381500244, "learning_rate": 6.945538937872903e-06, "loss": 1.0143, "step": 10651 }, { "epoch": 0.61, "grad_norm": 1.8781062364578247, "learning_rate": 6.943770155467593e-06, "loss": 0.9514, "step": 10652 }, { "epoch": 0.61, "grad_norm": 1.7817882299423218, "learning_rate": 6.9420014785242374e-06, "loss": 0.9948, "step": 10653 }, { "epoch": 0.61, "grad_norm": 1.7521567344665527, "learning_rate": 6.940232907103868e-06, "loss": 0.9492, "step": 10654 }, { "epoch": 0.61, "grad_norm": 1.6803890466690063, "learning_rate": 6.9384644412675165e-06, "loss": 0.9569, "step": 10655 }, { "epoch": 0.61, "grad_norm": 0.9495131969451904, "learning_rate": 6.936696081076202e-06, "loss": 0.5421, "step": 10656 }, { "epoch": 0.61, "grad_norm": 1.7499480247497559, "learning_rate": 6.9349278265909506e-06, "loss": 0.9578, "step": 10657 }, { "epoch": 0.61, "grad_norm": 1.877681016921997, "learning_rate": 6.933159677872776e-06, "loss": 0.9675, "step": 10658 }, { "epoch": 0.61, "grad_norm": 1.7409342527389526, "learning_rate": 6.931391634982696e-06, "loss": 0.9898, "step": 10659 }, { "epoch": 0.61, "grad_norm": 0.9767569899559021, "learning_rate": 6.9296236979817175e-06, "loss": 0.5647, "step": 10660 }, { "epoch": 0.61, "grad_norm": 1.8217902183532715, "learning_rate": 6.92785586693085e-06, "loss": 1.0466, "step": 10661 }, { "epoch": 0.61, "grad_norm": 1.5689538717269897, "learning_rate": 6.926088141891092e-06, "loss": 0.9663, "step": 10662 }, { "epoch": 0.61, "grad_norm": 1.7143189907073975, "learning_rate": 6.924320522923448e-06, "loss": 0.8771, "step": 10663 }, { "epoch": 0.61, "grad_norm": 1.7888163328170776, "learning_rate": 6.9225530100889105e-06, "loss": 0.8921, "step": 10664 }, { "epoch": 0.61, "grad_norm": 1.8386070728302002, "learning_rate": 6.92078560344847e-06, "loss": 0.9951, "step": 10665 }, { "epoch": 0.61, "grad_norm": 1.7091959714889526, "learning_rate": 6.9190183030631185e-06, "loss": 1.0207, "step": 10666 }, { "epoch": 0.61, "grad_norm": 1.7623509168624878, "learning_rate": 6.917251108993841e-06, "loss": 0.9466, "step": 10667 }, { "epoch": 0.61, "grad_norm": 1.561977505683899, "learning_rate": 6.915484021301613e-06, "loss": 0.91, "step": 10668 }, { "epoch": 0.61, "grad_norm": 1.9120452404022217, "learning_rate": 6.9137170400474164e-06, "loss": 0.8985, "step": 10669 }, { "epoch": 0.61, "grad_norm": 1.798666000366211, "learning_rate": 6.911950165292225e-06, "loss": 0.9788, "step": 10670 }, { "epoch": 0.61, "grad_norm": 1.042483925819397, "learning_rate": 6.9101833970970074e-06, "loss": 0.5321, "step": 10671 }, { "epoch": 0.61, "grad_norm": 1.6670053005218506, "learning_rate": 6.9084167355227295e-06, "loss": 1.0227, "step": 10672 }, { "epoch": 0.61, "grad_norm": 1.7947410345077515, "learning_rate": 6.906650180630353e-06, "loss": 0.9832, "step": 10673 }, { "epoch": 0.61, "grad_norm": 1.788172721862793, "learning_rate": 6.904883732480838e-06, "loss": 1.0008, "step": 10674 }, { "epoch": 0.61, "grad_norm": 1.8236624002456665, "learning_rate": 6.903117391135141e-06, "loss": 0.953, "step": 10675 }, { "epoch": 0.61, "grad_norm": 1.8077296018600464, "learning_rate": 6.90135115665421e-06, "loss": 0.9215, "step": 10676 }, { "epoch": 0.61, "grad_norm": 1.737565517425537, "learning_rate": 6.899585029098996e-06, "loss": 0.8719, "step": 10677 }, { "epoch": 0.61, "grad_norm": 1.7289538383483887, "learning_rate": 6.897819008530442e-06, "loss": 0.9999, "step": 10678 }, { "epoch": 0.61, "grad_norm": 0.9687201976776123, "learning_rate": 6.89605309500949e-06, "loss": 0.511, "step": 10679 }, { "epoch": 0.61, "grad_norm": 1.8477734327316284, "learning_rate": 6.89428728859707e-06, "loss": 0.9496, "step": 10680 }, { "epoch": 0.61, "grad_norm": 2.0349655151367188, "learning_rate": 6.892521589354124e-06, "loss": 0.9415, "step": 10681 }, { "epoch": 0.61, "grad_norm": 1.8025058507919312, "learning_rate": 6.8907559973415776e-06, "loss": 0.9244, "step": 10682 }, { "epoch": 0.61, "grad_norm": 1.6989761590957642, "learning_rate": 6.888990512620356e-06, "loss": 0.9642, "step": 10683 }, { "epoch": 0.61, "grad_norm": 1.666062831878662, "learning_rate": 6.887225135251381e-06, "loss": 0.9645, "step": 10684 }, { "epoch": 0.61, "grad_norm": 1.7569764852523804, "learning_rate": 6.885459865295573e-06, "loss": 0.9722, "step": 10685 }, { "epoch": 0.61, "grad_norm": 1.8222192525863647, "learning_rate": 6.883694702813843e-06, "loss": 0.9852, "step": 10686 }, { "epoch": 0.61, "grad_norm": 1.7695648670196533, "learning_rate": 6.881929647867105e-06, "loss": 1.0292, "step": 10687 }, { "epoch": 0.61, "grad_norm": 1.5964521169662476, "learning_rate": 6.880164700516265e-06, "loss": 0.8765, "step": 10688 }, { "epoch": 0.61, "grad_norm": 1.6771517992019653, "learning_rate": 6.878399860822226e-06, "loss": 0.8959, "step": 10689 }, { "epoch": 0.61, "grad_norm": 1.8062862157821655, "learning_rate": 6.876635128845888e-06, "loss": 0.9944, "step": 10690 }, { "epoch": 0.61, "grad_norm": 2.052683115005493, "learning_rate": 6.874870504648147e-06, "loss": 0.9878, "step": 10691 }, { "epoch": 0.61, "grad_norm": 1.04588782787323, "learning_rate": 6.873105988289892e-06, "loss": 0.5483, "step": 10692 }, { "epoch": 0.61, "grad_norm": 1.980678915977478, "learning_rate": 6.871341579832018e-06, "loss": 0.9433, "step": 10693 }, { "epoch": 0.61, "grad_norm": 1.6742863655090332, "learning_rate": 6.869577279335407e-06, "loss": 0.998, "step": 10694 }, { "epoch": 0.61, "grad_norm": 1.8063756227493286, "learning_rate": 6.867813086860939e-06, "loss": 0.9092, "step": 10695 }, { "epoch": 0.61, "grad_norm": 1.6749621629714966, "learning_rate": 6.8660490024694905e-06, "loss": 0.8654, "step": 10696 }, { "epoch": 0.61, "grad_norm": 1.7051033973693848, "learning_rate": 6.864285026221939e-06, "loss": 0.8724, "step": 10697 }, { "epoch": 0.61, "grad_norm": 1.8245407342910767, "learning_rate": 6.862521158179151e-06, "loss": 0.9671, "step": 10698 }, { "epoch": 0.61, "grad_norm": 1.9305305480957031, "learning_rate": 6.860757398401994e-06, "loss": 0.9709, "step": 10699 }, { "epoch": 0.61, "grad_norm": 1.756739616394043, "learning_rate": 6.858993746951328e-06, "loss": 0.9593, "step": 10700 }, { "epoch": 0.61, "grad_norm": 1.8252607583999634, "learning_rate": 6.8572302038880155e-06, "loss": 0.9419, "step": 10701 }, { "epoch": 0.61, "grad_norm": 1.632275104522705, "learning_rate": 6.85546676927291e-06, "loss": 0.855, "step": 10702 }, { "epoch": 0.61, "grad_norm": 2.02900767326355, "learning_rate": 6.853703443166861e-06, "loss": 0.9852, "step": 10703 }, { "epoch": 0.61, "grad_norm": 1.0382273197174072, "learning_rate": 6.851940225630718e-06, "loss": 0.5807, "step": 10704 }, { "epoch": 0.61, "grad_norm": 1.691987156867981, "learning_rate": 6.8501771167253224e-06, "loss": 0.8632, "step": 10705 }, { "epoch": 0.61, "grad_norm": 1.7489248514175415, "learning_rate": 6.848414116511519e-06, "loss": 0.9458, "step": 10706 }, { "epoch": 0.61, "grad_norm": 1.7351505756378174, "learning_rate": 6.84665122505014e-06, "loss": 0.8572, "step": 10707 }, { "epoch": 0.61, "grad_norm": 1.7645676136016846, "learning_rate": 6.844888442402018e-06, "loss": 1.0089, "step": 10708 }, { "epoch": 0.61, "grad_norm": 1.7964791059494019, "learning_rate": 6.843125768627983e-06, "loss": 0.9314, "step": 10709 }, { "epoch": 0.61, "grad_norm": 1.9650331735610962, "learning_rate": 6.841363203788858e-06, "loss": 1.0, "step": 10710 }, { "epoch": 0.61, "grad_norm": 1.6995660066604614, "learning_rate": 6.8396007479454675e-06, "loss": 0.8429, "step": 10711 }, { "epoch": 0.61, "grad_norm": 1.8449565172195435, "learning_rate": 6.837838401158625e-06, "loss": 0.9166, "step": 10712 }, { "epoch": 0.61, "grad_norm": 1.5481001138687134, "learning_rate": 6.836076163489147e-06, "loss": 0.9429, "step": 10713 }, { "epoch": 0.61, "grad_norm": 1.0663679838180542, "learning_rate": 6.834314034997844e-06, "loss": 0.5822, "step": 10714 }, { "epoch": 0.61, "grad_norm": 1.8308523893356323, "learning_rate": 6.832552015745519e-06, "loss": 0.9478, "step": 10715 }, { "epoch": 0.61, "grad_norm": 1.7743624448776245, "learning_rate": 6.8307901057929735e-06, "loss": 1.0199, "step": 10716 }, { "epoch": 0.61, "grad_norm": 1.8001407384872437, "learning_rate": 6.829028305201012e-06, "loss": 0.8889, "step": 10717 }, { "epoch": 0.61, "grad_norm": 1.7936240434646606, "learning_rate": 6.8272666140304255e-06, "loss": 0.9346, "step": 10718 }, { "epoch": 0.61, "grad_norm": 1.0354218482971191, "learning_rate": 6.825505032342005e-06, "loss": 0.487, "step": 10719 }, { "epoch": 0.61, "grad_norm": 1.6789531707763672, "learning_rate": 6.823743560196539e-06, "loss": 0.9096, "step": 10720 }, { "epoch": 0.61, "grad_norm": 1.9396724700927734, "learning_rate": 6.8219821976548104e-06, "loss": 0.9735, "step": 10721 }, { "epoch": 0.61, "grad_norm": 1.6277079582214355, "learning_rate": 6.820220944777598e-06, "loss": 0.9284, "step": 10722 }, { "epoch": 0.61, "grad_norm": 1.6848450899124146, "learning_rate": 6.818459801625679e-06, "loss": 0.903, "step": 10723 }, { "epoch": 0.62, "grad_norm": 1.8770413398742676, "learning_rate": 6.816698768259824e-06, "loss": 0.9984, "step": 10724 }, { "epoch": 0.62, "grad_norm": 1.7362871170043945, "learning_rate": 6.814937844740803e-06, "loss": 0.8919, "step": 10725 }, { "epoch": 0.62, "grad_norm": 1.7478768825531006, "learning_rate": 6.81317703112938e-06, "loss": 0.9723, "step": 10726 }, { "epoch": 0.62, "grad_norm": 1.7467293739318848, "learning_rate": 6.811416327486316e-06, "loss": 0.9714, "step": 10727 }, { "epoch": 0.62, "grad_norm": 1.8072682619094849, "learning_rate": 6.8096557338723665e-06, "loss": 0.9422, "step": 10728 }, { "epoch": 0.62, "grad_norm": 1.956426978111267, "learning_rate": 6.807895250348284e-06, "loss": 0.9892, "step": 10729 }, { "epoch": 0.62, "grad_norm": 1.7539492845535278, "learning_rate": 6.806134876974821e-06, "loss": 0.9577, "step": 10730 }, { "epoch": 0.62, "grad_norm": 1.7264825105667114, "learning_rate": 6.804374613812721e-06, "loss": 0.9687, "step": 10731 }, { "epoch": 0.62, "grad_norm": 1.7004607915878296, "learning_rate": 6.802614460922728e-06, "loss": 1.0253, "step": 10732 }, { "epoch": 0.62, "grad_norm": 1.651534080505371, "learning_rate": 6.800854418365579e-06, "loss": 0.8433, "step": 10733 }, { "epoch": 0.62, "grad_norm": 1.8990579843521118, "learning_rate": 6.799094486202005e-06, "loss": 0.9038, "step": 10734 }, { "epoch": 0.62, "grad_norm": 1.7547316551208496, "learning_rate": 6.797334664492741e-06, "loss": 0.8984, "step": 10735 }, { "epoch": 0.62, "grad_norm": 1.787498950958252, "learning_rate": 6.795574953298511e-06, "loss": 0.9147, "step": 10736 }, { "epoch": 0.62, "grad_norm": 1.7551413774490356, "learning_rate": 6.7938153526800386e-06, "loss": 0.9337, "step": 10737 }, { "epoch": 0.62, "grad_norm": 1.6865466833114624, "learning_rate": 6.792055862698042e-06, "loss": 0.9851, "step": 10738 }, { "epoch": 0.62, "grad_norm": 1.6120564937591553, "learning_rate": 6.790296483413237e-06, "loss": 0.9142, "step": 10739 }, { "epoch": 0.62, "grad_norm": 1.8226311206817627, "learning_rate": 6.788537214886335e-06, "loss": 1.0253, "step": 10740 }, { "epoch": 0.62, "grad_norm": 1.7005010843276978, "learning_rate": 6.7867780571780416e-06, "loss": 0.9272, "step": 10741 }, { "epoch": 0.62, "grad_norm": 1.7235755920410156, "learning_rate": 6.785019010349062e-06, "loss": 0.9282, "step": 10742 }, { "epoch": 0.62, "grad_norm": 1.7701646089553833, "learning_rate": 6.783260074460096e-06, "loss": 0.8534, "step": 10743 }, { "epoch": 0.62, "grad_norm": 1.613637089729309, "learning_rate": 6.781501249571839e-06, "loss": 0.9348, "step": 10744 }, { "epoch": 0.62, "grad_norm": 1.94866144657135, "learning_rate": 6.7797425357449844e-06, "loss": 0.8959, "step": 10745 }, { "epoch": 0.62, "grad_norm": 1.9007580280303955, "learning_rate": 6.777983933040216e-06, "loss": 0.9693, "step": 10746 }, { "epoch": 0.62, "grad_norm": 1.9180207252502441, "learning_rate": 6.776225441518224e-06, "loss": 1.0191, "step": 10747 }, { "epoch": 0.62, "grad_norm": 1.7450957298278809, "learning_rate": 6.7744670612396866e-06, "loss": 0.9105, "step": 10748 }, { "epoch": 0.62, "grad_norm": 1.7718079090118408, "learning_rate": 6.7727087922652815e-06, "loss": 1.0198, "step": 10749 }, { "epoch": 0.62, "grad_norm": 1.7065348625183105, "learning_rate": 6.77095063465568e-06, "loss": 0.9569, "step": 10750 }, { "epoch": 0.62, "grad_norm": 1.6835771799087524, "learning_rate": 6.769192588471553e-06, "loss": 0.8948, "step": 10751 }, { "epoch": 0.62, "grad_norm": 1.660536766052246, "learning_rate": 6.767434653773564e-06, "loss": 0.9514, "step": 10752 }, { "epoch": 0.62, "grad_norm": 1.7290070056915283, "learning_rate": 6.765676830622376e-06, "loss": 0.9366, "step": 10753 }, { "epoch": 0.62, "grad_norm": 1.7143102884292603, "learning_rate": 6.7639191190786455e-06, "loss": 0.8621, "step": 10754 }, { "epoch": 0.62, "grad_norm": 1.8951539993286133, "learning_rate": 6.762161519203028e-06, "loss": 0.938, "step": 10755 }, { "epoch": 0.62, "grad_norm": 1.5635262727737427, "learning_rate": 6.760404031056169e-06, "loss": 0.878, "step": 10756 }, { "epoch": 0.62, "grad_norm": 1.835033655166626, "learning_rate": 6.758646654698719e-06, "loss": 0.9498, "step": 10757 }, { "epoch": 0.62, "grad_norm": 1.6972578763961792, "learning_rate": 6.756889390191317e-06, "loss": 0.9662, "step": 10758 }, { "epoch": 0.62, "grad_norm": 1.6809446811676025, "learning_rate": 6.755132237594605e-06, "loss": 0.8907, "step": 10759 }, { "epoch": 0.62, "grad_norm": 1.8069835901260376, "learning_rate": 6.753375196969214e-06, "loss": 0.9287, "step": 10760 }, { "epoch": 0.62, "grad_norm": 1.5308797359466553, "learning_rate": 6.751618268375777e-06, "loss": 0.9434, "step": 10761 }, { "epoch": 0.62, "grad_norm": 1.7994133234024048, "learning_rate": 6.749861451874919e-06, "loss": 0.9521, "step": 10762 }, { "epoch": 0.62, "grad_norm": 1.5352572202682495, "learning_rate": 6.748104747527265e-06, "loss": 0.8142, "step": 10763 }, { "epoch": 0.62, "grad_norm": 1.821385383605957, "learning_rate": 6.74634815539343e-06, "loss": 0.9621, "step": 10764 }, { "epoch": 0.62, "grad_norm": 1.73631751537323, "learning_rate": 6.744591675534033e-06, "loss": 0.9163, "step": 10765 }, { "epoch": 0.62, "grad_norm": 1.7903461456298828, "learning_rate": 6.742835308009683e-06, "loss": 0.9126, "step": 10766 }, { "epoch": 0.62, "grad_norm": 2.125685453414917, "learning_rate": 6.7410790528809875e-06, "loss": 1.1167, "step": 10767 }, { "epoch": 0.62, "grad_norm": 1.6026840209960938, "learning_rate": 6.7393229102085525e-06, "loss": 0.8783, "step": 10768 }, { "epoch": 0.62, "grad_norm": 1.8227370977401733, "learning_rate": 6.737566880052973e-06, "loss": 0.9831, "step": 10769 }, { "epoch": 0.62, "grad_norm": 1.8788026571273804, "learning_rate": 6.735810962474847e-06, "loss": 1.0038, "step": 10770 }, { "epoch": 0.62, "grad_norm": 1.6938592195510864, "learning_rate": 6.734055157534768e-06, "loss": 0.9911, "step": 10771 }, { "epoch": 0.62, "grad_norm": 1.7798479795455933, "learning_rate": 6.732299465293322e-06, "loss": 0.8596, "step": 10772 }, { "epoch": 0.62, "grad_norm": 2.1224095821380615, "learning_rate": 6.730543885811094e-06, "loss": 0.9381, "step": 10773 }, { "epoch": 0.62, "grad_norm": 1.8793865442276, "learning_rate": 6.728788419148664e-06, "loss": 0.9947, "step": 10774 }, { "epoch": 0.62, "grad_norm": 1.6216096878051758, "learning_rate": 6.727033065366609e-06, "loss": 0.9507, "step": 10775 }, { "epoch": 0.62, "grad_norm": 1.7437671422958374, "learning_rate": 6.725277824525498e-06, "loss": 0.8766, "step": 10776 }, { "epoch": 0.62, "grad_norm": 1.6781764030456543, "learning_rate": 6.723522696685902e-06, "loss": 0.8865, "step": 10777 }, { "epoch": 0.62, "grad_norm": 1.764045238494873, "learning_rate": 6.721767681908386e-06, "loss": 0.9362, "step": 10778 }, { "epoch": 0.62, "grad_norm": 1.993725299835205, "learning_rate": 6.720012780253509e-06, "loss": 1.0574, "step": 10779 }, { "epoch": 0.62, "grad_norm": 1.5924664735794067, "learning_rate": 6.7182579917818295e-06, "loss": 0.9438, "step": 10780 }, { "epoch": 0.62, "grad_norm": 1.4769564867019653, "learning_rate": 6.716503316553899e-06, "loss": 0.8964, "step": 10781 }, { "epoch": 0.62, "grad_norm": 1.6932201385498047, "learning_rate": 6.714748754630264e-06, "loss": 0.9264, "step": 10782 }, { "epoch": 0.62, "grad_norm": 1.9355340003967285, "learning_rate": 6.712994306071476e-06, "loss": 0.8863, "step": 10783 }, { "epoch": 0.62, "grad_norm": 1.7098079919815063, "learning_rate": 6.711239970938073e-06, "loss": 0.9729, "step": 10784 }, { "epoch": 0.62, "grad_norm": 1.6829544305801392, "learning_rate": 6.709485749290592e-06, "loss": 0.9553, "step": 10785 }, { "epoch": 0.62, "grad_norm": 1.8375035524368286, "learning_rate": 6.707731641189565e-06, "loss": 0.9197, "step": 10786 }, { "epoch": 0.62, "grad_norm": 1.7233394384384155, "learning_rate": 6.705977646695523e-06, "loss": 0.9421, "step": 10787 }, { "epoch": 0.62, "grad_norm": 1.0112662315368652, "learning_rate": 6.704223765868991e-06, "loss": 0.5897, "step": 10788 }, { "epoch": 0.62, "grad_norm": 1.1351630687713623, "learning_rate": 6.70246999877049e-06, "loss": 0.589, "step": 10789 }, { "epoch": 0.62, "grad_norm": 1.6162692308425903, "learning_rate": 6.700716345460538e-06, "loss": 0.9482, "step": 10790 }, { "epoch": 0.62, "grad_norm": 1.758133888244629, "learning_rate": 6.698962805999649e-06, "loss": 0.9673, "step": 10791 }, { "epoch": 0.62, "grad_norm": 1.8369076251983643, "learning_rate": 6.697209380448333e-06, "loss": 0.9569, "step": 10792 }, { "epoch": 0.62, "grad_norm": 1.950299620628357, "learning_rate": 6.695456068867094e-06, "loss": 0.8982, "step": 10793 }, { "epoch": 0.62, "grad_norm": 1.7521032094955444, "learning_rate": 6.693702871316436e-06, "loss": 0.9578, "step": 10794 }, { "epoch": 0.62, "grad_norm": 1.783825397491455, "learning_rate": 6.691949787856855e-06, "loss": 0.8967, "step": 10795 }, { "epoch": 0.62, "grad_norm": 1.6299785375595093, "learning_rate": 6.690196818548846e-06, "loss": 0.9325, "step": 10796 }, { "epoch": 0.62, "grad_norm": 1.6558128595352173, "learning_rate": 6.6884439634529e-06, "loss": 0.9219, "step": 10797 }, { "epoch": 0.62, "grad_norm": 1.7902382612228394, "learning_rate": 6.686691222629503e-06, "loss": 0.9059, "step": 10798 }, { "epoch": 0.62, "grad_norm": 1.6823365688323975, "learning_rate": 6.684938596139135e-06, "loss": 0.9002, "step": 10799 }, { "epoch": 0.62, "grad_norm": 1.7720131874084473, "learning_rate": 6.683186084042276e-06, "loss": 0.9064, "step": 10800 }, { "epoch": 0.62, "grad_norm": 1.748056411743164, "learning_rate": 6.681433686399401e-06, "loss": 0.9167, "step": 10801 }, { "epoch": 0.62, "grad_norm": 1.7118057012557983, "learning_rate": 6.67968140327098e-06, "loss": 0.9403, "step": 10802 }, { "epoch": 0.62, "grad_norm": 1.6603078842163086, "learning_rate": 6.677929234717478e-06, "loss": 0.9045, "step": 10803 }, { "epoch": 0.62, "grad_norm": 1.636451244354248, "learning_rate": 6.676177180799359e-06, "loss": 0.8681, "step": 10804 }, { "epoch": 0.62, "grad_norm": 1.8545950651168823, "learning_rate": 6.6744252415770806e-06, "loss": 0.8167, "step": 10805 }, { "epoch": 0.62, "grad_norm": 1.7148292064666748, "learning_rate": 6.672673417111098e-06, "loss": 0.947, "step": 10806 }, { "epoch": 0.62, "grad_norm": 1.7503153085708618, "learning_rate": 6.670921707461862e-06, "loss": 0.8889, "step": 10807 }, { "epoch": 0.62, "grad_norm": 1.7412528991699219, "learning_rate": 6.669170112689816e-06, "loss": 0.9614, "step": 10808 }, { "epoch": 0.62, "grad_norm": 1.7296286821365356, "learning_rate": 6.667418632855407e-06, "loss": 0.9172, "step": 10809 }, { "epoch": 0.62, "grad_norm": 1.9325380325317383, "learning_rate": 6.665667268019071e-06, "loss": 0.8946, "step": 10810 }, { "epoch": 0.62, "grad_norm": 1.9250518083572388, "learning_rate": 6.663916018241244e-06, "loss": 0.9679, "step": 10811 }, { "epoch": 0.62, "grad_norm": 1.0168627500534058, "learning_rate": 6.662164883582354e-06, "loss": 0.5764, "step": 10812 }, { "epoch": 0.62, "grad_norm": 1.9645469188690186, "learning_rate": 6.660413864102831e-06, "loss": 1.0383, "step": 10813 }, { "epoch": 0.62, "grad_norm": 1.8175413608551025, "learning_rate": 6.658662959863098e-06, "loss": 0.9694, "step": 10814 }, { "epoch": 0.62, "grad_norm": 1.8003426790237427, "learning_rate": 6.656912170923573e-06, "loss": 0.8811, "step": 10815 }, { "epoch": 0.62, "grad_norm": 1.5706884860992432, "learning_rate": 6.6551614973446685e-06, "loss": 0.9244, "step": 10816 }, { "epoch": 0.62, "grad_norm": 1.019797444343567, "learning_rate": 6.653410939186799e-06, "loss": 0.6027, "step": 10817 }, { "epoch": 0.62, "grad_norm": 1.7114776372909546, "learning_rate": 6.65166049651037e-06, "loss": 0.9755, "step": 10818 }, { "epoch": 0.62, "grad_norm": 1.6805094480514526, "learning_rate": 6.6499101693757815e-06, "loss": 0.9309, "step": 10819 }, { "epoch": 0.62, "grad_norm": 1.6730667352676392, "learning_rate": 6.648159957843438e-06, "loss": 0.9252, "step": 10820 }, { "epoch": 0.62, "grad_norm": 1.744978904724121, "learning_rate": 6.64640986197373e-06, "loss": 0.9374, "step": 10821 }, { "epoch": 0.62, "grad_norm": 1.6462736129760742, "learning_rate": 6.6446598818270495e-06, "loss": 0.8708, "step": 10822 }, { "epoch": 0.62, "grad_norm": 1.8557629585266113, "learning_rate": 6.642910017463784e-06, "loss": 1.0394, "step": 10823 }, { "epoch": 0.62, "grad_norm": 1.7058827877044678, "learning_rate": 6.641160268944314e-06, "loss": 0.9515, "step": 10824 }, { "epoch": 0.62, "grad_norm": 1.7320085763931274, "learning_rate": 6.6394106363290235e-06, "loss": 0.9082, "step": 10825 }, { "epoch": 0.62, "grad_norm": 1.74837327003479, "learning_rate": 6.637661119678284e-06, "loss": 0.9166, "step": 10826 }, { "epoch": 0.62, "grad_norm": 1.7110360860824585, "learning_rate": 6.635911719052466e-06, "loss": 0.9835, "step": 10827 }, { "epoch": 0.62, "grad_norm": 1.9405287504196167, "learning_rate": 6.634162434511939e-06, "loss": 0.9671, "step": 10828 }, { "epoch": 0.62, "grad_norm": 1.8028181791305542, "learning_rate": 6.632413266117064e-06, "loss": 0.8949, "step": 10829 }, { "epoch": 0.62, "grad_norm": 1.68267822265625, "learning_rate": 6.6306642139281994e-06, "loss": 0.8916, "step": 10830 }, { "epoch": 0.62, "grad_norm": 1.6243553161621094, "learning_rate": 6.628915278005701e-06, "loss": 0.9176, "step": 10831 }, { "epoch": 0.62, "grad_norm": 1.7617520093917847, "learning_rate": 6.627166458409919e-06, "loss": 0.9344, "step": 10832 }, { "epoch": 0.62, "grad_norm": 2.0125434398651123, "learning_rate": 6.625417755201202e-06, "loss": 1.0592, "step": 10833 }, { "epoch": 0.62, "grad_norm": 1.7374382019042969, "learning_rate": 6.623669168439893e-06, "loss": 0.9634, "step": 10834 }, { "epoch": 0.62, "grad_norm": 1.7962983846664429, "learning_rate": 6.621920698186326e-06, "loss": 0.9429, "step": 10835 }, { "epoch": 0.62, "grad_norm": 1.618694543838501, "learning_rate": 6.620172344500841e-06, "loss": 0.9134, "step": 10836 }, { "epoch": 0.62, "grad_norm": 1.7785835266113281, "learning_rate": 6.618424107443766e-06, "loss": 0.9473, "step": 10837 }, { "epoch": 0.62, "grad_norm": 1.7171711921691895, "learning_rate": 6.616675987075432e-06, "loss": 0.9392, "step": 10838 }, { "epoch": 0.62, "grad_norm": 1.709456443786621, "learning_rate": 6.614927983456156e-06, "loss": 1.0263, "step": 10839 }, { "epoch": 0.62, "grad_norm": 1.7950149774551392, "learning_rate": 6.613180096646261e-06, "loss": 0.9222, "step": 10840 }, { "epoch": 0.62, "grad_norm": 1.7731181383132935, "learning_rate": 6.611432326706061e-06, "loss": 0.8782, "step": 10841 }, { "epoch": 0.62, "grad_norm": 1.9063584804534912, "learning_rate": 6.609684673695864e-06, "loss": 1.0037, "step": 10842 }, { "epoch": 0.62, "grad_norm": 2.1089892387390137, "learning_rate": 6.607937137675981e-06, "loss": 0.9239, "step": 10843 }, { "epoch": 0.62, "grad_norm": 1.9470454454421997, "learning_rate": 6.606189718706711e-06, "loss": 0.9464, "step": 10844 }, { "epoch": 0.62, "grad_norm": 1.0197046995162964, "learning_rate": 6.604442416848351e-06, "loss": 0.5566, "step": 10845 }, { "epoch": 0.62, "grad_norm": 1.7876802682876587, "learning_rate": 6.6026952321612005e-06, "loss": 0.9183, "step": 10846 }, { "epoch": 0.62, "grad_norm": 1.8760367631912231, "learning_rate": 6.6009481647055475e-06, "loss": 0.8508, "step": 10847 }, { "epoch": 0.62, "grad_norm": 1.8140838146209717, "learning_rate": 6.599201214541677e-06, "loss": 1.0108, "step": 10848 }, { "epoch": 0.62, "grad_norm": 1.8265868425369263, "learning_rate": 6.597454381729873e-06, "loss": 0.8781, "step": 10849 }, { "epoch": 0.62, "grad_norm": 1.8629993200302124, "learning_rate": 6.595707666330414e-06, "loss": 0.9431, "step": 10850 }, { "epoch": 0.62, "grad_norm": 1.7512820959091187, "learning_rate": 6.5939610684035745e-06, "loss": 0.9436, "step": 10851 }, { "epoch": 0.62, "grad_norm": 1.6978334188461304, "learning_rate": 6.592214588009624e-06, "loss": 0.9382, "step": 10852 }, { "epoch": 0.62, "grad_norm": 1.7078100442886353, "learning_rate": 6.59046822520883e-06, "loss": 0.9595, "step": 10853 }, { "epoch": 0.62, "grad_norm": 1.0426899194717407, "learning_rate": 6.588721980061452e-06, "loss": 0.5675, "step": 10854 }, { "epoch": 0.62, "grad_norm": 1.7512562274932861, "learning_rate": 6.58697585262775e-06, "loss": 0.9969, "step": 10855 }, { "epoch": 0.62, "grad_norm": 1.7518759965896606, "learning_rate": 6.585229842967977e-06, "loss": 0.8917, "step": 10856 }, { "epoch": 0.62, "grad_norm": 1.8223766088485718, "learning_rate": 6.583483951142384e-06, "loss": 0.9971, "step": 10857 }, { "epoch": 0.62, "grad_norm": 1.8145039081573486, "learning_rate": 6.581738177211215e-06, "loss": 0.9559, "step": 10858 }, { "epoch": 0.62, "grad_norm": 1.8618780374526978, "learning_rate": 6.5799925212347145e-06, "loss": 0.9522, "step": 10859 }, { "epoch": 0.62, "grad_norm": 1.7096915245056152, "learning_rate": 6.578246983273118e-06, "loss": 0.9688, "step": 10860 }, { "epoch": 0.62, "grad_norm": 1.6276496648788452, "learning_rate": 6.576501563386657e-06, "loss": 0.8663, "step": 10861 }, { "epoch": 0.62, "grad_norm": 1.5654850006103516, "learning_rate": 6.574756261635567e-06, "loss": 0.8401, "step": 10862 }, { "epoch": 0.62, "grad_norm": 1.8588556051254272, "learning_rate": 6.573011078080067e-06, "loss": 0.8103, "step": 10863 }, { "epoch": 0.62, "grad_norm": 1.8329823017120361, "learning_rate": 6.571266012780386e-06, "loss": 0.9849, "step": 10864 }, { "epoch": 0.62, "grad_norm": 1.5993702411651611, "learning_rate": 6.569521065796735e-06, "loss": 0.9921, "step": 10865 }, { "epoch": 0.62, "grad_norm": 1.525145173072815, "learning_rate": 6.56777623718933e-06, "loss": 0.9066, "step": 10866 }, { "epoch": 0.62, "grad_norm": 1.8859401941299438, "learning_rate": 6.56603152701838e-06, "loss": 0.8992, "step": 10867 }, { "epoch": 0.62, "grad_norm": 1.6509398221969604, "learning_rate": 6.564286935344088e-06, "loss": 0.9792, "step": 10868 }, { "epoch": 0.62, "grad_norm": 1.602835774421692, "learning_rate": 6.562542462226658e-06, "loss": 0.8529, "step": 10869 }, { "epoch": 0.62, "grad_norm": 1.7459063529968262, "learning_rate": 6.560798107726285e-06, "loss": 0.9177, "step": 10870 }, { "epoch": 0.62, "grad_norm": 1.983681082725525, "learning_rate": 6.559053871903163e-06, "loss": 0.9336, "step": 10871 }, { "epoch": 0.62, "grad_norm": 1.7293874025344849, "learning_rate": 6.55730975481748e-06, "loss": 0.9035, "step": 10872 }, { "epoch": 0.62, "grad_norm": 1.7049914598464966, "learning_rate": 6.55556575652942e-06, "loss": 0.9272, "step": 10873 }, { "epoch": 0.62, "grad_norm": 1.6534440517425537, "learning_rate": 6.553821877099165e-06, "loss": 0.9224, "step": 10874 }, { "epoch": 0.62, "grad_norm": 1.7726149559020996, "learning_rate": 6.55207811658689e-06, "loss": 0.888, "step": 10875 }, { "epoch": 0.62, "grad_norm": 1.0222344398498535, "learning_rate": 6.550334475052767e-06, "loss": 0.5555, "step": 10876 }, { "epoch": 0.62, "grad_norm": 1.712051272392273, "learning_rate": 6.548590952556966e-06, "loss": 0.9751, "step": 10877 }, { "epoch": 0.62, "grad_norm": 1.9366486072540283, "learning_rate": 6.546847549159648e-06, "loss": 1.012, "step": 10878 }, { "epoch": 0.62, "grad_norm": 1.9394659996032715, "learning_rate": 6.545104264920978e-06, "loss": 0.9222, "step": 10879 }, { "epoch": 0.62, "grad_norm": 1.7507514953613281, "learning_rate": 6.543361099901106e-06, "loss": 0.867, "step": 10880 }, { "epoch": 0.62, "grad_norm": 1.7260664701461792, "learning_rate": 6.541618054160191e-06, "loss": 0.8269, "step": 10881 }, { "epoch": 0.62, "grad_norm": 1.7792997360229492, "learning_rate": 6.539875127758373e-06, "loss": 0.9738, "step": 10882 }, { "epoch": 0.62, "grad_norm": 1.8152871131896973, "learning_rate": 6.538132320755799e-06, "loss": 0.9572, "step": 10883 }, { "epoch": 0.62, "grad_norm": 1.764359951019287, "learning_rate": 6.53638963321261e-06, "loss": 0.9687, "step": 10884 }, { "epoch": 0.62, "grad_norm": 1.6781080961227417, "learning_rate": 6.534647065188939e-06, "loss": 0.993, "step": 10885 }, { "epoch": 0.62, "grad_norm": 1.753397822380066, "learning_rate": 6.532904616744918e-06, "loss": 0.9365, "step": 10886 }, { "epoch": 0.62, "grad_norm": 1.9462578296661377, "learning_rate": 6.531162287940672e-06, "loss": 0.9443, "step": 10887 }, { "epoch": 0.62, "grad_norm": 1.6521697044372559, "learning_rate": 6.529420078836327e-06, "loss": 0.9058, "step": 10888 }, { "epoch": 0.62, "grad_norm": 1.6961244344711304, "learning_rate": 6.527677989492001e-06, "loss": 0.8956, "step": 10889 }, { "epoch": 0.62, "grad_norm": 1.67652428150177, "learning_rate": 6.5259360199678046e-06, "loss": 1.0197, "step": 10890 }, { "epoch": 0.62, "grad_norm": 1.6533143520355225, "learning_rate": 6.5241941703238545e-06, "loss": 0.9823, "step": 10891 }, { "epoch": 0.62, "grad_norm": 1.6874151229858398, "learning_rate": 6.5224524406202535e-06, "loss": 1.0069, "step": 10892 }, { "epoch": 0.62, "grad_norm": 1.8553768396377563, "learning_rate": 6.520710830917105e-06, "loss": 0.9092, "step": 10893 }, { "epoch": 0.62, "grad_norm": 1.6393564939498901, "learning_rate": 6.518969341274508e-06, "loss": 0.8662, "step": 10894 }, { "epoch": 0.62, "grad_norm": 1.0096694231033325, "learning_rate": 6.517227971752553e-06, "loss": 0.5339, "step": 10895 }, { "epoch": 0.62, "grad_norm": 1.6071406602859497, "learning_rate": 6.515486722411334e-06, "loss": 0.8773, "step": 10896 }, { "epoch": 0.62, "grad_norm": 1.675369143486023, "learning_rate": 6.513745593310934e-06, "loss": 0.966, "step": 10897 }, { "epoch": 0.63, "grad_norm": 1.8798292875289917, "learning_rate": 6.5120045845114344e-06, "loss": 0.9706, "step": 10898 }, { "epoch": 0.63, "grad_norm": 1.766384243965149, "learning_rate": 6.510263696072914e-06, "loss": 0.9265, "step": 10899 }, { "epoch": 0.63, "grad_norm": 1.820424199104309, "learning_rate": 6.508522928055445e-06, "loss": 1.0018, "step": 10900 }, { "epoch": 0.63, "grad_norm": 1.5814709663391113, "learning_rate": 6.5067822805190976e-06, "loss": 0.9253, "step": 10901 }, { "epoch": 0.63, "grad_norm": 1.6871063709259033, "learning_rate": 6.505041753523932e-06, "loss": 0.9669, "step": 10902 }, { "epoch": 0.63, "grad_norm": 1.6259853839874268, "learning_rate": 6.503301347130015e-06, "loss": 1.0228, "step": 10903 }, { "epoch": 0.63, "grad_norm": 1.807197093963623, "learning_rate": 6.501561061397402e-06, "loss": 0.9348, "step": 10904 }, { "epoch": 0.63, "grad_norm": 1.557220220565796, "learning_rate": 6.499820896386144e-06, "loss": 0.911, "step": 10905 }, { "epoch": 0.63, "grad_norm": 1.5868818759918213, "learning_rate": 6.4980808521562895e-06, "loss": 0.8803, "step": 10906 }, { "epoch": 0.63, "grad_norm": 1.6475913524627686, "learning_rate": 6.496340928767881e-06, "loss": 0.8941, "step": 10907 }, { "epoch": 0.63, "grad_norm": 1.7701905965805054, "learning_rate": 6.494601126280963e-06, "loss": 0.9548, "step": 10908 }, { "epoch": 0.63, "grad_norm": 1.8128606081008911, "learning_rate": 6.492861444755566e-06, "loss": 0.9861, "step": 10909 }, { "epoch": 0.63, "grad_norm": 1.6365362405776978, "learning_rate": 6.491121884251724e-06, "loss": 0.9098, "step": 10910 }, { "epoch": 0.63, "grad_norm": 1.7748537063598633, "learning_rate": 6.489382444829464e-06, "loss": 1.024, "step": 10911 }, { "epoch": 0.63, "grad_norm": 1.6241058111190796, "learning_rate": 6.487643126548811e-06, "loss": 0.8813, "step": 10912 }, { "epoch": 0.63, "grad_norm": 1.8209609985351562, "learning_rate": 6.485903929469782e-06, "loss": 0.8999, "step": 10913 }, { "epoch": 0.63, "grad_norm": 1.6941410303115845, "learning_rate": 6.484164853652391e-06, "loss": 1.0171, "step": 10914 }, { "epoch": 0.63, "grad_norm": 1.8765860795974731, "learning_rate": 6.482425899156647e-06, "loss": 0.8233, "step": 10915 }, { "epoch": 0.63, "grad_norm": 1.7330796718597412, "learning_rate": 6.480687066042562e-06, "loss": 0.8856, "step": 10916 }, { "epoch": 0.63, "grad_norm": 1.7512115240097046, "learning_rate": 6.478948354370136e-06, "loss": 0.9015, "step": 10917 }, { "epoch": 0.63, "grad_norm": 1.5984995365142822, "learning_rate": 6.477209764199366e-06, "loss": 0.9206, "step": 10918 }, { "epoch": 0.63, "grad_norm": 1.857762336730957, "learning_rate": 6.475471295590248e-06, "loss": 0.9192, "step": 10919 }, { "epoch": 0.63, "grad_norm": 1.8268460035324097, "learning_rate": 6.473732948602769e-06, "loss": 1.0099, "step": 10920 }, { "epoch": 0.63, "grad_norm": 1.6284695863723755, "learning_rate": 6.471994723296915e-06, "loss": 0.8808, "step": 10921 }, { "epoch": 0.63, "grad_norm": 1.6751948595046997, "learning_rate": 6.470256619732669e-06, "loss": 0.9119, "step": 10922 }, { "epoch": 0.63, "grad_norm": 1.7086914777755737, "learning_rate": 6.4685186379700075e-06, "loss": 0.8154, "step": 10923 }, { "epoch": 0.63, "grad_norm": 1.6296610832214355, "learning_rate": 6.466780778068903e-06, "loss": 0.9486, "step": 10924 }, { "epoch": 0.63, "grad_norm": 1.778385043144226, "learning_rate": 6.465043040089322e-06, "loss": 0.9376, "step": 10925 }, { "epoch": 0.63, "grad_norm": 2.3001983165740967, "learning_rate": 6.463305424091235e-06, "loss": 0.9382, "step": 10926 }, { "epoch": 0.63, "grad_norm": 1.6163650751113892, "learning_rate": 6.461567930134593e-06, "loss": 0.8851, "step": 10927 }, { "epoch": 0.63, "grad_norm": 1.7785840034484863, "learning_rate": 6.459830558279362e-06, "loss": 0.9041, "step": 10928 }, { "epoch": 0.63, "grad_norm": 1.8891711235046387, "learning_rate": 6.458093308585486e-06, "loss": 0.9212, "step": 10929 }, { "epoch": 0.63, "grad_norm": 1.7698668241500854, "learning_rate": 6.456356181112919e-06, "loss": 0.9251, "step": 10930 }, { "epoch": 0.63, "grad_norm": 1.8802356719970703, "learning_rate": 6.4546191759216e-06, "loss": 0.9468, "step": 10931 }, { "epoch": 0.63, "grad_norm": 1.5903085470199585, "learning_rate": 6.452882293071467e-06, "loss": 1.0076, "step": 10932 }, { "epoch": 0.63, "grad_norm": 1.7616667747497559, "learning_rate": 6.451145532622463e-06, "loss": 0.9344, "step": 10933 }, { "epoch": 0.63, "grad_norm": 1.7080965042114258, "learning_rate": 6.44940889463451e-06, "loss": 0.9825, "step": 10934 }, { "epoch": 0.63, "grad_norm": 1.6940077543258667, "learning_rate": 6.44767237916754e-06, "loss": 0.9852, "step": 10935 }, { "epoch": 0.63, "grad_norm": 1.5831682682037354, "learning_rate": 6.445935986281472e-06, "loss": 0.8523, "step": 10936 }, { "epoch": 0.63, "grad_norm": 1.8369783163070679, "learning_rate": 6.444199716036225e-06, "loss": 0.9479, "step": 10937 }, { "epoch": 0.63, "grad_norm": 1.7305128574371338, "learning_rate": 6.442463568491715e-06, "loss": 0.9108, "step": 10938 }, { "epoch": 0.63, "grad_norm": 1.8118075132369995, "learning_rate": 6.440727543707847e-06, "loss": 0.8433, "step": 10939 }, { "epoch": 0.63, "grad_norm": 1.747452974319458, "learning_rate": 6.438991641744531e-06, "loss": 0.966, "step": 10940 }, { "epoch": 0.63, "grad_norm": 1.0185604095458984, "learning_rate": 6.437255862661664e-06, "loss": 0.6005, "step": 10941 }, { "epoch": 0.63, "grad_norm": 1.8497415781021118, "learning_rate": 6.435520206519148e-06, "loss": 1.0114, "step": 10942 }, { "epoch": 0.63, "grad_norm": 1.7420477867126465, "learning_rate": 6.43378467337687e-06, "loss": 0.8999, "step": 10943 }, { "epoch": 0.63, "grad_norm": 1.77449369430542, "learning_rate": 6.432049263294722e-06, "loss": 0.9797, "step": 10944 }, { "epoch": 0.63, "grad_norm": 1.6068028211593628, "learning_rate": 6.4303139763325874e-06, "loss": 0.8382, "step": 10945 }, { "epoch": 0.63, "grad_norm": 1.8904341459274292, "learning_rate": 6.428578812550346e-06, "loss": 0.8673, "step": 10946 }, { "epoch": 0.63, "grad_norm": 1.0405447483062744, "learning_rate": 6.426843772007873e-06, "loss": 0.5558, "step": 10947 }, { "epoch": 0.63, "grad_norm": 1.9702852964401245, "learning_rate": 6.425108854765041e-06, "loss": 0.822, "step": 10948 }, { "epoch": 0.63, "grad_norm": 1.7094571590423584, "learning_rate": 6.423374060881716e-06, "loss": 0.8728, "step": 10949 }, { "epoch": 0.63, "grad_norm": 2.0238184928894043, "learning_rate": 6.421639390417762e-06, "loss": 0.8342, "step": 10950 }, { "epoch": 0.63, "grad_norm": 1.7719672918319702, "learning_rate": 6.4199048434330355e-06, "loss": 0.8763, "step": 10951 }, { "epoch": 0.63, "grad_norm": 1.7524116039276123, "learning_rate": 6.418170419987393e-06, "loss": 0.9056, "step": 10952 }, { "epoch": 0.63, "grad_norm": 1.7774206399917603, "learning_rate": 6.416436120140684e-06, "loss": 0.9202, "step": 10953 }, { "epoch": 0.63, "grad_norm": 1.8391846418380737, "learning_rate": 6.414701943952755e-06, "loss": 0.9902, "step": 10954 }, { "epoch": 0.63, "grad_norm": 1.7043883800506592, "learning_rate": 6.412967891483446e-06, "loss": 0.8446, "step": 10955 }, { "epoch": 0.63, "grad_norm": 1.6859205961227417, "learning_rate": 6.411233962792593e-06, "loss": 0.8399, "step": 10956 }, { "epoch": 0.63, "grad_norm": 1.781709909439087, "learning_rate": 6.409500157940033e-06, "loss": 0.9385, "step": 10957 }, { "epoch": 0.63, "grad_norm": 1.7395527362823486, "learning_rate": 6.407766476985593e-06, "loss": 0.924, "step": 10958 }, { "epoch": 0.63, "grad_norm": 1.7877651453018188, "learning_rate": 6.406032919989098e-06, "loss": 0.9883, "step": 10959 }, { "epoch": 0.63, "grad_norm": 1.6655069589614868, "learning_rate": 6.404299487010366e-06, "loss": 0.9283, "step": 10960 }, { "epoch": 0.63, "grad_norm": 1.6002076864242554, "learning_rate": 6.402566178109217e-06, "loss": 0.912, "step": 10961 }, { "epoch": 0.63, "grad_norm": 1.7050714492797852, "learning_rate": 6.4008329933454585e-06, "loss": 0.9527, "step": 10962 }, { "epoch": 0.63, "grad_norm": 1.8286172151565552, "learning_rate": 6.399099932778898e-06, "loss": 0.9709, "step": 10963 }, { "epoch": 0.63, "grad_norm": 1.755947232246399, "learning_rate": 6.397366996469343e-06, "loss": 0.9697, "step": 10964 }, { "epoch": 0.63, "grad_norm": 1.0392966270446777, "learning_rate": 6.395634184476589e-06, "loss": 0.523, "step": 10965 }, { "epoch": 0.63, "grad_norm": 1.6759611368179321, "learning_rate": 6.39390149686043e-06, "loss": 0.966, "step": 10966 }, { "epoch": 0.63, "grad_norm": 1.6869038343429565, "learning_rate": 6.392168933680657e-06, "loss": 0.8614, "step": 10967 }, { "epoch": 0.63, "grad_norm": 1.754591941833496, "learning_rate": 6.390436494997055e-06, "loss": 0.8291, "step": 10968 }, { "epoch": 0.63, "grad_norm": 1.8557289838790894, "learning_rate": 6.388704180869407e-06, "loss": 0.8698, "step": 10969 }, { "epoch": 0.63, "grad_norm": 1.7339762449264526, "learning_rate": 6.386971991357491e-06, "loss": 0.966, "step": 10970 }, { "epoch": 0.63, "grad_norm": 1.7179497480392456, "learning_rate": 6.385239926521078e-06, "loss": 1.0051, "step": 10971 }, { "epoch": 0.63, "grad_norm": 1.726916790008545, "learning_rate": 6.383507986419939e-06, "loss": 0.9143, "step": 10972 }, { "epoch": 0.63, "grad_norm": 1.942336916923523, "learning_rate": 6.381776171113837e-06, "loss": 0.9306, "step": 10973 }, { "epoch": 0.63, "grad_norm": 1.8387527465820312, "learning_rate": 6.3800444806625325e-06, "loss": 1.0069, "step": 10974 }, { "epoch": 0.63, "grad_norm": 1.8318450450897217, "learning_rate": 6.378312915125781e-06, "loss": 0.8225, "step": 10975 }, { "epoch": 0.63, "grad_norm": 2.052971839904785, "learning_rate": 6.376581474563332e-06, "loss": 0.8635, "step": 10976 }, { "epoch": 0.63, "grad_norm": 1.8037197589874268, "learning_rate": 6.3748501590349374e-06, "loss": 0.9644, "step": 10977 }, { "epoch": 0.63, "grad_norm": 1.8193291425704956, "learning_rate": 6.373118968600336e-06, "loss": 0.8925, "step": 10978 }, { "epoch": 0.63, "grad_norm": 1.7070635557174683, "learning_rate": 6.371387903319268e-06, "loss": 0.9196, "step": 10979 }, { "epoch": 0.63, "grad_norm": 1.756906509399414, "learning_rate": 6.369656963251467e-06, "loss": 1.0098, "step": 10980 }, { "epoch": 0.63, "grad_norm": 1.69398832321167, "learning_rate": 6.367926148456663e-06, "loss": 0.9388, "step": 10981 }, { "epoch": 0.63, "grad_norm": 1.7069170475006104, "learning_rate": 6.366195458994581e-06, "loss": 1.0062, "step": 10982 }, { "epoch": 0.63, "grad_norm": 1.7974534034729004, "learning_rate": 6.3644648949249444e-06, "loss": 0.9866, "step": 10983 }, { "epoch": 0.63, "grad_norm": 1.713573932647705, "learning_rate": 6.362734456307469e-06, "loss": 1.0169, "step": 10984 }, { "epoch": 0.63, "grad_norm": 1.866715431213379, "learning_rate": 6.3610041432018675e-06, "loss": 0.8631, "step": 10985 }, { "epoch": 0.63, "grad_norm": 1.7902175188064575, "learning_rate": 6.359273955667847e-06, "loss": 0.8962, "step": 10986 }, { "epoch": 0.63, "grad_norm": 1.731526255607605, "learning_rate": 6.3575438937651126e-06, "loss": 0.9179, "step": 10987 }, { "epoch": 0.63, "grad_norm": 1.8109718561172485, "learning_rate": 6.355813957553364e-06, "loss": 0.9641, "step": 10988 }, { "epoch": 0.63, "grad_norm": 1.0731029510498047, "learning_rate": 6.354084147092296e-06, "loss": 0.6011, "step": 10989 }, { "epoch": 0.63, "grad_norm": 1.8114094734191895, "learning_rate": 6.352354462441599e-06, "loss": 0.943, "step": 10990 }, { "epoch": 0.63, "grad_norm": 1.0237623453140259, "learning_rate": 6.350624903660961e-06, "loss": 0.5365, "step": 10991 }, { "epoch": 0.63, "grad_norm": 1.690920352935791, "learning_rate": 6.3488954708100635e-06, "loss": 0.9045, "step": 10992 }, { "epoch": 0.63, "grad_norm": 1.8189454078674316, "learning_rate": 6.347166163948581e-06, "loss": 0.896, "step": 10993 }, { "epoch": 0.63, "grad_norm": 1.7014495134353638, "learning_rate": 6.345436983136195e-06, "loss": 0.954, "step": 10994 }, { "epoch": 0.63, "grad_norm": 1.827431321144104, "learning_rate": 6.343707928432566e-06, "loss": 0.8499, "step": 10995 }, { "epoch": 0.63, "grad_norm": 2.02494215965271, "learning_rate": 6.3419789998973655e-06, "loss": 0.9032, "step": 10996 }, { "epoch": 0.63, "grad_norm": 1.9038704633712769, "learning_rate": 6.340250197590252e-06, "loss": 0.9574, "step": 10997 }, { "epoch": 0.63, "grad_norm": 1.9325326681137085, "learning_rate": 6.338521521570878e-06, "loss": 0.9384, "step": 10998 }, { "epoch": 0.63, "grad_norm": 1.8828848600387573, "learning_rate": 6.336792971898902e-06, "loss": 0.9209, "step": 10999 }, { "epoch": 0.63, "grad_norm": 1.1130417585372925, "learning_rate": 6.335064548633967e-06, "loss": 0.5687, "step": 11000 }, { "epoch": 0.63, "grad_norm": 1.816176414489746, "learning_rate": 6.333336251835715e-06, "loss": 0.9401, "step": 11001 }, { "epoch": 0.63, "grad_norm": 1.798572063446045, "learning_rate": 6.331608081563789e-06, "loss": 0.9238, "step": 11002 }, { "epoch": 0.63, "grad_norm": 1.5433313846588135, "learning_rate": 6.3298800378778205e-06, "loss": 0.9521, "step": 11003 }, { "epoch": 0.63, "grad_norm": 1.7677944898605347, "learning_rate": 6.328152120837438e-06, "loss": 0.8213, "step": 11004 }, { "epoch": 0.63, "grad_norm": 1.8854432106018066, "learning_rate": 6.326424330502271e-06, "loss": 0.9288, "step": 11005 }, { "epoch": 0.63, "grad_norm": 1.8492082357406616, "learning_rate": 6.324696666931938e-06, "loss": 1.0141, "step": 11006 }, { "epoch": 0.63, "grad_norm": 1.8113558292388916, "learning_rate": 6.322969130186057e-06, "loss": 0.9787, "step": 11007 }, { "epoch": 0.63, "grad_norm": 1.7282055616378784, "learning_rate": 6.3212417203242386e-06, "loss": 0.9875, "step": 11008 }, { "epoch": 0.63, "grad_norm": 1.9558500051498413, "learning_rate": 6.319514437406092e-06, "loss": 0.9387, "step": 11009 }, { "epoch": 0.63, "grad_norm": 1.7584469318389893, "learning_rate": 6.317787281491221e-06, "loss": 0.9657, "step": 11010 }, { "epoch": 0.63, "grad_norm": 1.7344539165496826, "learning_rate": 6.316060252639226e-06, "loss": 0.9068, "step": 11011 }, { "epoch": 0.63, "grad_norm": 1.7521001100540161, "learning_rate": 6.314333350909701e-06, "loss": 0.9043, "step": 11012 }, { "epoch": 0.63, "grad_norm": 1.7542681694030762, "learning_rate": 6.312606576362237e-06, "loss": 0.9057, "step": 11013 }, { "epoch": 0.63, "grad_norm": 1.663023829460144, "learning_rate": 6.3108799290564195e-06, "loss": 0.9462, "step": 11014 }, { "epoch": 0.63, "grad_norm": 1.7715116739273071, "learning_rate": 6.30915340905183e-06, "loss": 0.8449, "step": 11015 }, { "epoch": 0.63, "grad_norm": 1.9177113771438599, "learning_rate": 6.307427016408048e-06, "loss": 0.9331, "step": 11016 }, { "epoch": 0.63, "grad_norm": 1.8698233366012573, "learning_rate": 6.3057007511846425e-06, "loss": 0.9832, "step": 11017 }, { "epoch": 0.63, "grad_norm": 1.887035608291626, "learning_rate": 6.303974613441186e-06, "loss": 0.9607, "step": 11018 }, { "epoch": 0.63, "grad_norm": 1.9032979011535645, "learning_rate": 6.30224860323724e-06, "loss": 0.8288, "step": 11019 }, { "epoch": 0.63, "grad_norm": 1.7999794483184814, "learning_rate": 6.300522720632367e-06, "loss": 0.9558, "step": 11020 }, { "epoch": 0.63, "grad_norm": 1.743431568145752, "learning_rate": 6.29879696568612e-06, "loss": 0.9257, "step": 11021 }, { "epoch": 0.63, "grad_norm": 1.869232177734375, "learning_rate": 6.297071338458049e-06, "loss": 0.9035, "step": 11022 }, { "epoch": 0.63, "grad_norm": 1.8955055475234985, "learning_rate": 6.295345839007705e-06, "loss": 0.966, "step": 11023 }, { "epoch": 0.63, "grad_norm": 1.8623287677764893, "learning_rate": 6.293620467394626e-06, "loss": 0.9151, "step": 11024 }, { "epoch": 0.63, "grad_norm": 1.7950202226638794, "learning_rate": 6.291895223678352e-06, "loss": 1.0403, "step": 11025 }, { "epoch": 0.63, "grad_norm": 1.7138208150863647, "learning_rate": 6.290170107918416e-06, "loss": 0.9177, "step": 11026 }, { "epoch": 0.63, "grad_norm": 1.7432208061218262, "learning_rate": 6.2884451201743465e-06, "loss": 0.9432, "step": 11027 }, { "epoch": 0.63, "grad_norm": 1.6791489124298096, "learning_rate": 6.286720260505667e-06, "loss": 0.9159, "step": 11028 }, { "epoch": 0.63, "grad_norm": 1.7297539710998535, "learning_rate": 6.2849955289719015e-06, "loss": 0.9047, "step": 11029 }, { "epoch": 0.63, "grad_norm": 1.9915313720703125, "learning_rate": 6.283270925632561e-06, "loss": 0.9114, "step": 11030 }, { "epoch": 0.63, "grad_norm": 1.877548336982727, "learning_rate": 6.281546450547158e-06, "loss": 1.0277, "step": 11031 }, { "epoch": 0.63, "grad_norm": 1.6026054620742798, "learning_rate": 6.279822103775202e-06, "loss": 0.8753, "step": 11032 }, { "epoch": 0.63, "grad_norm": 1.8172279596328735, "learning_rate": 6.278097885376191e-06, "loss": 0.941, "step": 11033 }, { "epoch": 0.63, "grad_norm": 1.755049467086792, "learning_rate": 6.276373795409626e-06, "loss": 0.9646, "step": 11034 }, { "epoch": 0.63, "grad_norm": 1.6424330472946167, "learning_rate": 6.274649833934998e-06, "loss": 1.0022, "step": 11035 }, { "epoch": 0.63, "grad_norm": 1.8425216674804688, "learning_rate": 6.2729260010117995e-06, "loss": 0.9213, "step": 11036 }, { "epoch": 0.63, "grad_norm": 1.7436614036560059, "learning_rate": 6.271202296699515e-06, "loss": 0.8983, "step": 11037 }, { "epoch": 0.63, "grad_norm": 1.8171215057373047, "learning_rate": 6.269478721057621e-06, "loss": 0.9871, "step": 11038 }, { "epoch": 0.63, "grad_norm": 1.7208151817321777, "learning_rate": 6.267755274145597e-06, "loss": 0.8681, "step": 11039 }, { "epoch": 0.63, "grad_norm": 1.785952091217041, "learning_rate": 6.266031956022913e-06, "loss": 0.9223, "step": 11040 }, { "epoch": 0.63, "grad_norm": 2.1018598079681396, "learning_rate": 6.264308766749034e-06, "loss": 0.9146, "step": 11041 }, { "epoch": 0.63, "grad_norm": 1.9064139127731323, "learning_rate": 6.262585706383426e-06, "loss": 0.9593, "step": 11042 }, { "epoch": 0.63, "grad_norm": 1.6764744520187378, "learning_rate": 6.260862774985545e-06, "loss": 0.9406, "step": 11043 }, { "epoch": 0.63, "grad_norm": 0.9635815620422363, "learning_rate": 6.259139972614845e-06, "loss": 0.5402, "step": 11044 }, { "epoch": 0.63, "grad_norm": 1.744985580444336, "learning_rate": 6.257417299330775e-06, "loss": 0.9671, "step": 11045 }, { "epoch": 0.63, "grad_norm": 1.869409441947937, "learning_rate": 6.2556947551927786e-06, "loss": 0.935, "step": 11046 }, { "epoch": 0.63, "grad_norm": 1.692609429359436, "learning_rate": 6.253972340260295e-06, "loss": 0.9935, "step": 11047 }, { "epoch": 0.63, "grad_norm": 1.7915390729904175, "learning_rate": 6.2522500545927635e-06, "loss": 0.9857, "step": 11048 }, { "epoch": 0.63, "grad_norm": 1.8140815496444702, "learning_rate": 6.2505278982496146e-06, "loss": 0.9292, "step": 11049 }, { "epoch": 0.63, "grad_norm": 1.841794729232788, "learning_rate": 6.248805871290274e-06, "loss": 0.8641, "step": 11050 }, { "epoch": 0.63, "grad_norm": 1.6097465753555298, "learning_rate": 6.247083973774164e-06, "loss": 0.9056, "step": 11051 }, { "epoch": 0.63, "grad_norm": 1.6902005672454834, "learning_rate": 6.245362205760703e-06, "loss": 0.9938, "step": 11052 }, { "epoch": 0.63, "grad_norm": 1.6900423765182495, "learning_rate": 6.2436405673093035e-06, "loss": 0.995, "step": 11053 }, { "epoch": 0.63, "grad_norm": 1.7228493690490723, "learning_rate": 6.2419190584793755e-06, "loss": 0.9032, "step": 11054 }, { "epoch": 0.63, "grad_norm": 1.713371753692627, "learning_rate": 6.240197679330324e-06, "loss": 1.0024, "step": 11055 }, { "epoch": 0.63, "grad_norm": 1.67291259765625, "learning_rate": 6.238476429921547e-06, "loss": 0.9291, "step": 11056 }, { "epoch": 0.63, "grad_norm": 0.9409734606742859, "learning_rate": 6.236755310312441e-06, "loss": 0.5372, "step": 11057 }, { "epoch": 0.63, "grad_norm": 1.72934889793396, "learning_rate": 6.235034320562396e-06, "loss": 0.9588, "step": 11058 }, { "epoch": 0.63, "grad_norm": 1.783906102180481, "learning_rate": 6.2333134607308e-06, "loss": 0.9355, "step": 11059 }, { "epoch": 0.63, "grad_norm": 1.0282846689224243, "learning_rate": 6.231592730877035e-06, "loss": 0.5902, "step": 11060 }, { "epoch": 0.63, "grad_norm": 1.7783414125442505, "learning_rate": 6.229872131060477e-06, "loss": 0.8947, "step": 11061 }, { "epoch": 0.63, "grad_norm": 1.770006537437439, "learning_rate": 6.228151661340503e-06, "loss": 0.9197, "step": 11062 }, { "epoch": 0.63, "grad_norm": 1.6996607780456543, "learning_rate": 6.226431321776476e-06, "loss": 0.8681, "step": 11063 }, { "epoch": 0.63, "grad_norm": 1.5794992446899414, "learning_rate": 6.224711112427764e-06, "loss": 0.9598, "step": 11064 }, { "epoch": 0.63, "grad_norm": 1.8020678758621216, "learning_rate": 6.2229910333537256e-06, "loss": 0.9207, "step": 11065 }, { "epoch": 0.63, "grad_norm": 1.8647137880325317, "learning_rate": 6.221271084613718e-06, "loss": 1.0036, "step": 11066 }, { "epoch": 0.63, "grad_norm": 1.6966720819473267, "learning_rate": 6.219551266267088e-06, "loss": 0.9952, "step": 11067 }, { "epoch": 0.63, "grad_norm": 1.490164875984192, "learning_rate": 6.217831578373185e-06, "loss": 0.8968, "step": 11068 }, { "epoch": 0.63, "grad_norm": 1.6140415668487549, "learning_rate": 6.2161120209913475e-06, "loss": 0.9231, "step": 11069 }, { "epoch": 0.63, "grad_norm": 1.9712891578674316, "learning_rate": 6.214392594180915e-06, "loss": 0.9862, "step": 11070 }, { "epoch": 0.63, "grad_norm": 1.9715535640716553, "learning_rate": 6.212673298001221e-06, "loss": 0.937, "step": 11071 }, { "epoch": 0.63, "grad_norm": 1.711273431777954, "learning_rate": 6.2109541325115905e-06, "loss": 0.9432, "step": 11072 }, { "epoch": 0.64, "grad_norm": 1.7534865140914917, "learning_rate": 6.209235097771349e-06, "loss": 0.9281, "step": 11073 }, { "epoch": 0.64, "grad_norm": 1.7710973024368286, "learning_rate": 6.207516193839815e-06, "loss": 0.8858, "step": 11074 }, { "epoch": 0.64, "grad_norm": 1.2353761196136475, "learning_rate": 6.205797420776303e-06, "loss": 0.5681, "step": 11075 }, { "epoch": 0.64, "grad_norm": 1.7199963331222534, "learning_rate": 6.204078778640121e-06, "loss": 0.9494, "step": 11076 }, { "epoch": 0.64, "grad_norm": 1.8881731033325195, "learning_rate": 6.2023602674905795e-06, "loss": 0.8742, "step": 11077 }, { "epoch": 0.64, "grad_norm": 1.774097204208374, "learning_rate": 6.2006418873869776e-06, "loss": 0.936, "step": 11078 }, { "epoch": 0.64, "grad_norm": 1.7970454692840576, "learning_rate": 6.19892363838861e-06, "loss": 0.8906, "step": 11079 }, { "epoch": 0.64, "grad_norm": 1.851100206375122, "learning_rate": 6.1972055205547696e-06, "loss": 0.9278, "step": 11080 }, { "epoch": 0.64, "grad_norm": 1.7701952457427979, "learning_rate": 6.195487533944745e-06, "loss": 0.8708, "step": 11081 }, { "epoch": 0.64, "grad_norm": 1.9664692878723145, "learning_rate": 6.1937696786178184e-06, "loss": 0.9535, "step": 11082 }, { "epoch": 0.64, "grad_norm": 1.7868226766586304, "learning_rate": 6.192051954633267e-06, "loss": 0.9718, "step": 11083 }, { "epoch": 0.64, "grad_norm": 1.709617257118225, "learning_rate": 6.190334362050365e-06, "loss": 0.8835, "step": 11084 }, { "epoch": 0.64, "grad_norm": 1.8134889602661133, "learning_rate": 6.188616900928384e-06, "loss": 0.9123, "step": 11085 }, { "epoch": 0.64, "grad_norm": 2.1057851314544678, "learning_rate": 6.186899571326586e-06, "loss": 1.0112, "step": 11086 }, { "epoch": 0.64, "grad_norm": 1.7677581310272217, "learning_rate": 6.185182373304233e-06, "loss": 0.9094, "step": 11087 }, { "epoch": 0.64, "grad_norm": 1.849670648574829, "learning_rate": 6.183465306920578e-06, "loss": 0.9055, "step": 11088 }, { "epoch": 0.64, "grad_norm": 1.5702879428863525, "learning_rate": 6.181748372234875e-06, "loss": 0.8569, "step": 11089 }, { "epoch": 0.64, "grad_norm": 1.9833319187164307, "learning_rate": 6.180031569306371e-06, "loss": 1.0073, "step": 11090 }, { "epoch": 0.64, "grad_norm": 1.8712981939315796, "learning_rate": 6.178314898194305e-06, "loss": 0.9958, "step": 11091 }, { "epoch": 0.64, "grad_norm": 1.6491199731826782, "learning_rate": 6.1765983589579185e-06, "loss": 0.8409, "step": 11092 }, { "epoch": 0.64, "grad_norm": 1.8637315034866333, "learning_rate": 6.1748819516564414e-06, "loss": 0.9626, "step": 11093 }, { "epoch": 0.64, "grad_norm": 1.911102056503296, "learning_rate": 6.173165676349103e-06, "loss": 0.8994, "step": 11094 }, { "epoch": 0.64, "grad_norm": 1.8287254571914673, "learning_rate": 6.1714495330951285e-06, "loss": 0.934, "step": 11095 }, { "epoch": 0.64, "grad_norm": 1.856987714767456, "learning_rate": 6.169733521953735e-06, "loss": 1.0073, "step": 11096 }, { "epoch": 0.64, "grad_norm": 1.696885585784912, "learning_rate": 6.168017642984139e-06, "loss": 0.9138, "step": 11097 }, { "epoch": 0.64, "grad_norm": 1.845579743385315, "learning_rate": 6.166301896245549e-06, "loss": 0.8685, "step": 11098 }, { "epoch": 0.64, "grad_norm": 1.6699053049087524, "learning_rate": 6.164586281797171e-06, "loss": 0.945, "step": 11099 }, { "epoch": 0.64, "grad_norm": 1.6427404880523682, "learning_rate": 6.162870799698209e-06, "loss": 0.881, "step": 11100 }, { "epoch": 0.64, "grad_norm": 1.8081845045089722, "learning_rate": 6.161155450007853e-06, "loss": 0.9336, "step": 11101 }, { "epoch": 0.64, "grad_norm": 1.63100004196167, "learning_rate": 6.159440232785301e-06, "loss": 0.8434, "step": 11102 }, { "epoch": 0.64, "grad_norm": 1.61729896068573, "learning_rate": 6.1577251480897394e-06, "loss": 1.0031, "step": 11103 }, { "epoch": 0.64, "grad_norm": 1.6938660144805908, "learning_rate": 6.15601019598035e-06, "loss": 0.8987, "step": 11104 }, { "epoch": 0.64, "grad_norm": 1.8394631147384644, "learning_rate": 6.1542953765163105e-06, "loss": 0.897, "step": 11105 }, { "epoch": 0.64, "grad_norm": 1.7753878831863403, "learning_rate": 6.152580689756795e-06, "loss": 0.9828, "step": 11106 }, { "epoch": 0.64, "grad_norm": 1.822212815284729, "learning_rate": 6.150866135760973e-06, "loss": 0.9443, "step": 11107 }, { "epoch": 0.64, "grad_norm": 1.7607523202896118, "learning_rate": 6.149151714588009e-06, "loss": 0.9835, "step": 11108 }, { "epoch": 0.64, "grad_norm": 1.9261929988861084, "learning_rate": 6.14743742629706e-06, "loss": 0.9983, "step": 11109 }, { "epoch": 0.64, "grad_norm": 1.727971076965332, "learning_rate": 6.1457232709472854e-06, "loss": 0.889, "step": 11110 }, { "epoch": 0.64, "grad_norm": 1.9376394748687744, "learning_rate": 6.1440092485978355e-06, "loss": 0.9913, "step": 11111 }, { "epoch": 0.64, "grad_norm": 1.7738347053527832, "learning_rate": 6.1422953593078535e-06, "loss": 0.9228, "step": 11112 }, { "epoch": 0.64, "grad_norm": 1.7786391973495483, "learning_rate": 6.140581603136482e-06, "loss": 0.943, "step": 11113 }, { "epoch": 0.64, "grad_norm": 1.676505446434021, "learning_rate": 6.138867980142859e-06, "loss": 0.9668, "step": 11114 }, { "epoch": 0.64, "grad_norm": 2.002361536026001, "learning_rate": 6.137154490386117e-06, "loss": 0.9506, "step": 11115 }, { "epoch": 0.64, "grad_norm": 1.7543835639953613, "learning_rate": 6.135441133925382e-06, "loss": 0.9386, "step": 11116 }, { "epoch": 0.64, "grad_norm": 1.8200215101242065, "learning_rate": 6.13372791081978e-06, "loss": 0.9455, "step": 11117 }, { "epoch": 0.64, "grad_norm": 1.7570029497146606, "learning_rate": 6.132014821128427e-06, "loss": 1.0108, "step": 11118 }, { "epoch": 0.64, "grad_norm": 1.802232027053833, "learning_rate": 6.130301864910437e-06, "loss": 0.9386, "step": 11119 }, { "epoch": 0.64, "grad_norm": 1.6663905382156372, "learning_rate": 6.128589042224922e-06, "loss": 0.9714, "step": 11120 }, { "epoch": 0.64, "grad_norm": 1.7160100936889648, "learning_rate": 6.126876353130984e-06, "loss": 0.8989, "step": 11121 }, { "epoch": 0.64, "grad_norm": 1.9042409658432007, "learning_rate": 6.125163797687723e-06, "loss": 0.9002, "step": 11122 }, { "epoch": 0.64, "grad_norm": 1.8753575086593628, "learning_rate": 6.123451375954235e-06, "loss": 0.9573, "step": 11123 }, { "epoch": 0.64, "grad_norm": 1.7204365730285645, "learning_rate": 6.121739087989613e-06, "loss": 0.931, "step": 11124 }, { "epoch": 0.64, "grad_norm": 1.9006472826004028, "learning_rate": 6.120026933852939e-06, "loss": 0.9898, "step": 11125 }, { "epoch": 0.64, "grad_norm": 1.850385069847107, "learning_rate": 6.118314913603299e-06, "loss": 1.0244, "step": 11126 }, { "epoch": 0.64, "grad_norm": 1.7779486179351807, "learning_rate": 6.116603027299769e-06, "loss": 0.8601, "step": 11127 }, { "epoch": 0.64, "grad_norm": 2.093890428543091, "learning_rate": 6.114891275001417e-06, "loss": 0.9856, "step": 11128 }, { "epoch": 0.64, "grad_norm": 1.6598483324050903, "learning_rate": 6.113179656767319e-06, "loss": 0.8929, "step": 11129 }, { "epoch": 0.64, "grad_norm": 1.772676706314087, "learning_rate": 6.111468172656529e-06, "loss": 0.8801, "step": 11130 }, { "epoch": 0.64, "grad_norm": 1.7685588598251343, "learning_rate": 6.109756822728114e-06, "loss": 1.0091, "step": 11131 }, { "epoch": 0.64, "grad_norm": 1.6190587282180786, "learning_rate": 6.108045607041124e-06, "loss": 0.8415, "step": 11132 }, { "epoch": 0.64, "grad_norm": 1.8606104850769043, "learning_rate": 6.106334525654608e-06, "loss": 0.9118, "step": 11133 }, { "epoch": 0.64, "grad_norm": 1.846361517906189, "learning_rate": 6.1046235786276105e-06, "loss": 1.0228, "step": 11134 }, { "epoch": 0.64, "grad_norm": 1.0187033414840698, "learning_rate": 6.102912766019173e-06, "loss": 0.5528, "step": 11135 }, { "epoch": 0.64, "grad_norm": 1.6623220443725586, "learning_rate": 6.101202087888329e-06, "loss": 0.9492, "step": 11136 }, { "epoch": 0.64, "grad_norm": 1.8402243852615356, "learning_rate": 6.099491544294111e-06, "loss": 0.9341, "step": 11137 }, { "epoch": 0.64, "grad_norm": 1.6684446334838867, "learning_rate": 6.097781135295543e-06, "loss": 0.9616, "step": 11138 }, { "epoch": 0.64, "grad_norm": 1.869888424873352, "learning_rate": 6.096070860951648e-06, "loss": 0.8574, "step": 11139 }, { "epoch": 0.64, "grad_norm": 1.659745454788208, "learning_rate": 6.094360721321443e-06, "loss": 1.0251, "step": 11140 }, { "epoch": 0.64, "grad_norm": 1.5677752494812012, "learning_rate": 6.092650716463939e-06, "loss": 0.8079, "step": 11141 }, { "epoch": 0.64, "grad_norm": 1.7860305309295654, "learning_rate": 6.090940846438143e-06, "loss": 0.9112, "step": 11142 }, { "epoch": 0.64, "grad_norm": 1.7747595310211182, "learning_rate": 6.08923111130306e-06, "loss": 0.8739, "step": 11143 }, { "epoch": 0.64, "grad_norm": 1.6891639232635498, "learning_rate": 6.087521511117686e-06, "loss": 0.8964, "step": 11144 }, { "epoch": 0.64, "grad_norm": 1.9224579334259033, "learning_rate": 6.085812045941018e-06, "loss": 0.9193, "step": 11145 }, { "epoch": 0.64, "grad_norm": 1.7775453329086304, "learning_rate": 6.084102715832041e-06, "loss": 0.9303, "step": 11146 }, { "epoch": 0.64, "grad_norm": 1.6949049234390259, "learning_rate": 6.08239352084974e-06, "loss": 0.8649, "step": 11147 }, { "epoch": 0.64, "grad_norm": 1.6505948305130005, "learning_rate": 6.080684461053096e-06, "loss": 1.0045, "step": 11148 }, { "epoch": 0.64, "grad_norm": 1.5866869688034058, "learning_rate": 6.0789755365010834e-06, "loss": 0.8772, "step": 11149 }, { "epoch": 0.64, "grad_norm": 1.8161391019821167, "learning_rate": 6.077266747252672e-06, "loss": 0.9599, "step": 11150 }, { "epoch": 0.64, "grad_norm": 1.048730731010437, "learning_rate": 6.0755580933668265e-06, "loss": 0.5741, "step": 11151 }, { "epoch": 0.64, "grad_norm": 1.7437841892242432, "learning_rate": 6.073849574902509e-06, "loss": 0.9133, "step": 11152 }, { "epoch": 0.64, "grad_norm": 1.804383635520935, "learning_rate": 6.0721411919186766e-06, "loss": 0.918, "step": 11153 }, { "epoch": 0.64, "grad_norm": 1.7442551851272583, "learning_rate": 6.070432944474276e-06, "loss": 0.9426, "step": 11154 }, { "epoch": 0.64, "grad_norm": 1.6498570442199707, "learning_rate": 6.068724832628261e-06, "loss": 0.8492, "step": 11155 }, { "epoch": 0.64, "grad_norm": 1.7418148517608643, "learning_rate": 6.06701685643957e-06, "loss": 0.871, "step": 11156 }, { "epoch": 0.64, "grad_norm": 1.6323784589767456, "learning_rate": 6.065309015967141e-06, "loss": 0.9127, "step": 11157 }, { "epoch": 0.64, "grad_norm": 1.6228750944137573, "learning_rate": 6.063601311269906e-06, "loss": 0.9608, "step": 11158 }, { "epoch": 0.64, "grad_norm": 1.959595799446106, "learning_rate": 6.061893742406795e-06, "loss": 0.911, "step": 11159 }, { "epoch": 0.64, "grad_norm": 1.590744972229004, "learning_rate": 6.06018630943673e-06, "loss": 0.965, "step": 11160 }, { "epoch": 0.64, "grad_norm": 1.7135217189788818, "learning_rate": 6.05847901241863e-06, "loss": 0.8576, "step": 11161 }, { "epoch": 0.64, "grad_norm": 1.6816157102584839, "learning_rate": 6.05677185141141e-06, "loss": 0.8943, "step": 11162 }, { "epoch": 0.64, "grad_norm": 1.673656702041626, "learning_rate": 6.0550648264739776e-06, "loss": 0.9347, "step": 11163 }, { "epoch": 0.64, "grad_norm": 1.862610936164856, "learning_rate": 6.053357937665237e-06, "loss": 0.9805, "step": 11164 }, { "epoch": 0.64, "grad_norm": 1.6045889854431152, "learning_rate": 6.051651185044091e-06, "loss": 0.8789, "step": 11165 }, { "epoch": 0.64, "grad_norm": 1.777668833732605, "learning_rate": 6.049944568669432e-06, "loss": 0.9633, "step": 11166 }, { "epoch": 0.64, "grad_norm": 1.8737283945083618, "learning_rate": 6.048238088600151e-06, "loss": 0.9442, "step": 11167 }, { "epoch": 0.64, "grad_norm": 1.9231356382369995, "learning_rate": 6.046531744895136e-06, "loss": 0.9518, "step": 11168 }, { "epoch": 0.64, "grad_norm": 2.009995698928833, "learning_rate": 6.044825537613268e-06, "loss": 0.9372, "step": 11169 }, { "epoch": 0.64, "grad_norm": 1.8889410495758057, "learning_rate": 6.0431194668134226e-06, "loss": 0.9116, "step": 11170 }, { "epoch": 0.64, "grad_norm": 2.0155773162841797, "learning_rate": 6.04141353255447e-06, "loss": 0.9155, "step": 11171 }, { "epoch": 0.64, "grad_norm": 1.9353569746017456, "learning_rate": 6.039707734895279e-06, "loss": 0.9612, "step": 11172 }, { "epoch": 0.64, "grad_norm": 1.7433983087539673, "learning_rate": 6.038002073894712e-06, "loss": 0.9868, "step": 11173 }, { "epoch": 0.64, "grad_norm": 1.7999604940414429, "learning_rate": 6.036296549611627e-06, "loss": 0.9284, "step": 11174 }, { "epoch": 0.64, "grad_norm": 1.7205383777618408, "learning_rate": 6.034591162104873e-06, "loss": 0.9225, "step": 11175 }, { "epoch": 0.64, "grad_norm": 1.6665748357772827, "learning_rate": 6.032885911433303e-06, "loss": 0.9616, "step": 11176 }, { "epoch": 0.64, "grad_norm": 1.7283823490142822, "learning_rate": 6.031180797655758e-06, "loss": 0.94, "step": 11177 }, { "epoch": 0.64, "grad_norm": 1.857397198677063, "learning_rate": 6.029475820831077e-06, "loss": 0.8904, "step": 11178 }, { "epoch": 0.64, "grad_norm": 1.7711527347564697, "learning_rate": 6.027770981018093e-06, "loss": 0.9774, "step": 11179 }, { "epoch": 0.64, "grad_norm": 1.0171586275100708, "learning_rate": 6.026066278275638e-06, "loss": 0.5762, "step": 11180 }, { "epoch": 0.64, "grad_norm": 1.5514590740203857, "learning_rate": 6.024361712662534e-06, "loss": 0.8537, "step": 11181 }, { "epoch": 0.64, "grad_norm": 1.6581367254257202, "learning_rate": 6.022657284237603e-06, "loss": 0.872, "step": 11182 }, { "epoch": 0.64, "grad_norm": 1.8713977336883545, "learning_rate": 6.020952993059659e-06, "loss": 0.8437, "step": 11183 }, { "epoch": 0.64, "grad_norm": 2.0557243824005127, "learning_rate": 6.0192488391875125e-06, "loss": 0.9067, "step": 11184 }, { "epoch": 0.64, "grad_norm": 1.955741047859192, "learning_rate": 6.017544822679968e-06, "loss": 0.9168, "step": 11185 }, { "epoch": 0.64, "grad_norm": 1.8321826457977295, "learning_rate": 6.015840943595828e-06, "loss": 0.9073, "step": 11186 }, { "epoch": 0.64, "grad_norm": 1.8450034856796265, "learning_rate": 6.014137201993886e-06, "loss": 0.9441, "step": 11187 }, { "epoch": 0.64, "grad_norm": 1.584324598312378, "learning_rate": 6.0124335979329365e-06, "loss": 0.8551, "step": 11188 }, { "epoch": 0.64, "grad_norm": 1.6888806819915771, "learning_rate": 6.0107301314717635e-06, "loss": 0.9328, "step": 11189 }, { "epoch": 0.64, "grad_norm": 1.745956540107727, "learning_rate": 6.009026802669151e-06, "loss": 0.8763, "step": 11190 }, { "epoch": 0.64, "grad_norm": 1.7399253845214844, "learning_rate": 6.007323611583873e-06, "loss": 0.9309, "step": 11191 }, { "epoch": 0.64, "grad_norm": 2.072075128555298, "learning_rate": 6.005620558274707e-06, "loss": 0.8344, "step": 11192 }, { "epoch": 0.64, "grad_norm": 1.795037865638733, "learning_rate": 6.003917642800416e-06, "loss": 0.8578, "step": 11193 }, { "epoch": 0.64, "grad_norm": 1.6517858505249023, "learning_rate": 6.002214865219764e-06, "loss": 0.9073, "step": 11194 }, { "epoch": 0.64, "grad_norm": 1.7928987741470337, "learning_rate": 6.00051222559151e-06, "loss": 0.9194, "step": 11195 }, { "epoch": 0.64, "grad_norm": 1.718496561050415, "learning_rate": 5.998809723974407e-06, "loss": 0.9122, "step": 11196 }, { "epoch": 0.64, "grad_norm": 1.8300230503082275, "learning_rate": 5.997107360427205e-06, "loss": 0.9925, "step": 11197 }, { "epoch": 0.64, "grad_norm": 1.6812857389450073, "learning_rate": 5.995405135008645e-06, "loss": 0.8515, "step": 11198 }, { "epoch": 0.64, "grad_norm": 1.677461862564087, "learning_rate": 5.993703047777468e-06, "loss": 0.8564, "step": 11199 }, { "epoch": 0.64, "grad_norm": 1.7871259450912476, "learning_rate": 5.9920010987924086e-06, "loss": 0.9006, "step": 11200 }, { "epoch": 0.64, "grad_norm": 1.9717799425125122, "learning_rate": 5.9902992881121955e-06, "loss": 1.0088, "step": 11201 }, { "epoch": 0.64, "grad_norm": 2.0505146980285645, "learning_rate": 5.988597615795553e-06, "loss": 0.9313, "step": 11202 }, { "epoch": 0.64, "grad_norm": 1.7951393127441406, "learning_rate": 5.9868960819012e-06, "loss": 1.0173, "step": 11203 }, { "epoch": 0.64, "grad_norm": 0.9893182516098022, "learning_rate": 5.985194686487854e-06, "loss": 0.5617, "step": 11204 }, { "epoch": 0.64, "grad_norm": 1.9469119310379028, "learning_rate": 5.983493429614224e-06, "loss": 0.978, "step": 11205 }, { "epoch": 0.64, "grad_norm": 1.8285976648330688, "learning_rate": 5.981792311339017e-06, "loss": 0.9528, "step": 11206 }, { "epoch": 0.64, "grad_norm": 1.7404613494873047, "learning_rate": 5.980091331720933e-06, "loss": 0.9591, "step": 11207 }, { "epoch": 0.64, "grad_norm": 1.718595266342163, "learning_rate": 5.978390490818665e-06, "loss": 0.9273, "step": 11208 }, { "epoch": 0.64, "grad_norm": 1.6811379194259644, "learning_rate": 5.97668978869091e-06, "loss": 0.9081, "step": 11209 }, { "epoch": 0.64, "grad_norm": 1.9186893701553345, "learning_rate": 5.974989225396352e-06, "loss": 0.9442, "step": 11210 }, { "epoch": 0.64, "grad_norm": 1.7086790800094604, "learning_rate": 5.973288800993672e-06, "loss": 0.8348, "step": 11211 }, { "epoch": 0.64, "grad_norm": 1.8945207595825195, "learning_rate": 5.971588515541547e-06, "loss": 0.9188, "step": 11212 }, { "epoch": 0.64, "grad_norm": 1.758755087852478, "learning_rate": 5.969888369098649e-06, "loss": 0.9017, "step": 11213 }, { "epoch": 0.64, "grad_norm": 1.7454659938812256, "learning_rate": 5.968188361723647e-06, "loss": 0.9104, "step": 11214 }, { "epoch": 0.64, "grad_norm": 1.8722522258758545, "learning_rate": 5.9664884934752025e-06, "loss": 0.9755, "step": 11215 }, { "epoch": 0.64, "grad_norm": 1.843132734298706, "learning_rate": 5.964788764411971e-06, "loss": 0.9215, "step": 11216 }, { "epoch": 0.64, "grad_norm": 1.9044537544250488, "learning_rate": 5.963089174592609e-06, "loss": 0.9293, "step": 11217 }, { "epoch": 0.64, "grad_norm": 1.5448360443115234, "learning_rate": 5.961389724075761e-06, "loss": 0.9017, "step": 11218 }, { "epoch": 0.64, "grad_norm": 1.8351337909698486, "learning_rate": 5.959690412920074e-06, "loss": 0.9585, "step": 11219 }, { "epoch": 0.64, "grad_norm": 1.7059670686721802, "learning_rate": 5.957991241184184e-06, "loss": 0.9345, "step": 11220 }, { "epoch": 0.64, "grad_norm": 1.7572273015975952, "learning_rate": 5.956292208926724e-06, "loss": 0.9058, "step": 11221 }, { "epoch": 0.64, "grad_norm": 1.060110092163086, "learning_rate": 5.954593316206325e-06, "loss": 0.5581, "step": 11222 }, { "epoch": 0.64, "grad_norm": 1.689358115196228, "learning_rate": 5.952894563081612e-06, "loss": 0.9642, "step": 11223 }, { "epoch": 0.64, "grad_norm": 2.02638578414917, "learning_rate": 5.9511959496112015e-06, "loss": 0.9481, "step": 11224 }, { "epoch": 0.64, "grad_norm": 1.7780299186706543, "learning_rate": 5.949497475853709e-06, "loss": 0.9089, "step": 11225 }, { "epoch": 0.64, "grad_norm": 1.793791651725769, "learning_rate": 5.947799141867744e-06, "loss": 0.9441, "step": 11226 }, { "epoch": 0.64, "grad_norm": 1.588212013244629, "learning_rate": 5.94610094771191e-06, "loss": 0.8989, "step": 11227 }, { "epoch": 0.64, "grad_norm": 1.7925142049789429, "learning_rate": 5.9444028934448105e-06, "loss": 0.985, "step": 11228 }, { "epoch": 0.64, "grad_norm": 1.7412967681884766, "learning_rate": 5.942704979125037e-06, "loss": 0.9236, "step": 11229 }, { "epoch": 0.64, "grad_norm": 1.7654411792755127, "learning_rate": 5.941007204811181e-06, "loss": 0.9671, "step": 11230 }, { "epoch": 0.64, "grad_norm": 1.689968228340149, "learning_rate": 5.939309570561828e-06, "loss": 0.8924, "step": 11231 }, { "epoch": 0.64, "grad_norm": 1.7111839056015015, "learning_rate": 5.9376120764355595e-06, "loss": 0.9579, "step": 11232 }, { "epoch": 0.64, "grad_norm": 1.9688478708267212, "learning_rate": 5.935914722490947e-06, "loss": 0.9566, "step": 11233 }, { "epoch": 0.64, "grad_norm": 1.7676457166671753, "learning_rate": 5.934217508786569e-06, "loss": 0.9576, "step": 11234 }, { "epoch": 0.64, "grad_norm": 1.965472936630249, "learning_rate": 5.932520435380986e-06, "loss": 1.0277, "step": 11235 }, { "epoch": 0.64, "grad_norm": 1.7442251443862915, "learning_rate": 5.930823502332761e-06, "loss": 0.9822, "step": 11236 }, { "epoch": 0.64, "grad_norm": 1.8437994718551636, "learning_rate": 5.92912670970045e-06, "loss": 0.9122, "step": 11237 }, { "epoch": 0.64, "grad_norm": 1.6816394329071045, "learning_rate": 5.9274300575426045e-06, "loss": 0.9191, "step": 11238 }, { "epoch": 0.64, "grad_norm": 1.7000749111175537, "learning_rate": 5.925733545917771e-06, "loss": 0.842, "step": 11239 }, { "epoch": 0.64, "grad_norm": 1.6760202646255493, "learning_rate": 5.924037174884494e-06, "loss": 0.878, "step": 11240 }, { "epoch": 0.64, "grad_norm": 1.844265341758728, "learning_rate": 5.922340944501306e-06, "loss": 0.9125, "step": 11241 }, { "epoch": 0.64, "grad_norm": 1.7644356489181519, "learning_rate": 5.920644854826742e-06, "loss": 0.8731, "step": 11242 }, { "epoch": 0.64, "grad_norm": 1.7012150287628174, "learning_rate": 5.918948905919331e-06, "loss": 0.9895, "step": 11243 }, { "epoch": 0.64, "grad_norm": 1.0596972703933716, "learning_rate": 5.91725309783759e-06, "loss": 0.6, "step": 11244 }, { "epoch": 0.64, "grad_norm": 1.8861899375915527, "learning_rate": 5.91555743064004e-06, "loss": 0.9187, "step": 11245 }, { "epoch": 0.64, "grad_norm": 1.0559172630310059, "learning_rate": 5.913861904385194e-06, "loss": 0.5721, "step": 11246 }, { "epoch": 0.65, "grad_norm": 1.706436038017273, "learning_rate": 5.912166519131561e-06, "loss": 0.8855, "step": 11247 }, { "epoch": 0.65, "grad_norm": 1.769168734550476, "learning_rate": 5.910471274937643e-06, "loss": 1.0049, "step": 11248 }, { "epoch": 0.65, "grad_norm": 1.6045399904251099, "learning_rate": 5.908776171861937e-06, "loss": 0.9566, "step": 11249 }, { "epoch": 0.65, "grad_norm": 1.6776833534240723, "learning_rate": 5.907081209962937e-06, "loss": 0.9508, "step": 11250 }, { "epoch": 0.65, "grad_norm": 1.7636966705322266, "learning_rate": 5.9053863892991304e-06, "loss": 0.931, "step": 11251 }, { "epoch": 0.65, "grad_norm": 0.9879350066184998, "learning_rate": 5.903691709929002e-06, "loss": 0.5479, "step": 11252 }, { "epoch": 0.65, "grad_norm": 1.6759867668151855, "learning_rate": 5.901997171911032e-06, "loss": 0.9567, "step": 11253 }, { "epoch": 0.65, "grad_norm": 1.9727058410644531, "learning_rate": 5.90030277530369e-06, "loss": 0.8964, "step": 11254 }, { "epoch": 0.65, "grad_norm": 1.6429152488708496, "learning_rate": 5.898608520165448e-06, "loss": 0.9155, "step": 11255 }, { "epoch": 0.65, "grad_norm": 1.7586252689361572, "learning_rate": 5.896914406554768e-06, "loss": 1.0172, "step": 11256 }, { "epoch": 0.65, "grad_norm": 1.696723222732544, "learning_rate": 5.89522043453011e-06, "loss": 0.9038, "step": 11257 }, { "epoch": 0.65, "grad_norm": 1.055777668952942, "learning_rate": 5.893526604149931e-06, "loss": 0.5995, "step": 11258 }, { "epoch": 0.65, "grad_norm": 1.6775919198989868, "learning_rate": 5.891832915472676e-06, "loss": 0.9389, "step": 11259 }, { "epoch": 0.65, "grad_norm": 1.6754018068313599, "learning_rate": 5.8901393685567906e-06, "loss": 0.8429, "step": 11260 }, { "epoch": 0.65, "grad_norm": 1.7547240257263184, "learning_rate": 5.888445963460716e-06, "loss": 1.0018, "step": 11261 }, { "epoch": 0.65, "grad_norm": 1.7485063076019287, "learning_rate": 5.886752700242886e-06, "loss": 0.9531, "step": 11262 }, { "epoch": 0.65, "grad_norm": 1.6518371105194092, "learning_rate": 5.885059578961732e-06, "loss": 0.9516, "step": 11263 }, { "epoch": 0.65, "grad_norm": 1.7253373861312866, "learning_rate": 5.883366599675675e-06, "loss": 0.9161, "step": 11264 }, { "epoch": 0.65, "grad_norm": 1.7492817640304565, "learning_rate": 5.881673762443138e-06, "loss": 0.8991, "step": 11265 }, { "epoch": 0.65, "grad_norm": 1.7205696105957031, "learning_rate": 5.879981067322538e-06, "loss": 1.0574, "step": 11266 }, { "epoch": 0.65, "grad_norm": 1.75700044631958, "learning_rate": 5.878288514372281e-06, "loss": 0.9142, "step": 11267 }, { "epoch": 0.65, "grad_norm": 1.5620605945587158, "learning_rate": 5.8765961036507734e-06, "loss": 0.8509, "step": 11268 }, { "epoch": 0.65, "grad_norm": 1.6875847578048706, "learning_rate": 5.874903835216417e-06, "loss": 0.9441, "step": 11269 }, { "epoch": 0.65, "grad_norm": 1.839728593826294, "learning_rate": 5.873211709127604e-06, "loss": 0.9261, "step": 11270 }, { "epoch": 0.65, "grad_norm": 1.8331587314605713, "learning_rate": 5.871519725442729e-06, "loss": 0.9048, "step": 11271 }, { "epoch": 0.65, "grad_norm": 1.7244642972946167, "learning_rate": 5.869827884220176e-06, "loss": 0.9746, "step": 11272 }, { "epoch": 0.65, "grad_norm": 1.7994509935379028, "learning_rate": 5.868136185518325e-06, "loss": 0.9162, "step": 11273 }, { "epoch": 0.65, "grad_norm": 1.6378540992736816, "learning_rate": 5.866444629395551e-06, "loss": 0.8494, "step": 11274 }, { "epoch": 0.65, "grad_norm": 1.6356014013290405, "learning_rate": 5.864753215910227e-06, "loss": 0.9277, "step": 11275 }, { "epoch": 0.65, "grad_norm": 1.8897268772125244, "learning_rate": 5.863061945120719e-06, "loss": 0.9266, "step": 11276 }, { "epoch": 0.65, "grad_norm": 1.7455523014068604, "learning_rate": 5.8613708170853875e-06, "loss": 0.8983, "step": 11277 }, { "epoch": 0.65, "grad_norm": 1.8324429988861084, "learning_rate": 5.859679831862588e-06, "loss": 0.9782, "step": 11278 }, { "epoch": 0.65, "grad_norm": 1.7036374807357788, "learning_rate": 5.857988989510672e-06, "loss": 0.9329, "step": 11279 }, { "epoch": 0.65, "grad_norm": 1.8639886379241943, "learning_rate": 5.856298290087985e-06, "loss": 0.9633, "step": 11280 }, { "epoch": 0.65, "grad_norm": 1.6167266368865967, "learning_rate": 5.854607733652871e-06, "loss": 0.9092, "step": 11281 }, { "epoch": 0.65, "grad_norm": 1.9042736291885376, "learning_rate": 5.852917320263662e-06, "loss": 0.9026, "step": 11282 }, { "epoch": 0.65, "grad_norm": 1.7621804475784302, "learning_rate": 5.8512270499786925e-06, "loss": 0.9477, "step": 11283 }, { "epoch": 0.65, "grad_norm": 1.0519490242004395, "learning_rate": 5.849536922856289e-06, "loss": 0.5061, "step": 11284 }, { "epoch": 0.65, "grad_norm": 1.7745466232299805, "learning_rate": 5.847846938954773e-06, "loss": 0.9707, "step": 11285 }, { "epoch": 0.65, "grad_norm": 1.7042334079742432, "learning_rate": 5.846157098332459e-06, "loss": 1.0151, "step": 11286 }, { "epoch": 0.65, "grad_norm": 1.7382415533065796, "learning_rate": 5.8444674010476595e-06, "loss": 0.9163, "step": 11287 }, { "epoch": 0.65, "grad_norm": 1.9140769243240356, "learning_rate": 5.842777847158682e-06, "loss": 0.9885, "step": 11288 }, { "epoch": 0.65, "grad_norm": 1.7565152645111084, "learning_rate": 5.841088436723832e-06, "loss": 0.9545, "step": 11289 }, { "epoch": 0.65, "grad_norm": 1.0483756065368652, "learning_rate": 5.839399169801399e-06, "loss": 0.5986, "step": 11290 }, { "epoch": 0.65, "grad_norm": 1.8989804983139038, "learning_rate": 5.837710046449681e-06, "loss": 0.8871, "step": 11291 }, { "epoch": 0.65, "grad_norm": 1.8919768333435059, "learning_rate": 5.836021066726962e-06, "loss": 0.9383, "step": 11292 }, { "epoch": 0.65, "grad_norm": 1.6916006803512573, "learning_rate": 5.8343322306915215e-06, "loss": 0.9034, "step": 11293 }, { "epoch": 0.65, "grad_norm": 1.6258602142333984, "learning_rate": 5.832643538401641e-06, "loss": 0.8699, "step": 11294 }, { "epoch": 0.65, "grad_norm": 1.8095002174377441, "learning_rate": 5.830954989915593e-06, "loss": 0.9066, "step": 11295 }, { "epoch": 0.65, "grad_norm": 1.7301563024520874, "learning_rate": 5.82926658529164e-06, "loss": 0.9352, "step": 11296 }, { "epoch": 0.65, "grad_norm": 1.8340225219726562, "learning_rate": 5.827578324588049e-06, "loss": 0.8658, "step": 11297 }, { "epoch": 0.65, "grad_norm": 1.8023529052734375, "learning_rate": 5.825890207863072e-06, "loss": 0.946, "step": 11298 }, { "epoch": 0.65, "grad_norm": 1.1031925678253174, "learning_rate": 5.824202235174967e-06, "loss": 0.5817, "step": 11299 }, { "epoch": 0.65, "grad_norm": 1.8104435205459595, "learning_rate": 5.822514406581975e-06, "loss": 0.9293, "step": 11300 }, { "epoch": 0.65, "grad_norm": 1.8191719055175781, "learning_rate": 5.820826722142345e-06, "loss": 0.8759, "step": 11301 }, { "epoch": 0.65, "grad_norm": 1.9445056915283203, "learning_rate": 5.819139181914307e-06, "loss": 0.9718, "step": 11302 }, { "epoch": 0.65, "grad_norm": 1.6088212728500366, "learning_rate": 5.817451785956101e-06, "loss": 0.8821, "step": 11303 }, { "epoch": 0.65, "grad_norm": 1.774735689163208, "learning_rate": 5.815764534325947e-06, "loss": 0.8943, "step": 11304 }, { "epoch": 0.65, "grad_norm": 1.787113070487976, "learning_rate": 5.81407742708207e-06, "loss": 0.9508, "step": 11305 }, { "epoch": 0.65, "grad_norm": 1.6502835750579834, "learning_rate": 5.812390464282694e-06, "loss": 0.9513, "step": 11306 }, { "epoch": 0.65, "grad_norm": 1.7536396980285645, "learning_rate": 5.810703645986018e-06, "loss": 0.9259, "step": 11307 }, { "epoch": 0.65, "grad_norm": 1.833302617073059, "learning_rate": 5.8090169722502634e-06, "loss": 0.9006, "step": 11308 }, { "epoch": 0.65, "grad_norm": 1.7453045845031738, "learning_rate": 5.807330443133621e-06, "loss": 0.9877, "step": 11309 }, { "epoch": 0.65, "grad_norm": 1.8242830038070679, "learning_rate": 5.805644058694297e-06, "loss": 0.9277, "step": 11310 }, { "epoch": 0.65, "grad_norm": 1.7830101251602173, "learning_rate": 5.803957818990478e-06, "loss": 0.9388, "step": 11311 }, { "epoch": 0.65, "grad_norm": 1.7013764381408691, "learning_rate": 5.802271724080355e-06, "loss": 1.0082, "step": 11312 }, { "epoch": 0.65, "grad_norm": 1.6380224227905273, "learning_rate": 5.800585774022107e-06, "loss": 0.8738, "step": 11313 }, { "epoch": 0.65, "grad_norm": 1.618857979774475, "learning_rate": 5.7988999688739165e-06, "loss": 0.9265, "step": 11314 }, { "epoch": 0.65, "grad_norm": 1.052559733390808, "learning_rate": 5.797214308693948e-06, "loss": 0.5528, "step": 11315 }, { "epoch": 0.65, "grad_norm": 1.8657804727554321, "learning_rate": 5.7955287935403795e-06, "loss": 0.9717, "step": 11316 }, { "epoch": 0.65, "grad_norm": 1.9440536499023438, "learning_rate": 5.793843423471361e-06, "loss": 0.9188, "step": 11317 }, { "epoch": 0.65, "grad_norm": 1.7049248218536377, "learning_rate": 5.792158198545059e-06, "loss": 0.8873, "step": 11318 }, { "epoch": 0.65, "grad_norm": 1.775940179824829, "learning_rate": 5.790473118819626e-06, "loss": 0.8851, "step": 11319 }, { "epoch": 0.65, "grad_norm": 1.7453688383102417, "learning_rate": 5.788788184353203e-06, "loss": 0.9521, "step": 11320 }, { "epoch": 0.65, "grad_norm": 1.721278429031372, "learning_rate": 5.7871033952039416e-06, "loss": 1.0033, "step": 11321 }, { "epoch": 0.65, "grad_norm": 1.8560482263565063, "learning_rate": 5.785418751429968e-06, "loss": 0.9488, "step": 11322 }, { "epoch": 0.65, "grad_norm": 1.8506866693496704, "learning_rate": 5.783734253089426e-06, "loss": 1.0243, "step": 11323 }, { "epoch": 0.65, "grad_norm": 1.7493692636489868, "learning_rate": 5.782049900240432e-06, "loss": 0.8892, "step": 11324 }, { "epoch": 0.65, "grad_norm": 1.66787588596344, "learning_rate": 5.780365692941118e-06, "loss": 0.8545, "step": 11325 }, { "epoch": 0.65, "grad_norm": 1.833383321762085, "learning_rate": 5.778681631249588e-06, "loss": 0.8914, "step": 11326 }, { "epoch": 0.65, "grad_norm": 1.8372529745101929, "learning_rate": 5.776997715223972e-06, "loss": 0.9183, "step": 11327 }, { "epoch": 0.65, "grad_norm": 1.8468822240829468, "learning_rate": 5.775313944922365e-06, "loss": 1.0248, "step": 11328 }, { "epoch": 0.65, "grad_norm": 2.1548287868499756, "learning_rate": 5.773630320402875e-06, "loss": 0.9246, "step": 11329 }, { "epoch": 0.65, "grad_norm": 1.88059663772583, "learning_rate": 5.771946841723594e-06, "loss": 0.9688, "step": 11330 }, { "epoch": 0.65, "grad_norm": 2.030954599380493, "learning_rate": 5.77026350894262e-06, "loss": 0.9839, "step": 11331 }, { "epoch": 0.65, "grad_norm": 0.9893274903297424, "learning_rate": 5.768580322118034e-06, "loss": 0.5852, "step": 11332 }, { "epoch": 0.65, "grad_norm": 1.6069986820220947, "learning_rate": 5.766897281307924e-06, "loss": 1.0112, "step": 11333 }, { "epoch": 0.65, "grad_norm": 1.8937959671020508, "learning_rate": 5.765214386570361e-06, "loss": 0.9403, "step": 11334 }, { "epoch": 0.65, "grad_norm": 1.613581895828247, "learning_rate": 5.76353163796342e-06, "loss": 0.8648, "step": 11335 }, { "epoch": 0.65, "grad_norm": 0.9452673196792603, "learning_rate": 5.761849035545171e-06, "loss": 0.5137, "step": 11336 }, { "epoch": 0.65, "grad_norm": 1.7590184211730957, "learning_rate": 5.760166579373671e-06, "loss": 0.8906, "step": 11337 }, { "epoch": 0.65, "grad_norm": 1.6628491878509521, "learning_rate": 5.758484269506981e-06, "loss": 0.9672, "step": 11338 }, { "epoch": 0.65, "grad_norm": 1.7405368089675903, "learning_rate": 5.756802106003148e-06, "loss": 0.938, "step": 11339 }, { "epoch": 0.65, "grad_norm": 1.6419308185577393, "learning_rate": 5.755120088920225e-06, "loss": 0.9541, "step": 11340 }, { "epoch": 0.65, "grad_norm": 2.029478073120117, "learning_rate": 5.753438218316245e-06, "loss": 0.9222, "step": 11341 }, { "epoch": 0.65, "grad_norm": 1.9166191816329956, "learning_rate": 5.751756494249255e-06, "loss": 0.8825, "step": 11342 }, { "epoch": 0.65, "grad_norm": 1.964041829109192, "learning_rate": 5.7500749167772775e-06, "loss": 0.954, "step": 11343 }, { "epoch": 0.65, "grad_norm": 1.697587013244629, "learning_rate": 5.7483934859583465e-06, "loss": 0.8927, "step": 11344 }, { "epoch": 0.65, "grad_norm": 1.761136531829834, "learning_rate": 5.746712201850476e-06, "loss": 0.9865, "step": 11345 }, { "epoch": 0.65, "grad_norm": 1.7736387252807617, "learning_rate": 5.74503106451169e-06, "loss": 0.9655, "step": 11346 }, { "epoch": 0.65, "grad_norm": 1.6465517282485962, "learning_rate": 5.743350073999994e-06, "loss": 0.8737, "step": 11347 }, { "epoch": 0.65, "grad_norm": 1.7512249946594238, "learning_rate": 5.741669230373394e-06, "loss": 0.94, "step": 11348 }, { "epoch": 0.65, "grad_norm": 1.8223719596862793, "learning_rate": 5.739988533689899e-06, "loss": 0.9007, "step": 11349 }, { "epoch": 0.65, "grad_norm": 1.6956031322479248, "learning_rate": 5.738307984007495e-06, "loss": 0.9315, "step": 11350 }, { "epoch": 0.65, "grad_norm": 1.8070241212844849, "learning_rate": 5.736627581384182e-06, "loss": 0.9475, "step": 11351 }, { "epoch": 0.65, "grad_norm": 1.7960282564163208, "learning_rate": 5.73494732587794e-06, "loss": 0.9441, "step": 11352 }, { "epoch": 0.65, "grad_norm": 1.7883763313293457, "learning_rate": 5.7332672175467545e-06, "loss": 0.9634, "step": 11353 }, { "epoch": 0.65, "grad_norm": 1.6098722219467163, "learning_rate": 5.731587256448594e-06, "loss": 0.9032, "step": 11354 }, { "epoch": 0.65, "grad_norm": 1.7845321893692017, "learning_rate": 5.729907442641438e-06, "loss": 0.8938, "step": 11355 }, { "epoch": 0.65, "grad_norm": 1.8283565044403076, "learning_rate": 5.728227776183244e-06, "loss": 1.0146, "step": 11356 }, { "epoch": 0.65, "grad_norm": 1.741353154182434, "learning_rate": 5.726548257131981e-06, "loss": 0.9553, "step": 11357 }, { "epoch": 0.65, "grad_norm": 1.688572883605957, "learning_rate": 5.724868885545597e-06, "loss": 0.939, "step": 11358 }, { "epoch": 0.65, "grad_norm": 1.776901125907898, "learning_rate": 5.723189661482045e-06, "loss": 0.9235, "step": 11359 }, { "epoch": 0.65, "grad_norm": 1.8392395973205566, "learning_rate": 5.721510584999275e-06, "loss": 0.9456, "step": 11360 }, { "epoch": 0.65, "grad_norm": 1.6833420991897583, "learning_rate": 5.719831656155219e-06, "loss": 0.9358, "step": 11361 }, { "epoch": 0.65, "grad_norm": 1.7693593502044678, "learning_rate": 5.718152875007821e-06, "loss": 0.8412, "step": 11362 }, { "epoch": 0.65, "grad_norm": 1.7292180061340332, "learning_rate": 5.716474241615002e-06, "loss": 0.967, "step": 11363 }, { "epoch": 0.65, "grad_norm": 2.0668320655822754, "learning_rate": 5.7147957560346955e-06, "loss": 0.86, "step": 11364 }, { "epoch": 0.65, "grad_norm": 1.7753098011016846, "learning_rate": 5.713117418324814e-06, "loss": 0.9726, "step": 11365 }, { "epoch": 0.65, "grad_norm": 1.8005441427230835, "learning_rate": 5.711439228543278e-06, "loss": 0.9117, "step": 11366 }, { "epoch": 0.65, "grad_norm": 1.7748939990997314, "learning_rate": 5.7097611867479915e-06, "loss": 1.0715, "step": 11367 }, { "epoch": 0.65, "grad_norm": 1.6896727085113525, "learning_rate": 5.708083292996867e-06, "loss": 0.8891, "step": 11368 }, { "epoch": 0.65, "grad_norm": 1.5829453468322754, "learning_rate": 5.7064055473477934e-06, "loss": 0.875, "step": 11369 }, { "epoch": 0.65, "grad_norm": 1.6841682195663452, "learning_rate": 5.704727949858675e-06, "loss": 0.8664, "step": 11370 }, { "epoch": 0.65, "grad_norm": 1.752638578414917, "learning_rate": 5.703050500587393e-06, "loss": 0.8691, "step": 11371 }, { "epoch": 0.65, "grad_norm": 1.6753618717193604, "learning_rate": 5.7013731995918355e-06, "loss": 0.9747, "step": 11372 }, { "epoch": 0.65, "grad_norm": 1.8384405374526978, "learning_rate": 5.699696046929885e-06, "loss": 0.943, "step": 11373 }, { "epoch": 0.65, "grad_norm": 1.9088551998138428, "learning_rate": 5.698019042659407e-06, "loss": 0.9491, "step": 11374 }, { "epoch": 0.65, "grad_norm": 1.9232667684555054, "learning_rate": 5.696342186838279e-06, "loss": 0.9104, "step": 11375 }, { "epoch": 0.65, "grad_norm": 1.7725820541381836, "learning_rate": 5.694665479524357e-06, "loss": 0.9058, "step": 11376 }, { "epoch": 0.65, "grad_norm": 1.647047996520996, "learning_rate": 5.692988920775506e-06, "loss": 0.8647, "step": 11377 }, { "epoch": 0.65, "grad_norm": 1.7829203605651855, "learning_rate": 5.6913125106495725e-06, "loss": 0.952, "step": 11378 }, { "epoch": 0.65, "grad_norm": 1.854789137840271, "learning_rate": 5.689636249204412e-06, "loss": 0.9181, "step": 11379 }, { "epoch": 0.65, "grad_norm": 1.8685446977615356, "learning_rate": 5.687960136497861e-06, "loss": 0.9711, "step": 11380 }, { "epoch": 0.65, "grad_norm": 1.8763785362243652, "learning_rate": 5.686284172587764e-06, "loss": 0.9196, "step": 11381 }, { "epoch": 0.65, "grad_norm": 1.6788727045059204, "learning_rate": 5.684608357531946e-06, "loss": 0.9355, "step": 11382 }, { "epoch": 0.65, "grad_norm": 1.7597674131393433, "learning_rate": 5.682932691388239e-06, "loss": 0.8707, "step": 11383 }, { "epoch": 0.65, "grad_norm": 1.816584825515747, "learning_rate": 5.68125717421447e-06, "loss": 0.957, "step": 11384 }, { "epoch": 0.65, "grad_norm": 1.0189027786254883, "learning_rate": 5.679581806068448e-06, "loss": 0.5032, "step": 11385 }, { "epoch": 0.65, "grad_norm": 1.7265815734863281, "learning_rate": 5.677906587007993e-06, "loss": 0.932, "step": 11386 }, { "epoch": 0.65, "grad_norm": 1.7486203908920288, "learning_rate": 5.676231517090904e-06, "loss": 0.9954, "step": 11387 }, { "epoch": 0.65, "grad_norm": 1.6739150285720825, "learning_rate": 5.674556596374993e-06, "loss": 0.8794, "step": 11388 }, { "epoch": 0.65, "grad_norm": 1.6435792446136475, "learning_rate": 5.672881824918046e-06, "loss": 0.952, "step": 11389 }, { "epoch": 0.65, "grad_norm": 1.8992124795913696, "learning_rate": 5.671207202777864e-06, "loss": 0.9311, "step": 11390 }, { "epoch": 0.65, "grad_norm": 1.759995698928833, "learning_rate": 5.669532730012226e-06, "loss": 0.9304, "step": 11391 }, { "epoch": 0.65, "grad_norm": 1.8289308547973633, "learning_rate": 5.667858406678915e-06, "loss": 0.8461, "step": 11392 }, { "epoch": 0.65, "grad_norm": 1.7310559749603271, "learning_rate": 5.666184232835711e-06, "loss": 1.0165, "step": 11393 }, { "epoch": 0.65, "grad_norm": 1.6345224380493164, "learning_rate": 5.664510208540386e-06, "loss": 0.8365, "step": 11394 }, { "epoch": 0.65, "grad_norm": 1.841046690940857, "learning_rate": 5.6628363338506995e-06, "loss": 0.9026, "step": 11395 }, { "epoch": 0.65, "grad_norm": 1.867016315460205, "learning_rate": 5.66116260882442e-06, "loss": 0.9462, "step": 11396 }, { "epoch": 0.65, "grad_norm": 1.76585054397583, "learning_rate": 5.659489033519294e-06, "loss": 1.0184, "step": 11397 }, { "epoch": 0.65, "grad_norm": 1.5102393627166748, "learning_rate": 5.6578156079930824e-06, "loss": 0.8637, "step": 11398 }, { "epoch": 0.65, "grad_norm": 1.6155517101287842, "learning_rate": 5.656142332303518e-06, "loss": 0.8823, "step": 11399 }, { "epoch": 0.65, "grad_norm": 1.677160382270813, "learning_rate": 5.65446920650835e-06, "loss": 0.9642, "step": 11400 }, { "epoch": 0.65, "grad_norm": 1.8145761489868164, "learning_rate": 5.652796230665314e-06, "loss": 0.9221, "step": 11401 }, { "epoch": 0.65, "grad_norm": 1.5694677829742432, "learning_rate": 5.6511234048321325e-06, "loss": 0.8843, "step": 11402 }, { "epoch": 0.65, "grad_norm": 1.7132309675216675, "learning_rate": 5.649450729066539e-06, "loss": 0.9187, "step": 11403 }, { "epoch": 0.65, "grad_norm": 1.7981895208358765, "learning_rate": 5.647778203426244e-06, "loss": 0.9496, "step": 11404 }, { "epoch": 0.65, "grad_norm": 1.7436301708221436, "learning_rate": 5.6461058279689685e-06, "loss": 0.9728, "step": 11405 }, { "epoch": 0.65, "grad_norm": 1.0067760944366455, "learning_rate": 5.644433602752416e-06, "loss": 0.5576, "step": 11406 }, { "epoch": 0.65, "grad_norm": 1.7562768459320068, "learning_rate": 5.642761527834297e-06, "loss": 0.996, "step": 11407 }, { "epoch": 0.65, "grad_norm": 1.8708735704421997, "learning_rate": 5.641089603272301e-06, "loss": 1.0431, "step": 11408 }, { "epoch": 0.65, "grad_norm": 1.6377204656600952, "learning_rate": 5.639417829124132e-06, "loss": 0.8273, "step": 11409 }, { "epoch": 0.65, "grad_norm": 1.7471342086791992, "learning_rate": 5.637746205447469e-06, "loss": 0.915, "step": 11410 }, { "epoch": 0.65, "grad_norm": 1.9942532777786255, "learning_rate": 5.636074732300002e-06, "loss": 0.9953, "step": 11411 }, { "epoch": 0.65, "grad_norm": 1.6542710065841675, "learning_rate": 5.634403409739402e-06, "loss": 0.9299, "step": 11412 }, { "epoch": 0.65, "grad_norm": 1.836714744567871, "learning_rate": 5.632732237823346e-06, "loss": 0.966, "step": 11413 }, { "epoch": 0.65, "grad_norm": 1.7244693040847778, "learning_rate": 5.6310612166095055e-06, "loss": 0.9559, "step": 11414 }, { "epoch": 0.65, "grad_norm": 1.7735915184020996, "learning_rate": 5.629390346155533e-06, "loss": 0.9378, "step": 11415 }, { "epoch": 0.65, "grad_norm": 1.6782585382461548, "learning_rate": 5.627719626519096e-06, "loss": 0.9366, "step": 11416 }, { "epoch": 0.65, "grad_norm": 1.5590078830718994, "learning_rate": 5.6260490577578365e-06, "loss": 0.857, "step": 11417 }, { "epoch": 0.65, "grad_norm": 1.706998586654663, "learning_rate": 5.624378639929411e-06, "loss": 0.9245, "step": 11418 }, { "epoch": 0.65, "grad_norm": 1.7812827825546265, "learning_rate": 5.62270837309145e-06, "loss": 0.831, "step": 11419 }, { "epoch": 0.65, "grad_norm": 1.6757922172546387, "learning_rate": 5.621038257301601e-06, "loss": 0.9411, "step": 11420 }, { "epoch": 0.66, "grad_norm": 1.7457726001739502, "learning_rate": 5.619368292617484e-06, "loss": 0.8925, "step": 11421 }, { "epoch": 0.66, "grad_norm": 1.8011186122894287, "learning_rate": 5.617698479096736e-06, "loss": 0.9744, "step": 11422 }, { "epoch": 0.66, "grad_norm": 1.837487816810608, "learning_rate": 5.616028816796968e-06, "loss": 0.9744, "step": 11423 }, { "epoch": 0.66, "grad_norm": 1.7239410877227783, "learning_rate": 5.614359305775803e-06, "loss": 0.9241, "step": 11424 }, { "epoch": 0.66, "grad_norm": 1.007815957069397, "learning_rate": 5.612689946090844e-06, "loss": 0.5286, "step": 11425 }, { "epoch": 0.66, "grad_norm": 1.1158007383346558, "learning_rate": 5.6110207377996985e-06, "loss": 0.5159, "step": 11426 }, { "epoch": 0.66, "grad_norm": 1.7071516513824463, "learning_rate": 5.609351680959971e-06, "loss": 0.9342, "step": 11427 }, { "epoch": 0.66, "grad_norm": 1.6868281364440918, "learning_rate": 5.607682775629249e-06, "loss": 0.9794, "step": 11428 }, { "epoch": 0.66, "grad_norm": 0.980263352394104, "learning_rate": 5.606014021865129e-06, "loss": 0.5045, "step": 11429 }, { "epoch": 0.66, "grad_norm": 1.6178532838821411, "learning_rate": 5.604345419725188e-06, "loss": 0.8148, "step": 11430 }, { "epoch": 0.66, "grad_norm": 1.5486879348754883, "learning_rate": 5.6026769692670106e-06, "loss": 0.8594, "step": 11431 }, { "epoch": 0.66, "grad_norm": 0.9661365747451782, "learning_rate": 5.601008670548162e-06, "loss": 0.4986, "step": 11432 }, { "epoch": 0.66, "grad_norm": 1.6334351301193237, "learning_rate": 5.599340523626222e-06, "loss": 0.9142, "step": 11433 }, { "epoch": 0.66, "grad_norm": 1.9416334629058838, "learning_rate": 5.5976725285587445e-06, "loss": 0.9804, "step": 11434 }, { "epoch": 0.66, "grad_norm": 1.8198814392089844, "learning_rate": 5.596004685403294e-06, "loss": 0.9481, "step": 11435 }, { "epoch": 0.66, "grad_norm": 1.9834403991699219, "learning_rate": 5.594336994217416e-06, "loss": 1.0434, "step": 11436 }, { "epoch": 0.66, "grad_norm": 1.6068751811981201, "learning_rate": 5.59266945505866e-06, "loss": 0.8837, "step": 11437 }, { "epoch": 0.66, "grad_norm": 1.6591851711273193, "learning_rate": 5.5910020679845745e-06, "loss": 0.9259, "step": 11438 }, { "epoch": 0.66, "grad_norm": 1.711958408355713, "learning_rate": 5.5893348330526885e-06, "loss": 0.8653, "step": 11439 }, { "epoch": 0.66, "grad_norm": 1.7710493803024292, "learning_rate": 5.58766775032054e-06, "loss": 0.9518, "step": 11440 }, { "epoch": 0.66, "grad_norm": 1.7233673334121704, "learning_rate": 5.586000819845647e-06, "loss": 0.9111, "step": 11441 }, { "epoch": 0.66, "grad_norm": 1.6358797550201416, "learning_rate": 5.584334041685542e-06, "loss": 0.8505, "step": 11442 }, { "epoch": 0.66, "grad_norm": 1.6612132787704468, "learning_rate": 5.582667415897729e-06, "loss": 0.92, "step": 11443 }, { "epoch": 0.66, "grad_norm": 1.7748699188232422, "learning_rate": 5.581000942539729e-06, "loss": 0.9163, "step": 11444 }, { "epoch": 0.66, "grad_norm": 1.8957267999649048, "learning_rate": 5.579334621669038e-06, "loss": 0.8912, "step": 11445 }, { "epoch": 0.66, "grad_norm": 1.6913210153579712, "learning_rate": 5.577668453343165e-06, "loss": 0.8982, "step": 11446 }, { "epoch": 0.66, "grad_norm": 1.7763898372650146, "learning_rate": 5.576002437619595e-06, "loss": 0.8938, "step": 11447 }, { "epoch": 0.66, "grad_norm": 1.6640903949737549, "learning_rate": 5.574336574555829e-06, "loss": 0.924, "step": 11448 }, { "epoch": 0.66, "grad_norm": 1.8456172943115234, "learning_rate": 5.572670864209339e-06, "loss": 0.9085, "step": 11449 }, { "epoch": 0.66, "grad_norm": 1.6747535467147827, "learning_rate": 5.571005306637611e-06, "loss": 0.9439, "step": 11450 }, { "epoch": 0.66, "grad_norm": 1.6667211055755615, "learning_rate": 5.569339901898123e-06, "loss": 0.9794, "step": 11451 }, { "epoch": 0.66, "grad_norm": 1.5908657312393188, "learning_rate": 5.567674650048334e-06, "loss": 0.9393, "step": 11452 }, { "epoch": 0.66, "grad_norm": 1.8179203271865845, "learning_rate": 5.566009551145716e-06, "loss": 0.9556, "step": 11453 }, { "epoch": 0.66, "grad_norm": 1.90180242061615, "learning_rate": 5.564344605247718e-06, "loss": 1.0459, "step": 11454 }, { "epoch": 0.66, "grad_norm": 1.785592794418335, "learning_rate": 5.5626798124118005e-06, "loss": 0.919, "step": 11455 }, { "epoch": 0.66, "grad_norm": 1.7312959432601929, "learning_rate": 5.561015172695406e-06, "loss": 0.9251, "step": 11456 }, { "epoch": 0.66, "grad_norm": 1.9106122255325317, "learning_rate": 5.559350686155979e-06, "loss": 0.9373, "step": 11457 }, { "epoch": 0.66, "grad_norm": 1.6820158958435059, "learning_rate": 5.5576863528509486e-06, "loss": 1.0181, "step": 11458 }, { "epoch": 0.66, "grad_norm": 1.640638828277588, "learning_rate": 5.556022172837761e-06, "loss": 0.8923, "step": 11459 }, { "epoch": 0.66, "grad_norm": 1.6908283233642578, "learning_rate": 5.554358146173831e-06, "loss": 0.8973, "step": 11460 }, { "epoch": 0.66, "grad_norm": 1.8675917387008667, "learning_rate": 5.552694272916586e-06, "loss": 0.9577, "step": 11461 }, { "epoch": 0.66, "grad_norm": 1.731400728225708, "learning_rate": 5.551030553123436e-06, "loss": 0.9217, "step": 11462 }, { "epoch": 0.66, "grad_norm": 1.7646234035491943, "learning_rate": 5.549366986851797e-06, "loss": 0.8923, "step": 11463 }, { "epoch": 0.66, "grad_norm": 1.7584969997406006, "learning_rate": 5.547703574159067e-06, "loss": 0.9994, "step": 11464 }, { "epoch": 0.66, "grad_norm": 1.6685765981674194, "learning_rate": 5.546040315102653e-06, "loss": 0.8648, "step": 11465 }, { "epoch": 0.66, "grad_norm": 1.7235735654830933, "learning_rate": 5.544377209739943e-06, "loss": 0.8839, "step": 11466 }, { "epoch": 0.66, "grad_norm": 1.8909136056900024, "learning_rate": 5.542714258128329e-06, "loss": 0.9504, "step": 11467 }, { "epoch": 0.66, "grad_norm": 1.8063448667526245, "learning_rate": 5.541051460325199e-06, "loss": 0.9321, "step": 11468 }, { "epoch": 0.66, "grad_norm": 1.7116748094558716, "learning_rate": 5.539388816387922e-06, "loss": 0.9564, "step": 11469 }, { "epoch": 0.66, "grad_norm": 1.92942214012146, "learning_rate": 5.537726326373883e-06, "loss": 0.9518, "step": 11470 }, { "epoch": 0.66, "grad_norm": 1.8322480916976929, "learning_rate": 5.536063990340439e-06, "loss": 0.975, "step": 11471 }, { "epoch": 0.66, "grad_norm": 1.627907395362854, "learning_rate": 5.5344018083449615e-06, "loss": 0.881, "step": 11472 }, { "epoch": 0.66, "grad_norm": 1.822126030921936, "learning_rate": 5.532739780444799e-06, "loss": 1.0355, "step": 11473 }, { "epoch": 0.66, "grad_norm": 1.627274751663208, "learning_rate": 5.531077906697312e-06, "loss": 0.9008, "step": 11474 }, { "epoch": 0.66, "grad_norm": 1.7201422452926636, "learning_rate": 5.52941618715984e-06, "loss": 0.9682, "step": 11475 }, { "epoch": 0.66, "grad_norm": 1.8258881568908691, "learning_rate": 5.52775462188973e-06, "loss": 0.9198, "step": 11476 }, { "epoch": 0.66, "grad_norm": 1.0668574571609497, "learning_rate": 5.52609321094431e-06, "loss": 0.5941, "step": 11477 }, { "epoch": 0.66, "grad_norm": 1.7159940004348755, "learning_rate": 5.524431954380922e-06, "loss": 0.9072, "step": 11478 }, { "epoch": 0.66, "grad_norm": 1.7637144327163696, "learning_rate": 5.52277085225688e-06, "loss": 0.9198, "step": 11479 }, { "epoch": 0.66, "grad_norm": 1.8645601272583008, "learning_rate": 5.521109904629511e-06, "loss": 0.9771, "step": 11480 }, { "epoch": 0.66, "grad_norm": 1.0816428661346436, "learning_rate": 5.51944911155613e-06, "loss": 0.625, "step": 11481 }, { "epoch": 0.66, "grad_norm": 1.7623848915100098, "learning_rate": 5.517788473094041e-06, "loss": 0.9269, "step": 11482 }, { "epoch": 0.66, "grad_norm": 0.9972187280654907, "learning_rate": 5.516127989300556e-06, "loss": 0.5633, "step": 11483 }, { "epoch": 0.66, "grad_norm": 1.7359601259231567, "learning_rate": 5.514467660232965e-06, "loss": 0.9553, "step": 11484 }, { "epoch": 0.66, "grad_norm": 1.8252882957458496, "learning_rate": 5.512807485948568e-06, "loss": 0.8859, "step": 11485 }, { "epoch": 0.66, "grad_norm": 1.6612387895584106, "learning_rate": 5.5111474665046475e-06, "loss": 0.9042, "step": 11486 }, { "epoch": 0.66, "grad_norm": 1.821286916732788, "learning_rate": 5.509487601958491e-06, "loss": 0.8859, "step": 11487 }, { "epoch": 0.66, "grad_norm": 1.7292828559875488, "learning_rate": 5.50782789236737e-06, "loss": 0.9862, "step": 11488 }, { "epoch": 0.66, "grad_norm": 1.5921818017959595, "learning_rate": 5.5061683377885645e-06, "loss": 0.9683, "step": 11489 }, { "epoch": 0.66, "grad_norm": 0.9562250375747681, "learning_rate": 5.504508938279334e-06, "loss": 0.5405, "step": 11490 }, { "epoch": 0.66, "grad_norm": 1.8294230699539185, "learning_rate": 5.502849693896941e-06, "loss": 0.8617, "step": 11491 }, { "epoch": 0.66, "grad_norm": 1.721171498298645, "learning_rate": 5.501190604698647e-06, "loss": 0.9466, "step": 11492 }, { "epoch": 0.66, "grad_norm": 1.7587980031967163, "learning_rate": 5.499531670741694e-06, "loss": 0.953, "step": 11493 }, { "epoch": 0.66, "grad_norm": 1.9093011617660522, "learning_rate": 5.497872892083336e-06, "loss": 0.9648, "step": 11494 }, { "epoch": 0.66, "grad_norm": 1.916135549545288, "learning_rate": 5.496214268780804e-06, "loss": 1.0285, "step": 11495 }, { "epoch": 0.66, "grad_norm": 1.6544861793518066, "learning_rate": 5.494555800891342e-06, "loss": 0.9849, "step": 11496 }, { "epoch": 0.66, "grad_norm": 1.1265995502471924, "learning_rate": 5.492897488472167e-06, "loss": 0.5242, "step": 11497 }, { "epoch": 0.66, "grad_norm": 1.8691014051437378, "learning_rate": 5.491239331580515e-06, "loss": 0.9291, "step": 11498 }, { "epoch": 0.66, "grad_norm": 1.7157424688339233, "learning_rate": 5.4895813302735965e-06, "loss": 0.8802, "step": 11499 }, { "epoch": 0.66, "grad_norm": 1.6429020166397095, "learning_rate": 5.487923484608629e-06, "loss": 0.8892, "step": 11500 }, { "epoch": 0.66, "grad_norm": 1.8457252979278564, "learning_rate": 5.4862657946428155e-06, "loss": 0.878, "step": 11501 }, { "epoch": 0.66, "grad_norm": 1.9185314178466797, "learning_rate": 5.484608260433364e-06, "loss": 0.9211, "step": 11502 }, { "epoch": 0.66, "grad_norm": 1.7743207216262817, "learning_rate": 5.4829508820374645e-06, "loss": 0.852, "step": 11503 }, { "epoch": 0.66, "grad_norm": 1.6909737586975098, "learning_rate": 5.481293659512312e-06, "loss": 0.9529, "step": 11504 }, { "epoch": 0.66, "grad_norm": 1.8193588256835938, "learning_rate": 5.479636592915096e-06, "loss": 0.9207, "step": 11505 }, { "epoch": 0.66, "grad_norm": 1.639646053314209, "learning_rate": 5.477979682302992e-06, "loss": 0.9547, "step": 11506 }, { "epoch": 0.66, "grad_norm": 1.7361444234848022, "learning_rate": 5.47632292773318e-06, "loss": 0.9694, "step": 11507 }, { "epoch": 0.66, "grad_norm": 1.9035722017288208, "learning_rate": 5.474666329262823e-06, "loss": 0.9077, "step": 11508 }, { "epoch": 0.66, "grad_norm": 1.6163007020950317, "learning_rate": 5.473009886949094e-06, "loss": 0.8152, "step": 11509 }, { "epoch": 0.66, "grad_norm": 1.6628053188323975, "learning_rate": 5.4713536008491455e-06, "loss": 0.9493, "step": 11510 }, { "epoch": 0.66, "grad_norm": 1.7400606870651245, "learning_rate": 5.4696974710201375e-06, "loss": 0.9133, "step": 11511 }, { "epoch": 0.66, "grad_norm": 1.8280506134033203, "learning_rate": 5.468041497519211e-06, "loss": 0.9639, "step": 11512 }, { "epoch": 0.66, "grad_norm": 1.655489206314087, "learning_rate": 5.466385680403517e-06, "loss": 0.9196, "step": 11513 }, { "epoch": 0.66, "grad_norm": 1.751820683479309, "learning_rate": 5.464730019730185e-06, "loss": 0.9209, "step": 11514 }, { "epoch": 0.66, "grad_norm": 1.812620997428894, "learning_rate": 5.46307451555635e-06, "loss": 0.9288, "step": 11515 }, { "epoch": 0.66, "grad_norm": 1.8815985918045044, "learning_rate": 5.461419167939145e-06, "loss": 0.9429, "step": 11516 }, { "epoch": 0.66, "grad_norm": 1.7389006614685059, "learning_rate": 5.459763976935681e-06, "loss": 0.8534, "step": 11517 }, { "epoch": 0.66, "grad_norm": 1.872350811958313, "learning_rate": 5.4581089426030865e-06, "loss": 0.946, "step": 11518 }, { "epoch": 0.66, "grad_norm": 1.04293954372406, "learning_rate": 5.45645406499846e-06, "loss": 0.5744, "step": 11519 }, { "epoch": 0.66, "grad_norm": 1.738649606704712, "learning_rate": 5.454799344178914e-06, "loss": 0.881, "step": 11520 }, { "epoch": 0.66, "grad_norm": 1.8397550582885742, "learning_rate": 5.4531447802015445e-06, "loss": 0.9773, "step": 11521 }, { "epoch": 0.66, "grad_norm": 1.888254165649414, "learning_rate": 5.45149037312345e-06, "loss": 1.0335, "step": 11522 }, { "epoch": 0.66, "grad_norm": 1.6881126165390015, "learning_rate": 5.449836123001714e-06, "loss": 0.9937, "step": 11523 }, { "epoch": 0.66, "grad_norm": 1.7345950603485107, "learning_rate": 5.448182029893423e-06, "loss": 0.8285, "step": 11524 }, { "epoch": 0.66, "grad_norm": 1.787186622619629, "learning_rate": 5.4465280938556545e-06, "loss": 0.9602, "step": 11525 }, { "epoch": 0.66, "grad_norm": 1.6280945539474487, "learning_rate": 5.444874314945485e-06, "loss": 0.9719, "step": 11526 }, { "epoch": 0.66, "grad_norm": 1.9787378311157227, "learning_rate": 5.443220693219976e-06, "loss": 0.9074, "step": 11527 }, { "epoch": 0.66, "grad_norm": 1.9109443426132202, "learning_rate": 5.441567228736195e-06, "loss": 0.9853, "step": 11528 }, { "epoch": 0.66, "grad_norm": 0.9916813969612122, "learning_rate": 5.439913921551193e-06, "loss": 0.569, "step": 11529 }, { "epoch": 0.66, "grad_norm": 1.7193677425384521, "learning_rate": 5.438260771722027e-06, "loss": 0.9586, "step": 11530 }, { "epoch": 0.66, "grad_norm": 1.85320246219635, "learning_rate": 5.436607779305735e-06, "loss": 0.9215, "step": 11531 }, { "epoch": 0.66, "grad_norm": 1.6162906885147095, "learning_rate": 5.434954944359365e-06, "loss": 0.9022, "step": 11532 }, { "epoch": 0.66, "grad_norm": 1.770362138748169, "learning_rate": 5.433302266939944e-06, "loss": 0.905, "step": 11533 }, { "epoch": 0.66, "grad_norm": 2.0092549324035645, "learning_rate": 5.431649747104505e-06, "loss": 0.926, "step": 11534 }, { "epoch": 0.66, "grad_norm": 1.7938748598098755, "learning_rate": 5.429997384910075e-06, "loss": 0.9024, "step": 11535 }, { "epoch": 0.66, "grad_norm": 1.872809886932373, "learning_rate": 5.428345180413667e-06, "loss": 0.9583, "step": 11536 }, { "epoch": 0.66, "grad_norm": 1.754981517791748, "learning_rate": 5.426693133672301e-06, "loss": 0.9708, "step": 11537 }, { "epoch": 0.66, "grad_norm": 1.6853177547454834, "learning_rate": 5.425041244742975e-06, "loss": 0.9505, "step": 11538 }, { "epoch": 0.66, "grad_norm": 1.9788057804107666, "learning_rate": 5.4233895136827e-06, "loss": 0.9462, "step": 11539 }, { "epoch": 0.66, "grad_norm": 1.873182773590088, "learning_rate": 5.421737940548464e-06, "loss": 0.9124, "step": 11540 }, { "epoch": 0.66, "grad_norm": 1.7483211755752563, "learning_rate": 5.420086525397268e-06, "loss": 0.9743, "step": 11541 }, { "epoch": 0.66, "grad_norm": 1.7042179107666016, "learning_rate": 5.418435268286089e-06, "loss": 1.0, "step": 11542 }, { "epoch": 0.66, "grad_norm": 2.109450340270996, "learning_rate": 5.416784169271913e-06, "loss": 0.9745, "step": 11543 }, { "epoch": 0.66, "grad_norm": 1.7788567543029785, "learning_rate": 5.415133228411709e-06, "loss": 0.8561, "step": 11544 }, { "epoch": 0.66, "grad_norm": 1.6479825973510742, "learning_rate": 5.4134824457624504e-06, "loss": 0.8752, "step": 11545 }, { "epoch": 0.66, "grad_norm": 1.5855357646942139, "learning_rate": 5.411831821381105e-06, "loss": 0.8289, "step": 11546 }, { "epoch": 0.66, "grad_norm": 1.67849600315094, "learning_rate": 5.410181355324622e-06, "loss": 0.9589, "step": 11547 }, { "epoch": 0.66, "grad_norm": 1.7494593858718872, "learning_rate": 5.408531047649964e-06, "loss": 0.9427, "step": 11548 }, { "epoch": 0.66, "grad_norm": 1.7740103006362915, "learning_rate": 5.406880898414069e-06, "loss": 0.9479, "step": 11549 }, { "epoch": 0.66, "grad_norm": 1.8178991079330444, "learning_rate": 5.405230907673889e-06, "loss": 1.0246, "step": 11550 }, { "epoch": 0.66, "grad_norm": 1.8297910690307617, "learning_rate": 5.403581075486351e-06, "loss": 0.8878, "step": 11551 }, { "epoch": 0.66, "grad_norm": 1.9474565982818604, "learning_rate": 5.401931401908394e-06, "loss": 0.9151, "step": 11552 }, { "epoch": 0.66, "grad_norm": 2.0433881282806396, "learning_rate": 5.400281886996938e-06, "loss": 0.9608, "step": 11553 }, { "epoch": 0.66, "grad_norm": 1.0580730438232422, "learning_rate": 5.3986325308089075e-06, "loss": 0.5894, "step": 11554 }, { "epoch": 0.66, "grad_norm": 1.5779671669006348, "learning_rate": 5.396983333401211e-06, "loss": 0.9373, "step": 11555 }, { "epoch": 0.66, "grad_norm": 1.670257329940796, "learning_rate": 5.395334294830766e-06, "loss": 0.8942, "step": 11556 }, { "epoch": 0.66, "grad_norm": 1.8482142686843872, "learning_rate": 5.393685415154468e-06, "loss": 0.9514, "step": 11557 }, { "epoch": 0.66, "grad_norm": 1.7733861207962036, "learning_rate": 5.392036694429219e-06, "loss": 0.942, "step": 11558 }, { "epoch": 0.66, "grad_norm": 1.0109962224960327, "learning_rate": 5.390388132711916e-06, "loss": 0.5292, "step": 11559 }, { "epoch": 0.66, "grad_norm": 1.7582389116287231, "learning_rate": 5.388739730059438e-06, "loss": 0.9298, "step": 11560 }, { "epoch": 0.66, "grad_norm": 1.804871678352356, "learning_rate": 5.387091486528675e-06, "loss": 0.9533, "step": 11561 }, { "epoch": 0.66, "grad_norm": 1.0298603773117065, "learning_rate": 5.385443402176494e-06, "loss": 0.5349, "step": 11562 }, { "epoch": 0.66, "grad_norm": 1.0929852724075317, "learning_rate": 5.383795477059776e-06, "loss": 0.6014, "step": 11563 }, { "epoch": 0.66, "grad_norm": 1.8419651985168457, "learning_rate": 5.382147711235377e-06, "loss": 0.8793, "step": 11564 }, { "epoch": 0.66, "grad_norm": 1.7546218633651733, "learning_rate": 5.380500104760165e-06, "loss": 0.9293, "step": 11565 }, { "epoch": 0.66, "grad_norm": 1.869836688041687, "learning_rate": 5.3788526576909874e-06, "loss": 0.8758, "step": 11566 }, { "epoch": 0.66, "grad_norm": 1.7971305847167969, "learning_rate": 5.3772053700847e-06, "loss": 0.9042, "step": 11567 }, { "epoch": 0.66, "grad_norm": 1.9503904581069946, "learning_rate": 5.3755582419981354e-06, "loss": 1.0142, "step": 11568 }, { "epoch": 0.66, "grad_norm": 1.9284682273864746, "learning_rate": 5.373911273488139e-06, "loss": 0.9625, "step": 11569 }, { "epoch": 0.66, "grad_norm": 1.793882131576538, "learning_rate": 5.372264464611548e-06, "loss": 0.924, "step": 11570 }, { "epoch": 0.66, "grad_norm": 1.7408554553985596, "learning_rate": 5.370617815425177e-06, "loss": 0.9551, "step": 11571 }, { "epoch": 0.66, "grad_norm": 1.0461440086364746, "learning_rate": 5.368971325985859e-06, "loss": 0.5158, "step": 11572 }, { "epoch": 0.66, "grad_norm": 1.737757682800293, "learning_rate": 5.3673249963504005e-06, "loss": 0.9009, "step": 11573 }, { "epoch": 0.66, "grad_norm": 1.6210299730300903, "learning_rate": 5.3656788265756175e-06, "loss": 0.9465, "step": 11574 }, { "epoch": 0.66, "grad_norm": 1.8598971366882324, "learning_rate": 5.364032816718311e-06, "loss": 0.863, "step": 11575 }, { "epoch": 0.66, "grad_norm": 1.8453575372695923, "learning_rate": 5.362386966835285e-06, "loss": 0.92, "step": 11576 }, { "epoch": 0.66, "grad_norm": 0.9715495705604553, "learning_rate": 5.360741276983325e-06, "loss": 0.5064, "step": 11577 }, { "epoch": 0.66, "grad_norm": 1.72489595413208, "learning_rate": 5.359095747219231e-06, "loss": 0.903, "step": 11578 }, { "epoch": 0.66, "grad_norm": 1.7449196577072144, "learning_rate": 5.357450377599773e-06, "loss": 0.9767, "step": 11579 }, { "epoch": 0.66, "grad_norm": 1.7011772394180298, "learning_rate": 5.355805168181738e-06, "loss": 1.0102, "step": 11580 }, { "epoch": 0.66, "grad_norm": 1.8990588188171387, "learning_rate": 5.354160119021891e-06, "loss": 0.9222, "step": 11581 }, { "epoch": 0.66, "grad_norm": 1.6559391021728516, "learning_rate": 5.352515230177e-06, "loss": 0.9839, "step": 11582 }, { "epoch": 0.66, "grad_norm": 1.7523138523101807, "learning_rate": 5.350870501703829e-06, "loss": 0.9601, "step": 11583 }, { "epoch": 0.66, "grad_norm": 1.7118678092956543, "learning_rate": 5.3492259336591275e-06, "loss": 0.8945, "step": 11584 }, { "epoch": 0.66, "grad_norm": 1.8525192737579346, "learning_rate": 5.347581526099651e-06, "loss": 0.9597, "step": 11585 }, { "epoch": 0.66, "grad_norm": 1.8023669719696045, "learning_rate": 5.345937279082136e-06, "loss": 0.897, "step": 11586 }, { "epoch": 0.66, "grad_norm": 2.7100729942321777, "learning_rate": 5.344293192663329e-06, "loss": 0.8538, "step": 11587 }, { "epoch": 0.66, "grad_norm": 1.794411063194275, "learning_rate": 5.342649266899955e-06, "loss": 1.0126, "step": 11588 }, { "epoch": 0.66, "grad_norm": 1.6426360607147217, "learning_rate": 5.341005501848749e-06, "loss": 0.9104, "step": 11589 }, { "epoch": 0.66, "grad_norm": 1.933236837387085, "learning_rate": 5.33936189756642e-06, "loss": 0.9685, "step": 11590 }, { "epoch": 0.66, "grad_norm": 1.662968635559082, "learning_rate": 5.337718454109702e-06, "loss": 0.9731, "step": 11591 }, { "epoch": 0.66, "grad_norm": 1.6944464445114136, "learning_rate": 5.336075171535292e-06, "loss": 0.8069, "step": 11592 }, { "epoch": 0.66, "grad_norm": 1.620992660522461, "learning_rate": 5.334432049899904e-06, "loss": 0.9808, "step": 11593 }, { "epoch": 0.66, "grad_norm": 1.782029151916504, "learning_rate": 5.3327890892602286e-06, "loss": 0.8903, "step": 11594 }, { "epoch": 0.66, "grad_norm": 1.8381882905960083, "learning_rate": 5.331146289672968e-06, "loss": 0.95, "step": 11595 }, { "epoch": 0.67, "grad_norm": 1.635562777519226, "learning_rate": 5.329503651194805e-06, "loss": 0.8921, "step": 11596 }, { "epoch": 0.67, "grad_norm": 1.8567148447036743, "learning_rate": 5.327861173882427e-06, "loss": 0.8928, "step": 11597 }, { "epoch": 0.67, "grad_norm": 1.6727715730667114, "learning_rate": 5.326218857792505e-06, "loss": 0.9496, "step": 11598 }, { "epoch": 0.67, "grad_norm": 1.6957740783691406, "learning_rate": 5.324576702981716e-06, "loss": 0.9577, "step": 11599 }, { "epoch": 0.67, "grad_norm": 1.5796741247177124, "learning_rate": 5.322934709506726e-06, "loss": 0.9309, "step": 11600 }, { "epoch": 0.67, "grad_norm": 1.8734960556030273, "learning_rate": 5.321292877424192e-06, "loss": 0.9592, "step": 11601 }, { "epoch": 0.67, "grad_norm": 1.8251913785934448, "learning_rate": 5.319651206790775e-06, "loss": 0.8968, "step": 11602 }, { "epoch": 0.67, "grad_norm": 1.6572778224945068, "learning_rate": 5.318009697663118e-06, "loss": 0.9261, "step": 11603 }, { "epoch": 0.67, "grad_norm": 2.157059669494629, "learning_rate": 5.316368350097869e-06, "loss": 0.9059, "step": 11604 }, { "epoch": 0.67, "grad_norm": 1.8795872926712036, "learning_rate": 5.314727164151663e-06, "loss": 0.9272, "step": 11605 }, { "epoch": 0.67, "grad_norm": 1.8853521347045898, "learning_rate": 5.3130861398811385e-06, "loss": 0.9651, "step": 11606 }, { "epoch": 0.67, "grad_norm": 1.7526181936264038, "learning_rate": 5.311445277342915e-06, "loss": 0.9949, "step": 11607 }, { "epoch": 0.67, "grad_norm": 1.7402299642562866, "learning_rate": 5.309804576593623e-06, "loss": 1.0122, "step": 11608 }, { "epoch": 0.67, "grad_norm": 1.888508915901184, "learning_rate": 5.308164037689867e-06, "loss": 0.9748, "step": 11609 }, { "epoch": 0.67, "grad_norm": 1.8047722578048706, "learning_rate": 5.30652366068827e-06, "loss": 0.9391, "step": 11610 }, { "epoch": 0.67, "grad_norm": 1.8021529912948608, "learning_rate": 5.304883445645425e-06, "loss": 0.9236, "step": 11611 }, { "epoch": 0.67, "grad_norm": 1.682714581489563, "learning_rate": 5.3032433926179395e-06, "loss": 0.8347, "step": 11612 }, { "epoch": 0.67, "grad_norm": 2.193568706512451, "learning_rate": 5.301603501662407e-06, "loss": 1.0248, "step": 11613 }, { "epoch": 0.67, "grad_norm": 1.9750372171401978, "learning_rate": 5.29996377283541e-06, "loss": 0.9867, "step": 11614 }, { "epoch": 0.67, "grad_norm": 1.8413037061691284, "learning_rate": 5.2983242061935365e-06, "loss": 0.9245, "step": 11615 }, { "epoch": 0.67, "grad_norm": 1.947635531425476, "learning_rate": 5.296684801793359e-06, "loss": 0.9093, "step": 11616 }, { "epoch": 0.67, "grad_norm": 1.6264203786849976, "learning_rate": 5.295045559691454e-06, "loss": 0.9554, "step": 11617 }, { "epoch": 0.67, "grad_norm": 1.0501279830932617, "learning_rate": 5.293406479944381e-06, "loss": 0.5505, "step": 11618 }, { "epoch": 0.67, "grad_norm": 1.7028065919876099, "learning_rate": 5.291767562608705e-06, "loss": 1.0395, "step": 11619 }, { "epoch": 0.67, "grad_norm": 1.570185661315918, "learning_rate": 5.290128807740976e-06, "loss": 0.9477, "step": 11620 }, { "epoch": 0.67, "grad_norm": 1.7570148706436157, "learning_rate": 5.288490215397749e-06, "loss": 0.8676, "step": 11621 }, { "epoch": 0.67, "grad_norm": 1.7937171459197998, "learning_rate": 5.286851785635559e-06, "loss": 0.8738, "step": 11622 }, { "epoch": 0.67, "grad_norm": 1.8685243129730225, "learning_rate": 5.28521351851095e-06, "loss": 0.9151, "step": 11623 }, { "epoch": 0.67, "grad_norm": 1.8277575969696045, "learning_rate": 5.283575414080455e-06, "loss": 0.9532, "step": 11624 }, { "epoch": 0.67, "grad_norm": 1.6919366121292114, "learning_rate": 5.281937472400594e-06, "loss": 0.9393, "step": 11625 }, { "epoch": 0.67, "grad_norm": 1.760114073753357, "learning_rate": 5.280299693527895e-06, "loss": 0.9267, "step": 11626 }, { "epoch": 0.67, "grad_norm": 1.6749792098999023, "learning_rate": 5.278662077518866e-06, "loss": 0.9551, "step": 11627 }, { "epoch": 0.67, "grad_norm": 1.653491497039795, "learning_rate": 5.2770246244300225e-06, "loss": 0.9102, "step": 11628 }, { "epoch": 0.67, "grad_norm": 0.9866293668746948, "learning_rate": 5.275387334317864e-06, "loss": 0.5441, "step": 11629 }, { "epoch": 0.67, "grad_norm": 1.599632740020752, "learning_rate": 5.273750207238894e-06, "loss": 0.9997, "step": 11630 }, { "epoch": 0.67, "grad_norm": 1.7295571565628052, "learning_rate": 5.272113243249599e-06, "loss": 0.9848, "step": 11631 }, { "epoch": 0.67, "grad_norm": 1.7558050155639648, "learning_rate": 5.270476442406472e-06, "loss": 0.8713, "step": 11632 }, { "epoch": 0.67, "grad_norm": 1.0101670026779175, "learning_rate": 5.268839804765988e-06, "loss": 0.5259, "step": 11633 }, { "epoch": 0.67, "grad_norm": 1.7342720031738281, "learning_rate": 5.267203330384632e-06, "loss": 0.939, "step": 11634 }, { "epoch": 0.67, "grad_norm": 1.9709718227386475, "learning_rate": 5.265567019318862e-06, "loss": 0.9353, "step": 11635 }, { "epoch": 0.67, "grad_norm": 1.9546594619750977, "learning_rate": 5.263930871625151e-06, "loss": 0.9891, "step": 11636 }, { "epoch": 0.67, "grad_norm": 1.8432539701461792, "learning_rate": 5.2622948873599595e-06, "loss": 0.9238, "step": 11637 }, { "epoch": 0.67, "grad_norm": 1.7171056270599365, "learning_rate": 5.260659066579733e-06, "loss": 0.9101, "step": 11638 }, { "epoch": 0.67, "grad_norm": 1.618330955505371, "learning_rate": 5.259023409340926e-06, "loss": 0.9904, "step": 11639 }, { "epoch": 0.67, "grad_norm": 1.6643435955047607, "learning_rate": 5.257387915699976e-06, "loss": 0.8719, "step": 11640 }, { "epoch": 0.67, "grad_norm": 1.6756975650787354, "learning_rate": 5.255752585713324e-06, "loss": 0.7993, "step": 11641 }, { "epoch": 0.67, "grad_norm": 1.7428895235061646, "learning_rate": 5.254117419437394e-06, "loss": 0.9586, "step": 11642 }, { "epoch": 0.67, "grad_norm": 1.834367275238037, "learning_rate": 5.252482416928619e-06, "loss": 0.9845, "step": 11643 }, { "epoch": 0.67, "grad_norm": 1.76563560962677, "learning_rate": 5.2508475782434095e-06, "loss": 0.9248, "step": 11644 }, { "epoch": 0.67, "grad_norm": 1.7358566522598267, "learning_rate": 5.2492129034381875e-06, "loss": 1.0042, "step": 11645 }, { "epoch": 0.67, "grad_norm": 1.7760509252548218, "learning_rate": 5.247578392569354e-06, "loss": 1.0442, "step": 11646 }, { "epoch": 0.67, "grad_norm": 1.6131383180618286, "learning_rate": 5.2459440456933156e-06, "loss": 0.888, "step": 11647 }, { "epoch": 0.67, "grad_norm": 1.7445423603057861, "learning_rate": 5.24430986286647e-06, "loss": 0.8852, "step": 11648 }, { "epoch": 0.67, "grad_norm": 1.8820563554763794, "learning_rate": 5.242675844145204e-06, "loss": 0.9549, "step": 11649 }, { "epoch": 0.67, "grad_norm": 1.5935759544372559, "learning_rate": 5.241041989585911e-06, "loss": 0.9394, "step": 11650 }, { "epoch": 0.67, "grad_norm": 1.603927731513977, "learning_rate": 5.239408299244959e-06, "loss": 0.8361, "step": 11651 }, { "epoch": 0.67, "grad_norm": 1.622622013092041, "learning_rate": 5.237774773178734e-06, "loss": 0.8539, "step": 11652 }, { "epoch": 0.67, "grad_norm": 1.5777705907821655, "learning_rate": 5.236141411443594e-06, "loss": 0.8871, "step": 11653 }, { "epoch": 0.67, "grad_norm": 1.781461477279663, "learning_rate": 5.23450821409591e-06, "loss": 1.0093, "step": 11654 }, { "epoch": 0.67, "grad_norm": 1.6174107789993286, "learning_rate": 5.232875181192033e-06, "loss": 0.9867, "step": 11655 }, { "epoch": 0.67, "grad_norm": 1.0862786769866943, "learning_rate": 5.231242312788316e-06, "loss": 0.55, "step": 11656 }, { "epoch": 0.67, "grad_norm": 1.7979928255081177, "learning_rate": 5.229609608941106e-06, "loss": 0.8687, "step": 11657 }, { "epoch": 0.67, "grad_norm": 1.9316529035568237, "learning_rate": 5.227977069706748e-06, "loss": 0.9461, "step": 11658 }, { "epoch": 0.67, "grad_norm": 1.7856119871139526, "learning_rate": 5.226344695141567e-06, "loss": 0.9508, "step": 11659 }, { "epoch": 0.67, "grad_norm": 1.6399339437484741, "learning_rate": 5.224712485301898e-06, "loss": 0.9082, "step": 11660 }, { "epoch": 0.67, "grad_norm": 1.898725152015686, "learning_rate": 5.223080440244059e-06, "loss": 0.9459, "step": 11661 }, { "epoch": 0.67, "grad_norm": 1.7254668474197388, "learning_rate": 5.2214485600243756e-06, "loss": 0.9364, "step": 11662 }, { "epoch": 0.67, "grad_norm": 1.9469966888427734, "learning_rate": 5.219816844699148e-06, "loss": 1.025, "step": 11663 }, { "epoch": 0.67, "grad_norm": 1.5856157541275024, "learning_rate": 5.218185294324694e-06, "loss": 0.8914, "step": 11664 }, { "epoch": 0.67, "grad_norm": 1.9075679779052734, "learning_rate": 5.2165539089573025e-06, "loss": 1.0112, "step": 11665 }, { "epoch": 0.67, "grad_norm": 1.7695640325546265, "learning_rate": 5.214922688653274e-06, "loss": 0.8524, "step": 11666 }, { "epoch": 0.67, "grad_norm": 2.0467355251312256, "learning_rate": 5.213291633468901e-06, "loss": 1.0058, "step": 11667 }, { "epoch": 0.67, "grad_norm": 2.226175546646118, "learning_rate": 5.211660743460458e-06, "loss": 1.026, "step": 11668 }, { "epoch": 0.67, "grad_norm": 1.812477946281433, "learning_rate": 5.210030018684233e-06, "loss": 0.9607, "step": 11669 }, { "epoch": 0.67, "grad_norm": 2.1455178260803223, "learning_rate": 5.208399459196486e-06, "loss": 0.8835, "step": 11670 }, { "epoch": 0.67, "grad_norm": 1.9750603437423706, "learning_rate": 5.206769065053494e-06, "loss": 0.9793, "step": 11671 }, { "epoch": 0.67, "grad_norm": 1.6402770280838013, "learning_rate": 5.205138836311508e-06, "loss": 0.8243, "step": 11672 }, { "epoch": 0.67, "grad_norm": 1.8187425136566162, "learning_rate": 5.20350877302679e-06, "loss": 0.9573, "step": 11673 }, { "epoch": 0.67, "grad_norm": 1.6716643571853638, "learning_rate": 5.201878875255582e-06, "loss": 0.9245, "step": 11674 }, { "epoch": 0.67, "grad_norm": 1.706623911857605, "learning_rate": 5.2002491430541346e-06, "loss": 0.9697, "step": 11675 }, { "epoch": 0.67, "grad_norm": 1.6241567134857178, "learning_rate": 5.198619576478678e-06, "loss": 0.9486, "step": 11676 }, { "epoch": 0.67, "grad_norm": 1.8172380924224854, "learning_rate": 5.196990175585449e-06, "loss": 0.9072, "step": 11677 }, { "epoch": 0.67, "grad_norm": 1.908457636833191, "learning_rate": 5.195360940430676e-06, "loss": 0.8448, "step": 11678 }, { "epoch": 0.67, "grad_norm": 1.6959284543991089, "learning_rate": 5.1937318710705706e-06, "loss": 0.949, "step": 11679 }, { "epoch": 0.67, "grad_norm": 1.7938159704208374, "learning_rate": 5.192102967561357e-06, "loss": 0.8996, "step": 11680 }, { "epoch": 0.67, "grad_norm": 1.590562105178833, "learning_rate": 5.190474229959236e-06, "loss": 0.9546, "step": 11681 }, { "epoch": 0.67, "grad_norm": 1.7886768579483032, "learning_rate": 5.188845658320419e-06, "loss": 0.9238, "step": 11682 }, { "epoch": 0.67, "grad_norm": 1.8281091451644897, "learning_rate": 5.187217252701093e-06, "loss": 0.8072, "step": 11683 }, { "epoch": 0.67, "grad_norm": 1.8246984481811523, "learning_rate": 5.1855890131574615e-06, "loss": 0.9469, "step": 11684 }, { "epoch": 0.67, "grad_norm": 1.7635284662246704, "learning_rate": 5.1839609397457e-06, "loss": 0.9573, "step": 11685 }, { "epoch": 0.67, "grad_norm": 1.762611746788025, "learning_rate": 5.182333032521997e-06, "loss": 0.9996, "step": 11686 }, { "epoch": 0.67, "grad_norm": 1.7961598634719849, "learning_rate": 5.18070529154252e-06, "loss": 0.955, "step": 11687 }, { "epoch": 0.67, "grad_norm": 1.871989130973816, "learning_rate": 5.179077716863445e-06, "loss": 0.9361, "step": 11688 }, { "epoch": 0.67, "grad_norm": 1.0644457340240479, "learning_rate": 5.177450308540928e-06, "loss": 0.5879, "step": 11689 }, { "epoch": 0.67, "grad_norm": 1.8947426080703735, "learning_rate": 5.1758230666311286e-06, "loss": 0.9638, "step": 11690 }, { "epoch": 0.67, "grad_norm": 1.7619361877441406, "learning_rate": 5.174195991190203e-06, "loss": 0.884, "step": 11691 }, { "epoch": 0.67, "grad_norm": 1.802830457687378, "learning_rate": 5.17256908227429e-06, "loss": 0.9009, "step": 11692 }, { "epoch": 0.67, "grad_norm": 1.696345329284668, "learning_rate": 5.170942339939538e-06, "loss": 0.9669, "step": 11693 }, { "epoch": 0.67, "grad_norm": 1.7990154027938843, "learning_rate": 5.169315764242071e-06, "loss": 1.0317, "step": 11694 }, { "epoch": 0.67, "grad_norm": 1.9006434679031372, "learning_rate": 5.167689355238028e-06, "loss": 0.912, "step": 11695 }, { "epoch": 0.67, "grad_norm": 1.524186134338379, "learning_rate": 5.166063112983522e-06, "loss": 0.9333, "step": 11696 }, { "epoch": 0.67, "grad_norm": 1.8372793197631836, "learning_rate": 5.16443703753468e-06, "loss": 0.8728, "step": 11697 }, { "epoch": 0.67, "grad_norm": 1.6507500410079956, "learning_rate": 5.1628111289476025e-06, "loss": 0.9579, "step": 11698 }, { "epoch": 0.67, "grad_norm": 1.7946605682373047, "learning_rate": 5.1611853872784065e-06, "loss": 0.9745, "step": 11699 }, { "epoch": 0.67, "grad_norm": 1.781618356704712, "learning_rate": 5.159559812583181e-06, "loss": 0.9549, "step": 11700 }, { "epoch": 0.67, "grad_norm": 1.867897391319275, "learning_rate": 5.157934404918025e-06, "loss": 0.8843, "step": 11701 }, { "epoch": 0.67, "grad_norm": 1.9105182886123657, "learning_rate": 5.1563091643390324e-06, "loss": 0.9926, "step": 11702 }, { "epoch": 0.67, "grad_norm": 1.707181692123413, "learning_rate": 5.154684090902275e-06, "loss": 0.8772, "step": 11703 }, { "epoch": 0.67, "grad_norm": 1.5971965789794922, "learning_rate": 5.15305918466384e-06, "loss": 0.8584, "step": 11704 }, { "epoch": 0.67, "grad_norm": 1.7153371572494507, "learning_rate": 5.151434445679788e-06, "loss": 1.0003, "step": 11705 }, { "epoch": 0.67, "grad_norm": 1.7008590698242188, "learning_rate": 5.149809874006194e-06, "loss": 0.9284, "step": 11706 }, { "epoch": 0.67, "grad_norm": 1.5508931875228882, "learning_rate": 5.148185469699109e-06, "loss": 0.9212, "step": 11707 }, { "epoch": 0.67, "grad_norm": 1.6856895685195923, "learning_rate": 5.146561232814593e-06, "loss": 0.9562, "step": 11708 }, { "epoch": 0.67, "grad_norm": 1.6909445524215698, "learning_rate": 5.144937163408689e-06, "loss": 0.9435, "step": 11709 }, { "epoch": 0.67, "grad_norm": 1.8079134225845337, "learning_rate": 5.143313261537443e-06, "loss": 0.9669, "step": 11710 }, { "epoch": 0.67, "grad_norm": 1.7138488292694092, "learning_rate": 5.141689527256889e-06, "loss": 0.9888, "step": 11711 }, { "epoch": 0.67, "grad_norm": 1.628553867340088, "learning_rate": 5.140065960623061e-06, "loss": 0.8738, "step": 11712 }, { "epoch": 0.67, "grad_norm": 1.8324013948440552, "learning_rate": 5.138442561691976e-06, "loss": 0.9254, "step": 11713 }, { "epoch": 0.67, "grad_norm": 1.7348748445510864, "learning_rate": 5.136819330519659e-06, "loss": 0.8754, "step": 11714 }, { "epoch": 0.67, "grad_norm": 1.7780094146728516, "learning_rate": 5.135196267162126e-06, "loss": 0.9213, "step": 11715 }, { "epoch": 0.67, "grad_norm": 1.78496515750885, "learning_rate": 5.133573371675375e-06, "loss": 0.8827, "step": 11716 }, { "epoch": 0.67, "grad_norm": 1.6222641468048096, "learning_rate": 5.1319506441154195e-06, "loss": 1.0103, "step": 11717 }, { "epoch": 0.67, "grad_norm": 1.8690211772918701, "learning_rate": 5.130328084538244e-06, "loss": 0.9449, "step": 11718 }, { "epoch": 0.67, "grad_norm": 1.813813328742981, "learning_rate": 5.128705692999847e-06, "loss": 0.9515, "step": 11719 }, { "epoch": 0.67, "grad_norm": 1.6950210332870483, "learning_rate": 5.127083469556206e-06, "loss": 0.8646, "step": 11720 }, { "epoch": 0.67, "grad_norm": 1.6613376140594482, "learning_rate": 5.1254614142633064e-06, "loss": 0.9313, "step": 11721 }, { "epoch": 0.67, "grad_norm": 1.7989271879196167, "learning_rate": 5.123839527177108e-06, "loss": 0.8764, "step": 11722 }, { "epoch": 0.67, "grad_norm": 1.6891721487045288, "learning_rate": 5.122217808353596e-06, "loss": 0.9692, "step": 11723 }, { "epoch": 0.67, "grad_norm": 1.5907065868377686, "learning_rate": 5.120596257848716e-06, "loss": 0.9337, "step": 11724 }, { "epoch": 0.67, "grad_norm": 1.8290561437606812, "learning_rate": 5.118974875718434e-06, "loss": 0.9892, "step": 11725 }, { "epoch": 0.67, "grad_norm": 1.8799984455108643, "learning_rate": 5.117353662018692e-06, "loss": 0.9835, "step": 11726 }, { "epoch": 0.67, "grad_norm": 1.8811370134353638, "learning_rate": 5.1157326168054374e-06, "loss": 0.9903, "step": 11727 }, { "epoch": 0.67, "grad_norm": 1.7325929403305054, "learning_rate": 5.114111740134604e-06, "loss": 0.8456, "step": 11728 }, { "epoch": 0.67, "grad_norm": 1.7307196855545044, "learning_rate": 5.112491032062129e-06, "loss": 0.949, "step": 11729 }, { "epoch": 0.67, "grad_norm": 1.9088267087936401, "learning_rate": 5.110870492643934e-06, "loss": 1.0396, "step": 11730 }, { "epoch": 0.67, "grad_norm": 2.07021427154541, "learning_rate": 5.109250121935938e-06, "loss": 1.0772, "step": 11731 }, { "epoch": 0.67, "grad_norm": 1.7922099828720093, "learning_rate": 5.107629919994065e-06, "loss": 0.9785, "step": 11732 }, { "epoch": 0.67, "grad_norm": 1.8231827020645142, "learning_rate": 5.106009886874212e-06, "loss": 0.9594, "step": 11733 }, { "epoch": 0.67, "grad_norm": 1.6988617181777954, "learning_rate": 5.104390022632292e-06, "loss": 0.8619, "step": 11734 }, { "epoch": 0.67, "grad_norm": 1.7075978517532349, "learning_rate": 5.102770327324193e-06, "loss": 0.7801, "step": 11735 }, { "epoch": 0.67, "grad_norm": 1.885631799697876, "learning_rate": 5.101150801005813e-06, "loss": 0.9459, "step": 11736 }, { "epoch": 0.67, "grad_norm": 1.802761435508728, "learning_rate": 5.0995314437330315e-06, "loss": 0.901, "step": 11737 }, { "epoch": 0.67, "grad_norm": 1.91435968875885, "learning_rate": 5.0979122555617345e-06, "loss": 0.9863, "step": 11738 }, { "epoch": 0.67, "grad_norm": 1.0569753646850586, "learning_rate": 5.096293236547787e-06, "loss": 0.5421, "step": 11739 }, { "epoch": 0.67, "grad_norm": 1.7725480794906616, "learning_rate": 5.0946743867470675e-06, "loss": 0.9424, "step": 11740 }, { "epoch": 0.67, "grad_norm": 1.9501739740371704, "learning_rate": 5.093055706215428e-06, "loss": 0.9644, "step": 11741 }, { "epoch": 0.67, "grad_norm": 1.853328824043274, "learning_rate": 5.0914371950087325e-06, "loss": 0.8084, "step": 11742 }, { "epoch": 0.67, "grad_norm": 1.8748635053634644, "learning_rate": 5.089818853182825e-06, "loss": 0.9379, "step": 11743 }, { "epoch": 0.67, "grad_norm": 1.6371344327926636, "learning_rate": 5.088200680793553e-06, "loss": 0.9422, "step": 11744 }, { "epoch": 0.67, "grad_norm": 1.9204833507537842, "learning_rate": 5.086582677896758e-06, "loss": 0.9899, "step": 11745 }, { "epoch": 0.67, "grad_norm": 1.8673197031021118, "learning_rate": 5.084964844548266e-06, "loss": 1.0478, "step": 11746 }, { "epoch": 0.67, "grad_norm": 1.8951619863510132, "learning_rate": 5.083347180803911e-06, "loss": 0.8845, "step": 11747 }, { "epoch": 0.67, "grad_norm": 1.8026243448257446, "learning_rate": 5.081729686719507e-06, "loss": 0.8968, "step": 11748 }, { "epoch": 0.67, "grad_norm": 1.7013325691223145, "learning_rate": 5.080112362350877e-06, "loss": 0.896, "step": 11749 }, { "epoch": 0.67, "grad_norm": 1.6203408241271973, "learning_rate": 5.078495207753824e-06, "loss": 0.9532, "step": 11750 }, { "epoch": 0.67, "grad_norm": 1.763732671737671, "learning_rate": 5.076878222984156e-06, "loss": 0.9439, "step": 11751 }, { "epoch": 0.67, "grad_norm": 1.9145323038101196, "learning_rate": 5.075261408097665e-06, "loss": 0.9206, "step": 11752 }, { "epoch": 0.67, "grad_norm": 1.8830153942108154, "learning_rate": 5.073644763150148e-06, "loss": 0.8938, "step": 11753 }, { "epoch": 0.67, "grad_norm": 1.9114141464233398, "learning_rate": 5.072028288197387e-06, "loss": 0.9421, "step": 11754 }, { "epoch": 0.67, "grad_norm": 1.8215630054473877, "learning_rate": 5.070411983295164e-06, "loss": 0.8783, "step": 11755 }, { "epoch": 0.67, "grad_norm": 1.6587389707565308, "learning_rate": 5.068795848499257e-06, "loss": 0.8897, "step": 11756 }, { "epoch": 0.67, "grad_norm": 1.73112154006958, "learning_rate": 5.067179883865425e-06, "loss": 0.8657, "step": 11757 }, { "epoch": 0.67, "grad_norm": 1.9408777952194214, "learning_rate": 5.0655640894494415e-06, "loss": 0.9885, "step": 11758 }, { "epoch": 0.67, "grad_norm": 1.7125475406646729, "learning_rate": 5.0639484653070535e-06, "loss": 0.8915, "step": 11759 }, { "epoch": 0.67, "grad_norm": 1.7384669780731201, "learning_rate": 5.0623330114940195e-06, "loss": 0.9165, "step": 11760 }, { "epoch": 0.67, "grad_norm": 1.8590176105499268, "learning_rate": 5.060717728066076e-06, "loss": 0.9095, "step": 11761 }, { "epoch": 0.67, "grad_norm": 1.7750022411346436, "learning_rate": 5.059102615078972e-06, "loss": 0.9658, "step": 11762 }, { "epoch": 0.67, "grad_norm": 1.625806212425232, "learning_rate": 5.057487672588428e-06, "loss": 0.989, "step": 11763 }, { "epoch": 0.67, "grad_norm": 1.6001558303833008, "learning_rate": 5.055872900650185e-06, "loss": 0.9151, "step": 11764 }, { "epoch": 0.67, "grad_norm": 1.8214077949523926, "learning_rate": 5.054258299319952e-06, "loss": 0.8698, "step": 11765 }, { "epoch": 0.67, "grad_norm": 1.6866214275360107, "learning_rate": 5.052643868653453e-06, "loss": 0.8927, "step": 11766 }, { "epoch": 0.67, "grad_norm": 1.838830828666687, "learning_rate": 5.05102960870639e-06, "loss": 0.9075, "step": 11767 }, { "epoch": 0.67, "grad_norm": 1.7893980741500854, "learning_rate": 5.049415519534473e-06, "loss": 1.0027, "step": 11768 }, { "epoch": 0.67, "grad_norm": 1.7837446928024292, "learning_rate": 5.047801601193401e-06, "loss": 0.9065, "step": 11769 }, { "epoch": 0.68, "grad_norm": 1.7983431816101074, "learning_rate": 5.0461878537388575e-06, "loss": 0.8972, "step": 11770 }, { "epoch": 0.68, "grad_norm": 1.0220587253570557, "learning_rate": 5.044574277226537e-06, "loss": 0.5722, "step": 11771 }, { "epoch": 0.68, "grad_norm": 1.0957280397415161, "learning_rate": 5.042960871712112e-06, "loss": 0.5698, "step": 11772 }, { "epoch": 0.68, "grad_norm": 1.776816487312317, "learning_rate": 5.041347637251264e-06, "loss": 0.9815, "step": 11773 }, { "epoch": 0.68, "grad_norm": 1.5999451875686646, "learning_rate": 5.039734573899655e-06, "loss": 0.9808, "step": 11774 }, { "epoch": 0.68, "grad_norm": 1.758184790611267, "learning_rate": 5.038121681712953e-06, "loss": 0.9628, "step": 11775 }, { "epoch": 0.68, "grad_norm": 1.7792437076568604, "learning_rate": 5.036508960746806e-06, "loss": 0.8652, "step": 11776 }, { "epoch": 0.68, "grad_norm": 1.678625464439392, "learning_rate": 5.034896411056875e-06, "loss": 0.8422, "step": 11777 }, { "epoch": 0.68, "grad_norm": 1.768420934677124, "learning_rate": 5.033284032698797e-06, "loss": 0.8544, "step": 11778 }, { "epoch": 0.68, "grad_norm": 1.6039161682128906, "learning_rate": 5.031671825728211e-06, "loss": 0.8034, "step": 11779 }, { "epoch": 0.68, "grad_norm": 2.097721576690674, "learning_rate": 5.0300597902007565e-06, "loss": 0.9364, "step": 11780 }, { "epoch": 0.68, "grad_norm": 1.9498729705810547, "learning_rate": 5.028447926172052e-06, "loss": 0.8816, "step": 11781 }, { "epoch": 0.68, "grad_norm": 1.829207420349121, "learning_rate": 5.026836233697725e-06, "loss": 0.9438, "step": 11782 }, { "epoch": 0.68, "grad_norm": 1.7565690279006958, "learning_rate": 5.025224712833385e-06, "loss": 0.937, "step": 11783 }, { "epoch": 0.68, "grad_norm": 1.7777293920516968, "learning_rate": 5.023613363634647e-06, "loss": 0.9255, "step": 11784 }, { "epoch": 0.68, "grad_norm": 1.8801199197769165, "learning_rate": 5.0220021861571064e-06, "loss": 0.9024, "step": 11785 }, { "epoch": 0.68, "grad_norm": 1.8420298099517822, "learning_rate": 5.0203911804563695e-06, "loss": 0.9135, "step": 11786 }, { "epoch": 0.68, "grad_norm": 1.6504629850387573, "learning_rate": 5.018780346588019e-06, "loss": 0.8379, "step": 11787 }, { "epoch": 0.68, "grad_norm": 1.661375880241394, "learning_rate": 5.017169684607644e-06, "loss": 0.95, "step": 11788 }, { "epoch": 0.68, "grad_norm": 1.8354562520980835, "learning_rate": 5.015559194570825e-06, "loss": 0.9028, "step": 11789 }, { "epoch": 0.68, "grad_norm": 1.717236876487732, "learning_rate": 5.013948876533138e-06, "loss": 0.898, "step": 11790 }, { "epoch": 0.68, "grad_norm": 1.7261273860931396, "learning_rate": 5.012338730550144e-06, "loss": 0.9238, "step": 11791 }, { "epoch": 0.68, "grad_norm": 1.855106234550476, "learning_rate": 5.01072875667741e-06, "loss": 0.9425, "step": 11792 }, { "epoch": 0.68, "grad_norm": 1.8412870168685913, "learning_rate": 5.009118954970488e-06, "loss": 0.9069, "step": 11793 }, { "epoch": 0.68, "grad_norm": 1.8281900882720947, "learning_rate": 5.007509325484932e-06, "loss": 0.8786, "step": 11794 }, { "epoch": 0.68, "grad_norm": 1.8697614669799805, "learning_rate": 5.005899868276279e-06, "loss": 0.8988, "step": 11795 }, { "epoch": 0.68, "grad_norm": 1.8560070991516113, "learning_rate": 5.004290583400076e-06, "loss": 0.9358, "step": 11796 }, { "epoch": 0.68, "grad_norm": 0.9895504117012024, "learning_rate": 5.002681470911846e-06, "loss": 0.5185, "step": 11797 }, { "epoch": 0.68, "grad_norm": 1.906506061553955, "learning_rate": 5.001072530867119e-06, "loss": 0.9455, "step": 11798 }, { "epoch": 0.68, "grad_norm": 1.9184057712554932, "learning_rate": 4.999463763321419e-06, "loss": 0.9266, "step": 11799 }, { "epoch": 0.68, "grad_norm": 2.2055280208587646, "learning_rate": 4.997855168330251e-06, "loss": 0.9154, "step": 11800 }, { "epoch": 0.68, "grad_norm": 1.797656774520874, "learning_rate": 4.996246745949133e-06, "loss": 0.9651, "step": 11801 }, { "epoch": 0.68, "grad_norm": 1.7430778741836548, "learning_rate": 4.994638496233558e-06, "loss": 0.8839, "step": 11802 }, { "epoch": 0.68, "grad_norm": 1.739729642868042, "learning_rate": 4.993030419239031e-06, "loss": 0.8473, "step": 11803 }, { "epoch": 0.68, "grad_norm": 1.0470771789550781, "learning_rate": 4.991422515021034e-06, "loss": 0.5337, "step": 11804 }, { "epoch": 0.68, "grad_norm": 1.750593662261963, "learning_rate": 4.989814783635059e-06, "loss": 0.8905, "step": 11805 }, { "epoch": 0.68, "grad_norm": 1.7154282331466675, "learning_rate": 4.988207225136577e-06, "loss": 0.8077, "step": 11806 }, { "epoch": 0.68, "grad_norm": 1.7973541021347046, "learning_rate": 4.986599839581065e-06, "loss": 0.8583, "step": 11807 }, { "epoch": 0.68, "grad_norm": 1.7965013980865479, "learning_rate": 4.9849926270239865e-06, "loss": 0.9084, "step": 11808 }, { "epoch": 0.68, "grad_norm": 1.6194044351577759, "learning_rate": 4.983385587520804e-06, "loss": 1.0169, "step": 11809 }, { "epoch": 0.68, "grad_norm": 1.6442198753356934, "learning_rate": 4.981778721126975e-06, "loss": 0.9304, "step": 11810 }, { "epoch": 0.68, "grad_norm": 1.8897299766540527, "learning_rate": 4.98017202789794e-06, "loss": 0.955, "step": 11811 }, { "epoch": 0.68, "grad_norm": 1.8244643211364746, "learning_rate": 4.97856550788915e-06, "loss": 0.8707, "step": 11812 }, { "epoch": 0.68, "grad_norm": 1.6431162357330322, "learning_rate": 4.976959161156034e-06, "loss": 0.9616, "step": 11813 }, { "epoch": 0.68, "grad_norm": 1.5817203521728516, "learning_rate": 4.975352987754031e-06, "loss": 0.9714, "step": 11814 }, { "epoch": 0.68, "grad_norm": 1.6769627332687378, "learning_rate": 4.973746987738555e-06, "loss": 0.8568, "step": 11815 }, { "epoch": 0.68, "grad_norm": 1.6666783094406128, "learning_rate": 4.972141161165035e-06, "loss": 0.878, "step": 11816 }, { "epoch": 0.68, "grad_norm": 1.6715227365493774, "learning_rate": 4.970535508088874e-06, "loss": 0.9001, "step": 11817 }, { "epoch": 0.68, "grad_norm": 1.8833880424499512, "learning_rate": 4.9689300285654886e-06, "loss": 0.9476, "step": 11818 }, { "epoch": 0.68, "grad_norm": 1.8556007146835327, "learning_rate": 4.9673247226502684e-06, "loss": 0.9316, "step": 11819 }, { "epoch": 0.68, "grad_norm": 1.6133644580841064, "learning_rate": 4.965719590398619e-06, "loss": 0.9271, "step": 11820 }, { "epoch": 0.68, "grad_norm": 1.8462836742401123, "learning_rate": 4.964114631865919e-06, "loss": 0.8824, "step": 11821 }, { "epoch": 0.68, "grad_norm": 1.7447646856307983, "learning_rate": 4.962509847107557e-06, "loss": 0.9709, "step": 11822 }, { "epoch": 0.68, "grad_norm": 1.5922834873199463, "learning_rate": 4.960905236178912e-06, "loss": 0.9975, "step": 11823 }, { "epoch": 0.68, "grad_norm": 1.7459512948989868, "learning_rate": 4.959300799135348e-06, "loss": 0.8725, "step": 11824 }, { "epoch": 0.68, "grad_norm": 1.6210883855819702, "learning_rate": 4.957696536032236e-06, "loss": 0.8809, "step": 11825 }, { "epoch": 0.68, "grad_norm": 1.6789504289627075, "learning_rate": 4.9560924469249276e-06, "loss": 0.8933, "step": 11826 }, { "epoch": 0.68, "grad_norm": 1.7001127004623413, "learning_rate": 4.954488531868783e-06, "loss": 0.8888, "step": 11827 }, { "epoch": 0.68, "grad_norm": 1.8419580459594727, "learning_rate": 4.9528847909191414e-06, "loss": 0.9265, "step": 11828 }, { "epoch": 0.68, "grad_norm": 1.6779720783233643, "learning_rate": 4.95128122413135e-06, "loss": 0.9137, "step": 11829 }, { "epoch": 0.68, "grad_norm": 1.0809009075164795, "learning_rate": 4.949677831560738e-06, "loss": 0.5939, "step": 11830 }, { "epoch": 0.68, "grad_norm": 0.9956091046333313, "learning_rate": 4.94807461326264e-06, "loss": 0.5585, "step": 11831 }, { "epoch": 0.68, "grad_norm": 1.6134732961654663, "learning_rate": 4.946471569292372e-06, "loss": 0.8912, "step": 11832 }, { "epoch": 0.68, "grad_norm": 1.7498338222503662, "learning_rate": 4.944868699705252e-06, "loss": 0.8553, "step": 11833 }, { "epoch": 0.68, "grad_norm": 1.0120846033096313, "learning_rate": 4.943266004556597e-06, "loss": 0.6111, "step": 11834 }, { "epoch": 0.68, "grad_norm": 1.8234264850616455, "learning_rate": 4.941663483901703e-06, "loss": 0.9485, "step": 11835 }, { "epoch": 0.68, "grad_norm": 1.8544602394104004, "learning_rate": 4.940061137795876e-06, "loss": 0.871, "step": 11836 }, { "epoch": 0.68, "grad_norm": 1.8312692642211914, "learning_rate": 4.9384589662944005e-06, "loss": 0.8945, "step": 11837 }, { "epoch": 0.68, "grad_norm": 1.806687593460083, "learning_rate": 4.936856969452572e-06, "loss": 0.9047, "step": 11838 }, { "epoch": 0.68, "grad_norm": 1.8743573427200317, "learning_rate": 4.935255147325661e-06, "loss": 0.8792, "step": 11839 }, { "epoch": 0.68, "grad_norm": 1.6780905723571777, "learning_rate": 4.933653499968952e-06, "loss": 0.878, "step": 11840 }, { "epoch": 0.68, "grad_norm": 1.8195533752441406, "learning_rate": 4.932052027437705e-06, "loss": 0.9269, "step": 11841 }, { "epoch": 0.68, "grad_norm": 1.683165431022644, "learning_rate": 4.930450729787188e-06, "loss": 0.9617, "step": 11842 }, { "epoch": 0.68, "grad_norm": 1.706084966659546, "learning_rate": 4.928849607072654e-06, "loss": 0.8759, "step": 11843 }, { "epoch": 0.68, "grad_norm": 1.837035059928894, "learning_rate": 4.927248659349355e-06, "loss": 1.0567, "step": 11844 }, { "epoch": 0.68, "grad_norm": 1.6546556949615479, "learning_rate": 4.9256478866725325e-06, "loss": 0.8836, "step": 11845 }, { "epoch": 0.68, "grad_norm": 1.7245676517486572, "learning_rate": 4.924047289097426e-06, "loss": 0.9767, "step": 11846 }, { "epoch": 0.68, "grad_norm": 1.8457307815551758, "learning_rate": 4.922446866679274e-06, "loss": 0.8356, "step": 11847 }, { "epoch": 0.68, "grad_norm": 1.672777771949768, "learning_rate": 4.920846619473292e-06, "loss": 0.9419, "step": 11848 }, { "epoch": 0.68, "grad_norm": 1.7533535957336426, "learning_rate": 4.919246547534709e-06, "loss": 0.8678, "step": 11849 }, { "epoch": 0.68, "grad_norm": 1.8325262069702148, "learning_rate": 4.917646650918731e-06, "loss": 0.9241, "step": 11850 }, { "epoch": 0.68, "grad_norm": 1.8409937620162964, "learning_rate": 4.9160469296805735e-06, "loss": 0.9361, "step": 11851 }, { "epoch": 0.68, "grad_norm": 1.6650915145874023, "learning_rate": 4.914447383875433e-06, "loss": 0.932, "step": 11852 }, { "epoch": 0.68, "grad_norm": 2.0347445011138916, "learning_rate": 4.912848013558509e-06, "loss": 0.9281, "step": 11853 }, { "epoch": 0.68, "grad_norm": 2.161604881286621, "learning_rate": 4.911248818784984e-06, "loss": 0.9575, "step": 11854 }, { "epoch": 0.68, "grad_norm": 1.7821305990219116, "learning_rate": 4.909649799610054e-06, "loss": 0.9008, "step": 11855 }, { "epoch": 0.68, "grad_norm": 1.8175138235092163, "learning_rate": 4.908050956088886e-06, "loss": 0.9265, "step": 11856 }, { "epoch": 0.68, "grad_norm": 1.9369957447052002, "learning_rate": 4.90645228827666e-06, "loss": 0.926, "step": 11857 }, { "epoch": 0.68, "grad_norm": 1.6925907135009766, "learning_rate": 4.904853796228534e-06, "loss": 0.9258, "step": 11858 }, { "epoch": 0.68, "grad_norm": 1.7461246252059937, "learning_rate": 4.9032554799996735e-06, "loss": 0.8678, "step": 11859 }, { "epoch": 0.68, "grad_norm": 1.863371729850769, "learning_rate": 4.901657339645226e-06, "loss": 0.9126, "step": 11860 }, { "epoch": 0.68, "grad_norm": 1.7510188817977905, "learning_rate": 4.900059375220345e-06, "loss": 0.8835, "step": 11861 }, { "epoch": 0.68, "grad_norm": 1.6668410301208496, "learning_rate": 4.8984615867801664e-06, "loss": 0.9261, "step": 11862 }, { "epoch": 0.68, "grad_norm": 1.7928352355957031, "learning_rate": 4.896863974379828e-06, "loss": 0.9124, "step": 11863 }, { "epoch": 0.68, "grad_norm": 1.6541231870651245, "learning_rate": 4.895266538074461e-06, "loss": 0.8353, "step": 11864 }, { "epoch": 0.68, "grad_norm": 2.0209603309631348, "learning_rate": 4.893669277919184e-06, "loss": 0.9066, "step": 11865 }, { "epoch": 0.68, "grad_norm": 1.8958548307418823, "learning_rate": 4.892072193969119e-06, "loss": 0.9218, "step": 11866 }, { "epoch": 0.68, "grad_norm": 1.8142685890197754, "learning_rate": 4.8904752862793705e-06, "loss": 0.9999, "step": 11867 }, { "epoch": 0.68, "grad_norm": 1.7322319746017456, "learning_rate": 4.888878554905051e-06, "loss": 0.9048, "step": 11868 }, { "epoch": 0.68, "grad_norm": 1.686623215675354, "learning_rate": 4.887281999901253e-06, "loss": 0.9522, "step": 11869 }, { "epoch": 0.68, "grad_norm": 1.7443772554397583, "learning_rate": 4.885685621323073e-06, "loss": 0.9411, "step": 11870 }, { "epoch": 0.68, "grad_norm": 1.0071076154708862, "learning_rate": 4.884089419225593e-06, "loss": 0.5505, "step": 11871 }, { "epoch": 0.68, "grad_norm": 1.9278382062911987, "learning_rate": 4.8824933936639e-06, "loss": 0.9589, "step": 11872 }, { "epoch": 0.68, "grad_norm": 1.830845832824707, "learning_rate": 4.880897544693061e-06, "loss": 0.9267, "step": 11873 }, { "epoch": 0.68, "grad_norm": 1.7285884618759155, "learning_rate": 4.879301872368152e-06, "loss": 0.9682, "step": 11874 }, { "epoch": 0.68, "grad_norm": 1.5627127885818481, "learning_rate": 4.877706376744227e-06, "loss": 0.9164, "step": 11875 }, { "epoch": 0.68, "grad_norm": 1.8125560283660889, "learning_rate": 4.8761110578763475e-06, "loss": 0.9179, "step": 11876 }, { "epoch": 0.68, "grad_norm": 1.0074329376220703, "learning_rate": 4.874515915819565e-06, "loss": 0.5481, "step": 11877 }, { "epoch": 0.68, "grad_norm": 1.965358018875122, "learning_rate": 4.872920950628918e-06, "loss": 0.9204, "step": 11878 }, { "epoch": 0.68, "grad_norm": 1.6264110803604126, "learning_rate": 4.8713261623594495e-06, "loss": 0.895, "step": 11879 }, { "epoch": 0.68, "grad_norm": 1.6801484823226929, "learning_rate": 4.869731551066185e-06, "loss": 0.9491, "step": 11880 }, { "epoch": 0.68, "grad_norm": 0.9870801568031311, "learning_rate": 4.86813711680416e-06, "loss": 0.5671, "step": 11881 }, { "epoch": 0.68, "grad_norm": 1.9008806943893433, "learning_rate": 4.866542859628383e-06, "loss": 0.9284, "step": 11882 }, { "epoch": 0.68, "grad_norm": 1.8084572553634644, "learning_rate": 4.864948779593874e-06, "loss": 0.9221, "step": 11883 }, { "epoch": 0.68, "grad_norm": 1.7177457809448242, "learning_rate": 4.863354876755637e-06, "loss": 0.9533, "step": 11884 }, { "epoch": 0.68, "grad_norm": 1.9433872699737549, "learning_rate": 4.861761151168678e-06, "loss": 0.9925, "step": 11885 }, { "epoch": 0.68, "grad_norm": 1.7670843601226807, "learning_rate": 4.860167602887984e-06, "loss": 0.942, "step": 11886 }, { "epoch": 0.68, "grad_norm": 1.7366704940795898, "learning_rate": 4.858574231968548e-06, "loss": 0.9313, "step": 11887 }, { "epoch": 0.68, "grad_norm": 1.809315800666809, "learning_rate": 4.8569810384653585e-06, "loss": 0.8818, "step": 11888 }, { "epoch": 0.68, "grad_norm": 1.8434861898422241, "learning_rate": 4.855388022433383e-06, "loss": 0.9349, "step": 11889 }, { "epoch": 0.68, "grad_norm": 1.8136955499649048, "learning_rate": 4.8537951839275985e-06, "loss": 0.8954, "step": 11890 }, { "epoch": 0.68, "grad_norm": 1.6073514223098755, "learning_rate": 4.852202523002964e-06, "loss": 0.8456, "step": 11891 }, { "epoch": 0.68, "grad_norm": 1.6485495567321777, "learning_rate": 4.850610039714444e-06, "loss": 0.8092, "step": 11892 }, { "epoch": 0.68, "grad_norm": 1.8869320154190063, "learning_rate": 4.849017734116984e-06, "loss": 1.0245, "step": 11893 }, { "epoch": 0.68, "grad_norm": 1.7046549320220947, "learning_rate": 4.847425606265537e-06, "loss": 0.9873, "step": 11894 }, { "epoch": 0.68, "grad_norm": 1.7491726875305176, "learning_rate": 4.845833656215034e-06, "loss": 0.8055, "step": 11895 }, { "epoch": 0.68, "grad_norm": 1.614396572113037, "learning_rate": 4.8442418840204195e-06, "loss": 0.9741, "step": 11896 }, { "epoch": 0.68, "grad_norm": 1.599818229675293, "learning_rate": 4.842650289736611e-06, "loss": 0.9123, "step": 11897 }, { "epoch": 0.68, "grad_norm": 1.7163891792297363, "learning_rate": 4.84105887341854e-06, "loss": 0.9908, "step": 11898 }, { "epoch": 0.68, "grad_norm": 1.6395829916000366, "learning_rate": 4.83946763512111e-06, "loss": 0.9483, "step": 11899 }, { "epoch": 0.68, "grad_norm": 1.7141355276107788, "learning_rate": 4.837876574899237e-06, "loss": 0.9483, "step": 11900 }, { "epoch": 0.68, "grad_norm": 1.8705010414123535, "learning_rate": 4.836285692807828e-06, "loss": 0.9262, "step": 11901 }, { "epoch": 0.68, "grad_norm": 1.6290096044540405, "learning_rate": 4.834694988901772e-06, "loss": 0.943, "step": 11902 }, { "epoch": 0.68, "grad_norm": 1.8338147401809692, "learning_rate": 4.833104463235967e-06, "loss": 0.9686, "step": 11903 }, { "epoch": 0.68, "grad_norm": 1.906256914138794, "learning_rate": 4.83151411586529e-06, "loss": 0.9605, "step": 11904 }, { "epoch": 0.68, "grad_norm": 1.6055206060409546, "learning_rate": 4.829923946844627e-06, "loss": 0.91, "step": 11905 }, { "epoch": 0.68, "grad_norm": 1.6356450319290161, "learning_rate": 4.828333956228842e-06, "loss": 0.9496, "step": 11906 }, { "epoch": 0.68, "grad_norm": 1.76207435131073, "learning_rate": 4.82674414407281e-06, "loss": 0.9235, "step": 11907 }, { "epoch": 0.68, "grad_norm": 2.0050086975097656, "learning_rate": 4.825154510431383e-06, "loss": 1.0014, "step": 11908 }, { "epoch": 0.68, "grad_norm": 1.8124254941940308, "learning_rate": 4.823565055359423e-06, "loss": 0.9586, "step": 11909 }, { "epoch": 0.68, "grad_norm": 1.6688618659973145, "learning_rate": 4.821975778911768e-06, "loss": 0.9545, "step": 11910 }, { "epoch": 0.68, "grad_norm": 1.654943823814392, "learning_rate": 4.82038668114327e-06, "loss": 0.9573, "step": 11911 }, { "epoch": 0.68, "grad_norm": 1.8712985515594482, "learning_rate": 4.818797762108754e-06, "loss": 0.9806, "step": 11912 }, { "epoch": 0.68, "grad_norm": 1.8634648323059082, "learning_rate": 4.817209021863054e-06, "loss": 0.9103, "step": 11913 }, { "epoch": 0.68, "grad_norm": 1.8785457611083984, "learning_rate": 4.815620460460997e-06, "loss": 0.8856, "step": 11914 }, { "epoch": 0.68, "grad_norm": 1.6534260511398315, "learning_rate": 4.814032077957392e-06, "loss": 0.9501, "step": 11915 }, { "epoch": 0.68, "grad_norm": 1.777207612991333, "learning_rate": 4.812443874407059e-06, "loss": 0.883, "step": 11916 }, { "epoch": 0.68, "grad_norm": 2.2961912155151367, "learning_rate": 4.810855849864792e-06, "loss": 0.8819, "step": 11917 }, { "epoch": 0.68, "grad_norm": 1.073600172996521, "learning_rate": 4.809268004385398e-06, "loss": 0.5544, "step": 11918 }, { "epoch": 0.68, "grad_norm": 1.741358757019043, "learning_rate": 4.807680338023661e-06, "loss": 0.9771, "step": 11919 }, { "epoch": 0.68, "grad_norm": 1.7747488021850586, "learning_rate": 4.806092850834373e-06, "loss": 0.9182, "step": 11920 }, { "epoch": 0.68, "grad_norm": 1.7467007637023926, "learning_rate": 4.804505542872311e-06, "loss": 0.8902, "step": 11921 }, { "epoch": 0.68, "grad_norm": 1.837347149848938, "learning_rate": 4.802918414192254e-06, "loss": 0.9105, "step": 11922 }, { "epoch": 0.68, "grad_norm": 1.7731757164001465, "learning_rate": 4.801331464848961e-06, "loss": 0.9517, "step": 11923 }, { "epoch": 0.68, "grad_norm": 2.208500862121582, "learning_rate": 4.7997446948972015e-06, "loss": 0.9847, "step": 11924 }, { "epoch": 0.68, "grad_norm": 1.6921570301055908, "learning_rate": 4.798158104391721e-06, "loss": 0.8937, "step": 11925 }, { "epoch": 0.68, "grad_norm": 1.8473275899887085, "learning_rate": 4.796571693387278e-06, "loss": 1.0081, "step": 11926 }, { "epoch": 0.68, "grad_norm": 1.70350980758667, "learning_rate": 4.7949854619386086e-06, "loss": 0.9556, "step": 11927 }, { "epoch": 0.68, "grad_norm": 1.8426882028579712, "learning_rate": 4.793399410100453e-06, "loss": 0.8594, "step": 11928 }, { "epoch": 0.68, "grad_norm": 1.7434173822402954, "learning_rate": 4.791813537927537e-06, "loss": 0.9984, "step": 11929 }, { "epoch": 0.68, "grad_norm": 1.97019362449646, "learning_rate": 4.790227845474588e-06, "loss": 0.9194, "step": 11930 }, { "epoch": 0.68, "grad_norm": 1.6433249711990356, "learning_rate": 4.788642332796325e-06, "loss": 0.9674, "step": 11931 }, { "epoch": 0.68, "grad_norm": 1.9288480281829834, "learning_rate": 4.7870569999474545e-06, "loss": 0.9144, "step": 11932 }, { "epoch": 0.68, "grad_norm": 1.6517329216003418, "learning_rate": 4.78547184698269e-06, "loss": 0.853, "step": 11933 }, { "epoch": 0.68, "grad_norm": 1.876889705657959, "learning_rate": 4.783886873956721e-06, "loss": 0.939, "step": 11934 }, { "epoch": 0.68, "grad_norm": 1.5641628503799438, "learning_rate": 4.78230208092425e-06, "loss": 0.9281, "step": 11935 }, { "epoch": 0.68, "grad_norm": 1.9833928346633911, "learning_rate": 4.780717467939955e-06, "loss": 0.9072, "step": 11936 }, { "epoch": 0.68, "grad_norm": 1.7088240385055542, "learning_rate": 4.779133035058524e-06, "loss": 0.9, "step": 11937 }, { "epoch": 0.68, "grad_norm": 1.7135251760482788, "learning_rate": 4.777548782334626e-06, "loss": 0.8758, "step": 11938 }, { "epoch": 0.68, "grad_norm": 1.8125405311584473, "learning_rate": 4.7759647098229335e-06, "loss": 0.9986, "step": 11939 }, { "epoch": 0.68, "grad_norm": 1.6916090250015259, "learning_rate": 4.774380817578101e-06, "loss": 0.9156, "step": 11940 }, { "epoch": 0.68, "grad_norm": 1.758058786392212, "learning_rate": 4.7727971056547915e-06, "loss": 0.958, "step": 11941 }, { "epoch": 0.68, "grad_norm": 1.90669584274292, "learning_rate": 4.771213574107656e-06, "loss": 0.8799, "step": 11942 }, { "epoch": 0.68, "grad_norm": 1.7429393529891968, "learning_rate": 4.769630222991329e-06, "loss": 0.8746, "step": 11943 }, { "epoch": 0.69, "grad_norm": 1.8275842666625977, "learning_rate": 4.768047052360457e-06, "loss": 0.9892, "step": 11944 }, { "epoch": 0.69, "grad_norm": 1.6740633249282837, "learning_rate": 4.7664640622696626e-06, "loss": 0.9438, "step": 11945 }, { "epoch": 0.69, "grad_norm": 2.126016139984131, "learning_rate": 4.7648812527735775e-06, "loss": 0.9547, "step": 11946 }, { "epoch": 0.69, "grad_norm": 1.7013310194015503, "learning_rate": 4.7632986239268145e-06, "loss": 0.8546, "step": 11947 }, { "epoch": 0.69, "grad_norm": 1.70201575756073, "learning_rate": 4.7617161757839895e-06, "loss": 0.9254, "step": 11948 }, { "epoch": 0.69, "grad_norm": 1.6803851127624512, "learning_rate": 4.760133908399705e-06, "loss": 0.8593, "step": 11949 }, { "epoch": 0.69, "grad_norm": 1.8167961835861206, "learning_rate": 4.758551821828564e-06, "loss": 0.9036, "step": 11950 }, { "epoch": 0.69, "grad_norm": 1.7445735931396484, "learning_rate": 4.756969916125155e-06, "loss": 0.9293, "step": 11951 }, { "epoch": 0.69, "grad_norm": 1.948959231376648, "learning_rate": 4.755388191344073e-06, "loss": 0.9645, "step": 11952 }, { "epoch": 0.69, "grad_norm": 1.778679609298706, "learning_rate": 4.7538066475398905e-06, "loss": 0.8778, "step": 11953 }, { "epoch": 0.69, "grad_norm": 1.7203950881958008, "learning_rate": 4.752225284767185e-06, "loss": 0.8853, "step": 11954 }, { "epoch": 0.69, "grad_norm": 1.8872040510177612, "learning_rate": 4.750644103080529e-06, "loss": 0.9498, "step": 11955 }, { "epoch": 0.69, "grad_norm": 1.8887821435928345, "learning_rate": 4.7490631025344805e-06, "loss": 0.9093, "step": 11956 }, { "epoch": 0.69, "grad_norm": 1.7111982107162476, "learning_rate": 4.747482283183598e-06, "loss": 0.8984, "step": 11957 }, { "epoch": 0.69, "grad_norm": 1.802860975265503, "learning_rate": 4.745901645082426e-06, "loss": 0.952, "step": 11958 }, { "epoch": 0.69, "grad_norm": 1.728653907775879, "learning_rate": 4.744321188285516e-06, "loss": 0.8727, "step": 11959 }, { "epoch": 0.69, "grad_norm": 1.8139851093292236, "learning_rate": 4.742740912847397e-06, "loss": 0.9184, "step": 11960 }, { "epoch": 0.69, "grad_norm": 1.8228660821914673, "learning_rate": 4.741160818822607e-06, "loss": 0.9789, "step": 11961 }, { "epoch": 0.69, "grad_norm": 1.754248023033142, "learning_rate": 4.739580906265663e-06, "loss": 0.8698, "step": 11962 }, { "epoch": 0.69, "grad_norm": 1.7690085172653198, "learning_rate": 4.738001175231091e-06, "loss": 0.9068, "step": 11963 }, { "epoch": 0.69, "grad_norm": 1.0565402507781982, "learning_rate": 4.736421625773396e-06, "loss": 0.5449, "step": 11964 }, { "epoch": 0.69, "grad_norm": 1.7604234218597412, "learning_rate": 4.734842257947089e-06, "loss": 0.8692, "step": 11965 }, { "epoch": 0.69, "grad_norm": 1.6503328084945679, "learning_rate": 4.73326307180667e-06, "loss": 0.9616, "step": 11966 }, { "epoch": 0.69, "grad_norm": 1.69640052318573, "learning_rate": 4.731684067406628e-06, "loss": 0.9819, "step": 11967 }, { "epoch": 0.69, "grad_norm": 1.7732524871826172, "learning_rate": 4.730105244801455e-06, "loss": 0.931, "step": 11968 }, { "epoch": 0.69, "grad_norm": 1.6727626323699951, "learning_rate": 4.7285266040456255e-06, "loss": 0.8658, "step": 11969 }, { "epoch": 0.69, "grad_norm": 1.7181884050369263, "learning_rate": 4.726948145193622e-06, "loss": 0.931, "step": 11970 }, { "epoch": 0.69, "grad_norm": 2.002288818359375, "learning_rate": 4.725369868299904e-06, "loss": 0.8838, "step": 11971 }, { "epoch": 0.69, "grad_norm": 1.8661658763885498, "learning_rate": 4.723791773418942e-06, "loss": 0.9205, "step": 11972 }, { "epoch": 0.69, "grad_norm": 1.8935105800628662, "learning_rate": 4.722213860605184e-06, "loss": 1.0292, "step": 11973 }, { "epoch": 0.69, "grad_norm": 1.77283775806427, "learning_rate": 4.720636129913086e-06, "loss": 0.8934, "step": 11974 }, { "epoch": 0.69, "grad_norm": 1.0479719638824463, "learning_rate": 4.719058581397084e-06, "loss": 0.6187, "step": 11975 }, { "epoch": 0.69, "grad_norm": 1.8851581811904907, "learning_rate": 4.717481215111622e-06, "loss": 0.9473, "step": 11976 }, { "epoch": 0.69, "grad_norm": 1.7985254526138306, "learning_rate": 4.715904031111124e-06, "loss": 0.9009, "step": 11977 }, { "epoch": 0.69, "grad_norm": 1.1421436071395874, "learning_rate": 4.714327029450016e-06, "loss": 0.5794, "step": 11978 }, { "epoch": 0.69, "grad_norm": 1.9085428714752197, "learning_rate": 4.712750210182724e-06, "loss": 0.8492, "step": 11979 }, { "epoch": 0.69, "grad_norm": 1.741111159324646, "learning_rate": 4.711173573363647e-06, "loss": 0.8585, "step": 11980 }, { "epoch": 0.69, "grad_norm": 1.8865671157836914, "learning_rate": 4.7095971190472e-06, "loss": 0.939, "step": 11981 }, { "epoch": 0.69, "grad_norm": 1.9194602966308594, "learning_rate": 4.708020847287776e-06, "loss": 0.8701, "step": 11982 }, { "epoch": 0.69, "grad_norm": 1.7661586999893188, "learning_rate": 4.706444758139772e-06, "loss": 0.9776, "step": 11983 }, { "epoch": 0.69, "grad_norm": 1.8294540643692017, "learning_rate": 4.704868851657569e-06, "loss": 0.8541, "step": 11984 }, { "epoch": 0.69, "grad_norm": 1.6922866106033325, "learning_rate": 4.703293127895555e-06, "loss": 0.9931, "step": 11985 }, { "epoch": 0.69, "grad_norm": 1.7151439189910889, "learning_rate": 4.701717586908091e-06, "loss": 0.9293, "step": 11986 }, { "epoch": 0.69, "grad_norm": 1.8376166820526123, "learning_rate": 4.700142228749561e-06, "loss": 0.9037, "step": 11987 }, { "epoch": 0.69, "grad_norm": 1.656103491783142, "learning_rate": 4.698567053474316e-06, "loss": 0.8858, "step": 11988 }, { "epoch": 0.69, "grad_norm": 1.6496952772140503, "learning_rate": 4.6969920611367145e-06, "loss": 0.8841, "step": 11989 }, { "epoch": 0.69, "grad_norm": 1.8571563959121704, "learning_rate": 4.6954172517911e-06, "loss": 0.8724, "step": 11990 }, { "epoch": 0.69, "grad_norm": 1.7825052738189697, "learning_rate": 4.6938426254918235e-06, "loss": 0.9618, "step": 11991 }, { "epoch": 0.69, "grad_norm": 1.9085901975631714, "learning_rate": 4.692268182293211e-06, "loss": 0.9337, "step": 11992 }, { "epoch": 0.69, "grad_norm": 1.733119010925293, "learning_rate": 4.690693922249601e-06, "loss": 0.9527, "step": 11993 }, { "epoch": 0.69, "grad_norm": 1.0514566898345947, "learning_rate": 4.689119845415308e-06, "loss": 0.5433, "step": 11994 }, { "epoch": 0.69, "grad_norm": 2.064592123031616, "learning_rate": 4.687545951844656e-06, "loss": 0.9177, "step": 11995 }, { "epoch": 0.69, "grad_norm": 1.7091907262802124, "learning_rate": 4.685972241591956e-06, "loss": 0.9259, "step": 11996 }, { "epoch": 0.69, "grad_norm": 1.891315221786499, "learning_rate": 4.684398714711507e-06, "loss": 0.907, "step": 11997 }, { "epoch": 0.69, "grad_norm": 1.800525426864624, "learning_rate": 4.6828253712576125e-06, "loss": 0.8894, "step": 11998 }, { "epoch": 0.69, "grad_norm": 1.640904426574707, "learning_rate": 4.681252211284557e-06, "loss": 0.8976, "step": 11999 }, { "epoch": 0.69, "grad_norm": 1.7103461027145386, "learning_rate": 4.679679234846636e-06, "loss": 0.8161, "step": 12000 }, { "epoch": 0.69, "grad_norm": 1.7877203226089478, "learning_rate": 4.678106441998118e-06, "loss": 0.9406, "step": 12001 }, { "epoch": 0.69, "grad_norm": 1.7582924365997314, "learning_rate": 4.676533832793284e-06, "loss": 0.9277, "step": 12002 }, { "epoch": 0.69, "grad_norm": 1.628161072731018, "learning_rate": 4.674961407286393e-06, "loss": 0.9072, "step": 12003 }, { "epoch": 0.69, "grad_norm": 1.6190695762634277, "learning_rate": 4.673389165531714e-06, "loss": 0.8787, "step": 12004 }, { "epoch": 0.69, "grad_norm": 1.80056893825531, "learning_rate": 4.6718171075834916e-06, "loss": 0.9068, "step": 12005 }, { "epoch": 0.69, "grad_norm": 2.0112545490264893, "learning_rate": 4.67024523349598e-06, "loss": 0.9393, "step": 12006 }, { "epoch": 0.69, "grad_norm": 1.7300282716751099, "learning_rate": 4.668673543323414e-06, "loss": 0.948, "step": 12007 }, { "epoch": 0.69, "grad_norm": 1.6679729223251343, "learning_rate": 4.6671020371200324e-06, "loss": 0.9344, "step": 12008 }, { "epoch": 0.69, "grad_norm": 1.790862798690796, "learning_rate": 4.665530714940067e-06, "loss": 0.9331, "step": 12009 }, { "epoch": 0.69, "grad_norm": 1.7741063833236694, "learning_rate": 4.663959576837729e-06, "loss": 0.9388, "step": 12010 }, { "epoch": 0.69, "grad_norm": 0.9999828934669495, "learning_rate": 4.662388622867246e-06, "loss": 0.582, "step": 12011 }, { "epoch": 0.69, "grad_norm": 1.6702301502227783, "learning_rate": 4.6608178530828176e-06, "loss": 1.0275, "step": 12012 }, { "epoch": 0.69, "grad_norm": 1.947562575340271, "learning_rate": 4.6592472675386535e-06, "loss": 0.9604, "step": 12013 }, { "epoch": 0.69, "grad_norm": 1.6805510520935059, "learning_rate": 4.657676866288945e-06, "loss": 0.9143, "step": 12014 }, { "epoch": 0.69, "grad_norm": 1.771214246749878, "learning_rate": 4.656106649387887e-06, "loss": 0.8037, "step": 12015 }, { "epoch": 0.69, "grad_norm": 1.7808375358581543, "learning_rate": 4.654536616889658e-06, "loss": 0.9087, "step": 12016 }, { "epoch": 0.69, "grad_norm": 1.641385555267334, "learning_rate": 4.652966768848442e-06, "loss": 0.7922, "step": 12017 }, { "epoch": 0.69, "grad_norm": 1.814820408821106, "learning_rate": 4.651397105318402e-06, "loss": 0.8584, "step": 12018 }, { "epoch": 0.69, "grad_norm": 1.7147387266159058, "learning_rate": 4.649827626353709e-06, "loss": 0.9931, "step": 12019 }, { "epoch": 0.69, "grad_norm": 1.6887844800949097, "learning_rate": 4.648258332008523e-06, "loss": 0.9519, "step": 12020 }, { "epoch": 0.69, "grad_norm": 1.7287869453430176, "learning_rate": 4.646689222336988e-06, "loss": 0.8996, "step": 12021 }, { "epoch": 0.69, "grad_norm": 1.738333821296692, "learning_rate": 4.645120297393257e-06, "loss": 0.8911, "step": 12022 }, { "epoch": 0.69, "grad_norm": 1.7822545766830444, "learning_rate": 4.643551557231464e-06, "loss": 0.9667, "step": 12023 }, { "epoch": 0.69, "grad_norm": 2.0141637325286865, "learning_rate": 4.641983001905747e-06, "loss": 1.0143, "step": 12024 }, { "epoch": 0.69, "grad_norm": 1.9001177549362183, "learning_rate": 4.640414631470226e-06, "loss": 1.0146, "step": 12025 }, { "epoch": 0.69, "grad_norm": 1.7878799438476562, "learning_rate": 4.638846445979028e-06, "loss": 0.9018, "step": 12026 }, { "epoch": 0.69, "grad_norm": 1.6512539386749268, "learning_rate": 4.6372784454862605e-06, "loss": 0.9063, "step": 12027 }, { "epoch": 0.69, "grad_norm": 1.748706340789795, "learning_rate": 4.635710630046037e-06, "loss": 0.903, "step": 12028 }, { "epoch": 0.69, "grad_norm": 1.6269668340682983, "learning_rate": 4.634142999712451e-06, "loss": 0.9733, "step": 12029 }, { "epoch": 0.69, "grad_norm": 1.6721644401550293, "learning_rate": 4.632575554539605e-06, "loss": 0.9289, "step": 12030 }, { "epoch": 0.69, "grad_norm": 1.604628086090088, "learning_rate": 4.6310082945815805e-06, "loss": 0.8758, "step": 12031 }, { "epoch": 0.69, "grad_norm": 1.8042831420898438, "learning_rate": 4.6294412198924625e-06, "loss": 0.8787, "step": 12032 }, { "epoch": 0.69, "grad_norm": 1.7345798015594482, "learning_rate": 4.627874330526328e-06, "loss": 0.8784, "step": 12033 }, { "epoch": 0.69, "grad_norm": 1.7797752618789673, "learning_rate": 4.626307626537241e-06, "loss": 0.9824, "step": 12034 }, { "epoch": 0.69, "grad_norm": 1.7025492191314697, "learning_rate": 4.62474110797927e-06, "loss": 0.9857, "step": 12035 }, { "epoch": 0.69, "grad_norm": 1.6464751958847046, "learning_rate": 4.623174774906464e-06, "loss": 0.8602, "step": 12036 }, { "epoch": 0.69, "grad_norm": 1.8642346858978271, "learning_rate": 4.621608627372883e-06, "loss": 0.9015, "step": 12037 }, { "epoch": 0.69, "grad_norm": 1.6643532514572144, "learning_rate": 4.620042665432559e-06, "loss": 0.8842, "step": 12038 }, { "epoch": 0.69, "grad_norm": 1.704205870628357, "learning_rate": 4.618476889139538e-06, "loss": 0.8622, "step": 12039 }, { "epoch": 0.69, "grad_norm": 1.8148225545883179, "learning_rate": 4.616911298547845e-06, "loss": 0.9606, "step": 12040 }, { "epoch": 0.69, "grad_norm": 1.9357045888900757, "learning_rate": 4.615345893711508e-06, "loss": 0.9485, "step": 12041 }, { "epoch": 0.69, "grad_norm": 1.62363862991333, "learning_rate": 4.61378067468454e-06, "loss": 0.9628, "step": 12042 }, { "epoch": 0.69, "grad_norm": 1.8985458612442017, "learning_rate": 4.612215641520957e-06, "loss": 0.9177, "step": 12043 }, { "epoch": 0.69, "grad_norm": 1.9085721969604492, "learning_rate": 4.6106507942747595e-06, "loss": 0.9103, "step": 12044 }, { "epoch": 0.69, "grad_norm": 1.8194293975830078, "learning_rate": 4.609086132999949e-06, "loss": 0.966, "step": 12045 }, { "epoch": 0.69, "grad_norm": 1.7097723484039307, "learning_rate": 4.60752165775052e-06, "loss": 0.9256, "step": 12046 }, { "epoch": 0.69, "grad_norm": 1.81070876121521, "learning_rate": 4.605957368580453e-06, "loss": 0.8806, "step": 12047 }, { "epoch": 0.69, "grad_norm": 1.984039306640625, "learning_rate": 4.6043932655437316e-06, "loss": 0.9777, "step": 12048 }, { "epoch": 0.69, "grad_norm": 1.7470107078552246, "learning_rate": 4.6028293486943234e-06, "loss": 0.8602, "step": 12049 }, { "epoch": 0.69, "grad_norm": 1.6861248016357422, "learning_rate": 4.6012656180862024e-06, "loss": 0.9372, "step": 12050 }, { "epoch": 0.69, "grad_norm": 1.8057801723480225, "learning_rate": 4.59970207377332e-06, "loss": 0.9022, "step": 12051 }, { "epoch": 0.69, "grad_norm": 1.058417558670044, "learning_rate": 4.598138715809634e-06, "loss": 0.5787, "step": 12052 }, { "epoch": 0.69, "grad_norm": 1.7225613594055176, "learning_rate": 4.596575544249091e-06, "loss": 0.9108, "step": 12053 }, { "epoch": 0.69, "grad_norm": 1.819343090057373, "learning_rate": 4.595012559145636e-06, "loss": 0.8622, "step": 12054 }, { "epoch": 0.69, "grad_norm": 1.8904342651367188, "learning_rate": 4.5934497605531955e-06, "loss": 0.9406, "step": 12055 }, { "epoch": 0.69, "grad_norm": 1.7186291217803955, "learning_rate": 4.5918871485257055e-06, "loss": 0.9006, "step": 12056 }, { "epoch": 0.69, "grad_norm": 1.7375426292419434, "learning_rate": 4.5903247231170785e-06, "loss": 0.9171, "step": 12057 }, { "epoch": 0.69, "grad_norm": 1.8099197149276733, "learning_rate": 4.588762484381238e-06, "loss": 0.9935, "step": 12058 }, { "epoch": 0.69, "grad_norm": 1.7822587490081787, "learning_rate": 4.587200432372085e-06, "loss": 0.8809, "step": 12059 }, { "epoch": 0.69, "grad_norm": 1.7568249702453613, "learning_rate": 4.5856385671435285e-06, "loss": 0.9707, "step": 12060 }, { "epoch": 0.69, "grad_norm": 2.298645257949829, "learning_rate": 4.584076888749458e-06, "loss": 0.9125, "step": 12061 }, { "epoch": 0.69, "grad_norm": 1.9083985090255737, "learning_rate": 4.582515397243764e-06, "loss": 0.9895, "step": 12062 }, { "epoch": 0.69, "grad_norm": 1.7404065132141113, "learning_rate": 4.580954092680334e-06, "loss": 0.9085, "step": 12063 }, { "epoch": 0.69, "grad_norm": 1.6660234928131104, "learning_rate": 4.5793929751130384e-06, "loss": 0.8512, "step": 12064 }, { "epoch": 0.69, "grad_norm": 1.8452751636505127, "learning_rate": 4.577832044595752e-06, "loss": 0.8905, "step": 12065 }, { "epoch": 0.69, "grad_norm": 1.0838899612426758, "learning_rate": 4.576271301182332e-06, "loss": 0.5105, "step": 12066 }, { "epoch": 0.69, "grad_norm": 1.6553512811660767, "learning_rate": 4.574710744926643e-06, "loss": 0.914, "step": 12067 }, { "epoch": 0.69, "grad_norm": 1.6799341440200806, "learning_rate": 4.573150375882527e-06, "loss": 0.8506, "step": 12068 }, { "epoch": 0.69, "grad_norm": 1.718163251876831, "learning_rate": 4.571590194103836e-06, "loss": 1.0113, "step": 12069 }, { "epoch": 0.69, "grad_norm": 1.6394116878509521, "learning_rate": 4.570030199644401e-06, "loss": 0.9512, "step": 12070 }, { "epoch": 0.69, "grad_norm": 1.8188966512680054, "learning_rate": 4.568470392558059e-06, "loss": 1.0151, "step": 12071 }, { "epoch": 0.69, "grad_norm": 1.1250067949295044, "learning_rate": 4.566910772898627e-06, "loss": 0.5999, "step": 12072 }, { "epoch": 0.69, "grad_norm": 1.6914042234420776, "learning_rate": 4.565351340719928e-06, "loss": 0.8688, "step": 12073 }, { "epoch": 0.69, "grad_norm": 1.8462719917297363, "learning_rate": 4.563792096075777e-06, "loss": 0.9154, "step": 12074 }, { "epoch": 0.69, "grad_norm": 1.6726685762405396, "learning_rate": 4.562233039019971e-06, "loss": 0.9769, "step": 12075 }, { "epoch": 0.69, "grad_norm": 1.6645078659057617, "learning_rate": 4.560674169606317e-06, "loss": 0.9177, "step": 12076 }, { "epoch": 0.69, "grad_norm": 1.757951021194458, "learning_rate": 4.5591154878886e-06, "loss": 0.9182, "step": 12077 }, { "epoch": 0.69, "grad_norm": 1.641312837600708, "learning_rate": 4.5575569939206125e-06, "loss": 0.8785, "step": 12078 }, { "epoch": 0.69, "grad_norm": 1.945239782333374, "learning_rate": 4.555998687756127e-06, "loss": 0.8914, "step": 12079 }, { "epoch": 0.69, "grad_norm": 1.5375421047210693, "learning_rate": 4.5544405694489224e-06, "loss": 0.8649, "step": 12080 }, { "epoch": 0.69, "grad_norm": 1.819831371307373, "learning_rate": 4.5528826390527594e-06, "loss": 0.9037, "step": 12081 }, { "epoch": 0.69, "grad_norm": 1.689393401145935, "learning_rate": 4.551324896621403e-06, "loss": 0.8749, "step": 12082 }, { "epoch": 0.69, "grad_norm": 1.8362040519714355, "learning_rate": 4.549767342208602e-06, "loss": 0.9851, "step": 12083 }, { "epoch": 0.69, "grad_norm": 1.8308439254760742, "learning_rate": 4.548209975868109e-06, "loss": 0.9242, "step": 12084 }, { "epoch": 0.69, "grad_norm": 1.8101271390914917, "learning_rate": 4.546652797653656e-06, "loss": 0.8712, "step": 12085 }, { "epoch": 0.69, "grad_norm": 1.9170933961868286, "learning_rate": 4.5450958076189825e-06, "loss": 0.8969, "step": 12086 }, { "epoch": 0.69, "grad_norm": 1.8410924673080444, "learning_rate": 4.543539005817818e-06, "loss": 0.9218, "step": 12087 }, { "epoch": 0.69, "grad_norm": 1.8555163145065308, "learning_rate": 4.541982392303876e-06, "loss": 1.0267, "step": 12088 }, { "epoch": 0.69, "grad_norm": 1.7057043313980103, "learning_rate": 4.540425967130881e-06, "loss": 0.9537, "step": 12089 }, { "epoch": 0.69, "grad_norm": 1.855972409248352, "learning_rate": 4.53886973035253e-06, "loss": 0.8994, "step": 12090 }, { "epoch": 0.69, "grad_norm": 1.5824469327926636, "learning_rate": 4.537313682022534e-06, "loss": 0.8573, "step": 12091 }, { "epoch": 0.69, "grad_norm": 1.7325880527496338, "learning_rate": 4.53575782219458e-06, "loss": 0.8998, "step": 12092 }, { "epoch": 0.69, "grad_norm": 1.878062129020691, "learning_rate": 4.534202150922362e-06, "loss": 0.9307, "step": 12093 }, { "epoch": 0.69, "grad_norm": 1.8051809072494507, "learning_rate": 4.532646668259557e-06, "loss": 0.9662, "step": 12094 }, { "epoch": 0.69, "grad_norm": 1.7402708530426025, "learning_rate": 4.531091374259848e-06, "loss": 0.9049, "step": 12095 }, { "epoch": 0.69, "grad_norm": 1.7610024213790894, "learning_rate": 4.529536268976893e-06, "loss": 0.8916, "step": 12096 }, { "epoch": 0.69, "grad_norm": 2.026312828063965, "learning_rate": 4.5279813524643644e-06, "loss": 0.8098, "step": 12097 }, { "epoch": 0.69, "grad_norm": 1.8060368299484253, "learning_rate": 4.526426624775911e-06, "loss": 0.9179, "step": 12098 }, { "epoch": 0.69, "grad_norm": 1.9569159746170044, "learning_rate": 4.524872085965184e-06, "loss": 0.9601, "step": 12099 }, { "epoch": 0.69, "grad_norm": 1.655694603919983, "learning_rate": 4.523317736085832e-06, "loss": 0.9731, "step": 12100 }, { "epoch": 0.69, "grad_norm": 1.8205318450927734, "learning_rate": 4.521763575191482e-06, "loss": 0.9043, "step": 12101 }, { "epoch": 0.69, "grad_norm": 1.8622970581054688, "learning_rate": 4.520209603335772e-06, "loss": 0.8329, "step": 12102 }, { "epoch": 0.69, "grad_norm": 1.6752930879592896, "learning_rate": 4.518655820572317e-06, "loss": 0.9472, "step": 12103 }, { "epoch": 0.69, "grad_norm": 1.7246054410934448, "learning_rate": 4.517102226954744e-06, "loss": 0.9573, "step": 12104 }, { "epoch": 0.69, "grad_norm": 1.8553173542022705, "learning_rate": 4.515548822536652e-06, "loss": 0.9446, "step": 12105 }, { "epoch": 0.69, "grad_norm": 1.625590205192566, "learning_rate": 4.513995607371654e-06, "loss": 0.8991, "step": 12106 }, { "epoch": 0.69, "grad_norm": 1.8012144565582275, "learning_rate": 4.512442581513339e-06, "loss": 0.9701, "step": 12107 }, { "epoch": 0.69, "grad_norm": 1.044556975364685, "learning_rate": 4.510889745015306e-06, "loss": 0.5489, "step": 12108 }, { "epoch": 0.69, "grad_norm": 1.9414899349212646, "learning_rate": 4.50933709793113e-06, "loss": 0.8543, "step": 12109 }, { "epoch": 0.69, "grad_norm": 1.8772963285446167, "learning_rate": 4.507784640314393e-06, "loss": 0.9221, "step": 12110 }, { "epoch": 0.69, "grad_norm": 1.8219832181930542, "learning_rate": 4.50623237221867e-06, "loss": 0.9939, "step": 12111 }, { "epoch": 0.69, "grad_norm": 1.8231613636016846, "learning_rate": 4.50468029369752e-06, "loss": 0.9314, "step": 12112 }, { "epoch": 0.69, "grad_norm": 1.864905834197998, "learning_rate": 4.5031284048045045e-06, "loss": 1.0244, "step": 12113 }, { "epoch": 0.69, "grad_norm": 1.6909316778182983, "learning_rate": 4.5015767055931695e-06, "loss": 0.9142, "step": 12114 }, { "epoch": 0.69, "grad_norm": 1.4885127544403076, "learning_rate": 4.500025196117066e-06, "loss": 0.8463, "step": 12115 }, { "epoch": 0.69, "grad_norm": 1.721638798713684, "learning_rate": 4.498473876429727e-06, "loss": 0.9042, "step": 12116 }, { "epoch": 0.69, "grad_norm": 1.8825305700302124, "learning_rate": 4.49692274658469e-06, "loss": 0.9038, "step": 12117 }, { "epoch": 0.69, "grad_norm": 1.5840251445770264, "learning_rate": 4.495371806635469e-06, "loss": 0.9152, "step": 12118 }, { "epoch": 0.7, "grad_norm": 1.9720197916030884, "learning_rate": 4.493821056635598e-06, "loss": 0.9172, "step": 12119 }, { "epoch": 0.7, "grad_norm": 1.777827501296997, "learning_rate": 4.492270496638578e-06, "loss": 0.8906, "step": 12120 }, { "epoch": 0.7, "grad_norm": 1.783775806427002, "learning_rate": 4.490720126697921e-06, "loss": 0.9353, "step": 12121 }, { "epoch": 0.7, "grad_norm": 1.855164647102356, "learning_rate": 4.489169946867119e-06, "loss": 0.8433, "step": 12122 }, { "epoch": 0.7, "grad_norm": 1.7153263092041016, "learning_rate": 4.487619957199672e-06, "loss": 0.8493, "step": 12123 }, { "epoch": 0.7, "grad_norm": 1.6919550895690918, "learning_rate": 4.486070157749059e-06, "loss": 0.8684, "step": 12124 }, { "epoch": 0.7, "grad_norm": 2.6777186393737793, "learning_rate": 4.484520548568766e-06, "loss": 0.9316, "step": 12125 }, { "epoch": 0.7, "grad_norm": 1.784044623374939, "learning_rate": 4.482971129712258e-06, "loss": 0.9159, "step": 12126 }, { "epoch": 0.7, "grad_norm": 1.8387353420257568, "learning_rate": 4.481421901233005e-06, "loss": 0.9539, "step": 12127 }, { "epoch": 0.7, "grad_norm": 1.764176845550537, "learning_rate": 4.4798728631844715e-06, "loss": 0.9344, "step": 12128 }, { "epoch": 0.7, "grad_norm": 1.6732069253921509, "learning_rate": 4.478324015620101e-06, "loss": 0.9919, "step": 12129 }, { "epoch": 0.7, "grad_norm": 1.015463948249817, "learning_rate": 4.476775358593348e-06, "loss": 0.5797, "step": 12130 }, { "epoch": 0.7, "grad_norm": 1.6946240663528442, "learning_rate": 4.475226892157646e-06, "loss": 0.9111, "step": 12131 }, { "epoch": 0.7, "grad_norm": 1.6743507385253906, "learning_rate": 4.473678616366434e-06, "loss": 0.9501, "step": 12132 }, { "epoch": 0.7, "grad_norm": 1.6993016004562378, "learning_rate": 4.472130531273132e-06, "loss": 0.9076, "step": 12133 }, { "epoch": 0.7, "grad_norm": 1.6751279830932617, "learning_rate": 4.470582636931168e-06, "loss": 0.8764, "step": 12134 }, { "epoch": 0.7, "grad_norm": 1.8724472522735596, "learning_rate": 4.469034933393948e-06, "loss": 0.9181, "step": 12135 }, { "epoch": 0.7, "grad_norm": 1.0837968587875366, "learning_rate": 4.467487420714885e-06, "loss": 0.5223, "step": 12136 }, { "epoch": 0.7, "grad_norm": 1.6412482261657715, "learning_rate": 4.4659400989473744e-06, "loss": 0.9073, "step": 12137 }, { "epoch": 0.7, "grad_norm": 1.7140542268753052, "learning_rate": 4.464392968144815e-06, "loss": 0.9707, "step": 12138 }, { "epoch": 0.7, "grad_norm": 1.7308902740478516, "learning_rate": 4.462846028360588e-06, "loss": 1.0517, "step": 12139 }, { "epoch": 0.7, "grad_norm": 1.6907967329025269, "learning_rate": 4.461299279648077e-06, "loss": 0.9189, "step": 12140 }, { "epoch": 0.7, "grad_norm": 1.8545448780059814, "learning_rate": 4.45975272206066e-06, "loss": 0.9468, "step": 12141 }, { "epoch": 0.7, "grad_norm": 1.7968237400054932, "learning_rate": 4.4582063556516955e-06, "loss": 0.923, "step": 12142 }, { "epoch": 0.7, "grad_norm": 1.546665072441101, "learning_rate": 4.456660180474554e-06, "loss": 0.8514, "step": 12143 }, { "epoch": 0.7, "grad_norm": 1.7471485137939453, "learning_rate": 4.455114196582582e-06, "loss": 0.9956, "step": 12144 }, { "epoch": 0.7, "grad_norm": 1.7082730531692505, "learning_rate": 4.453568404029131e-06, "loss": 0.9179, "step": 12145 }, { "epoch": 0.7, "grad_norm": 0.9506896734237671, "learning_rate": 4.452022802867541e-06, "loss": 0.5619, "step": 12146 }, { "epoch": 0.7, "grad_norm": 1.96599543094635, "learning_rate": 4.450477393151148e-06, "loss": 0.9262, "step": 12147 }, { "epoch": 0.7, "grad_norm": 2.548830270767212, "learning_rate": 4.448932174933274e-06, "loss": 1.0198, "step": 12148 }, { "epoch": 0.7, "grad_norm": 1.0511407852172852, "learning_rate": 4.447387148267249e-06, "loss": 0.5041, "step": 12149 }, { "epoch": 0.7, "grad_norm": 1.7605023384094238, "learning_rate": 4.44584231320638e-06, "loss": 0.9343, "step": 12150 }, { "epoch": 0.7, "grad_norm": 1.782924771308899, "learning_rate": 4.444297669803981e-06, "loss": 0.9455, "step": 12151 }, { "epoch": 0.7, "grad_norm": 2.3206787109375, "learning_rate": 4.442753218113346e-06, "loss": 0.8799, "step": 12152 }, { "epoch": 0.7, "grad_norm": 1.7880959510803223, "learning_rate": 4.441208958187774e-06, "loss": 0.9132, "step": 12153 }, { "epoch": 0.7, "grad_norm": 1.594754695892334, "learning_rate": 4.4396648900805574e-06, "loss": 0.8757, "step": 12154 }, { "epoch": 0.7, "grad_norm": 1.7561460733413696, "learning_rate": 4.4381210138449685e-06, "loss": 0.9267, "step": 12155 }, { "epoch": 0.7, "grad_norm": 1.897371768951416, "learning_rate": 4.436577329534291e-06, "loss": 0.9138, "step": 12156 }, { "epoch": 0.7, "grad_norm": 1.6250697374343872, "learning_rate": 4.435033837201785e-06, "loss": 0.9545, "step": 12157 }, { "epoch": 0.7, "grad_norm": 1.0469470024108887, "learning_rate": 4.433490536900721e-06, "loss": 0.553, "step": 12158 }, { "epoch": 0.7, "grad_norm": 1.6664888858795166, "learning_rate": 4.4319474286843446e-06, "loss": 0.9255, "step": 12159 }, { "epoch": 0.7, "grad_norm": 1.5987811088562012, "learning_rate": 4.4304045126059126e-06, "loss": 0.8882, "step": 12160 }, { "epoch": 0.7, "grad_norm": 1.665635585784912, "learning_rate": 4.428861788718659e-06, "loss": 0.8743, "step": 12161 }, { "epoch": 0.7, "grad_norm": 1.5753731727600098, "learning_rate": 4.427319257075827e-06, "loss": 0.8321, "step": 12162 }, { "epoch": 0.7, "grad_norm": 1.6694729328155518, "learning_rate": 4.425776917730636e-06, "loss": 0.8196, "step": 12163 }, { "epoch": 0.7, "grad_norm": 1.8965317010879517, "learning_rate": 4.424234770736314e-06, "loss": 0.9573, "step": 12164 }, { "epoch": 0.7, "grad_norm": 1.8113521337509155, "learning_rate": 4.422692816146078e-06, "loss": 0.9978, "step": 12165 }, { "epoch": 0.7, "grad_norm": 2.0293681621551514, "learning_rate": 4.42115105401313e-06, "loss": 0.9883, "step": 12166 }, { "epoch": 0.7, "grad_norm": 1.8602954149246216, "learning_rate": 4.419609484390678e-06, "loss": 0.8724, "step": 12167 }, { "epoch": 0.7, "grad_norm": 1.7014737129211426, "learning_rate": 4.418068107331911e-06, "loss": 0.8917, "step": 12168 }, { "epoch": 0.7, "grad_norm": 1.682297945022583, "learning_rate": 4.416526922890024e-06, "loss": 0.9549, "step": 12169 }, { "epoch": 0.7, "grad_norm": 1.8611048460006714, "learning_rate": 4.414985931118194e-06, "loss": 0.9257, "step": 12170 }, { "epoch": 0.7, "grad_norm": 1.8226057291030884, "learning_rate": 4.413445132069601e-06, "loss": 0.8677, "step": 12171 }, { "epoch": 0.7, "grad_norm": 1.738637089729309, "learning_rate": 4.411904525797408e-06, "loss": 0.911, "step": 12172 }, { "epoch": 0.7, "grad_norm": 1.7301138639450073, "learning_rate": 4.4103641123547836e-06, "loss": 0.9653, "step": 12173 }, { "epoch": 0.7, "grad_norm": 1.8996912240982056, "learning_rate": 4.4088238917948765e-06, "loss": 0.9327, "step": 12174 }, { "epoch": 0.7, "grad_norm": 1.7330734729766846, "learning_rate": 4.4072838641708415e-06, "loss": 0.9284, "step": 12175 }, { "epoch": 0.7, "grad_norm": 1.5642669200897217, "learning_rate": 4.405744029535815e-06, "loss": 0.8801, "step": 12176 }, { "epoch": 0.7, "grad_norm": 1.9499053955078125, "learning_rate": 4.404204387942934e-06, "loss": 0.9138, "step": 12177 }, { "epoch": 0.7, "grad_norm": 1.825777292251587, "learning_rate": 4.402664939445333e-06, "loss": 0.9849, "step": 12178 }, { "epoch": 0.7, "grad_norm": 1.0183806419372559, "learning_rate": 4.401125684096124e-06, "loss": 0.5204, "step": 12179 }, { "epoch": 0.7, "grad_norm": 1.7793660163879395, "learning_rate": 4.399586621948433e-06, "loss": 0.9181, "step": 12180 }, { "epoch": 0.7, "grad_norm": 1.6689112186431885, "learning_rate": 4.39804775305536e-06, "loss": 0.8612, "step": 12181 }, { "epoch": 0.7, "grad_norm": 1.710805892944336, "learning_rate": 4.3965090774700135e-06, "loss": 0.9253, "step": 12182 }, { "epoch": 0.7, "grad_norm": 1.7184252738952637, "learning_rate": 4.394970595245483e-06, "loss": 0.9507, "step": 12183 }, { "epoch": 0.7, "grad_norm": 1.8182810544967651, "learning_rate": 4.3934323064348636e-06, "loss": 0.9591, "step": 12184 }, { "epoch": 0.7, "grad_norm": 1.6814923286437988, "learning_rate": 4.391894211091227e-06, "loss": 0.9022, "step": 12185 }, { "epoch": 0.7, "grad_norm": 1.8337434530258179, "learning_rate": 4.3903563092676626e-06, "loss": 0.9207, "step": 12186 }, { "epoch": 0.7, "grad_norm": 1.8550876379013062, "learning_rate": 4.388818601017228e-06, "loss": 0.8593, "step": 12187 }, { "epoch": 0.7, "grad_norm": 0.964364230632782, "learning_rate": 4.387281086392994e-06, "loss": 0.5373, "step": 12188 }, { "epoch": 0.7, "grad_norm": 1.5979536771774292, "learning_rate": 4.385743765448006e-06, "loss": 0.8781, "step": 12189 }, { "epoch": 0.7, "grad_norm": 2.090733766555786, "learning_rate": 4.384206638235322e-06, "loss": 0.8764, "step": 12190 }, { "epoch": 0.7, "grad_norm": 1.638323426246643, "learning_rate": 4.382669704807977e-06, "loss": 0.9201, "step": 12191 }, { "epoch": 0.7, "grad_norm": 1.8556439876556396, "learning_rate": 4.3811329652190126e-06, "loss": 0.9341, "step": 12192 }, { "epoch": 0.7, "grad_norm": 1.7892338037490845, "learning_rate": 4.37959641952145e-06, "loss": 0.9075, "step": 12193 }, { "epoch": 0.7, "grad_norm": 1.609830379486084, "learning_rate": 4.3780600677683145e-06, "loss": 0.8846, "step": 12194 }, { "epoch": 0.7, "grad_norm": 1.7032337188720703, "learning_rate": 4.376523910012627e-06, "loss": 0.9057, "step": 12195 }, { "epoch": 0.7, "grad_norm": 1.7046489715576172, "learning_rate": 4.3749879463073854e-06, "loss": 0.9698, "step": 12196 }, { "epoch": 0.7, "grad_norm": 1.6963069438934326, "learning_rate": 4.373452176705601e-06, "loss": 0.9395, "step": 12197 }, { "epoch": 0.7, "grad_norm": 1.8119169473648071, "learning_rate": 4.371916601260262e-06, "loss": 0.9465, "step": 12198 }, { "epoch": 0.7, "grad_norm": 1.6900408267974854, "learning_rate": 4.370381220024362e-06, "loss": 1.0242, "step": 12199 }, { "epoch": 0.7, "grad_norm": 1.9985848665237427, "learning_rate": 4.368846033050879e-06, "loss": 0.8655, "step": 12200 }, { "epoch": 0.7, "grad_norm": 1.646001935005188, "learning_rate": 4.367311040392791e-06, "loss": 0.9745, "step": 12201 }, { "epoch": 0.7, "grad_norm": 1.6562753915786743, "learning_rate": 4.365776242103062e-06, "loss": 0.9819, "step": 12202 }, { "epoch": 0.7, "grad_norm": 1.6765224933624268, "learning_rate": 4.364241638234659e-06, "loss": 0.9175, "step": 12203 }, { "epoch": 0.7, "grad_norm": 1.613690733909607, "learning_rate": 4.362707228840531e-06, "loss": 0.8811, "step": 12204 }, { "epoch": 0.7, "grad_norm": 1.6638638973236084, "learning_rate": 4.36117301397363e-06, "loss": 0.8766, "step": 12205 }, { "epoch": 0.7, "grad_norm": 1.0590672492980957, "learning_rate": 4.3596389936869e-06, "loss": 0.5949, "step": 12206 }, { "epoch": 0.7, "grad_norm": 1.7519869804382324, "learning_rate": 4.358105168033269e-06, "loss": 0.9128, "step": 12207 }, { "epoch": 0.7, "grad_norm": 1.703728437423706, "learning_rate": 4.3565715370656725e-06, "loss": 0.9564, "step": 12208 }, { "epoch": 0.7, "grad_norm": 1.8182377815246582, "learning_rate": 4.355038100837023e-06, "loss": 0.9266, "step": 12209 }, { "epoch": 0.7, "grad_norm": 1.6696603298187256, "learning_rate": 4.353504859400246e-06, "loss": 0.86, "step": 12210 }, { "epoch": 0.7, "grad_norm": 0.9939159154891968, "learning_rate": 4.351971812808239e-06, "loss": 0.513, "step": 12211 }, { "epoch": 0.7, "grad_norm": 1.8564430475234985, "learning_rate": 4.350438961113911e-06, "loss": 1.0003, "step": 12212 }, { "epoch": 0.7, "grad_norm": 1.786815881729126, "learning_rate": 4.348906304370148e-06, "loss": 0.8335, "step": 12213 }, { "epoch": 0.7, "grad_norm": 1.6922428607940674, "learning_rate": 4.3473738426298485e-06, "loss": 0.9523, "step": 12214 }, { "epoch": 0.7, "grad_norm": 1.7936313152313232, "learning_rate": 4.345841575945884e-06, "loss": 0.9573, "step": 12215 }, { "epoch": 0.7, "grad_norm": 1.7848342657089233, "learning_rate": 4.344309504371135e-06, "loss": 0.8772, "step": 12216 }, { "epoch": 0.7, "grad_norm": 1.7737482786178589, "learning_rate": 4.342777627958463e-06, "loss": 1.0202, "step": 12217 }, { "epoch": 0.7, "grad_norm": 1.8887823820114136, "learning_rate": 4.341245946760733e-06, "loss": 0.9995, "step": 12218 }, { "epoch": 0.7, "grad_norm": 1.6565766334533691, "learning_rate": 4.339714460830802e-06, "loss": 0.904, "step": 12219 }, { "epoch": 0.7, "grad_norm": 1.8045833110809326, "learning_rate": 4.338183170221508e-06, "loss": 0.939, "step": 12220 }, { "epoch": 0.7, "grad_norm": 1.729711651802063, "learning_rate": 4.336652074985703e-06, "loss": 0.91, "step": 12221 }, { "epoch": 0.7, "grad_norm": 1.0387065410614014, "learning_rate": 4.3351211751762104e-06, "loss": 0.5337, "step": 12222 }, { "epoch": 0.7, "grad_norm": 1.7121163606643677, "learning_rate": 4.333590470845866e-06, "loss": 0.872, "step": 12223 }, { "epoch": 0.7, "grad_norm": 1.7705730199813843, "learning_rate": 4.332059962047481e-06, "loss": 0.8938, "step": 12224 }, { "epoch": 0.7, "grad_norm": 1.8683247566223145, "learning_rate": 4.330529648833879e-06, "loss": 0.9195, "step": 12225 }, { "epoch": 0.7, "grad_norm": 1.6935882568359375, "learning_rate": 4.3289995312578585e-06, "loss": 0.942, "step": 12226 }, { "epoch": 0.7, "grad_norm": 1.761215090751648, "learning_rate": 4.327469609372224e-06, "loss": 0.9786, "step": 12227 }, { "epoch": 0.7, "grad_norm": 1.6463446617126465, "learning_rate": 4.3259398832297665e-06, "loss": 0.8471, "step": 12228 }, { "epoch": 0.7, "grad_norm": 1.6273789405822754, "learning_rate": 4.324410352883277e-06, "loss": 0.9037, "step": 12229 }, { "epoch": 0.7, "grad_norm": 1.8586559295654297, "learning_rate": 4.322881018385527e-06, "loss": 0.9008, "step": 12230 }, { "epoch": 0.7, "grad_norm": 1.6294889450073242, "learning_rate": 4.321351879789296e-06, "loss": 0.8492, "step": 12231 }, { "epoch": 0.7, "grad_norm": 1.7973731756210327, "learning_rate": 4.3198229371473535e-06, "loss": 0.9872, "step": 12232 }, { "epoch": 0.7, "grad_norm": 1.8881688117980957, "learning_rate": 4.31829419051245e-06, "loss": 0.9911, "step": 12233 }, { "epoch": 0.7, "grad_norm": 1.6258580684661865, "learning_rate": 4.316765639937346e-06, "loss": 0.9493, "step": 12234 }, { "epoch": 0.7, "grad_norm": 1.773603081703186, "learning_rate": 4.31523728547478e-06, "loss": 0.9609, "step": 12235 }, { "epoch": 0.7, "grad_norm": 0.9875374436378479, "learning_rate": 4.3137091271775e-06, "loss": 0.5411, "step": 12236 }, { "epoch": 0.7, "grad_norm": 1.6680865287780762, "learning_rate": 4.3121811650982306e-06, "loss": 0.859, "step": 12237 }, { "epoch": 0.7, "grad_norm": 1.8523112535476685, "learning_rate": 4.310653399289705e-06, "loss": 0.9491, "step": 12238 }, { "epoch": 0.7, "grad_norm": 1.8099464178085327, "learning_rate": 4.309125829804633e-06, "loss": 0.8759, "step": 12239 }, { "epoch": 0.7, "grad_norm": 1.7375110387802124, "learning_rate": 4.307598456695736e-06, "loss": 0.8817, "step": 12240 }, { "epoch": 0.7, "grad_norm": 2.2342143058776855, "learning_rate": 4.306071280015713e-06, "loss": 0.9653, "step": 12241 }, { "epoch": 0.7, "grad_norm": 2.0205042362213135, "learning_rate": 4.304544299817263e-06, "loss": 0.93, "step": 12242 }, { "epoch": 0.7, "grad_norm": 1.7261152267456055, "learning_rate": 4.303017516153083e-06, "loss": 0.9328, "step": 12243 }, { "epoch": 0.7, "grad_norm": 1.8399591445922852, "learning_rate": 4.3014909290758525e-06, "loss": 0.891, "step": 12244 }, { "epoch": 0.7, "grad_norm": 1.704454779624939, "learning_rate": 4.299964538638255e-06, "loss": 0.8718, "step": 12245 }, { "epoch": 0.7, "grad_norm": 1.7072902917861938, "learning_rate": 4.298438344892954e-06, "loss": 0.9065, "step": 12246 }, { "epoch": 0.7, "grad_norm": 1.7964297533035278, "learning_rate": 4.296912347892625e-06, "loss": 0.8728, "step": 12247 }, { "epoch": 0.7, "grad_norm": 1.6738181114196777, "learning_rate": 4.295386547689913e-06, "loss": 0.8801, "step": 12248 }, { "epoch": 0.7, "grad_norm": 1.88579523563385, "learning_rate": 4.293860944337482e-06, "loss": 0.9585, "step": 12249 }, { "epoch": 0.7, "grad_norm": 1.784139633178711, "learning_rate": 4.2923355378879675e-06, "loss": 0.9878, "step": 12250 }, { "epoch": 0.7, "grad_norm": 1.8959250450134277, "learning_rate": 4.290810328394008e-06, "loss": 0.8566, "step": 12251 }, { "epoch": 0.7, "grad_norm": 1.6999598741531372, "learning_rate": 4.289285315908237e-06, "loss": 0.8978, "step": 12252 }, { "epoch": 0.7, "grad_norm": 2.2608280181884766, "learning_rate": 4.2877605004832816e-06, "loss": 0.9765, "step": 12253 }, { "epoch": 0.7, "grad_norm": 1.7862107753753662, "learning_rate": 4.2862358821717496e-06, "loss": 0.8848, "step": 12254 }, { "epoch": 0.7, "grad_norm": 1.8780516386032104, "learning_rate": 4.284711461026262e-06, "loss": 0.9402, "step": 12255 }, { "epoch": 0.7, "grad_norm": 1.730836272239685, "learning_rate": 4.283187237099412e-06, "loss": 0.9146, "step": 12256 }, { "epoch": 0.7, "grad_norm": 1.8147203922271729, "learning_rate": 4.281663210443805e-06, "loss": 0.9374, "step": 12257 }, { "epoch": 0.7, "grad_norm": 1.9632763862609863, "learning_rate": 4.280139381112024e-06, "loss": 0.9564, "step": 12258 }, { "epoch": 0.7, "grad_norm": 1.7735322713851929, "learning_rate": 4.278615749156655e-06, "loss": 0.901, "step": 12259 }, { "epoch": 0.7, "grad_norm": 1.5340155363082886, "learning_rate": 4.277092314630278e-06, "loss": 0.9071, "step": 12260 }, { "epoch": 0.7, "grad_norm": 0.9996885657310486, "learning_rate": 4.275569077585455e-06, "loss": 0.4977, "step": 12261 }, { "epoch": 0.7, "grad_norm": 1.8660857677459717, "learning_rate": 4.274046038074756e-06, "loss": 0.9849, "step": 12262 }, { "epoch": 0.7, "grad_norm": 1.7176084518432617, "learning_rate": 4.27252319615073e-06, "loss": 0.9249, "step": 12263 }, { "epoch": 0.7, "grad_norm": 1.6369363069534302, "learning_rate": 4.271000551865934e-06, "loss": 0.8533, "step": 12264 }, { "epoch": 0.7, "grad_norm": 1.8407223224639893, "learning_rate": 4.269478105272901e-06, "loss": 0.9376, "step": 12265 }, { "epoch": 0.7, "grad_norm": 1.8782856464385986, "learning_rate": 4.267955856424175e-06, "loss": 0.9669, "step": 12266 }, { "epoch": 0.7, "grad_norm": 1.7713334560394287, "learning_rate": 4.266433805372278e-06, "loss": 0.929, "step": 12267 }, { "epoch": 0.7, "grad_norm": 1.8101853132247925, "learning_rate": 4.264911952169736e-06, "loss": 0.966, "step": 12268 }, { "epoch": 0.7, "grad_norm": 1.7709208726882935, "learning_rate": 4.26339029686906e-06, "loss": 0.8807, "step": 12269 }, { "epoch": 0.7, "grad_norm": 1.7185662984848022, "learning_rate": 4.2618688395227624e-06, "loss": 0.968, "step": 12270 }, { "epoch": 0.7, "grad_norm": 1.855184555053711, "learning_rate": 4.26034758018334e-06, "loss": 0.8851, "step": 12271 }, { "epoch": 0.7, "grad_norm": 1.8644254207611084, "learning_rate": 4.25882651890329e-06, "loss": 1.0355, "step": 12272 }, { "epoch": 0.7, "grad_norm": 1.6256617307662964, "learning_rate": 4.2573056557351015e-06, "loss": 0.8821, "step": 12273 }, { "epoch": 0.7, "grad_norm": 1.7973995208740234, "learning_rate": 4.2557849907312494e-06, "loss": 0.9267, "step": 12274 }, { "epoch": 0.7, "grad_norm": 1.726332664489746, "learning_rate": 4.254264523944217e-06, "loss": 0.8717, "step": 12275 }, { "epoch": 0.7, "grad_norm": 1.6894577741622925, "learning_rate": 4.252744255426461e-06, "loss": 0.8925, "step": 12276 }, { "epoch": 0.7, "grad_norm": 1.8342831134796143, "learning_rate": 4.2512241852304506e-06, "loss": 0.9195, "step": 12277 }, { "epoch": 0.7, "grad_norm": 1.6363437175750732, "learning_rate": 4.249704313408632e-06, "loss": 0.9135, "step": 12278 }, { "epoch": 0.7, "grad_norm": 1.8010531663894653, "learning_rate": 4.248184640013456e-06, "loss": 0.9287, "step": 12279 }, { "epoch": 0.7, "grad_norm": 1.7862954139709473, "learning_rate": 4.24666516509736e-06, "loss": 0.921, "step": 12280 }, { "epoch": 0.7, "grad_norm": 1.6051653623580933, "learning_rate": 4.24514588871278e-06, "loss": 0.8599, "step": 12281 }, { "epoch": 0.7, "grad_norm": 1.816988468170166, "learning_rate": 4.243626810912137e-06, "loss": 0.9146, "step": 12282 }, { "epoch": 0.7, "grad_norm": 1.6443513631820679, "learning_rate": 4.242107931747855e-06, "loss": 0.9604, "step": 12283 }, { "epoch": 0.7, "grad_norm": 1.6637365818023682, "learning_rate": 4.240589251272342e-06, "loss": 0.8863, "step": 12284 }, { "epoch": 0.7, "grad_norm": 1.7601675987243652, "learning_rate": 4.2390707695380065e-06, "loss": 0.9393, "step": 12285 }, { "epoch": 0.7, "grad_norm": 1.8307349681854248, "learning_rate": 4.2375524865972485e-06, "loss": 0.9495, "step": 12286 }, { "epoch": 0.7, "grad_norm": 1.7934890985488892, "learning_rate": 4.236034402502454e-06, "loss": 0.9336, "step": 12287 }, { "epoch": 0.7, "grad_norm": 1.90567147731781, "learning_rate": 4.234516517306016e-06, "loss": 0.9258, "step": 12288 }, { "epoch": 0.7, "grad_norm": 1.8131781816482544, "learning_rate": 4.2329988310603025e-06, "loss": 0.9204, "step": 12289 }, { "epoch": 0.7, "grad_norm": 1.0351872444152832, "learning_rate": 4.231481343817694e-06, "loss": 0.5383, "step": 12290 }, { "epoch": 0.7, "grad_norm": 1.5632492303848267, "learning_rate": 4.229964055630547e-06, "loss": 0.8826, "step": 12291 }, { "epoch": 0.7, "grad_norm": 1.6366617679595947, "learning_rate": 4.2284469665512265e-06, "loss": 0.8572, "step": 12292 }, { "epoch": 0.71, "grad_norm": 1.8293219804763794, "learning_rate": 4.226930076632075e-06, "loss": 0.9581, "step": 12293 }, { "epoch": 0.71, "grad_norm": 1.7509188652038574, "learning_rate": 4.2254133859254445e-06, "loss": 0.8889, "step": 12294 }, { "epoch": 0.71, "grad_norm": 1.7620365619659424, "learning_rate": 4.223896894483664e-06, "loss": 0.9245, "step": 12295 }, { "epoch": 0.71, "grad_norm": 1.0621201992034912, "learning_rate": 4.222380602359065e-06, "loss": 0.4952, "step": 12296 }, { "epoch": 0.71, "grad_norm": 1.8416523933410645, "learning_rate": 4.220864509603977e-06, "loss": 0.9421, "step": 12297 }, { "epoch": 0.71, "grad_norm": 1.8055000305175781, "learning_rate": 4.219348616270707e-06, "loss": 0.9643, "step": 12298 }, { "epoch": 0.71, "grad_norm": 1.926814079284668, "learning_rate": 4.217832922411574e-06, "loss": 0.937, "step": 12299 }, { "epoch": 0.71, "grad_norm": 1.6999422311782837, "learning_rate": 4.21631742807887e-06, "loss": 0.9289, "step": 12300 }, { "epoch": 0.71, "grad_norm": 1.5582475662231445, "learning_rate": 4.2148021333249e-06, "loss": 0.7853, "step": 12301 }, { "epoch": 0.71, "grad_norm": 1.8325923681259155, "learning_rate": 4.213287038201943e-06, "loss": 0.9646, "step": 12302 }, { "epoch": 0.71, "grad_norm": 1.7302120923995972, "learning_rate": 4.2117721427622916e-06, "loss": 0.9201, "step": 12303 }, { "epoch": 0.71, "grad_norm": 1.658416748046875, "learning_rate": 4.2102574470582094e-06, "loss": 0.7855, "step": 12304 }, { "epoch": 0.71, "grad_norm": 1.869757056236267, "learning_rate": 4.208742951141974e-06, "loss": 0.8731, "step": 12305 }, { "epoch": 0.71, "grad_norm": 1.5227854251861572, "learning_rate": 4.207228655065838e-06, "loss": 0.922, "step": 12306 }, { "epoch": 0.71, "grad_norm": 1.7483675479888916, "learning_rate": 4.205714558882064e-06, "loss": 0.9423, "step": 12307 }, { "epoch": 0.71, "grad_norm": 1.9386212825775146, "learning_rate": 4.204200662642891e-06, "loss": 0.9667, "step": 12308 }, { "epoch": 0.71, "grad_norm": 1.5964329242706299, "learning_rate": 4.2026869664005635e-06, "loss": 0.9097, "step": 12309 }, { "epoch": 0.71, "grad_norm": 1.6607552766799927, "learning_rate": 4.201173470207317e-06, "loss": 0.9291, "step": 12310 }, { "epoch": 0.71, "grad_norm": 1.658761739730835, "learning_rate": 4.199660174115373e-06, "loss": 0.9318, "step": 12311 }, { "epoch": 0.71, "grad_norm": 1.7204349040985107, "learning_rate": 4.1981470781769574e-06, "loss": 0.8651, "step": 12312 }, { "epoch": 0.71, "grad_norm": 1.692347526550293, "learning_rate": 4.196634182444276e-06, "loss": 0.9394, "step": 12313 }, { "epoch": 0.71, "grad_norm": 1.8105977773666382, "learning_rate": 4.195121486969541e-06, "loss": 0.984, "step": 12314 }, { "epoch": 0.71, "grad_norm": 1.9936108589172363, "learning_rate": 4.193608991804945e-06, "loss": 0.9871, "step": 12315 }, { "epoch": 0.71, "grad_norm": 1.7505136728286743, "learning_rate": 4.192096697002686e-06, "loss": 0.9003, "step": 12316 }, { "epoch": 0.71, "grad_norm": 1.6852811574935913, "learning_rate": 4.19058460261494e-06, "loss": 0.8776, "step": 12317 }, { "epoch": 0.71, "grad_norm": 1.7839736938476562, "learning_rate": 4.189072708693899e-06, "loss": 0.9221, "step": 12318 }, { "epoch": 0.71, "grad_norm": 1.588187336921692, "learning_rate": 4.1875610152917225e-06, "loss": 0.8894, "step": 12319 }, { "epoch": 0.71, "grad_norm": 1.654762625694275, "learning_rate": 4.186049522460581e-06, "loss": 0.835, "step": 12320 }, { "epoch": 0.71, "grad_norm": 1.8535449504852295, "learning_rate": 4.184538230252628e-06, "loss": 0.9031, "step": 12321 }, { "epoch": 0.71, "grad_norm": 1.5364501476287842, "learning_rate": 4.183027138720019e-06, "loss": 0.8726, "step": 12322 }, { "epoch": 0.71, "grad_norm": 1.8365474939346313, "learning_rate": 4.181516247914892e-06, "loss": 0.9154, "step": 12323 }, { "epoch": 0.71, "grad_norm": 1.5075379610061646, "learning_rate": 4.180005557889388e-06, "loss": 0.7941, "step": 12324 }, { "epoch": 0.71, "grad_norm": 1.7151494026184082, "learning_rate": 4.178495068695632e-06, "loss": 0.976, "step": 12325 }, { "epoch": 0.71, "grad_norm": 1.7232338190078735, "learning_rate": 4.176984780385749e-06, "loss": 1.0321, "step": 12326 }, { "epoch": 0.71, "grad_norm": 1.9207673072814941, "learning_rate": 4.175474693011858e-06, "loss": 0.967, "step": 12327 }, { "epoch": 0.71, "grad_norm": 1.7690528631210327, "learning_rate": 4.173964806626063e-06, "loss": 0.9015, "step": 12328 }, { "epoch": 0.71, "grad_norm": 1.6922712326049805, "learning_rate": 4.172455121280471e-06, "loss": 0.9439, "step": 12329 }, { "epoch": 0.71, "grad_norm": 1.8027880191802979, "learning_rate": 4.1709456370271716e-06, "loss": 0.9474, "step": 12330 }, { "epoch": 0.71, "grad_norm": 1.946854591369629, "learning_rate": 4.169436353918258e-06, "loss": 0.9256, "step": 12331 }, { "epoch": 0.71, "grad_norm": 1.715364933013916, "learning_rate": 4.167927272005805e-06, "loss": 0.8614, "step": 12332 }, { "epoch": 0.71, "grad_norm": 1.6620863676071167, "learning_rate": 4.1664183913418955e-06, "loss": 0.8832, "step": 12333 }, { "epoch": 0.71, "grad_norm": 1.7056200504302979, "learning_rate": 4.164909711978587e-06, "loss": 0.8893, "step": 12334 }, { "epoch": 0.71, "grad_norm": 1.67353093624115, "learning_rate": 4.163401233967949e-06, "loss": 0.9014, "step": 12335 }, { "epoch": 0.71, "grad_norm": 1.7750691175460815, "learning_rate": 4.161892957362027e-06, "loss": 0.896, "step": 12336 }, { "epoch": 0.71, "grad_norm": 1.7011590003967285, "learning_rate": 4.160384882212875e-06, "loss": 0.8289, "step": 12337 }, { "epoch": 0.71, "grad_norm": 1.7766766548156738, "learning_rate": 4.158877008572523e-06, "loss": 0.8957, "step": 12338 }, { "epoch": 0.71, "grad_norm": 1.6974159479141235, "learning_rate": 4.15736933649301e-06, "loss": 0.8842, "step": 12339 }, { "epoch": 0.71, "grad_norm": 2.032731056213379, "learning_rate": 4.155861866026364e-06, "loss": 0.9313, "step": 12340 }, { "epoch": 0.71, "grad_norm": 1.81290602684021, "learning_rate": 4.154354597224597e-06, "loss": 0.9109, "step": 12341 }, { "epoch": 0.71, "grad_norm": 1.8402010202407837, "learning_rate": 4.152847530139726e-06, "loss": 0.9116, "step": 12342 }, { "epoch": 0.71, "grad_norm": 1.7119414806365967, "learning_rate": 4.151340664823751e-06, "loss": 0.9269, "step": 12343 }, { "epoch": 0.71, "grad_norm": 1.7739782333374023, "learning_rate": 4.1498340013286755e-06, "loss": 0.8528, "step": 12344 }, { "epoch": 0.71, "grad_norm": 1.058659315109253, "learning_rate": 4.148327539706483e-06, "loss": 0.5841, "step": 12345 }, { "epoch": 0.71, "grad_norm": 1.7927967309951782, "learning_rate": 4.146821280009165e-06, "loss": 0.8347, "step": 12346 }, { "epoch": 0.71, "grad_norm": 1.8593764305114746, "learning_rate": 4.14531522228869e-06, "loss": 0.9265, "step": 12347 }, { "epoch": 0.71, "grad_norm": 1.7242666482925415, "learning_rate": 4.143809366597037e-06, "loss": 0.9712, "step": 12348 }, { "epoch": 0.71, "grad_norm": 1.7882723808288574, "learning_rate": 4.14230371298616e-06, "loss": 0.9409, "step": 12349 }, { "epoch": 0.71, "grad_norm": 1.588816523551941, "learning_rate": 4.140798261508019e-06, "loss": 0.9311, "step": 12350 }, { "epoch": 0.71, "grad_norm": 1.5768983364105225, "learning_rate": 4.139293012214566e-06, "loss": 0.8495, "step": 12351 }, { "epoch": 0.71, "grad_norm": 1.850172519683838, "learning_rate": 4.137787965157737e-06, "loss": 0.9402, "step": 12352 }, { "epoch": 0.71, "grad_norm": 1.6511579751968384, "learning_rate": 4.136283120389474e-06, "loss": 0.8052, "step": 12353 }, { "epoch": 0.71, "grad_norm": 1.7065151929855347, "learning_rate": 4.134778477961696e-06, "loss": 0.9554, "step": 12354 }, { "epoch": 0.71, "grad_norm": 1.922829270362854, "learning_rate": 4.1332740379263335e-06, "loss": 0.9357, "step": 12355 }, { "epoch": 0.71, "grad_norm": 2.006742000579834, "learning_rate": 4.131769800335293e-06, "loss": 0.9558, "step": 12356 }, { "epoch": 0.71, "grad_norm": 1.7688363790512085, "learning_rate": 4.1302657652404865e-06, "loss": 0.9481, "step": 12357 }, { "epoch": 0.71, "grad_norm": 1.6695479154586792, "learning_rate": 4.128761932693809e-06, "loss": 0.9734, "step": 12358 }, { "epoch": 0.71, "grad_norm": 1.860970377922058, "learning_rate": 4.127258302747159e-06, "loss": 0.939, "step": 12359 }, { "epoch": 0.71, "grad_norm": 1.7460209131240845, "learning_rate": 4.1257548754524175e-06, "loss": 0.9254, "step": 12360 }, { "epoch": 0.71, "grad_norm": 1.9051164388656616, "learning_rate": 4.124251650861471e-06, "loss": 0.9823, "step": 12361 }, { "epoch": 0.71, "grad_norm": 1.7319355010986328, "learning_rate": 4.122748629026182e-06, "loss": 0.9391, "step": 12362 }, { "epoch": 0.71, "grad_norm": 1.7394003868103027, "learning_rate": 4.12124580999842e-06, "loss": 0.871, "step": 12363 }, { "epoch": 0.71, "grad_norm": 1.6891359090805054, "learning_rate": 4.119743193830048e-06, "loss": 0.9179, "step": 12364 }, { "epoch": 0.71, "grad_norm": 1.8783525228500366, "learning_rate": 4.1182407805729084e-06, "loss": 0.9281, "step": 12365 }, { "epoch": 0.71, "grad_norm": 1.6569445133209229, "learning_rate": 4.116738570278853e-06, "loss": 0.8388, "step": 12366 }, { "epoch": 0.71, "grad_norm": 1.0325391292572021, "learning_rate": 4.115236562999713e-06, "loss": 0.5731, "step": 12367 }, { "epoch": 0.71, "grad_norm": 1.8691225051879883, "learning_rate": 4.113734758787322e-06, "loss": 0.9262, "step": 12368 }, { "epoch": 0.71, "grad_norm": 1.6074047088623047, "learning_rate": 4.112233157693501e-06, "loss": 0.8511, "step": 12369 }, { "epoch": 0.71, "grad_norm": 1.8227767944335938, "learning_rate": 4.110731759770068e-06, "loss": 0.9323, "step": 12370 }, { "epoch": 0.71, "grad_norm": 1.777860403060913, "learning_rate": 4.109230565068828e-06, "loss": 0.9444, "step": 12371 }, { "epoch": 0.71, "grad_norm": 1.888817310333252, "learning_rate": 4.10772957364159e-06, "loss": 0.907, "step": 12372 }, { "epoch": 0.71, "grad_norm": 1.888682246208191, "learning_rate": 4.106228785540141e-06, "loss": 0.8766, "step": 12373 }, { "epoch": 0.71, "grad_norm": 1.8184161186218262, "learning_rate": 4.1047282008162734e-06, "loss": 0.9125, "step": 12374 }, { "epoch": 0.71, "grad_norm": 1.7346991300582886, "learning_rate": 4.1032278195217725e-06, "loss": 0.9246, "step": 12375 }, { "epoch": 0.71, "grad_norm": 1.8360766172409058, "learning_rate": 4.101727641708403e-06, "loss": 0.985, "step": 12376 }, { "epoch": 0.71, "grad_norm": 1.703584909439087, "learning_rate": 4.1002276674279395e-06, "loss": 0.8077, "step": 12377 }, { "epoch": 0.71, "grad_norm": 1.140605092048645, "learning_rate": 4.098727896732135e-06, "loss": 0.6147, "step": 12378 }, { "epoch": 0.71, "grad_norm": 1.724928855895996, "learning_rate": 4.097228329672751e-06, "loss": 0.8605, "step": 12379 }, { "epoch": 0.71, "grad_norm": 1.5698235034942627, "learning_rate": 4.095728966301526e-06, "loss": 0.8455, "step": 12380 }, { "epoch": 0.71, "grad_norm": 1.629245400428772, "learning_rate": 4.0942298066702026e-06, "loss": 0.9636, "step": 12381 }, { "epoch": 0.71, "grad_norm": 1.8827617168426514, "learning_rate": 4.092730850830509e-06, "loss": 0.9127, "step": 12382 }, { "epoch": 0.71, "grad_norm": 1.7989907264709473, "learning_rate": 4.0912320988341725e-06, "loss": 1.0247, "step": 12383 }, { "epoch": 0.71, "grad_norm": 0.9501909613609314, "learning_rate": 4.0897335507329104e-06, "loss": 0.4938, "step": 12384 }, { "epoch": 0.71, "grad_norm": 1.7616958618164062, "learning_rate": 4.088235206578438e-06, "loss": 0.8138, "step": 12385 }, { "epoch": 0.71, "grad_norm": 1.6607574224472046, "learning_rate": 4.086737066422451e-06, "loss": 0.9306, "step": 12386 }, { "epoch": 0.71, "grad_norm": 1.7319327592849731, "learning_rate": 4.085239130316653e-06, "loss": 0.9814, "step": 12387 }, { "epoch": 0.71, "grad_norm": 1.812543272972107, "learning_rate": 4.083741398312727e-06, "loss": 0.8766, "step": 12388 }, { "epoch": 0.71, "grad_norm": 1.7206640243530273, "learning_rate": 4.082243870462362e-06, "loss": 0.9932, "step": 12389 }, { "epoch": 0.71, "grad_norm": 1.7464792728424072, "learning_rate": 4.080746546817228e-06, "loss": 0.9365, "step": 12390 }, { "epoch": 0.71, "grad_norm": 1.7988104820251465, "learning_rate": 4.079249427428995e-06, "loss": 0.8834, "step": 12391 }, { "epoch": 0.71, "grad_norm": 2.004652261734009, "learning_rate": 4.077752512349329e-06, "loss": 0.8995, "step": 12392 }, { "epoch": 0.71, "grad_norm": 1.943991780281067, "learning_rate": 4.076255801629877e-06, "loss": 0.9497, "step": 12393 }, { "epoch": 0.71, "grad_norm": 1.8857355117797852, "learning_rate": 4.074759295322295e-06, "loss": 0.8529, "step": 12394 }, { "epoch": 0.71, "grad_norm": 1.818435788154602, "learning_rate": 4.073262993478213e-06, "loss": 0.9409, "step": 12395 }, { "epoch": 0.71, "grad_norm": 1.7896023988723755, "learning_rate": 4.0717668961492725e-06, "loss": 0.9767, "step": 12396 }, { "epoch": 0.71, "grad_norm": 1.6305948495864868, "learning_rate": 4.0702710033870955e-06, "loss": 0.9032, "step": 12397 }, { "epoch": 0.71, "grad_norm": 1.8447567224502563, "learning_rate": 4.068775315243303e-06, "loss": 0.8821, "step": 12398 }, { "epoch": 0.71, "grad_norm": 1.6577025651931763, "learning_rate": 4.067279831769504e-06, "loss": 0.941, "step": 12399 }, { "epoch": 0.71, "grad_norm": 1.6408772468566895, "learning_rate": 4.065784553017309e-06, "loss": 0.9069, "step": 12400 }, { "epoch": 0.71, "grad_norm": 1.7687045335769653, "learning_rate": 4.0642894790383094e-06, "loss": 0.959, "step": 12401 }, { "epoch": 0.71, "grad_norm": 1.1135081052780151, "learning_rate": 4.062794609884102e-06, "loss": 0.5891, "step": 12402 }, { "epoch": 0.71, "grad_norm": 1.851867914199829, "learning_rate": 4.061299945606264e-06, "loss": 0.9897, "step": 12403 }, { "epoch": 0.71, "grad_norm": 1.8630967140197754, "learning_rate": 4.059805486256376e-06, "loss": 0.8719, "step": 12404 }, { "epoch": 0.71, "grad_norm": 1.7124943733215332, "learning_rate": 4.058311231886012e-06, "loss": 0.9115, "step": 12405 }, { "epoch": 0.71, "grad_norm": 1.8081378936767578, "learning_rate": 4.056817182546725e-06, "loss": 0.8665, "step": 12406 }, { "epoch": 0.71, "grad_norm": 1.7391353845596313, "learning_rate": 4.055323338290079e-06, "loss": 0.9366, "step": 12407 }, { "epoch": 0.71, "grad_norm": 1.8912296295166016, "learning_rate": 4.053829699167616e-06, "loss": 0.8939, "step": 12408 }, { "epoch": 0.71, "grad_norm": 1.6939836740493774, "learning_rate": 4.052336265230884e-06, "loss": 0.844, "step": 12409 }, { "epoch": 0.71, "grad_norm": 1.7051935195922852, "learning_rate": 4.050843036531409e-06, "loss": 0.8683, "step": 12410 }, { "epoch": 0.71, "grad_norm": 1.7583786249160767, "learning_rate": 4.049350013120726e-06, "loss": 0.8762, "step": 12411 }, { "epoch": 0.71, "grad_norm": 1.638037919998169, "learning_rate": 4.047857195050349e-06, "loss": 0.8813, "step": 12412 }, { "epoch": 0.71, "grad_norm": 1.7822891473770142, "learning_rate": 4.046364582371795e-06, "loss": 0.9287, "step": 12413 }, { "epoch": 0.71, "grad_norm": 1.7306482791900635, "learning_rate": 4.0448721751365675e-06, "loss": 0.9232, "step": 12414 }, { "epoch": 0.71, "grad_norm": 2.1819629669189453, "learning_rate": 4.0433799733961685e-06, "loss": 0.9664, "step": 12415 }, { "epoch": 0.71, "grad_norm": 1.7338840961456299, "learning_rate": 4.0418879772020835e-06, "loss": 1.0119, "step": 12416 }, { "epoch": 0.71, "grad_norm": 1.8047226667404175, "learning_rate": 4.040396186605803e-06, "loss": 0.8692, "step": 12417 }, { "epoch": 0.71, "grad_norm": 1.8259817361831665, "learning_rate": 4.038904601658804e-06, "loss": 0.9406, "step": 12418 }, { "epoch": 0.71, "grad_norm": 1.7484159469604492, "learning_rate": 4.037413222412553e-06, "loss": 0.8792, "step": 12419 }, { "epoch": 0.71, "grad_norm": 1.5469627380371094, "learning_rate": 4.035922048918519e-06, "loss": 0.8614, "step": 12420 }, { "epoch": 0.71, "grad_norm": 1.8891232013702393, "learning_rate": 4.034431081228152e-06, "loss": 0.9197, "step": 12421 }, { "epoch": 0.71, "grad_norm": 1.7687270641326904, "learning_rate": 4.0329403193929075e-06, "loss": 0.9467, "step": 12422 }, { "epoch": 0.71, "grad_norm": 1.0497314929962158, "learning_rate": 4.031449763464222e-06, "loss": 0.5542, "step": 12423 }, { "epoch": 0.71, "grad_norm": 1.7079578638076782, "learning_rate": 4.0299594134935335e-06, "loss": 0.9123, "step": 12424 }, { "epoch": 0.71, "grad_norm": 1.802924633026123, "learning_rate": 4.028469269532268e-06, "loss": 0.9238, "step": 12425 }, { "epoch": 0.71, "grad_norm": 1.74862539768219, "learning_rate": 4.0269793316318496e-06, "loss": 0.9317, "step": 12426 }, { "epoch": 0.71, "grad_norm": 2.230196475982666, "learning_rate": 4.025489599843686e-06, "loss": 0.8103, "step": 12427 }, { "epoch": 0.71, "grad_norm": 1.1775816679000854, "learning_rate": 4.0240000742191875e-06, "loss": 0.5971, "step": 12428 }, { "epoch": 0.71, "grad_norm": 1.729634404182434, "learning_rate": 4.022510754809757e-06, "loss": 0.8588, "step": 12429 }, { "epoch": 0.71, "grad_norm": 1.8385225534439087, "learning_rate": 4.021021641666778e-06, "loss": 0.9568, "step": 12430 }, { "epoch": 0.71, "grad_norm": 1.9453097581863403, "learning_rate": 4.019532734841645e-06, "loss": 0.8932, "step": 12431 }, { "epoch": 0.71, "grad_norm": 2.0154201984405518, "learning_rate": 4.018044034385728e-06, "loss": 0.9893, "step": 12432 }, { "epoch": 0.71, "grad_norm": 1.0026189088821411, "learning_rate": 4.0165555403504055e-06, "loss": 0.5369, "step": 12433 }, { "epoch": 0.71, "grad_norm": 1.6947625875473022, "learning_rate": 4.015067252787033e-06, "loss": 0.9101, "step": 12434 }, { "epoch": 0.71, "grad_norm": 1.704961895942688, "learning_rate": 4.013579171746975e-06, "loss": 0.8898, "step": 12435 }, { "epoch": 0.71, "grad_norm": 1.846923828125, "learning_rate": 4.012091297281574e-06, "loss": 0.9626, "step": 12436 }, { "epoch": 0.71, "grad_norm": 1.9454129934310913, "learning_rate": 4.010603629442179e-06, "loss": 0.9867, "step": 12437 }, { "epoch": 0.71, "grad_norm": 1.5986747741699219, "learning_rate": 4.009116168280119e-06, "loss": 0.891, "step": 12438 }, { "epoch": 0.71, "grad_norm": 1.6478374004364014, "learning_rate": 4.0076289138467286e-06, "loss": 0.8941, "step": 12439 }, { "epoch": 0.71, "grad_norm": 1.9266692399978638, "learning_rate": 4.006141866193321e-06, "loss": 0.9009, "step": 12440 }, { "epoch": 0.71, "grad_norm": 1.8235458135604858, "learning_rate": 4.004655025371215e-06, "loss": 0.9583, "step": 12441 }, { "epoch": 0.71, "grad_norm": 1.7677868604660034, "learning_rate": 4.003168391431721e-06, "loss": 0.8511, "step": 12442 }, { "epoch": 0.71, "grad_norm": 1.0000358819961548, "learning_rate": 4.001681964426131e-06, "loss": 0.5646, "step": 12443 }, { "epoch": 0.71, "grad_norm": 1.7779160737991333, "learning_rate": 4.000195744405742e-06, "loss": 1.0032, "step": 12444 }, { "epoch": 0.71, "grad_norm": 1.6591830253601074, "learning_rate": 3.998709731421837e-06, "loss": 0.8998, "step": 12445 }, { "epoch": 0.71, "grad_norm": 1.0530970096588135, "learning_rate": 3.997223925525698e-06, "loss": 0.5643, "step": 12446 }, { "epoch": 0.71, "grad_norm": 0.9968852400779724, "learning_rate": 3.995738326768589e-06, "loss": 0.5334, "step": 12447 }, { "epoch": 0.71, "grad_norm": 2.000629425048828, "learning_rate": 3.994252935201782e-06, "loss": 0.949, "step": 12448 }, { "epoch": 0.71, "grad_norm": 1.8586641550064087, "learning_rate": 3.9927677508765235e-06, "loss": 0.9192, "step": 12449 }, { "epoch": 0.71, "grad_norm": 2.0317986011505127, "learning_rate": 3.991282773844076e-06, "loss": 0.9873, "step": 12450 }, { "epoch": 0.71, "grad_norm": 1.721819519996643, "learning_rate": 3.989798004155671e-06, "loss": 0.8754, "step": 12451 }, { "epoch": 0.71, "grad_norm": 1.7990550994873047, "learning_rate": 3.9883134418625535e-06, "loss": 0.9468, "step": 12452 }, { "epoch": 0.71, "grad_norm": 1.5679570436477661, "learning_rate": 3.986829087015941e-06, "loss": 0.9563, "step": 12453 }, { "epoch": 0.71, "grad_norm": 1.8023875951766968, "learning_rate": 3.985344939667064e-06, "loss": 0.894, "step": 12454 }, { "epoch": 0.71, "grad_norm": 1.7562695741653442, "learning_rate": 3.983860999867128e-06, "loss": 0.939, "step": 12455 }, { "epoch": 0.71, "grad_norm": 1.7863434553146362, "learning_rate": 3.982377267667347e-06, "loss": 0.8763, "step": 12456 }, { "epoch": 0.71, "grad_norm": 1.6930670738220215, "learning_rate": 3.980893743118913e-06, "loss": 0.901, "step": 12457 }, { "epoch": 0.71, "grad_norm": 1.6185472011566162, "learning_rate": 3.979410426273022e-06, "loss": 0.8511, "step": 12458 }, { "epoch": 0.71, "grad_norm": 1.8135086297988892, "learning_rate": 3.977927317180864e-06, "loss": 0.8511, "step": 12459 }, { "epoch": 0.71, "grad_norm": 1.6804667711257935, "learning_rate": 3.9764444158936075e-06, "loss": 1.0038, "step": 12460 }, { "epoch": 0.71, "grad_norm": 1.563314437866211, "learning_rate": 3.9749617224624325e-06, "loss": 0.8486, "step": 12461 }, { "epoch": 0.71, "grad_norm": 1.8163152933120728, "learning_rate": 3.9734792369384945e-06, "loss": 1.011, "step": 12462 }, { "epoch": 0.71, "grad_norm": 1.882642388343811, "learning_rate": 3.971996959372958e-06, "loss": 0.934, "step": 12463 }, { "epoch": 0.71, "grad_norm": 1.6761990785598755, "learning_rate": 3.970514889816963e-06, "loss": 0.8904, "step": 12464 }, { "epoch": 0.71, "grad_norm": 1.7790278196334839, "learning_rate": 3.96903302832166e-06, "loss": 0.9655, "step": 12465 }, { "epoch": 0.71, "grad_norm": 1.7485060691833496, "learning_rate": 3.967551374938178e-06, "loss": 0.974, "step": 12466 }, { "epoch": 0.72, "grad_norm": 1.7336326837539673, "learning_rate": 3.96606992971765e-06, "loss": 0.9246, "step": 12467 }, { "epoch": 0.72, "grad_norm": 1.7338225841522217, "learning_rate": 3.96458869271119e-06, "loss": 0.9192, "step": 12468 }, { "epoch": 0.72, "grad_norm": 1.676155924797058, "learning_rate": 3.9631076639699185e-06, "loss": 0.9595, "step": 12469 }, { "epoch": 0.72, "grad_norm": 1.6723685264587402, "learning_rate": 3.961626843544935e-06, "loss": 0.9599, "step": 12470 }, { "epoch": 0.72, "grad_norm": 1.919965147972107, "learning_rate": 3.9601462314873405e-06, "loss": 0.9325, "step": 12471 }, { "epoch": 0.72, "grad_norm": 1.9680243730545044, "learning_rate": 3.958665827848233e-06, "loss": 0.8786, "step": 12472 }, { "epoch": 0.72, "grad_norm": 1.614292025566101, "learning_rate": 3.957185632678687e-06, "loss": 1.0014, "step": 12473 }, { "epoch": 0.72, "grad_norm": 1.982468605041504, "learning_rate": 3.95570564602979e-06, "loss": 0.9419, "step": 12474 }, { "epoch": 0.72, "grad_norm": 1.0683231353759766, "learning_rate": 3.954225867952602e-06, "loss": 0.5804, "step": 12475 }, { "epoch": 0.72, "grad_norm": 1.6746337413787842, "learning_rate": 3.9527462984981954e-06, "loss": 0.9248, "step": 12476 }, { "epoch": 0.72, "grad_norm": 1.7570018768310547, "learning_rate": 3.951266937717619e-06, "loss": 0.9298, "step": 12477 }, { "epoch": 0.72, "grad_norm": 1.7199463844299316, "learning_rate": 3.949787785661926e-06, "loss": 0.9107, "step": 12478 }, { "epoch": 0.72, "grad_norm": 1.9739207029342651, "learning_rate": 3.948308842382154e-06, "loss": 1.0158, "step": 12479 }, { "epoch": 0.72, "grad_norm": 1.806829810142517, "learning_rate": 3.946830107929342e-06, "loss": 0.9366, "step": 12480 }, { "epoch": 0.72, "grad_norm": 1.7213636636734009, "learning_rate": 3.94535158235451e-06, "loss": 1.0085, "step": 12481 }, { "epoch": 0.72, "grad_norm": 1.725403070449829, "learning_rate": 3.943873265708682e-06, "loss": 0.9565, "step": 12482 }, { "epoch": 0.72, "grad_norm": 1.7160180807113647, "learning_rate": 3.9423951580428744e-06, "loss": 0.9792, "step": 12483 }, { "epoch": 0.72, "grad_norm": 1.6791510581970215, "learning_rate": 3.940917259408085e-06, "loss": 0.9852, "step": 12484 }, { "epoch": 0.72, "grad_norm": 1.8221672773361206, "learning_rate": 3.939439569855319e-06, "loss": 0.9131, "step": 12485 }, { "epoch": 0.72, "grad_norm": 1.7410014867782593, "learning_rate": 3.937962089435561e-06, "loss": 0.9524, "step": 12486 }, { "epoch": 0.72, "grad_norm": 1.6324843168258667, "learning_rate": 3.936484818199801e-06, "loss": 0.8046, "step": 12487 }, { "epoch": 0.72, "grad_norm": 1.7280384302139282, "learning_rate": 3.935007756199009e-06, "loss": 0.8583, "step": 12488 }, { "epoch": 0.72, "grad_norm": 1.6598620414733887, "learning_rate": 3.9335309034841595e-06, "loss": 0.9518, "step": 12489 }, { "epoch": 0.72, "grad_norm": 1.9313338994979858, "learning_rate": 3.932054260106209e-06, "loss": 0.9691, "step": 12490 }, { "epoch": 0.72, "grad_norm": 1.8046241998672485, "learning_rate": 3.9305778261161205e-06, "loss": 0.9595, "step": 12491 }, { "epoch": 0.72, "grad_norm": 1.7612308263778687, "learning_rate": 3.929101601564834e-06, "loss": 0.9195, "step": 12492 }, { "epoch": 0.72, "grad_norm": 1.9295299053192139, "learning_rate": 3.9276255865032965e-06, "loss": 0.9285, "step": 12493 }, { "epoch": 0.72, "grad_norm": 1.8986449241638184, "learning_rate": 3.926149780982432e-06, "loss": 1.0166, "step": 12494 }, { "epoch": 0.72, "grad_norm": 1.7338577508926392, "learning_rate": 3.924674185053173e-06, "loss": 0.9299, "step": 12495 }, { "epoch": 0.72, "grad_norm": 1.605047345161438, "learning_rate": 3.923198798766441e-06, "loss": 0.8553, "step": 12496 }, { "epoch": 0.72, "grad_norm": 1.671620488166809, "learning_rate": 3.92172362217314e-06, "loss": 0.9145, "step": 12497 }, { "epoch": 0.72, "grad_norm": 1.8116626739501953, "learning_rate": 3.920248655324182e-06, "loss": 0.9558, "step": 12498 }, { "epoch": 0.72, "grad_norm": 1.6567705869674683, "learning_rate": 3.918773898270455e-06, "loss": 0.9004, "step": 12499 }, { "epoch": 0.72, "grad_norm": 2.235962390899658, "learning_rate": 3.917299351062858e-06, "loss": 0.9996, "step": 12500 }, { "epoch": 0.72, "grad_norm": 1.701453685760498, "learning_rate": 3.915825013752265e-06, "loss": 0.9198, "step": 12501 }, { "epoch": 0.72, "grad_norm": 1.7635984420776367, "learning_rate": 3.914350886389558e-06, "loss": 0.9939, "step": 12502 }, { "epoch": 0.72, "grad_norm": 1.929249882698059, "learning_rate": 3.912876969025601e-06, "loss": 0.8475, "step": 12503 }, { "epoch": 0.72, "grad_norm": 1.7297722101211548, "learning_rate": 3.911403261711257e-06, "loss": 0.8454, "step": 12504 }, { "epoch": 0.72, "grad_norm": 1.6612706184387207, "learning_rate": 3.909929764497377e-06, "loss": 0.8418, "step": 12505 }, { "epoch": 0.72, "grad_norm": 1.6837877035140991, "learning_rate": 3.908456477434809e-06, "loss": 0.8637, "step": 12506 }, { "epoch": 0.72, "grad_norm": 1.9422123432159424, "learning_rate": 3.906983400574394e-06, "loss": 0.9981, "step": 12507 }, { "epoch": 0.72, "grad_norm": 1.8037015199661255, "learning_rate": 3.905510533966959e-06, "loss": 0.9109, "step": 12508 }, { "epoch": 0.72, "grad_norm": 1.581459879875183, "learning_rate": 3.9040378776633355e-06, "loss": 0.9029, "step": 12509 }, { "epoch": 0.72, "grad_norm": 1.871157169342041, "learning_rate": 3.902565431714333e-06, "loss": 0.9423, "step": 12510 }, { "epoch": 0.72, "grad_norm": 1.815946340560913, "learning_rate": 3.901093196170766e-06, "loss": 0.9139, "step": 12511 }, { "epoch": 0.72, "grad_norm": 2.022512674331665, "learning_rate": 3.899621171083435e-06, "loss": 0.9518, "step": 12512 }, { "epoch": 0.72, "grad_norm": 1.766019582748413, "learning_rate": 3.898149356503139e-06, "loss": 0.9181, "step": 12513 }, { "epoch": 0.72, "grad_norm": 1.7870184183120728, "learning_rate": 3.896677752480662e-06, "loss": 0.9607, "step": 12514 }, { "epoch": 0.72, "grad_norm": 1.7253893613815308, "learning_rate": 3.8952063590667855e-06, "loss": 0.8236, "step": 12515 }, { "epoch": 0.72, "grad_norm": 0.9726859331130981, "learning_rate": 3.893735176312284e-06, "loss": 0.5207, "step": 12516 }, { "epoch": 0.72, "grad_norm": 1.633440375328064, "learning_rate": 3.892264204267929e-06, "loss": 0.7954, "step": 12517 }, { "epoch": 0.72, "grad_norm": 1.7915269136428833, "learning_rate": 3.890793442984471e-06, "loss": 0.9201, "step": 12518 }, { "epoch": 0.72, "grad_norm": 1.6709668636322021, "learning_rate": 3.889322892512669e-06, "loss": 0.9227, "step": 12519 }, { "epoch": 0.72, "grad_norm": 1.7345893383026123, "learning_rate": 3.887852552903262e-06, "loss": 0.9785, "step": 12520 }, { "epoch": 0.72, "grad_norm": 1.8026156425476074, "learning_rate": 3.886382424206992e-06, "loss": 0.9719, "step": 12521 }, { "epoch": 0.72, "grad_norm": 1.7650612592697144, "learning_rate": 3.884912506474585e-06, "loss": 0.9712, "step": 12522 }, { "epoch": 0.72, "grad_norm": 1.6858350038528442, "learning_rate": 3.883442799756768e-06, "loss": 0.9125, "step": 12523 }, { "epoch": 0.72, "grad_norm": 1.6993342638015747, "learning_rate": 3.881973304104252e-06, "loss": 0.8961, "step": 12524 }, { "epoch": 0.72, "grad_norm": 1.997534990310669, "learning_rate": 3.880504019567746e-06, "loss": 0.9865, "step": 12525 }, { "epoch": 0.72, "grad_norm": 1.7246320247650146, "learning_rate": 3.879034946197955e-06, "loss": 0.9523, "step": 12526 }, { "epoch": 0.72, "grad_norm": 1.5362396240234375, "learning_rate": 3.877566084045567e-06, "loss": 0.8882, "step": 12527 }, { "epoch": 0.72, "grad_norm": 1.66085684299469, "learning_rate": 3.876097433161275e-06, "loss": 0.974, "step": 12528 }, { "epoch": 0.72, "grad_norm": 1.7776702642440796, "learning_rate": 3.87462899359575e-06, "loss": 0.9397, "step": 12529 }, { "epoch": 0.72, "grad_norm": 1.78606379032135, "learning_rate": 3.873160765399672e-06, "loss": 0.8514, "step": 12530 }, { "epoch": 0.72, "grad_norm": 1.9417780637741089, "learning_rate": 3.8716927486236975e-06, "loss": 0.8324, "step": 12531 }, { "epoch": 0.72, "grad_norm": 1.6343858242034912, "learning_rate": 3.870224943318491e-06, "loss": 0.8932, "step": 12532 }, { "epoch": 0.72, "grad_norm": 0.9871156811714172, "learning_rate": 3.868757349534695e-06, "loss": 0.5515, "step": 12533 }, { "epoch": 0.72, "grad_norm": 1.8294332027435303, "learning_rate": 3.86728996732296e-06, "loss": 0.9094, "step": 12534 }, { "epoch": 0.72, "grad_norm": 1.7224035263061523, "learning_rate": 3.865822796733914e-06, "loss": 0.9669, "step": 12535 }, { "epoch": 0.72, "grad_norm": 1.7133840322494507, "learning_rate": 3.864355837818188e-06, "loss": 0.8429, "step": 12536 }, { "epoch": 0.72, "grad_norm": 1.7271113395690918, "learning_rate": 3.862889090626406e-06, "loss": 0.8986, "step": 12537 }, { "epoch": 0.72, "grad_norm": 1.7311434745788574, "learning_rate": 3.8614225552091745e-06, "loss": 0.9566, "step": 12538 }, { "epoch": 0.72, "grad_norm": 1.6086724996566772, "learning_rate": 3.859956231617107e-06, "loss": 0.8714, "step": 12539 }, { "epoch": 0.72, "grad_norm": 1.857214331626892, "learning_rate": 3.858490119900794e-06, "loss": 0.9976, "step": 12540 }, { "epoch": 0.72, "grad_norm": 1.6624752283096313, "learning_rate": 3.857024220110837e-06, "loss": 0.8879, "step": 12541 }, { "epoch": 0.72, "grad_norm": 1.7396091222763062, "learning_rate": 3.855558532297808e-06, "loss": 0.8906, "step": 12542 }, { "epoch": 0.72, "grad_norm": 1.8602701425552368, "learning_rate": 3.854093056512296e-06, "loss": 0.9584, "step": 12543 }, { "epoch": 0.72, "grad_norm": 1.6431396007537842, "learning_rate": 3.85262779280486e-06, "loss": 0.834, "step": 12544 }, { "epoch": 0.72, "grad_norm": 1.7836977243423462, "learning_rate": 3.851162741226071e-06, "loss": 0.8997, "step": 12545 }, { "epoch": 0.72, "grad_norm": 1.6669334173202515, "learning_rate": 3.849697901826477e-06, "loss": 0.9244, "step": 12546 }, { "epoch": 0.72, "grad_norm": 1.7953133583068848, "learning_rate": 3.848233274656631e-06, "loss": 0.9685, "step": 12547 }, { "epoch": 0.72, "grad_norm": 1.6984896659851074, "learning_rate": 3.846768859767066e-06, "loss": 0.9369, "step": 12548 }, { "epoch": 0.72, "grad_norm": 1.7227824926376343, "learning_rate": 3.845304657208321e-06, "loss": 0.917, "step": 12549 }, { "epoch": 0.72, "grad_norm": 1.754768967628479, "learning_rate": 3.8438406670309215e-06, "loss": 0.9208, "step": 12550 }, { "epoch": 0.72, "grad_norm": 1.7911325693130493, "learning_rate": 3.842376889285382e-06, "loss": 0.8674, "step": 12551 }, { "epoch": 0.72, "grad_norm": 1.7789198160171509, "learning_rate": 3.840913324022218e-06, "loss": 0.9308, "step": 12552 }, { "epoch": 0.72, "grad_norm": 1.8998196125030518, "learning_rate": 3.8394499712919275e-06, "loss": 0.9987, "step": 12553 }, { "epoch": 0.72, "grad_norm": 1.9097747802734375, "learning_rate": 3.8379868311450134e-06, "loss": 0.8982, "step": 12554 }, { "epoch": 0.72, "grad_norm": 1.6499348878860474, "learning_rate": 3.8365239036319565e-06, "loss": 0.9653, "step": 12555 }, { "epoch": 0.72, "grad_norm": 1.8675527572631836, "learning_rate": 3.8350611888032474e-06, "loss": 0.9251, "step": 12556 }, { "epoch": 0.72, "grad_norm": 1.8499308824539185, "learning_rate": 3.833598686709351e-06, "loss": 0.954, "step": 12557 }, { "epoch": 0.72, "grad_norm": 1.83310067653656, "learning_rate": 3.832136397400743e-06, "loss": 0.9913, "step": 12558 }, { "epoch": 0.72, "grad_norm": 2.1436314582824707, "learning_rate": 3.830674320927875e-06, "loss": 0.9536, "step": 12559 }, { "epoch": 0.72, "grad_norm": 0.9855970144271851, "learning_rate": 3.829212457341203e-06, "loss": 0.5498, "step": 12560 }, { "epoch": 0.72, "grad_norm": 1.843736171722412, "learning_rate": 3.827750806691175e-06, "loss": 0.9276, "step": 12561 }, { "epoch": 0.72, "grad_norm": 2.0240211486816406, "learning_rate": 3.8262893690282214e-06, "loss": 0.9149, "step": 12562 }, { "epoch": 0.72, "grad_norm": 1.6047064065933228, "learning_rate": 3.82482814440278e-06, "loss": 0.8451, "step": 12563 }, { "epoch": 0.72, "grad_norm": 1.83649742603302, "learning_rate": 3.823367132865266e-06, "loss": 0.9819, "step": 12564 }, { "epoch": 0.72, "grad_norm": 1.6730918884277344, "learning_rate": 3.821906334466102e-06, "loss": 0.9815, "step": 12565 }, { "epoch": 0.72, "grad_norm": 1.7875293493270874, "learning_rate": 3.820445749255689e-06, "loss": 0.9596, "step": 12566 }, { "epoch": 0.72, "grad_norm": 1.6322795152664185, "learning_rate": 3.818985377284435e-06, "loss": 0.9155, "step": 12567 }, { "epoch": 0.72, "grad_norm": 1.9037878513336182, "learning_rate": 3.817525218602727e-06, "loss": 0.9464, "step": 12568 }, { "epoch": 0.72, "grad_norm": 1.9884686470031738, "learning_rate": 3.816065273260956e-06, "loss": 0.9943, "step": 12569 }, { "epoch": 0.72, "grad_norm": 1.983309030532837, "learning_rate": 3.814605541309495e-06, "loss": 0.952, "step": 12570 }, { "epoch": 0.72, "grad_norm": 0.9923788905143738, "learning_rate": 3.8131460227987214e-06, "loss": 0.571, "step": 12571 }, { "epoch": 0.72, "grad_norm": 1.7488611936569214, "learning_rate": 3.811686717778994e-06, "loss": 0.9517, "step": 12572 }, { "epoch": 0.72, "grad_norm": 1.7324405908584595, "learning_rate": 3.810227626300671e-06, "loss": 0.8915, "step": 12573 }, { "epoch": 0.72, "grad_norm": 1.570957064628601, "learning_rate": 3.8087687484141055e-06, "loss": 0.9517, "step": 12574 }, { "epoch": 0.72, "grad_norm": 1.9039630889892578, "learning_rate": 3.8073100841696333e-06, "loss": 0.9445, "step": 12575 }, { "epoch": 0.72, "grad_norm": 1.8011610507965088, "learning_rate": 3.8058516336175942e-06, "loss": 0.9286, "step": 12576 }, { "epoch": 0.72, "grad_norm": 1.805119276046753, "learning_rate": 3.80439339680831e-06, "loss": 0.8667, "step": 12577 }, { "epoch": 0.72, "grad_norm": 1.8024210929870605, "learning_rate": 3.802935373792106e-06, "loss": 0.9213, "step": 12578 }, { "epoch": 0.72, "grad_norm": 1.6766784191131592, "learning_rate": 3.801477564619287e-06, "loss": 0.948, "step": 12579 }, { "epoch": 0.72, "grad_norm": 1.6544581651687622, "learning_rate": 3.8000199693401675e-06, "loss": 0.9568, "step": 12580 }, { "epoch": 0.72, "grad_norm": 1.9259475469589233, "learning_rate": 3.7985625880050315e-06, "loss": 0.9144, "step": 12581 }, { "epoch": 0.72, "grad_norm": 1.6724088191986084, "learning_rate": 3.7971054206641854e-06, "loss": 0.9235, "step": 12582 }, { "epoch": 0.72, "grad_norm": 1.8785679340362549, "learning_rate": 3.7956484673679006e-06, "loss": 0.9227, "step": 12583 }, { "epoch": 0.72, "grad_norm": 1.6434869766235352, "learning_rate": 3.7941917281664586e-06, "loss": 0.9661, "step": 12584 }, { "epoch": 0.72, "grad_norm": 1.777374029159546, "learning_rate": 3.7927352031101228e-06, "loss": 0.9159, "step": 12585 }, { "epoch": 0.72, "grad_norm": 1.6906895637512207, "learning_rate": 3.7912788922491582e-06, "loss": 0.8463, "step": 12586 }, { "epoch": 0.72, "grad_norm": 1.6666299104690552, "learning_rate": 3.789822795633813e-06, "loss": 0.9892, "step": 12587 }, { "epoch": 0.72, "grad_norm": 1.6618847846984863, "learning_rate": 3.7883669133143388e-06, "loss": 0.8577, "step": 12588 }, { "epoch": 0.72, "grad_norm": 1.839779257774353, "learning_rate": 3.7869112453409673e-06, "loss": 0.9683, "step": 12589 }, { "epoch": 0.72, "grad_norm": 1.899091362953186, "learning_rate": 3.7854557917639333e-06, "loss": 0.8988, "step": 12590 }, { "epoch": 0.72, "grad_norm": 1.85112726688385, "learning_rate": 3.7840005526334633e-06, "loss": 0.9093, "step": 12591 }, { "epoch": 0.72, "grad_norm": 1.7250407934188843, "learning_rate": 3.782545527999768e-06, "loss": 0.8787, "step": 12592 }, { "epoch": 0.72, "grad_norm": 1.8389370441436768, "learning_rate": 3.781090717913062e-06, "loss": 0.8964, "step": 12593 }, { "epoch": 0.72, "grad_norm": 1.7947970628738403, "learning_rate": 3.77963612242354e-06, "loss": 0.8455, "step": 12594 }, { "epoch": 0.72, "grad_norm": 1.7338473796844482, "learning_rate": 3.778181741581403e-06, "loss": 0.9537, "step": 12595 }, { "epoch": 0.72, "grad_norm": 1.795505404472351, "learning_rate": 3.7767275754368292e-06, "loss": 0.8984, "step": 12596 }, { "epoch": 0.72, "grad_norm": 1.0807818174362183, "learning_rate": 3.775273624040008e-06, "loss": 0.5417, "step": 12597 }, { "epoch": 0.72, "grad_norm": 1.979111909866333, "learning_rate": 3.7738198874411026e-06, "loss": 0.9746, "step": 12598 }, { "epoch": 0.72, "grad_norm": 1.93534255027771, "learning_rate": 3.772366365690283e-06, "loss": 0.9461, "step": 12599 }, { "epoch": 0.72, "grad_norm": 1.7157135009765625, "learning_rate": 3.7709130588377007e-06, "loss": 0.8516, "step": 12600 }, { "epoch": 0.72, "grad_norm": 1.702778935432434, "learning_rate": 3.7694599669335132e-06, "loss": 0.9412, "step": 12601 }, { "epoch": 0.72, "grad_norm": 1.7491122484207153, "learning_rate": 3.7680070900278533e-06, "loss": 1.0122, "step": 12602 }, { "epoch": 0.72, "grad_norm": 1.9815839529037476, "learning_rate": 3.766554428170861e-06, "loss": 0.9377, "step": 12603 }, { "epoch": 0.72, "grad_norm": 1.0403605699539185, "learning_rate": 3.7651019814126656e-06, "loss": 0.551, "step": 12604 }, { "epoch": 0.72, "grad_norm": 1.6842703819274902, "learning_rate": 3.7636497498033817e-06, "loss": 0.9202, "step": 12605 }, { "epoch": 0.72, "grad_norm": 1.7488585710525513, "learning_rate": 3.762197733393127e-06, "loss": 0.8912, "step": 12606 }, { "epoch": 0.72, "grad_norm": 1.752842903137207, "learning_rate": 3.7607459322320015e-06, "loss": 0.9385, "step": 12607 }, { "epoch": 0.72, "grad_norm": 1.7367526292800903, "learning_rate": 3.7592943463701083e-06, "loss": 0.9174, "step": 12608 }, { "epoch": 0.72, "grad_norm": 1.698089838027954, "learning_rate": 3.7578429758575306e-06, "loss": 0.936, "step": 12609 }, { "epoch": 0.72, "grad_norm": 1.7510919570922852, "learning_rate": 3.7563918207443583e-06, "loss": 0.8927, "step": 12610 }, { "epoch": 0.72, "grad_norm": 1.940737009048462, "learning_rate": 3.754940881080661e-06, "loss": 0.8255, "step": 12611 }, { "epoch": 0.72, "grad_norm": 1.8052895069122314, "learning_rate": 3.7534901569165117e-06, "loss": 0.9187, "step": 12612 }, { "epoch": 0.72, "grad_norm": 1.8674594163894653, "learning_rate": 3.7520396483019648e-06, "loss": 0.9784, "step": 12613 }, { "epoch": 0.72, "grad_norm": 1.7453211545944214, "learning_rate": 3.7505893552870774e-06, "loss": 0.8841, "step": 12614 }, { "epoch": 0.72, "grad_norm": 1.6215099096298218, "learning_rate": 3.749139277921897e-06, "loss": 0.9096, "step": 12615 }, { "epoch": 0.72, "grad_norm": 1.7010674476623535, "learning_rate": 3.747689416256456e-06, "loss": 0.9248, "step": 12616 }, { "epoch": 0.72, "grad_norm": 1.6624914407730103, "learning_rate": 3.7462397703407917e-06, "loss": 0.8962, "step": 12617 }, { "epoch": 0.72, "grad_norm": 1.7322510480880737, "learning_rate": 3.7447903402249197e-06, "loss": 0.8598, "step": 12618 }, { "epoch": 0.72, "grad_norm": 1.720736026763916, "learning_rate": 3.7433411259588635e-06, "loss": 0.9601, "step": 12619 }, { "epoch": 0.72, "grad_norm": 1.7048335075378418, "learning_rate": 3.7418921275926245e-06, "loss": 0.9032, "step": 12620 }, { "epoch": 0.72, "grad_norm": 1.5797029733657837, "learning_rate": 3.74044334517621e-06, "loss": 0.8538, "step": 12621 }, { "epoch": 0.72, "grad_norm": 1.8111974000930786, "learning_rate": 3.738994778759607e-06, "loss": 1.0421, "step": 12622 }, { "epoch": 0.72, "grad_norm": 1.5807260274887085, "learning_rate": 3.7375464283928086e-06, "loss": 0.9306, "step": 12623 }, { "epoch": 0.72, "grad_norm": 1.6567331552505493, "learning_rate": 3.736098294125785e-06, "loss": 0.9437, "step": 12624 }, { "epoch": 0.72, "grad_norm": 1.7192879915237427, "learning_rate": 3.734650376008516e-06, "loss": 0.9957, "step": 12625 }, { "epoch": 0.72, "grad_norm": 2.410449504852295, "learning_rate": 3.7332026740909576e-06, "loss": 0.9806, "step": 12626 }, { "epoch": 0.72, "grad_norm": 1.782019019126892, "learning_rate": 3.7317551884230697e-06, "loss": 0.9629, "step": 12627 }, { "epoch": 0.72, "grad_norm": 1.7913216352462769, "learning_rate": 3.730307919054803e-06, "loss": 0.9445, "step": 12628 }, { "epoch": 0.72, "grad_norm": 1.861438274383545, "learning_rate": 3.7288608660360935e-06, "loss": 0.9476, "step": 12629 }, { "epoch": 0.72, "grad_norm": 1.6599637269973755, "learning_rate": 3.7274140294168813e-06, "loss": 0.9118, "step": 12630 }, { "epoch": 0.72, "grad_norm": 1.7982391119003296, "learning_rate": 3.7259674092470853e-06, "loss": 0.9777, "step": 12631 }, { "epoch": 0.72, "grad_norm": 1.7976106405258179, "learning_rate": 3.7245210055766324e-06, "loss": 0.9944, "step": 12632 }, { "epoch": 0.72, "grad_norm": 1.8489376306533813, "learning_rate": 3.7230748184554254e-06, "loss": 0.9221, "step": 12633 }, { "epoch": 0.72, "grad_norm": 1.7034870386123657, "learning_rate": 3.7216288479333763e-06, "loss": 0.8935, "step": 12634 }, { "epoch": 0.72, "grad_norm": 1.614827275276184, "learning_rate": 3.7201830940603747e-06, "loss": 0.8533, "step": 12635 }, { "epoch": 0.72, "grad_norm": 2.673074245452881, "learning_rate": 3.718737556886316e-06, "loss": 0.9384, "step": 12636 }, { "epoch": 0.72, "grad_norm": 1.713295340538025, "learning_rate": 3.717292236461074e-06, "loss": 0.9422, "step": 12637 }, { "epoch": 0.72, "grad_norm": 1.6888242959976196, "learning_rate": 3.715847132834528e-06, "loss": 0.9148, "step": 12638 }, { "epoch": 0.72, "grad_norm": 1.663413166999817, "learning_rate": 3.7144022460565452e-06, "loss": 0.9475, "step": 12639 }, { "epoch": 0.72, "grad_norm": 1.6894317865371704, "learning_rate": 3.712957576176981e-06, "loss": 0.9171, "step": 12640 }, { "epoch": 0.72, "grad_norm": 1.6570804119110107, "learning_rate": 3.7115131232456915e-06, "loss": 0.9264, "step": 12641 }, { "epoch": 0.73, "grad_norm": 1.6393131017684937, "learning_rate": 3.7100688873125147e-06, "loss": 0.9128, "step": 12642 }, { "epoch": 0.73, "grad_norm": 1.886132001876831, "learning_rate": 3.708624868427293e-06, "loss": 0.8699, "step": 12643 }, { "epoch": 0.73, "grad_norm": 1.738251805305481, "learning_rate": 3.70718106663985e-06, "loss": 0.9156, "step": 12644 }, { "epoch": 0.73, "grad_norm": 1.7354727983474731, "learning_rate": 3.7057374820000137e-06, "loss": 0.8879, "step": 12645 }, { "epoch": 0.73, "grad_norm": 1.0547871589660645, "learning_rate": 3.7042941145575915e-06, "loss": 0.5625, "step": 12646 }, { "epoch": 0.73, "grad_norm": 1.902708649635315, "learning_rate": 3.702850964362392e-06, "loss": 0.8941, "step": 12647 }, { "epoch": 0.73, "grad_norm": 1.593868374824524, "learning_rate": 3.7014080314642163e-06, "loss": 0.8353, "step": 12648 }, { "epoch": 0.73, "grad_norm": 1.6904938220977783, "learning_rate": 3.699965315912858e-06, "loss": 0.9223, "step": 12649 }, { "epoch": 0.73, "grad_norm": 1.9393669366836548, "learning_rate": 3.6985228177580944e-06, "loss": 0.9283, "step": 12650 }, { "epoch": 0.73, "grad_norm": 1.7096010446548462, "learning_rate": 3.697080537049711e-06, "loss": 0.9653, "step": 12651 }, { "epoch": 0.73, "grad_norm": 1.7063405513763428, "learning_rate": 3.695638473837466e-06, "loss": 0.9289, "step": 12652 }, { "epoch": 0.73, "grad_norm": 1.8004151582717896, "learning_rate": 3.6941966281711318e-06, "loss": 1.0359, "step": 12653 }, { "epoch": 0.73, "grad_norm": 1.6368452310562134, "learning_rate": 3.692755000100453e-06, "loss": 0.9187, "step": 12654 }, { "epoch": 0.73, "grad_norm": 2.017239570617676, "learning_rate": 3.691313589675185e-06, "loss": 0.9316, "step": 12655 }, { "epoch": 0.73, "grad_norm": 1.7595504522323608, "learning_rate": 3.689872396945059e-06, "loss": 0.9018, "step": 12656 }, { "epoch": 0.73, "grad_norm": 1.7971736192703247, "learning_rate": 3.6884314219598095e-06, "loss": 1.0437, "step": 12657 }, { "epoch": 0.73, "grad_norm": 1.7783476114273071, "learning_rate": 3.6869906647691635e-06, "loss": 0.9333, "step": 12658 }, { "epoch": 0.73, "grad_norm": 1.6664040088653564, "learning_rate": 3.6855501254228322e-06, "loss": 0.9443, "step": 12659 }, { "epoch": 0.73, "grad_norm": 1.905690312385559, "learning_rate": 3.6841098039705313e-06, "loss": 0.8788, "step": 12660 }, { "epoch": 0.73, "grad_norm": 1.8161495923995972, "learning_rate": 3.682669700461955e-06, "loss": 0.913, "step": 12661 }, { "epoch": 0.73, "grad_norm": 1.781017780303955, "learning_rate": 3.681229814946803e-06, "loss": 0.9054, "step": 12662 }, { "epoch": 0.73, "grad_norm": 1.7194368839263916, "learning_rate": 3.6797901474747567e-06, "loss": 0.8599, "step": 12663 }, { "epoch": 0.73, "grad_norm": 1.7641019821166992, "learning_rate": 3.6783506980955007e-06, "loss": 0.9318, "step": 12664 }, { "epoch": 0.73, "grad_norm": 1.7111732959747314, "learning_rate": 3.6769114668587e-06, "loss": 0.9093, "step": 12665 }, { "epoch": 0.73, "grad_norm": 1.7966961860656738, "learning_rate": 3.675472453814025e-06, "loss": 0.8629, "step": 12666 }, { "epoch": 0.73, "grad_norm": 1.786097526550293, "learning_rate": 3.674033659011126e-06, "loss": 0.9613, "step": 12667 }, { "epoch": 0.73, "grad_norm": 1.7995284795761108, "learning_rate": 3.6725950824996537e-06, "loss": 0.9167, "step": 12668 }, { "epoch": 0.73, "grad_norm": 1.8264379501342773, "learning_rate": 3.6711567243292547e-06, "loss": 0.9118, "step": 12669 }, { "epoch": 0.73, "grad_norm": 1.905930995941162, "learning_rate": 3.669718584549553e-06, "loss": 0.9728, "step": 12670 }, { "epoch": 0.73, "grad_norm": 1.7546838521957397, "learning_rate": 3.6682806632101852e-06, "loss": 0.8725, "step": 12671 }, { "epoch": 0.73, "grad_norm": 1.646043062210083, "learning_rate": 3.6668429603607604e-06, "loss": 0.8587, "step": 12672 }, { "epoch": 0.73, "grad_norm": 1.7822115421295166, "learning_rate": 3.6654054760508983e-06, "loss": 0.9413, "step": 12673 }, { "epoch": 0.73, "grad_norm": 1.821334958076477, "learning_rate": 3.6639682103301943e-06, "loss": 1.004, "step": 12674 }, { "epoch": 0.73, "grad_norm": 1.7244815826416016, "learning_rate": 3.662531163248252e-06, "loss": 0.914, "step": 12675 }, { "epoch": 0.73, "grad_norm": 1.846912145614624, "learning_rate": 3.6610943348546524e-06, "loss": 0.9597, "step": 12676 }, { "epoch": 0.73, "grad_norm": 1.6697893142700195, "learning_rate": 3.659657725198984e-06, "loss": 0.9108, "step": 12677 }, { "epoch": 0.73, "grad_norm": 1.2002352476119995, "learning_rate": 3.6582213343308126e-06, "loss": 0.6341, "step": 12678 }, { "epoch": 0.73, "grad_norm": 1.776036262512207, "learning_rate": 3.656785162299712e-06, "loss": 0.9095, "step": 12679 }, { "epoch": 0.73, "grad_norm": 1.7355620861053467, "learning_rate": 3.6553492091552324e-06, "loss": 0.9476, "step": 12680 }, { "epoch": 0.73, "grad_norm": 1.7779364585876465, "learning_rate": 3.6539134749469284e-06, "loss": 0.9138, "step": 12681 }, { "epoch": 0.73, "grad_norm": 1.6813569068908691, "learning_rate": 3.652477959724348e-06, "loss": 0.929, "step": 12682 }, { "epoch": 0.73, "grad_norm": 0.9758507013320923, "learning_rate": 3.6510426635370178e-06, "loss": 0.522, "step": 12683 }, { "epoch": 0.73, "grad_norm": 1.7634559869766235, "learning_rate": 3.6496075864344736e-06, "loss": 0.9375, "step": 12684 }, { "epoch": 0.73, "grad_norm": 1.7748652696609497, "learning_rate": 3.6481727284662284e-06, "loss": 0.8694, "step": 12685 }, { "epoch": 0.73, "grad_norm": 1.6467344760894775, "learning_rate": 3.6467380896818037e-06, "loss": 0.8919, "step": 12686 }, { "epoch": 0.73, "grad_norm": 1.902411699295044, "learning_rate": 3.6453036701306964e-06, "loss": 0.8775, "step": 12687 }, { "epoch": 0.73, "grad_norm": 1.739104151725769, "learning_rate": 3.643869469862412e-06, "loss": 0.9339, "step": 12688 }, { "epoch": 0.73, "grad_norm": 1.7552645206451416, "learning_rate": 3.6424354889264334e-06, "loss": 0.8617, "step": 12689 }, { "epoch": 0.73, "grad_norm": 1.9040225744247437, "learning_rate": 3.641001727372251e-06, "loss": 0.9645, "step": 12690 }, { "epoch": 0.73, "grad_norm": 1.7261549234390259, "learning_rate": 3.6395681852493326e-06, "loss": 0.9759, "step": 12691 }, { "epoch": 0.73, "grad_norm": 1.8241993188858032, "learning_rate": 3.6381348626071477e-06, "loss": 0.9326, "step": 12692 }, { "epoch": 0.73, "grad_norm": 1.8203476667404175, "learning_rate": 3.6367017594951615e-06, "loss": 0.8533, "step": 12693 }, { "epoch": 0.73, "grad_norm": 1.871741533279419, "learning_rate": 3.63526887596282e-06, "loss": 0.9062, "step": 12694 }, { "epoch": 0.73, "grad_norm": 1.611861228942871, "learning_rate": 3.6338362120595726e-06, "loss": 0.9197, "step": 12695 }, { "epoch": 0.73, "grad_norm": 1.6157146692276, "learning_rate": 3.6324037678348513e-06, "loss": 0.8956, "step": 12696 }, { "epoch": 0.73, "grad_norm": 1.637628436088562, "learning_rate": 3.630971543338092e-06, "loss": 0.9061, "step": 12697 }, { "epoch": 0.73, "grad_norm": 1.0322898626327515, "learning_rate": 3.6295395386187103e-06, "loss": 0.5595, "step": 12698 }, { "epoch": 0.73, "grad_norm": 1.8189359903335571, "learning_rate": 3.6281077537261276e-06, "loss": 0.9409, "step": 12699 }, { "epoch": 0.73, "grad_norm": 1.7638901472091675, "learning_rate": 3.6266761887097433e-06, "loss": 0.9291, "step": 12700 }, { "epoch": 0.73, "grad_norm": 2.0128118991851807, "learning_rate": 3.625244843618965e-06, "loss": 0.896, "step": 12701 }, { "epoch": 0.73, "grad_norm": 1.7139759063720703, "learning_rate": 3.6238137185031765e-06, "loss": 0.9513, "step": 12702 }, { "epoch": 0.73, "grad_norm": 1.7586177587509155, "learning_rate": 3.6223828134117678e-06, "loss": 0.9365, "step": 12703 }, { "epoch": 0.73, "grad_norm": 1.7549299001693726, "learning_rate": 3.6209521283941097e-06, "loss": 0.8844, "step": 12704 }, { "epoch": 0.73, "grad_norm": 1.8163676261901855, "learning_rate": 3.6195216634995743e-06, "loss": 0.8534, "step": 12705 }, { "epoch": 0.73, "grad_norm": 1.9223564863204956, "learning_rate": 3.6180914187775273e-06, "loss": 0.875, "step": 12706 }, { "epoch": 0.73, "grad_norm": 1.8886995315551758, "learning_rate": 3.6166613942773156e-06, "loss": 0.9143, "step": 12707 }, { "epoch": 0.73, "grad_norm": 1.5925272703170776, "learning_rate": 3.6152315900482904e-06, "loss": 0.9418, "step": 12708 }, { "epoch": 0.73, "grad_norm": 1.7665399312973022, "learning_rate": 3.613802006139785e-06, "loss": 0.9588, "step": 12709 }, { "epoch": 0.73, "grad_norm": 1.0779120922088623, "learning_rate": 3.6123726426011363e-06, "loss": 0.5234, "step": 12710 }, { "epoch": 0.73, "grad_norm": 1.6971087455749512, "learning_rate": 3.6109434994816606e-06, "loss": 0.8811, "step": 12711 }, { "epoch": 0.73, "grad_norm": 1.682674765586853, "learning_rate": 3.6095145768306817e-06, "loss": 0.9488, "step": 12712 }, { "epoch": 0.73, "grad_norm": 1.0810747146606445, "learning_rate": 3.6080858746974965e-06, "loss": 0.5753, "step": 12713 }, { "epoch": 0.73, "grad_norm": 1.746949315071106, "learning_rate": 3.6066573931314198e-06, "loss": 0.9033, "step": 12714 }, { "epoch": 0.73, "grad_norm": 1.6196736097335815, "learning_rate": 3.6052291321817343e-06, "loss": 0.8162, "step": 12715 }, { "epoch": 0.73, "grad_norm": 1.6244525909423828, "learning_rate": 3.6038010918977308e-06, "loss": 0.9264, "step": 12716 }, { "epoch": 0.73, "grad_norm": 1.6909512281417847, "learning_rate": 3.602373272328682e-06, "loss": 0.8933, "step": 12717 }, { "epoch": 0.73, "grad_norm": 1.6652984619140625, "learning_rate": 3.6009456735238633e-06, "loss": 0.9474, "step": 12718 }, { "epoch": 0.73, "grad_norm": 1.8534907102584839, "learning_rate": 3.5995182955325313e-06, "loss": 0.9021, "step": 12719 }, { "epoch": 0.73, "grad_norm": 1.7665417194366455, "learning_rate": 3.598091138403947e-06, "loss": 0.9186, "step": 12720 }, { "epoch": 0.73, "grad_norm": 1.7442777156829834, "learning_rate": 3.596664202187352e-06, "loss": 0.9859, "step": 12721 }, { "epoch": 0.73, "grad_norm": 1.6241544485092163, "learning_rate": 3.5952374869319884e-06, "loss": 0.8833, "step": 12722 }, { "epoch": 0.73, "grad_norm": 1.8073933124542236, "learning_rate": 3.5938109926870914e-06, "loss": 0.9094, "step": 12723 }, { "epoch": 0.73, "grad_norm": 1.7068259716033936, "learning_rate": 3.592384719501878e-06, "loss": 0.8896, "step": 12724 }, { "epoch": 0.73, "grad_norm": 1.6717661619186401, "learning_rate": 3.5909586674255723e-06, "loss": 0.9558, "step": 12725 }, { "epoch": 0.73, "grad_norm": 1.688665747642517, "learning_rate": 3.5895328365073768e-06, "loss": 0.9212, "step": 12726 }, { "epoch": 0.73, "grad_norm": 1.8968111276626587, "learning_rate": 3.5881072267965e-06, "loss": 0.8936, "step": 12727 }, { "epoch": 0.73, "grad_norm": 1.6330198049545288, "learning_rate": 3.5866818383421288e-06, "loss": 0.954, "step": 12728 }, { "epoch": 0.73, "grad_norm": 1.8906724452972412, "learning_rate": 3.5852566711934545e-06, "loss": 0.8737, "step": 12729 }, { "epoch": 0.73, "grad_norm": 1.8431918621063232, "learning_rate": 3.5838317253996514e-06, "loss": 0.9466, "step": 12730 }, { "epoch": 0.73, "grad_norm": 1.8814911842346191, "learning_rate": 3.5824070010098956e-06, "loss": 0.9088, "step": 12731 }, { "epoch": 0.73, "grad_norm": 1.1299934387207031, "learning_rate": 3.5809824980733445e-06, "loss": 0.5222, "step": 12732 }, { "epoch": 0.73, "grad_norm": 0.9598451256752014, "learning_rate": 3.5795582166391597e-06, "loss": 0.5171, "step": 12733 }, { "epoch": 0.73, "grad_norm": 1.8097270727157593, "learning_rate": 3.578134156756482e-06, "loss": 0.9194, "step": 12734 }, { "epoch": 0.73, "grad_norm": 1.5717129707336426, "learning_rate": 3.5767103184744566e-06, "loss": 0.9148, "step": 12735 }, { "epoch": 0.73, "grad_norm": 1.8004220724105835, "learning_rate": 3.575286701842218e-06, "loss": 0.968, "step": 12736 }, { "epoch": 0.73, "grad_norm": 1.6115024089813232, "learning_rate": 3.5738633069088857e-06, "loss": 0.9142, "step": 12737 }, { "epoch": 0.73, "grad_norm": 1.9924904108047485, "learning_rate": 3.5724401337235835e-06, "loss": 0.8534, "step": 12738 }, { "epoch": 0.73, "grad_norm": 1.7243096828460693, "learning_rate": 3.5710171823354145e-06, "loss": 0.8912, "step": 12739 }, { "epoch": 0.73, "grad_norm": 1.0127779245376587, "learning_rate": 3.5695944527934868e-06, "loss": 0.478, "step": 12740 }, { "epoch": 0.73, "grad_norm": 1.7696268558502197, "learning_rate": 3.568171945146889e-06, "loss": 0.9013, "step": 12741 }, { "epoch": 0.73, "grad_norm": 0.9781690239906311, "learning_rate": 3.566749659444714e-06, "loss": 0.497, "step": 12742 }, { "epoch": 0.73, "grad_norm": 1.7435624599456787, "learning_rate": 3.5653275957360333e-06, "loss": 0.9278, "step": 12743 }, { "epoch": 0.73, "grad_norm": 2.006174325942993, "learning_rate": 3.5639057540699274e-06, "loss": 0.9099, "step": 12744 }, { "epoch": 0.73, "grad_norm": 1.8570996522903442, "learning_rate": 3.5624841344954508e-06, "loss": 0.91, "step": 12745 }, { "epoch": 0.73, "grad_norm": 1.8896645307540894, "learning_rate": 3.5610627370616656e-06, "loss": 0.8788, "step": 12746 }, { "epoch": 0.73, "grad_norm": 1.7396416664123535, "learning_rate": 3.5596415618176215e-06, "loss": 0.8968, "step": 12747 }, { "epoch": 0.73, "grad_norm": 1.8568254709243774, "learning_rate": 3.558220608812354e-06, "loss": 0.8122, "step": 12748 }, { "epoch": 0.73, "grad_norm": 1.1276328563690186, "learning_rate": 3.556799878094901e-06, "loss": 0.5746, "step": 12749 }, { "epoch": 0.73, "grad_norm": 1.8160136938095093, "learning_rate": 3.5553793697142837e-06, "loss": 0.9087, "step": 12750 }, { "epoch": 0.73, "grad_norm": 1.771316647529602, "learning_rate": 3.553959083719525e-06, "loss": 0.9419, "step": 12751 }, { "epoch": 0.73, "grad_norm": 1.8243526220321655, "learning_rate": 3.552539020159629e-06, "loss": 0.8467, "step": 12752 }, { "epoch": 0.73, "grad_norm": 1.841591477394104, "learning_rate": 3.551119179083603e-06, "loss": 0.955, "step": 12753 }, { "epoch": 0.73, "grad_norm": 0.9740155339241028, "learning_rate": 3.549699560540438e-06, "loss": 0.5323, "step": 12754 }, { "epoch": 0.73, "grad_norm": 1.8601003885269165, "learning_rate": 3.5482801645791266e-06, "loss": 0.9313, "step": 12755 }, { "epoch": 0.73, "grad_norm": 1.7400918006896973, "learning_rate": 3.5468609912486405e-06, "loss": 0.9171, "step": 12756 }, { "epoch": 0.73, "grad_norm": 1.8677492141723633, "learning_rate": 3.5454420405979583e-06, "loss": 1.0072, "step": 12757 }, { "epoch": 0.73, "grad_norm": 1.836872935295105, "learning_rate": 3.544023312676039e-06, "loss": 0.8738, "step": 12758 }, { "epoch": 0.73, "grad_norm": 1.095333218574524, "learning_rate": 3.542604807531841e-06, "loss": 0.5914, "step": 12759 }, { "epoch": 0.73, "grad_norm": 1.0798290967941284, "learning_rate": 3.541186525214316e-06, "loss": 0.505, "step": 12760 }, { "epoch": 0.73, "grad_norm": 1.845782995223999, "learning_rate": 3.5397684657723986e-06, "loss": 0.8721, "step": 12761 }, { "epoch": 0.73, "grad_norm": 1.8541338443756104, "learning_rate": 3.5383506292550296e-06, "loss": 0.9548, "step": 12762 }, { "epoch": 0.73, "grad_norm": 1.7853888273239136, "learning_rate": 3.536933015711126e-06, "loss": 0.8882, "step": 12763 }, { "epoch": 0.73, "grad_norm": 1.6120736598968506, "learning_rate": 3.535515625189614e-06, "loss": 0.8486, "step": 12764 }, { "epoch": 0.73, "grad_norm": 1.8972859382629395, "learning_rate": 3.5340984577393966e-06, "loss": 0.939, "step": 12765 }, { "epoch": 0.73, "grad_norm": 1.79887056350708, "learning_rate": 3.532681513409384e-06, "loss": 0.8357, "step": 12766 }, { "epoch": 0.73, "grad_norm": 1.855502963066101, "learning_rate": 3.531264792248462e-06, "loss": 0.8781, "step": 12767 }, { "epoch": 0.73, "grad_norm": 1.6616597175598145, "learning_rate": 3.5298482943055266e-06, "loss": 0.9153, "step": 12768 }, { "epoch": 0.73, "grad_norm": 1.823286533355713, "learning_rate": 3.5284320196294486e-06, "loss": 0.8766, "step": 12769 }, { "epoch": 0.73, "grad_norm": 1.7013206481933594, "learning_rate": 3.527015968269105e-06, "loss": 0.989, "step": 12770 }, { "epoch": 0.73, "grad_norm": 1.8120239973068237, "learning_rate": 3.5256001402733607e-06, "loss": 0.9408, "step": 12771 }, { "epoch": 0.73, "grad_norm": 1.8070712089538574, "learning_rate": 3.5241845356910688e-06, "loss": 0.8923, "step": 12772 }, { "epoch": 0.73, "grad_norm": 1.9496750831604004, "learning_rate": 3.5227691545710807e-06, "loss": 0.9271, "step": 12773 }, { "epoch": 0.73, "grad_norm": 0.9979255795478821, "learning_rate": 3.5213539969622335e-06, "loss": 0.5467, "step": 12774 }, { "epoch": 0.73, "grad_norm": 1.7153676748275757, "learning_rate": 3.5199390629133645e-06, "loss": 0.9151, "step": 12775 }, { "epoch": 0.73, "grad_norm": 1.8439545631408691, "learning_rate": 3.518524352473295e-06, "loss": 0.9153, "step": 12776 }, { "epoch": 0.73, "grad_norm": 1.5927504301071167, "learning_rate": 3.5171098656908475e-06, "loss": 0.8571, "step": 12777 }, { "epoch": 0.73, "grad_norm": 2.1567935943603516, "learning_rate": 3.515695602614826e-06, "loss": 0.8705, "step": 12778 }, { "epoch": 0.73, "grad_norm": 1.6935616731643677, "learning_rate": 3.514281563294036e-06, "loss": 0.9514, "step": 12779 }, { "epoch": 0.73, "grad_norm": 1.590765357017517, "learning_rate": 3.5128677477772733e-06, "loss": 0.8496, "step": 12780 }, { "epoch": 0.73, "grad_norm": 1.817599892616272, "learning_rate": 3.5114541561133253e-06, "loss": 0.9306, "step": 12781 }, { "epoch": 0.73, "grad_norm": 1.6007765531539917, "learning_rate": 3.510040788350967e-06, "loss": 0.9193, "step": 12782 }, { "epoch": 0.73, "grad_norm": 1.6822649240493774, "learning_rate": 3.5086276445389756e-06, "loss": 0.9429, "step": 12783 }, { "epoch": 0.73, "grad_norm": 0.9569311141967773, "learning_rate": 3.507214724726107e-06, "loss": 0.5317, "step": 12784 }, { "epoch": 0.73, "grad_norm": 1.951790690422058, "learning_rate": 3.505802028961125e-06, "loss": 1.0424, "step": 12785 }, { "epoch": 0.73, "grad_norm": 1.8282763957977295, "learning_rate": 3.504389557292771e-06, "loss": 0.8751, "step": 12786 }, { "epoch": 0.73, "grad_norm": 1.7141307592391968, "learning_rate": 3.5029773097697928e-06, "loss": 0.8671, "step": 12787 }, { "epoch": 0.73, "grad_norm": 1.8596477508544922, "learning_rate": 3.5015652864409142e-06, "loss": 0.95, "step": 12788 }, { "epoch": 0.73, "grad_norm": 1.8929619789123535, "learning_rate": 3.500153487354866e-06, "loss": 0.8451, "step": 12789 }, { "epoch": 0.73, "grad_norm": 1.6982077360153198, "learning_rate": 3.4987419125603674e-06, "loss": 0.9502, "step": 12790 }, { "epoch": 0.73, "grad_norm": 1.958797574043274, "learning_rate": 3.4973305621061214e-06, "loss": 0.8893, "step": 12791 }, { "epoch": 0.73, "grad_norm": 1.5980812311172485, "learning_rate": 3.4959194360408368e-06, "loss": 0.8945, "step": 12792 }, { "epoch": 0.73, "grad_norm": 0.9950487017631531, "learning_rate": 3.4945085344132e-06, "loss": 0.5542, "step": 12793 }, { "epoch": 0.73, "grad_norm": 1.6351646184921265, "learning_rate": 3.4930978572719054e-06, "loss": 0.925, "step": 12794 }, { "epoch": 0.73, "grad_norm": 1.7312103509902954, "learning_rate": 3.4916874046656235e-06, "loss": 0.9859, "step": 12795 }, { "epoch": 0.73, "grad_norm": 1.7557244300842285, "learning_rate": 3.490277176643033e-06, "loss": 0.8722, "step": 12796 }, { "epoch": 0.73, "grad_norm": 1.6782313585281372, "learning_rate": 3.488867173252789e-06, "loss": 0.8342, "step": 12797 }, { "epoch": 0.73, "grad_norm": 0.9762368202209473, "learning_rate": 3.487457394543554e-06, "loss": 0.514, "step": 12798 }, { "epoch": 0.73, "grad_norm": 1.7493423223495483, "learning_rate": 3.48604784056397e-06, "loss": 0.838, "step": 12799 }, { "epoch": 0.73, "grad_norm": 1.642878770828247, "learning_rate": 3.484638511362678e-06, "loss": 0.9094, "step": 12800 }, { "epoch": 0.73, "grad_norm": 1.7214213609695435, "learning_rate": 3.4832294069883143e-06, "loss": 0.9423, "step": 12801 }, { "epoch": 0.73, "grad_norm": 1.6931219100952148, "learning_rate": 3.4818205274894977e-06, "loss": 0.932, "step": 12802 }, { "epoch": 0.73, "grad_norm": 1.876623272895813, "learning_rate": 3.4804118729148494e-06, "loss": 0.9454, "step": 12803 }, { "epoch": 0.73, "grad_norm": 1.7917015552520752, "learning_rate": 3.4790034433129727e-06, "loss": 0.8338, "step": 12804 }, { "epoch": 0.73, "grad_norm": 1.8057525157928467, "learning_rate": 3.477595238732474e-06, "loss": 0.8977, "step": 12805 }, { "epoch": 0.73, "grad_norm": 1.622389554977417, "learning_rate": 3.4761872592219416e-06, "loss": 0.8829, "step": 12806 }, { "epoch": 0.73, "grad_norm": 2.0450387001037598, "learning_rate": 3.474779504829966e-06, "loss": 0.922, "step": 12807 }, { "epoch": 0.73, "grad_norm": 1.8976327180862427, "learning_rate": 3.473371975605119e-06, "loss": 0.8312, "step": 12808 }, { "epoch": 0.73, "grad_norm": 1.992012619972229, "learning_rate": 3.4719646715959777e-06, "loss": 0.9721, "step": 12809 }, { "epoch": 0.73, "grad_norm": 1.0269834995269775, "learning_rate": 3.470557592851096e-06, "loss": 0.5479, "step": 12810 }, { "epoch": 0.73, "grad_norm": 1.8272058963775635, "learning_rate": 3.469150739419036e-06, "loss": 1.0019, "step": 12811 }, { "epoch": 0.73, "grad_norm": 1.7720597982406616, "learning_rate": 3.467744111348338e-06, "loss": 0.8357, "step": 12812 }, { "epoch": 0.73, "grad_norm": 1.8477842807769775, "learning_rate": 3.466337708687544e-06, "loss": 0.929, "step": 12813 }, { "epoch": 0.73, "grad_norm": 1.7937548160552979, "learning_rate": 3.4649315314851874e-06, "loss": 0.936, "step": 12814 }, { "epoch": 0.73, "grad_norm": 1.9334877729415894, "learning_rate": 3.463525579789785e-06, "loss": 0.9758, "step": 12815 }, { "epoch": 0.74, "grad_norm": 1.843354344367981, "learning_rate": 3.462119853649859e-06, "loss": 0.9759, "step": 12816 }, { "epoch": 0.74, "grad_norm": 1.7610127925872803, "learning_rate": 3.460714353113912e-06, "loss": 0.8868, "step": 12817 }, { "epoch": 0.74, "grad_norm": 1.8768399953842163, "learning_rate": 3.459309078230448e-06, "loss": 0.9059, "step": 12818 }, { "epoch": 0.74, "grad_norm": 1.7744067907333374, "learning_rate": 3.4579040290479536e-06, "loss": 0.8708, "step": 12819 }, { "epoch": 0.74, "grad_norm": 1.6929404735565186, "learning_rate": 3.4564992056149216e-06, "loss": 0.9353, "step": 12820 }, { "epoch": 0.74, "grad_norm": 1.6844855546951294, "learning_rate": 3.4550946079798187e-06, "loss": 0.9305, "step": 12821 }, { "epoch": 0.74, "grad_norm": 1.7147749662399292, "learning_rate": 3.4536902361911218e-06, "loss": 0.9118, "step": 12822 }, { "epoch": 0.74, "grad_norm": 1.916273832321167, "learning_rate": 3.4522860902972854e-06, "loss": 0.9458, "step": 12823 }, { "epoch": 0.74, "grad_norm": 1.822318434715271, "learning_rate": 3.4508821703467653e-06, "loss": 1.0188, "step": 12824 }, { "epoch": 0.74, "grad_norm": 1.675005316734314, "learning_rate": 3.449478476388012e-06, "loss": 0.9206, "step": 12825 }, { "epoch": 0.74, "grad_norm": 1.8033742904663086, "learning_rate": 3.4480750084694537e-06, "loss": 0.9396, "step": 12826 }, { "epoch": 0.74, "grad_norm": 1.7420192956924438, "learning_rate": 3.446671766639528e-06, "loss": 0.9026, "step": 12827 }, { "epoch": 0.74, "grad_norm": 1.564064621925354, "learning_rate": 3.445268750946651e-06, "loss": 0.8778, "step": 12828 }, { "epoch": 0.74, "grad_norm": 1.7758115530014038, "learning_rate": 3.4438659614392423e-06, "loss": 0.8397, "step": 12829 }, { "epoch": 0.74, "grad_norm": 1.8669469356536865, "learning_rate": 3.442463398165703e-06, "loss": 0.8843, "step": 12830 }, { "epoch": 0.74, "grad_norm": 1.731524109840393, "learning_rate": 3.4410610611744368e-06, "loss": 0.992, "step": 12831 }, { "epoch": 0.74, "grad_norm": 1.8694032430648804, "learning_rate": 3.439658950513828e-06, "loss": 0.9731, "step": 12832 }, { "epoch": 0.74, "grad_norm": 1.9750151634216309, "learning_rate": 3.4382570662322667e-06, "loss": 0.9662, "step": 12833 }, { "epoch": 0.74, "grad_norm": 1.6615134477615356, "learning_rate": 3.4368554083781224e-06, "loss": 0.9445, "step": 12834 }, { "epoch": 0.74, "grad_norm": 1.627312183380127, "learning_rate": 3.4354539769997664e-06, "loss": 0.9308, "step": 12835 }, { "epoch": 0.74, "grad_norm": 1.6794954538345337, "learning_rate": 3.4340527721455542e-06, "loss": 0.8634, "step": 12836 }, { "epoch": 0.74, "grad_norm": 1.7179930210113525, "learning_rate": 3.432651793863838e-06, "loss": 0.9342, "step": 12837 }, { "epoch": 0.74, "grad_norm": 1.7547636032104492, "learning_rate": 3.4312510422029687e-06, "loss": 0.9639, "step": 12838 }, { "epoch": 0.74, "grad_norm": 1.7991423606872559, "learning_rate": 3.4298505172112716e-06, "loss": 0.9634, "step": 12839 }, { "epoch": 0.74, "grad_norm": 1.7628287076950073, "learning_rate": 3.428450218937085e-06, "loss": 0.9223, "step": 12840 }, { "epoch": 0.74, "grad_norm": 1.6760315895080566, "learning_rate": 3.42705014742872e-06, "loss": 0.8576, "step": 12841 }, { "epoch": 0.74, "grad_norm": 1.9345563650131226, "learning_rate": 3.425650302734498e-06, "loss": 0.9181, "step": 12842 }, { "epoch": 0.74, "grad_norm": 2.0413291454315186, "learning_rate": 3.4242506849027146e-06, "loss": 0.9273, "step": 12843 }, { "epoch": 0.74, "grad_norm": 1.7162268161773682, "learning_rate": 3.422851293981676e-06, "loss": 0.8944, "step": 12844 }, { "epoch": 0.74, "grad_norm": 1.8405197858810425, "learning_rate": 3.42145213001966e-06, "loss": 0.8331, "step": 12845 }, { "epoch": 0.74, "grad_norm": 1.737568736076355, "learning_rate": 3.4200531930649607e-06, "loss": 0.9446, "step": 12846 }, { "epoch": 0.74, "grad_norm": 1.8552114963531494, "learning_rate": 3.418654483165842e-06, "loss": 0.8681, "step": 12847 }, { "epoch": 0.74, "grad_norm": 1.609439730644226, "learning_rate": 3.417256000370577e-06, "loss": 0.866, "step": 12848 }, { "epoch": 0.74, "grad_norm": 1.6716490983963013, "learning_rate": 3.4158577447274156e-06, "loss": 0.9368, "step": 12849 }, { "epoch": 0.74, "grad_norm": 1.6060247421264648, "learning_rate": 3.4144597162846137e-06, "loss": 0.9161, "step": 12850 }, { "epoch": 0.74, "grad_norm": 1.8354800939559937, "learning_rate": 3.413061915090409e-06, "loss": 0.9907, "step": 12851 }, { "epoch": 0.74, "grad_norm": 1.675417184829712, "learning_rate": 3.4116643411930405e-06, "loss": 0.9179, "step": 12852 }, { "epoch": 0.74, "grad_norm": 1.6660715341567993, "learning_rate": 3.4102669946407284e-06, "loss": 0.8612, "step": 12853 }, { "epoch": 0.74, "grad_norm": 1.7027852535247803, "learning_rate": 3.408869875481695e-06, "loss": 0.9306, "step": 12854 }, { "epoch": 0.74, "grad_norm": 2.0397684574127197, "learning_rate": 3.407472983764153e-06, "loss": 1.0035, "step": 12855 }, { "epoch": 0.74, "grad_norm": 1.8944162130355835, "learning_rate": 3.406076319536301e-06, "loss": 1.0013, "step": 12856 }, { "epoch": 0.74, "grad_norm": 1.761699914932251, "learning_rate": 3.404679882846338e-06, "loss": 0.8935, "step": 12857 }, { "epoch": 0.74, "grad_norm": 1.4569480419158936, "learning_rate": 3.4032836737424456e-06, "loss": 0.8644, "step": 12858 }, { "epoch": 0.74, "grad_norm": 1.7800631523132324, "learning_rate": 3.4018876922728105e-06, "loss": 0.8816, "step": 12859 }, { "epoch": 0.74, "grad_norm": 1.7472339868545532, "learning_rate": 3.400491938485596e-06, "loss": 0.9345, "step": 12860 }, { "epoch": 0.74, "grad_norm": 1.7179112434387207, "learning_rate": 3.399096412428974e-06, "loss": 0.9177, "step": 12861 }, { "epoch": 0.74, "grad_norm": 1.0143840312957764, "learning_rate": 3.3977011141510917e-06, "loss": 0.5033, "step": 12862 }, { "epoch": 0.74, "grad_norm": 1.7133642435073853, "learning_rate": 3.396306043700105e-06, "loss": 0.9369, "step": 12863 }, { "epoch": 0.74, "grad_norm": 1.7558554410934448, "learning_rate": 3.394911201124147e-06, "loss": 0.9346, "step": 12864 }, { "epoch": 0.74, "grad_norm": 1.7255535125732422, "learning_rate": 3.393516586471356e-06, "loss": 0.8891, "step": 12865 }, { "epoch": 0.74, "grad_norm": 1.8447771072387695, "learning_rate": 3.392122199789849e-06, "loss": 0.9164, "step": 12866 }, { "epoch": 0.74, "grad_norm": 1.7963228225708008, "learning_rate": 3.3907280411277478e-06, "loss": 0.8766, "step": 12867 }, { "epoch": 0.74, "grad_norm": 1.7364304065704346, "learning_rate": 3.3893341105331612e-06, "loss": 0.8031, "step": 12868 }, { "epoch": 0.74, "grad_norm": 1.608424425125122, "learning_rate": 3.3879404080541866e-06, "loss": 0.8474, "step": 12869 }, { "epoch": 0.74, "grad_norm": 1.7260738611221313, "learning_rate": 3.386546933738921e-06, "loss": 0.8671, "step": 12870 }, { "epoch": 0.74, "grad_norm": 1.8440991640090942, "learning_rate": 3.385153687635444e-06, "loss": 1.015, "step": 12871 }, { "epoch": 0.74, "grad_norm": 1.9038454294204712, "learning_rate": 3.383760669791838e-06, "loss": 0.9003, "step": 12872 }, { "epoch": 0.74, "grad_norm": 1.6984950304031372, "learning_rate": 3.3823678802561677e-06, "loss": 0.9703, "step": 12873 }, { "epoch": 0.74, "grad_norm": 1.7837727069854736, "learning_rate": 3.3809753190764983e-06, "loss": 0.9246, "step": 12874 }, { "epoch": 0.74, "grad_norm": 1.101508378982544, "learning_rate": 3.3795829863008777e-06, "loss": 0.5568, "step": 12875 }, { "epoch": 0.74, "grad_norm": 1.751475214958191, "learning_rate": 3.378190881977359e-06, "loss": 0.8941, "step": 12876 }, { "epoch": 0.74, "grad_norm": 1.8393185138702393, "learning_rate": 3.376799006153971e-06, "loss": 0.979, "step": 12877 }, { "epoch": 0.74, "grad_norm": 1.878891110420227, "learning_rate": 3.3754073588787494e-06, "loss": 0.9126, "step": 12878 }, { "epoch": 0.74, "grad_norm": 1.8797088861465454, "learning_rate": 3.3740159401997173e-06, "loss": 0.8809, "step": 12879 }, { "epoch": 0.74, "grad_norm": 1.0130059719085693, "learning_rate": 3.3726247501648846e-06, "loss": 0.5417, "step": 12880 }, { "epoch": 0.74, "grad_norm": 1.6608471870422363, "learning_rate": 3.37123378882226e-06, "loss": 0.8555, "step": 12881 }, { "epoch": 0.74, "grad_norm": 1.7912064790725708, "learning_rate": 3.369843056219839e-06, "loss": 0.9811, "step": 12882 }, { "epoch": 0.74, "grad_norm": 1.8159829378128052, "learning_rate": 3.3684525524056156e-06, "loss": 0.828, "step": 12883 }, { "epoch": 0.74, "grad_norm": 1.7412052154541016, "learning_rate": 3.3670622774275676e-06, "loss": 0.8353, "step": 12884 }, { "epoch": 0.74, "grad_norm": 1.7044848203659058, "learning_rate": 3.3656722313336755e-06, "loss": 0.806, "step": 12885 }, { "epoch": 0.74, "grad_norm": 1.548651933670044, "learning_rate": 3.3642824141718986e-06, "loss": 0.9308, "step": 12886 }, { "epoch": 0.74, "grad_norm": 1.6368460655212402, "learning_rate": 3.362892825990203e-06, "loss": 1.0051, "step": 12887 }, { "epoch": 0.74, "grad_norm": 1.8173948526382446, "learning_rate": 3.361503466836532e-06, "loss": 0.9137, "step": 12888 }, { "epoch": 0.74, "grad_norm": 1.1178308725357056, "learning_rate": 3.3601143367588362e-06, "loss": 0.5839, "step": 12889 }, { "epoch": 0.74, "grad_norm": 1.6334292888641357, "learning_rate": 3.358725435805045e-06, "loss": 0.9938, "step": 12890 }, { "epoch": 0.74, "grad_norm": 1.720916509628296, "learning_rate": 3.3573367640230846e-06, "loss": 0.9691, "step": 12891 }, { "epoch": 0.74, "grad_norm": 1.7262259721755981, "learning_rate": 3.3559483214608822e-06, "loss": 0.9261, "step": 12892 }, { "epoch": 0.74, "grad_norm": 1.726747989654541, "learning_rate": 3.3545601081663405e-06, "loss": 0.9819, "step": 12893 }, { "epoch": 0.74, "grad_norm": 1.6461094617843628, "learning_rate": 3.3531721241873684e-06, "loss": 0.9791, "step": 12894 }, { "epoch": 0.74, "grad_norm": 1.6887973546981812, "learning_rate": 3.3517843695718567e-06, "loss": 0.8968, "step": 12895 }, { "epoch": 0.74, "grad_norm": 1.913801670074463, "learning_rate": 3.350396844367698e-06, "loss": 0.8532, "step": 12896 }, { "epoch": 0.74, "grad_norm": 1.8553576469421387, "learning_rate": 3.349009548622767e-06, "loss": 0.9709, "step": 12897 }, { "epoch": 0.74, "grad_norm": 1.8029811382293701, "learning_rate": 3.34762248238494e-06, "loss": 1.0315, "step": 12898 }, { "epoch": 0.74, "grad_norm": 1.7179360389709473, "learning_rate": 3.3462356457020762e-06, "loss": 0.8922, "step": 12899 }, { "epoch": 0.74, "grad_norm": 1.7548192739486694, "learning_rate": 3.3448490386220355e-06, "loss": 0.8201, "step": 12900 }, { "epoch": 0.74, "grad_norm": 1.611159086227417, "learning_rate": 3.3434626611926625e-06, "loss": 0.9366, "step": 12901 }, { "epoch": 0.74, "grad_norm": 2.0283126831054688, "learning_rate": 3.3420765134618006e-06, "loss": 1.0067, "step": 12902 }, { "epoch": 0.74, "grad_norm": 1.7182762622833252, "learning_rate": 3.340690595477277e-06, "loss": 0.9121, "step": 12903 }, { "epoch": 0.74, "grad_norm": 1.576112985610962, "learning_rate": 3.3393049072869198e-06, "loss": 0.8513, "step": 12904 }, { "epoch": 0.74, "grad_norm": 1.735619068145752, "learning_rate": 3.337919448938547e-06, "loss": 0.8703, "step": 12905 }, { "epoch": 0.74, "grad_norm": 1.8262648582458496, "learning_rate": 3.3365342204799613e-06, "loss": 0.9832, "step": 12906 }, { "epoch": 0.74, "grad_norm": 1.909550666809082, "learning_rate": 3.33514922195897e-06, "loss": 0.9173, "step": 12907 }, { "epoch": 0.74, "grad_norm": 1.7568025588989258, "learning_rate": 3.333764453423357e-06, "loss": 0.9044, "step": 12908 }, { "epoch": 0.74, "grad_norm": 1.8140579462051392, "learning_rate": 3.332379914920915e-06, "loss": 0.907, "step": 12909 }, { "epoch": 0.74, "grad_norm": 1.7118470668792725, "learning_rate": 3.330995606499413e-06, "loss": 0.8968, "step": 12910 }, { "epoch": 0.74, "grad_norm": 1.729712963104248, "learning_rate": 3.3296115282066245e-06, "loss": 0.9335, "step": 12911 }, { "epoch": 0.74, "grad_norm": 1.75454580783844, "learning_rate": 3.328227680090309e-06, "loss": 0.8658, "step": 12912 }, { "epoch": 0.74, "grad_norm": 2.0110607147216797, "learning_rate": 3.3268440621982222e-06, "loss": 0.9002, "step": 12913 }, { "epoch": 0.74, "grad_norm": 1.8920130729675293, "learning_rate": 3.3254606745781026e-06, "loss": 0.8568, "step": 12914 }, { "epoch": 0.74, "grad_norm": 1.8047552108764648, "learning_rate": 3.3240775172776952e-06, "loss": 0.9476, "step": 12915 }, { "epoch": 0.74, "grad_norm": 1.833772897720337, "learning_rate": 3.3226945903447196e-06, "loss": 0.916, "step": 12916 }, { "epoch": 0.74, "grad_norm": 1.830235481262207, "learning_rate": 3.321311893826905e-06, "loss": 0.8187, "step": 12917 }, { "epoch": 0.74, "grad_norm": 2.154555082321167, "learning_rate": 3.3199294277719573e-06, "loss": 0.5193, "step": 12918 }, { "epoch": 0.74, "grad_norm": 1.790439248085022, "learning_rate": 3.318547192227589e-06, "loss": 0.8954, "step": 12919 }, { "epoch": 0.74, "grad_norm": 1.786900281906128, "learning_rate": 3.31716518724149e-06, "loss": 0.9008, "step": 12920 }, { "epoch": 0.74, "grad_norm": 1.0621657371520996, "learning_rate": 3.315783412861352e-06, "loss": 0.5529, "step": 12921 }, { "epoch": 0.74, "grad_norm": 1.0171537399291992, "learning_rate": 3.3144018691348602e-06, "loss": 0.5673, "step": 12922 }, { "epoch": 0.74, "grad_norm": 1.809259295463562, "learning_rate": 3.3130205561096818e-06, "loss": 0.948, "step": 12923 }, { "epoch": 0.74, "grad_norm": 1.755825161933899, "learning_rate": 3.311639473833487e-06, "loss": 0.9291, "step": 12924 }, { "epoch": 0.74, "grad_norm": 1.730514645576477, "learning_rate": 3.310258622353928e-06, "loss": 0.9534, "step": 12925 }, { "epoch": 0.74, "grad_norm": 1.7440922260284424, "learning_rate": 3.3088780017186608e-06, "loss": 0.8313, "step": 12926 }, { "epoch": 0.74, "grad_norm": 1.7911334037780762, "learning_rate": 3.3074976119753178e-06, "loss": 0.8897, "step": 12927 }, { "epoch": 0.74, "grad_norm": 1.6797412633895874, "learning_rate": 3.3061174531715425e-06, "loss": 0.8669, "step": 12928 }, { "epoch": 0.74, "grad_norm": 1.809531569480896, "learning_rate": 3.304737525354951e-06, "loss": 0.9346, "step": 12929 }, { "epoch": 0.74, "grad_norm": 1.5755363702774048, "learning_rate": 3.3033578285731693e-06, "loss": 0.8273, "step": 12930 }, { "epoch": 0.74, "grad_norm": 1.689409613609314, "learning_rate": 3.301978362873798e-06, "loss": 0.8505, "step": 12931 }, { "epoch": 0.74, "grad_norm": 1.9128230810165405, "learning_rate": 3.3005991283044436e-06, "loss": 0.9701, "step": 12932 }, { "epoch": 0.74, "grad_norm": 1.6728066205978394, "learning_rate": 3.2992201249127033e-06, "loss": 0.9033, "step": 12933 }, { "epoch": 0.74, "grad_norm": 1.9323869943618774, "learning_rate": 3.2978413527461552e-06, "loss": 0.8976, "step": 12934 }, { "epoch": 0.74, "grad_norm": 1.7050065994262695, "learning_rate": 3.2964628118523832e-06, "loss": 0.8384, "step": 12935 }, { "epoch": 0.74, "grad_norm": 1.7353442907333374, "learning_rate": 3.295084502278951e-06, "loss": 0.9225, "step": 12936 }, { "epoch": 0.74, "grad_norm": 1.8594201803207397, "learning_rate": 3.2937064240734262e-06, "loss": 0.924, "step": 12937 }, { "epoch": 0.74, "grad_norm": 1.7685085535049438, "learning_rate": 3.292328577283356e-06, "loss": 0.8707, "step": 12938 }, { "epoch": 0.74, "grad_norm": 1.7772799730300903, "learning_rate": 3.290950961956293e-06, "loss": 0.8769, "step": 12939 }, { "epoch": 0.74, "grad_norm": 1.679996132850647, "learning_rate": 3.289573578139769e-06, "loss": 0.9618, "step": 12940 }, { "epoch": 0.74, "grad_norm": 1.7996190786361694, "learning_rate": 3.2881964258813172e-06, "loss": 0.9067, "step": 12941 }, { "epoch": 0.74, "grad_norm": 1.6842286586761475, "learning_rate": 3.2868195052284557e-06, "loss": 0.8927, "step": 12942 }, { "epoch": 0.74, "grad_norm": 1.8408339023590088, "learning_rate": 3.2854428162287046e-06, "loss": 0.9548, "step": 12943 }, { "epoch": 0.74, "grad_norm": 1.7755848169326782, "learning_rate": 3.2840663589295617e-06, "loss": 0.9062, "step": 12944 }, { "epoch": 0.74, "grad_norm": 1.5631972551345825, "learning_rate": 3.282690133378529e-06, "loss": 0.7774, "step": 12945 }, { "epoch": 0.74, "grad_norm": 1.7434961795806885, "learning_rate": 3.2813141396230986e-06, "loss": 0.9717, "step": 12946 }, { "epoch": 0.74, "grad_norm": 1.7239450216293335, "learning_rate": 3.2799383777107453e-06, "loss": 0.9651, "step": 12947 }, { "epoch": 0.74, "grad_norm": 1.7930244207382202, "learning_rate": 3.278562847688951e-06, "loss": 0.9002, "step": 12948 }, { "epoch": 0.74, "grad_norm": 1.7142078876495361, "learning_rate": 3.2771875496051743e-06, "loss": 0.9856, "step": 12949 }, { "epoch": 0.74, "grad_norm": 1.6232517957687378, "learning_rate": 3.275812483506878e-06, "loss": 0.8529, "step": 12950 }, { "epoch": 0.74, "grad_norm": 1.8481017351150513, "learning_rate": 3.2744376494415075e-06, "loss": 0.9415, "step": 12951 }, { "epoch": 0.74, "grad_norm": 1.7313182353973389, "learning_rate": 3.2730630474565096e-06, "loss": 0.9001, "step": 12952 }, { "epoch": 0.74, "grad_norm": 1.7166863679885864, "learning_rate": 3.2716886775993117e-06, "loss": 0.9696, "step": 12953 }, { "epoch": 0.74, "grad_norm": 1.6979272365570068, "learning_rate": 3.2703145399173453e-06, "loss": 0.9094, "step": 12954 }, { "epoch": 0.74, "grad_norm": 1.65584135055542, "learning_rate": 3.2689406344580233e-06, "loss": 0.8902, "step": 12955 }, { "epoch": 0.74, "grad_norm": 1.7679846286773682, "learning_rate": 3.2675669612687565e-06, "loss": 0.9014, "step": 12956 }, { "epoch": 0.74, "grad_norm": 1.5945175886154175, "learning_rate": 3.2661935203969518e-06, "loss": 0.8953, "step": 12957 }, { "epoch": 0.74, "grad_norm": 1.7289154529571533, "learning_rate": 3.264820311889996e-06, "loss": 0.8785, "step": 12958 }, { "epoch": 0.74, "grad_norm": 2.2390129566192627, "learning_rate": 3.263447335795279e-06, "loss": 0.8568, "step": 12959 }, { "epoch": 0.74, "grad_norm": 1.6853278875350952, "learning_rate": 3.2620745921601737e-06, "loss": 0.9589, "step": 12960 }, { "epoch": 0.74, "grad_norm": 1.7010923624038696, "learning_rate": 3.2607020810320558e-06, "loss": 0.9058, "step": 12961 }, { "epoch": 0.74, "grad_norm": 1.695469617843628, "learning_rate": 3.259329802458281e-06, "loss": 0.917, "step": 12962 }, { "epoch": 0.74, "grad_norm": 1.7931337356567383, "learning_rate": 3.2579577564862076e-06, "loss": 0.8935, "step": 12963 }, { "epoch": 0.74, "grad_norm": 1.7772189378738403, "learning_rate": 3.2565859431631765e-06, "loss": 0.8834, "step": 12964 }, { "epoch": 0.74, "grad_norm": 1.9803166389465332, "learning_rate": 3.2552143625365306e-06, "loss": 0.9446, "step": 12965 }, { "epoch": 0.74, "grad_norm": 1.8056056499481201, "learning_rate": 3.2538430146535927e-06, "loss": 0.8732, "step": 12966 }, { "epoch": 0.74, "grad_norm": 1.7885921001434326, "learning_rate": 3.2524718995616913e-06, "loss": 0.8989, "step": 12967 }, { "epoch": 0.74, "grad_norm": 1.7689409255981445, "learning_rate": 3.2511010173081327e-06, "loss": 0.9502, "step": 12968 }, { "epoch": 0.74, "grad_norm": 1.860883355140686, "learning_rate": 3.2497303679402258e-06, "loss": 0.93, "step": 12969 }, { "epoch": 0.74, "grad_norm": 1.7048285007476807, "learning_rate": 3.2483599515052723e-06, "loss": 0.8876, "step": 12970 }, { "epoch": 0.74, "grad_norm": 1.713229775428772, "learning_rate": 3.2469897680505515e-06, "loss": 0.8596, "step": 12971 }, { "epoch": 0.74, "grad_norm": 1.6767103672027588, "learning_rate": 3.2456198176233545e-06, "loss": 0.9543, "step": 12972 }, { "epoch": 0.74, "grad_norm": 1.7357815504074097, "learning_rate": 3.244250100270947e-06, "loss": 0.8644, "step": 12973 }, { "epoch": 0.74, "grad_norm": 1.882466435432434, "learning_rate": 3.242880616040599e-06, "loss": 0.8935, "step": 12974 }, { "epoch": 0.74, "grad_norm": 1.663617730140686, "learning_rate": 3.241511364979564e-06, "loss": 0.8514, "step": 12975 }, { "epoch": 0.74, "grad_norm": 1.702240228652954, "learning_rate": 3.2401423471350955e-06, "loss": 0.9161, "step": 12976 }, { "epoch": 0.74, "grad_norm": 1.813599705696106, "learning_rate": 3.238773562554425e-06, "loss": 0.8906, "step": 12977 }, { "epoch": 0.74, "grad_norm": 1.5721182823181152, "learning_rate": 3.237405011284799e-06, "loss": 0.9431, "step": 12978 }, { "epoch": 0.74, "grad_norm": 1.8185781240463257, "learning_rate": 3.236036693373431e-06, "loss": 0.9359, "step": 12979 }, { "epoch": 0.74, "grad_norm": 1.7295923233032227, "learning_rate": 3.234668608867547e-06, "loss": 0.853, "step": 12980 }, { "epoch": 0.74, "grad_norm": 1.7073886394500732, "learning_rate": 3.2333007578143473e-06, "loss": 0.8379, "step": 12981 }, { "epoch": 0.74, "grad_norm": 1.779390573501587, "learning_rate": 3.2319331402610397e-06, "loss": 0.889, "step": 12982 }, { "epoch": 0.74, "grad_norm": 1.8085041046142578, "learning_rate": 3.23056575625481e-06, "loss": 0.8762, "step": 12983 }, { "epoch": 0.74, "grad_norm": 1.6807066202163696, "learning_rate": 3.2291986058428506e-06, "loss": 0.9476, "step": 12984 }, { "epoch": 0.74, "grad_norm": 1.6979188919067383, "learning_rate": 3.2278316890723293e-06, "loss": 0.8902, "step": 12985 }, { "epoch": 0.74, "grad_norm": 1.868937611579895, "learning_rate": 3.2264650059904203e-06, "loss": 0.9113, "step": 12986 }, { "epoch": 0.74, "grad_norm": 1.90029776096344, "learning_rate": 3.225098556644286e-06, "loss": 0.8997, "step": 12987 }, { "epoch": 0.74, "grad_norm": 1.7030693292617798, "learning_rate": 3.2237323410810717e-06, "loss": 0.921, "step": 12988 }, { "epoch": 0.74, "grad_norm": 2.0223934650421143, "learning_rate": 3.2223663593479293e-06, "loss": 0.8821, "step": 12989 }, { "epoch": 0.74, "grad_norm": 1.7602988481521606, "learning_rate": 3.221000611491988e-06, "loss": 0.8874, "step": 12990 }, { "epoch": 0.75, "grad_norm": 1.736777424812317, "learning_rate": 3.219635097560382e-06, "loss": 0.9198, "step": 12991 }, { "epoch": 0.75, "grad_norm": 1.8135632276535034, "learning_rate": 3.218269817600226e-06, "loss": 1.0182, "step": 12992 }, { "epoch": 0.75, "grad_norm": 1.9046096801757812, "learning_rate": 3.2169047716586364e-06, "loss": 0.9709, "step": 12993 }, { "epoch": 0.75, "grad_norm": 1.081299901008606, "learning_rate": 3.215539959782714e-06, "loss": 0.5018, "step": 12994 }, { "epoch": 0.75, "grad_norm": 1.8905421495437622, "learning_rate": 3.2141753820195588e-06, "loss": 0.8946, "step": 12995 }, { "epoch": 0.75, "grad_norm": 2.0486741065979004, "learning_rate": 3.2128110384162515e-06, "loss": 0.8801, "step": 12996 }, { "epoch": 0.75, "grad_norm": 1.7285798788070679, "learning_rate": 3.21144692901988e-06, "loss": 0.9606, "step": 12997 }, { "epoch": 0.75, "grad_norm": 1.8632614612579346, "learning_rate": 3.2100830538775086e-06, "loss": 0.8923, "step": 12998 }, { "epoch": 0.75, "grad_norm": 1.7425857782363892, "learning_rate": 3.2087194130362033e-06, "loss": 0.8199, "step": 12999 }, { "epoch": 0.75, "grad_norm": 1.4580556154251099, "learning_rate": 3.207356006543024e-06, "loss": 0.8738, "step": 13000 }, { "epoch": 0.75, "grad_norm": 1.835984706878662, "learning_rate": 3.205992834445012e-06, "loss": 0.896, "step": 13001 }, { "epoch": 0.75, "grad_norm": 1.8194921016693115, "learning_rate": 3.204629896789212e-06, "loss": 0.8516, "step": 13002 }, { "epoch": 0.75, "grad_norm": 0.9852017164230347, "learning_rate": 3.203267193622649e-06, "loss": 0.5772, "step": 13003 }, { "epoch": 0.75, "grad_norm": 1.8939801454544067, "learning_rate": 3.201904724992352e-06, "loss": 0.9859, "step": 13004 }, { "epoch": 0.75, "grad_norm": 1.9345558881759644, "learning_rate": 3.2005424909453297e-06, "loss": 0.9309, "step": 13005 }, { "epoch": 0.75, "grad_norm": 1.8502147197723389, "learning_rate": 3.199180491528597e-06, "loss": 0.868, "step": 13006 }, { "epoch": 0.75, "grad_norm": 1.725514531135559, "learning_rate": 3.197818726789144e-06, "loss": 0.9148, "step": 13007 }, { "epoch": 0.75, "grad_norm": 1.6633045673370361, "learning_rate": 3.1964571967739687e-06, "loss": 0.8883, "step": 13008 }, { "epoch": 0.75, "grad_norm": 1.844893217086792, "learning_rate": 3.1950959015300486e-06, "loss": 0.8763, "step": 13009 }, { "epoch": 0.75, "grad_norm": 1.9353128671646118, "learning_rate": 3.1937348411043588e-06, "loss": 0.935, "step": 13010 }, { "epoch": 0.75, "grad_norm": 1.7447443008422852, "learning_rate": 3.192374015543871e-06, "loss": 0.8482, "step": 13011 }, { "epoch": 0.75, "grad_norm": 1.7648658752441406, "learning_rate": 3.191013424895536e-06, "loss": 0.9077, "step": 13012 }, { "epoch": 0.75, "grad_norm": 1.7657307386398315, "learning_rate": 3.189653069206311e-06, "loss": 0.9166, "step": 13013 }, { "epoch": 0.75, "grad_norm": 1.6768075227737427, "learning_rate": 3.1882929485231316e-06, "loss": 0.8811, "step": 13014 }, { "epoch": 0.75, "grad_norm": 1.8069775104522705, "learning_rate": 3.1869330628929385e-06, "loss": 0.9, "step": 13015 }, { "epoch": 0.75, "grad_norm": 1.7847970724105835, "learning_rate": 3.1855734123626493e-06, "loss": 0.9421, "step": 13016 }, { "epoch": 0.75, "grad_norm": 1.7322032451629639, "learning_rate": 3.1842139969791907e-06, "loss": 0.9391, "step": 13017 }, { "epoch": 0.75, "grad_norm": 1.6978659629821777, "learning_rate": 3.182854816789465e-06, "loss": 0.9857, "step": 13018 }, { "epoch": 0.75, "grad_norm": 1.6952332258224487, "learning_rate": 3.181495871840379e-06, "loss": 0.8781, "step": 13019 }, { "epoch": 0.75, "grad_norm": 1.6562113761901855, "learning_rate": 3.1801371621788203e-06, "loss": 0.9255, "step": 13020 }, { "epoch": 0.75, "grad_norm": 1.7698861360549927, "learning_rate": 3.1787786878516813e-06, "loss": 1.0115, "step": 13021 }, { "epoch": 0.75, "grad_norm": 1.94884192943573, "learning_rate": 3.1774204489058313e-06, "loss": 0.9073, "step": 13022 }, { "epoch": 0.75, "grad_norm": 1.6540274620056152, "learning_rate": 3.176062445388145e-06, "loss": 0.8815, "step": 13023 }, { "epoch": 0.75, "grad_norm": 1.8833669424057007, "learning_rate": 3.1747046773454838e-06, "loss": 0.9069, "step": 13024 }, { "epoch": 0.75, "grad_norm": 0.9806326627731323, "learning_rate": 3.1733471448246968e-06, "loss": 0.5096, "step": 13025 }, { "epoch": 0.75, "grad_norm": 1.677437424659729, "learning_rate": 3.171989847872632e-06, "loss": 0.9004, "step": 13026 }, { "epoch": 0.75, "grad_norm": 1.7745624780654907, "learning_rate": 3.1706327865361218e-06, "loss": 0.9371, "step": 13027 }, { "epoch": 0.75, "grad_norm": 1.6608716249465942, "learning_rate": 3.1692759608620004e-06, "loss": 0.962, "step": 13028 }, { "epoch": 0.75, "grad_norm": 1.6462064981460571, "learning_rate": 3.167919370897081e-06, "loss": 0.8764, "step": 13029 }, { "epoch": 0.75, "grad_norm": 1.8811894655227661, "learning_rate": 3.1665630166881833e-06, "loss": 0.9224, "step": 13030 }, { "epoch": 0.75, "grad_norm": 1.739756464958191, "learning_rate": 3.165206898282104e-06, "loss": 0.8838, "step": 13031 }, { "epoch": 0.75, "grad_norm": 1.033450961112976, "learning_rate": 3.1638510157256453e-06, "loss": 0.5078, "step": 13032 }, { "epoch": 0.75, "grad_norm": 1.6108418703079224, "learning_rate": 3.162495369065589e-06, "loss": 0.8754, "step": 13033 }, { "epoch": 0.75, "grad_norm": 1.6332170963287354, "learning_rate": 3.1611399583487213e-06, "loss": 0.8445, "step": 13034 }, { "epoch": 0.75, "grad_norm": 1.6060373783111572, "learning_rate": 3.1597847836218054e-06, "loss": 0.8578, "step": 13035 }, { "epoch": 0.75, "grad_norm": 1.8608494997024536, "learning_rate": 3.158429844931611e-06, "loss": 0.9151, "step": 13036 }, { "epoch": 0.75, "grad_norm": 1.9385156631469727, "learning_rate": 3.1570751423248935e-06, "loss": 0.9643, "step": 13037 }, { "epoch": 0.75, "grad_norm": 1.728148102760315, "learning_rate": 3.155720675848396e-06, "loss": 0.9174, "step": 13038 }, { "epoch": 0.75, "grad_norm": 1.7340481281280518, "learning_rate": 3.154366445548861e-06, "loss": 0.9254, "step": 13039 }, { "epoch": 0.75, "grad_norm": 1.9110102653503418, "learning_rate": 3.1530124514730155e-06, "loss": 0.9293, "step": 13040 }, { "epoch": 0.75, "grad_norm": 1.9378286600112915, "learning_rate": 3.1516586936675863e-06, "loss": 0.9301, "step": 13041 }, { "epoch": 0.75, "grad_norm": 1.9784626960754395, "learning_rate": 3.1503051721792833e-06, "loss": 0.9802, "step": 13042 }, { "epoch": 0.75, "grad_norm": 1.7403974533081055, "learning_rate": 3.148951887054814e-06, "loss": 0.8985, "step": 13043 }, { "epoch": 0.75, "grad_norm": 2.2266108989715576, "learning_rate": 3.1475988383408774e-06, "loss": 0.8071, "step": 13044 }, { "epoch": 0.75, "grad_norm": 1.7259621620178223, "learning_rate": 3.1462460260841675e-06, "loss": 0.9289, "step": 13045 }, { "epoch": 0.75, "grad_norm": 1.8038628101348877, "learning_rate": 3.1448934503313588e-06, "loss": 0.864, "step": 13046 }, { "epoch": 0.75, "grad_norm": 1.7620233297348022, "learning_rate": 3.1435411111291304e-06, "loss": 0.952, "step": 13047 }, { "epoch": 0.75, "grad_norm": 1.9062740802764893, "learning_rate": 3.1421890085241437e-06, "loss": 0.9023, "step": 13048 }, { "epoch": 0.75, "grad_norm": 1.7724922895431519, "learning_rate": 3.14083714256306e-06, "loss": 0.8166, "step": 13049 }, { "epoch": 0.75, "grad_norm": 1.7863596677780151, "learning_rate": 3.139485513292523e-06, "loss": 0.9269, "step": 13050 }, { "epoch": 0.75, "grad_norm": 1.7511223554611206, "learning_rate": 3.1381341207591797e-06, "loss": 0.8416, "step": 13051 }, { "epoch": 0.75, "grad_norm": 1.6451618671417236, "learning_rate": 3.136782965009658e-06, "loss": 0.8982, "step": 13052 }, { "epoch": 0.75, "grad_norm": 1.807515263557434, "learning_rate": 3.135432046090584e-06, "loss": 0.8506, "step": 13053 }, { "epoch": 0.75, "grad_norm": 1.9561550617218018, "learning_rate": 3.1340813640485777e-06, "loss": 0.9234, "step": 13054 }, { "epoch": 0.75, "grad_norm": 1.7279635667800903, "learning_rate": 3.1327309189302415e-06, "loss": 0.9816, "step": 13055 }, { "epoch": 0.75, "grad_norm": 1.6455328464508057, "learning_rate": 3.1313807107821815e-06, "loss": 0.8733, "step": 13056 }, { "epoch": 0.75, "grad_norm": 1.6129493713378906, "learning_rate": 3.1300307396509833e-06, "loss": 0.7935, "step": 13057 }, { "epoch": 0.75, "grad_norm": 1.6675885915756226, "learning_rate": 3.128681005583236e-06, "loss": 0.9505, "step": 13058 }, { "epoch": 0.75, "grad_norm": 1.9253281354904175, "learning_rate": 3.1273315086255106e-06, "loss": 0.9616, "step": 13059 }, { "epoch": 0.75, "grad_norm": 1.643847942352295, "learning_rate": 3.1259822488243805e-06, "loss": 0.8076, "step": 13060 }, { "epoch": 0.75, "grad_norm": 1.8107280731201172, "learning_rate": 3.1246332262263977e-06, "loss": 0.9682, "step": 13061 }, { "epoch": 0.75, "grad_norm": 1.8633383512496948, "learning_rate": 3.123284440878119e-06, "loss": 0.8906, "step": 13062 }, { "epoch": 0.75, "grad_norm": 1.893514633178711, "learning_rate": 3.1219358928260823e-06, "loss": 0.9793, "step": 13063 }, { "epoch": 0.75, "grad_norm": 1.8098845481872559, "learning_rate": 3.120587582116825e-06, "loss": 0.8851, "step": 13064 }, { "epoch": 0.75, "grad_norm": 1.9624648094177246, "learning_rate": 3.1192395087968775e-06, "loss": 0.9503, "step": 13065 }, { "epoch": 0.75, "grad_norm": 1.692080020904541, "learning_rate": 3.1178916729127497e-06, "loss": 0.8306, "step": 13066 }, { "epoch": 0.75, "grad_norm": 1.879489541053772, "learning_rate": 3.116544074510959e-06, "loss": 0.8684, "step": 13067 }, { "epoch": 0.75, "grad_norm": 1.7086669206619263, "learning_rate": 3.115196713638e-06, "loss": 0.9053, "step": 13068 }, { "epoch": 0.75, "grad_norm": 1.8532218933105469, "learning_rate": 3.1138495903403754e-06, "loss": 0.8797, "step": 13069 }, { "epoch": 0.75, "grad_norm": 1.6436641216278076, "learning_rate": 3.1125027046645616e-06, "loss": 0.9979, "step": 13070 }, { "epoch": 0.75, "grad_norm": 1.8048409223556519, "learning_rate": 3.111156056657044e-06, "loss": 0.8947, "step": 13071 }, { "epoch": 0.75, "grad_norm": 1.8001627922058105, "learning_rate": 3.1098096463642834e-06, "loss": 0.8906, "step": 13072 }, { "epoch": 0.75, "grad_norm": 1.830783724784851, "learning_rate": 3.108463473832749e-06, "loss": 0.8569, "step": 13073 }, { "epoch": 0.75, "grad_norm": 1.7894561290740967, "learning_rate": 3.1071175391088857e-06, "loss": 0.8195, "step": 13074 }, { "epoch": 0.75, "grad_norm": 1.7809464931488037, "learning_rate": 3.105771842239146e-06, "loss": 0.8661, "step": 13075 }, { "epoch": 0.75, "grad_norm": 1.8369849920272827, "learning_rate": 3.1044263832699574e-06, "loss": 0.9988, "step": 13076 }, { "epoch": 0.75, "grad_norm": 0.9826326370239258, "learning_rate": 3.103081162247752e-06, "loss": 0.5467, "step": 13077 }, { "epoch": 0.75, "grad_norm": 1.825492024421692, "learning_rate": 3.1017361792189537e-06, "loss": 0.9279, "step": 13078 }, { "epoch": 0.75, "grad_norm": 1.6958202123641968, "learning_rate": 3.100391434229967e-06, "loss": 0.8574, "step": 13079 }, { "epoch": 0.75, "grad_norm": 1.9676889181137085, "learning_rate": 3.0990469273272016e-06, "loss": 0.8827, "step": 13080 }, { "epoch": 0.75, "grad_norm": 1.9030475616455078, "learning_rate": 3.0977026585570467e-06, "loss": 0.9926, "step": 13081 }, { "epoch": 0.75, "grad_norm": 1.893059492111206, "learning_rate": 3.0963586279658963e-06, "loss": 0.9696, "step": 13082 }, { "epoch": 0.75, "grad_norm": 1.7339398860931396, "learning_rate": 3.095014835600121e-06, "loss": 0.9039, "step": 13083 }, { "epoch": 0.75, "grad_norm": 1.6585297584533691, "learning_rate": 3.093671281506099e-06, "loss": 0.9193, "step": 13084 }, { "epoch": 0.75, "grad_norm": 1.7605347633361816, "learning_rate": 3.0923279657301853e-06, "loss": 0.868, "step": 13085 }, { "epoch": 0.75, "grad_norm": 1.6108390092849731, "learning_rate": 3.090984888318741e-06, "loss": 0.8874, "step": 13086 }, { "epoch": 0.75, "grad_norm": 1.7320451736450195, "learning_rate": 3.0896420493181058e-06, "loss": 0.9975, "step": 13087 }, { "epoch": 0.75, "grad_norm": 1.9116482734680176, "learning_rate": 3.0882994487746233e-06, "loss": 0.9738, "step": 13088 }, { "epoch": 0.75, "grad_norm": 1.7406176328659058, "learning_rate": 3.0869570867346167e-06, "loss": 0.9516, "step": 13089 }, { "epoch": 0.75, "grad_norm": 1.8883994817733765, "learning_rate": 3.08561496324441e-06, "loss": 0.9032, "step": 13090 }, { "epoch": 0.75, "grad_norm": 1.8087818622589111, "learning_rate": 3.0842730783503195e-06, "loss": 0.8918, "step": 13091 }, { "epoch": 0.75, "grad_norm": 1.7573754787445068, "learning_rate": 3.0829314320986436e-06, "loss": 0.8815, "step": 13092 }, { "epoch": 0.75, "grad_norm": 1.8649489879608154, "learning_rate": 3.0815900245356857e-06, "loss": 0.9297, "step": 13093 }, { "epoch": 0.75, "grad_norm": 0.9662258625030518, "learning_rate": 3.0802488557077257e-06, "loss": 0.5492, "step": 13094 }, { "epoch": 0.75, "grad_norm": 1.8766885995864868, "learning_rate": 3.078907925661052e-06, "loss": 0.9706, "step": 13095 }, { "epoch": 0.75, "grad_norm": 1.7519360780715942, "learning_rate": 3.0775672344419305e-06, "loss": 0.9203, "step": 13096 }, { "epoch": 0.75, "grad_norm": 1.7163918018341064, "learning_rate": 3.0762267820966285e-06, "loss": 0.8335, "step": 13097 }, { "epoch": 0.75, "grad_norm": 1.8738588094711304, "learning_rate": 3.074886568671397e-06, "loss": 0.9792, "step": 13098 }, { "epoch": 0.75, "grad_norm": 1.6981703042984009, "learning_rate": 3.0735465942124877e-06, "loss": 0.9329, "step": 13099 }, { "epoch": 0.75, "grad_norm": 1.8276050090789795, "learning_rate": 3.0722068587661346e-06, "loss": 0.8767, "step": 13100 }, { "epoch": 0.75, "grad_norm": 1.7266253232955933, "learning_rate": 3.0708673623785713e-06, "loss": 0.9123, "step": 13101 }, { "epoch": 0.75, "grad_norm": 1.7737120389938354, "learning_rate": 3.0695281050960224e-06, "loss": 0.8927, "step": 13102 }, { "epoch": 0.75, "grad_norm": 1.770328164100647, "learning_rate": 3.0681890869646957e-06, "loss": 0.9232, "step": 13103 }, { "epoch": 0.75, "grad_norm": 1.7303805351257324, "learning_rate": 3.066850308030803e-06, "loss": 0.8801, "step": 13104 }, { "epoch": 0.75, "grad_norm": 1.819445013999939, "learning_rate": 3.0655117683405378e-06, "loss": 0.9607, "step": 13105 }, { "epoch": 0.75, "grad_norm": 1.722355842590332, "learning_rate": 3.0641734679400925e-06, "loss": 0.9052, "step": 13106 }, { "epoch": 0.75, "grad_norm": 1.8268797397613525, "learning_rate": 3.062835406875643e-06, "loss": 0.9597, "step": 13107 }, { "epoch": 0.75, "grad_norm": 1.6671241521835327, "learning_rate": 3.0614975851933694e-06, "loss": 0.8755, "step": 13108 }, { "epoch": 0.75, "grad_norm": 1.6749218702316284, "learning_rate": 3.060160002939425e-06, "loss": 0.9283, "step": 13109 }, { "epoch": 0.75, "grad_norm": 1.7422959804534912, "learning_rate": 3.0588226601599803e-06, "loss": 0.9027, "step": 13110 }, { "epoch": 0.75, "grad_norm": 1.7745940685272217, "learning_rate": 3.057485556901173e-06, "loss": 0.8485, "step": 13111 }, { "epoch": 0.75, "grad_norm": 1.1069972515106201, "learning_rate": 3.0561486932091487e-06, "loss": 0.5914, "step": 13112 }, { "epoch": 0.75, "grad_norm": 1.6952247619628906, "learning_rate": 3.0548120691300344e-06, "loss": 0.9315, "step": 13113 }, { "epoch": 0.75, "grad_norm": 1.8256460428237915, "learning_rate": 3.0534756847099567e-06, "loss": 0.852, "step": 13114 }, { "epoch": 0.75, "grad_norm": 1.9599213600158691, "learning_rate": 3.052139539995026e-06, "loss": 1.0084, "step": 13115 }, { "epoch": 0.75, "grad_norm": 1.946385145187378, "learning_rate": 3.0508036350313553e-06, "loss": 0.8959, "step": 13116 }, { "epoch": 0.75, "grad_norm": 2.651458501815796, "learning_rate": 3.0494679698650353e-06, "loss": 0.8646, "step": 13117 }, { "epoch": 0.75, "grad_norm": 1.7008143663406372, "learning_rate": 3.0481325445421604e-06, "loss": 0.9033, "step": 13118 }, { "epoch": 0.75, "grad_norm": 1.6350711584091187, "learning_rate": 3.0467973591088163e-06, "loss": 0.8603, "step": 13119 }, { "epoch": 0.75, "grad_norm": 1.908288598060608, "learning_rate": 3.0454624136110676e-06, "loss": 0.9841, "step": 13120 }, { "epoch": 0.75, "grad_norm": 1.8394027948379517, "learning_rate": 3.0441277080949883e-06, "loss": 0.8164, "step": 13121 }, { "epoch": 0.75, "grad_norm": 2.020101308822632, "learning_rate": 3.0427932426066286e-06, "loss": 0.9294, "step": 13122 }, { "epoch": 0.75, "grad_norm": 1.7723796367645264, "learning_rate": 3.041459017192042e-06, "loss": 0.8761, "step": 13123 }, { "epoch": 0.75, "grad_norm": 1.7550098896026611, "learning_rate": 3.0401250318972643e-06, "loss": 0.8431, "step": 13124 }, { "epoch": 0.75, "grad_norm": 0.9901959300041199, "learning_rate": 3.0387912867683334e-06, "loss": 0.5301, "step": 13125 }, { "epoch": 0.75, "grad_norm": 1.7972277402877808, "learning_rate": 3.037457781851266e-06, "loss": 0.9158, "step": 13126 }, { "epoch": 0.75, "grad_norm": 1.725225806236267, "learning_rate": 3.0361245171920862e-06, "loss": 0.8734, "step": 13127 }, { "epoch": 0.75, "grad_norm": 1.5979008674621582, "learning_rate": 3.0347914928367917e-06, "loss": 0.8932, "step": 13128 }, { "epoch": 0.75, "grad_norm": 1.6955655813217163, "learning_rate": 3.0334587088313903e-06, "loss": 0.9194, "step": 13129 }, { "epoch": 0.75, "grad_norm": 1.872408151626587, "learning_rate": 3.0321261652218647e-06, "loss": 0.8756, "step": 13130 }, { "epoch": 0.75, "grad_norm": 1.6361650228500366, "learning_rate": 3.0307938620542023e-06, "loss": 0.9277, "step": 13131 }, { "epoch": 0.75, "grad_norm": 1.7348065376281738, "learning_rate": 3.029461799374378e-06, "loss": 0.9003, "step": 13132 }, { "epoch": 0.75, "grad_norm": 0.9422408938407898, "learning_rate": 3.0281299772283534e-06, "loss": 0.5573, "step": 13133 }, { "epoch": 0.75, "grad_norm": 1.7520490884780884, "learning_rate": 3.0267983956620907e-06, "loss": 0.9474, "step": 13134 }, { "epoch": 0.75, "grad_norm": 1.8453209400177002, "learning_rate": 3.025467054721534e-06, "loss": 0.9664, "step": 13135 }, { "epoch": 0.75, "grad_norm": 1.849521279335022, "learning_rate": 3.0241359544526296e-06, "loss": 0.9295, "step": 13136 }, { "epoch": 0.75, "grad_norm": 1.7623339891433716, "learning_rate": 3.0228050949013033e-06, "loss": 0.8474, "step": 13137 }, { "epoch": 0.75, "grad_norm": 1.6508996486663818, "learning_rate": 3.0214744761134863e-06, "loss": 0.8804, "step": 13138 }, { "epoch": 0.75, "grad_norm": 1.7103842496871948, "learning_rate": 3.0201440981350892e-06, "loss": 0.9105, "step": 13139 }, { "epoch": 0.75, "grad_norm": 3.468301773071289, "learning_rate": 3.018813961012025e-06, "loss": 0.8877, "step": 13140 }, { "epoch": 0.75, "grad_norm": 1.0021722316741943, "learning_rate": 3.017484064790186e-06, "loss": 0.4864, "step": 13141 }, { "epoch": 0.75, "grad_norm": 1.6509027481079102, "learning_rate": 3.016154409515467e-06, "loss": 0.8585, "step": 13142 }, { "epoch": 0.75, "grad_norm": 1.9704585075378418, "learning_rate": 3.0148249952337536e-06, "loss": 0.9119, "step": 13143 }, { "epoch": 0.75, "grad_norm": 1.6537797451019287, "learning_rate": 3.013495821990915e-06, "loss": 0.9343, "step": 13144 }, { "epoch": 0.75, "grad_norm": 1.8629426956176758, "learning_rate": 3.0121668898328225e-06, "loss": 0.9428, "step": 13145 }, { "epoch": 0.75, "grad_norm": 1.9515079259872437, "learning_rate": 3.0108381988053283e-06, "loss": 0.9222, "step": 13146 }, { "epoch": 0.75, "grad_norm": 1.6965047121047974, "learning_rate": 3.0095097489542867e-06, "loss": 0.88, "step": 13147 }, { "epoch": 0.75, "grad_norm": 1.667617917060852, "learning_rate": 3.008181540325533e-06, "loss": 0.8564, "step": 13148 }, { "epoch": 0.75, "grad_norm": 1.7944458723068237, "learning_rate": 3.0068535729649074e-06, "loss": 0.9791, "step": 13149 }, { "epoch": 0.75, "grad_norm": 1.6819769144058228, "learning_rate": 3.0055258469182267e-06, "loss": 0.8316, "step": 13150 }, { "epoch": 0.75, "grad_norm": 1.7586705684661865, "learning_rate": 3.004198362231315e-06, "loss": 0.9353, "step": 13151 }, { "epoch": 0.75, "grad_norm": 1.6299564838409424, "learning_rate": 3.0028711189499717e-06, "loss": 0.8682, "step": 13152 }, { "epoch": 0.75, "grad_norm": 1.744849443435669, "learning_rate": 3.0015441171200045e-06, "loss": 1.0035, "step": 13153 }, { "epoch": 0.75, "grad_norm": 1.9347338676452637, "learning_rate": 3.0002173567871964e-06, "loss": 0.9465, "step": 13154 }, { "epoch": 0.75, "grad_norm": 1.6929874420166016, "learning_rate": 2.9988908379973346e-06, "loss": 0.9204, "step": 13155 }, { "epoch": 0.75, "grad_norm": 1.6636193990707397, "learning_rate": 2.997564560796196e-06, "loss": 0.8773, "step": 13156 }, { "epoch": 0.75, "grad_norm": 1.7956461906433105, "learning_rate": 2.9962385252295414e-06, "loss": 0.8391, "step": 13157 }, { "epoch": 0.75, "grad_norm": 1.6897242069244385, "learning_rate": 2.9949127313431335e-06, "loss": 0.9538, "step": 13158 }, { "epoch": 0.75, "grad_norm": 1.8598498106002808, "learning_rate": 2.9935871791827166e-06, "loss": 0.942, "step": 13159 }, { "epoch": 0.75, "grad_norm": 1.9431281089782715, "learning_rate": 2.9922618687940374e-06, "loss": 0.9107, "step": 13160 }, { "epoch": 0.75, "grad_norm": 1.6600600481033325, "learning_rate": 2.9909368002228223e-06, "loss": 0.8868, "step": 13161 }, { "epoch": 0.75, "grad_norm": 1.6209518909454346, "learning_rate": 2.989611973514803e-06, "loss": 0.9199, "step": 13162 }, { "epoch": 0.75, "grad_norm": 1.7206671237945557, "learning_rate": 2.9882873887156885e-06, "loss": 0.9183, "step": 13163 }, { "epoch": 0.75, "grad_norm": 1.82438325881958, "learning_rate": 2.986963045871193e-06, "loss": 0.9486, "step": 13164 }, { "epoch": 0.76, "grad_norm": 1.9009443521499634, "learning_rate": 2.9856389450270085e-06, "loss": 0.9075, "step": 13165 }, { "epoch": 0.76, "grad_norm": 1.6566468477249146, "learning_rate": 2.984315086228834e-06, "loss": 0.9397, "step": 13166 }, { "epoch": 0.76, "grad_norm": 1.7568695545196533, "learning_rate": 2.982991469522346e-06, "loss": 0.8926, "step": 13167 }, { "epoch": 0.76, "grad_norm": 1.7237212657928467, "learning_rate": 2.9816680949532207e-06, "loss": 0.8874, "step": 13168 }, { "epoch": 0.76, "grad_norm": 1.806574821472168, "learning_rate": 2.9803449625671266e-06, "loss": 0.883, "step": 13169 }, { "epoch": 0.76, "grad_norm": 1.788259744644165, "learning_rate": 2.9790220724097173e-06, "loss": 0.8566, "step": 13170 }, { "epoch": 0.76, "grad_norm": 1.6851210594177246, "learning_rate": 2.9776994245266465e-06, "loss": 0.888, "step": 13171 }, { "epoch": 0.76, "grad_norm": 1.7277971506118774, "learning_rate": 2.97637701896355e-06, "loss": 0.8776, "step": 13172 }, { "epoch": 0.76, "grad_norm": 1.7069425582885742, "learning_rate": 2.9750548557660663e-06, "loss": 0.8802, "step": 13173 }, { "epoch": 0.76, "grad_norm": 1.6721842288970947, "learning_rate": 2.9737329349798115e-06, "loss": 0.8595, "step": 13174 }, { "epoch": 0.76, "grad_norm": 1.721893548965454, "learning_rate": 2.9724112566504072e-06, "loss": 0.9999, "step": 13175 }, { "epoch": 0.76, "grad_norm": 2.011451244354248, "learning_rate": 2.9710898208234593e-06, "loss": 0.8833, "step": 13176 }, { "epoch": 0.76, "grad_norm": 1.6708012819290161, "learning_rate": 2.9697686275445703e-06, "loss": 0.8679, "step": 13177 }, { "epoch": 0.76, "grad_norm": 1.729573369026184, "learning_rate": 2.968447676859325e-06, "loss": 0.9192, "step": 13178 }, { "epoch": 0.76, "grad_norm": 1.8127408027648926, "learning_rate": 2.967126968813312e-06, "loss": 0.9578, "step": 13179 }, { "epoch": 0.76, "grad_norm": 1.6849807500839233, "learning_rate": 2.965806503452098e-06, "loss": 1.0057, "step": 13180 }, { "epoch": 0.76, "grad_norm": 1.6070747375488281, "learning_rate": 2.964486280821256e-06, "loss": 0.9915, "step": 13181 }, { "epoch": 0.76, "grad_norm": 0.9959151744842529, "learning_rate": 2.963166300966336e-06, "loss": 0.5262, "step": 13182 }, { "epoch": 0.76, "grad_norm": 1.8654508590698242, "learning_rate": 2.961846563932893e-06, "loss": 0.9353, "step": 13183 }, { "epoch": 0.76, "grad_norm": 1.695183277130127, "learning_rate": 2.9605270697664624e-06, "loss": 0.8821, "step": 13184 }, { "epoch": 0.76, "grad_norm": 1.6865360736846924, "learning_rate": 2.9592078185125783e-06, "loss": 0.9424, "step": 13185 }, { "epoch": 0.76, "grad_norm": 1.8784617185592651, "learning_rate": 2.957888810216768e-06, "loss": 0.9317, "step": 13186 }, { "epoch": 0.76, "grad_norm": 1.8124345541000366, "learning_rate": 2.9565700449245407e-06, "loss": 0.8859, "step": 13187 }, { "epoch": 0.76, "grad_norm": 1.7845858335494995, "learning_rate": 2.9552515226814084e-06, "loss": 0.9281, "step": 13188 }, { "epoch": 0.76, "grad_norm": 1.875267744064331, "learning_rate": 2.953933243532865e-06, "loss": 0.9778, "step": 13189 }, { "epoch": 0.76, "grad_norm": 1.817422866821289, "learning_rate": 2.9526152075244054e-06, "loss": 0.9433, "step": 13190 }, { "epoch": 0.76, "grad_norm": 1.8604117631912231, "learning_rate": 2.951297414701506e-06, "loss": 0.9248, "step": 13191 }, { "epoch": 0.76, "grad_norm": 1.7330478429794312, "learning_rate": 2.9499798651096466e-06, "loss": 0.9422, "step": 13192 }, { "epoch": 0.76, "grad_norm": 1.9802653789520264, "learning_rate": 2.9486625587942854e-06, "loss": 0.9167, "step": 13193 }, { "epoch": 0.76, "grad_norm": 1.7999430894851685, "learning_rate": 2.947345495800885e-06, "loss": 0.8833, "step": 13194 }, { "epoch": 0.76, "grad_norm": 1.666460633277893, "learning_rate": 2.946028676174888e-06, "loss": 0.8997, "step": 13195 }, { "epoch": 0.76, "grad_norm": 1.7075555324554443, "learning_rate": 2.9447120999617363e-06, "loss": 0.8959, "step": 13196 }, { "epoch": 0.76, "grad_norm": 1.7928537130355835, "learning_rate": 2.943395767206866e-06, "loss": 0.8873, "step": 13197 }, { "epoch": 0.76, "grad_norm": 1.7476816177368164, "learning_rate": 2.9420796779556916e-06, "loss": 0.9267, "step": 13198 }, { "epoch": 0.76, "grad_norm": 1.8047982454299927, "learning_rate": 2.940763832253636e-06, "loss": 0.8948, "step": 13199 }, { "epoch": 0.76, "grad_norm": 1.8126059770584106, "learning_rate": 2.939448230146098e-06, "loss": 0.9162, "step": 13200 }, { "epoch": 0.76, "grad_norm": 1.937567114830017, "learning_rate": 2.9381328716784816e-06, "loss": 0.8727, "step": 13201 }, { "epoch": 0.76, "grad_norm": 1.6910878419876099, "learning_rate": 2.936817756896171e-06, "loss": 0.9754, "step": 13202 }, { "epoch": 0.76, "grad_norm": 1.8811239004135132, "learning_rate": 2.935502885844551e-06, "loss": 0.918, "step": 13203 }, { "epoch": 0.76, "grad_norm": 1.7432224750518799, "learning_rate": 2.9341882585689908e-06, "loss": 0.9228, "step": 13204 }, { "epoch": 0.76, "grad_norm": 1.743201494216919, "learning_rate": 2.932873875114859e-06, "loss": 0.8639, "step": 13205 }, { "epoch": 0.76, "grad_norm": 1.8480312824249268, "learning_rate": 2.9315597355275048e-06, "loss": 0.9222, "step": 13206 }, { "epoch": 0.76, "grad_norm": 1.7654367685317993, "learning_rate": 2.9302458398522836e-06, "loss": 0.8815, "step": 13207 }, { "epoch": 0.76, "grad_norm": 1.85563063621521, "learning_rate": 2.9289321881345257e-06, "loss": 0.9126, "step": 13208 }, { "epoch": 0.76, "grad_norm": 1.093630075454712, "learning_rate": 2.9276187804195664e-06, "loss": 0.5255, "step": 13209 }, { "epoch": 0.76, "grad_norm": 1.7472140789031982, "learning_rate": 2.9263056167527293e-06, "loss": 0.8998, "step": 13210 }, { "epoch": 0.76, "grad_norm": 1.8472427129745483, "learning_rate": 2.924992697179324e-06, "loss": 0.9155, "step": 13211 }, { "epoch": 0.76, "grad_norm": 1.938778281211853, "learning_rate": 2.923680021744659e-06, "loss": 0.8698, "step": 13212 }, { "epoch": 0.76, "grad_norm": 1.8026741743087769, "learning_rate": 2.9223675904940274e-06, "loss": 0.8661, "step": 13213 }, { "epoch": 0.76, "grad_norm": 1.9690545797348022, "learning_rate": 2.9210554034727236e-06, "loss": 1.0288, "step": 13214 }, { "epoch": 0.76, "grad_norm": 1.7886452674865723, "learning_rate": 2.919743460726019e-06, "loss": 0.9428, "step": 13215 }, { "epoch": 0.76, "grad_norm": 1.6505091190338135, "learning_rate": 2.9184317622991933e-06, "loss": 0.8912, "step": 13216 }, { "epoch": 0.76, "grad_norm": 1.8436967134475708, "learning_rate": 2.9171203082375033e-06, "loss": 0.8631, "step": 13217 }, { "epoch": 0.76, "grad_norm": 1.7760852575302124, "learning_rate": 2.9158090985862085e-06, "loss": 0.9646, "step": 13218 }, { "epoch": 0.76, "grad_norm": 1.789567470550537, "learning_rate": 2.914498133390551e-06, "loss": 0.8927, "step": 13219 }, { "epoch": 0.76, "grad_norm": 1.7957115173339844, "learning_rate": 2.9131874126957728e-06, "loss": 0.9065, "step": 13220 }, { "epoch": 0.76, "grad_norm": 1.6785744428634644, "learning_rate": 2.9118769365470967e-06, "loss": 0.9948, "step": 13221 }, { "epoch": 0.76, "grad_norm": 1.6264938116073608, "learning_rate": 2.910566704989749e-06, "loss": 0.9087, "step": 13222 }, { "epoch": 0.76, "grad_norm": 1.6964391469955444, "learning_rate": 2.9092567180689436e-06, "loss": 0.8738, "step": 13223 }, { "epoch": 0.76, "grad_norm": 1.7114852666854858, "learning_rate": 2.907946975829877e-06, "loss": 0.9582, "step": 13224 }, { "epoch": 0.76, "grad_norm": 1.8612910509109497, "learning_rate": 2.9066374783177543e-06, "loss": 0.9365, "step": 13225 }, { "epoch": 0.76, "grad_norm": 1.7882800102233887, "learning_rate": 2.905328225577755e-06, "loss": 0.9113, "step": 13226 }, { "epoch": 0.76, "grad_norm": 1.8253496885299683, "learning_rate": 2.904019217655062e-06, "loss": 0.9329, "step": 13227 }, { "epoch": 0.76, "grad_norm": 1.815693736076355, "learning_rate": 2.9027104545948414e-06, "loss": 0.8785, "step": 13228 }, { "epoch": 0.76, "grad_norm": 1.9561164379119873, "learning_rate": 2.9014019364422606e-06, "loss": 0.9548, "step": 13229 }, { "epoch": 0.76, "grad_norm": 1.7446643114089966, "learning_rate": 2.9000936632424682e-06, "loss": 0.9702, "step": 13230 }, { "epoch": 0.76, "grad_norm": 1.8888019323349, "learning_rate": 2.898785635040612e-06, "loss": 0.8617, "step": 13231 }, { "epoch": 0.76, "grad_norm": 1.8743476867675781, "learning_rate": 2.897477851881825e-06, "loss": 0.8172, "step": 13232 }, { "epoch": 0.76, "grad_norm": 1.8921974897384644, "learning_rate": 2.896170313811236e-06, "loss": 0.8582, "step": 13233 }, { "epoch": 0.76, "grad_norm": 1.8024595975875854, "learning_rate": 2.8948630208739704e-06, "loss": 0.8963, "step": 13234 }, { "epoch": 0.76, "grad_norm": 0.9709048271179199, "learning_rate": 2.8935559731151295e-06, "loss": 0.497, "step": 13235 }, { "epoch": 0.76, "grad_norm": 1.7381986379623413, "learning_rate": 2.892249170579826e-06, "loss": 0.919, "step": 13236 }, { "epoch": 0.76, "grad_norm": 1.6300166845321655, "learning_rate": 2.8909426133131447e-06, "loss": 0.8549, "step": 13237 }, { "epoch": 0.76, "grad_norm": 1.6343140602111816, "learning_rate": 2.8896363013601793e-06, "loss": 0.8764, "step": 13238 }, { "epoch": 0.76, "grad_norm": 1.8076062202453613, "learning_rate": 2.888330234765999e-06, "loss": 1.0231, "step": 13239 }, { "epoch": 0.76, "grad_norm": 1.988242506980896, "learning_rate": 2.887024413575681e-06, "loss": 0.9234, "step": 13240 }, { "epoch": 0.76, "grad_norm": 1.8430019617080688, "learning_rate": 2.8857188378342773e-06, "loss": 0.9245, "step": 13241 }, { "epoch": 0.76, "grad_norm": 1.6071193218231201, "learning_rate": 2.884413507586844e-06, "loss": 0.8541, "step": 13242 }, { "epoch": 0.76, "grad_norm": 1.1695038080215454, "learning_rate": 2.8831084228784234e-06, "loss": 0.6118, "step": 13243 }, { "epoch": 0.76, "grad_norm": 1.8383134603500366, "learning_rate": 2.8818035837540538e-06, "loss": 0.9274, "step": 13244 }, { "epoch": 0.76, "grad_norm": 3.1496589183807373, "learning_rate": 2.8804989902587564e-06, "loss": 0.9226, "step": 13245 }, { "epoch": 0.76, "grad_norm": 2.0077054500579834, "learning_rate": 2.8791946424375537e-06, "loss": 0.8503, "step": 13246 }, { "epoch": 0.76, "grad_norm": 1.76277494430542, "learning_rate": 2.877890540335451e-06, "loss": 0.9148, "step": 13247 }, { "epoch": 0.76, "grad_norm": 1.645403504371643, "learning_rate": 2.8765866839974522e-06, "loss": 0.8756, "step": 13248 }, { "epoch": 0.76, "grad_norm": 1.716269850730896, "learning_rate": 2.8752830734685466e-06, "loss": 0.9304, "step": 13249 }, { "epoch": 0.76, "grad_norm": 1.7370630502700806, "learning_rate": 2.8739797087937194e-06, "loss": 0.9447, "step": 13250 }, { "epoch": 0.76, "grad_norm": 1.80650794506073, "learning_rate": 2.87267659001795e-06, "loss": 0.9511, "step": 13251 }, { "epoch": 0.76, "grad_norm": 1.7192002534866333, "learning_rate": 2.871373717186199e-06, "loss": 0.8388, "step": 13252 }, { "epoch": 0.76, "grad_norm": 2.081408739089966, "learning_rate": 2.8700710903434314e-06, "loss": 0.946, "step": 13253 }, { "epoch": 0.76, "grad_norm": 1.868427038192749, "learning_rate": 2.8687687095345894e-06, "loss": 0.9108, "step": 13254 }, { "epoch": 0.76, "grad_norm": 1.7838658094406128, "learning_rate": 2.867466574804624e-06, "loss": 0.9335, "step": 13255 }, { "epoch": 0.76, "grad_norm": 1.7105185985565186, "learning_rate": 2.866164686198459e-06, "loss": 0.9367, "step": 13256 }, { "epoch": 0.76, "grad_norm": 1.7979981899261475, "learning_rate": 2.864863043761026e-06, "loss": 0.9041, "step": 13257 }, { "epoch": 0.76, "grad_norm": 1.832834005355835, "learning_rate": 2.8635616475372365e-06, "loss": 0.9572, "step": 13258 }, { "epoch": 0.76, "grad_norm": 1.6792157888412476, "learning_rate": 2.8622604975720016e-06, "loss": 0.9173, "step": 13259 }, { "epoch": 0.76, "grad_norm": 1.8305110931396484, "learning_rate": 2.8609595939102153e-06, "loss": 1.0185, "step": 13260 }, { "epoch": 0.76, "grad_norm": 1.931822657585144, "learning_rate": 2.859658936596774e-06, "loss": 0.9481, "step": 13261 }, { "epoch": 0.76, "grad_norm": 2.1546175479888916, "learning_rate": 2.8583585256765547e-06, "loss": 0.87, "step": 13262 }, { "epoch": 0.76, "grad_norm": 1.8563448190689087, "learning_rate": 2.8570583611944336e-06, "loss": 0.7861, "step": 13263 }, { "epoch": 0.76, "grad_norm": 1.6987948417663574, "learning_rate": 2.855758443195278e-06, "loss": 0.8936, "step": 13264 }, { "epoch": 0.76, "grad_norm": 0.9658501148223877, "learning_rate": 2.854458771723939e-06, "loss": 0.5109, "step": 13265 }, { "epoch": 0.76, "grad_norm": 1.6588844060897827, "learning_rate": 2.8531593468252703e-06, "loss": 0.968, "step": 13266 }, { "epoch": 0.76, "grad_norm": 1.9360096454620361, "learning_rate": 2.851860168544106e-06, "loss": 0.9487, "step": 13267 }, { "epoch": 0.76, "grad_norm": 1.7261602878570557, "learning_rate": 2.8505612369252834e-06, "loss": 0.9179, "step": 13268 }, { "epoch": 0.76, "grad_norm": 2.0890910625457764, "learning_rate": 2.8492625520136174e-06, "loss": 0.9482, "step": 13269 }, { "epoch": 0.76, "grad_norm": 1.7408963441848755, "learning_rate": 2.847964113853928e-06, "loss": 0.9575, "step": 13270 }, { "epoch": 0.76, "grad_norm": 1.9424909353256226, "learning_rate": 2.8466659224910174e-06, "loss": 0.9589, "step": 13271 }, { "epoch": 0.76, "grad_norm": 1.7734640836715698, "learning_rate": 2.8453679779696864e-06, "loss": 0.8728, "step": 13272 }, { "epoch": 0.76, "grad_norm": 2.985802173614502, "learning_rate": 2.8440702803347175e-06, "loss": 0.9759, "step": 13273 }, { "epoch": 0.76, "grad_norm": 0.9950181841850281, "learning_rate": 2.8427728296308965e-06, "loss": 0.4843, "step": 13274 }, { "epoch": 0.76, "grad_norm": 1.7475999593734741, "learning_rate": 2.8414756259029907e-06, "loss": 0.8522, "step": 13275 }, { "epoch": 0.76, "grad_norm": 1.8192265033721924, "learning_rate": 2.8401786691957632e-06, "loss": 0.895, "step": 13276 }, { "epoch": 0.76, "grad_norm": 1.819940447807312, "learning_rate": 2.838881959553973e-06, "loss": 0.8825, "step": 13277 }, { "epoch": 0.76, "grad_norm": 1.5944887399673462, "learning_rate": 2.8375854970223595e-06, "loss": 0.8606, "step": 13278 }, { "epoch": 0.76, "grad_norm": 1.6561224460601807, "learning_rate": 2.8362892816456668e-06, "loss": 0.8669, "step": 13279 }, { "epoch": 0.76, "grad_norm": 1.8115332126617432, "learning_rate": 2.8349933134686156e-06, "loss": 0.9033, "step": 13280 }, { "epoch": 0.76, "grad_norm": 1.861738920211792, "learning_rate": 2.8336975925359345e-06, "loss": 0.9444, "step": 13281 }, { "epoch": 0.76, "grad_norm": 1.7704592943191528, "learning_rate": 2.8324021188923276e-06, "loss": 0.8754, "step": 13282 }, { "epoch": 0.76, "grad_norm": 1.9867587089538574, "learning_rate": 2.8311068925825057e-06, "loss": 0.9557, "step": 13283 }, { "epoch": 0.76, "grad_norm": 1.8192524909973145, "learning_rate": 2.829811913651156e-06, "loss": 0.9777, "step": 13284 }, { "epoch": 0.76, "grad_norm": 1.7871979475021362, "learning_rate": 2.8285171821429715e-06, "loss": 0.9353, "step": 13285 }, { "epoch": 0.76, "grad_norm": 1.7441893815994263, "learning_rate": 2.827222698102622e-06, "loss": 0.8872, "step": 13286 }, { "epoch": 0.76, "grad_norm": 1.8543709516525269, "learning_rate": 2.825928461574782e-06, "loss": 0.8975, "step": 13287 }, { "epoch": 0.76, "grad_norm": 1.7354766130447388, "learning_rate": 2.824634472604113e-06, "loss": 0.989, "step": 13288 }, { "epoch": 0.76, "grad_norm": 1.805140733718872, "learning_rate": 2.8233407312352623e-06, "loss": 0.8908, "step": 13289 }, { "epoch": 0.76, "grad_norm": 1.6988160610198975, "learning_rate": 2.8220472375128793e-06, "loss": 0.9229, "step": 13290 }, { "epoch": 0.76, "grad_norm": 1.7211908102035522, "learning_rate": 2.820753991481592e-06, "loss": 0.9281, "step": 13291 }, { "epoch": 0.76, "grad_norm": 1.7471613883972168, "learning_rate": 2.819460993186032e-06, "loss": 0.9751, "step": 13292 }, { "epoch": 0.76, "grad_norm": 1.793211817741394, "learning_rate": 2.8181682426708134e-06, "loss": 0.8538, "step": 13293 }, { "epoch": 0.76, "grad_norm": 1.8046174049377441, "learning_rate": 2.816875739980549e-06, "loss": 0.8877, "step": 13294 }, { "epoch": 0.76, "grad_norm": 1.811018705368042, "learning_rate": 2.815583485159835e-06, "loss": 0.9231, "step": 13295 }, { "epoch": 0.76, "grad_norm": 1.0825761556625366, "learning_rate": 2.8142914782532693e-06, "loss": 0.5365, "step": 13296 }, { "epoch": 0.76, "grad_norm": 1.767999529838562, "learning_rate": 2.8129997193054294e-06, "loss": 0.9104, "step": 13297 }, { "epoch": 0.76, "grad_norm": 1.8627296686172485, "learning_rate": 2.811708208360896e-06, "loss": 0.904, "step": 13298 }, { "epoch": 0.76, "grad_norm": 1.7052541971206665, "learning_rate": 2.8104169454642293e-06, "loss": 0.8966, "step": 13299 }, { "epoch": 0.76, "grad_norm": 1.656807541847229, "learning_rate": 2.8091259306599905e-06, "loss": 0.9033, "step": 13300 }, { "epoch": 0.76, "grad_norm": 1.7286090850830078, "learning_rate": 2.8078351639927326e-06, "loss": 0.8502, "step": 13301 }, { "epoch": 0.76, "grad_norm": 1.9246201515197754, "learning_rate": 2.806544645506989e-06, "loss": 0.9052, "step": 13302 }, { "epoch": 0.76, "grad_norm": 1.8742958307266235, "learning_rate": 2.8052543752472996e-06, "loss": 0.8318, "step": 13303 }, { "epoch": 0.76, "grad_norm": 1.6651182174682617, "learning_rate": 2.8039643532581794e-06, "loss": 0.8753, "step": 13304 }, { "epoch": 0.76, "grad_norm": 1.794137716293335, "learning_rate": 2.8026745795841525e-06, "loss": 0.8762, "step": 13305 }, { "epoch": 0.76, "grad_norm": 1.833147406578064, "learning_rate": 2.8013850542697162e-06, "loss": 0.9718, "step": 13306 }, { "epoch": 0.76, "grad_norm": 1.8844655752182007, "learning_rate": 2.8000957773593786e-06, "loss": 0.8835, "step": 13307 }, { "epoch": 0.76, "grad_norm": 1.6990755796432495, "learning_rate": 2.7988067488976158e-06, "loss": 0.9939, "step": 13308 }, { "epoch": 0.76, "grad_norm": 1.6252760887145996, "learning_rate": 2.7975179689289223e-06, "loss": 0.8682, "step": 13309 }, { "epoch": 0.76, "grad_norm": 1.8397388458251953, "learning_rate": 2.796229437497762e-06, "loss": 0.9186, "step": 13310 }, { "epoch": 0.76, "grad_norm": 1.85454261302948, "learning_rate": 2.7949411546486037e-06, "loss": 0.9231, "step": 13311 }, { "epoch": 0.76, "grad_norm": 1.76252019405365, "learning_rate": 2.7936531204258964e-06, "loss": 0.8301, "step": 13312 }, { "epoch": 0.76, "grad_norm": 1.7701213359832764, "learning_rate": 2.7923653348740944e-06, "loss": 0.8594, "step": 13313 }, { "epoch": 0.76, "grad_norm": 1.8002903461456299, "learning_rate": 2.7910777980376256e-06, "loss": 0.8304, "step": 13314 }, { "epoch": 0.76, "grad_norm": 1.742992877960205, "learning_rate": 2.789790509960929e-06, "loss": 0.8994, "step": 13315 }, { "epoch": 0.76, "grad_norm": 1.7432808876037598, "learning_rate": 2.7885034706884186e-06, "loss": 0.905, "step": 13316 }, { "epoch": 0.76, "grad_norm": 1.689575433731079, "learning_rate": 2.7872166802645073e-06, "loss": 0.8046, "step": 13317 }, { "epoch": 0.76, "grad_norm": 1.8014971017837524, "learning_rate": 2.785930138733605e-06, "loss": 0.9158, "step": 13318 }, { "epoch": 0.76, "grad_norm": 1.7904638051986694, "learning_rate": 2.784643846140097e-06, "loss": 0.888, "step": 13319 }, { "epoch": 0.76, "grad_norm": 1.850538969039917, "learning_rate": 2.783357802528379e-06, "loss": 0.9077, "step": 13320 }, { "epoch": 0.76, "grad_norm": 1.639488935470581, "learning_rate": 2.782072007942821e-06, "loss": 0.8709, "step": 13321 }, { "epoch": 0.76, "grad_norm": 1.6707806587219238, "learning_rate": 2.780786462427798e-06, "loss": 0.8406, "step": 13322 }, { "epoch": 0.76, "grad_norm": 1.0482465028762817, "learning_rate": 2.7795011660276662e-06, "loss": 0.6377, "step": 13323 }, { "epoch": 0.76, "grad_norm": 0.9531552195549011, "learning_rate": 2.778216118786782e-06, "loss": 0.5236, "step": 13324 }, { "epoch": 0.76, "grad_norm": 1.7198026180267334, "learning_rate": 2.776931320749483e-06, "loss": 0.8663, "step": 13325 }, { "epoch": 0.76, "grad_norm": 1.7642197608947754, "learning_rate": 2.775646771960111e-06, "loss": 0.9263, "step": 13326 }, { "epoch": 0.76, "grad_norm": 1.7483997344970703, "learning_rate": 2.7743624724629847e-06, "loss": 0.9107, "step": 13327 }, { "epoch": 0.76, "grad_norm": 1.8054081201553345, "learning_rate": 2.7730784223024255e-06, "loss": 0.8776, "step": 13328 }, { "epoch": 0.76, "grad_norm": 1.8185434341430664, "learning_rate": 2.7717946215227453e-06, "loss": 0.9155, "step": 13329 }, { "epoch": 0.76, "grad_norm": 1.6876800060272217, "learning_rate": 2.770511070168239e-06, "loss": 1.0118, "step": 13330 }, { "epoch": 0.76, "grad_norm": 1.8999838829040527, "learning_rate": 2.769227768283204e-06, "loss": 0.9727, "step": 13331 }, { "epoch": 0.76, "grad_norm": 1.9106507301330566, "learning_rate": 2.7679447159119164e-06, "loss": 0.9897, "step": 13332 }, { "epoch": 0.76, "grad_norm": 1.8607107400894165, "learning_rate": 2.7666619130986594e-06, "loss": 0.969, "step": 13333 }, { "epoch": 0.76, "grad_norm": 1.718234896659851, "learning_rate": 2.76537935988769e-06, "loss": 0.8793, "step": 13334 }, { "epoch": 0.76, "grad_norm": 1.8154743909835815, "learning_rate": 2.764097056323273e-06, "loss": 0.8537, "step": 13335 }, { "epoch": 0.76, "grad_norm": 1.8593318462371826, "learning_rate": 2.7628150024496513e-06, "loss": 0.8895, "step": 13336 }, { "epoch": 0.76, "grad_norm": 2.0154054164886475, "learning_rate": 2.7615331983110704e-06, "loss": 0.8898, "step": 13337 }, { "epoch": 0.76, "grad_norm": 1.656383991241455, "learning_rate": 2.7602516439517555e-06, "loss": 0.8681, "step": 13338 }, { "epoch": 0.77, "grad_norm": 1.6162132024765015, "learning_rate": 2.7589703394159362e-06, "loss": 0.9056, "step": 13339 }, { "epoch": 0.77, "grad_norm": 1.7413274049758911, "learning_rate": 2.7576892847478208e-06, "loss": 0.988, "step": 13340 }, { "epoch": 0.77, "grad_norm": 1.8440016508102417, "learning_rate": 2.756408479991618e-06, "loss": 0.9361, "step": 13341 }, { "epoch": 0.77, "grad_norm": 1.767145037651062, "learning_rate": 2.7551279251915265e-06, "loss": 0.9143, "step": 13342 }, { "epoch": 0.77, "grad_norm": 1.7157105207443237, "learning_rate": 2.7538476203917296e-06, "loss": 0.83, "step": 13343 }, { "epoch": 0.77, "grad_norm": 1.839754581451416, "learning_rate": 2.7525675656364136e-06, "loss": 0.8337, "step": 13344 }, { "epoch": 0.77, "grad_norm": 1.8468281030654907, "learning_rate": 2.751287760969743e-06, "loss": 0.8741, "step": 13345 }, { "epoch": 0.77, "grad_norm": 1.713848352432251, "learning_rate": 2.7500082064358855e-06, "loss": 0.8525, "step": 13346 }, { "epoch": 0.77, "grad_norm": 1.7702603340148926, "learning_rate": 2.748728902078991e-06, "loss": 0.9346, "step": 13347 }, { "epoch": 0.77, "grad_norm": 1.6287257671356201, "learning_rate": 2.7474498479432087e-06, "loss": 0.8488, "step": 13348 }, { "epoch": 0.77, "grad_norm": 1.7959941625595093, "learning_rate": 2.7461710440726696e-06, "loss": 0.934, "step": 13349 }, { "epoch": 0.77, "grad_norm": 1.8187071084976196, "learning_rate": 2.7448924905115095e-06, "loss": 0.8862, "step": 13350 }, { "epoch": 0.77, "grad_norm": 1.6120656728744507, "learning_rate": 2.743614187303838e-06, "loss": 0.9344, "step": 13351 }, { "epoch": 0.77, "grad_norm": 1.7234573364257812, "learning_rate": 2.742336134493776e-06, "loss": 0.9042, "step": 13352 }, { "epoch": 0.77, "grad_norm": 1.81466805934906, "learning_rate": 2.741058332125417e-06, "loss": 1.0001, "step": 13353 }, { "epoch": 0.77, "grad_norm": 1.6738122701644897, "learning_rate": 2.739780780242857e-06, "loss": 0.8824, "step": 13354 }, { "epoch": 0.77, "grad_norm": 1.862013578414917, "learning_rate": 2.7385034788901853e-06, "loss": 0.8605, "step": 13355 }, { "epoch": 0.77, "grad_norm": 1.7717914581298828, "learning_rate": 2.737226428111471e-06, "loss": 0.9429, "step": 13356 }, { "epoch": 0.77, "grad_norm": 1.7951974868774414, "learning_rate": 2.735949627950789e-06, "loss": 0.9648, "step": 13357 }, { "epoch": 0.77, "grad_norm": 1.8635969161987305, "learning_rate": 2.73467307845219e-06, "loss": 0.8941, "step": 13358 }, { "epoch": 0.77, "grad_norm": 2.5822746753692627, "learning_rate": 2.7333967796597317e-06, "loss": 0.9383, "step": 13359 }, { "epoch": 0.77, "grad_norm": 0.9676087498664856, "learning_rate": 2.7321207316174493e-06, "loss": 0.5084, "step": 13360 }, { "epoch": 0.77, "grad_norm": 1.760954737663269, "learning_rate": 2.7308449343693812e-06, "loss": 0.9147, "step": 13361 }, { "epoch": 0.77, "grad_norm": 1.6818454265594482, "learning_rate": 2.7295693879595453e-06, "loss": 0.8711, "step": 13362 }, { "epoch": 0.77, "grad_norm": 1.1502350568771362, "learning_rate": 2.7282940924319647e-06, "loss": 0.6337, "step": 13363 }, { "epoch": 0.77, "grad_norm": 1.0412083864212036, "learning_rate": 2.727019047830638e-06, "loss": 0.5221, "step": 13364 }, { "epoch": 0.77, "grad_norm": 1.6387964487075806, "learning_rate": 2.7257442541995692e-06, "loss": 0.8635, "step": 13365 }, { "epoch": 0.77, "grad_norm": 1.7158979177474976, "learning_rate": 2.724469711582748e-06, "loss": 0.9463, "step": 13366 }, { "epoch": 0.77, "grad_norm": 1.6583956480026245, "learning_rate": 2.723195420024152e-06, "loss": 0.9242, "step": 13367 }, { "epoch": 0.77, "grad_norm": 1.728717565536499, "learning_rate": 2.7219213795677567e-06, "loss": 0.9293, "step": 13368 }, { "epoch": 0.77, "grad_norm": 1.6908937692642212, "learning_rate": 2.7206475902575225e-06, "loss": 0.8562, "step": 13369 }, { "epoch": 0.77, "grad_norm": 1.9748352766036987, "learning_rate": 2.719374052137408e-06, "loss": 0.9284, "step": 13370 }, { "epoch": 0.77, "grad_norm": 1.8594862222671509, "learning_rate": 2.718100765251355e-06, "loss": 0.8801, "step": 13371 }, { "epoch": 0.77, "grad_norm": 1.7083646059036255, "learning_rate": 2.7168277296433055e-06, "loss": 0.8948, "step": 13372 }, { "epoch": 0.77, "grad_norm": 1.985163688659668, "learning_rate": 2.715554945357184e-06, "loss": 0.8844, "step": 13373 }, { "epoch": 0.77, "grad_norm": 1.7561290264129639, "learning_rate": 2.714282412436913e-06, "loss": 0.8, "step": 13374 }, { "epoch": 0.77, "grad_norm": 1.74337637424469, "learning_rate": 2.7130101309264035e-06, "loss": 0.9497, "step": 13375 }, { "epoch": 0.77, "grad_norm": 1.882129430770874, "learning_rate": 2.711738100869563e-06, "loss": 0.8478, "step": 13376 }, { "epoch": 0.77, "grad_norm": 1.6719154119491577, "learning_rate": 2.7104663223102776e-06, "loss": 0.8639, "step": 13377 }, { "epoch": 0.77, "grad_norm": 1.651335597038269, "learning_rate": 2.709194795292441e-06, "loss": 0.898, "step": 13378 }, { "epoch": 0.77, "grad_norm": 1.8392298221588135, "learning_rate": 2.707923519859922e-06, "loss": 0.9249, "step": 13379 }, { "epoch": 0.77, "grad_norm": 1.7724997997283936, "learning_rate": 2.7066524960565965e-06, "loss": 0.8261, "step": 13380 }, { "epoch": 0.77, "grad_norm": 2.262718915939331, "learning_rate": 2.7053817239263168e-06, "loss": 0.9593, "step": 13381 }, { "epoch": 0.77, "grad_norm": 1.8630397319793701, "learning_rate": 2.704111203512938e-06, "loss": 0.8823, "step": 13382 }, { "epoch": 0.77, "grad_norm": 1.1210665702819824, "learning_rate": 2.7028409348603037e-06, "loss": 0.5562, "step": 13383 }, { "epoch": 0.77, "grad_norm": 1.8422825336456299, "learning_rate": 2.7015709180122416e-06, "loss": 0.8691, "step": 13384 }, { "epoch": 0.77, "grad_norm": 1.7199374437332153, "learning_rate": 2.7003011530125823e-06, "loss": 0.9395, "step": 13385 }, { "epoch": 0.77, "grad_norm": 1.7889477014541626, "learning_rate": 2.6990316399051373e-06, "loss": 0.917, "step": 13386 }, { "epoch": 0.77, "grad_norm": 1.647608995437622, "learning_rate": 2.6977623787337193e-06, "loss": 0.852, "step": 13387 }, { "epoch": 0.77, "grad_norm": 1.870060682296753, "learning_rate": 2.696493369542119e-06, "loss": 0.8578, "step": 13388 }, { "epoch": 0.77, "grad_norm": 1.7119452953338623, "learning_rate": 2.6952246123741353e-06, "loss": 0.9926, "step": 13389 }, { "epoch": 0.77, "grad_norm": 1.614616870880127, "learning_rate": 2.693956107273542e-06, "loss": 0.8661, "step": 13390 }, { "epoch": 0.77, "grad_norm": 1.674981951713562, "learning_rate": 2.6926878542841184e-06, "loss": 0.8625, "step": 13391 }, { "epoch": 0.77, "grad_norm": 1.8029931783676147, "learning_rate": 2.6914198534496204e-06, "loss": 0.9375, "step": 13392 }, { "epoch": 0.77, "grad_norm": 1.7679470777511597, "learning_rate": 2.6901521048138115e-06, "loss": 0.8508, "step": 13393 }, { "epoch": 0.77, "grad_norm": 2.039330005645752, "learning_rate": 2.688884608420431e-06, "loss": 1.0253, "step": 13394 }, { "epoch": 0.77, "grad_norm": 1.7551875114440918, "learning_rate": 2.68761736431322e-06, "loss": 0.9618, "step": 13395 }, { "epoch": 0.77, "grad_norm": 1.813559889793396, "learning_rate": 2.6863503725359107e-06, "loss": 0.9196, "step": 13396 }, { "epoch": 0.77, "grad_norm": 1.8430988788604736, "learning_rate": 2.685083633132216e-06, "loss": 0.9033, "step": 13397 }, { "epoch": 0.77, "grad_norm": 1.765631914138794, "learning_rate": 2.6838171461458563e-06, "loss": 0.9833, "step": 13398 }, { "epoch": 0.77, "grad_norm": 1.743289589881897, "learning_rate": 2.682550911620526e-06, "loss": 0.8334, "step": 13399 }, { "epoch": 0.77, "grad_norm": 1.6356432437896729, "learning_rate": 2.6812849295999267e-06, "loss": 0.991, "step": 13400 }, { "epoch": 0.77, "grad_norm": 1.7690151929855347, "learning_rate": 2.680019200127737e-06, "loss": 0.9193, "step": 13401 }, { "epoch": 0.77, "grad_norm": 1.7529290914535522, "learning_rate": 2.6787537232476403e-06, "loss": 0.8587, "step": 13402 }, { "epoch": 0.77, "grad_norm": 1.8339829444885254, "learning_rate": 2.677488499003299e-06, "loss": 0.9759, "step": 13403 }, { "epoch": 0.77, "grad_norm": 1.732663631439209, "learning_rate": 2.6762235274383775e-06, "loss": 0.8637, "step": 13404 }, { "epoch": 0.77, "grad_norm": 1.7830681800842285, "learning_rate": 2.6749588085965216e-06, "loss": 0.9172, "step": 13405 }, { "epoch": 0.77, "grad_norm": 1.7096357345581055, "learning_rate": 2.673694342521378e-06, "loss": 0.9225, "step": 13406 }, { "epoch": 0.77, "grad_norm": 1.708238959312439, "learning_rate": 2.6724301292565747e-06, "loss": 0.9508, "step": 13407 }, { "epoch": 0.77, "grad_norm": 1.6304820775985718, "learning_rate": 2.671166168845738e-06, "loss": 0.9219, "step": 13408 }, { "epoch": 0.77, "grad_norm": 1.7312514781951904, "learning_rate": 2.6699024613324888e-06, "loss": 0.9579, "step": 13409 }, { "epoch": 0.77, "grad_norm": 1.1243536472320557, "learning_rate": 2.6686390067604264e-06, "loss": 0.5645, "step": 13410 }, { "epoch": 0.77, "grad_norm": 1.811184287071228, "learning_rate": 2.6673758051731546e-06, "loss": 0.8466, "step": 13411 }, { "epoch": 0.77, "grad_norm": 1.8927572965621948, "learning_rate": 2.6661128566142592e-06, "loss": 0.9854, "step": 13412 }, { "epoch": 0.77, "grad_norm": 1.5789134502410889, "learning_rate": 2.6648501611273248e-06, "loss": 0.8828, "step": 13413 }, { "epoch": 0.77, "grad_norm": 1.8304094076156616, "learning_rate": 2.663587718755919e-06, "loss": 0.9944, "step": 13414 }, { "epoch": 0.77, "grad_norm": 1.7803597450256348, "learning_rate": 2.66232552954361e-06, "loss": 0.9091, "step": 13415 }, { "epoch": 0.77, "grad_norm": 1.892556071281433, "learning_rate": 2.6610635935339477e-06, "loss": 0.9384, "step": 13416 }, { "epoch": 0.77, "grad_norm": 1.6220402717590332, "learning_rate": 2.659801910770483e-06, "loss": 0.8797, "step": 13417 }, { "epoch": 0.77, "grad_norm": 1.7127230167388916, "learning_rate": 2.6585404812967476e-06, "loss": 0.8351, "step": 13418 }, { "epoch": 0.77, "grad_norm": 1.6150742769241333, "learning_rate": 2.6572793051562727e-06, "loss": 0.9387, "step": 13419 }, { "epoch": 0.77, "grad_norm": 1.7272599935531616, "learning_rate": 2.65601838239258e-06, "loss": 0.8457, "step": 13420 }, { "epoch": 0.77, "grad_norm": 1.8239541053771973, "learning_rate": 2.6547577130491764e-06, "loss": 0.9089, "step": 13421 }, { "epoch": 0.77, "grad_norm": 1.6838151216506958, "learning_rate": 2.6534972971695683e-06, "loss": 0.9267, "step": 13422 }, { "epoch": 0.77, "grad_norm": 1.9428657293319702, "learning_rate": 2.6522371347972444e-06, "loss": 0.9237, "step": 13423 }, { "epoch": 0.77, "grad_norm": 1.8919479846954346, "learning_rate": 2.650977225975695e-06, "loss": 0.8336, "step": 13424 }, { "epoch": 0.77, "grad_norm": 1.679810881614685, "learning_rate": 2.649717570748389e-06, "loss": 0.8542, "step": 13425 }, { "epoch": 0.77, "grad_norm": 1.66526460647583, "learning_rate": 2.648458169158801e-06, "loss": 0.942, "step": 13426 }, { "epoch": 0.77, "grad_norm": 1.7164236307144165, "learning_rate": 2.647199021250383e-06, "loss": 0.7926, "step": 13427 }, { "epoch": 0.77, "grad_norm": 1.6227229833602905, "learning_rate": 2.64594012706659e-06, "loss": 0.8714, "step": 13428 }, { "epoch": 0.77, "grad_norm": 1.7704640626907349, "learning_rate": 2.6446814866508587e-06, "loss": 0.9682, "step": 13429 }, { "epoch": 0.77, "grad_norm": 1.7707606554031372, "learning_rate": 2.643423100046625e-06, "loss": 0.9444, "step": 13430 }, { "epoch": 0.77, "grad_norm": 1.769217848777771, "learning_rate": 2.6421649672973072e-06, "loss": 0.8163, "step": 13431 }, { "epoch": 0.77, "grad_norm": 1.6367143392562866, "learning_rate": 2.6409070884463227e-06, "loss": 0.9359, "step": 13432 }, { "epoch": 0.77, "grad_norm": 1.7883613109588623, "learning_rate": 2.6396494635370816e-06, "loss": 0.8401, "step": 13433 }, { "epoch": 0.77, "grad_norm": 1.8270783424377441, "learning_rate": 2.6383920926129746e-06, "loss": 0.8557, "step": 13434 }, { "epoch": 0.77, "grad_norm": 1.7381024360656738, "learning_rate": 2.6371349757173946e-06, "loss": 0.879, "step": 13435 }, { "epoch": 0.77, "grad_norm": 1.7255743741989136, "learning_rate": 2.635878112893717e-06, "loss": 0.8718, "step": 13436 }, { "epoch": 0.77, "grad_norm": 1.6766544580459595, "learning_rate": 2.6346215041853183e-06, "loss": 0.8918, "step": 13437 }, { "epoch": 0.77, "grad_norm": 1.7601232528686523, "learning_rate": 2.6333651496355527e-06, "loss": 0.8911, "step": 13438 }, { "epoch": 0.77, "grad_norm": 1.8920098543167114, "learning_rate": 2.6321090492877823e-06, "loss": 1.0118, "step": 13439 }, { "epoch": 0.77, "grad_norm": 1.7545466423034668, "learning_rate": 2.630853203185341e-06, "loss": 0.9712, "step": 13440 }, { "epoch": 0.77, "grad_norm": 1.7252217531204224, "learning_rate": 2.629597611371576e-06, "loss": 0.9643, "step": 13441 }, { "epoch": 0.77, "grad_norm": 1.9029046297073364, "learning_rate": 2.6283422738898067e-06, "loss": 0.9217, "step": 13442 }, { "epoch": 0.77, "grad_norm": 1.743169903755188, "learning_rate": 2.627087190783356e-06, "loss": 0.8654, "step": 13443 }, { "epoch": 0.77, "grad_norm": 1.7741267681121826, "learning_rate": 2.6258323620955286e-06, "loss": 0.8985, "step": 13444 }, { "epoch": 0.77, "grad_norm": 1.8389676809310913, "learning_rate": 2.62457778786963e-06, "loss": 0.8943, "step": 13445 }, { "epoch": 0.77, "grad_norm": 1.6463677883148193, "learning_rate": 2.6233234681489473e-06, "loss": 0.9001, "step": 13446 }, { "epoch": 0.77, "grad_norm": 1.912057638168335, "learning_rate": 2.622069402976768e-06, "loss": 0.9304, "step": 13447 }, { "epoch": 0.77, "grad_norm": 1.8358478546142578, "learning_rate": 2.620815592396362e-06, "loss": 0.9083, "step": 13448 }, { "epoch": 0.77, "grad_norm": 1.7130416631698608, "learning_rate": 2.6195620364509966e-06, "loss": 0.8868, "step": 13449 }, { "epoch": 0.77, "grad_norm": 1.6826281547546387, "learning_rate": 2.618308735183931e-06, "loss": 0.9089, "step": 13450 }, { "epoch": 0.77, "grad_norm": 1.8808422088623047, "learning_rate": 2.6170556886384092e-06, "loss": 0.9799, "step": 13451 }, { "epoch": 0.77, "grad_norm": 1.7785825729370117, "learning_rate": 2.615802896857674e-06, "loss": 0.9884, "step": 13452 }, { "epoch": 0.77, "grad_norm": 1.8639971017837524, "learning_rate": 2.614550359884952e-06, "loss": 0.9035, "step": 13453 }, { "epoch": 0.77, "grad_norm": 1.1102173328399658, "learning_rate": 2.61329807776347e-06, "loss": 0.5912, "step": 13454 }, { "epoch": 0.77, "grad_norm": 1.6840548515319824, "learning_rate": 2.6120460505364333e-06, "loss": 0.9342, "step": 13455 }, { "epoch": 0.77, "grad_norm": 1.7257391214370728, "learning_rate": 2.610794278247053e-06, "loss": 0.8567, "step": 13456 }, { "epoch": 0.77, "grad_norm": 1.7970428466796875, "learning_rate": 2.609542760938519e-06, "loss": 0.819, "step": 13457 }, { "epoch": 0.77, "grad_norm": 1.7008837461471558, "learning_rate": 2.608291498654023e-06, "loss": 0.8991, "step": 13458 }, { "epoch": 0.77, "grad_norm": 1.8798167705535889, "learning_rate": 2.6070404914367355e-06, "loss": 1.0265, "step": 13459 }, { "epoch": 0.77, "grad_norm": 0.9884977340698242, "learning_rate": 2.6057897393298328e-06, "loss": 0.5522, "step": 13460 }, { "epoch": 0.77, "grad_norm": 1.8128204345703125, "learning_rate": 2.604539242376468e-06, "loss": 1.0019, "step": 13461 }, { "epoch": 0.77, "grad_norm": 1.8131340742111206, "learning_rate": 2.6032890006197965e-06, "loss": 0.8568, "step": 13462 }, { "epoch": 0.77, "grad_norm": 2.576253652572632, "learning_rate": 2.6020390141029616e-06, "loss": 0.9143, "step": 13463 }, { "epoch": 0.77, "grad_norm": 2.562553882598877, "learning_rate": 2.6007892828690927e-06, "loss": 0.9111, "step": 13464 }, { "epoch": 0.77, "grad_norm": 1.8374035358428955, "learning_rate": 2.5995398069613197e-06, "loss": 0.9413, "step": 13465 }, { "epoch": 0.77, "grad_norm": 1.671708345413208, "learning_rate": 2.5982905864227526e-06, "loss": 0.8831, "step": 13466 }, { "epoch": 0.77, "grad_norm": 2.1745402812957764, "learning_rate": 2.5970416212965043e-06, "loss": 0.8832, "step": 13467 }, { "epoch": 0.77, "grad_norm": 1.7788275480270386, "learning_rate": 2.5957929116256677e-06, "loss": 0.9464, "step": 13468 }, { "epoch": 0.77, "grad_norm": 1.9412552118301392, "learning_rate": 2.5945444574533372e-06, "loss": 0.8723, "step": 13469 }, { "epoch": 0.77, "grad_norm": 1.8016103506088257, "learning_rate": 2.5932962588225884e-06, "loss": 0.9362, "step": 13470 }, { "epoch": 0.77, "grad_norm": 1.8947603702545166, "learning_rate": 2.5920483157764988e-06, "loss": 0.8644, "step": 13471 }, { "epoch": 0.77, "grad_norm": 1.8442952632904053, "learning_rate": 2.5908006283581255e-06, "loss": 0.884, "step": 13472 }, { "epoch": 0.77, "grad_norm": 2.0008127689361572, "learning_rate": 2.589553196610527e-06, "loss": 0.9186, "step": 13473 }, { "epoch": 0.77, "grad_norm": 1.8190070390701294, "learning_rate": 2.5883060205767495e-06, "loss": 0.8356, "step": 13474 }, { "epoch": 0.77, "grad_norm": 1.8109745979309082, "learning_rate": 2.5870591002998235e-06, "loss": 0.8993, "step": 13475 }, { "epoch": 0.77, "grad_norm": 1.9837809801101685, "learning_rate": 2.5858124358227856e-06, "loss": 0.9195, "step": 13476 }, { "epoch": 0.77, "grad_norm": 1.7921805381774902, "learning_rate": 2.584566027188645e-06, "loss": 0.9471, "step": 13477 }, { "epoch": 0.77, "grad_norm": 1.0525033473968506, "learning_rate": 2.583319874440421e-06, "loss": 0.5807, "step": 13478 }, { "epoch": 0.77, "grad_norm": 1.6983859539031982, "learning_rate": 2.582073977621107e-06, "loss": 0.8853, "step": 13479 }, { "epoch": 0.77, "grad_norm": 1.6854795217514038, "learning_rate": 2.580828336773702e-06, "loss": 0.9147, "step": 13480 }, { "epoch": 0.77, "grad_norm": 1.8637653589248657, "learning_rate": 2.579582951941184e-06, "loss": 0.8891, "step": 13481 }, { "epoch": 0.77, "grad_norm": 1.9753445386886597, "learning_rate": 2.5783378231665322e-06, "loss": 0.9432, "step": 13482 }, { "epoch": 0.77, "grad_norm": 1.770796537399292, "learning_rate": 2.577092950492708e-06, "loss": 0.8924, "step": 13483 }, { "epoch": 0.77, "grad_norm": 1.9099305868148804, "learning_rate": 2.575848333962674e-06, "loss": 0.8746, "step": 13484 }, { "epoch": 0.77, "grad_norm": 1.8360024690628052, "learning_rate": 2.5746039736193727e-06, "loss": 0.9524, "step": 13485 }, { "epoch": 0.77, "grad_norm": 1.7529373168945312, "learning_rate": 2.573359869505746e-06, "loss": 0.8807, "step": 13486 }, { "epoch": 0.77, "grad_norm": 1.687541127204895, "learning_rate": 2.572116021664728e-06, "loss": 0.8415, "step": 13487 }, { "epoch": 0.77, "grad_norm": 1.944042682647705, "learning_rate": 2.570872430139234e-06, "loss": 0.8743, "step": 13488 }, { "epoch": 0.77, "grad_norm": 1.9257749319076538, "learning_rate": 2.5696290949721823e-06, "loss": 0.913, "step": 13489 }, { "epoch": 0.77, "grad_norm": 1.6089198589324951, "learning_rate": 2.5683860162064723e-06, "loss": 0.8882, "step": 13490 }, { "epoch": 0.77, "grad_norm": 1.8171964883804321, "learning_rate": 2.5671431938850044e-06, "loss": 0.9666, "step": 13491 }, { "epoch": 0.77, "grad_norm": 1.7848421335220337, "learning_rate": 2.5659006280506594e-06, "loss": 0.8899, "step": 13492 }, { "epoch": 0.77, "grad_norm": 1.68595290184021, "learning_rate": 2.5646583187463203e-06, "loss": 0.8754, "step": 13493 }, { "epoch": 0.77, "grad_norm": 1.6042120456695557, "learning_rate": 2.56341626601485e-06, "loss": 0.8252, "step": 13494 }, { "epoch": 0.77, "grad_norm": 1.6256245374679565, "learning_rate": 2.5621744698991134e-06, "loss": 0.9226, "step": 13495 }, { "epoch": 0.77, "grad_norm": 1.9064655303955078, "learning_rate": 2.560932930441956e-06, "loss": 0.9921, "step": 13496 }, { "epoch": 0.77, "grad_norm": 1.5552937984466553, "learning_rate": 2.5596916476862234e-06, "loss": 0.8624, "step": 13497 }, { "epoch": 0.77, "grad_norm": 1.7282001972198486, "learning_rate": 2.5584506216747516e-06, "loss": 0.8774, "step": 13498 }, { "epoch": 0.77, "grad_norm": 1.682398796081543, "learning_rate": 2.5572098524503585e-06, "loss": 0.9536, "step": 13499 }, { "epoch": 0.77, "grad_norm": 1.698731780052185, "learning_rate": 2.555969340055866e-06, "loss": 0.9224, "step": 13500 }, { "epoch": 0.77, "grad_norm": 1.7543385028839111, "learning_rate": 2.5547290845340745e-06, "loss": 0.8983, "step": 13501 }, { "epoch": 0.77, "grad_norm": 1.7062714099884033, "learning_rate": 2.5534890859277873e-06, "loss": 0.8766, "step": 13502 }, { "epoch": 0.77, "grad_norm": 1.8012104034423828, "learning_rate": 2.552249344279788e-06, "loss": 0.8833, "step": 13503 }, { "epoch": 0.77, "grad_norm": 1.7164487838745117, "learning_rate": 2.5510098596328625e-06, "loss": 0.8712, "step": 13504 }, { "epoch": 0.77, "grad_norm": 2.0002520084381104, "learning_rate": 2.5497706320297757e-06, "loss": 0.954, "step": 13505 }, { "epoch": 0.77, "grad_norm": 1.6533023118972778, "learning_rate": 2.548531661513293e-06, "loss": 0.8439, "step": 13506 }, { "epoch": 0.77, "grad_norm": 1.8816951513290405, "learning_rate": 2.5472929481261677e-06, "loss": 0.9019, "step": 13507 }, { "epoch": 0.77, "grad_norm": 1.8175066709518433, "learning_rate": 2.5460544919111473e-06, "loss": 0.8599, "step": 13508 }, { "epoch": 0.77, "grad_norm": 1.6753649711608887, "learning_rate": 2.544816292910962e-06, "loss": 0.9854, "step": 13509 }, { "epoch": 0.77, "grad_norm": 1.8827247619628906, "learning_rate": 2.5435783511683444e-06, "loss": 0.9681, "step": 13510 }, { "epoch": 0.77, "grad_norm": 1.7025583982467651, "learning_rate": 2.5423406667260065e-06, "loss": 0.9546, "step": 13511 }, { "epoch": 0.77, "grad_norm": 1.6776599884033203, "learning_rate": 2.541103239626662e-06, "loss": 0.8516, "step": 13512 }, { "epoch": 0.77, "grad_norm": 1.160560131072998, "learning_rate": 2.539866069913007e-06, "loss": 0.5227, "step": 13513 }, { "epoch": 0.78, "grad_norm": 1.8358908891677856, "learning_rate": 2.5386291576277343e-06, "loss": 0.9137, "step": 13514 }, { "epoch": 0.78, "grad_norm": 1.8254345655441284, "learning_rate": 2.5373925028135304e-06, "loss": 0.9122, "step": 13515 }, { "epoch": 0.78, "grad_norm": 1.8013652563095093, "learning_rate": 2.5361561055130625e-06, "loss": 0.8143, "step": 13516 }, { "epoch": 0.78, "grad_norm": 1.8198809623718262, "learning_rate": 2.5349199657690004e-06, "loss": 0.9426, "step": 13517 }, { "epoch": 0.78, "grad_norm": 1.804755449295044, "learning_rate": 2.533684083623994e-06, "loss": 0.9327, "step": 13518 }, { "epoch": 0.78, "grad_norm": 1.6862956285476685, "learning_rate": 2.5324484591206978e-06, "loss": 0.8778, "step": 13519 }, { "epoch": 0.78, "grad_norm": 1.7597792148590088, "learning_rate": 2.531213092301742e-06, "loss": 0.8879, "step": 13520 }, { "epoch": 0.78, "grad_norm": 1.7173988819122314, "learning_rate": 2.5299779832097616e-06, "loss": 0.8587, "step": 13521 }, { "epoch": 0.78, "grad_norm": 2.000418186187744, "learning_rate": 2.528743131887371e-06, "loss": 0.952, "step": 13522 }, { "epoch": 0.78, "grad_norm": 1.7533934116363525, "learning_rate": 2.527508538377189e-06, "loss": 0.8514, "step": 13523 }, { "epoch": 0.78, "grad_norm": 1.8522275686264038, "learning_rate": 2.5262742027218102e-06, "loss": 0.8705, "step": 13524 }, { "epoch": 0.78, "grad_norm": 1.8937947750091553, "learning_rate": 2.5250401249638344e-06, "loss": 0.8968, "step": 13525 }, { "epoch": 0.78, "grad_norm": 1.0209144353866577, "learning_rate": 2.5238063051458415e-06, "loss": 0.5407, "step": 13526 }, { "epoch": 0.78, "grad_norm": 0.8867063522338867, "learning_rate": 2.5225727433104085e-06, "loss": 0.4912, "step": 13527 }, { "epoch": 0.78, "grad_norm": 1.712629795074463, "learning_rate": 2.5213394395001055e-06, "loss": 0.8222, "step": 13528 }, { "epoch": 0.78, "grad_norm": 1.7257368564605713, "learning_rate": 2.5201063937574842e-06, "loss": 0.8699, "step": 13529 }, { "epoch": 0.78, "grad_norm": 1.822894811630249, "learning_rate": 2.5188736061251016e-06, "loss": 0.8894, "step": 13530 }, { "epoch": 0.78, "grad_norm": 1.7532531023025513, "learning_rate": 2.5176410766454884e-06, "loss": 0.9672, "step": 13531 }, { "epoch": 0.78, "grad_norm": 1.8516680002212524, "learning_rate": 2.5164088053611844e-06, "loss": 0.9776, "step": 13532 }, { "epoch": 0.78, "grad_norm": 1.6414748430252075, "learning_rate": 2.515176792314705e-06, "loss": 0.8894, "step": 13533 }, { "epoch": 0.78, "grad_norm": 1.7627924680709839, "learning_rate": 2.51394503754857e-06, "loss": 0.8332, "step": 13534 }, { "epoch": 0.78, "grad_norm": 1.8156533241271973, "learning_rate": 2.512713541105276e-06, "loss": 0.8449, "step": 13535 }, { "epoch": 0.78, "grad_norm": 1.7378655672073364, "learning_rate": 2.5114823030273273e-06, "loss": 0.8479, "step": 13536 }, { "epoch": 0.78, "grad_norm": 1.8265857696533203, "learning_rate": 2.510251323357201e-06, "loss": 0.9534, "step": 13537 }, { "epoch": 0.78, "grad_norm": 1.8134419918060303, "learning_rate": 2.509020602137384e-06, "loss": 0.8817, "step": 13538 }, { "epoch": 0.78, "grad_norm": 1.701204776763916, "learning_rate": 2.5077901394103386e-06, "loss": 0.8588, "step": 13539 }, { "epoch": 0.78, "grad_norm": 1.8620517253875732, "learning_rate": 2.5065599352185255e-06, "loss": 0.9378, "step": 13540 }, { "epoch": 0.78, "grad_norm": 1.7137367725372314, "learning_rate": 2.5053299896044e-06, "loss": 0.8855, "step": 13541 }, { "epoch": 0.78, "grad_norm": 1.8661954402923584, "learning_rate": 2.5041003026103994e-06, "loss": 1.0024, "step": 13542 }, { "epoch": 0.78, "grad_norm": 1.8329602479934692, "learning_rate": 2.50287087427896e-06, "loss": 0.9197, "step": 13543 }, { "epoch": 0.78, "grad_norm": 2.0175042152404785, "learning_rate": 2.501641704652502e-06, "loss": 0.844, "step": 13544 }, { "epoch": 0.78, "grad_norm": 1.8630995750427246, "learning_rate": 2.5004127937734456e-06, "loss": 0.9515, "step": 13545 }, { "epoch": 0.78, "grad_norm": 1.8460482358932495, "learning_rate": 2.4991841416841922e-06, "loss": 0.9665, "step": 13546 }, { "epoch": 0.78, "grad_norm": 1.9038846492767334, "learning_rate": 2.497955748427143e-06, "loss": 0.9269, "step": 13547 }, { "epoch": 0.78, "grad_norm": 1.6748512983322144, "learning_rate": 2.496727614044683e-06, "loss": 0.9063, "step": 13548 }, { "epoch": 0.78, "grad_norm": 1.8474611043930054, "learning_rate": 2.4954997385791967e-06, "loss": 0.8088, "step": 13549 }, { "epoch": 0.78, "grad_norm": 1.8525071144104004, "learning_rate": 2.494272122073047e-06, "loss": 0.8748, "step": 13550 }, { "epoch": 0.78, "grad_norm": 1.1093207597732544, "learning_rate": 2.4930447645686016e-06, "loss": 0.5935, "step": 13551 }, { "epoch": 0.78, "grad_norm": 1.7060428857803345, "learning_rate": 2.491817666108214e-06, "loss": 0.8993, "step": 13552 }, { "epoch": 0.78, "grad_norm": 1.7593170404434204, "learning_rate": 2.4905908267342216e-06, "loss": 1.0324, "step": 13553 }, { "epoch": 0.78, "grad_norm": 1.8856810331344604, "learning_rate": 2.4893642464889667e-06, "loss": 0.8973, "step": 13554 }, { "epoch": 0.78, "grad_norm": 1.8496859073638916, "learning_rate": 2.4881379254147685e-06, "loss": 0.9747, "step": 13555 }, { "epoch": 0.78, "grad_norm": 1.7298897504806519, "learning_rate": 2.48691186355395e-06, "loss": 0.9879, "step": 13556 }, { "epoch": 0.78, "grad_norm": 1.7991303205490112, "learning_rate": 2.4856860609488133e-06, "loss": 0.8397, "step": 13557 }, { "epoch": 0.78, "grad_norm": 1.9909332990646362, "learning_rate": 2.484460517641664e-06, "loss": 0.9396, "step": 13558 }, { "epoch": 0.78, "grad_norm": 1.7192751169204712, "learning_rate": 2.4832352336747833e-06, "loss": 0.8254, "step": 13559 }, { "epoch": 0.78, "grad_norm": 1.7192391157150269, "learning_rate": 2.482010209090462e-06, "loss": 0.9126, "step": 13560 }, { "epoch": 0.78, "grad_norm": 1.6755099296569824, "learning_rate": 2.480785443930964e-06, "loss": 0.9509, "step": 13561 }, { "epoch": 0.78, "grad_norm": 1.794945240020752, "learning_rate": 2.47956093823856e-06, "loss": 0.9382, "step": 13562 }, { "epoch": 0.78, "grad_norm": 1.9940567016601562, "learning_rate": 2.4783366920554973e-06, "loss": 0.9632, "step": 13563 }, { "epoch": 0.78, "grad_norm": 1.7845189571380615, "learning_rate": 2.477112705424024e-06, "loss": 0.9667, "step": 13564 }, { "epoch": 0.78, "grad_norm": 1.5141937732696533, "learning_rate": 2.4758889783863803e-06, "loss": 0.9175, "step": 13565 }, { "epoch": 0.78, "grad_norm": 1.864034652709961, "learning_rate": 2.4746655109847874e-06, "loss": 0.9436, "step": 13566 }, { "epoch": 0.78, "grad_norm": 1.6060407161712646, "learning_rate": 2.4734423032614695e-06, "loss": 0.8613, "step": 13567 }, { "epoch": 0.78, "grad_norm": 1.8270012140274048, "learning_rate": 2.4722193552586295e-06, "loss": 0.9514, "step": 13568 }, { "epoch": 0.78, "grad_norm": 1.6867666244506836, "learning_rate": 2.4709966670184747e-06, "loss": 0.9665, "step": 13569 }, { "epoch": 0.78, "grad_norm": 1.9030689001083374, "learning_rate": 2.4697742385831915e-06, "loss": 0.8916, "step": 13570 }, { "epoch": 0.78, "grad_norm": 1.793871521949768, "learning_rate": 2.468552069994966e-06, "loss": 0.9407, "step": 13571 }, { "epoch": 0.78, "grad_norm": 1.6520037651062012, "learning_rate": 2.4673301612959653e-06, "loss": 0.8373, "step": 13572 }, { "epoch": 0.78, "grad_norm": 1.8157391548156738, "learning_rate": 2.4661085125283647e-06, "loss": 0.8917, "step": 13573 }, { "epoch": 0.78, "grad_norm": 1.650205135345459, "learning_rate": 2.464887123734312e-06, "loss": 0.9705, "step": 13574 }, { "epoch": 0.78, "grad_norm": 1.680430293083191, "learning_rate": 2.4636659949559583e-06, "loss": 0.8629, "step": 13575 }, { "epoch": 0.78, "grad_norm": 1.6670992374420166, "learning_rate": 2.4624451262354365e-06, "loss": 0.9356, "step": 13576 }, { "epoch": 0.78, "grad_norm": 1.6412323713302612, "learning_rate": 2.461224517614881e-06, "loss": 0.911, "step": 13577 }, { "epoch": 0.78, "grad_norm": 1.6828844547271729, "learning_rate": 2.4600041691364053e-06, "loss": 0.8202, "step": 13578 }, { "epoch": 0.78, "grad_norm": 1.8257712125778198, "learning_rate": 2.458784080842127e-06, "loss": 0.8662, "step": 13579 }, { "epoch": 0.78, "grad_norm": 1.8983169794082642, "learning_rate": 2.457564252774142e-06, "loss": 0.9116, "step": 13580 }, { "epoch": 0.78, "grad_norm": 1.7737958431243896, "learning_rate": 2.4563446849745453e-06, "loss": 0.9236, "step": 13581 }, { "epoch": 0.78, "grad_norm": 1.9499841928482056, "learning_rate": 2.455125377485423e-06, "loss": 0.8549, "step": 13582 }, { "epoch": 0.78, "grad_norm": 1.6789606809616089, "learning_rate": 2.4539063303488474e-06, "loss": 0.8902, "step": 13583 }, { "epoch": 0.78, "grad_norm": 1.7524129152297974, "learning_rate": 2.4526875436068865e-06, "loss": 0.9344, "step": 13584 }, { "epoch": 0.78, "grad_norm": 1.7539042234420776, "learning_rate": 2.4514690173015944e-06, "loss": 0.9338, "step": 13585 }, { "epoch": 0.78, "grad_norm": 1.7185512781143188, "learning_rate": 2.450250751475022e-06, "loss": 0.9036, "step": 13586 }, { "epoch": 0.78, "grad_norm": 1.0133153200149536, "learning_rate": 2.4490327461692043e-06, "loss": 0.5656, "step": 13587 }, { "epoch": 0.78, "grad_norm": 1.9754283428192139, "learning_rate": 2.447815001426177e-06, "loss": 0.9252, "step": 13588 }, { "epoch": 0.78, "grad_norm": 1.76350736618042, "learning_rate": 2.446597517287954e-06, "loss": 0.9511, "step": 13589 }, { "epoch": 0.78, "grad_norm": 1.8235632181167603, "learning_rate": 2.445380293796555e-06, "loss": 0.8567, "step": 13590 }, { "epoch": 0.78, "grad_norm": 1.664875864982605, "learning_rate": 2.4441633309939762e-06, "loss": 0.8897, "step": 13591 }, { "epoch": 0.78, "grad_norm": 1.6128277778625488, "learning_rate": 2.442946628922217e-06, "loss": 0.8974, "step": 13592 }, { "epoch": 0.78, "grad_norm": 1.7497379779815674, "learning_rate": 2.4417301876232568e-06, "loss": 0.936, "step": 13593 }, { "epoch": 0.78, "grad_norm": 1.7339715957641602, "learning_rate": 2.4405140071390755e-06, "loss": 0.8352, "step": 13594 }, { "epoch": 0.78, "grad_norm": 2.0066795349121094, "learning_rate": 2.4392980875116414e-06, "loss": 0.8483, "step": 13595 }, { "epoch": 0.78, "grad_norm": 2.13130784034729, "learning_rate": 2.4380824287829073e-06, "loss": 0.922, "step": 13596 }, { "epoch": 0.78, "grad_norm": 1.8409433364868164, "learning_rate": 2.4368670309948283e-06, "loss": 0.8528, "step": 13597 }, { "epoch": 0.78, "grad_norm": 1.9464130401611328, "learning_rate": 2.435651894189338e-06, "loss": 0.9887, "step": 13598 }, { "epoch": 0.78, "grad_norm": 1.7921556234359741, "learning_rate": 2.4344370184083742e-06, "loss": 0.8776, "step": 13599 }, { "epoch": 0.78, "grad_norm": 1.861660361289978, "learning_rate": 2.4332224036938524e-06, "loss": 0.8166, "step": 13600 }, { "epoch": 0.78, "grad_norm": 1.6708663702011108, "learning_rate": 2.432008050087692e-06, "loss": 1.0309, "step": 13601 }, { "epoch": 0.78, "grad_norm": 1.9906009435653687, "learning_rate": 2.4307939576317897e-06, "loss": 0.8884, "step": 13602 }, { "epoch": 0.78, "grad_norm": 1.7988314628601074, "learning_rate": 2.429580126368046e-06, "loss": 0.9101, "step": 13603 }, { "epoch": 0.78, "grad_norm": 0.9699030518531799, "learning_rate": 2.428366556338344e-06, "loss": 0.5127, "step": 13604 }, { "epoch": 0.78, "grad_norm": 1.9651240110397339, "learning_rate": 2.4271532475845617e-06, "loss": 0.9634, "step": 13605 }, { "epoch": 0.78, "grad_norm": 1.623925805091858, "learning_rate": 2.425940200148569e-06, "loss": 0.8451, "step": 13606 }, { "epoch": 0.78, "grad_norm": 1.8109289407730103, "learning_rate": 2.4247274140722197e-06, "loss": 0.9301, "step": 13607 }, { "epoch": 0.78, "grad_norm": 1.0365108251571655, "learning_rate": 2.4235148893973693e-06, "loss": 0.4759, "step": 13608 }, { "epoch": 0.78, "grad_norm": 1.9081228971481323, "learning_rate": 2.4223026261658546e-06, "loss": 0.9117, "step": 13609 }, { "epoch": 0.78, "grad_norm": 1.8595786094665527, "learning_rate": 2.42109062441951e-06, "loss": 0.8851, "step": 13610 }, { "epoch": 0.78, "grad_norm": 1.882069706916809, "learning_rate": 2.419878884200155e-06, "loss": 0.8586, "step": 13611 }, { "epoch": 0.78, "grad_norm": 1.7658051252365112, "learning_rate": 2.4186674055496084e-06, "loss": 0.9557, "step": 13612 }, { "epoch": 0.78, "grad_norm": 1.9237983226776123, "learning_rate": 2.417456188509669e-06, "loss": 0.9422, "step": 13613 }, { "epoch": 0.78, "grad_norm": 2.093208074569702, "learning_rate": 2.4162452331221387e-06, "loss": 0.9901, "step": 13614 }, { "epoch": 0.78, "grad_norm": 1.6953976154327393, "learning_rate": 2.415034539428798e-06, "loss": 0.9097, "step": 13615 }, { "epoch": 0.78, "grad_norm": 1.6519988775253296, "learning_rate": 2.413824107471431e-06, "loss": 0.8395, "step": 13616 }, { "epoch": 0.78, "grad_norm": 1.7033830881118774, "learning_rate": 2.412613937291799e-06, "loss": 0.8968, "step": 13617 }, { "epoch": 0.78, "grad_norm": 1.73103928565979, "learning_rate": 2.4114040289316665e-06, "loss": 0.9502, "step": 13618 }, { "epoch": 0.78, "grad_norm": 1.8082640171051025, "learning_rate": 2.4101943824327855e-06, "loss": 0.9526, "step": 13619 }, { "epoch": 0.78, "grad_norm": 1.8229402303695679, "learning_rate": 2.4089849978368917e-06, "loss": 0.9362, "step": 13620 }, { "epoch": 0.78, "grad_norm": 1.9007649421691895, "learning_rate": 2.407775875185725e-06, "loss": 0.8726, "step": 13621 }, { "epoch": 0.78, "grad_norm": 1.5575422048568726, "learning_rate": 2.4065670145210006e-06, "loss": 0.9016, "step": 13622 }, { "epoch": 0.78, "grad_norm": 1.653222918510437, "learning_rate": 2.4053584158844412e-06, "loss": 0.8523, "step": 13623 }, { "epoch": 0.78, "grad_norm": 1.9007463455200195, "learning_rate": 2.4041500793177454e-06, "loss": 0.8686, "step": 13624 }, { "epoch": 0.78, "grad_norm": 1.7739113569259644, "learning_rate": 2.402942004862614e-06, "loss": 0.8704, "step": 13625 }, { "epoch": 0.78, "grad_norm": 1.7059307098388672, "learning_rate": 2.4017341925607296e-06, "loss": 0.9077, "step": 13626 }, { "epoch": 0.78, "grad_norm": 1.8686466217041016, "learning_rate": 2.4005266424537767e-06, "loss": 0.9074, "step": 13627 }, { "epoch": 0.78, "grad_norm": 1.6895369291305542, "learning_rate": 2.3993193545834182e-06, "loss": 0.8743, "step": 13628 }, { "epoch": 0.78, "grad_norm": 1.6835358142852783, "learning_rate": 2.3981123289913176e-06, "loss": 0.8506, "step": 13629 }, { "epoch": 0.78, "grad_norm": 1.864768385887146, "learning_rate": 2.3969055657191276e-06, "loss": 0.8837, "step": 13630 }, { "epoch": 0.78, "grad_norm": 1.8432718515396118, "learning_rate": 2.3956990648084855e-06, "loss": 0.888, "step": 13631 }, { "epoch": 0.78, "grad_norm": 1.7115341424942017, "learning_rate": 2.39449282630103e-06, "loss": 0.8813, "step": 13632 }, { "epoch": 0.78, "grad_norm": 1.7658716440200806, "learning_rate": 2.3932868502383788e-06, "loss": 0.8436, "step": 13633 }, { "epoch": 0.78, "grad_norm": 1.7238898277282715, "learning_rate": 2.3920811366621533e-06, "loss": 0.9932, "step": 13634 }, { "epoch": 0.78, "grad_norm": 1.0151317119598389, "learning_rate": 2.3908756856139524e-06, "loss": 0.5165, "step": 13635 }, { "epoch": 0.78, "grad_norm": 1.7716412544250488, "learning_rate": 2.389670497135379e-06, "loss": 0.9194, "step": 13636 }, { "epoch": 0.78, "grad_norm": 1.8116930723190308, "learning_rate": 2.388465571268016e-06, "loss": 0.9041, "step": 13637 }, { "epoch": 0.78, "grad_norm": 1.8976644277572632, "learning_rate": 2.3872609080534436e-06, "loss": 0.9432, "step": 13638 }, { "epoch": 0.78, "grad_norm": 1.69411039352417, "learning_rate": 2.386056507533232e-06, "loss": 0.8547, "step": 13639 }, { "epoch": 0.78, "grad_norm": 1.659548282623291, "learning_rate": 2.384852369748946e-06, "loss": 0.8915, "step": 13640 }, { "epoch": 0.78, "grad_norm": 1.754137396812439, "learning_rate": 2.3836484947421278e-06, "loss": 0.9148, "step": 13641 }, { "epoch": 0.78, "grad_norm": 1.7559254169464111, "learning_rate": 2.382444882554328e-06, "loss": 0.9491, "step": 13642 }, { "epoch": 0.78, "grad_norm": 1.8168963193893433, "learning_rate": 2.3812415332270742e-06, "loss": 0.8787, "step": 13643 }, { "epoch": 0.78, "grad_norm": 1.7588047981262207, "learning_rate": 2.3800384468018954e-06, "loss": 0.9218, "step": 13644 }, { "epoch": 0.78, "grad_norm": 1.8390454053878784, "learning_rate": 2.3788356233203014e-06, "loss": 0.8287, "step": 13645 }, { "epoch": 0.78, "grad_norm": 1.5857840776443481, "learning_rate": 2.377633062823804e-06, "loss": 0.7898, "step": 13646 }, { "epoch": 0.78, "grad_norm": 1.965441107749939, "learning_rate": 2.3764307653538954e-06, "loss": 0.8902, "step": 13647 }, { "epoch": 0.78, "grad_norm": 1.928942084312439, "learning_rate": 2.3752287309520637e-06, "loss": 0.9285, "step": 13648 }, { "epoch": 0.78, "grad_norm": 1.8728222846984863, "learning_rate": 2.3740269596597943e-06, "loss": 0.827, "step": 13649 }, { "epoch": 0.78, "grad_norm": 1.9338548183441162, "learning_rate": 2.372825451518549e-06, "loss": 1.0039, "step": 13650 }, { "epoch": 0.78, "grad_norm": 1.8214329481124878, "learning_rate": 2.3716242065697938e-06, "loss": 0.9199, "step": 13651 }, { "epoch": 0.78, "grad_norm": 1.948134183883667, "learning_rate": 2.3704232248549753e-06, "loss": 0.8203, "step": 13652 }, { "epoch": 0.78, "grad_norm": 1.8129678964614868, "learning_rate": 2.3692225064155427e-06, "loss": 0.9602, "step": 13653 }, { "epoch": 0.78, "grad_norm": 1.8240019083023071, "learning_rate": 2.368022051292922e-06, "loss": 0.8448, "step": 13654 }, { "epoch": 0.78, "grad_norm": 1.8858380317687988, "learning_rate": 2.366821859528544e-06, "loss": 0.9295, "step": 13655 }, { "epoch": 0.78, "grad_norm": 1.7104382514953613, "learning_rate": 2.3656219311638194e-06, "loss": 0.956, "step": 13656 }, { "epoch": 0.78, "grad_norm": 1.8154139518737793, "learning_rate": 2.3644222662401583e-06, "loss": 0.9469, "step": 13657 }, { "epoch": 0.78, "grad_norm": 1.7367316484451294, "learning_rate": 2.363222864798953e-06, "loss": 0.8136, "step": 13658 }, { "epoch": 0.78, "grad_norm": 1.7884933948516846, "learning_rate": 2.362023726881594e-06, "loss": 0.9232, "step": 13659 }, { "epoch": 0.78, "grad_norm": 1.8147273063659668, "learning_rate": 2.360824852529463e-06, "loss": 0.9362, "step": 13660 }, { "epoch": 0.78, "grad_norm": 1.663731336593628, "learning_rate": 2.3596262417839256e-06, "loss": 0.8924, "step": 13661 }, { "epoch": 0.78, "grad_norm": 1.760013461112976, "learning_rate": 2.358427894686346e-06, "loss": 0.8986, "step": 13662 }, { "epoch": 0.78, "grad_norm": 1.8135193586349487, "learning_rate": 2.3572298112780702e-06, "loss": 0.9135, "step": 13663 }, { "epoch": 0.78, "grad_norm": 1.6551241874694824, "learning_rate": 2.356031991600448e-06, "loss": 0.8573, "step": 13664 }, { "epoch": 0.78, "grad_norm": 1.7942925691604614, "learning_rate": 2.3548344356948063e-06, "loss": 0.882, "step": 13665 }, { "epoch": 0.78, "grad_norm": 1.8022816181182861, "learning_rate": 2.353637143602475e-06, "loss": 0.888, "step": 13666 }, { "epoch": 0.78, "grad_norm": 1.7360374927520752, "learning_rate": 2.3524401153647646e-06, "loss": 0.8702, "step": 13667 }, { "epoch": 0.78, "grad_norm": 1.7473527193069458, "learning_rate": 2.3512433510229858e-06, "loss": 0.9101, "step": 13668 }, { "epoch": 0.78, "grad_norm": 1.761274814605713, "learning_rate": 2.350046850618429e-06, "loss": 0.9056, "step": 13669 }, { "epoch": 0.78, "grad_norm": 1.79006028175354, "learning_rate": 2.3488506141923907e-06, "loss": 0.8481, "step": 13670 }, { "epoch": 0.78, "grad_norm": 1.8377692699432373, "learning_rate": 2.347654641786141e-06, "loss": 1.0105, "step": 13671 }, { "epoch": 0.78, "grad_norm": 1.837234377861023, "learning_rate": 2.346458933440954e-06, "loss": 0.8925, "step": 13672 }, { "epoch": 0.78, "grad_norm": 2.252354621887207, "learning_rate": 2.345263489198093e-06, "loss": 1.0055, "step": 13673 }, { "epoch": 0.78, "grad_norm": 1.8179010152816772, "learning_rate": 2.3440683090988024e-06, "loss": 0.8645, "step": 13674 }, { "epoch": 0.78, "grad_norm": 1.6748462915420532, "learning_rate": 2.342873393184333e-06, "loss": 0.864, "step": 13675 }, { "epoch": 0.78, "grad_norm": 1.6932932138442993, "learning_rate": 2.3416787414959097e-06, "loss": 0.8868, "step": 13676 }, { "epoch": 0.78, "grad_norm": 1.9458096027374268, "learning_rate": 2.3404843540747634e-06, "loss": 0.9213, "step": 13677 }, { "epoch": 0.78, "grad_norm": 1.8058936595916748, "learning_rate": 2.3392902309621025e-06, "loss": 0.8676, "step": 13678 }, { "epoch": 0.78, "grad_norm": 1.7884318828582764, "learning_rate": 2.33809637219914e-06, "loss": 0.8709, "step": 13679 }, { "epoch": 0.78, "grad_norm": 1.700257658958435, "learning_rate": 2.3369027778270657e-06, "loss": 0.9089, "step": 13680 }, { "epoch": 0.78, "grad_norm": 1.8179317712783813, "learning_rate": 2.3357094478870747e-06, "loss": 0.8374, "step": 13681 }, { "epoch": 0.78, "grad_norm": 1.7130640745162964, "learning_rate": 2.3345163824203377e-06, "loss": 0.884, "step": 13682 }, { "epoch": 0.78, "grad_norm": 1.7481637001037598, "learning_rate": 2.3333235814680264e-06, "loss": 0.8403, "step": 13683 }, { "epoch": 0.78, "grad_norm": 1.6055896282196045, "learning_rate": 2.3321310450713066e-06, "loss": 0.8416, "step": 13684 }, { "epoch": 0.78, "grad_norm": 1.894039511680603, "learning_rate": 2.330938773271322e-06, "loss": 0.9605, "step": 13685 }, { "epoch": 0.78, "grad_norm": 1.6541424989700317, "learning_rate": 2.329746766109221e-06, "loss": 0.9665, "step": 13686 }, { "epoch": 0.78, "grad_norm": 1.7017853260040283, "learning_rate": 2.328555023626129e-06, "loss": 0.8191, "step": 13687 }, { "epoch": 0.79, "grad_norm": 1.7103816270828247, "learning_rate": 2.327363545863177e-06, "loss": 0.8927, "step": 13688 }, { "epoch": 0.79, "grad_norm": 1.7522919178009033, "learning_rate": 2.3261723328614747e-06, "loss": 0.9455, "step": 13689 }, { "epoch": 0.79, "grad_norm": 1.9626909494400024, "learning_rate": 2.3249813846621307e-06, "loss": 0.9702, "step": 13690 }, { "epoch": 0.79, "grad_norm": 1.9076628684997559, "learning_rate": 2.3237907013062377e-06, "loss": 0.8757, "step": 13691 }, { "epoch": 0.79, "grad_norm": 2.0335936546325684, "learning_rate": 2.322600282834888e-06, "loss": 0.9886, "step": 13692 }, { "epoch": 0.79, "grad_norm": 1.8790507316589355, "learning_rate": 2.3214101292891535e-06, "loss": 0.8645, "step": 13693 }, { "epoch": 0.79, "grad_norm": 1.8087753057479858, "learning_rate": 2.3202202407101084e-06, "loss": 0.8683, "step": 13694 }, { "epoch": 0.79, "grad_norm": 1.6997405290603638, "learning_rate": 2.3190306171388077e-06, "loss": 0.9287, "step": 13695 }, { "epoch": 0.79, "grad_norm": 1.8791561126708984, "learning_rate": 2.3178412586163046e-06, "loss": 0.9458, "step": 13696 }, { "epoch": 0.79, "grad_norm": 1.8820852041244507, "learning_rate": 2.3166521651836437e-06, "loss": 0.9908, "step": 13697 }, { "epoch": 0.79, "grad_norm": 1.7594972848892212, "learning_rate": 2.315463336881851e-06, "loss": 0.9198, "step": 13698 }, { "epoch": 0.79, "grad_norm": 2.0052285194396973, "learning_rate": 2.3142747737519555e-06, "loss": 0.9397, "step": 13699 }, { "epoch": 0.79, "grad_norm": 1.7913938760757446, "learning_rate": 2.3130864758349645e-06, "loss": 0.8404, "step": 13700 }, { "epoch": 0.79, "grad_norm": 1.629380464553833, "learning_rate": 2.3118984431718903e-06, "loss": 0.8831, "step": 13701 }, { "epoch": 0.79, "grad_norm": 1.844380497932434, "learning_rate": 2.3107106758037225e-06, "loss": 0.8203, "step": 13702 }, { "epoch": 0.79, "grad_norm": 1.7498242855072021, "learning_rate": 2.309523173771453e-06, "loss": 0.8491, "step": 13703 }, { "epoch": 0.79, "grad_norm": 1.773834466934204, "learning_rate": 2.3083359371160497e-06, "loss": 0.886, "step": 13704 }, { "epoch": 0.79, "grad_norm": 1.6575134992599487, "learning_rate": 2.307148965878494e-06, "loss": 0.8911, "step": 13705 }, { "epoch": 0.79, "grad_norm": 1.6364333629608154, "learning_rate": 2.3059622600997355e-06, "loss": 0.9379, "step": 13706 }, { "epoch": 0.79, "grad_norm": 2.0666403770446777, "learning_rate": 2.304775819820729e-06, "loss": 0.8677, "step": 13707 }, { "epoch": 0.79, "grad_norm": 1.8420294523239136, "learning_rate": 2.3035896450824115e-06, "loss": 0.8903, "step": 13708 }, { "epoch": 0.79, "grad_norm": 0.9709292054176331, "learning_rate": 2.302403735925718e-06, "loss": 0.479, "step": 13709 }, { "epoch": 0.79, "grad_norm": 1.9285016059875488, "learning_rate": 2.3012180923915673e-06, "loss": 0.8433, "step": 13710 }, { "epoch": 0.79, "grad_norm": 1.8556067943572998, "learning_rate": 2.300032714520877e-06, "loss": 0.9879, "step": 13711 }, { "epoch": 0.79, "grad_norm": 1.8201324939727783, "learning_rate": 2.2988476023545447e-06, "loss": 0.9486, "step": 13712 }, { "epoch": 0.79, "grad_norm": 1.862945795059204, "learning_rate": 2.2976627559334707e-06, "loss": 0.8615, "step": 13713 }, { "epoch": 0.79, "grad_norm": 1.8331682682037354, "learning_rate": 2.296478175298542e-06, "loss": 0.9561, "step": 13714 }, { "epoch": 0.79, "grad_norm": 2.592003583908081, "learning_rate": 2.2952938604906303e-06, "loss": 0.9171, "step": 13715 }, { "epoch": 0.79, "grad_norm": 1.8083757162094116, "learning_rate": 2.2941098115506065e-06, "loss": 0.9086, "step": 13716 }, { "epoch": 0.79, "grad_norm": 1.7005000114440918, "learning_rate": 2.2929260285193266e-06, "loss": 1.0155, "step": 13717 }, { "epoch": 0.79, "grad_norm": 1.9146922826766968, "learning_rate": 2.291742511437642e-06, "loss": 0.9498, "step": 13718 }, { "epoch": 0.79, "grad_norm": 1.6851366758346558, "learning_rate": 2.2905592603463888e-06, "loss": 0.9733, "step": 13719 }, { "epoch": 0.79, "grad_norm": 1.6600128412246704, "learning_rate": 2.2893762752864035e-06, "loss": 0.8193, "step": 13720 }, { "epoch": 0.79, "grad_norm": 1.816359519958496, "learning_rate": 2.2881935562985015e-06, "loss": 0.9327, "step": 13721 }, { "epoch": 0.79, "grad_norm": 1.7869069576263428, "learning_rate": 2.2870111034235e-06, "loss": 0.988, "step": 13722 }, { "epoch": 0.79, "grad_norm": 1.8428736925125122, "learning_rate": 2.2858289167021963e-06, "loss": 0.9121, "step": 13723 }, { "epoch": 0.79, "grad_norm": 1.8796955347061157, "learning_rate": 2.2846469961753916e-06, "loss": 0.8068, "step": 13724 }, { "epoch": 0.79, "grad_norm": 1.8612310886383057, "learning_rate": 2.2834653418838647e-06, "loss": 0.894, "step": 13725 }, { "epoch": 0.79, "grad_norm": 1.6684895753860474, "learning_rate": 2.282283953868393e-06, "loss": 0.8273, "step": 13726 }, { "epoch": 0.79, "grad_norm": 1.7755604982376099, "learning_rate": 2.281102832169747e-06, "loss": 0.9481, "step": 13727 }, { "epoch": 0.79, "grad_norm": 1.819770336151123, "learning_rate": 2.2799219768286774e-06, "loss": 0.8916, "step": 13728 }, { "epoch": 0.79, "grad_norm": 0.9963771104812622, "learning_rate": 2.278741387885938e-06, "loss": 0.5464, "step": 13729 }, { "epoch": 0.79, "grad_norm": 1.715556025505066, "learning_rate": 2.277561065382261e-06, "loss": 0.956, "step": 13730 }, { "epoch": 0.79, "grad_norm": 1.7296901941299438, "learning_rate": 2.276381009358384e-06, "loss": 0.9237, "step": 13731 }, { "epoch": 0.79, "grad_norm": 1.8876839876174927, "learning_rate": 2.27520121985502e-06, "loss": 0.9787, "step": 13732 }, { "epoch": 0.79, "grad_norm": 0.9505195617675781, "learning_rate": 2.274021696912886e-06, "loss": 0.497, "step": 13733 }, { "epoch": 0.79, "grad_norm": 1.7306569814682007, "learning_rate": 2.272842440572679e-06, "loss": 0.921, "step": 13734 }, { "epoch": 0.79, "grad_norm": 1.859546184539795, "learning_rate": 2.271663450875097e-06, "loss": 0.9257, "step": 13735 }, { "epoch": 0.79, "grad_norm": 1.832834005355835, "learning_rate": 2.2704847278608187e-06, "loss": 0.8853, "step": 13736 }, { "epoch": 0.79, "grad_norm": 1.7615388631820679, "learning_rate": 2.2693062715705203e-06, "loss": 0.874, "step": 13737 }, { "epoch": 0.79, "grad_norm": 1.811547040939331, "learning_rate": 2.2681280820448715e-06, "loss": 0.9464, "step": 13738 }, { "epoch": 0.79, "grad_norm": 1.780483603477478, "learning_rate": 2.2669501593245214e-06, "loss": 0.979, "step": 13739 }, { "epoch": 0.79, "grad_norm": 1.7343541383743286, "learning_rate": 2.265772503450122e-06, "loss": 0.881, "step": 13740 }, { "epoch": 0.79, "grad_norm": 1.9140340089797974, "learning_rate": 2.264595114462307e-06, "loss": 0.9782, "step": 13741 }, { "epoch": 0.79, "grad_norm": 1.7881622314453125, "learning_rate": 2.2634179924017086e-06, "loss": 0.7928, "step": 13742 }, { "epoch": 0.79, "grad_norm": 1.8407269716262817, "learning_rate": 2.2622411373089415e-06, "loss": 0.9731, "step": 13743 }, { "epoch": 0.79, "grad_norm": 1.7623176574707031, "learning_rate": 2.2610645492246207e-06, "loss": 0.9707, "step": 13744 }, { "epoch": 0.79, "grad_norm": 1.6502431631088257, "learning_rate": 2.2598882281893417e-06, "loss": 0.7793, "step": 13745 }, { "epoch": 0.79, "grad_norm": 1.78104829788208, "learning_rate": 2.2587121742437024e-06, "loss": 0.8254, "step": 13746 }, { "epoch": 0.79, "grad_norm": 1.828385829925537, "learning_rate": 2.2575363874282784e-06, "loss": 0.9187, "step": 13747 }, { "epoch": 0.79, "grad_norm": 1.6948174238204956, "learning_rate": 2.256360867783648e-06, "loss": 0.966, "step": 13748 }, { "epoch": 0.79, "grad_norm": 1.7490049600601196, "learning_rate": 2.2551856153503714e-06, "loss": 0.8107, "step": 13749 }, { "epoch": 0.79, "grad_norm": 1.704943060874939, "learning_rate": 2.2540106301690044e-06, "loss": 0.9108, "step": 13750 }, { "epoch": 0.79, "grad_norm": 1.8893815279006958, "learning_rate": 2.2528359122800957e-06, "loss": 0.891, "step": 13751 }, { "epoch": 0.79, "grad_norm": 1.7886872291564941, "learning_rate": 2.251661461724176e-06, "loss": 0.9073, "step": 13752 }, { "epoch": 0.79, "grad_norm": 1.7814863920211792, "learning_rate": 2.2504872785417776e-06, "loss": 0.9209, "step": 13753 }, { "epoch": 0.79, "grad_norm": 1.8895375728607178, "learning_rate": 2.249313362773414e-06, "loss": 0.9159, "step": 13754 }, { "epoch": 0.79, "grad_norm": 1.7596838474273682, "learning_rate": 2.2481397144595975e-06, "loss": 0.9126, "step": 13755 }, { "epoch": 0.79, "grad_norm": 1.8434736728668213, "learning_rate": 2.246966333640823e-06, "loss": 0.9373, "step": 13756 }, { "epoch": 0.79, "grad_norm": 1.8783890008926392, "learning_rate": 2.245793220357586e-06, "loss": 0.9616, "step": 13757 }, { "epoch": 0.79, "grad_norm": 1.6653900146484375, "learning_rate": 2.2446203746503626e-06, "loss": 0.9444, "step": 13758 }, { "epoch": 0.79, "grad_norm": 1.6814123392105103, "learning_rate": 2.243447796559628e-06, "loss": 0.9646, "step": 13759 }, { "epoch": 0.79, "grad_norm": 1.7036863565444946, "learning_rate": 2.2422754861258402e-06, "loss": 0.8768, "step": 13760 }, { "epoch": 0.79, "grad_norm": 1.6109156608581543, "learning_rate": 2.241103443389455e-06, "loss": 0.9263, "step": 13761 }, { "epoch": 0.79, "grad_norm": 1.7284893989562988, "learning_rate": 2.239931668390919e-06, "loss": 0.9258, "step": 13762 }, { "epoch": 0.79, "grad_norm": 1.7164733409881592, "learning_rate": 2.238760161170662e-06, "loss": 0.9379, "step": 13763 }, { "epoch": 0.79, "grad_norm": 1.7378226518630981, "learning_rate": 2.237588921769114e-06, "loss": 0.9341, "step": 13764 }, { "epoch": 0.79, "grad_norm": 1.7952555418014526, "learning_rate": 2.236417950226686e-06, "loss": 0.9198, "step": 13765 }, { "epoch": 0.79, "grad_norm": 1.6079233884811401, "learning_rate": 2.2352472465837915e-06, "loss": 0.8893, "step": 13766 }, { "epoch": 0.79, "grad_norm": 1.808095097541809, "learning_rate": 2.234076810880821e-06, "loss": 0.9644, "step": 13767 }, { "epoch": 0.79, "grad_norm": 1.7293614149093628, "learning_rate": 2.2329066431581693e-06, "loss": 0.9086, "step": 13768 }, { "epoch": 0.79, "grad_norm": 2.137747049331665, "learning_rate": 2.23173674345621e-06, "loss": 0.9378, "step": 13769 }, { "epoch": 0.79, "grad_norm": 1.8463839292526245, "learning_rate": 2.230567111815316e-06, "loss": 0.8991, "step": 13770 }, { "epoch": 0.79, "grad_norm": 1.8261408805847168, "learning_rate": 2.229397748275849e-06, "loss": 0.886, "step": 13771 }, { "epoch": 0.79, "grad_norm": 1.6731531620025635, "learning_rate": 2.2282286528781604e-06, "loss": 0.9302, "step": 13772 }, { "epoch": 0.79, "grad_norm": 1.8516348600387573, "learning_rate": 2.22705982566259e-06, "loss": 0.9173, "step": 13773 }, { "epoch": 0.79, "grad_norm": 1.6551576852798462, "learning_rate": 2.225891266669474e-06, "loss": 0.9373, "step": 13774 }, { "epoch": 0.79, "grad_norm": 1.026163935661316, "learning_rate": 2.224722975939133e-06, "loss": 0.6001, "step": 13775 }, { "epoch": 0.79, "grad_norm": 1.5885018110275269, "learning_rate": 2.2235549535118838e-06, "loss": 0.9574, "step": 13776 }, { "epoch": 0.79, "grad_norm": 1.783301830291748, "learning_rate": 2.222387199428029e-06, "loss": 0.8903, "step": 13777 }, { "epoch": 0.79, "grad_norm": 1.7355778217315674, "learning_rate": 2.221219713727868e-06, "loss": 0.9144, "step": 13778 }, { "epoch": 0.79, "grad_norm": 1.7261461019515991, "learning_rate": 2.2200524964516835e-06, "loss": 0.8655, "step": 13779 }, { "epoch": 0.79, "grad_norm": 1.708254337310791, "learning_rate": 2.218885547639754e-06, "loss": 0.8532, "step": 13780 }, { "epoch": 0.79, "grad_norm": 1.8537296056747437, "learning_rate": 2.2177188673323523e-06, "loss": 0.9126, "step": 13781 }, { "epoch": 0.79, "grad_norm": 1.7386155128479004, "learning_rate": 2.2165524555697306e-06, "loss": 0.8627, "step": 13782 }, { "epoch": 0.79, "grad_norm": 1.623005747795105, "learning_rate": 2.2153863123921435e-06, "loss": 0.8558, "step": 13783 }, { "epoch": 0.79, "grad_norm": 1.5860615968704224, "learning_rate": 2.214220437839827e-06, "loss": 0.7943, "step": 13784 }, { "epoch": 0.79, "grad_norm": 1.8126397132873535, "learning_rate": 2.2130548319530177e-06, "loss": 0.8235, "step": 13785 }, { "epoch": 0.79, "grad_norm": 1.6117854118347168, "learning_rate": 2.2118894947719305e-06, "loss": 0.8962, "step": 13786 }, { "epoch": 0.79, "grad_norm": 1.9143046140670776, "learning_rate": 2.2107244263367855e-06, "loss": 0.8604, "step": 13787 }, { "epoch": 0.79, "grad_norm": 1.6688520908355713, "learning_rate": 2.2095596266877783e-06, "loss": 0.9114, "step": 13788 }, { "epoch": 0.79, "grad_norm": 1.7845020294189453, "learning_rate": 2.2083950958651103e-06, "loss": 0.9671, "step": 13789 }, { "epoch": 0.79, "grad_norm": 1.8241589069366455, "learning_rate": 2.2072308339089597e-06, "loss": 0.8848, "step": 13790 }, { "epoch": 0.79, "grad_norm": 1.8034124374389648, "learning_rate": 2.206066840859504e-06, "loss": 0.9238, "step": 13791 }, { "epoch": 0.79, "grad_norm": 1.7303788661956787, "learning_rate": 2.2049031167569134e-06, "loss": 0.9272, "step": 13792 }, { "epoch": 0.79, "grad_norm": 1.6216984987258911, "learning_rate": 2.2037396616413386e-06, "loss": 0.9558, "step": 13793 }, { "epoch": 0.79, "grad_norm": 1.8044848442077637, "learning_rate": 2.202576475552933e-06, "loss": 0.8795, "step": 13794 }, { "epoch": 0.79, "grad_norm": 1.7683902978897095, "learning_rate": 2.2014135585318296e-06, "loss": 0.9747, "step": 13795 }, { "epoch": 0.79, "grad_norm": 1.8716888427734375, "learning_rate": 2.2002509106181625e-06, "loss": 0.9392, "step": 13796 }, { "epoch": 0.79, "grad_norm": 1.8777893781661987, "learning_rate": 2.199088531852046e-06, "loss": 1.0105, "step": 13797 }, { "epoch": 0.79, "grad_norm": 1.8201268911361694, "learning_rate": 2.197926422273595e-06, "loss": 0.8881, "step": 13798 }, { "epoch": 0.79, "grad_norm": 1.8586938381195068, "learning_rate": 2.1967645819229077e-06, "loss": 1.0509, "step": 13799 }, { "epoch": 0.79, "grad_norm": 1.5527417659759521, "learning_rate": 2.1956030108400796e-06, "loss": 0.8054, "step": 13800 }, { "epoch": 0.79, "grad_norm": 1.7363436222076416, "learning_rate": 2.194441709065187e-06, "loss": 0.9149, "step": 13801 }, { "epoch": 0.79, "grad_norm": 1.8199156522750854, "learning_rate": 2.193280676638311e-06, "loss": 0.896, "step": 13802 }, { "epoch": 0.79, "grad_norm": 1.851406455039978, "learning_rate": 2.1921199135995086e-06, "loss": 0.9581, "step": 13803 }, { "epoch": 0.79, "grad_norm": 1.7970741987228394, "learning_rate": 2.1909594199888374e-06, "loss": 0.9468, "step": 13804 }, { "epoch": 0.79, "grad_norm": 1.7619885206222534, "learning_rate": 2.189799195846346e-06, "loss": 0.9835, "step": 13805 }, { "epoch": 0.79, "grad_norm": 1.7285693883895874, "learning_rate": 2.188639241212065e-06, "loss": 0.9125, "step": 13806 }, { "epoch": 0.79, "grad_norm": 1.0640184879302979, "learning_rate": 2.1874795561260256e-06, "loss": 0.516, "step": 13807 }, { "epoch": 0.79, "grad_norm": 1.7181600332260132, "learning_rate": 2.186320140628241e-06, "loss": 0.8774, "step": 13808 }, { "epoch": 0.79, "grad_norm": 1.740336537361145, "learning_rate": 2.185160994758724e-06, "loss": 0.9246, "step": 13809 }, { "epoch": 0.79, "grad_norm": 1.8006306886672974, "learning_rate": 2.184002118557469e-06, "loss": 0.8765, "step": 13810 }, { "epoch": 0.79, "grad_norm": 1.747934341430664, "learning_rate": 2.1828435120644698e-06, "loss": 0.9317, "step": 13811 }, { "epoch": 0.79, "grad_norm": 1.9009658098220825, "learning_rate": 2.1816851753197023e-06, "loss": 0.9235, "step": 13812 }, { "epoch": 0.79, "grad_norm": 1.7725614309310913, "learning_rate": 2.180527108363143e-06, "loss": 0.902, "step": 13813 }, { "epoch": 0.79, "grad_norm": 1.901047945022583, "learning_rate": 2.179369311234747e-06, "loss": 0.8909, "step": 13814 }, { "epoch": 0.79, "grad_norm": 1.7487728595733643, "learning_rate": 2.178211783974471e-06, "loss": 0.8616, "step": 13815 }, { "epoch": 0.79, "grad_norm": 2.6686267852783203, "learning_rate": 2.1770545266222587e-06, "loss": 0.9567, "step": 13816 }, { "epoch": 0.79, "grad_norm": 1.7387621402740479, "learning_rate": 2.1758975392180405e-06, "loss": 0.9245, "step": 13817 }, { "epoch": 0.79, "grad_norm": 1.6347938776016235, "learning_rate": 2.174740821801744e-06, "loss": 0.8989, "step": 13818 }, { "epoch": 0.79, "grad_norm": 1.7126402854919434, "learning_rate": 2.17358437441328e-06, "loss": 0.9098, "step": 13819 }, { "epoch": 0.79, "grad_norm": 0.9313092827796936, "learning_rate": 2.172428197092561e-06, "loss": 0.4543, "step": 13820 }, { "epoch": 0.79, "grad_norm": 1.7997554540634155, "learning_rate": 2.1712722898794756e-06, "loss": 0.8912, "step": 13821 }, { "epoch": 0.79, "grad_norm": 1.8088159561157227, "learning_rate": 2.1701166528139182e-06, "loss": 0.9595, "step": 13822 }, { "epoch": 0.79, "grad_norm": 1.8501399755477905, "learning_rate": 2.16896128593576e-06, "loss": 0.9482, "step": 13823 }, { "epoch": 0.79, "grad_norm": 1.0146775245666504, "learning_rate": 2.167806189284877e-06, "loss": 0.5353, "step": 13824 }, { "epoch": 0.79, "grad_norm": 1.5741924047470093, "learning_rate": 2.166651362901119e-06, "loss": 0.9345, "step": 13825 }, { "epoch": 0.79, "grad_norm": 0.9845336079597473, "learning_rate": 2.1654968068243455e-06, "loss": 0.543, "step": 13826 }, { "epoch": 0.79, "grad_norm": 1.6851930618286133, "learning_rate": 2.164342521094388e-06, "loss": 0.8325, "step": 13827 }, { "epoch": 0.79, "grad_norm": 1.082017421722412, "learning_rate": 2.1631885057510836e-06, "loss": 0.5781, "step": 13828 }, { "epoch": 0.79, "grad_norm": 1.819605827331543, "learning_rate": 2.162034760834254e-06, "loss": 0.9403, "step": 13829 }, { "epoch": 0.79, "grad_norm": 0.9804980754852295, "learning_rate": 2.160881286383708e-06, "loss": 0.5368, "step": 13830 }, { "epoch": 0.79, "grad_norm": 1.7803066968917847, "learning_rate": 2.159728082439255e-06, "loss": 0.8666, "step": 13831 }, { "epoch": 0.79, "grad_norm": 1.7022088766098022, "learning_rate": 2.1585751490406816e-06, "loss": 0.9315, "step": 13832 }, { "epoch": 0.79, "grad_norm": 1.9040809869766235, "learning_rate": 2.157422486227778e-06, "loss": 0.8583, "step": 13833 }, { "epoch": 0.79, "grad_norm": 1.7470202445983887, "learning_rate": 2.1562700940403134e-06, "loss": 0.9411, "step": 13834 }, { "epoch": 0.79, "grad_norm": 1.8454785346984863, "learning_rate": 2.1551179725180613e-06, "loss": 0.9635, "step": 13835 }, { "epoch": 0.79, "grad_norm": 1.8282545804977417, "learning_rate": 2.153966121700769e-06, "loss": 0.9495, "step": 13836 }, { "epoch": 0.79, "grad_norm": 1.636711597442627, "learning_rate": 2.152814541628193e-06, "loss": 0.9873, "step": 13837 }, { "epoch": 0.79, "grad_norm": 1.758360743522644, "learning_rate": 2.1516632323400656e-06, "loss": 0.8658, "step": 13838 }, { "epoch": 0.79, "grad_norm": 1.8387466669082642, "learning_rate": 2.1505121938761184e-06, "loss": 0.8796, "step": 13839 }, { "epoch": 0.79, "grad_norm": 1.75312340259552, "learning_rate": 2.1493614262760664e-06, "loss": 0.97, "step": 13840 }, { "epoch": 0.79, "grad_norm": 1.9700262546539307, "learning_rate": 2.148210929579625e-06, "loss": 0.875, "step": 13841 }, { "epoch": 0.79, "grad_norm": 1.6707663536071777, "learning_rate": 2.1470607038264878e-06, "loss": 0.8742, "step": 13842 }, { "epoch": 0.79, "grad_norm": 1.769095540046692, "learning_rate": 2.1459107490563522e-06, "loss": 0.9435, "step": 13843 }, { "epoch": 0.79, "grad_norm": 1.7184206247329712, "learning_rate": 2.1447610653088946e-06, "loss": 0.9229, "step": 13844 }, { "epoch": 0.79, "grad_norm": 2.0802392959594727, "learning_rate": 2.1436116526237894e-06, "loss": 0.9026, "step": 13845 }, { "epoch": 0.79, "grad_norm": 0.9855695962905884, "learning_rate": 2.1424625110407036e-06, "loss": 0.5231, "step": 13846 }, { "epoch": 0.79, "grad_norm": 1.7364611625671387, "learning_rate": 2.141313640599284e-06, "loss": 0.9147, "step": 13847 }, { "epoch": 0.79, "grad_norm": 1.954888939857483, "learning_rate": 2.1401650413391816e-06, "loss": 0.8847, "step": 13848 }, { "epoch": 0.79, "grad_norm": 1.7651033401489258, "learning_rate": 2.139016713300025e-06, "loss": 0.9216, "step": 13849 }, { "epoch": 0.79, "grad_norm": 1.6987709999084473, "learning_rate": 2.137868656521446e-06, "loss": 0.8798, "step": 13850 }, { "epoch": 0.79, "grad_norm": 2.0484561920166016, "learning_rate": 2.136720871043054e-06, "loss": 0.9179, "step": 13851 }, { "epoch": 0.79, "grad_norm": 1.7574348449707031, "learning_rate": 2.1355733569044633e-06, "loss": 0.8653, "step": 13852 }, { "epoch": 0.79, "grad_norm": 1.9542453289031982, "learning_rate": 2.134426114145265e-06, "loss": 0.9537, "step": 13853 }, { "epoch": 0.79, "grad_norm": 1.7410023212432861, "learning_rate": 2.1332791428050526e-06, "loss": 0.8958, "step": 13854 }, { "epoch": 0.79, "grad_norm": 1.8932121992111206, "learning_rate": 2.132132442923398e-06, "loss": 0.8597, "step": 13855 }, { "epoch": 0.79, "grad_norm": 1.7822086811065674, "learning_rate": 2.1309860145398788e-06, "loss": 0.8556, "step": 13856 }, { "epoch": 0.79, "grad_norm": 1.7748568058013916, "learning_rate": 2.129839857694048e-06, "loss": 0.8818, "step": 13857 }, { "epoch": 0.79, "grad_norm": 1.751509189605713, "learning_rate": 2.1286939724254598e-06, "loss": 0.9335, "step": 13858 }, { "epoch": 0.79, "grad_norm": 1.7816684246063232, "learning_rate": 2.1275483587736577e-06, "loss": 0.9292, "step": 13859 }, { "epoch": 0.79, "grad_norm": 1.6210846900939941, "learning_rate": 2.126403016778168e-06, "loss": 0.8625, "step": 13860 }, { "epoch": 0.79, "grad_norm": 1.6334526538848877, "learning_rate": 2.1252579464785185e-06, "loss": 0.9132, "step": 13861 }, { "epoch": 0.8, "grad_norm": 1.672934651374817, "learning_rate": 2.1241131479142175e-06, "loss": 0.978, "step": 13862 }, { "epoch": 0.8, "grad_norm": 1.6934659481048584, "learning_rate": 2.1229686211247737e-06, "loss": 0.8937, "step": 13863 }, { "epoch": 0.8, "grad_norm": 1.836025357246399, "learning_rate": 2.1218243661496773e-06, "loss": 0.9629, "step": 13864 }, { "epoch": 0.8, "grad_norm": 1.7841970920562744, "learning_rate": 2.120680383028417e-06, "loss": 0.8626, "step": 13865 }, { "epoch": 0.8, "grad_norm": 1.6765799522399902, "learning_rate": 2.119536671800465e-06, "loss": 0.8911, "step": 13866 }, { "epoch": 0.8, "grad_norm": 1.7843064069747925, "learning_rate": 2.1183932325052915e-06, "loss": 0.9016, "step": 13867 }, { "epoch": 0.8, "grad_norm": 1.8544692993164062, "learning_rate": 2.117250065182349e-06, "loss": 0.9317, "step": 13868 }, { "epoch": 0.8, "grad_norm": 1.6293355226516724, "learning_rate": 2.1161071698710866e-06, "loss": 0.9438, "step": 13869 }, { "epoch": 0.8, "grad_norm": 1.718941569328308, "learning_rate": 2.114964546610946e-06, "loss": 0.8709, "step": 13870 }, { "epoch": 0.8, "grad_norm": 1.6374696493148804, "learning_rate": 2.1138221954413496e-06, "loss": 0.9263, "step": 13871 }, { "epoch": 0.8, "grad_norm": 1.8791415691375732, "learning_rate": 2.112680116401723e-06, "loss": 0.9, "step": 13872 }, { "epoch": 0.8, "grad_norm": 1.9028964042663574, "learning_rate": 2.1115383095314712e-06, "loss": 0.9502, "step": 13873 }, { "epoch": 0.8, "grad_norm": 1.6713298559188843, "learning_rate": 2.1103967748699995e-06, "loss": 0.9257, "step": 13874 }, { "epoch": 0.8, "grad_norm": 0.9702057242393494, "learning_rate": 2.1092555124566925e-06, "loss": 0.5332, "step": 13875 }, { "epoch": 0.8, "grad_norm": 1.6842060089111328, "learning_rate": 2.1081145223309397e-06, "loss": 1.0034, "step": 13876 }, { "epoch": 0.8, "grad_norm": 1.8811030387878418, "learning_rate": 2.1069738045321063e-06, "loss": 0.9115, "step": 13877 }, { "epoch": 0.8, "grad_norm": 1.6667803525924683, "learning_rate": 2.1058333590995617e-06, "loss": 0.9203, "step": 13878 }, { "epoch": 0.8, "grad_norm": 1.5625908374786377, "learning_rate": 2.104693186072654e-06, "loss": 0.9246, "step": 13879 }, { "epoch": 0.8, "grad_norm": 1.691623330116272, "learning_rate": 2.1035532854907314e-06, "loss": 0.9152, "step": 13880 }, { "epoch": 0.8, "grad_norm": 1.8293339014053345, "learning_rate": 2.1024136573931252e-06, "loss": 0.9322, "step": 13881 }, { "epoch": 0.8, "grad_norm": 1.6824750900268555, "learning_rate": 2.101274301819163e-06, "loss": 0.9162, "step": 13882 }, { "epoch": 0.8, "grad_norm": 1.0978751182556152, "learning_rate": 2.1001352188081627e-06, "loss": 0.5641, "step": 13883 }, { "epoch": 0.8, "grad_norm": 1.9204086065292358, "learning_rate": 2.0989964083994254e-06, "loss": 1.0214, "step": 13884 }, { "epoch": 0.8, "grad_norm": 1.8783453702926636, "learning_rate": 2.097857870632255e-06, "loss": 0.8456, "step": 13885 }, { "epoch": 0.8, "grad_norm": 2.049328565597534, "learning_rate": 2.0967196055459327e-06, "loss": 0.9211, "step": 13886 }, { "epoch": 0.8, "grad_norm": 1.8419142961502075, "learning_rate": 2.0955816131797425e-06, "loss": 0.9897, "step": 13887 }, { "epoch": 0.8, "grad_norm": 1.9320902824401855, "learning_rate": 2.0944438935729484e-06, "loss": 0.9657, "step": 13888 }, { "epoch": 0.8, "grad_norm": 1.9545713663101196, "learning_rate": 2.0933064467648147e-06, "loss": 0.8988, "step": 13889 }, { "epoch": 0.8, "grad_norm": 1.8225746154785156, "learning_rate": 2.0921692727945863e-06, "loss": 1.001, "step": 13890 }, { "epoch": 0.8, "grad_norm": 1.784244179725647, "learning_rate": 2.091032371701509e-06, "loss": 0.7933, "step": 13891 }, { "epoch": 0.8, "grad_norm": 1.695263385772705, "learning_rate": 2.08989574352481e-06, "loss": 0.8289, "step": 13892 }, { "epoch": 0.8, "grad_norm": 1.7006781101226807, "learning_rate": 2.0887593883037116e-06, "loss": 0.9493, "step": 13893 }, { "epoch": 0.8, "grad_norm": 1.7475663423538208, "learning_rate": 2.087623306077431e-06, "loss": 0.8839, "step": 13894 }, { "epoch": 0.8, "grad_norm": 1.7519831657409668, "learning_rate": 2.0864874968851657e-06, "loss": 0.8756, "step": 13895 }, { "epoch": 0.8, "grad_norm": 1.724940538406372, "learning_rate": 2.085351960766113e-06, "loss": 0.8132, "step": 13896 }, { "epoch": 0.8, "grad_norm": 1.9793107509613037, "learning_rate": 2.0842166977594538e-06, "loss": 0.891, "step": 13897 }, { "epoch": 0.8, "grad_norm": 1.643418312072754, "learning_rate": 2.0830817079043663e-06, "loss": 1.0207, "step": 13898 }, { "epoch": 0.8, "grad_norm": 1.8368695974349976, "learning_rate": 2.0819469912400113e-06, "loss": 0.9198, "step": 13899 }, { "epoch": 0.8, "grad_norm": 1.6786694526672363, "learning_rate": 2.0808125478055507e-06, "loss": 0.9134, "step": 13900 }, { "epoch": 0.8, "grad_norm": 1.7283631563186646, "learning_rate": 2.0796783776401252e-06, "loss": 0.9707, "step": 13901 }, { "epoch": 0.8, "grad_norm": 1.717724084854126, "learning_rate": 2.0785444807828737e-06, "loss": 0.902, "step": 13902 }, { "epoch": 0.8, "grad_norm": 1.774915099143982, "learning_rate": 2.0774108572729256e-06, "loss": 0.8784, "step": 13903 }, { "epoch": 0.8, "grad_norm": 1.7804466485977173, "learning_rate": 2.076277507149399e-06, "loss": 0.89, "step": 13904 }, { "epoch": 0.8, "grad_norm": 1.5639712810516357, "learning_rate": 2.0751444304514002e-06, "loss": 0.9027, "step": 13905 }, { "epoch": 0.8, "grad_norm": 1.874142050743103, "learning_rate": 2.074011627218032e-06, "loss": 0.9437, "step": 13906 }, { "epoch": 0.8, "grad_norm": 1.5796946287155151, "learning_rate": 2.0728790974883793e-06, "loss": 0.8641, "step": 13907 }, { "epoch": 0.8, "grad_norm": 1.9472652673721313, "learning_rate": 2.0717468413015285e-06, "loss": 0.8275, "step": 13908 }, { "epoch": 0.8, "grad_norm": 1.7220498323440552, "learning_rate": 2.0706148586965457e-06, "loss": 0.8823, "step": 13909 }, { "epoch": 0.8, "grad_norm": 1.7351999282836914, "learning_rate": 2.0694831497124958e-06, "loss": 0.8439, "step": 13910 }, { "epoch": 0.8, "grad_norm": 1.6093543767929077, "learning_rate": 2.068351714388427e-06, "loss": 0.8857, "step": 13911 }, { "epoch": 0.8, "grad_norm": 1.8661140203475952, "learning_rate": 2.0672205527633837e-06, "loss": 0.8783, "step": 13912 }, { "epoch": 0.8, "grad_norm": 1.6557588577270508, "learning_rate": 2.066089664876404e-06, "loss": 0.8129, "step": 13913 }, { "epoch": 0.8, "grad_norm": 1.6607632637023926, "learning_rate": 2.064959050766504e-06, "loss": 0.85, "step": 13914 }, { "epoch": 0.8, "grad_norm": 1.7957963943481445, "learning_rate": 2.063828710472704e-06, "loss": 0.9353, "step": 13915 }, { "epoch": 0.8, "grad_norm": 1.7717972993850708, "learning_rate": 2.0626986440340036e-06, "loss": 0.9331, "step": 13916 }, { "epoch": 0.8, "grad_norm": 1.737707495689392, "learning_rate": 2.061568851489404e-06, "loss": 0.8456, "step": 13917 }, { "epoch": 0.8, "grad_norm": 1.75895094871521, "learning_rate": 2.060439332877886e-06, "loss": 0.9747, "step": 13918 }, { "epoch": 0.8, "grad_norm": 1.6888923645019531, "learning_rate": 2.0593100882384297e-06, "loss": 0.9101, "step": 13919 }, { "epoch": 0.8, "grad_norm": 1.7585532665252686, "learning_rate": 2.0581811176099997e-06, "loss": 0.9234, "step": 13920 }, { "epoch": 0.8, "grad_norm": 1.872012734413147, "learning_rate": 2.057052421031557e-06, "loss": 0.9339, "step": 13921 }, { "epoch": 0.8, "grad_norm": 1.7240371704101562, "learning_rate": 2.0559239985420444e-06, "loss": 0.8901, "step": 13922 }, { "epoch": 0.8, "grad_norm": 1.0078750848770142, "learning_rate": 2.0547958501804034e-06, "loss": 0.5132, "step": 13923 }, { "epoch": 0.8, "grad_norm": 1.1504322290420532, "learning_rate": 2.053667975985567e-06, "loss": 0.5625, "step": 13924 }, { "epoch": 0.8, "grad_norm": 1.8820008039474487, "learning_rate": 2.052540375996449e-06, "loss": 0.9026, "step": 13925 }, { "epoch": 0.8, "grad_norm": 1.7437798976898193, "learning_rate": 2.051413050251965e-06, "loss": 0.9219, "step": 13926 }, { "epoch": 0.8, "grad_norm": 1.7602156400680542, "learning_rate": 2.0502859987910097e-06, "loss": 0.9531, "step": 13927 }, { "epoch": 0.8, "grad_norm": 1.6916152238845825, "learning_rate": 2.0491592216524813e-06, "loss": 0.9219, "step": 13928 }, { "epoch": 0.8, "grad_norm": 1.9786920547485352, "learning_rate": 2.048032718875255e-06, "loss": 0.914, "step": 13929 }, { "epoch": 0.8, "grad_norm": 1.8582561016082764, "learning_rate": 2.0469064904982094e-06, "loss": 0.9101, "step": 13930 }, { "epoch": 0.8, "grad_norm": 2.024723529815674, "learning_rate": 2.0457805365602023e-06, "loss": 0.8898, "step": 13931 }, { "epoch": 0.8, "grad_norm": 1.7189216613769531, "learning_rate": 2.0446548571000936e-06, "loss": 0.8826, "step": 13932 }, { "epoch": 0.8, "grad_norm": 1.8184411525726318, "learning_rate": 2.0435294521567194e-06, "loss": 0.8386, "step": 13933 }, { "epoch": 0.8, "grad_norm": 1.6967767477035522, "learning_rate": 2.0424043217689204e-06, "loss": 0.9319, "step": 13934 }, { "epoch": 0.8, "grad_norm": 1.6388914585113525, "learning_rate": 2.0412794659755187e-06, "loss": 1.0292, "step": 13935 }, { "epoch": 0.8, "grad_norm": 1.6549962759017944, "learning_rate": 2.0401548848153296e-06, "loss": 0.9041, "step": 13936 }, { "epoch": 0.8, "grad_norm": 1.5967315435409546, "learning_rate": 2.0390305783271636e-06, "loss": 0.8298, "step": 13937 }, { "epoch": 0.8, "grad_norm": 1.5790693759918213, "learning_rate": 2.0379065465498114e-06, "loss": 0.9771, "step": 13938 }, { "epoch": 0.8, "grad_norm": 1.5990328788757324, "learning_rate": 2.036782789522066e-06, "loss": 0.8833, "step": 13939 }, { "epoch": 0.8, "grad_norm": 1.6504746675491333, "learning_rate": 2.035659307282699e-06, "loss": 0.8712, "step": 13940 }, { "epoch": 0.8, "grad_norm": 1.7570942640304565, "learning_rate": 2.0345360998704843e-06, "loss": 0.9264, "step": 13941 }, { "epoch": 0.8, "grad_norm": 1.0953291654586792, "learning_rate": 2.0334131673241763e-06, "loss": 0.5458, "step": 13942 }, { "epoch": 0.8, "grad_norm": 1.848275899887085, "learning_rate": 2.0322905096825283e-06, "loss": 0.8963, "step": 13943 }, { "epoch": 0.8, "grad_norm": 1.793375015258789, "learning_rate": 2.0311681269842755e-06, "loss": 0.8808, "step": 13944 }, { "epoch": 0.8, "grad_norm": 1.6269214153289795, "learning_rate": 2.030046019268154e-06, "loss": 0.9117, "step": 13945 }, { "epoch": 0.8, "grad_norm": 1.8680565357208252, "learning_rate": 2.028924186572877e-06, "loss": 1.0309, "step": 13946 }, { "epoch": 0.8, "grad_norm": 1.8572474718093872, "learning_rate": 2.027802628937161e-06, "loss": 0.8297, "step": 13947 }, { "epoch": 0.8, "grad_norm": 1.782065510749817, "learning_rate": 2.0266813463997092e-06, "loss": 0.8893, "step": 13948 }, { "epoch": 0.8, "grad_norm": 1.405534029006958, "learning_rate": 2.0255603389992084e-06, "loss": 0.4688, "step": 13949 }, { "epoch": 0.8, "grad_norm": 1.8521493673324585, "learning_rate": 2.024439606774349e-06, "loss": 0.9972, "step": 13950 }, { "epoch": 0.8, "grad_norm": 1.5672870874404907, "learning_rate": 2.023319149763796e-06, "loss": 0.9191, "step": 13951 }, { "epoch": 0.8, "grad_norm": 1.757883071899414, "learning_rate": 2.0221989680062193e-06, "loss": 0.8822, "step": 13952 }, { "epoch": 0.8, "grad_norm": 1.7798980474472046, "learning_rate": 2.02107906154027e-06, "loss": 0.9268, "step": 13953 }, { "epoch": 0.8, "grad_norm": 1.721358299255371, "learning_rate": 2.0199594304045956e-06, "loss": 0.9446, "step": 13954 }, { "epoch": 0.8, "grad_norm": 1.7716293334960938, "learning_rate": 2.0188400746378268e-06, "loss": 0.8869, "step": 13955 }, { "epoch": 0.8, "grad_norm": 1.8402817249298096, "learning_rate": 2.017720994278596e-06, "loss": 1.0162, "step": 13956 }, { "epoch": 0.8, "grad_norm": 1.780182123184204, "learning_rate": 2.0166021893655143e-06, "loss": 0.9375, "step": 13957 }, { "epoch": 0.8, "grad_norm": 1.8040544986724854, "learning_rate": 2.0154836599371917e-06, "loss": 0.8863, "step": 13958 }, { "epoch": 0.8, "grad_norm": 2.145618200302124, "learning_rate": 2.0143654060322214e-06, "loss": 0.928, "step": 13959 }, { "epoch": 0.8, "grad_norm": 1.738170862197876, "learning_rate": 2.0132474276891945e-06, "loss": 0.9122, "step": 13960 }, { "epoch": 0.8, "grad_norm": 1.910254955291748, "learning_rate": 2.012129724946692e-06, "loss": 0.922, "step": 13961 }, { "epoch": 0.8, "grad_norm": 1.7241897583007812, "learning_rate": 2.0110122978432754e-06, "loss": 0.8984, "step": 13962 }, { "epoch": 0.8, "grad_norm": 1.1332030296325684, "learning_rate": 2.009895146417512e-06, "loss": 0.5619, "step": 13963 }, { "epoch": 0.8, "grad_norm": 1.7244709730148315, "learning_rate": 2.008778270707944e-06, "loss": 0.8796, "step": 13964 }, { "epoch": 0.8, "grad_norm": 0.96401047706604, "learning_rate": 2.007661670753118e-06, "loss": 0.5084, "step": 13965 }, { "epoch": 0.8, "grad_norm": 2.0133216381073, "learning_rate": 2.0065453465915608e-06, "loss": 0.956, "step": 13966 }, { "epoch": 0.8, "grad_norm": 1.7604944705963135, "learning_rate": 2.0054292982617964e-06, "loss": 0.8845, "step": 13967 }, { "epoch": 0.8, "grad_norm": 1.7436622381210327, "learning_rate": 2.0043135258023294e-06, "loss": 0.8628, "step": 13968 }, { "epoch": 0.8, "grad_norm": 1.8912303447723389, "learning_rate": 2.003198029251674e-06, "loss": 0.8703, "step": 13969 }, { "epoch": 0.8, "grad_norm": 1.7419434785842896, "learning_rate": 2.0020828086483124e-06, "loss": 0.9019, "step": 13970 }, { "epoch": 0.8, "grad_norm": 1.7680869102478027, "learning_rate": 2.000967864030735e-06, "loss": 0.867, "step": 13971 }, { "epoch": 0.8, "grad_norm": 1.8261722326278687, "learning_rate": 1.99985319543741e-06, "loss": 0.9777, "step": 13972 }, { "epoch": 0.8, "grad_norm": 1.7912230491638184, "learning_rate": 1.9987388029068068e-06, "loss": 0.9246, "step": 13973 }, { "epoch": 0.8, "grad_norm": 1.9042878150939941, "learning_rate": 1.997624686477373e-06, "loss": 0.9507, "step": 13974 }, { "epoch": 0.8, "grad_norm": 1.7716833353042603, "learning_rate": 1.9965108461875602e-06, "loss": 0.8996, "step": 13975 }, { "epoch": 0.8, "grad_norm": 1.7671964168548584, "learning_rate": 1.9953972820757994e-06, "loss": 0.888, "step": 13976 }, { "epoch": 0.8, "grad_norm": 1.7693727016448975, "learning_rate": 1.9942839941805183e-06, "loss": 0.8743, "step": 13977 }, { "epoch": 0.8, "grad_norm": 1.862221360206604, "learning_rate": 1.9931709825401358e-06, "loss": 0.9559, "step": 13978 }, { "epoch": 0.8, "grad_norm": 1.8267016410827637, "learning_rate": 1.992058247193054e-06, "loss": 0.9201, "step": 13979 }, { "epoch": 0.8, "grad_norm": 1.7337477207183838, "learning_rate": 1.990945788177676e-06, "loss": 0.8135, "step": 13980 }, { "epoch": 0.8, "grad_norm": 1.6489856243133545, "learning_rate": 1.989833605532383e-06, "loss": 0.881, "step": 13981 }, { "epoch": 0.8, "grad_norm": 1.5279343128204346, "learning_rate": 1.9887216992955605e-06, "loss": 0.8935, "step": 13982 }, { "epoch": 0.8, "grad_norm": 1.5997909307479858, "learning_rate": 1.98761006950557e-06, "loss": 0.9414, "step": 13983 }, { "epoch": 0.8, "grad_norm": 1.7981884479522705, "learning_rate": 1.9864987162007764e-06, "loss": 0.8647, "step": 13984 }, { "epoch": 0.8, "grad_norm": 1.8458565473556519, "learning_rate": 1.985387639419526e-06, "loss": 0.8627, "step": 13985 }, { "epoch": 0.8, "grad_norm": 1.6984550952911377, "learning_rate": 1.984276839200162e-06, "loss": 0.9655, "step": 13986 }, { "epoch": 0.8, "grad_norm": 1.7024176120758057, "learning_rate": 1.983166315581011e-06, "loss": 0.9092, "step": 13987 }, { "epoch": 0.8, "grad_norm": 1.8675802946090698, "learning_rate": 1.9820560686003985e-06, "loss": 0.9218, "step": 13988 }, { "epoch": 0.8, "grad_norm": 1.82809579372406, "learning_rate": 1.9809460982966323e-06, "loss": 0.9861, "step": 13989 }, { "epoch": 0.8, "grad_norm": 1.7346214056015015, "learning_rate": 1.979836404708014e-06, "loss": 0.9485, "step": 13990 }, { "epoch": 0.8, "grad_norm": 1.8084725141525269, "learning_rate": 1.978726987872842e-06, "loss": 0.8094, "step": 13991 }, { "epoch": 0.8, "grad_norm": 1.7948328256607056, "learning_rate": 1.9776178478293926e-06, "loss": 0.9049, "step": 13992 }, { "epoch": 0.8, "grad_norm": 1.7461326122283936, "learning_rate": 1.9765089846159433e-06, "loss": 0.8277, "step": 13993 }, { "epoch": 0.8, "grad_norm": 1.7405855655670166, "learning_rate": 1.9754003982707546e-06, "loss": 0.9028, "step": 13994 }, { "epoch": 0.8, "grad_norm": 1.9237440824508667, "learning_rate": 1.974292088832085e-06, "loss": 0.9199, "step": 13995 }, { "epoch": 0.8, "grad_norm": 2.2398149967193604, "learning_rate": 1.973184056338173e-06, "loss": 0.9631, "step": 13996 }, { "epoch": 0.8, "grad_norm": 1.7465142011642456, "learning_rate": 1.9720763008272604e-06, "loss": 0.824, "step": 13997 }, { "epoch": 0.8, "grad_norm": 1.7676247358322144, "learning_rate": 1.970968822337567e-06, "loss": 0.9268, "step": 13998 }, { "epoch": 0.8, "grad_norm": 1.8269869089126587, "learning_rate": 1.969861620907314e-06, "loss": 0.8781, "step": 13999 }, { "epoch": 0.8, "grad_norm": 1.7873669862747192, "learning_rate": 1.9687546965747018e-06, "loss": 0.8698, "step": 14000 }, { "epoch": 0.8, "grad_norm": 1.7634211778640747, "learning_rate": 1.9676480493779314e-06, "loss": 0.9295, "step": 14001 }, { "epoch": 0.8, "grad_norm": 1.7665032148361206, "learning_rate": 1.9665416793551917e-06, "loss": 0.9265, "step": 14002 }, { "epoch": 0.8, "grad_norm": 1.9484899044036865, "learning_rate": 1.965435586544656e-06, "loss": 0.9425, "step": 14003 }, { "epoch": 0.8, "grad_norm": 1.7135900259017944, "learning_rate": 1.9643297709844964e-06, "loss": 0.9304, "step": 14004 }, { "epoch": 0.8, "grad_norm": 1.8288850784301758, "learning_rate": 1.963224232712868e-06, "loss": 0.8891, "step": 14005 }, { "epoch": 0.8, "grad_norm": 1.6682873964309692, "learning_rate": 1.9621189717679236e-06, "loss": 0.9036, "step": 14006 }, { "epoch": 0.8, "grad_norm": 1.8813554048538208, "learning_rate": 1.9610139881877977e-06, "loss": 0.9836, "step": 14007 }, { "epoch": 0.8, "grad_norm": 1.85567045211792, "learning_rate": 1.9599092820106257e-06, "loss": 0.8597, "step": 14008 }, { "epoch": 0.8, "grad_norm": 1.7214148044586182, "learning_rate": 1.958804853274523e-06, "loss": 0.8757, "step": 14009 }, { "epoch": 0.8, "grad_norm": 1.8692643642425537, "learning_rate": 1.957700702017604e-06, "loss": 0.9252, "step": 14010 }, { "epoch": 0.8, "grad_norm": 1.819067358970642, "learning_rate": 1.956596828277968e-06, "loss": 0.8756, "step": 14011 }, { "epoch": 0.8, "grad_norm": 1.8559890985488892, "learning_rate": 1.9554932320937083e-06, "loss": 0.9353, "step": 14012 }, { "epoch": 0.8, "grad_norm": 1.7926456928253174, "learning_rate": 1.9543899135029034e-06, "loss": 0.9687, "step": 14013 }, { "epoch": 0.8, "grad_norm": 1.7329295873641968, "learning_rate": 1.953286872543628e-06, "loss": 0.8625, "step": 14014 }, { "epoch": 0.8, "grad_norm": 1.5481125116348267, "learning_rate": 1.9521841092539485e-06, "loss": 0.9414, "step": 14015 }, { "epoch": 0.8, "grad_norm": 1.8968678712844849, "learning_rate": 1.951081623671911e-06, "loss": 0.9576, "step": 14016 }, { "epoch": 0.8, "grad_norm": 1.6864975690841675, "learning_rate": 1.9499794158355658e-06, "loss": 0.9485, "step": 14017 }, { "epoch": 0.8, "grad_norm": 1.7293339967727661, "learning_rate": 1.948877485782942e-06, "loss": 0.9023, "step": 14018 }, { "epoch": 0.8, "grad_norm": 1.652435541152954, "learning_rate": 1.947775833552069e-06, "loss": 0.8574, "step": 14019 }, { "epoch": 0.8, "grad_norm": 1.0346343517303467, "learning_rate": 1.946674459180955e-06, "loss": 0.5031, "step": 14020 }, { "epoch": 0.8, "grad_norm": 1.6752530336380005, "learning_rate": 1.9455733627076136e-06, "loss": 0.9135, "step": 14021 }, { "epoch": 0.8, "grad_norm": 2.02181077003479, "learning_rate": 1.944472544170033e-06, "loss": 0.9037, "step": 14022 }, { "epoch": 0.8, "grad_norm": 1.712759256362915, "learning_rate": 1.9433720036062055e-06, "loss": 0.9107, "step": 14023 }, { "epoch": 0.8, "grad_norm": 1.5891083478927612, "learning_rate": 1.9422717410541016e-06, "loss": 0.8673, "step": 14024 }, { "epoch": 0.8, "grad_norm": 1.7310612201690674, "learning_rate": 1.941171756551695e-06, "loss": 0.8613, "step": 14025 }, { "epoch": 0.8, "grad_norm": 1.8287407159805298, "learning_rate": 1.9400720501369363e-06, "loss": 0.8781, "step": 14026 }, { "epoch": 0.8, "grad_norm": 1.7315726280212402, "learning_rate": 1.938972621847778e-06, "loss": 0.8755, "step": 14027 }, { "epoch": 0.8, "grad_norm": 1.8258755207061768, "learning_rate": 1.937873471722158e-06, "loss": 0.8767, "step": 14028 }, { "epoch": 0.8, "grad_norm": 1.8173223733901978, "learning_rate": 1.9367745997980026e-06, "loss": 0.9064, "step": 14029 }, { "epoch": 0.8, "grad_norm": 1.7682009935379028, "learning_rate": 1.935676006113234e-06, "loss": 0.8903, "step": 14030 }, { "epoch": 0.8, "grad_norm": 1.8015422821044922, "learning_rate": 1.9345776907057566e-06, "loss": 0.9974, "step": 14031 }, { "epoch": 0.8, "grad_norm": 1.5687092542648315, "learning_rate": 1.933479653613476e-06, "loss": 0.8866, "step": 14032 }, { "epoch": 0.8, "grad_norm": 1.937098503112793, "learning_rate": 1.932381894874278e-06, "loss": 0.9031, "step": 14033 }, { "epoch": 0.8, "grad_norm": 1.8051369190216064, "learning_rate": 1.9312844145260435e-06, "loss": 0.9186, "step": 14034 }, { "epoch": 0.8, "grad_norm": 1.686700463294983, "learning_rate": 1.930187212606646e-06, "loss": 0.9152, "step": 14035 }, { "epoch": 0.8, "grad_norm": 1.7500659227371216, "learning_rate": 1.9290902891539475e-06, "loss": 0.8877, "step": 14036 }, { "epoch": 0.81, "grad_norm": 1.8407310247421265, "learning_rate": 1.927993644205796e-06, "loss": 0.9643, "step": 14037 }, { "epoch": 0.81, "grad_norm": 1.7866275310516357, "learning_rate": 1.9268972778000373e-06, "loss": 0.9623, "step": 14038 }, { "epoch": 0.81, "grad_norm": 1.7575585842132568, "learning_rate": 1.9258011899744998e-06, "loss": 0.9314, "step": 14039 }, { "epoch": 0.81, "grad_norm": 1.8576430082321167, "learning_rate": 1.924705380767011e-06, "loss": 0.9341, "step": 14040 }, { "epoch": 0.81, "grad_norm": 1.7025312185287476, "learning_rate": 1.923609850215381e-06, "loss": 0.9, "step": 14041 }, { "epoch": 0.81, "grad_norm": 1.8535430431365967, "learning_rate": 1.9225145983574166e-06, "loss": 0.9393, "step": 14042 }, { "epoch": 0.81, "grad_norm": 1.1030341386795044, "learning_rate": 1.921419625230907e-06, "loss": 0.5638, "step": 14043 }, { "epoch": 0.81, "grad_norm": 1.7576714754104614, "learning_rate": 1.920324930873639e-06, "loss": 0.921, "step": 14044 }, { "epoch": 0.81, "grad_norm": 1.692887544631958, "learning_rate": 1.9192305153233913e-06, "loss": 0.9261, "step": 14045 }, { "epoch": 0.81, "grad_norm": 1.8226488828659058, "learning_rate": 1.918136378617923e-06, "loss": 0.9028, "step": 14046 }, { "epoch": 0.81, "grad_norm": 1.9208482503890991, "learning_rate": 1.917042520794995e-06, "loss": 0.8834, "step": 14047 }, { "epoch": 0.81, "grad_norm": 1.788041114807129, "learning_rate": 1.9159489418923493e-06, "loss": 0.8817, "step": 14048 }, { "epoch": 0.81, "grad_norm": 1.809095025062561, "learning_rate": 1.914855641947725e-06, "loss": 0.8783, "step": 14049 }, { "epoch": 0.81, "grad_norm": 0.9852890372276306, "learning_rate": 1.913762620998846e-06, "loss": 0.5117, "step": 14050 }, { "epoch": 0.81, "grad_norm": 1.8035486936569214, "learning_rate": 1.912669879083432e-06, "loss": 0.8581, "step": 14051 }, { "epoch": 0.81, "grad_norm": 1.917277455329895, "learning_rate": 1.9115774162391876e-06, "loss": 0.876, "step": 14052 }, { "epoch": 0.81, "grad_norm": 1.809247374534607, "learning_rate": 1.910485232503816e-06, "loss": 0.952, "step": 14053 }, { "epoch": 0.81, "grad_norm": 1.9863134622573853, "learning_rate": 1.909393327914998e-06, "loss": 0.9518, "step": 14054 }, { "epoch": 0.81, "grad_norm": 1.6907293796539307, "learning_rate": 1.9083017025104166e-06, "loss": 0.8399, "step": 14055 }, { "epoch": 0.81, "grad_norm": 1.7695196866989136, "learning_rate": 1.9072103563277423e-06, "loss": 0.9217, "step": 14056 }, { "epoch": 0.81, "grad_norm": 1.7960482835769653, "learning_rate": 1.906119289404631e-06, "loss": 0.9491, "step": 14057 }, { "epoch": 0.81, "grad_norm": 1.8133835792541504, "learning_rate": 1.9050285017787351e-06, "loss": 0.9302, "step": 14058 }, { "epoch": 0.81, "grad_norm": 1.8223031759262085, "learning_rate": 1.9039379934876912e-06, "loss": 0.8936, "step": 14059 }, { "epoch": 0.81, "grad_norm": 1.7321873903274536, "learning_rate": 1.9028477645691334e-06, "loss": 0.9289, "step": 14060 }, { "epoch": 0.81, "grad_norm": 1.7210311889648438, "learning_rate": 1.9017578150606786e-06, "loss": 0.9112, "step": 14061 }, { "epoch": 0.81, "grad_norm": 2.041524887084961, "learning_rate": 1.900668144999943e-06, "loss": 0.8705, "step": 14062 }, { "epoch": 0.81, "grad_norm": 2.0023767948150635, "learning_rate": 1.8995787544245225e-06, "loss": 0.9344, "step": 14063 }, { "epoch": 0.81, "grad_norm": 1.8140009641647339, "learning_rate": 1.8984896433720147e-06, "loss": 0.9044, "step": 14064 }, { "epoch": 0.81, "grad_norm": 1.752633810043335, "learning_rate": 1.8974008118799947e-06, "loss": 0.8332, "step": 14065 }, { "epoch": 0.81, "grad_norm": 1.741737723350525, "learning_rate": 1.8963122599860428e-06, "loss": 0.9709, "step": 14066 }, { "epoch": 0.81, "grad_norm": 1.1367217302322388, "learning_rate": 1.8952239877277145e-06, "loss": 0.5821, "step": 14067 }, { "epoch": 0.81, "grad_norm": 1.7521618604660034, "learning_rate": 1.8941359951425675e-06, "loss": 0.9497, "step": 14068 }, { "epoch": 0.81, "grad_norm": 1.8025600910186768, "learning_rate": 1.8930482822681473e-06, "loss": 0.9244, "step": 14069 }, { "epoch": 0.81, "grad_norm": 1.7236820459365845, "learning_rate": 1.8919608491419816e-06, "loss": 0.8974, "step": 14070 }, { "epoch": 0.81, "grad_norm": 1.762858271598816, "learning_rate": 1.8908736958016006e-06, "loss": 0.9244, "step": 14071 }, { "epoch": 0.81, "grad_norm": 1.8352046012878418, "learning_rate": 1.8897868222845139e-06, "loss": 0.9184, "step": 14072 }, { "epoch": 0.81, "grad_norm": 0.9757412075996399, "learning_rate": 1.8887002286282318e-06, "loss": 0.5177, "step": 14073 }, { "epoch": 0.81, "grad_norm": 1.8258934020996094, "learning_rate": 1.8876139148702444e-06, "loss": 0.9007, "step": 14074 }, { "epoch": 0.81, "grad_norm": 1.670674443244934, "learning_rate": 1.8865278810480425e-06, "loss": 0.927, "step": 14075 }, { "epoch": 0.81, "grad_norm": 1.6569859981536865, "learning_rate": 1.8854421271990964e-06, "loss": 0.84, "step": 14076 }, { "epoch": 0.81, "grad_norm": 1.7142783403396606, "learning_rate": 1.884356653360878e-06, "loss": 0.954, "step": 14077 }, { "epoch": 0.81, "grad_norm": 1.6480296850204468, "learning_rate": 1.883271459570839e-06, "loss": 0.8044, "step": 14078 }, { "epoch": 0.81, "grad_norm": 1.731122374534607, "learning_rate": 1.8821865458664291e-06, "loss": 0.928, "step": 14079 }, { "epoch": 0.81, "grad_norm": 1.768945574760437, "learning_rate": 1.8811019122850872e-06, "loss": 0.917, "step": 14080 }, { "epoch": 0.81, "grad_norm": 1.8149460554122925, "learning_rate": 1.8800175588642366e-06, "loss": 0.9792, "step": 14081 }, { "epoch": 0.81, "grad_norm": 2.107393741607666, "learning_rate": 1.8789334856413e-06, "loss": 0.9472, "step": 14082 }, { "epoch": 0.81, "grad_norm": 0.9868326783180237, "learning_rate": 1.8778496926536815e-06, "loss": 0.5967, "step": 14083 }, { "epoch": 0.81, "grad_norm": 1.7033144235610962, "learning_rate": 1.8767661799387848e-06, "loss": 0.8628, "step": 14084 }, { "epoch": 0.81, "grad_norm": 1.6941184997558594, "learning_rate": 1.8756829475339922e-06, "loss": 0.8656, "step": 14085 }, { "epoch": 0.81, "grad_norm": 1.8487378358840942, "learning_rate": 1.8745999954766903e-06, "loss": 1.0283, "step": 14086 }, { "epoch": 0.81, "grad_norm": 1.7418134212493896, "learning_rate": 1.8735173238042415e-06, "loss": 0.8322, "step": 14087 }, { "epoch": 0.81, "grad_norm": 1.5997120141983032, "learning_rate": 1.8724349325540137e-06, "loss": 0.8643, "step": 14088 }, { "epoch": 0.81, "grad_norm": 1.7328933477401733, "learning_rate": 1.8713528217633491e-06, "loss": 0.9392, "step": 14089 }, { "epoch": 0.81, "grad_norm": 1.7457698583602905, "learning_rate": 1.8702709914695949e-06, "loss": 0.8546, "step": 14090 }, { "epoch": 0.81, "grad_norm": 1.752020001411438, "learning_rate": 1.8691894417100764e-06, "loss": 0.8275, "step": 14091 }, { "epoch": 0.81, "grad_norm": 1.8976435661315918, "learning_rate": 1.8681081725221185e-06, "loss": 0.8771, "step": 14092 }, { "epoch": 0.81, "grad_norm": 1.7588292360305786, "learning_rate": 1.8670271839430343e-06, "loss": 0.8791, "step": 14093 }, { "epoch": 0.81, "grad_norm": 1.7215102910995483, "learning_rate": 1.865946476010121e-06, "loss": 0.8491, "step": 14094 }, { "epoch": 0.81, "grad_norm": 1.014331340789795, "learning_rate": 1.8648660487606752e-06, "loss": 0.5498, "step": 14095 }, { "epoch": 0.81, "grad_norm": 1.8594752550125122, "learning_rate": 1.863785902231976e-06, "loss": 0.8232, "step": 14096 }, { "epoch": 0.81, "grad_norm": 1.8511710166931152, "learning_rate": 1.8627060364612993e-06, "loss": 0.8858, "step": 14097 }, { "epoch": 0.81, "grad_norm": 1.0701453685760498, "learning_rate": 1.8616264514859051e-06, "loss": 0.5091, "step": 14098 }, { "epoch": 0.81, "grad_norm": 1.9040848016738892, "learning_rate": 1.8605471473430503e-06, "loss": 0.8619, "step": 14099 }, { "epoch": 0.81, "grad_norm": 1.7075958251953125, "learning_rate": 1.8594681240699708e-06, "loss": 0.8836, "step": 14100 }, { "epoch": 0.81, "grad_norm": 1.697514295578003, "learning_rate": 1.8583893817039134e-06, "loss": 0.8698, "step": 14101 }, { "epoch": 0.81, "grad_norm": 1.950579047203064, "learning_rate": 1.8573109202820927e-06, "loss": 0.9734, "step": 14102 }, { "epoch": 0.81, "grad_norm": 1.675250768661499, "learning_rate": 1.856232739841729e-06, "loss": 0.8716, "step": 14103 }, { "epoch": 0.81, "grad_norm": 1.7621372938156128, "learning_rate": 1.8551548404200215e-06, "loss": 0.9217, "step": 14104 }, { "epoch": 0.81, "grad_norm": 1.8805630207061768, "learning_rate": 1.8540772220541725e-06, "loss": 0.9084, "step": 14105 }, { "epoch": 0.81, "grad_norm": 1.8633058071136475, "learning_rate": 1.8529998847813602e-06, "loss": 0.9215, "step": 14106 }, { "epoch": 0.81, "grad_norm": 1.76390540599823, "learning_rate": 1.8519228286387668e-06, "loss": 0.9464, "step": 14107 }, { "epoch": 0.81, "grad_norm": 1.8566062450408936, "learning_rate": 1.8508460536635542e-06, "loss": 0.9695, "step": 14108 }, { "epoch": 0.81, "grad_norm": 1.6882922649383545, "learning_rate": 1.84976955989288e-06, "loss": 0.8501, "step": 14109 }, { "epoch": 0.81, "grad_norm": 1.6523946523666382, "learning_rate": 1.8486933473638945e-06, "loss": 0.8227, "step": 14110 }, { "epoch": 0.81, "grad_norm": 1.6837847232818604, "learning_rate": 1.8476174161137283e-06, "loss": 0.8144, "step": 14111 }, { "epoch": 0.81, "grad_norm": 1.713645577430725, "learning_rate": 1.846541766179516e-06, "loss": 0.922, "step": 14112 }, { "epoch": 0.81, "grad_norm": 1.6728363037109375, "learning_rate": 1.8454663975983677e-06, "loss": 0.9482, "step": 14113 }, { "epoch": 0.81, "grad_norm": 1.7649805545806885, "learning_rate": 1.8443913104073984e-06, "loss": 0.8473, "step": 14114 }, { "epoch": 0.81, "grad_norm": 1.617882490158081, "learning_rate": 1.8433165046437018e-06, "loss": 0.8963, "step": 14115 }, { "epoch": 0.81, "grad_norm": 1.833274006843567, "learning_rate": 1.8422419803443692e-06, "loss": 0.938, "step": 14116 }, { "epoch": 0.81, "grad_norm": 1.1394277811050415, "learning_rate": 1.8411677375464754e-06, "loss": 0.5762, "step": 14117 }, { "epoch": 0.81, "grad_norm": 1.7469112873077393, "learning_rate": 1.840093776287095e-06, "loss": 0.9194, "step": 14118 }, { "epoch": 0.81, "grad_norm": 0.9957841634750366, "learning_rate": 1.8390200966032822e-06, "loss": 0.5626, "step": 14119 }, { "epoch": 0.81, "grad_norm": 1.7444697618484497, "learning_rate": 1.8379466985320915e-06, "loss": 0.8521, "step": 14120 }, { "epoch": 0.81, "grad_norm": 1.8393453359603882, "learning_rate": 1.8368735821105588e-06, "loss": 0.9045, "step": 14121 }, { "epoch": 0.81, "grad_norm": 1.8354759216308594, "learning_rate": 1.8358007473757145e-06, "loss": 0.9408, "step": 14122 }, { "epoch": 0.81, "grad_norm": 1.677212119102478, "learning_rate": 1.8347281943645846e-06, "loss": 0.8847, "step": 14123 }, { "epoch": 0.81, "grad_norm": 1.7355605363845825, "learning_rate": 1.8336559231141726e-06, "loss": 0.8906, "step": 14124 }, { "epoch": 0.81, "grad_norm": 1.0447041988372803, "learning_rate": 1.8325839336614858e-06, "loss": 0.5264, "step": 14125 }, { "epoch": 0.81, "grad_norm": 1.7561938762664795, "learning_rate": 1.8315122260435092e-06, "loss": 0.8997, "step": 14126 }, { "epoch": 0.81, "grad_norm": 1.7475374937057495, "learning_rate": 1.8304408002972318e-06, "loss": 0.9372, "step": 14127 }, { "epoch": 0.81, "grad_norm": 1.8813424110412598, "learning_rate": 1.8293696564596186e-06, "loss": 0.9757, "step": 14128 }, { "epoch": 0.81, "grad_norm": 1.6131538152694702, "learning_rate": 1.8282987945676368e-06, "loss": 0.876, "step": 14129 }, { "epoch": 0.81, "grad_norm": 1.7284905910491943, "learning_rate": 1.8272282146582354e-06, "loss": 0.8574, "step": 14130 }, { "epoch": 0.81, "grad_norm": 1.8039865493774414, "learning_rate": 1.8261579167683597e-06, "loss": 0.9642, "step": 14131 }, { "epoch": 0.81, "grad_norm": 1.7227176427841187, "learning_rate": 1.8250879009349398e-06, "loss": 0.9131, "step": 14132 }, { "epoch": 0.81, "grad_norm": 1.846163034439087, "learning_rate": 1.824018167194901e-06, "loss": 0.9538, "step": 14133 }, { "epoch": 0.81, "grad_norm": 1.778264045715332, "learning_rate": 1.8229487155851589e-06, "loss": 0.9239, "step": 14134 }, { "epoch": 0.81, "grad_norm": 1.0351076126098633, "learning_rate": 1.821879546142613e-06, "loss": 0.5484, "step": 14135 }, { "epoch": 0.81, "grad_norm": 1.67878258228302, "learning_rate": 1.8208106589041608e-06, "loss": 0.8323, "step": 14136 }, { "epoch": 0.81, "grad_norm": 1.779456377029419, "learning_rate": 1.8197420539066834e-06, "loss": 0.8745, "step": 14137 }, { "epoch": 0.81, "grad_norm": 1.8889758586883545, "learning_rate": 1.8186737311870596e-06, "loss": 0.9385, "step": 14138 }, { "epoch": 0.81, "grad_norm": 1.7490098476409912, "learning_rate": 1.8176056907821482e-06, "loss": 0.8726, "step": 14139 }, { "epoch": 0.81, "grad_norm": 0.9873440265655518, "learning_rate": 1.8165379327288113e-06, "loss": 0.5469, "step": 14140 }, { "epoch": 0.81, "grad_norm": 1.7836222648620605, "learning_rate": 1.8154704570638882e-06, "loss": 0.9273, "step": 14141 }, { "epoch": 0.81, "grad_norm": 1.867698073387146, "learning_rate": 1.8144032638242192e-06, "loss": 0.8756, "step": 14142 }, { "epoch": 0.81, "grad_norm": 2.6066627502441406, "learning_rate": 1.8133363530466253e-06, "loss": 0.9629, "step": 14143 }, { "epoch": 0.81, "grad_norm": 1.7265434265136719, "learning_rate": 1.8122697247679288e-06, "loss": 0.9542, "step": 14144 }, { "epoch": 0.81, "grad_norm": 1.7514406442642212, "learning_rate": 1.8112033790249294e-06, "loss": 0.9315, "step": 14145 }, { "epoch": 0.81, "grad_norm": 1.7310880422592163, "learning_rate": 1.8101373158544267e-06, "loss": 0.8511, "step": 14146 }, { "epoch": 0.81, "grad_norm": 1.8452088832855225, "learning_rate": 1.809071535293211e-06, "loss": 0.8755, "step": 14147 }, { "epoch": 0.81, "grad_norm": 1.7872775793075562, "learning_rate": 1.808006037378053e-06, "loss": 0.8991, "step": 14148 }, { "epoch": 0.81, "grad_norm": 1.7407293319702148, "learning_rate": 1.8069408221457264e-06, "loss": 0.898, "step": 14149 }, { "epoch": 0.81, "grad_norm": 1.7654609680175781, "learning_rate": 1.805875889632982e-06, "loss": 0.8645, "step": 14150 }, { "epoch": 0.81, "grad_norm": 1.651309847831726, "learning_rate": 1.804811239876575e-06, "loss": 0.9322, "step": 14151 }, { "epoch": 0.81, "grad_norm": 1.718447208404541, "learning_rate": 1.8037468729132368e-06, "loss": 0.8514, "step": 14152 }, { "epoch": 0.81, "grad_norm": 1.7021558284759521, "learning_rate": 1.8026827887797016e-06, "loss": 0.9079, "step": 14153 }, { "epoch": 0.81, "grad_norm": 1.7367775440216064, "learning_rate": 1.8016189875126821e-06, "loss": 0.9014, "step": 14154 }, { "epoch": 0.81, "grad_norm": 1.7291998863220215, "learning_rate": 1.8005554691488924e-06, "loss": 0.892, "step": 14155 }, { "epoch": 0.81, "grad_norm": 1.8266780376434326, "learning_rate": 1.7994922337250276e-06, "loss": 1.017, "step": 14156 }, { "epoch": 0.81, "grad_norm": 1.6764843463897705, "learning_rate": 1.7984292812777805e-06, "loss": 0.8096, "step": 14157 }, { "epoch": 0.81, "grad_norm": 1.7339614629745483, "learning_rate": 1.797366611843826e-06, "loss": 0.8607, "step": 14158 }, { "epoch": 0.81, "grad_norm": 1.0795891284942627, "learning_rate": 1.7963042254598362e-06, "loss": 0.5529, "step": 14159 }, { "epoch": 0.81, "grad_norm": 1.7569904327392578, "learning_rate": 1.795242122162475e-06, "loss": 0.9161, "step": 14160 }, { "epoch": 0.81, "grad_norm": 1.1390608549118042, "learning_rate": 1.7941803019883864e-06, "loss": 0.6185, "step": 14161 }, { "epoch": 0.81, "grad_norm": 1.9225541353225708, "learning_rate": 1.7931187649742155e-06, "loss": 0.8756, "step": 14162 }, { "epoch": 0.81, "grad_norm": 1.7774367332458496, "learning_rate": 1.7920575111565896e-06, "loss": 0.8882, "step": 14163 }, { "epoch": 0.81, "grad_norm": 1.7486282587051392, "learning_rate": 1.790996540572133e-06, "loss": 0.8801, "step": 14164 }, { "epoch": 0.81, "grad_norm": 1.7437165975570679, "learning_rate": 1.7899358532574518e-06, "loss": 0.914, "step": 14165 }, { "epoch": 0.81, "grad_norm": 1.7274329662322998, "learning_rate": 1.788875449249151e-06, "loss": 0.898, "step": 14166 }, { "epoch": 0.81, "grad_norm": 1.7874555587768555, "learning_rate": 1.7878153285838206e-06, "loss": 0.8526, "step": 14167 }, { "epoch": 0.81, "grad_norm": 1.7938071489334106, "learning_rate": 1.7867554912980478e-06, "loss": 0.8716, "step": 14168 }, { "epoch": 0.81, "grad_norm": 1.744019627571106, "learning_rate": 1.7856959374283967e-06, "loss": 0.8601, "step": 14169 }, { "epoch": 0.81, "grad_norm": 1.8557969331741333, "learning_rate": 1.7846366670114345e-06, "loss": 0.9045, "step": 14170 }, { "epoch": 0.81, "grad_norm": 1.9055627584457397, "learning_rate": 1.7835776800837113e-06, "loss": 0.9082, "step": 14171 }, { "epoch": 0.81, "grad_norm": 1.5974041223526, "learning_rate": 1.782518976681773e-06, "loss": 0.9352, "step": 14172 }, { "epoch": 0.81, "grad_norm": 1.7303529977798462, "learning_rate": 1.7814605568421473e-06, "loss": 0.8609, "step": 14173 }, { "epoch": 0.81, "grad_norm": 1.9245283603668213, "learning_rate": 1.7804024206013625e-06, "loss": 0.927, "step": 14174 }, { "epoch": 0.81, "grad_norm": 1.6998205184936523, "learning_rate": 1.7793445679959276e-06, "loss": 0.9598, "step": 14175 }, { "epoch": 0.81, "grad_norm": 1.7247503995895386, "learning_rate": 1.7782869990623475e-06, "loss": 0.8842, "step": 14176 }, { "epoch": 0.81, "grad_norm": 1.0022276639938354, "learning_rate": 1.7772297138371197e-06, "loss": 0.4701, "step": 14177 }, { "epoch": 0.81, "grad_norm": 1.750462293624878, "learning_rate": 1.776172712356723e-06, "loss": 0.8759, "step": 14178 }, { "epoch": 0.81, "grad_norm": 2.079314708709717, "learning_rate": 1.7751159946576357e-06, "loss": 0.9231, "step": 14179 }, { "epoch": 0.81, "grad_norm": 1.8474836349487305, "learning_rate": 1.7740595607763177e-06, "loss": 0.902, "step": 14180 }, { "epoch": 0.81, "grad_norm": 1.7702025175094604, "learning_rate": 1.7730034107492278e-06, "loss": 0.8724, "step": 14181 }, { "epoch": 0.81, "grad_norm": 1.7252347469329834, "learning_rate": 1.7719475446128076e-06, "loss": 0.8878, "step": 14182 }, { "epoch": 0.81, "grad_norm": 1.8300334215164185, "learning_rate": 1.770891962403496e-06, "loss": 0.9086, "step": 14183 }, { "epoch": 0.81, "grad_norm": 1.7947404384613037, "learning_rate": 1.7698366641577124e-06, "loss": 0.8064, "step": 14184 }, { "epoch": 0.81, "grad_norm": 1.6846437454223633, "learning_rate": 1.7687816499118781e-06, "loss": 0.8231, "step": 14185 }, { "epoch": 0.81, "grad_norm": 1.7311357259750366, "learning_rate": 1.7677269197023938e-06, "loss": 0.9341, "step": 14186 }, { "epoch": 0.81, "grad_norm": 1.7213209867477417, "learning_rate": 1.7666724735656583e-06, "loss": 0.8893, "step": 14187 }, { "epoch": 0.81, "grad_norm": 1.854055643081665, "learning_rate": 1.7656183115380577e-06, "loss": 0.9033, "step": 14188 }, { "epoch": 0.81, "grad_norm": 1.7185779809951782, "learning_rate": 1.7645644336559665e-06, "loss": 0.8737, "step": 14189 }, { "epoch": 0.81, "grad_norm": 1.839493751525879, "learning_rate": 1.7635108399557532e-06, "loss": 0.9342, "step": 14190 }, { "epoch": 0.81, "grad_norm": 1.0113407373428345, "learning_rate": 1.7624575304737713e-06, "loss": 0.5621, "step": 14191 }, { "epoch": 0.81, "grad_norm": 1.6885428428649902, "learning_rate": 1.7614045052463724e-06, "loss": 0.8431, "step": 14192 }, { "epoch": 0.81, "grad_norm": 1.880582332611084, "learning_rate": 1.7603517643098866e-06, "loss": 1.0317, "step": 14193 }, { "epoch": 0.81, "grad_norm": 3.6084463596343994, "learning_rate": 1.7592993077006482e-06, "loss": 0.9631, "step": 14194 }, { "epoch": 0.81, "grad_norm": 1.681396484375, "learning_rate": 1.758247135454969e-06, "loss": 0.8958, "step": 14195 }, { "epoch": 0.81, "grad_norm": 1.828576922416687, "learning_rate": 1.7571952476091604e-06, "loss": 0.9367, "step": 14196 }, { "epoch": 0.81, "grad_norm": 1.6702998876571655, "learning_rate": 1.756143644199516e-06, "loss": 0.9021, "step": 14197 }, { "epoch": 0.81, "grad_norm": 1.7651079893112183, "learning_rate": 1.7550923252623299e-06, "loss": 0.9085, "step": 14198 }, { "epoch": 0.81, "grad_norm": 1.7151782512664795, "learning_rate": 1.7540412908338723e-06, "loss": 0.9235, "step": 14199 }, { "epoch": 0.81, "grad_norm": 1.7548067569732666, "learning_rate": 1.7529905409504167e-06, "loss": 0.8431, "step": 14200 }, { "epoch": 0.81, "grad_norm": 1.7655396461486816, "learning_rate": 1.751940075648223e-06, "loss": 0.8821, "step": 14201 }, { "epoch": 0.81, "grad_norm": 1.8697059154510498, "learning_rate": 1.7508898949635345e-06, "loss": 0.8703, "step": 14202 }, { "epoch": 0.81, "grad_norm": 1.8848613500595093, "learning_rate": 1.7498399989325943e-06, "loss": 0.9447, "step": 14203 }, { "epoch": 0.81, "grad_norm": 1.8009474277496338, "learning_rate": 1.748790387591629e-06, "loss": 0.8516, "step": 14204 }, { "epoch": 0.81, "grad_norm": 1.5711179971694946, "learning_rate": 1.7477410609768597e-06, "loss": 0.9435, "step": 14205 }, { "epoch": 0.81, "grad_norm": 1.739617109298706, "learning_rate": 1.746692019124493e-06, "loss": 0.9154, "step": 14206 }, { "epoch": 0.81, "grad_norm": 1.8294473886489868, "learning_rate": 1.745643262070732e-06, "loss": 0.9235, "step": 14207 }, { "epoch": 0.81, "grad_norm": 1.6991270780563354, "learning_rate": 1.7445947898517624e-06, "loss": 0.8487, "step": 14208 }, { "epoch": 0.81, "grad_norm": 1.8120815753936768, "learning_rate": 1.7435466025037684e-06, "loss": 0.8244, "step": 14209 }, { "epoch": 0.81, "grad_norm": 1.776281476020813, "learning_rate": 1.7424987000629146e-06, "loss": 0.8771, "step": 14210 }, { "epoch": 0.82, "grad_norm": 1.6794159412384033, "learning_rate": 1.7414510825653674e-06, "loss": 0.915, "step": 14211 }, { "epoch": 0.82, "grad_norm": 1.6037272214889526, "learning_rate": 1.7404037500472714e-06, "loss": 0.8254, "step": 14212 }, { "epoch": 0.82, "grad_norm": 1.8331823348999023, "learning_rate": 1.73935670254477e-06, "loss": 0.9082, "step": 14213 }, { "epoch": 0.82, "grad_norm": 1.7944010496139526, "learning_rate": 1.7383099400939963e-06, "loss": 0.8992, "step": 14214 }, { "epoch": 0.82, "grad_norm": 1.8769935369491577, "learning_rate": 1.7372634627310647e-06, "loss": 0.8835, "step": 14215 }, { "epoch": 0.82, "grad_norm": 1.7185628414154053, "learning_rate": 1.7362172704920933e-06, "loss": 0.921, "step": 14216 }, { "epoch": 0.82, "grad_norm": 0.995455801486969, "learning_rate": 1.7351713634131773e-06, "loss": 0.4698, "step": 14217 }, { "epoch": 0.82, "grad_norm": 1.8464833498001099, "learning_rate": 1.7341257415304137e-06, "loss": 0.8171, "step": 14218 }, { "epoch": 0.82, "grad_norm": 1.6105918884277344, "learning_rate": 1.7330804048798777e-06, "loss": 0.8858, "step": 14219 }, { "epoch": 0.82, "grad_norm": 1.681216835975647, "learning_rate": 1.7320353534976474e-06, "loss": 0.9911, "step": 14220 }, { "epoch": 0.82, "grad_norm": 1.7502812147140503, "learning_rate": 1.7309905874197786e-06, "loss": 0.9489, "step": 14221 }, { "epoch": 0.82, "grad_norm": 1.6263118982315063, "learning_rate": 1.7299461066823286e-06, "loss": 0.9049, "step": 14222 }, { "epoch": 0.82, "grad_norm": 1.7487337589263916, "learning_rate": 1.7289019113213346e-06, "loss": 0.8828, "step": 14223 }, { "epoch": 0.82, "grad_norm": 1.6987881660461426, "learning_rate": 1.7278580013728307e-06, "loss": 0.9132, "step": 14224 }, { "epoch": 0.82, "grad_norm": 1.771954894065857, "learning_rate": 1.7268143768728429e-06, "loss": 0.8872, "step": 14225 }, { "epoch": 0.82, "grad_norm": 1.6657689809799194, "learning_rate": 1.725771037857379e-06, "loss": 0.9551, "step": 14226 }, { "epoch": 0.82, "grad_norm": 1.898425817489624, "learning_rate": 1.7247279843624455e-06, "loss": 0.8439, "step": 14227 }, { "epoch": 0.82, "grad_norm": 1.7319918870925903, "learning_rate": 1.7236852164240292e-06, "loss": 0.9171, "step": 14228 }, { "epoch": 0.82, "grad_norm": 1.794089436531067, "learning_rate": 1.7226427340781215e-06, "loss": 0.8083, "step": 14229 }, { "epoch": 0.82, "grad_norm": 1.8060024976730347, "learning_rate": 1.7216005373606881e-06, "loss": 0.9427, "step": 14230 }, { "epoch": 0.82, "grad_norm": 1.9346424341201782, "learning_rate": 1.7205586263076978e-06, "loss": 0.8922, "step": 14231 }, { "epoch": 0.82, "grad_norm": 1.7874208688735962, "learning_rate": 1.7195170009550966e-06, "loss": 0.9424, "step": 14232 }, { "epoch": 0.82, "grad_norm": 0.9846954345703125, "learning_rate": 1.7184756613388376e-06, "loss": 0.525, "step": 14233 }, { "epoch": 0.82, "grad_norm": 1.7438921928405762, "learning_rate": 1.7174346074948478e-06, "loss": 0.924, "step": 14234 }, { "epoch": 0.82, "grad_norm": 1.921628475189209, "learning_rate": 1.7163938394590563e-06, "loss": 0.9272, "step": 14235 }, { "epoch": 0.82, "grad_norm": 1.915191411972046, "learning_rate": 1.7153533572673708e-06, "loss": 0.9535, "step": 14236 }, { "epoch": 0.82, "grad_norm": 1.7039462327957153, "learning_rate": 1.7143131609557017e-06, "loss": 0.92, "step": 14237 }, { "epoch": 0.82, "grad_norm": 1.993430733680725, "learning_rate": 1.713273250559938e-06, "loss": 0.8601, "step": 14238 }, { "epoch": 0.82, "grad_norm": 1.6365638971328735, "learning_rate": 1.7122336261159689e-06, "loss": 0.935, "step": 14239 }, { "epoch": 0.82, "grad_norm": 1.8261125087738037, "learning_rate": 1.7111942876596633e-06, "loss": 0.9117, "step": 14240 }, { "epoch": 0.82, "grad_norm": 1.9686861038208008, "learning_rate": 1.7101552352268901e-06, "loss": 0.9045, "step": 14241 }, { "epoch": 0.82, "grad_norm": 1.7271994352340698, "learning_rate": 1.7091164688535044e-06, "loss": 0.9066, "step": 14242 }, { "epoch": 0.82, "grad_norm": 1.8059329986572266, "learning_rate": 1.7080779885753473e-06, "loss": 0.8867, "step": 14243 }, { "epoch": 0.82, "grad_norm": 1.871031641960144, "learning_rate": 1.707039794428259e-06, "loss": 0.7928, "step": 14244 }, { "epoch": 0.82, "grad_norm": 1.6465849876403809, "learning_rate": 1.7060018864480598e-06, "loss": 0.9053, "step": 14245 }, { "epoch": 0.82, "grad_norm": 1.7444630861282349, "learning_rate": 1.7049642646705688e-06, "loss": 0.8862, "step": 14246 }, { "epoch": 0.82, "grad_norm": 1.780835747718811, "learning_rate": 1.7039269291315885e-06, "loss": 0.8361, "step": 14247 }, { "epoch": 0.82, "grad_norm": 1.8187954425811768, "learning_rate": 1.702889879866917e-06, "loss": 0.8388, "step": 14248 }, { "epoch": 0.82, "grad_norm": 1.773315668106079, "learning_rate": 1.7018531169123364e-06, "loss": 0.8386, "step": 14249 }, { "epoch": 0.82, "grad_norm": 1.8218978643417358, "learning_rate": 1.7008166403036286e-06, "loss": 0.9175, "step": 14250 }, { "epoch": 0.82, "grad_norm": 1.4901678562164307, "learning_rate": 1.6997804500765513e-06, "loss": 0.7915, "step": 14251 }, { "epoch": 0.82, "grad_norm": 1.6536728143692017, "learning_rate": 1.6987445462668695e-06, "loss": 0.8286, "step": 14252 }, { "epoch": 0.82, "grad_norm": 1.7746291160583496, "learning_rate": 1.697708928910321e-06, "loss": 0.9312, "step": 14253 }, { "epoch": 0.82, "grad_norm": 1.7446489334106445, "learning_rate": 1.6966735980426453e-06, "loss": 0.7996, "step": 14254 }, { "epoch": 0.82, "grad_norm": 1.7436262369155884, "learning_rate": 1.6956385536995735e-06, "loss": 0.8566, "step": 14255 }, { "epoch": 0.82, "grad_norm": 1.8321533203125, "learning_rate": 1.6946037959168138e-06, "loss": 0.861, "step": 14256 }, { "epoch": 0.82, "grad_norm": 1.6946762800216675, "learning_rate": 1.69356932473008e-06, "loss": 0.9216, "step": 14257 }, { "epoch": 0.82, "grad_norm": 1.8879473209381104, "learning_rate": 1.6925351401750634e-06, "loss": 0.8533, "step": 14258 }, { "epoch": 0.82, "grad_norm": 1.8471488952636719, "learning_rate": 1.6915012422874555e-06, "loss": 0.9009, "step": 14259 }, { "epoch": 0.82, "grad_norm": 2.0366759300231934, "learning_rate": 1.6904676311029289e-06, "loss": 0.9227, "step": 14260 }, { "epoch": 0.82, "grad_norm": 1.857603907585144, "learning_rate": 1.689434306657154e-06, "loss": 0.848, "step": 14261 }, { "epoch": 0.82, "grad_norm": 1.6743396520614624, "learning_rate": 1.6884012689857854e-06, "loss": 0.8402, "step": 14262 }, { "epoch": 0.82, "grad_norm": 1.8774621486663818, "learning_rate": 1.6873685181244726e-06, "loss": 0.944, "step": 14263 }, { "epoch": 0.82, "grad_norm": 1.6760950088500977, "learning_rate": 1.6863360541088503e-06, "loss": 0.8466, "step": 14264 }, { "epoch": 0.82, "grad_norm": 1.7994827032089233, "learning_rate": 1.6853038769745466e-06, "loss": 0.9446, "step": 14265 }, { "epoch": 0.82, "grad_norm": 1.735919713973999, "learning_rate": 1.6842719867571832e-06, "loss": 0.8534, "step": 14266 }, { "epoch": 0.82, "grad_norm": 1.7978187799453735, "learning_rate": 1.6832403834923617e-06, "loss": 0.9337, "step": 14267 }, { "epoch": 0.82, "grad_norm": 1.7791756391525269, "learning_rate": 1.6822090672156854e-06, "loss": 0.872, "step": 14268 }, { "epoch": 0.82, "grad_norm": 1.5988049507141113, "learning_rate": 1.6811780379627374e-06, "loss": 0.8608, "step": 14269 }, { "epoch": 0.82, "grad_norm": 1.950165033340454, "learning_rate": 1.6801472957690989e-06, "loss": 0.8776, "step": 14270 }, { "epoch": 0.82, "grad_norm": 1.726991891860962, "learning_rate": 1.6791168406703351e-06, "loss": 0.9663, "step": 14271 }, { "epoch": 0.82, "grad_norm": 1.9134963750839233, "learning_rate": 1.6780866727020074e-06, "loss": 0.9385, "step": 14272 }, { "epoch": 0.82, "grad_norm": 1.8603650331497192, "learning_rate": 1.6770567918996604e-06, "loss": 0.965, "step": 14273 }, { "epoch": 0.82, "grad_norm": 1.9431560039520264, "learning_rate": 1.6760271982988363e-06, "loss": 0.9282, "step": 14274 }, { "epoch": 0.82, "grad_norm": 1.7547402381896973, "learning_rate": 1.6749978919350595e-06, "loss": 0.8584, "step": 14275 }, { "epoch": 0.82, "grad_norm": 1.8730500936508179, "learning_rate": 1.673968872843853e-06, "loss": 0.9098, "step": 14276 }, { "epoch": 0.82, "grad_norm": 1.7788445949554443, "learning_rate": 1.6729401410607205e-06, "loss": 0.8579, "step": 14277 }, { "epoch": 0.82, "grad_norm": 2.0393218994140625, "learning_rate": 1.6719116966211624e-06, "loss": 0.9371, "step": 14278 }, { "epoch": 0.82, "grad_norm": 1.7952711582183838, "learning_rate": 1.6708835395606704e-06, "loss": 0.9065, "step": 14279 }, { "epoch": 0.82, "grad_norm": 1.6670825481414795, "learning_rate": 1.6698556699147195e-06, "loss": 0.9141, "step": 14280 }, { "epoch": 0.82, "grad_norm": 1.9021333456039429, "learning_rate": 1.6688280877187824e-06, "loss": 0.9704, "step": 14281 }, { "epoch": 0.82, "grad_norm": 1.9523084163665771, "learning_rate": 1.667800793008313e-06, "loss": 0.9238, "step": 14282 }, { "epoch": 0.82, "grad_norm": 1.7435333728790283, "learning_rate": 1.6667737858187649e-06, "loss": 0.9254, "step": 14283 }, { "epoch": 0.82, "grad_norm": 1.7668544054031372, "learning_rate": 1.6657470661855746e-06, "loss": 0.8475, "step": 14284 }, { "epoch": 0.82, "grad_norm": 1.7622661590576172, "learning_rate": 1.6647206341441735e-06, "loss": 0.9824, "step": 14285 }, { "epoch": 0.82, "grad_norm": 1.7271367311477661, "learning_rate": 1.6636944897299777e-06, "loss": 0.9462, "step": 14286 }, { "epoch": 0.82, "grad_norm": 1.7134380340576172, "learning_rate": 1.6626686329784003e-06, "loss": 0.8616, "step": 14287 }, { "epoch": 0.82, "grad_norm": 1.1179444789886475, "learning_rate": 1.661643063924837e-06, "loss": 0.5259, "step": 14288 }, { "epoch": 0.82, "grad_norm": 1.0577850341796875, "learning_rate": 1.6606177826046822e-06, "loss": 0.5911, "step": 14289 }, { "epoch": 0.82, "grad_norm": 1.6966627836227417, "learning_rate": 1.6595927890533103e-06, "loss": 0.9122, "step": 14290 }, { "epoch": 0.82, "grad_norm": 1.8448669910430908, "learning_rate": 1.6585680833060923e-06, "loss": 0.9101, "step": 14291 }, { "epoch": 0.82, "grad_norm": 1.6884130239486694, "learning_rate": 1.6575436653983923e-06, "loss": 1.0163, "step": 14292 }, { "epoch": 0.82, "grad_norm": 1.8530460596084595, "learning_rate": 1.656519535365554e-06, "loss": 0.8685, "step": 14293 }, { "epoch": 0.82, "grad_norm": 1.7355821132659912, "learning_rate": 1.6554956932429223e-06, "loss": 0.8415, "step": 14294 }, { "epoch": 0.82, "grad_norm": 1.6747983694076538, "learning_rate": 1.6544721390658213e-06, "loss": 0.8036, "step": 14295 }, { "epoch": 0.82, "grad_norm": 1.7244642972946167, "learning_rate": 1.6534488728695786e-06, "loss": 0.8406, "step": 14296 }, { "epoch": 0.82, "grad_norm": 1.7265794277191162, "learning_rate": 1.6524258946894966e-06, "loss": 0.8608, "step": 14297 }, { "epoch": 0.82, "grad_norm": 1.7757856845855713, "learning_rate": 1.6514032045608819e-06, "loss": 1.0044, "step": 14298 }, { "epoch": 0.82, "grad_norm": 1.68044114112854, "learning_rate": 1.650380802519017e-06, "loss": 0.8888, "step": 14299 }, { "epoch": 0.82, "grad_norm": 1.7359893321990967, "learning_rate": 1.6493586885991908e-06, "loss": 0.976, "step": 14300 }, { "epoch": 0.82, "grad_norm": 1.6907399892807007, "learning_rate": 1.648336862836668e-06, "loss": 0.9145, "step": 14301 }, { "epoch": 0.82, "grad_norm": 1.8975483179092407, "learning_rate": 1.647315325266714e-06, "loss": 0.8715, "step": 14302 }, { "epoch": 0.82, "grad_norm": 1.63186514377594, "learning_rate": 1.6462940759245716e-06, "loss": 0.8925, "step": 14303 }, { "epoch": 0.82, "grad_norm": 1.7615134716033936, "learning_rate": 1.6452731148454893e-06, "loss": 0.9618, "step": 14304 }, { "epoch": 0.82, "grad_norm": 1.7152031660079956, "learning_rate": 1.644252442064691e-06, "loss": 1.0029, "step": 14305 }, { "epoch": 0.82, "grad_norm": 1.65349543094635, "learning_rate": 1.643232057617402e-06, "loss": 0.9047, "step": 14306 }, { "epoch": 0.82, "grad_norm": 1.9353394508361816, "learning_rate": 1.6422119615388288e-06, "loss": 0.8764, "step": 14307 }, { "epoch": 0.82, "grad_norm": 0.9299299716949463, "learning_rate": 1.641192153864175e-06, "loss": 0.4796, "step": 14308 }, { "epoch": 0.82, "grad_norm": 1.756333827972412, "learning_rate": 1.6401726346286317e-06, "loss": 0.8628, "step": 14309 }, { "epoch": 0.82, "grad_norm": 1.831635594367981, "learning_rate": 1.6391534038673774e-06, "loss": 0.8596, "step": 14310 }, { "epoch": 0.82, "grad_norm": 2.5277256965637207, "learning_rate": 1.6381344616155859e-06, "loss": 0.8635, "step": 14311 }, { "epoch": 0.82, "grad_norm": 1.7213791608810425, "learning_rate": 1.6371158079084136e-06, "loss": 0.8277, "step": 14312 }, { "epoch": 0.82, "grad_norm": 1.756650686264038, "learning_rate": 1.6360974427810172e-06, "loss": 0.8685, "step": 14313 }, { "epoch": 0.82, "grad_norm": 1.7933175563812256, "learning_rate": 1.6350793662685305e-06, "loss": 0.9306, "step": 14314 }, { "epoch": 0.82, "grad_norm": 1.8213019371032715, "learning_rate": 1.634061578406092e-06, "loss": 0.8476, "step": 14315 }, { "epoch": 0.82, "grad_norm": 1.6642775535583496, "learning_rate": 1.633044079228817e-06, "loss": 0.8428, "step": 14316 }, { "epoch": 0.82, "grad_norm": 1.776218056678772, "learning_rate": 1.6320268687718199e-06, "loss": 0.8844, "step": 14317 }, { "epoch": 0.82, "grad_norm": 1.7835209369659424, "learning_rate": 1.631009947070199e-06, "loss": 0.8475, "step": 14318 }, { "epoch": 0.82, "grad_norm": 1.727022409439087, "learning_rate": 1.6299933141590473e-06, "loss": 0.8876, "step": 14319 }, { "epoch": 0.82, "grad_norm": 1.9957889318466187, "learning_rate": 1.628976970073447e-06, "loss": 0.8827, "step": 14320 }, { "epoch": 0.82, "grad_norm": 1.7999436855316162, "learning_rate": 1.6279609148484666e-06, "loss": 0.9053, "step": 14321 }, { "epoch": 0.82, "grad_norm": 1.8848061561584473, "learning_rate": 1.6269451485191701e-06, "loss": 0.8885, "step": 14322 }, { "epoch": 0.82, "grad_norm": 1.8123704195022583, "learning_rate": 1.6259296711206051e-06, "loss": 0.8348, "step": 14323 }, { "epoch": 0.82, "grad_norm": 1.7478015422821045, "learning_rate": 1.6249144826878182e-06, "loss": 0.9418, "step": 14324 }, { "epoch": 0.82, "grad_norm": 1.67262601852417, "learning_rate": 1.6238995832558358e-06, "loss": 0.8111, "step": 14325 }, { "epoch": 0.82, "grad_norm": 1.5477443933486938, "learning_rate": 1.6228849728596818e-06, "loss": 0.8651, "step": 14326 }, { "epoch": 0.82, "grad_norm": 1.8882970809936523, "learning_rate": 1.6218706515343652e-06, "loss": 0.8542, "step": 14327 }, { "epoch": 0.82, "grad_norm": 1.8037967681884766, "learning_rate": 1.6208566193148922e-06, "loss": 0.9123, "step": 14328 }, { "epoch": 0.82, "grad_norm": 1.7822704315185547, "learning_rate": 1.6198428762362473e-06, "loss": 0.9266, "step": 14329 }, { "epoch": 0.82, "grad_norm": 1.728736400604248, "learning_rate": 1.618829422333419e-06, "loss": 0.9156, "step": 14330 }, { "epoch": 0.82, "grad_norm": 1.1977746486663818, "learning_rate": 1.6178162576413736e-06, "loss": 0.5566, "step": 14331 }, { "epoch": 0.82, "grad_norm": 1.8046597242355347, "learning_rate": 1.6168033821950735e-06, "loss": 0.9103, "step": 14332 }, { "epoch": 0.82, "grad_norm": 1.7510817050933838, "learning_rate": 1.615790796029474e-06, "loss": 0.8537, "step": 14333 }, { "epoch": 0.82, "grad_norm": 1.7535678148269653, "learning_rate": 1.6147784991795113e-06, "loss": 0.8846, "step": 14334 }, { "epoch": 0.82, "grad_norm": 1.6615991592407227, "learning_rate": 1.613766491680121e-06, "loss": 0.8082, "step": 14335 }, { "epoch": 0.82, "grad_norm": 1.8091779947280884, "learning_rate": 1.6127547735662218e-06, "loss": 0.9701, "step": 14336 }, { "epoch": 0.82, "grad_norm": 1.9102187156677246, "learning_rate": 1.6117433448727282e-06, "loss": 0.9518, "step": 14337 }, { "epoch": 0.82, "grad_norm": 1.7487438917160034, "learning_rate": 1.6107322056345388e-06, "loss": 0.948, "step": 14338 }, { "epoch": 0.82, "grad_norm": 1.793394684791565, "learning_rate": 1.6097213558865478e-06, "loss": 0.9151, "step": 14339 }, { "epoch": 0.82, "grad_norm": 1.6446669101715088, "learning_rate": 1.6087107956636338e-06, "loss": 0.898, "step": 14340 }, { "epoch": 0.82, "grad_norm": 1.6988474130630493, "learning_rate": 1.6077005250006717e-06, "loss": 0.8762, "step": 14341 }, { "epoch": 0.82, "grad_norm": 1.8521496057510376, "learning_rate": 1.6066905439325199e-06, "loss": 0.7738, "step": 14342 }, { "epoch": 0.82, "grad_norm": 1.7256203889846802, "learning_rate": 1.6056808524940338e-06, "loss": 0.956, "step": 14343 }, { "epoch": 0.82, "grad_norm": 1.7402937412261963, "learning_rate": 1.60467145072005e-06, "loss": 0.8862, "step": 14344 }, { "epoch": 0.82, "grad_norm": 1.7349143028259277, "learning_rate": 1.6036623386454041e-06, "loss": 0.9085, "step": 14345 }, { "epoch": 0.82, "grad_norm": 1.7306697368621826, "learning_rate": 1.6026535163049184e-06, "loss": 0.8174, "step": 14346 }, { "epoch": 0.82, "grad_norm": 1.723577618598938, "learning_rate": 1.6016449837334004e-06, "loss": 0.9039, "step": 14347 }, { "epoch": 0.82, "grad_norm": 1.6758160591125488, "learning_rate": 1.6006367409656564e-06, "loss": 0.8787, "step": 14348 }, { "epoch": 0.82, "grad_norm": 1.863316297531128, "learning_rate": 1.5996287880364736e-06, "loss": 0.9003, "step": 14349 }, { "epoch": 0.82, "grad_norm": 1.8070380687713623, "learning_rate": 1.5986211249806382e-06, "loss": 0.8482, "step": 14350 }, { "epoch": 0.82, "grad_norm": 1.0333231687545776, "learning_rate": 1.5976137518329182e-06, "loss": 0.5935, "step": 14351 }, { "epoch": 0.82, "grad_norm": 1.6390674114227295, "learning_rate": 1.5966066686280778e-06, "loss": 0.8684, "step": 14352 }, { "epoch": 0.82, "grad_norm": 1.8405100107192993, "learning_rate": 1.595599875400865e-06, "loss": 0.9065, "step": 14353 }, { "epoch": 0.82, "grad_norm": 0.9881863594055176, "learning_rate": 1.5945933721860263e-06, "loss": 0.4933, "step": 14354 }, { "epoch": 0.82, "grad_norm": 1.0013110637664795, "learning_rate": 1.5935871590182883e-06, "loss": 0.4961, "step": 14355 }, { "epoch": 0.82, "grad_norm": 1.699596881866455, "learning_rate": 1.5925812359323745e-06, "loss": 0.8972, "step": 14356 }, { "epoch": 0.82, "grad_norm": 1.0379343032836914, "learning_rate": 1.5915756029630004e-06, "loss": 0.5512, "step": 14357 }, { "epoch": 0.82, "grad_norm": 1.777631163597107, "learning_rate": 1.5905702601448615e-06, "loss": 0.8946, "step": 14358 }, { "epoch": 0.82, "grad_norm": 1.8313721418380737, "learning_rate": 1.5895652075126545e-06, "loss": 0.9193, "step": 14359 }, { "epoch": 0.82, "grad_norm": 1.9245115518569946, "learning_rate": 1.588560445101056e-06, "loss": 0.9203, "step": 14360 }, { "epoch": 0.82, "grad_norm": 1.8366649150848389, "learning_rate": 1.587555972944742e-06, "loss": 0.8777, "step": 14361 }, { "epoch": 0.82, "grad_norm": 1.961328387260437, "learning_rate": 1.5865517910783712e-06, "loss": 0.962, "step": 14362 }, { "epoch": 0.82, "grad_norm": 1.8175925016403198, "learning_rate": 1.585547899536598e-06, "loss": 0.9832, "step": 14363 }, { "epoch": 0.82, "grad_norm": 1.7593896389007568, "learning_rate": 1.5845442983540593e-06, "loss": 0.8848, "step": 14364 }, { "epoch": 0.82, "grad_norm": 0.9625341892242432, "learning_rate": 1.5835409875653884e-06, "loss": 0.5052, "step": 14365 }, { "epoch": 0.82, "grad_norm": 1.770939588546753, "learning_rate": 1.5825379672052088e-06, "loss": 0.8532, "step": 14366 }, { "epoch": 0.82, "grad_norm": 1.7892173528671265, "learning_rate": 1.5815352373081328e-06, "loss": 0.8539, "step": 14367 }, { "epoch": 0.82, "grad_norm": 1.8567909002304077, "learning_rate": 1.580532797908757e-06, "loss": 0.9543, "step": 14368 }, { "epoch": 0.82, "grad_norm": 2.013876438140869, "learning_rate": 1.5795306490416784e-06, "loss": 0.9796, "step": 14369 }, { "epoch": 0.82, "grad_norm": 1.7063997983932495, "learning_rate": 1.5785287907414726e-06, "loss": 0.9636, "step": 14370 }, { "epoch": 0.82, "grad_norm": 1.025414228439331, "learning_rate": 1.5775272230427164e-06, "loss": 0.5462, "step": 14371 }, { "epoch": 0.82, "grad_norm": 1.9535168409347534, "learning_rate": 1.5765259459799664e-06, "loss": 0.923, "step": 14372 }, { "epoch": 0.82, "grad_norm": 1.6779085397720337, "learning_rate": 1.5755249595877752e-06, "loss": 1.0168, "step": 14373 }, { "epoch": 0.82, "grad_norm": 1.902982473373413, "learning_rate": 1.5745242639006886e-06, "loss": 0.8881, "step": 14374 }, { "epoch": 0.82, "grad_norm": 1.5876433849334717, "learning_rate": 1.573523858953231e-06, "loss": 0.946, "step": 14375 }, { "epoch": 0.82, "grad_norm": 1.75193190574646, "learning_rate": 1.572523744779928e-06, "loss": 0.9079, "step": 14376 }, { "epoch": 0.82, "grad_norm": 1.9131932258605957, "learning_rate": 1.5715239214152877e-06, "loss": 0.8516, "step": 14377 }, { "epoch": 0.82, "grad_norm": 1.594518780708313, "learning_rate": 1.570524388893816e-06, "loss": 0.9242, "step": 14378 }, { "epoch": 0.82, "grad_norm": 1.7981634140014648, "learning_rate": 1.5695251472499974e-06, "loss": 1.0, "step": 14379 }, { "epoch": 0.82, "grad_norm": 1.6218276023864746, "learning_rate": 1.5685261965183196e-06, "loss": 0.9352, "step": 14380 }, { "epoch": 0.82, "grad_norm": 1.071269154548645, "learning_rate": 1.5675275367332476e-06, "loss": 0.5487, "step": 14381 }, { "epoch": 0.82, "grad_norm": 1.6671252250671387, "learning_rate": 1.5665291679292472e-06, "loss": 0.8756, "step": 14382 }, { "epoch": 0.82, "grad_norm": 1.8355664014816284, "learning_rate": 1.565531090140765e-06, "loss": 0.8914, "step": 14383 }, { "epoch": 0.82, "grad_norm": 1.8472294807434082, "learning_rate": 1.564533303402247e-06, "loss": 0.8645, "step": 14384 }, { "epoch": 0.83, "grad_norm": 1.7986302375793457, "learning_rate": 1.563535807748119e-06, "loss": 0.9585, "step": 14385 }, { "epoch": 0.83, "grad_norm": 1.6741151809692383, "learning_rate": 1.562538603212803e-06, "loss": 0.9099, "step": 14386 }, { "epoch": 0.83, "grad_norm": 1.724277377128601, "learning_rate": 1.5615416898307135e-06, "loss": 0.9718, "step": 14387 }, { "epoch": 0.83, "grad_norm": 1.8047869205474854, "learning_rate": 1.5605450676362465e-06, "loss": 0.9433, "step": 14388 }, { "epoch": 0.83, "grad_norm": 1.727473258972168, "learning_rate": 1.5595487366637962e-06, "loss": 0.8724, "step": 14389 }, { "epoch": 0.83, "grad_norm": 1.7540818452835083, "learning_rate": 1.5585526969477394e-06, "loss": 0.8661, "step": 14390 }, { "epoch": 0.83, "grad_norm": 1.8403637409210205, "learning_rate": 1.5575569485224519e-06, "loss": 0.8312, "step": 14391 }, { "epoch": 0.83, "grad_norm": 1.7430156469345093, "learning_rate": 1.556561491422287e-06, "loss": 0.8698, "step": 14392 }, { "epoch": 0.83, "grad_norm": 1.8151819705963135, "learning_rate": 1.5555663256816033e-06, "loss": 0.9164, "step": 14393 }, { "epoch": 0.83, "grad_norm": 1.6449781656265259, "learning_rate": 1.5545714513347343e-06, "loss": 0.8366, "step": 14394 }, { "epoch": 0.83, "grad_norm": 1.0620030164718628, "learning_rate": 1.5535768684160158e-06, "loss": 0.5468, "step": 14395 }, { "epoch": 0.83, "grad_norm": 1.733941674232483, "learning_rate": 1.5525825769597625e-06, "loss": 0.938, "step": 14396 }, { "epoch": 0.83, "grad_norm": 1.75095796585083, "learning_rate": 1.5515885770002891e-06, "loss": 0.9465, "step": 14397 }, { "epoch": 0.83, "grad_norm": 1.7997056245803833, "learning_rate": 1.550594868571893e-06, "loss": 0.9154, "step": 14398 }, { "epoch": 0.83, "grad_norm": 1.7493253946304321, "learning_rate": 1.5496014517088654e-06, "loss": 0.8431, "step": 14399 }, { "epoch": 0.83, "grad_norm": 1.7949256896972656, "learning_rate": 1.5486083264454887e-06, "loss": 0.9434, "step": 14400 }, { "epoch": 0.83, "grad_norm": 1.8511708974838257, "learning_rate": 1.547615492816029e-06, "loss": 0.9624, "step": 14401 }, { "epoch": 0.83, "grad_norm": 1.9339182376861572, "learning_rate": 1.5466229508547492e-06, "loss": 0.8446, "step": 14402 }, { "epoch": 0.83, "grad_norm": 1.5918800830841064, "learning_rate": 1.545630700595896e-06, "loss": 0.8253, "step": 14403 }, { "epoch": 0.83, "grad_norm": 1.9751707315444946, "learning_rate": 1.544638742073713e-06, "loss": 0.9335, "step": 14404 }, { "epoch": 0.83, "grad_norm": 1.7182022333145142, "learning_rate": 1.5436470753224264e-06, "loss": 0.8629, "step": 14405 }, { "epoch": 0.83, "grad_norm": 1.8121334314346313, "learning_rate": 1.5426557003762587e-06, "loss": 0.8312, "step": 14406 }, { "epoch": 0.83, "grad_norm": 1.8647698163986206, "learning_rate": 1.541664617269416e-06, "loss": 0.9006, "step": 14407 }, { "epoch": 0.83, "grad_norm": 1.6378693580627441, "learning_rate": 1.5406738260361031e-06, "loss": 0.9883, "step": 14408 }, { "epoch": 0.83, "grad_norm": 1.6593563556671143, "learning_rate": 1.5396833267105026e-06, "loss": 0.7769, "step": 14409 }, { "epoch": 0.83, "grad_norm": 1.6766915321350098, "learning_rate": 1.5386931193267983e-06, "loss": 0.8609, "step": 14410 }, { "epoch": 0.83, "grad_norm": 1.9499013423919678, "learning_rate": 1.5377032039191608e-06, "loss": 0.9692, "step": 14411 }, { "epoch": 0.83, "grad_norm": 1.6247973442077637, "learning_rate": 1.536713580521746e-06, "loss": 0.9148, "step": 14412 }, { "epoch": 0.83, "grad_norm": 1.914451003074646, "learning_rate": 1.5357242491687052e-06, "loss": 0.9583, "step": 14413 }, { "epoch": 0.83, "grad_norm": 1.9622336626052856, "learning_rate": 1.5347352098941748e-06, "loss": 0.9131, "step": 14414 }, { "epoch": 0.83, "grad_norm": 1.8761560916900635, "learning_rate": 1.5337464627322884e-06, "loss": 0.8787, "step": 14415 }, { "epoch": 0.83, "grad_norm": 1.7535345554351807, "learning_rate": 1.5327580077171589e-06, "loss": 0.9009, "step": 14416 }, { "epoch": 0.83, "grad_norm": 1.8927239179611206, "learning_rate": 1.531769844882901e-06, "loss": 0.8654, "step": 14417 }, { "epoch": 0.83, "grad_norm": 1.7496799230575562, "learning_rate": 1.5307819742636088e-06, "loss": 0.926, "step": 14418 }, { "epoch": 0.83, "grad_norm": 1.7160295248031616, "learning_rate": 1.5297943958933748e-06, "loss": 0.8637, "step": 14419 }, { "epoch": 0.83, "grad_norm": 2.0564165115356445, "learning_rate": 1.5288071098062728e-06, "loss": 0.9452, "step": 14420 }, { "epoch": 0.83, "grad_norm": 1.796385407447815, "learning_rate": 1.527820116036377e-06, "loss": 0.8583, "step": 14421 }, { "epoch": 0.83, "grad_norm": 1.1175018548965454, "learning_rate": 1.5268334146177399e-06, "loss": 0.6195, "step": 14422 }, { "epoch": 0.83, "grad_norm": 1.62832510471344, "learning_rate": 1.5258470055844131e-06, "loss": 0.8988, "step": 14423 }, { "epoch": 0.83, "grad_norm": 1.7216987609863281, "learning_rate": 1.5248608889704374e-06, "loss": 0.9537, "step": 14424 }, { "epoch": 0.83, "grad_norm": 1.9396353960037231, "learning_rate": 1.5238750648098354e-06, "loss": 0.9557, "step": 14425 }, { "epoch": 0.83, "grad_norm": 1.7440906763076782, "learning_rate": 1.5228895331366301e-06, "loss": 0.8261, "step": 14426 }, { "epoch": 0.83, "grad_norm": 1.7885385751724243, "learning_rate": 1.5219042939848249e-06, "loss": 0.8973, "step": 14427 }, { "epoch": 0.83, "grad_norm": 1.0642056465148926, "learning_rate": 1.5209193473884232e-06, "loss": 0.5111, "step": 14428 }, { "epoch": 0.83, "grad_norm": 1.7156176567077637, "learning_rate": 1.5199346933814052e-06, "loss": 0.8487, "step": 14429 }, { "epoch": 0.83, "grad_norm": 1.7713056802749634, "learning_rate": 1.5189503319977573e-06, "loss": 0.8843, "step": 14430 }, { "epoch": 0.83, "grad_norm": 1.7010061740875244, "learning_rate": 1.5179662632714364e-06, "loss": 0.8402, "step": 14431 }, { "epoch": 0.83, "grad_norm": 1.752675175666809, "learning_rate": 1.5169824872364115e-06, "loss": 0.855, "step": 14432 }, { "epoch": 0.83, "grad_norm": 1.8046355247497559, "learning_rate": 1.5159990039266215e-06, "loss": 0.9296, "step": 14433 }, { "epoch": 0.83, "grad_norm": 1.7720823287963867, "learning_rate": 1.5150158133760095e-06, "loss": 0.8991, "step": 14434 }, { "epoch": 0.83, "grad_norm": 1.7415226697921753, "learning_rate": 1.5140329156184974e-06, "loss": 0.9119, "step": 14435 }, { "epoch": 0.83, "grad_norm": 1.8239010572433472, "learning_rate": 1.513050310688008e-06, "loss": 0.9358, "step": 14436 }, { "epoch": 0.83, "grad_norm": 1.730281949043274, "learning_rate": 1.5120679986184417e-06, "loss": 0.8895, "step": 14437 }, { "epoch": 0.83, "grad_norm": 1.6903257369995117, "learning_rate": 1.5110859794437016e-06, "loss": 0.8775, "step": 14438 }, { "epoch": 0.83, "grad_norm": 1.767596960067749, "learning_rate": 1.5101042531976696e-06, "loss": 0.8957, "step": 14439 }, { "epoch": 0.83, "grad_norm": 1.7511062622070312, "learning_rate": 1.5091228199142238e-06, "loss": 0.8839, "step": 14440 }, { "epoch": 0.83, "grad_norm": 1.6825076341629028, "learning_rate": 1.508141679627233e-06, "loss": 0.8404, "step": 14441 }, { "epoch": 0.83, "grad_norm": 1.6293699741363525, "learning_rate": 1.50716083237055e-06, "loss": 0.8414, "step": 14442 }, { "epoch": 0.83, "grad_norm": 1.715467095375061, "learning_rate": 1.5061802781780244e-06, "loss": 0.9687, "step": 14443 }, { "epoch": 0.83, "grad_norm": 1.5926454067230225, "learning_rate": 1.50520001708349e-06, "loss": 0.8927, "step": 14444 }, { "epoch": 0.83, "grad_norm": 1.7566200494766235, "learning_rate": 1.5042200491207747e-06, "loss": 0.8873, "step": 14445 }, { "epoch": 0.83, "grad_norm": 1.6798533201217651, "learning_rate": 1.5032403743236924e-06, "loss": 0.8558, "step": 14446 }, { "epoch": 0.83, "grad_norm": 1.879533052444458, "learning_rate": 1.5022609927260512e-06, "loss": 0.9455, "step": 14447 }, { "epoch": 0.83, "grad_norm": 2.0210976600646973, "learning_rate": 1.5012819043616445e-06, "loss": 0.9137, "step": 14448 }, { "epoch": 0.83, "grad_norm": 1.763628602027893, "learning_rate": 1.5003031092642605e-06, "loss": 0.9455, "step": 14449 }, { "epoch": 0.83, "grad_norm": 1.7218728065490723, "learning_rate": 1.4993246074676714e-06, "loss": 0.9265, "step": 14450 }, { "epoch": 0.83, "grad_norm": 1.5834935903549194, "learning_rate": 1.4983463990056467e-06, "loss": 0.8959, "step": 14451 }, { "epoch": 0.83, "grad_norm": 1.6832629442214966, "learning_rate": 1.4973684839119362e-06, "loss": 0.8288, "step": 14452 }, { "epoch": 0.83, "grad_norm": 1.8081368207931519, "learning_rate": 1.4963908622202894e-06, "loss": 0.9487, "step": 14453 }, { "epoch": 0.83, "grad_norm": 1.7063283920288086, "learning_rate": 1.4954135339644416e-06, "loss": 0.9616, "step": 14454 }, { "epoch": 0.83, "grad_norm": 1.804421067237854, "learning_rate": 1.4944364991781147e-06, "loss": 0.9016, "step": 14455 }, { "epoch": 0.83, "grad_norm": 1.031509518623352, "learning_rate": 1.493459757895026e-06, "loss": 0.558, "step": 14456 }, { "epoch": 0.83, "grad_norm": 1.6179637908935547, "learning_rate": 1.4924833101488768e-06, "loss": 0.9058, "step": 14457 }, { "epoch": 0.83, "grad_norm": 1.6956065893173218, "learning_rate": 1.4915071559733673e-06, "loss": 0.9841, "step": 14458 }, { "epoch": 0.83, "grad_norm": 1.6311208009719849, "learning_rate": 1.4905312954021745e-06, "loss": 0.9052, "step": 14459 }, { "epoch": 0.83, "grad_norm": 2.0202057361602783, "learning_rate": 1.48955572846898e-06, "loss": 0.9269, "step": 14460 }, { "epoch": 0.83, "grad_norm": 1.7691367864608765, "learning_rate": 1.4885804552074413e-06, "loss": 0.8756, "step": 14461 }, { "epoch": 0.83, "grad_norm": 1.7620258331298828, "learning_rate": 1.4876054756512182e-06, "loss": 0.8745, "step": 14462 }, { "epoch": 0.83, "grad_norm": 1.795127034187317, "learning_rate": 1.4866307898339493e-06, "loss": 0.9139, "step": 14463 }, { "epoch": 0.83, "grad_norm": 1.9033477306365967, "learning_rate": 1.48565639778927e-06, "loss": 0.8792, "step": 14464 }, { "epoch": 0.83, "grad_norm": 1.7459214925765991, "learning_rate": 1.4846822995508082e-06, "loss": 0.9281, "step": 14465 }, { "epoch": 0.83, "grad_norm": 1.8313684463500977, "learning_rate": 1.4837084951521708e-06, "loss": 0.953, "step": 14466 }, { "epoch": 0.83, "grad_norm": 1.8939236402511597, "learning_rate": 1.4827349846269656e-06, "loss": 0.9173, "step": 14467 }, { "epoch": 0.83, "grad_norm": 1.8759583234786987, "learning_rate": 1.4817617680087826e-06, "loss": 0.9299, "step": 14468 }, { "epoch": 0.83, "grad_norm": 1.1508959531784058, "learning_rate": 1.480788845331208e-06, "loss": 0.5976, "step": 14469 }, { "epoch": 0.83, "grad_norm": 1.7790790796279907, "learning_rate": 1.4798162166278108e-06, "loss": 0.8131, "step": 14470 }, { "epoch": 0.83, "grad_norm": 1.7428867816925049, "learning_rate": 1.4788438819321582e-06, "loss": 0.8461, "step": 14471 }, { "epoch": 0.83, "grad_norm": 1.825386643409729, "learning_rate": 1.477871841277797e-06, "loss": 0.8968, "step": 14472 }, { "epoch": 0.83, "grad_norm": 1.0135072469711304, "learning_rate": 1.476900094698277e-06, "loss": 0.5121, "step": 14473 }, { "epoch": 0.83, "grad_norm": 1.0950936079025269, "learning_rate": 1.4759286422271224e-06, "loss": 0.5238, "step": 14474 }, { "epoch": 0.83, "grad_norm": 1.8173545598983765, "learning_rate": 1.474957483897863e-06, "loss": 0.8334, "step": 14475 }, { "epoch": 0.83, "grad_norm": 1.7673684358596802, "learning_rate": 1.4739866197440046e-06, "loss": 0.8571, "step": 14476 }, { "epoch": 0.83, "grad_norm": 1.0635735988616943, "learning_rate": 1.4730160497990509e-06, "loss": 0.5199, "step": 14477 }, { "epoch": 0.83, "grad_norm": 1.8114100694656372, "learning_rate": 1.4720457740964966e-06, "loss": 0.9029, "step": 14478 }, { "epoch": 0.83, "grad_norm": 1.8555278778076172, "learning_rate": 1.4710757926698182e-06, "loss": 0.8888, "step": 14479 }, { "epoch": 0.83, "grad_norm": 1.7651171684265137, "learning_rate": 1.4701061055524924e-06, "loss": 0.8153, "step": 14480 }, { "epoch": 0.83, "grad_norm": 1.6576436758041382, "learning_rate": 1.4691367127779754e-06, "loss": 0.867, "step": 14481 }, { "epoch": 0.83, "grad_norm": 1.7941083908081055, "learning_rate": 1.468167614379723e-06, "loss": 0.9149, "step": 14482 }, { "epoch": 0.83, "grad_norm": 1.943681001663208, "learning_rate": 1.4671988103911704e-06, "loss": 0.9049, "step": 14483 }, { "epoch": 0.83, "grad_norm": 1.8133623600006104, "learning_rate": 1.4662303008457536e-06, "loss": 0.8955, "step": 14484 }, { "epoch": 0.83, "grad_norm": 1.7643803358078003, "learning_rate": 1.4652620857768895e-06, "loss": 0.9603, "step": 14485 }, { "epoch": 0.83, "grad_norm": 1.8418092727661133, "learning_rate": 1.464294165217992e-06, "loss": 0.8432, "step": 14486 }, { "epoch": 0.83, "grad_norm": 1.7747565507888794, "learning_rate": 1.4633265392024564e-06, "loss": 0.9211, "step": 14487 }, { "epoch": 0.83, "grad_norm": 1.8276382684707642, "learning_rate": 1.4623592077636772e-06, "loss": 0.8965, "step": 14488 }, { "epoch": 0.83, "grad_norm": 1.5917805433273315, "learning_rate": 1.4613921709350342e-06, "loss": 0.8688, "step": 14489 }, { "epoch": 0.83, "grad_norm": 1.7457362413406372, "learning_rate": 1.460425428749893e-06, "loss": 0.9517, "step": 14490 }, { "epoch": 0.83, "grad_norm": 1.878147840499878, "learning_rate": 1.4594589812416182e-06, "loss": 0.9404, "step": 14491 }, { "epoch": 0.83, "grad_norm": 1.8353899717330933, "learning_rate": 1.458492828443555e-06, "loss": 0.8644, "step": 14492 }, { "epoch": 0.83, "grad_norm": 1.8390419483184814, "learning_rate": 1.4575269703890471e-06, "loss": 0.7979, "step": 14493 }, { "epoch": 0.83, "grad_norm": 1.8015797138214111, "learning_rate": 1.4565614071114187e-06, "loss": 0.9113, "step": 14494 }, { "epoch": 0.83, "grad_norm": 1.731790542602539, "learning_rate": 1.4555961386439933e-06, "loss": 0.8135, "step": 14495 }, { "epoch": 0.83, "grad_norm": 0.9548718333244324, "learning_rate": 1.454631165020075e-06, "loss": 0.5364, "step": 14496 }, { "epoch": 0.83, "grad_norm": 1.733827829360962, "learning_rate": 1.4536664862729643e-06, "loss": 0.9148, "step": 14497 }, { "epoch": 0.83, "grad_norm": 1.8217209577560425, "learning_rate": 1.45270210243595e-06, "loss": 0.8724, "step": 14498 }, { "epoch": 0.83, "grad_norm": 1.8738255500793457, "learning_rate": 1.4517380135423132e-06, "loss": 0.8954, "step": 14499 }, { "epoch": 0.83, "grad_norm": 1.6562740802764893, "learning_rate": 1.450774219625316e-06, "loss": 0.906, "step": 14500 }, { "epoch": 0.83, "grad_norm": 1.7145521640777588, "learning_rate": 1.449810720718221e-06, "loss": 0.9558, "step": 14501 }, { "epoch": 0.83, "grad_norm": 1.6991722583770752, "learning_rate": 1.4488475168542725e-06, "loss": 0.9062, "step": 14502 }, { "epoch": 0.83, "grad_norm": 1.7761893272399902, "learning_rate": 1.447884608066712e-06, "loss": 0.8824, "step": 14503 }, { "epoch": 0.83, "grad_norm": 1.8761807680130005, "learning_rate": 1.4469219943887613e-06, "loss": 0.8981, "step": 14504 }, { "epoch": 0.83, "grad_norm": 1.8143270015716553, "learning_rate": 1.445959675853641e-06, "loss": 0.8536, "step": 14505 }, { "epoch": 0.83, "grad_norm": 1.098664402961731, "learning_rate": 1.4449976524945598e-06, "loss": 0.5375, "step": 14506 }, { "epoch": 0.83, "grad_norm": 1.8778107166290283, "learning_rate": 1.44403592434471e-06, "loss": 0.8831, "step": 14507 }, { "epoch": 0.83, "grad_norm": 1.8655641078948975, "learning_rate": 1.443074491437283e-06, "loss": 0.9147, "step": 14508 }, { "epoch": 0.83, "grad_norm": 1.838841199874878, "learning_rate": 1.442113353805449e-06, "loss": 0.8532, "step": 14509 }, { "epoch": 0.83, "grad_norm": 1.6403008699417114, "learning_rate": 1.441152511482381e-06, "loss": 0.8071, "step": 14510 }, { "epoch": 0.83, "grad_norm": 1.9378477334976196, "learning_rate": 1.4401919645012286e-06, "loss": 0.9109, "step": 14511 }, { "epoch": 0.83, "grad_norm": 1.655186653137207, "learning_rate": 1.4392317128951438e-06, "loss": 0.8912, "step": 14512 }, { "epoch": 0.83, "grad_norm": 1.8928735256195068, "learning_rate": 1.438271756697256e-06, "loss": 0.956, "step": 14513 }, { "epoch": 0.83, "grad_norm": 1.75174880027771, "learning_rate": 1.437312095940696e-06, "loss": 0.9112, "step": 14514 }, { "epoch": 0.83, "grad_norm": 1.7490835189819336, "learning_rate": 1.4363527306585744e-06, "loss": 0.9233, "step": 14515 }, { "epoch": 0.83, "grad_norm": 1.8052923679351807, "learning_rate": 1.4353936608840014e-06, "loss": 0.8525, "step": 14516 }, { "epoch": 0.83, "grad_norm": 1.828856110572815, "learning_rate": 1.4344348866500657e-06, "loss": 0.945, "step": 14517 }, { "epoch": 0.83, "grad_norm": 1.8553471565246582, "learning_rate": 1.4334764079898556e-06, "loss": 0.9025, "step": 14518 }, { "epoch": 0.83, "grad_norm": 1.7000377178192139, "learning_rate": 1.4325182249364477e-06, "loss": 0.9362, "step": 14519 }, { "epoch": 0.83, "grad_norm": 1.8315136432647705, "learning_rate": 1.4315603375229003e-06, "loss": 0.9235, "step": 14520 }, { "epoch": 0.83, "grad_norm": 1.8198537826538086, "learning_rate": 1.4306027457822735e-06, "loss": 0.8198, "step": 14521 }, { "epoch": 0.83, "grad_norm": 1.8960990905761719, "learning_rate": 1.4296454497476064e-06, "loss": 0.8894, "step": 14522 }, { "epoch": 0.83, "grad_norm": 1.934336543083191, "learning_rate": 1.428688449451937e-06, "loss": 0.8653, "step": 14523 }, { "epoch": 0.83, "grad_norm": 1.8146134614944458, "learning_rate": 1.4277317449282834e-06, "loss": 0.9836, "step": 14524 }, { "epoch": 0.83, "grad_norm": 1.7093007564544678, "learning_rate": 1.4267753362096637e-06, "loss": 0.9747, "step": 14525 }, { "epoch": 0.83, "grad_norm": 1.744572639465332, "learning_rate": 1.4258192233290769e-06, "loss": 0.8567, "step": 14526 }, { "epoch": 0.83, "grad_norm": 1.0023268461227417, "learning_rate": 1.4248634063195198e-06, "loss": 0.5738, "step": 14527 }, { "epoch": 0.83, "grad_norm": 1.5838576555252075, "learning_rate": 1.4239078852139698e-06, "loss": 0.8924, "step": 14528 }, { "epoch": 0.83, "grad_norm": 1.9270954132080078, "learning_rate": 1.4229526600454058e-06, "loss": 0.869, "step": 14529 }, { "epoch": 0.83, "grad_norm": 1.6948373317718506, "learning_rate": 1.4219977308467836e-06, "loss": 0.884, "step": 14530 }, { "epoch": 0.83, "grad_norm": 1.615168809890747, "learning_rate": 1.421043097651058e-06, "loss": 0.9307, "step": 14531 }, { "epoch": 0.83, "grad_norm": 1.896254062652588, "learning_rate": 1.420088760491174e-06, "loss": 0.8447, "step": 14532 }, { "epoch": 0.83, "grad_norm": 1.7818957567214966, "learning_rate": 1.419134719400057e-06, "loss": 0.942, "step": 14533 }, { "epoch": 0.83, "grad_norm": 1.0576672554016113, "learning_rate": 1.4181809744106334e-06, "loss": 0.5439, "step": 14534 }, { "epoch": 0.83, "grad_norm": 1.7533237934112549, "learning_rate": 1.4172275255558088e-06, "loss": 0.7857, "step": 14535 }, { "epoch": 0.83, "grad_norm": 1.831253170967102, "learning_rate": 1.4162743728684914e-06, "loss": 0.8599, "step": 14536 }, { "epoch": 0.83, "grad_norm": 1.9453730583190918, "learning_rate": 1.4153215163815637e-06, "loss": 0.8341, "step": 14537 }, { "epoch": 0.83, "grad_norm": 1.7503539323806763, "learning_rate": 1.4143689561279138e-06, "loss": 0.8818, "step": 14538 }, { "epoch": 0.83, "grad_norm": 0.9909722208976746, "learning_rate": 1.4134166921404047e-06, "loss": 0.5849, "step": 14539 }, { "epoch": 0.83, "grad_norm": 1.6401209831237793, "learning_rate": 1.412464724451903e-06, "loss": 0.9332, "step": 14540 }, { "epoch": 0.83, "grad_norm": 1.8063476085662842, "learning_rate": 1.4115130530952526e-06, "loss": 0.9075, "step": 14541 }, { "epoch": 0.83, "grad_norm": 1.6439341306686401, "learning_rate": 1.410561678103296e-06, "loss": 0.8542, "step": 14542 }, { "epoch": 0.83, "grad_norm": 1.8168108463287354, "learning_rate": 1.4096105995088648e-06, "loss": 0.8325, "step": 14543 }, { "epoch": 0.83, "grad_norm": 1.7581793069839478, "learning_rate": 1.4086598173447729e-06, "loss": 0.8185, "step": 14544 }, { "epoch": 0.83, "grad_norm": 1.7775856256484985, "learning_rate": 1.407709331643834e-06, "loss": 0.8453, "step": 14545 }, { "epoch": 0.83, "grad_norm": 1.8567034006118774, "learning_rate": 1.4067591424388427e-06, "loss": 0.9075, "step": 14546 }, { "epoch": 0.83, "grad_norm": 1.8719321489334106, "learning_rate": 1.405809249762591e-06, "loss": 0.9418, "step": 14547 }, { "epoch": 0.83, "grad_norm": 1.7581191062927246, "learning_rate": 1.404859653647853e-06, "loss": 0.9579, "step": 14548 }, { "epoch": 0.83, "grad_norm": 1.715213656425476, "learning_rate": 1.4039103541274013e-06, "loss": 0.8069, "step": 14549 }, { "epoch": 0.83, "grad_norm": 1.7716665267944336, "learning_rate": 1.402961351233989e-06, "loss": 0.9809, "step": 14550 }, { "epoch": 0.83, "grad_norm": 1.7329015731811523, "learning_rate": 1.4020126450003669e-06, "loss": 0.8811, "step": 14551 }, { "epoch": 0.83, "grad_norm": 1.8000420331954956, "learning_rate": 1.4010642354592697e-06, "loss": 0.8852, "step": 14552 }, { "epoch": 0.83, "grad_norm": 2.110851764678955, "learning_rate": 1.4001161226434267e-06, "loss": 0.9364, "step": 14553 }, { "epoch": 0.83, "grad_norm": 1.8728327751159668, "learning_rate": 1.399168306585552e-06, "loss": 0.9032, "step": 14554 }, { "epoch": 0.83, "grad_norm": 1.024374008178711, "learning_rate": 1.398220787318354e-06, "loss": 0.5773, "step": 14555 }, { "epoch": 0.83, "grad_norm": 2.0640523433685303, "learning_rate": 1.3972735648745295e-06, "loss": 0.8925, "step": 14556 }, { "epoch": 0.83, "grad_norm": 1.6798175573349, "learning_rate": 1.3963266392867624e-06, "loss": 0.8254, "step": 14557 }, { "epoch": 0.83, "grad_norm": 1.8340222835540771, "learning_rate": 1.3953800105877313e-06, "loss": 0.897, "step": 14558 }, { "epoch": 0.83, "grad_norm": 1.649770975112915, "learning_rate": 1.3944336788100976e-06, "loss": 0.9169, "step": 14559 }, { "epoch": 0.84, "grad_norm": 1.7816296815872192, "learning_rate": 1.393487643986522e-06, "loss": 0.9888, "step": 14560 }, { "epoch": 0.84, "grad_norm": 1.1148887872695923, "learning_rate": 1.3925419061496436e-06, "loss": 0.5355, "step": 14561 }, { "epoch": 0.84, "grad_norm": 1.7746391296386719, "learning_rate": 1.3915964653321023e-06, "loss": 0.9097, "step": 14562 }, { "epoch": 0.84, "grad_norm": 1.766114592552185, "learning_rate": 1.3906513215665162e-06, "loss": 0.9787, "step": 14563 }, { "epoch": 0.84, "grad_norm": 1.7979731559753418, "learning_rate": 1.3897064748855083e-06, "loss": 0.8803, "step": 14564 }, { "epoch": 0.84, "grad_norm": 0.9663290977478027, "learning_rate": 1.3887619253216756e-06, "loss": 0.5337, "step": 14565 }, { "epoch": 0.84, "grad_norm": 1.8370264768600464, "learning_rate": 1.3878176729076166e-06, "loss": 0.8683, "step": 14566 }, { "epoch": 0.84, "grad_norm": 1.8813267946243286, "learning_rate": 1.3868737176759105e-06, "loss": 0.9833, "step": 14567 }, { "epoch": 0.84, "grad_norm": 1.843646764755249, "learning_rate": 1.3859300596591342e-06, "loss": 0.9492, "step": 14568 }, { "epoch": 0.84, "grad_norm": 1.8345117568969727, "learning_rate": 1.3849866988898474e-06, "loss": 0.9316, "step": 14569 }, { "epoch": 0.84, "grad_norm": 1.7999660968780518, "learning_rate": 1.384043635400607e-06, "loss": 0.904, "step": 14570 }, { "epoch": 0.84, "grad_norm": 1.9210731983184814, "learning_rate": 1.38310086922395e-06, "loss": 0.8121, "step": 14571 }, { "epoch": 0.84, "grad_norm": 1.8331910371780396, "learning_rate": 1.3821584003924127e-06, "loss": 0.8792, "step": 14572 }, { "epoch": 0.84, "grad_norm": 1.7950514554977417, "learning_rate": 1.3812162289385178e-06, "loss": 0.9092, "step": 14573 }, { "epoch": 0.84, "grad_norm": 1.8383709192276, "learning_rate": 1.3802743548947729e-06, "loss": 0.933, "step": 14574 }, { "epoch": 0.84, "grad_norm": 1.0912328958511353, "learning_rate": 1.3793327782936839e-06, "loss": 0.5875, "step": 14575 }, { "epoch": 0.84, "grad_norm": 1.8132764101028442, "learning_rate": 1.3783914991677373e-06, "loss": 0.8852, "step": 14576 }, { "epoch": 0.84, "grad_norm": 1.8958594799041748, "learning_rate": 1.3774505175494191e-06, "loss": 0.9726, "step": 14577 }, { "epoch": 0.84, "grad_norm": 1.754158616065979, "learning_rate": 1.3765098334711958e-06, "loss": 0.8495, "step": 14578 }, { "epoch": 0.84, "grad_norm": 1.8186291456222534, "learning_rate": 1.375569446965531e-06, "loss": 0.901, "step": 14579 }, { "epoch": 0.84, "grad_norm": 1.7348216772079468, "learning_rate": 1.3746293580648718e-06, "loss": 0.8185, "step": 14580 }, { "epoch": 0.84, "grad_norm": 1.9503923654556274, "learning_rate": 1.3736895668016603e-06, "loss": 0.9087, "step": 14581 }, { "epoch": 0.84, "grad_norm": 1.6520909070968628, "learning_rate": 1.3727500732083242e-06, "loss": 0.9836, "step": 14582 }, { "epoch": 0.84, "grad_norm": 1.6311596632003784, "learning_rate": 1.3718108773172855e-06, "loss": 0.8208, "step": 14583 }, { "epoch": 0.84, "grad_norm": 1.9474674463272095, "learning_rate": 1.3708719791609494e-06, "loss": 0.8268, "step": 14584 }, { "epoch": 0.84, "grad_norm": 1.6714344024658203, "learning_rate": 1.3699333787717173e-06, "loss": 0.7629, "step": 14585 }, { "epoch": 0.84, "grad_norm": 1.0072879791259766, "learning_rate": 1.3689950761819781e-06, "loss": 0.5606, "step": 14586 }, { "epoch": 0.84, "grad_norm": 1.6865061521530151, "learning_rate": 1.3680570714241082e-06, "loss": 0.8563, "step": 14587 }, { "epoch": 0.84, "grad_norm": 1.7102704048156738, "learning_rate": 1.367119364530478e-06, "loss": 0.8933, "step": 14588 }, { "epoch": 0.84, "grad_norm": 1.7438291311264038, "learning_rate": 1.366181955533441e-06, "loss": 0.9024, "step": 14589 }, { "epoch": 0.84, "grad_norm": 1.6683228015899658, "learning_rate": 1.3652448444653499e-06, "loss": 0.8125, "step": 14590 }, { "epoch": 0.84, "grad_norm": 1.7147173881530762, "learning_rate": 1.3643080313585366e-06, "loss": 0.8906, "step": 14591 }, { "epoch": 0.84, "grad_norm": 1.8748008012771606, "learning_rate": 1.363371516245333e-06, "loss": 0.9295, "step": 14592 }, { "epoch": 0.84, "grad_norm": 1.777127981185913, "learning_rate": 1.3624352991580503e-06, "loss": 0.8788, "step": 14593 }, { "epoch": 0.84, "grad_norm": 1.0049889087677002, "learning_rate": 1.361499380129e-06, "loss": 0.5152, "step": 14594 }, { "epoch": 0.84, "grad_norm": 1.757861614227295, "learning_rate": 1.360563759190473e-06, "loss": 0.9012, "step": 14595 }, { "epoch": 0.84, "grad_norm": 1.7264031171798706, "learning_rate": 1.359628436374757e-06, "loss": 0.8299, "step": 14596 }, { "epoch": 0.84, "grad_norm": 1.7234222888946533, "learning_rate": 1.3586934117141304e-06, "loss": 0.8478, "step": 14597 }, { "epoch": 0.84, "grad_norm": 1.686714768409729, "learning_rate": 1.357758685240853e-06, "loss": 0.9318, "step": 14598 }, { "epoch": 0.84, "grad_norm": 1.7773762941360474, "learning_rate": 1.3568242569871847e-06, "loss": 0.914, "step": 14599 }, { "epoch": 0.84, "grad_norm": 1.6407865285873413, "learning_rate": 1.3558901269853653e-06, "loss": 0.9006, "step": 14600 }, { "epoch": 0.84, "grad_norm": 0.9167946577072144, "learning_rate": 1.354956295267633e-06, "loss": 0.499, "step": 14601 }, { "epoch": 0.84, "grad_norm": 1.7722874879837036, "learning_rate": 1.3540227618662082e-06, "loss": 0.8829, "step": 14602 }, { "epoch": 0.84, "grad_norm": 1.5769129991531372, "learning_rate": 1.3530895268133083e-06, "loss": 0.9115, "step": 14603 }, { "epoch": 0.84, "grad_norm": 1.7653549909591675, "learning_rate": 1.3521565901411327e-06, "loss": 0.8497, "step": 14604 }, { "epoch": 0.84, "grad_norm": 1.7872886657714844, "learning_rate": 1.3512239518818793e-06, "loss": 0.9064, "step": 14605 }, { "epoch": 0.84, "grad_norm": 1.924564242362976, "learning_rate": 1.3502916120677246e-06, "loss": 0.8283, "step": 14606 }, { "epoch": 0.84, "grad_norm": 1.663714051246643, "learning_rate": 1.3493595707308472e-06, "loss": 0.8743, "step": 14607 }, { "epoch": 0.84, "grad_norm": 1.709092378616333, "learning_rate": 1.3484278279034046e-06, "loss": 0.8754, "step": 14608 }, { "epoch": 0.84, "grad_norm": 1.8538317680358887, "learning_rate": 1.3474963836175492e-06, "loss": 0.9097, "step": 14609 }, { "epoch": 0.84, "grad_norm": 1.7197909355163574, "learning_rate": 1.3465652379054273e-06, "loss": 0.9511, "step": 14610 }, { "epoch": 0.84, "grad_norm": 1.766197919845581, "learning_rate": 1.3456343907991632e-06, "loss": 0.9089, "step": 14611 }, { "epoch": 0.84, "grad_norm": 1.7603367567062378, "learning_rate": 1.3447038423308845e-06, "loss": 0.9528, "step": 14612 }, { "epoch": 0.84, "grad_norm": 1.7487378120422363, "learning_rate": 1.3437735925326968e-06, "loss": 0.9684, "step": 14613 }, { "epoch": 0.84, "grad_norm": 1.0952643156051636, "learning_rate": 1.342843641436703e-06, "loss": 0.5713, "step": 14614 }, { "epoch": 0.84, "grad_norm": 1.793286681175232, "learning_rate": 1.341913989074991e-06, "loss": 0.8595, "step": 14615 }, { "epoch": 0.84, "grad_norm": 1.833357810974121, "learning_rate": 1.340984635479644e-06, "loss": 0.8689, "step": 14616 }, { "epoch": 0.84, "grad_norm": 1.680869221687317, "learning_rate": 1.3400555806827265e-06, "loss": 0.7951, "step": 14617 }, { "epoch": 0.84, "grad_norm": 1.8264920711517334, "learning_rate": 1.3391268247163037e-06, "loss": 0.9169, "step": 14618 }, { "epoch": 0.84, "grad_norm": 1.704236626625061, "learning_rate": 1.3381983676124178e-06, "loss": 0.7916, "step": 14619 }, { "epoch": 0.84, "grad_norm": 1.979659914970398, "learning_rate": 1.337270209403111e-06, "loss": 0.8328, "step": 14620 }, { "epoch": 0.84, "grad_norm": 1.7868047952651978, "learning_rate": 1.336342350120413e-06, "loss": 0.9431, "step": 14621 }, { "epoch": 0.84, "grad_norm": 1.8141860961914062, "learning_rate": 1.3354147897963365e-06, "loss": 0.9386, "step": 14622 }, { "epoch": 0.84, "grad_norm": 1.7215797901153564, "learning_rate": 1.3344875284628956e-06, "loss": 0.8701, "step": 14623 }, { "epoch": 0.84, "grad_norm": 1.6068801879882812, "learning_rate": 1.333560566152081e-06, "loss": 0.8972, "step": 14624 }, { "epoch": 0.84, "grad_norm": 1.7842477560043335, "learning_rate": 1.3326339028958846e-06, "loss": 0.8862, "step": 14625 }, { "epoch": 0.84, "grad_norm": 1.0018666982650757, "learning_rate": 1.331707538726279e-06, "loss": 0.5578, "step": 14626 }, { "epoch": 0.84, "grad_norm": 2.108668088912964, "learning_rate": 1.3307814736752344e-06, "loss": 0.9717, "step": 14627 }, { "epoch": 0.84, "grad_norm": 1.7983465194702148, "learning_rate": 1.3298557077747032e-06, "loss": 0.8959, "step": 14628 }, { "epoch": 0.84, "grad_norm": 0.971538245677948, "learning_rate": 1.3289302410566318e-06, "loss": 0.5085, "step": 14629 }, { "epoch": 0.84, "grad_norm": 1.8674952983856201, "learning_rate": 1.328005073552956e-06, "loss": 0.9247, "step": 14630 }, { "epoch": 0.84, "grad_norm": 1.7466880083084106, "learning_rate": 1.3270802052956033e-06, "loss": 0.8768, "step": 14631 }, { "epoch": 0.84, "grad_norm": 1.8704296350479126, "learning_rate": 1.326155636316483e-06, "loss": 0.8788, "step": 14632 }, { "epoch": 0.84, "grad_norm": 1.635634422302246, "learning_rate": 1.3252313666475058e-06, "loss": 0.8601, "step": 14633 }, { "epoch": 0.84, "grad_norm": 1.7505536079406738, "learning_rate": 1.3243073963205589e-06, "loss": 0.8722, "step": 14634 }, { "epoch": 0.84, "grad_norm": 1.7863550186157227, "learning_rate": 1.3233837253675319e-06, "loss": 0.8488, "step": 14635 }, { "epoch": 0.84, "grad_norm": 1.7028264999389648, "learning_rate": 1.3224603538202929e-06, "loss": 0.9937, "step": 14636 }, { "epoch": 0.84, "grad_norm": 1.8244420289993286, "learning_rate": 1.3215372817107098e-06, "loss": 0.896, "step": 14637 }, { "epoch": 0.84, "grad_norm": 1.9127250909805298, "learning_rate": 1.3206145090706302e-06, "loss": 0.9085, "step": 14638 }, { "epoch": 0.84, "grad_norm": 1.7490317821502686, "learning_rate": 1.3196920359318998e-06, "loss": 0.8647, "step": 14639 }, { "epoch": 0.84, "grad_norm": 1.7005019187927246, "learning_rate": 1.3187698623263511e-06, "loss": 0.8674, "step": 14640 }, { "epoch": 0.84, "grad_norm": 1.5824023485183716, "learning_rate": 1.317847988285803e-06, "loss": 0.8092, "step": 14641 }, { "epoch": 0.84, "grad_norm": 1.8160111904144287, "learning_rate": 1.31692641384207e-06, "loss": 0.8675, "step": 14642 }, { "epoch": 0.84, "grad_norm": 1.7849373817443848, "learning_rate": 1.316005139026949e-06, "loss": 0.8471, "step": 14643 }, { "epoch": 0.84, "grad_norm": 1.7585822343826294, "learning_rate": 1.3150841638722355e-06, "loss": 0.8718, "step": 14644 }, { "epoch": 0.84, "grad_norm": 1.8520047664642334, "learning_rate": 1.3141634884097043e-06, "loss": 0.9042, "step": 14645 }, { "epoch": 0.84, "grad_norm": 1.841574788093567, "learning_rate": 1.313243112671131e-06, "loss": 0.8655, "step": 14646 }, { "epoch": 0.84, "grad_norm": 1.7779295444488525, "learning_rate": 1.31232303668827e-06, "loss": 0.9831, "step": 14647 }, { "epoch": 0.84, "grad_norm": 0.9942909479141235, "learning_rate": 1.311403260492875e-06, "loss": 0.5375, "step": 14648 }, { "epoch": 0.84, "grad_norm": 1.8605635166168213, "learning_rate": 1.3104837841166807e-06, "loss": 0.8539, "step": 14649 }, { "epoch": 0.84, "grad_norm": 1.7608391046524048, "learning_rate": 1.309564607591418e-06, "loss": 0.8003, "step": 14650 }, { "epoch": 0.84, "grad_norm": 1.7137819528579712, "learning_rate": 1.3086457309488066e-06, "loss": 0.8902, "step": 14651 }, { "epoch": 0.84, "grad_norm": 1.6845453977584839, "learning_rate": 1.3077271542205517e-06, "loss": 0.8504, "step": 14652 }, { "epoch": 0.84, "grad_norm": 1.8603036403656006, "learning_rate": 1.3068088774383525e-06, "loss": 0.8432, "step": 14653 }, { "epoch": 0.84, "grad_norm": 1.8364906311035156, "learning_rate": 1.305890900633895e-06, "loss": 0.9811, "step": 14654 }, { "epoch": 0.84, "grad_norm": 1.7778388261795044, "learning_rate": 1.304973223838857e-06, "loss": 0.9553, "step": 14655 }, { "epoch": 0.84, "grad_norm": 1.8376907110214233, "learning_rate": 1.304055847084903e-06, "loss": 0.9702, "step": 14656 }, { "epoch": 0.84, "grad_norm": 1.8152296543121338, "learning_rate": 1.3031387704036935e-06, "loss": 0.8531, "step": 14657 }, { "epoch": 0.84, "grad_norm": 0.9518057703971863, "learning_rate": 1.3022219938268677e-06, "loss": 0.4785, "step": 14658 }, { "epoch": 0.84, "grad_norm": 1.8720519542694092, "learning_rate": 1.3013055173860678e-06, "loss": 0.9051, "step": 14659 }, { "epoch": 0.84, "grad_norm": 1.7312129735946655, "learning_rate": 1.3003893411129131e-06, "loss": 0.9132, "step": 14660 }, { "epoch": 0.84, "grad_norm": 1.6815332174301147, "learning_rate": 1.2994734650390239e-06, "loss": 0.8798, "step": 14661 }, { "epoch": 0.84, "grad_norm": 1.8160277605056763, "learning_rate": 1.2985578891959983e-06, "loss": 0.8936, "step": 14662 }, { "epoch": 0.84, "grad_norm": 1.795061707496643, "learning_rate": 1.297642613615434e-06, "loss": 0.8924, "step": 14663 }, { "epoch": 0.84, "grad_norm": 1.8633986711502075, "learning_rate": 1.296727638328915e-06, "loss": 0.8444, "step": 14664 }, { "epoch": 0.84, "grad_norm": 1.6985819339752197, "learning_rate": 1.2958129633680128e-06, "loss": 0.8705, "step": 14665 }, { "epoch": 0.84, "grad_norm": 1.6409038305282593, "learning_rate": 1.294898588764293e-06, "loss": 0.8598, "step": 14666 }, { "epoch": 0.84, "grad_norm": 1.8164708614349365, "learning_rate": 1.2939845145493036e-06, "loss": 0.8959, "step": 14667 }, { "epoch": 0.84, "grad_norm": 1.5908163785934448, "learning_rate": 1.2930707407545917e-06, "loss": 0.9183, "step": 14668 }, { "epoch": 0.84, "grad_norm": 1.9382691383361816, "learning_rate": 1.2921572674116845e-06, "loss": 0.9225, "step": 14669 }, { "epoch": 0.84, "grad_norm": 1.8026989698410034, "learning_rate": 1.2912440945521087e-06, "loss": 0.9021, "step": 14670 }, { "epoch": 0.84, "grad_norm": 1.8587267398834229, "learning_rate": 1.2903312222073695e-06, "loss": 0.9341, "step": 14671 }, { "epoch": 0.84, "grad_norm": 1.7153987884521484, "learning_rate": 1.2894186504089712e-06, "loss": 0.8362, "step": 14672 }, { "epoch": 0.84, "grad_norm": 1.8463294506072998, "learning_rate": 1.2885063791884023e-06, "loss": 0.956, "step": 14673 }, { "epoch": 0.84, "grad_norm": 1.7521140575408936, "learning_rate": 1.2875944085771441e-06, "loss": 0.8768, "step": 14674 }, { "epoch": 0.84, "grad_norm": 1.0366640090942383, "learning_rate": 1.2866827386066672e-06, "loss": 0.5232, "step": 14675 }, { "epoch": 0.84, "grad_norm": 1.7474634647369385, "learning_rate": 1.2857713693084272e-06, "loss": 0.9478, "step": 14676 }, { "epoch": 0.84, "grad_norm": 2.0166056156158447, "learning_rate": 1.2848603007138772e-06, "loss": 0.9165, "step": 14677 }, { "epoch": 0.84, "grad_norm": 1.6873531341552734, "learning_rate": 1.2839495328544515e-06, "loss": 0.9043, "step": 14678 }, { "epoch": 0.84, "grad_norm": 1.9208621978759766, "learning_rate": 1.2830390657615821e-06, "loss": 0.8333, "step": 14679 }, { "epoch": 0.84, "grad_norm": 1.8275736570358276, "learning_rate": 1.2821288994666824e-06, "loss": 0.8807, "step": 14680 }, { "epoch": 0.84, "grad_norm": 1.805232286453247, "learning_rate": 1.2812190340011654e-06, "loss": 0.938, "step": 14681 }, { "epoch": 0.84, "grad_norm": 1.9696357250213623, "learning_rate": 1.2803094693964214e-06, "loss": 0.9082, "step": 14682 }, { "epoch": 0.84, "grad_norm": 1.716220736503601, "learning_rate": 1.2794002056838417e-06, "loss": 0.8086, "step": 14683 }, { "epoch": 0.84, "grad_norm": 1.7015950679779053, "learning_rate": 1.2784912428947994e-06, "loss": 0.9517, "step": 14684 }, { "epoch": 0.84, "grad_norm": 1.683508276939392, "learning_rate": 1.2775825810606635e-06, "loss": 0.8513, "step": 14685 }, { "epoch": 0.84, "grad_norm": 1.800771951675415, "learning_rate": 1.2766742202127858e-06, "loss": 0.9126, "step": 14686 }, { "epoch": 0.84, "grad_norm": 1.7635225057601929, "learning_rate": 1.2757661603825133e-06, "loss": 0.907, "step": 14687 }, { "epoch": 0.84, "grad_norm": 1.7570717334747314, "learning_rate": 1.2748584016011834e-06, "loss": 0.9581, "step": 14688 }, { "epoch": 0.84, "grad_norm": 1.8386635780334473, "learning_rate": 1.273950943900114e-06, "loss": 0.8998, "step": 14689 }, { "epoch": 0.84, "grad_norm": 1.9459898471832275, "learning_rate": 1.273043787310625e-06, "loss": 0.8974, "step": 14690 }, { "epoch": 0.84, "grad_norm": 1.7218345403671265, "learning_rate": 1.2721369318640142e-06, "loss": 0.9154, "step": 14691 }, { "epoch": 0.84, "grad_norm": 1.892529010772705, "learning_rate": 1.2712303775915803e-06, "loss": 0.8184, "step": 14692 }, { "epoch": 0.84, "grad_norm": 1.7933043241500854, "learning_rate": 1.2703241245246012e-06, "loss": 0.886, "step": 14693 }, { "epoch": 0.84, "grad_norm": 1.7797266244888306, "learning_rate": 1.2694181726943533e-06, "loss": 0.9099, "step": 14694 }, { "epoch": 0.84, "grad_norm": 1.0341979265213013, "learning_rate": 1.2685125221320915e-06, "loss": 0.4811, "step": 14695 }, { "epoch": 0.84, "grad_norm": 1.7987921237945557, "learning_rate": 1.2676071728690765e-06, "loss": 0.8194, "step": 14696 }, { "epoch": 0.84, "grad_norm": 1.7758455276489258, "learning_rate": 1.2667021249365442e-06, "loss": 0.9164, "step": 14697 }, { "epoch": 0.84, "grad_norm": 1.9008455276489258, "learning_rate": 1.2657973783657262e-06, "loss": 0.9224, "step": 14698 }, { "epoch": 0.84, "grad_norm": 1.9163676500320435, "learning_rate": 1.2648929331878423e-06, "loss": 0.9543, "step": 14699 }, { "epoch": 0.84, "grad_norm": 1.7035263776779175, "learning_rate": 1.2639887894341042e-06, "loss": 0.9427, "step": 14700 }, { "epoch": 0.84, "grad_norm": 1.7910116910934448, "learning_rate": 1.2630849471357075e-06, "loss": 0.9357, "step": 14701 }, { "epoch": 0.84, "grad_norm": 1.7919225692749023, "learning_rate": 1.2621814063238457e-06, "loss": 0.8516, "step": 14702 }, { "epoch": 0.84, "grad_norm": 1.8103469610214233, "learning_rate": 1.2612781670296936e-06, "loss": 0.9023, "step": 14703 }, { "epoch": 0.84, "grad_norm": 1.7206581830978394, "learning_rate": 1.2603752292844219e-06, "loss": 0.7913, "step": 14704 }, { "epoch": 0.84, "grad_norm": 1.0068676471710205, "learning_rate": 1.2594725931191898e-06, "loss": 0.5049, "step": 14705 }, { "epoch": 0.84, "grad_norm": 1.8440759181976318, "learning_rate": 1.2585702585651404e-06, "loss": 0.8608, "step": 14706 }, { "epoch": 0.84, "grad_norm": 1.9100841283798218, "learning_rate": 1.2576682256534144e-06, "loss": 0.8481, "step": 14707 }, { "epoch": 0.84, "grad_norm": 1.646077275276184, "learning_rate": 1.256766494415137e-06, "loss": 0.9455, "step": 14708 }, { "epoch": 0.84, "grad_norm": 1.7600090503692627, "learning_rate": 1.2558650648814253e-06, "loss": 0.9275, "step": 14709 }, { "epoch": 0.84, "grad_norm": 1.6959959268569946, "learning_rate": 1.2549639370833832e-06, "loss": 0.9367, "step": 14710 }, { "epoch": 0.84, "grad_norm": 1.7412863969802856, "learning_rate": 1.2540631110521085e-06, "loss": 0.9544, "step": 14711 }, { "epoch": 0.84, "grad_norm": 1.886684536933899, "learning_rate": 1.2531625868186835e-06, "loss": 0.9061, "step": 14712 }, { "epoch": 0.84, "grad_norm": 1.8368618488311768, "learning_rate": 1.2522623644141863e-06, "loss": 0.8694, "step": 14713 }, { "epoch": 0.84, "grad_norm": 1.7116056680679321, "learning_rate": 1.2513624438696782e-06, "loss": 0.9193, "step": 14714 }, { "epoch": 0.84, "grad_norm": 1.7453137636184692, "learning_rate": 1.2504628252162143e-06, "loss": 0.7886, "step": 14715 }, { "epoch": 0.84, "grad_norm": 1.8006501197814941, "learning_rate": 1.2495635084848356e-06, "loss": 0.9402, "step": 14716 }, { "epoch": 0.84, "grad_norm": 1.0127891302108765, "learning_rate": 1.2486644937065774e-06, "loss": 0.5225, "step": 14717 }, { "epoch": 0.84, "grad_norm": 1.7752655744552612, "learning_rate": 1.2477657809124632e-06, "loss": 0.923, "step": 14718 }, { "epoch": 0.84, "grad_norm": 1.9243180751800537, "learning_rate": 1.2468673701335022e-06, "loss": 0.9176, "step": 14719 }, { "epoch": 0.84, "grad_norm": 1.6877802610397339, "learning_rate": 1.245969261400699e-06, "loss": 0.8868, "step": 14720 }, { "epoch": 0.84, "grad_norm": 1.7470992803573608, "learning_rate": 1.2450714547450414e-06, "loss": 0.8554, "step": 14721 }, { "epoch": 0.84, "grad_norm": 1.6644604206085205, "learning_rate": 1.2441739501975137e-06, "loss": 0.8545, "step": 14722 }, { "epoch": 0.84, "grad_norm": 1.6703284978866577, "learning_rate": 1.2432767477890828e-06, "loss": 0.8596, "step": 14723 }, { "epoch": 0.84, "grad_norm": 1.8213419914245605, "learning_rate": 1.242379847550712e-06, "loss": 0.9732, "step": 14724 }, { "epoch": 0.84, "grad_norm": 1.9877550601959229, "learning_rate": 1.2414832495133477e-06, "loss": 0.9412, "step": 14725 }, { "epoch": 0.84, "grad_norm": 2.0882115364074707, "learning_rate": 1.2405869537079317e-06, "loss": 0.8787, "step": 14726 }, { "epoch": 0.84, "grad_norm": 1.7571953535079956, "learning_rate": 1.2396909601653906e-06, "loss": 0.7984, "step": 14727 }, { "epoch": 0.84, "grad_norm": 1.8994741439819336, "learning_rate": 1.2387952689166426e-06, "loss": 0.9049, "step": 14728 }, { "epoch": 0.84, "grad_norm": 1.7688002586364746, "learning_rate": 1.237899879992599e-06, "loss": 0.8589, "step": 14729 }, { "epoch": 0.84, "grad_norm": 1.685086965560913, "learning_rate": 1.2370047934241525e-06, "loss": 0.9536, "step": 14730 }, { "epoch": 0.84, "grad_norm": 1.7471513748168945, "learning_rate": 1.2361100092421941e-06, "loss": 0.9025, "step": 14731 }, { "epoch": 0.84, "grad_norm": 1.8116681575775146, "learning_rate": 1.2352155274775967e-06, "loss": 0.9751, "step": 14732 }, { "epoch": 0.84, "grad_norm": 0.9792311787605286, "learning_rate": 1.2343213481612293e-06, "loss": 0.472, "step": 14733 }, { "epoch": 0.85, "grad_norm": 0.9582418203353882, "learning_rate": 1.2334274713239447e-06, "loss": 0.4785, "step": 14734 }, { "epoch": 0.85, "grad_norm": 1.8422155380249023, "learning_rate": 1.2325338969965916e-06, "loss": 0.8384, "step": 14735 }, { "epoch": 0.85, "grad_norm": 1.8625606298446655, "learning_rate": 1.2316406252100011e-06, "loss": 0.8972, "step": 14736 }, { "epoch": 0.85, "grad_norm": 1.8275922536849976, "learning_rate": 1.2307476559950004e-06, "loss": 0.894, "step": 14737 }, { "epoch": 0.85, "grad_norm": 1.7536301612854004, "learning_rate": 1.2298549893824008e-06, "loss": 0.8512, "step": 14738 }, { "epoch": 0.85, "grad_norm": 2.0371437072753906, "learning_rate": 1.2289626254030084e-06, "loss": 0.9631, "step": 14739 }, { "epoch": 0.85, "grad_norm": 1.9717128276824951, "learning_rate": 1.2280705640876134e-06, "loss": 0.8285, "step": 14740 }, { "epoch": 0.85, "grad_norm": 1.7757149934768677, "learning_rate": 1.2271788054669997e-06, "loss": 0.864, "step": 14741 }, { "epoch": 0.85, "grad_norm": 1.7596187591552734, "learning_rate": 1.2262873495719418e-06, "loss": 0.9644, "step": 14742 }, { "epoch": 0.85, "grad_norm": 1.7524757385253906, "learning_rate": 1.2253961964331973e-06, "loss": 0.8478, "step": 14743 }, { "epoch": 0.85, "grad_norm": 1.780735969543457, "learning_rate": 1.2245053460815204e-06, "loss": 0.9573, "step": 14744 }, { "epoch": 0.85, "grad_norm": 1.8444665670394897, "learning_rate": 1.22361479854765e-06, "loss": 0.8974, "step": 14745 }, { "epoch": 0.85, "grad_norm": 1.7797430753707886, "learning_rate": 1.2227245538623178e-06, "loss": 0.8409, "step": 14746 }, { "epoch": 0.85, "grad_norm": 1.7705284357070923, "learning_rate": 1.2218346120562407e-06, "loss": 0.8893, "step": 14747 }, { "epoch": 0.85, "grad_norm": 1.8666045665740967, "learning_rate": 1.220944973160133e-06, "loss": 0.8328, "step": 14748 }, { "epoch": 0.85, "grad_norm": 1.661313772201538, "learning_rate": 1.220055637204689e-06, "loss": 0.8439, "step": 14749 }, { "epoch": 0.85, "grad_norm": 1.7973288297653198, "learning_rate": 1.2191666042206007e-06, "loss": 0.9281, "step": 14750 }, { "epoch": 0.85, "grad_norm": 1.7284934520721436, "learning_rate": 1.2182778742385438e-06, "loss": 0.9466, "step": 14751 }, { "epoch": 0.85, "grad_norm": 1.858979344367981, "learning_rate": 1.2173894472891857e-06, "loss": 0.9179, "step": 14752 }, { "epoch": 0.85, "grad_norm": 1.7412071228027344, "learning_rate": 1.2165013234031864e-06, "loss": 0.9106, "step": 14753 }, { "epoch": 0.85, "grad_norm": 1.856900930404663, "learning_rate": 1.2156135026111892e-06, "loss": 0.9255, "step": 14754 }, { "epoch": 0.85, "grad_norm": 1.7105193138122559, "learning_rate": 1.2147259849438342e-06, "loss": 0.9548, "step": 14755 }, { "epoch": 0.85, "grad_norm": 1.759567141532898, "learning_rate": 1.2138387704317422e-06, "loss": 0.8662, "step": 14756 }, { "epoch": 0.85, "grad_norm": 1.9352344274520874, "learning_rate": 1.2129518591055323e-06, "loss": 0.8995, "step": 14757 }, { "epoch": 0.85, "grad_norm": 1.7994494438171387, "learning_rate": 1.2120652509958075e-06, "loss": 0.9484, "step": 14758 }, { "epoch": 0.85, "grad_norm": 1.7424341440200806, "learning_rate": 1.2111789461331646e-06, "loss": 0.9011, "step": 14759 }, { "epoch": 0.85, "grad_norm": 1.7624995708465576, "learning_rate": 1.2102929445481827e-06, "loss": 0.9092, "step": 14760 }, { "epoch": 0.85, "grad_norm": 1.8309754133224487, "learning_rate": 1.209407246271439e-06, "loss": 0.9086, "step": 14761 }, { "epoch": 0.85, "grad_norm": 1.7244170904159546, "learning_rate": 1.208521851333495e-06, "loss": 0.9141, "step": 14762 }, { "epoch": 0.85, "grad_norm": 1.6323603391647339, "learning_rate": 1.2076367597649075e-06, "loss": 0.8591, "step": 14763 }, { "epoch": 0.85, "grad_norm": 1.7281302213668823, "learning_rate": 1.2067519715962116e-06, "loss": 0.8696, "step": 14764 }, { "epoch": 0.85, "grad_norm": 1.9572097063064575, "learning_rate": 1.2058674868579446e-06, "loss": 0.8585, "step": 14765 }, { "epoch": 0.85, "grad_norm": 1.9160422086715698, "learning_rate": 1.2049833055806227e-06, "loss": 0.9354, "step": 14766 }, { "epoch": 0.85, "grad_norm": 1.7224925756454468, "learning_rate": 1.2040994277947615e-06, "loss": 0.8782, "step": 14767 }, { "epoch": 0.85, "grad_norm": 1.69132661819458, "learning_rate": 1.203215853530857e-06, "loss": 0.8399, "step": 14768 }, { "epoch": 0.85, "grad_norm": 1.8126194477081299, "learning_rate": 1.202332582819402e-06, "loss": 0.9485, "step": 14769 }, { "epoch": 0.85, "grad_norm": 1.7348047494888306, "learning_rate": 1.2014496156908728e-06, "loss": 0.9025, "step": 14770 }, { "epoch": 0.85, "grad_norm": 1.8124852180480957, "learning_rate": 1.200566952175739e-06, "loss": 0.9332, "step": 14771 }, { "epoch": 0.85, "grad_norm": 1.7152577638626099, "learning_rate": 1.199684592304462e-06, "loss": 0.8171, "step": 14772 }, { "epoch": 0.85, "grad_norm": 1.6856400966644287, "learning_rate": 1.198802536107484e-06, "loss": 0.9956, "step": 14773 }, { "epoch": 0.85, "grad_norm": 1.9402037858963013, "learning_rate": 1.1979207836152484e-06, "loss": 0.8895, "step": 14774 }, { "epoch": 0.85, "grad_norm": 1.668998122215271, "learning_rate": 1.1970393348581766e-06, "loss": 0.8255, "step": 14775 }, { "epoch": 0.85, "grad_norm": 1.7074435949325562, "learning_rate": 1.1961581898666895e-06, "loss": 0.9107, "step": 14776 }, { "epoch": 0.85, "grad_norm": 1.6808173656463623, "learning_rate": 1.195277348671189e-06, "loss": 0.7387, "step": 14777 }, { "epoch": 0.85, "grad_norm": 1.7139525413513184, "learning_rate": 1.1943968113020733e-06, "loss": 0.8979, "step": 14778 }, { "epoch": 0.85, "grad_norm": 1.7274811267852783, "learning_rate": 1.193516577789725e-06, "loss": 0.903, "step": 14779 }, { "epoch": 0.85, "grad_norm": 1.6515867710113525, "learning_rate": 1.1926366481645213e-06, "loss": 0.9149, "step": 14780 }, { "epoch": 0.85, "grad_norm": 1.8225899934768677, "learning_rate": 1.191757022456822e-06, "loss": 0.91, "step": 14781 }, { "epoch": 0.85, "grad_norm": 1.883935570716858, "learning_rate": 1.1908777006969841e-06, "loss": 0.8197, "step": 14782 }, { "epoch": 0.85, "grad_norm": 1.7968236207962036, "learning_rate": 1.189998682915351e-06, "loss": 0.8987, "step": 14783 }, { "epoch": 0.85, "grad_norm": 1.7596200704574585, "learning_rate": 1.1891199691422517e-06, "loss": 0.9379, "step": 14784 }, { "epoch": 0.85, "grad_norm": 1.8464593887329102, "learning_rate": 1.1882415594080111e-06, "loss": 0.8107, "step": 14785 }, { "epoch": 0.85, "grad_norm": 1.6986284255981445, "learning_rate": 1.187363453742939e-06, "loss": 0.8749, "step": 14786 }, { "epoch": 0.85, "grad_norm": 1.7600467205047607, "learning_rate": 1.1864856521773382e-06, "loss": 0.8687, "step": 14787 }, { "epoch": 0.85, "grad_norm": 1.7135143280029297, "learning_rate": 1.1856081547414965e-06, "loss": 0.9506, "step": 14788 }, { "epoch": 0.85, "grad_norm": 1.9230210781097412, "learning_rate": 1.1847309614656966e-06, "loss": 0.8943, "step": 14789 }, { "epoch": 0.85, "grad_norm": 1.797799825668335, "learning_rate": 1.183854072380205e-06, "loss": 0.8831, "step": 14790 }, { "epoch": 0.85, "grad_norm": 1.7772151231765747, "learning_rate": 1.1829774875152854e-06, "loss": 0.857, "step": 14791 }, { "epoch": 0.85, "grad_norm": 1.6105971336364746, "learning_rate": 1.1821012069011806e-06, "loss": 0.9017, "step": 14792 }, { "epoch": 0.85, "grad_norm": 1.744730830192566, "learning_rate": 1.1812252305681326e-06, "loss": 0.8856, "step": 14793 }, { "epoch": 0.85, "grad_norm": 1.8852442502975464, "learning_rate": 1.1803495585463665e-06, "loss": 0.9199, "step": 14794 }, { "epoch": 0.85, "grad_norm": 1.849757432937622, "learning_rate": 1.1794741908661012e-06, "loss": 0.8482, "step": 14795 }, { "epoch": 0.85, "grad_norm": 1.7568823099136353, "learning_rate": 1.1785991275575426e-06, "loss": 0.8506, "step": 14796 }, { "epoch": 0.85, "grad_norm": 1.8058956861495972, "learning_rate": 1.1777243686508854e-06, "loss": 0.8276, "step": 14797 }, { "epoch": 0.85, "grad_norm": 1.7867754697799683, "learning_rate": 1.176849914176319e-06, "loss": 0.8471, "step": 14798 }, { "epoch": 0.85, "grad_norm": 1.7113209962844849, "learning_rate": 1.1759757641640125e-06, "loss": 0.876, "step": 14799 }, { "epoch": 0.85, "grad_norm": 1.6810534000396729, "learning_rate": 1.175101918644136e-06, "loss": 0.9932, "step": 14800 }, { "epoch": 0.85, "grad_norm": 1.7239222526550293, "learning_rate": 1.1742283776468389e-06, "loss": 0.8098, "step": 14801 }, { "epoch": 0.85, "grad_norm": 1.8601396083831787, "learning_rate": 1.1733551412022682e-06, "loss": 0.8095, "step": 14802 }, { "epoch": 0.85, "grad_norm": 1.791898488998413, "learning_rate": 1.1724822093405542e-06, "loss": 0.859, "step": 14803 }, { "epoch": 0.85, "grad_norm": 0.9741474390029907, "learning_rate": 1.1716095820918217e-06, "loss": 0.5525, "step": 14804 }, { "epoch": 0.85, "grad_norm": 1.8405451774597168, "learning_rate": 1.17073725948618e-06, "loss": 0.9578, "step": 14805 }, { "epoch": 0.85, "grad_norm": 1.761673092842102, "learning_rate": 1.1698652415537315e-06, "loss": 1.0171, "step": 14806 }, { "epoch": 0.85, "grad_norm": 1.7625985145568848, "learning_rate": 1.16899352832457e-06, "loss": 0.8912, "step": 14807 }, { "epoch": 0.85, "grad_norm": 1.9864790439605713, "learning_rate": 1.1681221198287707e-06, "loss": 0.8616, "step": 14808 }, { "epoch": 0.85, "grad_norm": 1.670388102531433, "learning_rate": 1.167251016096409e-06, "loss": 0.8667, "step": 14809 }, { "epoch": 0.85, "grad_norm": 1.5771379470825195, "learning_rate": 1.166380217157539e-06, "loss": 0.9119, "step": 14810 }, { "epoch": 0.85, "grad_norm": 1.735488772392273, "learning_rate": 1.1655097230422141e-06, "loss": 0.8951, "step": 14811 }, { "epoch": 0.85, "grad_norm": 1.0001025199890137, "learning_rate": 1.1646395337804684e-06, "loss": 0.4815, "step": 14812 }, { "epoch": 0.85, "grad_norm": 1.9586142301559448, "learning_rate": 1.1637696494023331e-06, "loss": 0.8887, "step": 14813 }, { "epoch": 0.85, "grad_norm": 1.7583764791488647, "learning_rate": 1.1629000699378235e-06, "loss": 0.8856, "step": 14814 }, { "epoch": 0.85, "grad_norm": 1.8681720495224, "learning_rate": 1.1620307954169484e-06, "loss": 0.8413, "step": 14815 }, { "epoch": 0.85, "grad_norm": 1.7538697719573975, "learning_rate": 1.1611618258696999e-06, "loss": 0.9364, "step": 14816 }, { "epoch": 0.85, "grad_norm": 1.7741611003875732, "learning_rate": 1.1602931613260694e-06, "loss": 0.9007, "step": 14817 }, { "epoch": 0.85, "grad_norm": 1.1062372922897339, "learning_rate": 1.159424801816027e-06, "loss": 0.5678, "step": 14818 }, { "epoch": 0.85, "grad_norm": 1.7400236129760742, "learning_rate": 1.1585567473695403e-06, "loss": 0.8709, "step": 14819 }, { "epoch": 0.85, "grad_norm": 1.7755361795425415, "learning_rate": 1.157688998016564e-06, "loss": 0.8898, "step": 14820 }, { "epoch": 0.85, "grad_norm": 1.6871265172958374, "learning_rate": 1.1568215537870376e-06, "loss": 0.9804, "step": 14821 }, { "epoch": 0.85, "grad_norm": 1.7692261934280396, "learning_rate": 1.1559544147109004e-06, "loss": 0.8133, "step": 14822 }, { "epoch": 0.85, "grad_norm": 1.7094820737838745, "learning_rate": 1.1550875808180685e-06, "loss": 0.8723, "step": 14823 }, { "epoch": 0.85, "grad_norm": 1.6173863410949707, "learning_rate": 1.154221052138459e-06, "loss": 0.8899, "step": 14824 }, { "epoch": 0.85, "grad_norm": 1.7471132278442383, "learning_rate": 1.1533548287019702e-06, "loss": 0.8852, "step": 14825 }, { "epoch": 0.85, "grad_norm": 1.7408438920974731, "learning_rate": 1.152488910538495e-06, "loss": 0.8076, "step": 14826 }, { "epoch": 0.85, "grad_norm": 1.9347789287567139, "learning_rate": 1.1516232976779095e-06, "loss": 0.8943, "step": 14827 }, { "epoch": 0.85, "grad_norm": 1.7290682792663574, "learning_rate": 1.150757990150091e-06, "loss": 0.8012, "step": 14828 }, { "epoch": 0.85, "grad_norm": 1.7400461435317993, "learning_rate": 1.149892987984893e-06, "loss": 0.8784, "step": 14829 }, { "epoch": 0.85, "grad_norm": 1.8327698707580566, "learning_rate": 1.1490282912121686e-06, "loss": 0.9392, "step": 14830 }, { "epoch": 0.85, "grad_norm": 1.7748452425003052, "learning_rate": 1.1481638998617507e-06, "loss": 0.865, "step": 14831 }, { "epoch": 0.85, "grad_norm": 1.8569293022155762, "learning_rate": 1.1472998139634727e-06, "loss": 0.853, "step": 14832 }, { "epoch": 0.85, "grad_norm": 1.7124027013778687, "learning_rate": 1.146436033547147e-06, "loss": 0.8749, "step": 14833 }, { "epoch": 0.85, "grad_norm": 1.6331645250320435, "learning_rate": 1.1455725586425847e-06, "loss": 0.8735, "step": 14834 }, { "epoch": 0.85, "grad_norm": 2.0667500495910645, "learning_rate": 1.1447093892795769e-06, "loss": 0.9016, "step": 14835 }, { "epoch": 0.85, "grad_norm": 1.7744994163513184, "learning_rate": 1.1438465254879116e-06, "loss": 0.9426, "step": 14836 }, { "epoch": 0.85, "grad_norm": 1.7284291982650757, "learning_rate": 1.1429839672973665e-06, "loss": 0.8813, "step": 14837 }, { "epoch": 0.85, "grad_norm": 1.9361088275909424, "learning_rate": 1.1421217147377018e-06, "loss": 0.8769, "step": 14838 }, { "epoch": 0.85, "grad_norm": 1.8605183362960815, "learning_rate": 1.141259767838675e-06, "loss": 0.8413, "step": 14839 }, { "epoch": 0.85, "grad_norm": 0.9905667304992676, "learning_rate": 1.1403981266300258e-06, "loss": 0.5687, "step": 14840 }, { "epoch": 0.85, "grad_norm": 1.8911683559417725, "learning_rate": 1.1395367911414911e-06, "loss": 0.9432, "step": 14841 }, { "epoch": 0.85, "grad_norm": 1.788665771484375, "learning_rate": 1.1386757614027888e-06, "loss": 0.8168, "step": 14842 }, { "epoch": 0.85, "grad_norm": 1.8488212823867798, "learning_rate": 1.1378150374436347e-06, "loss": 0.9767, "step": 14843 }, { "epoch": 0.85, "grad_norm": 1.6769204139709473, "learning_rate": 1.1369546192937264e-06, "loss": 0.8694, "step": 14844 }, { "epoch": 0.85, "grad_norm": 1.801372766494751, "learning_rate": 1.136094506982759e-06, "loss": 0.9206, "step": 14845 }, { "epoch": 0.85, "grad_norm": 0.9626538157463074, "learning_rate": 1.1352347005404062e-06, "loss": 0.5129, "step": 14846 }, { "epoch": 0.85, "grad_norm": 1.9301203489303589, "learning_rate": 1.1343751999963448e-06, "loss": 0.9109, "step": 14847 }, { "epoch": 0.85, "grad_norm": 1.8862485885620117, "learning_rate": 1.1335160053802273e-06, "loss": 0.959, "step": 14848 }, { "epoch": 0.85, "grad_norm": 1.8061212301254272, "learning_rate": 1.132657116721705e-06, "loss": 0.9016, "step": 14849 }, { "epoch": 0.85, "grad_norm": 2.064295530319214, "learning_rate": 1.1317985340504178e-06, "loss": 0.9604, "step": 14850 }, { "epoch": 0.85, "grad_norm": 1.9635558128356934, "learning_rate": 1.1309402573959882e-06, "loss": 0.8797, "step": 14851 }, { "epoch": 0.85, "grad_norm": 1.7758803367614746, "learning_rate": 1.1300822867880378e-06, "loss": 0.8734, "step": 14852 }, { "epoch": 0.85, "grad_norm": 2.030975341796875, "learning_rate": 1.1292246222561697e-06, "loss": 0.9173, "step": 14853 }, { "epoch": 0.85, "grad_norm": 1.7642524242401123, "learning_rate": 1.1283672638299813e-06, "loss": 0.8545, "step": 14854 }, { "epoch": 0.85, "grad_norm": 1.7300866842269897, "learning_rate": 1.1275102115390546e-06, "loss": 1.0123, "step": 14855 }, { "epoch": 0.85, "grad_norm": 1.7304534912109375, "learning_rate": 1.126653465412969e-06, "loss": 0.9208, "step": 14856 }, { "epoch": 0.85, "grad_norm": 1.6619575023651123, "learning_rate": 1.1257970254812833e-06, "loss": 0.8994, "step": 14857 }, { "epoch": 0.85, "grad_norm": 1.7392650842666626, "learning_rate": 1.124940891773555e-06, "loss": 0.8464, "step": 14858 }, { "epoch": 0.85, "grad_norm": 1.7891305685043335, "learning_rate": 1.1240850643193236e-06, "loss": 0.8295, "step": 14859 }, { "epoch": 0.85, "grad_norm": 1.7653645277023315, "learning_rate": 1.1232295431481222e-06, "loss": 0.9218, "step": 14860 }, { "epoch": 0.85, "grad_norm": 1.6638380289077759, "learning_rate": 1.122374328289475e-06, "loss": 0.9276, "step": 14861 }, { "epoch": 0.85, "grad_norm": 1.6876366138458252, "learning_rate": 1.1215194197728886e-06, "loss": 0.8602, "step": 14862 }, { "epoch": 0.85, "grad_norm": 1.6116549968719482, "learning_rate": 1.120664817627869e-06, "loss": 0.8931, "step": 14863 }, { "epoch": 0.85, "grad_norm": 1.932853102684021, "learning_rate": 1.1198105218839007e-06, "loss": 0.9732, "step": 14864 }, { "epoch": 0.85, "grad_norm": 1.8430944681167603, "learning_rate": 1.1189565325704677e-06, "loss": 0.8903, "step": 14865 }, { "epoch": 0.85, "grad_norm": 1.8514463901519775, "learning_rate": 1.1181028497170344e-06, "loss": 0.8543, "step": 14866 }, { "epoch": 0.85, "grad_norm": 1.6708588600158691, "learning_rate": 1.1172494733530625e-06, "loss": 0.8563, "step": 14867 }, { "epoch": 0.85, "grad_norm": 1.7908377647399902, "learning_rate": 1.1163964035079976e-06, "loss": 0.9798, "step": 14868 }, { "epoch": 0.85, "grad_norm": 1.8075577020645142, "learning_rate": 1.1155436402112785e-06, "loss": 0.9929, "step": 14869 }, { "epoch": 0.85, "grad_norm": 1.6757240295410156, "learning_rate": 1.114691183492329e-06, "loss": 0.8865, "step": 14870 }, { "epoch": 0.85, "grad_norm": 1.799923062324524, "learning_rate": 1.1138390333805682e-06, "loss": 0.959, "step": 14871 }, { "epoch": 0.85, "grad_norm": 1.6274442672729492, "learning_rate": 1.112987189905399e-06, "loss": 0.8843, "step": 14872 }, { "epoch": 0.85, "grad_norm": 2.0177228450775146, "learning_rate": 1.1121356530962157e-06, "loss": 0.9739, "step": 14873 }, { "epoch": 0.85, "grad_norm": 1.6361910104751587, "learning_rate": 1.1112844229824071e-06, "loss": 0.868, "step": 14874 }, { "epoch": 0.85, "grad_norm": 1.9618886709213257, "learning_rate": 1.1104334995933407e-06, "loss": 0.8465, "step": 14875 }, { "epoch": 0.85, "grad_norm": 1.7846596240997314, "learning_rate": 1.1095828829583844e-06, "loss": 0.8327, "step": 14876 }, { "epoch": 0.85, "grad_norm": 1.6273856163024902, "learning_rate": 1.1087325731068854e-06, "loss": 0.8913, "step": 14877 }, { "epoch": 0.85, "grad_norm": 1.8435790538787842, "learning_rate": 1.1078825700681918e-06, "loss": 0.8831, "step": 14878 }, { "epoch": 0.85, "grad_norm": 1.0711643695831299, "learning_rate": 1.1070328738716285e-06, "loss": 0.5418, "step": 14879 }, { "epoch": 0.85, "grad_norm": 1.8297356367111206, "learning_rate": 1.1061834845465225e-06, "loss": 0.9101, "step": 14880 }, { "epoch": 0.85, "grad_norm": 1.8431520462036133, "learning_rate": 1.1053344021221778e-06, "loss": 0.855, "step": 14881 }, { "epoch": 0.85, "grad_norm": 1.582067847251892, "learning_rate": 1.104485626627899e-06, "loss": 0.8298, "step": 14882 }, { "epoch": 0.85, "grad_norm": 4.15044641494751, "learning_rate": 1.1036371580929706e-06, "loss": 0.8888, "step": 14883 }, { "epoch": 0.85, "grad_norm": 1.5642304420471191, "learning_rate": 1.102788996546672e-06, "loss": 0.9006, "step": 14884 }, { "epoch": 0.85, "grad_norm": 1.7663689851760864, "learning_rate": 1.1019411420182747e-06, "loss": 0.8598, "step": 14885 }, { "epoch": 0.85, "grad_norm": 1.6893409490585327, "learning_rate": 1.1010935945370305e-06, "loss": 0.9682, "step": 14886 }, { "epoch": 0.85, "grad_norm": 1.782151222229004, "learning_rate": 1.1002463541321906e-06, "loss": 0.9291, "step": 14887 }, { "epoch": 0.85, "grad_norm": 1.722662091255188, "learning_rate": 1.0993994208329862e-06, "loss": 0.9829, "step": 14888 }, { "epoch": 0.85, "grad_norm": 1.9793977737426758, "learning_rate": 1.098552794668648e-06, "loss": 0.8827, "step": 14889 }, { "epoch": 0.85, "grad_norm": 1.7547744512557983, "learning_rate": 1.0977064756683841e-06, "loss": 0.8589, "step": 14890 }, { "epoch": 0.85, "grad_norm": 1.7955750226974487, "learning_rate": 1.0968604638614055e-06, "loss": 0.8553, "step": 14891 }, { "epoch": 0.85, "grad_norm": 1.8794209957122803, "learning_rate": 1.096014759276899e-06, "loss": 0.9655, "step": 14892 }, { "epoch": 0.85, "grad_norm": 1.872417688369751, "learning_rate": 1.0951693619440517e-06, "loss": 0.9323, "step": 14893 }, { "epoch": 0.85, "grad_norm": 1.8672363758087158, "learning_rate": 1.0943242718920355e-06, "loss": 0.8697, "step": 14894 }, { "epoch": 0.85, "grad_norm": 1.697163462638855, "learning_rate": 1.0934794891500134e-06, "loss": 0.8536, "step": 14895 }, { "epoch": 0.85, "grad_norm": 1.7430111169815063, "learning_rate": 1.092635013747132e-06, "loss": 0.8852, "step": 14896 }, { "epoch": 0.85, "grad_norm": 1.8425328731536865, "learning_rate": 1.091790845712537e-06, "loss": 0.9685, "step": 14897 }, { "epoch": 0.85, "grad_norm": 1.8222815990447998, "learning_rate": 1.090946985075354e-06, "loss": 0.9451, "step": 14898 }, { "epoch": 0.85, "grad_norm": 1.637719750404358, "learning_rate": 1.0901034318647063e-06, "loss": 0.8964, "step": 14899 }, { "epoch": 0.85, "grad_norm": 1.6801607608795166, "learning_rate": 1.0892601861096985e-06, "loss": 0.9068, "step": 14900 }, { "epoch": 0.85, "grad_norm": 1.8916633129119873, "learning_rate": 1.0884172478394317e-06, "loss": 1.0096, "step": 14901 }, { "epoch": 0.85, "grad_norm": 1.7380305528640747, "learning_rate": 1.0875746170829903e-06, "loss": 0.9066, "step": 14902 }, { "epoch": 0.85, "grad_norm": 1.8806736469268799, "learning_rate": 1.0867322938694535e-06, "loss": 0.953, "step": 14903 }, { "epoch": 0.85, "grad_norm": 1.7062561511993408, "learning_rate": 1.085890278227889e-06, "loss": 0.7576, "step": 14904 }, { "epoch": 0.85, "grad_norm": 1.8777328729629517, "learning_rate": 1.085048570187348e-06, "loss": 0.8432, "step": 14905 }, { "epoch": 0.85, "grad_norm": 1.79771089553833, "learning_rate": 1.0842071697768808e-06, "loss": 0.8779, "step": 14906 }, { "epoch": 0.85, "grad_norm": 1.8964142799377441, "learning_rate": 1.0833660770255162e-06, "loss": 0.9333, "step": 14907 }, { "epoch": 0.86, "grad_norm": 1.013614535331726, "learning_rate": 1.082525291962283e-06, "loss": 0.5683, "step": 14908 }, { "epoch": 0.86, "grad_norm": 1.8889468908309937, "learning_rate": 1.0816848146161895e-06, "loss": 0.9218, "step": 14909 }, { "epoch": 0.86, "grad_norm": 1.679005742073059, "learning_rate": 1.0808446450162435e-06, "loss": 0.8734, "step": 14910 }, { "epoch": 0.86, "grad_norm": 1.699961543083191, "learning_rate": 1.0800047831914317e-06, "loss": 0.9117, "step": 14911 }, { "epoch": 0.86, "grad_norm": 1.7158069610595703, "learning_rate": 1.07916522917074e-06, "loss": 0.9132, "step": 14912 }, { "epoch": 0.86, "grad_norm": 1.8884469270706177, "learning_rate": 1.078325982983134e-06, "loss": 0.8976, "step": 14913 }, { "epoch": 0.86, "grad_norm": 1.9490313529968262, "learning_rate": 1.077487044657578e-06, "loss": 0.8698, "step": 14914 }, { "epoch": 0.86, "grad_norm": 1.6767425537109375, "learning_rate": 1.0766484142230215e-06, "loss": 0.8781, "step": 14915 }, { "epoch": 0.86, "grad_norm": 1.943113923072815, "learning_rate": 1.075810091708399e-06, "loss": 0.9006, "step": 14916 }, { "epoch": 0.86, "grad_norm": 1.725237250328064, "learning_rate": 1.0749720771426443e-06, "loss": 0.9273, "step": 14917 }, { "epoch": 0.86, "grad_norm": 1.7463523149490356, "learning_rate": 1.0741343705546704e-06, "loss": 0.883, "step": 14918 }, { "epoch": 0.86, "grad_norm": 1.6340445280075073, "learning_rate": 1.0732969719733877e-06, "loss": 0.8899, "step": 14919 }, { "epoch": 0.86, "grad_norm": 1.7823466062545776, "learning_rate": 1.0724598814276887e-06, "loss": 0.9085, "step": 14920 }, { "epoch": 0.86, "grad_norm": 1.7317627668380737, "learning_rate": 1.0716230989464638e-06, "loss": 0.9062, "step": 14921 }, { "epoch": 0.86, "grad_norm": 1.9632351398468018, "learning_rate": 1.070786624558583e-06, "loss": 0.8874, "step": 14922 }, { "epoch": 0.86, "grad_norm": 1.8129502534866333, "learning_rate": 1.0699504582929144e-06, "loss": 0.9309, "step": 14923 }, { "epoch": 0.86, "grad_norm": 1.6596673727035522, "learning_rate": 1.0691146001783081e-06, "loss": 0.8475, "step": 14924 }, { "epoch": 0.86, "grad_norm": 1.796264886856079, "learning_rate": 1.0682790502436124e-06, "loss": 0.8789, "step": 14925 }, { "epoch": 0.86, "grad_norm": 1.8723270893096924, "learning_rate": 1.0674438085176553e-06, "loss": 0.9483, "step": 14926 }, { "epoch": 0.86, "grad_norm": 1.5680409669876099, "learning_rate": 1.066608875029259e-06, "loss": 0.8487, "step": 14927 }, { "epoch": 0.86, "grad_norm": 1.8118727207183838, "learning_rate": 1.0657742498072388e-06, "loss": 0.9055, "step": 14928 }, { "epoch": 0.86, "grad_norm": 1.996239423751831, "learning_rate": 1.0649399328803912e-06, "loss": 0.8977, "step": 14929 }, { "epoch": 0.86, "grad_norm": 1.8458224534988403, "learning_rate": 1.0641059242775087e-06, "loss": 1.0045, "step": 14930 }, { "epoch": 0.86, "grad_norm": 1.6864839792251587, "learning_rate": 1.0632722240273662e-06, "loss": 0.8899, "step": 14931 }, { "epoch": 0.86, "grad_norm": 1.6400095224380493, "learning_rate": 1.0624388321587387e-06, "loss": 0.908, "step": 14932 }, { "epoch": 0.86, "grad_norm": 1.7352604866027832, "learning_rate": 1.0616057487003794e-06, "loss": 0.8605, "step": 14933 }, { "epoch": 0.86, "grad_norm": 1.819554328918457, "learning_rate": 1.060772973681039e-06, "loss": 0.9217, "step": 14934 }, { "epoch": 0.86, "grad_norm": 1.8149404525756836, "learning_rate": 1.05994050712945e-06, "loss": 0.8856, "step": 14935 }, { "epoch": 0.86, "grad_norm": 1.877528190612793, "learning_rate": 1.0591083490743437e-06, "loss": 0.9106, "step": 14936 }, { "epoch": 0.86, "grad_norm": 1.8921500444412231, "learning_rate": 1.0582764995444305e-06, "loss": 0.937, "step": 14937 }, { "epoch": 0.86, "grad_norm": 1.1025562286376953, "learning_rate": 1.0574449585684176e-06, "loss": 0.5401, "step": 14938 }, { "epoch": 0.86, "grad_norm": 1.8973225355148315, "learning_rate": 1.056613726175002e-06, "loss": 0.9134, "step": 14939 }, { "epoch": 0.86, "grad_norm": 1.8091366291046143, "learning_rate": 1.0557828023928607e-06, "loss": 0.8404, "step": 14940 }, { "epoch": 0.86, "grad_norm": 1.8649598360061646, "learning_rate": 1.054952187250674e-06, "loss": 0.8943, "step": 14941 }, { "epoch": 0.86, "grad_norm": 1.7879281044006348, "learning_rate": 1.054121880777097e-06, "loss": 0.8626, "step": 14942 }, { "epoch": 0.86, "grad_norm": 1.7447692155838013, "learning_rate": 1.0532918830007876e-06, "loss": 0.9264, "step": 14943 }, { "epoch": 0.86, "grad_norm": 1.7139854431152344, "learning_rate": 1.052462193950381e-06, "loss": 0.9175, "step": 14944 }, { "epoch": 0.86, "grad_norm": 1.8053346872329712, "learning_rate": 1.0516328136545129e-06, "loss": 0.9155, "step": 14945 }, { "epoch": 0.86, "grad_norm": 1.7508553266525269, "learning_rate": 1.0508037421417971e-06, "loss": 0.8043, "step": 14946 }, { "epoch": 0.86, "grad_norm": 2.010089159011841, "learning_rate": 1.0499749794408475e-06, "loss": 0.9632, "step": 14947 }, { "epoch": 0.86, "grad_norm": 1.6463533639907837, "learning_rate": 1.0491465255802603e-06, "loss": 0.9299, "step": 14948 }, { "epoch": 0.86, "grad_norm": 1.7881395816802979, "learning_rate": 1.0483183805886233e-06, "loss": 1.0043, "step": 14949 }, { "epoch": 0.86, "grad_norm": 1.694732666015625, "learning_rate": 1.0474905444945128e-06, "loss": 0.8883, "step": 14950 }, { "epoch": 0.86, "grad_norm": 1.6043202877044678, "learning_rate": 1.0466630173264946e-06, "loss": 0.9465, "step": 14951 }, { "epoch": 0.86, "grad_norm": 1.818852424621582, "learning_rate": 1.0458357991131284e-06, "loss": 0.9115, "step": 14952 }, { "epoch": 0.86, "grad_norm": 1.7274229526519775, "learning_rate": 1.0450088898829547e-06, "loss": 0.9153, "step": 14953 }, { "epoch": 0.86, "grad_norm": 1.7107564210891724, "learning_rate": 1.0441822896645104e-06, "loss": 0.8062, "step": 14954 }, { "epoch": 0.86, "grad_norm": 1.7387783527374268, "learning_rate": 1.0433559984863162e-06, "loss": 0.8776, "step": 14955 }, { "epoch": 0.86, "grad_norm": 1.5985060930252075, "learning_rate": 1.0425300163768903e-06, "loss": 0.9396, "step": 14956 }, { "epoch": 0.86, "grad_norm": 1.0059682130813599, "learning_rate": 1.0417043433647289e-06, "loss": 0.4969, "step": 14957 }, { "epoch": 0.86, "grad_norm": 1.9177589416503906, "learning_rate": 1.0408789794783292e-06, "loss": 0.9313, "step": 14958 }, { "epoch": 0.86, "grad_norm": 1.8948943614959717, "learning_rate": 1.040053924746165e-06, "loss": 0.8487, "step": 14959 }, { "epoch": 0.86, "grad_norm": 2.4074482917785645, "learning_rate": 1.0392291791967158e-06, "loss": 0.8925, "step": 14960 }, { "epoch": 0.86, "grad_norm": 1.882422685623169, "learning_rate": 1.0384047428584344e-06, "loss": 0.9122, "step": 14961 }, { "epoch": 0.86, "grad_norm": 1.7333823442459106, "learning_rate": 1.0375806157597734e-06, "loss": 0.9573, "step": 14962 }, { "epoch": 0.86, "grad_norm": 1.8123652935028076, "learning_rate": 1.0367567979291694e-06, "loss": 0.8534, "step": 14963 }, { "epoch": 0.86, "grad_norm": 1.8629822731018066, "learning_rate": 1.0359332893950514e-06, "loss": 0.9177, "step": 14964 }, { "epoch": 0.86, "grad_norm": 1.5997167825698853, "learning_rate": 1.0351100901858335e-06, "loss": 0.8768, "step": 14965 }, { "epoch": 0.86, "grad_norm": 1.8188073635101318, "learning_rate": 1.0342872003299265e-06, "loss": 0.967, "step": 14966 }, { "epoch": 0.86, "grad_norm": 1.846309781074524, "learning_rate": 1.0334646198557208e-06, "loss": 0.95, "step": 14967 }, { "epoch": 0.86, "grad_norm": 1.982743501663208, "learning_rate": 1.0326423487916048e-06, "loss": 0.8022, "step": 14968 }, { "epoch": 0.86, "grad_norm": 1.7773852348327637, "learning_rate": 1.0318203871659538e-06, "loss": 0.8311, "step": 14969 }, { "epoch": 0.86, "grad_norm": 1.1019771099090576, "learning_rate": 1.0309987350071281e-06, "loss": 0.6092, "step": 14970 }, { "epoch": 0.86, "grad_norm": 1.590195655822754, "learning_rate": 1.0301773923434833e-06, "loss": 0.904, "step": 14971 }, { "epoch": 0.86, "grad_norm": 1.666215419769287, "learning_rate": 1.0293563592033595e-06, "loss": 0.922, "step": 14972 }, { "epoch": 0.86, "grad_norm": 1.8345155715942383, "learning_rate": 1.0285356356150899e-06, "loss": 0.8458, "step": 14973 }, { "epoch": 0.86, "grad_norm": 1.841991901397705, "learning_rate": 1.0277152216069942e-06, "loss": 0.923, "step": 14974 }, { "epoch": 0.86, "grad_norm": 1.9013420343399048, "learning_rate": 1.0268951172073838e-06, "loss": 0.9206, "step": 14975 }, { "epoch": 0.86, "grad_norm": 1.766484260559082, "learning_rate": 1.0260753224445564e-06, "loss": 0.8579, "step": 14976 }, { "epoch": 0.86, "grad_norm": 1.6873103380203247, "learning_rate": 1.0252558373468036e-06, "loss": 0.8281, "step": 14977 }, { "epoch": 0.86, "grad_norm": 1.8229858875274658, "learning_rate": 1.0244366619424006e-06, "loss": 0.9006, "step": 14978 }, { "epoch": 0.86, "grad_norm": 1.6585370302200317, "learning_rate": 1.0236177962596173e-06, "loss": 0.8202, "step": 14979 }, { "epoch": 0.86, "grad_norm": 0.9619442224502563, "learning_rate": 1.0227992403267074e-06, "loss": 0.529, "step": 14980 }, { "epoch": 0.86, "grad_norm": 2.014090061187744, "learning_rate": 1.0219809941719195e-06, "loss": 0.9412, "step": 14981 }, { "epoch": 0.86, "grad_norm": 2.0536394119262695, "learning_rate": 1.0211630578234899e-06, "loss": 0.8079, "step": 14982 }, { "epoch": 0.86, "grad_norm": 1.6927961111068726, "learning_rate": 1.0203454313096407e-06, "loss": 0.8818, "step": 14983 }, { "epoch": 0.86, "grad_norm": 1.124111294746399, "learning_rate": 1.0195281146585879e-06, "loss": 0.6468, "step": 14984 }, { "epoch": 0.86, "grad_norm": 1.8876206874847412, "learning_rate": 1.0187111078985324e-06, "loss": 1.0668, "step": 14985 }, { "epoch": 0.86, "grad_norm": 1.752833604812622, "learning_rate": 1.0178944110576704e-06, "loss": 0.8663, "step": 14986 }, { "epoch": 0.86, "grad_norm": 1.6628209352493286, "learning_rate": 1.0170780241641798e-06, "loss": 0.8837, "step": 14987 }, { "epoch": 0.86, "grad_norm": 1.7642520666122437, "learning_rate": 1.0162619472462355e-06, "loss": 0.8182, "step": 14988 }, { "epoch": 0.86, "grad_norm": 1.793492078781128, "learning_rate": 1.0154461803319938e-06, "loss": 0.8995, "step": 14989 }, { "epoch": 0.86, "grad_norm": 1.7705007791519165, "learning_rate": 1.01463072344961e-06, "loss": 0.8643, "step": 14990 }, { "epoch": 0.86, "grad_norm": 1.9232949018478394, "learning_rate": 1.0138155766272185e-06, "loss": 0.9302, "step": 14991 }, { "epoch": 0.86, "grad_norm": 1.9237415790557861, "learning_rate": 1.0130007398929486e-06, "loss": 0.9553, "step": 14992 }, { "epoch": 0.86, "grad_norm": 1.7248718738555908, "learning_rate": 1.0121862132749216e-06, "loss": 0.8873, "step": 14993 }, { "epoch": 0.86, "grad_norm": 1.6651725769042969, "learning_rate": 1.0113719968012403e-06, "loss": 0.889, "step": 14994 }, { "epoch": 0.86, "grad_norm": 2.003671169281006, "learning_rate": 1.0105580905000045e-06, "loss": 0.8715, "step": 14995 }, { "epoch": 0.86, "grad_norm": 1.5913702249526978, "learning_rate": 1.009744494399295e-06, "loss": 0.8059, "step": 14996 }, { "epoch": 0.86, "grad_norm": 1.9233933687210083, "learning_rate": 1.008931208527193e-06, "loss": 0.9002, "step": 14997 }, { "epoch": 0.86, "grad_norm": 1.8158527612686157, "learning_rate": 1.0081182329117566e-06, "loss": 0.8847, "step": 14998 }, { "epoch": 0.86, "grad_norm": 1.7748826742172241, "learning_rate": 1.007305567581045e-06, "loss": 0.8688, "step": 14999 }, { "epoch": 0.86, "grad_norm": 1.8594638109207153, "learning_rate": 1.0064932125630956e-06, "loss": 0.8767, "step": 15000 }, { "epoch": 0.86, "grad_norm": 1.7900792360305786, "learning_rate": 1.0056811678859458e-06, "loss": 0.8046, "step": 15001 }, { "epoch": 0.86, "grad_norm": 1.8237502574920654, "learning_rate": 1.0048694335776111e-06, "loss": 0.8702, "step": 15002 }, { "epoch": 0.86, "grad_norm": 1.8914153575897217, "learning_rate": 1.0040580096661079e-06, "loss": 0.9477, "step": 15003 }, { "epoch": 0.86, "grad_norm": 1.7254283428192139, "learning_rate": 1.0032468961794317e-06, "loss": 0.8188, "step": 15004 }, { "epoch": 0.86, "grad_norm": 1.8993984460830688, "learning_rate": 1.0024360931455735e-06, "loss": 0.8996, "step": 15005 }, { "epoch": 0.86, "grad_norm": 1.8457854986190796, "learning_rate": 1.0016256005925152e-06, "loss": 0.9287, "step": 15006 }, { "epoch": 0.86, "grad_norm": 1.9410896301269531, "learning_rate": 1.0008154185482178e-06, "loss": 0.9062, "step": 15007 }, { "epoch": 0.86, "grad_norm": 1.0482933521270752, "learning_rate": 1.0000055470406445e-06, "loss": 0.4759, "step": 15008 }, { "epoch": 0.86, "grad_norm": 1.9075489044189453, "learning_rate": 9.991959860977384e-07, "loss": 0.8627, "step": 15009 }, { "epoch": 0.86, "grad_norm": 1.7341303825378418, "learning_rate": 9.983867357474374e-07, "loss": 0.955, "step": 15010 }, { "epoch": 0.86, "grad_norm": 1.688016414642334, "learning_rate": 9.975777960176625e-07, "loss": 0.835, "step": 15011 }, { "epoch": 0.86, "grad_norm": 1.80156409740448, "learning_rate": 9.967691669363334e-07, "loss": 0.8995, "step": 15012 }, { "epoch": 0.86, "grad_norm": 1.6473296880722046, "learning_rate": 9.959608485313488e-07, "loss": 0.9735, "step": 15013 }, { "epoch": 0.86, "grad_norm": 1.7560105323791504, "learning_rate": 9.951528408306054e-07, "loss": 0.9356, "step": 15014 }, { "epoch": 0.86, "grad_norm": 1.7326314449310303, "learning_rate": 9.94345143861981e-07, "loss": 0.9622, "step": 15015 }, { "epoch": 0.86, "grad_norm": 1.6863282918930054, "learning_rate": 9.935377576533523e-07, "loss": 0.864, "step": 15016 }, { "epoch": 0.86, "grad_norm": 1.7707799673080444, "learning_rate": 9.927306822325745e-07, "loss": 0.9043, "step": 15017 }, { "epoch": 0.86, "grad_norm": 1.7395671606063843, "learning_rate": 9.919239176274998e-07, "loss": 0.9411, "step": 15018 }, { "epoch": 0.86, "grad_norm": 1.670168399810791, "learning_rate": 9.911174638659703e-07, "loss": 0.9057, "step": 15019 }, { "epoch": 0.86, "grad_norm": 1.8225750923156738, "learning_rate": 9.903113209758098e-07, "loss": 0.9408, "step": 15020 }, { "epoch": 0.86, "grad_norm": 1.8781756162643433, "learning_rate": 9.895054889848389e-07, "loss": 0.9022, "step": 15021 }, { "epoch": 0.86, "grad_norm": 1.8368732929229736, "learning_rate": 9.88699967920863e-07, "loss": 0.8923, "step": 15022 }, { "epoch": 0.86, "grad_norm": 1.8686180114746094, "learning_rate": 9.878947578116804e-07, "loss": 0.9404, "step": 15023 }, { "epoch": 0.86, "grad_norm": 1.8079357147216797, "learning_rate": 9.870898586850742e-07, "loss": 0.9248, "step": 15024 }, { "epoch": 0.86, "grad_norm": 1.8096425533294678, "learning_rate": 9.862852705688198e-07, "loss": 0.8643, "step": 15025 }, { "epoch": 0.86, "grad_norm": 1.7631109952926636, "learning_rate": 9.85480993490683e-07, "loss": 0.9202, "step": 15026 }, { "epoch": 0.86, "grad_norm": 1.845440149307251, "learning_rate": 9.846770274784168e-07, "loss": 0.9432, "step": 15027 }, { "epoch": 0.86, "grad_norm": 1.7919163703918457, "learning_rate": 9.838733725597615e-07, "loss": 0.9022, "step": 15028 }, { "epoch": 0.86, "grad_norm": 1.8412429094314575, "learning_rate": 9.830700287624528e-07, "loss": 0.8272, "step": 15029 }, { "epoch": 0.86, "grad_norm": 1.7344106435775757, "learning_rate": 9.822669961142074e-07, "loss": 0.8765, "step": 15030 }, { "epoch": 0.86, "grad_norm": 1.6600439548492432, "learning_rate": 9.814642746427394e-07, "loss": 0.906, "step": 15031 }, { "epoch": 0.86, "grad_norm": 1.6861951351165771, "learning_rate": 9.806618643757459e-07, "loss": 0.9848, "step": 15032 }, { "epoch": 0.86, "grad_norm": 1.814062476158142, "learning_rate": 9.79859765340918e-07, "loss": 0.8422, "step": 15033 }, { "epoch": 0.86, "grad_norm": 1.857266902923584, "learning_rate": 9.790579775659326e-07, "loss": 0.9027, "step": 15034 }, { "epoch": 0.86, "grad_norm": 1.6585924625396729, "learning_rate": 9.78256501078456e-07, "loss": 0.8907, "step": 15035 }, { "epoch": 0.86, "grad_norm": 1.982703447341919, "learning_rate": 9.77455335906149e-07, "loss": 0.8893, "step": 15036 }, { "epoch": 0.86, "grad_norm": 1.7560428380966187, "learning_rate": 9.766544820766522e-07, "loss": 0.9072, "step": 15037 }, { "epoch": 0.86, "grad_norm": 1.7224732637405396, "learning_rate": 9.75853939617606e-07, "loss": 0.9246, "step": 15038 }, { "epoch": 0.86, "grad_norm": 1.7476567029953003, "learning_rate": 9.750537085566302e-07, "loss": 0.9563, "step": 15039 }, { "epoch": 0.86, "grad_norm": 1.7501264810562134, "learning_rate": 9.74253788921342e-07, "loss": 0.9091, "step": 15040 }, { "epoch": 0.86, "grad_norm": 1.9733738899230957, "learning_rate": 9.734541807393428e-07, "loss": 0.8785, "step": 15041 }, { "epoch": 0.86, "grad_norm": 1.7972707748413086, "learning_rate": 9.726548840382256e-07, "loss": 0.94, "step": 15042 }, { "epoch": 0.86, "grad_norm": 1.8973783254623413, "learning_rate": 9.718558988455706e-07, "loss": 0.8888, "step": 15043 }, { "epoch": 0.86, "grad_norm": 1.8386483192443848, "learning_rate": 9.710572251889505e-07, "loss": 0.8885, "step": 15044 }, { "epoch": 0.86, "grad_norm": 1.8734389543533325, "learning_rate": 9.70258863095923e-07, "loss": 0.9636, "step": 15045 }, { "epoch": 0.86, "grad_norm": 0.9930318593978882, "learning_rate": 9.694608125940385e-07, "loss": 0.5489, "step": 15046 }, { "epoch": 0.86, "grad_norm": 1.852248191833496, "learning_rate": 9.686630737108372e-07, "loss": 0.8996, "step": 15047 }, { "epoch": 0.86, "grad_norm": 1.8760344982147217, "learning_rate": 9.678656464738433e-07, "loss": 0.9484, "step": 15048 }, { "epoch": 0.86, "grad_norm": 1.8614131212234497, "learning_rate": 9.670685309105786e-07, "loss": 0.8531, "step": 15049 }, { "epoch": 0.86, "grad_norm": 1.6949517726898193, "learning_rate": 9.662717270485432e-07, "loss": 0.802, "step": 15050 }, { "epoch": 0.86, "grad_norm": 1.816502571105957, "learning_rate": 9.654752349152384e-07, "loss": 0.893, "step": 15051 }, { "epoch": 0.86, "grad_norm": 1.8275442123413086, "learning_rate": 9.646790545381447e-07, "loss": 0.9257, "step": 15052 }, { "epoch": 0.86, "grad_norm": 1.7770590782165527, "learning_rate": 9.638831859447385e-07, "loss": 0.8659, "step": 15053 }, { "epoch": 0.86, "grad_norm": 1.8152029514312744, "learning_rate": 9.630876291624802e-07, "loss": 0.8958, "step": 15054 }, { "epoch": 0.86, "grad_norm": 1.7617536783218384, "learning_rate": 9.62292384218827e-07, "loss": 0.8945, "step": 15055 }, { "epoch": 0.86, "grad_norm": 2.1205124855041504, "learning_rate": 9.614974511412156e-07, "loss": 0.8867, "step": 15056 }, { "epoch": 0.86, "grad_norm": 1.732620120048523, "learning_rate": 9.60702829957082e-07, "loss": 0.8581, "step": 15057 }, { "epoch": 0.86, "grad_norm": 1.7106659412384033, "learning_rate": 9.599085206938397e-07, "loss": 0.8498, "step": 15058 }, { "epoch": 0.86, "grad_norm": 1.76889967918396, "learning_rate": 9.591145233789034e-07, "loss": 0.9676, "step": 15059 }, { "epoch": 0.86, "grad_norm": 1.727650761604309, "learning_rate": 9.583208380396714e-07, "loss": 0.7985, "step": 15060 }, { "epoch": 0.86, "grad_norm": 1.8442251682281494, "learning_rate": 9.575274647035282e-07, "loss": 0.9606, "step": 15061 }, { "epoch": 0.86, "grad_norm": 1.798671007156372, "learning_rate": 9.567344033978555e-07, "loss": 0.8341, "step": 15062 }, { "epoch": 0.86, "grad_norm": 1.0089762210845947, "learning_rate": 9.559416541500154e-07, "loss": 0.4918, "step": 15063 }, { "epoch": 0.86, "grad_norm": 1.8915917873382568, "learning_rate": 9.551492169873666e-07, "loss": 0.9208, "step": 15064 }, { "epoch": 0.86, "grad_norm": 1.8015803098678589, "learning_rate": 9.543570919372513e-07, "loss": 0.904, "step": 15065 }, { "epoch": 0.86, "grad_norm": 1.8134909868240356, "learning_rate": 9.535652790270067e-07, "loss": 0.9379, "step": 15066 }, { "epoch": 0.86, "grad_norm": 1.7277216911315918, "learning_rate": 9.527737782839519e-07, "loss": 0.9128, "step": 15067 }, { "epoch": 0.86, "grad_norm": 1.757851481437683, "learning_rate": 9.519825897354029e-07, "loss": 0.856, "step": 15068 }, { "epoch": 0.86, "grad_norm": 1.8581829071044922, "learning_rate": 9.51191713408659e-07, "loss": 0.9758, "step": 15069 }, { "epoch": 0.86, "grad_norm": 1.8184823989868164, "learning_rate": 9.504011493310128e-07, "loss": 0.8702, "step": 15070 }, { "epoch": 0.86, "grad_norm": 1.7623789310455322, "learning_rate": 9.496108975297447e-07, "loss": 0.8647, "step": 15071 }, { "epoch": 0.86, "grad_norm": 1.661779522895813, "learning_rate": 9.488209580321217e-07, "loss": 0.8028, "step": 15072 }, { "epoch": 0.86, "grad_norm": 1.7767212390899658, "learning_rate": 9.480313308654054e-07, "loss": 0.9565, "step": 15073 }, { "epoch": 0.86, "grad_norm": 1.0178263187408447, "learning_rate": 9.472420160568407e-07, "loss": 0.5721, "step": 15074 }, { "epoch": 0.86, "grad_norm": 1.6802396774291992, "learning_rate": 9.464530136336691e-07, "loss": 0.7907, "step": 15075 }, { "epoch": 0.86, "grad_norm": 1.7329049110412598, "learning_rate": 9.456643236231111e-07, "loss": 0.8673, "step": 15076 }, { "epoch": 0.86, "grad_norm": 1.6688580513000488, "learning_rate": 9.448759460523871e-07, "loss": 0.9539, "step": 15077 }, { "epoch": 0.86, "grad_norm": 1.9005753993988037, "learning_rate": 9.440878809486975e-07, "loss": 0.9538, "step": 15078 }, { "epoch": 0.86, "grad_norm": 1.6351451873779297, "learning_rate": 9.433001283392407e-07, "loss": 0.8418, "step": 15079 }, { "epoch": 0.86, "grad_norm": 1.8428897857666016, "learning_rate": 9.425126882511948e-07, "loss": 0.8815, "step": 15080 }, { "epoch": 0.86, "grad_norm": 1.722648024559021, "learning_rate": 9.417255607117382e-07, "loss": 0.9011, "step": 15081 }, { "epoch": 0.86, "grad_norm": 1.618863821029663, "learning_rate": 9.409387457480268e-07, "loss": 0.8018, "step": 15082 }, { "epoch": 0.87, "grad_norm": 1.7407630681991577, "learning_rate": 9.401522433872135e-07, "loss": 0.8597, "step": 15083 }, { "epoch": 0.87, "grad_norm": 1.6356356143951416, "learning_rate": 9.393660536564408e-07, "loss": 0.9266, "step": 15084 }, { "epoch": 0.87, "grad_norm": 1.8879754543304443, "learning_rate": 9.385801765828339e-07, "loss": 0.9055, "step": 15085 }, { "epoch": 0.87, "grad_norm": 1.6995222568511963, "learning_rate": 9.377946121935144e-07, "loss": 0.8397, "step": 15086 }, { "epoch": 0.87, "grad_norm": 1.8619149923324585, "learning_rate": 9.370093605155872e-07, "loss": 0.93, "step": 15087 }, { "epoch": 0.87, "grad_norm": 1.723923683166504, "learning_rate": 9.362244215761529e-07, "loss": 0.8559, "step": 15088 }, { "epoch": 0.87, "grad_norm": 1.7686301469802856, "learning_rate": 9.354397954022931e-07, "loss": 0.8448, "step": 15089 }, { "epoch": 0.87, "grad_norm": 1.7810715436935425, "learning_rate": 9.346554820210863e-07, "loss": 0.8413, "step": 15090 }, { "epoch": 0.87, "grad_norm": 1.7437270879745483, "learning_rate": 9.338714814595928e-07, "loss": 0.9076, "step": 15091 }, { "epoch": 0.87, "grad_norm": 1.8411232233047485, "learning_rate": 9.330877937448724e-07, "loss": 0.939, "step": 15092 }, { "epoch": 0.87, "grad_norm": 1.7314757108688354, "learning_rate": 9.323044189039632e-07, "loss": 0.9164, "step": 15093 }, { "epoch": 0.87, "grad_norm": 1.8791905641555786, "learning_rate": 9.315213569639004e-07, "loss": 0.9976, "step": 15094 }, { "epoch": 0.87, "grad_norm": 1.7264031171798706, "learning_rate": 9.307386079517022e-07, "loss": 0.8594, "step": 15095 }, { "epoch": 0.87, "grad_norm": 1.7713074684143066, "learning_rate": 9.299561718943829e-07, "loss": 0.8778, "step": 15096 }, { "epoch": 0.87, "grad_norm": 1.7188791036605835, "learning_rate": 9.291740488189383e-07, "loss": 0.7986, "step": 15097 }, { "epoch": 0.87, "grad_norm": 1.7765638828277588, "learning_rate": 9.283922387523603e-07, "loss": 0.8795, "step": 15098 }, { "epoch": 0.87, "grad_norm": 1.8715845346450806, "learning_rate": 9.27610741721624e-07, "loss": 0.9844, "step": 15099 }, { "epoch": 0.87, "grad_norm": 1.764789342880249, "learning_rate": 9.268295577536979e-07, "loss": 0.8705, "step": 15100 }, { "epoch": 0.87, "grad_norm": 1.7246286869049072, "learning_rate": 9.260486868755414e-07, "loss": 0.8275, "step": 15101 }, { "epoch": 0.87, "grad_norm": 1.606115460395813, "learning_rate": 9.252681291140953e-07, "loss": 0.8048, "step": 15102 }, { "epoch": 0.87, "grad_norm": 1.71206533908844, "learning_rate": 9.24487884496299e-07, "loss": 0.8757, "step": 15103 }, { "epoch": 0.87, "grad_norm": 1.7506659030914307, "learning_rate": 9.237079530490722e-07, "loss": 0.8756, "step": 15104 }, { "epoch": 0.87, "grad_norm": 1.6884684562683105, "learning_rate": 9.229283347993324e-07, "loss": 0.9052, "step": 15105 }, { "epoch": 0.87, "grad_norm": 1.730265498161316, "learning_rate": 9.221490297739777e-07, "loss": 0.8346, "step": 15106 }, { "epoch": 0.87, "grad_norm": 1.0522516965866089, "learning_rate": 9.213700379999036e-07, "loss": 0.5865, "step": 15107 }, { "epoch": 0.87, "grad_norm": 0.9417134523391724, "learning_rate": 9.205913595039883e-07, "loss": 0.5228, "step": 15108 }, { "epoch": 0.87, "grad_norm": 1.880674123764038, "learning_rate": 9.198129943131051e-07, "loss": 0.8776, "step": 15109 }, { "epoch": 0.87, "grad_norm": 1.8207496404647827, "learning_rate": 9.190349424541078e-07, "loss": 0.8326, "step": 15110 }, { "epoch": 0.87, "grad_norm": 1.8347065448760986, "learning_rate": 9.182572039538506e-07, "loss": 0.8712, "step": 15111 }, { "epoch": 0.87, "grad_norm": 1.8834484815597534, "learning_rate": 9.174797788391676e-07, "loss": 0.8954, "step": 15112 }, { "epoch": 0.87, "grad_norm": 1.8277864456176758, "learning_rate": 9.167026671368851e-07, "loss": 0.8122, "step": 15113 }, { "epoch": 0.87, "grad_norm": 1.6601061820983887, "learning_rate": 9.159258688738226e-07, "loss": 0.7879, "step": 15114 }, { "epoch": 0.87, "grad_norm": 1.8744237422943115, "learning_rate": 9.151493840767811e-07, "loss": 0.8523, "step": 15115 }, { "epoch": 0.87, "grad_norm": 1.925638198852539, "learning_rate": 9.143732127725591e-07, "loss": 0.8906, "step": 15116 }, { "epoch": 0.87, "grad_norm": 1.5890482664108276, "learning_rate": 9.135973549879351e-07, "loss": 0.8341, "step": 15117 }, { "epoch": 0.87, "grad_norm": 1.9622678756713867, "learning_rate": 9.128218107496878e-07, "loss": 0.8564, "step": 15118 }, { "epoch": 0.87, "grad_norm": 1.7663103342056274, "learning_rate": 9.120465800845723e-07, "loss": 0.9508, "step": 15119 }, { "epoch": 0.87, "grad_norm": 1.0400755405426025, "learning_rate": 9.112716630193463e-07, "loss": 0.487, "step": 15120 }, { "epoch": 0.87, "grad_norm": 1.796094536781311, "learning_rate": 9.10497059580745e-07, "loss": 0.9509, "step": 15121 }, { "epoch": 0.87, "grad_norm": 1.8268377780914307, "learning_rate": 9.097227697955003e-07, "loss": 0.9927, "step": 15122 }, { "epoch": 0.87, "grad_norm": 1.602809190750122, "learning_rate": 9.089487936903296e-07, "loss": 0.8955, "step": 15123 }, { "epoch": 0.87, "grad_norm": 1.638709306716919, "learning_rate": 9.081751312919406e-07, "loss": 0.9518, "step": 15124 }, { "epoch": 0.87, "grad_norm": 1.8332468271255493, "learning_rate": 9.074017826270332e-07, "loss": 0.9191, "step": 15125 }, { "epoch": 0.87, "grad_norm": 1.694878339767456, "learning_rate": 9.066287477222879e-07, "loss": 0.9124, "step": 15126 }, { "epoch": 0.87, "grad_norm": 1.8779165744781494, "learning_rate": 9.058560266043869e-07, "loss": 0.9178, "step": 15127 }, { "epoch": 0.87, "grad_norm": 1.6200839281082153, "learning_rate": 9.050836192999879e-07, "loss": 0.8678, "step": 15128 }, { "epoch": 0.87, "grad_norm": 1.5960053205490112, "learning_rate": 9.043115258357494e-07, "loss": 0.8739, "step": 15129 }, { "epoch": 0.87, "grad_norm": 1.6541297435760498, "learning_rate": 9.035397462383111e-07, "loss": 0.9939, "step": 15130 }, { "epoch": 0.87, "grad_norm": 1.6889852285385132, "learning_rate": 9.027682805343074e-07, "loss": 0.927, "step": 15131 }, { "epoch": 0.87, "grad_norm": 1.6429500579833984, "learning_rate": 9.01997128750357e-07, "loss": 0.8253, "step": 15132 }, { "epoch": 0.87, "grad_norm": 1.7418010234832764, "learning_rate": 9.012262909130732e-07, "loss": 0.8779, "step": 15133 }, { "epoch": 0.87, "grad_norm": 1.7570445537567139, "learning_rate": 9.004557670490522e-07, "loss": 0.8924, "step": 15134 }, { "epoch": 0.87, "grad_norm": 1.7874356508255005, "learning_rate": 8.99685557184885e-07, "loss": 0.9848, "step": 15135 }, { "epoch": 0.87, "grad_norm": 1.73018479347229, "learning_rate": 8.989156613471473e-07, "loss": 0.8521, "step": 15136 }, { "epoch": 0.87, "grad_norm": 1.5975689888000488, "learning_rate": 8.981460795624075e-07, "loss": 0.863, "step": 15137 }, { "epoch": 0.87, "grad_norm": 1.7245250940322876, "learning_rate": 8.973768118572234e-07, "loss": 0.882, "step": 15138 }, { "epoch": 0.87, "grad_norm": 1.7696150541305542, "learning_rate": 8.966078582581361e-07, "loss": 0.8726, "step": 15139 }, { "epoch": 0.87, "grad_norm": 1.6990641355514526, "learning_rate": 8.958392187916842e-07, "loss": 0.8661, "step": 15140 }, { "epoch": 0.87, "grad_norm": 2.181406259536743, "learning_rate": 8.950708934843876e-07, "loss": 0.9239, "step": 15141 }, { "epoch": 0.87, "grad_norm": 1.7135200500488281, "learning_rate": 8.94302882362762e-07, "loss": 0.9212, "step": 15142 }, { "epoch": 0.87, "grad_norm": 1.819069743156433, "learning_rate": 8.93535185453308e-07, "loss": 0.8345, "step": 15143 }, { "epoch": 0.87, "grad_norm": 1.9195365905761719, "learning_rate": 8.92767802782517e-07, "loss": 0.9481, "step": 15144 }, { "epoch": 0.87, "grad_norm": 1.87208092212677, "learning_rate": 8.920007343768689e-07, "loss": 0.9822, "step": 15145 }, { "epoch": 0.87, "grad_norm": 1.758432388305664, "learning_rate": 8.912339802628333e-07, "loss": 0.8813, "step": 15146 }, { "epoch": 0.87, "grad_norm": 1.6271404027938843, "learning_rate": 8.904675404668683e-07, "loss": 0.8822, "step": 15147 }, { "epoch": 0.87, "grad_norm": 1.6618837118148804, "learning_rate": 8.897014150154237e-07, "loss": 0.8927, "step": 15148 }, { "epoch": 0.87, "grad_norm": 1.6085976362228394, "learning_rate": 8.889356039349317e-07, "loss": 0.8504, "step": 15149 }, { "epoch": 0.87, "grad_norm": 1.740433931350708, "learning_rate": 8.881701072518223e-07, "loss": 0.871, "step": 15150 }, { "epoch": 0.87, "grad_norm": 1.8165339231491089, "learning_rate": 8.874049249925121e-07, "loss": 0.8814, "step": 15151 }, { "epoch": 0.87, "grad_norm": 1.7171921730041504, "learning_rate": 8.866400571833999e-07, "loss": 0.8754, "step": 15152 }, { "epoch": 0.87, "grad_norm": 1.7877271175384521, "learning_rate": 8.858755038508849e-07, "loss": 0.8934, "step": 15153 }, { "epoch": 0.87, "grad_norm": 1.6933064460754395, "learning_rate": 8.851112650213445e-07, "loss": 0.9077, "step": 15154 }, { "epoch": 0.87, "grad_norm": 1.8075060844421387, "learning_rate": 8.843473407211545e-07, "loss": 0.8545, "step": 15155 }, { "epoch": 0.87, "grad_norm": 1.7054356336593628, "learning_rate": 8.835837309766726e-07, "loss": 0.8384, "step": 15156 }, { "epoch": 0.87, "grad_norm": 1.9078304767608643, "learning_rate": 8.828204358142511e-07, "loss": 0.8444, "step": 15157 }, { "epoch": 0.87, "grad_norm": 1.974923849105835, "learning_rate": 8.820574552602279e-07, "loss": 0.9792, "step": 15158 }, { "epoch": 0.87, "grad_norm": 1.695038080215454, "learning_rate": 8.81294789340934e-07, "loss": 0.9137, "step": 15159 }, { "epoch": 0.87, "grad_norm": 1.7332298755645752, "learning_rate": 8.805324380826829e-07, "loss": 0.9554, "step": 15160 }, { "epoch": 0.87, "grad_norm": 1.6782861948013306, "learning_rate": 8.797704015117847e-07, "loss": 0.9144, "step": 15161 }, { "epoch": 0.87, "grad_norm": 1.7296689748764038, "learning_rate": 8.790086796545328e-07, "loss": 0.9229, "step": 15162 }, { "epoch": 0.87, "grad_norm": 1.762547492980957, "learning_rate": 8.782472725372138e-07, "loss": 0.9497, "step": 15163 }, { "epoch": 0.87, "grad_norm": 1.6892166137695312, "learning_rate": 8.774861801861001e-07, "loss": 0.9997, "step": 15164 }, { "epoch": 0.87, "grad_norm": 1.9292035102844238, "learning_rate": 8.767254026274563e-07, "loss": 0.9049, "step": 15165 }, { "epoch": 0.87, "grad_norm": 1.7236320972442627, "learning_rate": 8.759649398875325e-07, "loss": 0.8938, "step": 15166 }, { "epoch": 0.87, "grad_norm": 1.6645889282226562, "learning_rate": 8.752047919925722e-07, "loss": 0.9131, "step": 15167 }, { "epoch": 0.87, "grad_norm": 1.8525749444961548, "learning_rate": 8.744449589688064e-07, "loss": 0.8895, "step": 15168 }, { "epoch": 0.87, "grad_norm": 1.763445496559143, "learning_rate": 8.736854408424522e-07, "loss": 0.8549, "step": 15169 }, { "epoch": 0.87, "grad_norm": 1.6880347728729248, "learning_rate": 8.729262376397219e-07, "loss": 0.9102, "step": 15170 }, { "epoch": 0.87, "grad_norm": 2.0608956813812256, "learning_rate": 8.721673493868111e-07, "loss": 0.8427, "step": 15171 }, { "epoch": 0.87, "grad_norm": 1.8918505907058716, "learning_rate": 8.714087761099077e-07, "loss": 0.8568, "step": 15172 }, { "epoch": 0.87, "grad_norm": 1.645162582397461, "learning_rate": 8.706505178351865e-07, "loss": 0.8695, "step": 15173 }, { "epoch": 0.87, "grad_norm": 1.6317764520645142, "learning_rate": 8.698925745888165e-07, "loss": 0.8892, "step": 15174 }, { "epoch": 0.87, "grad_norm": 1.7027106285095215, "learning_rate": 8.691349463969467e-07, "loss": 0.8413, "step": 15175 }, { "epoch": 0.87, "grad_norm": 1.9443846940994263, "learning_rate": 8.683776332857274e-07, "loss": 0.9003, "step": 15176 }, { "epoch": 0.87, "grad_norm": 1.9066749811172485, "learning_rate": 8.676206352812844e-07, "loss": 0.9099, "step": 15177 }, { "epoch": 0.87, "grad_norm": 1.6307892799377441, "learning_rate": 8.668639524097444e-07, "loss": 0.9106, "step": 15178 }, { "epoch": 0.87, "grad_norm": 1.8742786645889282, "learning_rate": 8.661075846972177e-07, "loss": 0.8782, "step": 15179 }, { "epoch": 0.87, "grad_norm": 1.99409818649292, "learning_rate": 8.653515321698025e-07, "loss": 0.8777, "step": 15180 }, { "epoch": 0.87, "grad_norm": 1.911147117614746, "learning_rate": 8.64595794853591e-07, "loss": 0.9458, "step": 15181 }, { "epoch": 0.87, "grad_norm": 1.8192955255508423, "learning_rate": 8.63840372774658e-07, "loss": 0.931, "step": 15182 }, { "epoch": 0.87, "grad_norm": 1.830049753189087, "learning_rate": 8.630852659590749e-07, "loss": 0.8523, "step": 15183 }, { "epoch": 0.87, "grad_norm": 1.703125238418579, "learning_rate": 8.623304744328942e-07, "loss": 0.894, "step": 15184 }, { "epoch": 0.87, "grad_norm": 0.9576330184936523, "learning_rate": 8.61575998222166e-07, "loss": 0.5642, "step": 15185 }, { "epoch": 0.87, "grad_norm": 1.644372582435608, "learning_rate": 8.608218373529209e-07, "loss": 0.8432, "step": 15186 }, { "epoch": 0.87, "grad_norm": 1.6589711904525757, "learning_rate": 8.600679918511868e-07, "loss": 0.88, "step": 15187 }, { "epoch": 0.87, "grad_norm": 1.9155924320220947, "learning_rate": 8.593144617429727e-07, "loss": 0.8935, "step": 15188 }, { "epoch": 0.87, "grad_norm": 1.742495059967041, "learning_rate": 8.585612470542859e-07, "loss": 0.8478, "step": 15189 }, { "epoch": 0.87, "grad_norm": 1.8457953929901123, "learning_rate": 8.578083478111121e-07, "loss": 0.8878, "step": 15190 }, { "epoch": 0.87, "grad_norm": 1.741173505783081, "learning_rate": 8.570557640394351e-07, "loss": 0.8439, "step": 15191 }, { "epoch": 0.87, "grad_norm": 1.852959394454956, "learning_rate": 8.56303495765225e-07, "loss": 0.9241, "step": 15192 }, { "epoch": 0.87, "grad_norm": 1.8397605419158936, "learning_rate": 8.555515430144379e-07, "loss": 0.8774, "step": 15193 }, { "epoch": 0.87, "grad_norm": 1.769058346748352, "learning_rate": 8.547999058130252e-07, "loss": 0.8968, "step": 15194 }, { "epoch": 0.87, "grad_norm": 1.7914997339248657, "learning_rate": 8.540485841869195e-07, "loss": 0.8567, "step": 15195 }, { "epoch": 0.87, "grad_norm": 1.8630309104919434, "learning_rate": 8.532975781620511e-07, "loss": 0.9072, "step": 15196 }, { "epoch": 0.87, "grad_norm": 1.7516695261001587, "learning_rate": 8.525468877643316e-07, "loss": 1.0045, "step": 15197 }, { "epoch": 0.87, "grad_norm": 1.6907166242599487, "learning_rate": 8.517965130196681e-07, "loss": 0.871, "step": 15198 }, { "epoch": 0.87, "grad_norm": 1.8259077072143555, "learning_rate": 8.510464539539498e-07, "loss": 0.8816, "step": 15199 }, { "epoch": 0.87, "grad_norm": 1.5879443883895874, "learning_rate": 8.502967105930648e-07, "loss": 0.9223, "step": 15200 }, { "epoch": 0.87, "grad_norm": 1.8313791751861572, "learning_rate": 8.495472829628793e-07, "loss": 0.9014, "step": 15201 }, { "epoch": 0.87, "grad_norm": 1.8462830781936646, "learning_rate": 8.487981710892579e-07, "loss": 0.853, "step": 15202 }, { "epoch": 0.87, "grad_norm": 1.8247673511505127, "learning_rate": 8.480493749980468e-07, "loss": 0.9014, "step": 15203 }, { "epoch": 0.87, "grad_norm": 1.5998510122299194, "learning_rate": 8.473008947150873e-07, "loss": 0.824, "step": 15204 }, { "epoch": 0.87, "grad_norm": 0.997332751750946, "learning_rate": 8.465527302662091e-07, "loss": 0.5273, "step": 15205 }, { "epoch": 0.87, "grad_norm": 1.5914613008499146, "learning_rate": 8.458048816772246e-07, "loss": 0.7913, "step": 15206 }, { "epoch": 0.87, "grad_norm": 1.6336804628372192, "learning_rate": 8.450573489739445e-07, "loss": 0.7912, "step": 15207 }, { "epoch": 0.87, "grad_norm": 1.8204389810562134, "learning_rate": 8.443101321821601e-07, "loss": 0.8699, "step": 15208 }, { "epoch": 0.87, "grad_norm": 1.7686796188354492, "learning_rate": 8.435632313276587e-07, "loss": 0.9126, "step": 15209 }, { "epoch": 0.87, "grad_norm": 1.9280214309692383, "learning_rate": 8.428166464362119e-07, "loss": 0.9493, "step": 15210 }, { "epoch": 0.87, "grad_norm": 1.7088472843170166, "learning_rate": 8.420703775335848e-07, "loss": 0.8734, "step": 15211 }, { "epoch": 0.87, "grad_norm": 1.6044533252716064, "learning_rate": 8.413244246455255e-07, "loss": 0.9046, "step": 15212 }, { "epoch": 0.87, "grad_norm": 1.8642398118972778, "learning_rate": 8.405787877977778e-07, "loss": 0.865, "step": 15213 }, { "epoch": 0.87, "grad_norm": 1.808875560760498, "learning_rate": 8.39833467016069e-07, "loss": 0.9862, "step": 15214 }, { "epoch": 0.87, "grad_norm": 1.900651216506958, "learning_rate": 8.390884623261198e-07, "loss": 0.9192, "step": 15215 }, { "epoch": 0.87, "grad_norm": 1.8250494003295898, "learning_rate": 8.383437737536382e-07, "loss": 0.8297, "step": 15216 }, { "epoch": 0.87, "grad_norm": 1.6764636039733887, "learning_rate": 8.375994013243205e-07, "loss": 0.9021, "step": 15217 }, { "epoch": 0.87, "grad_norm": 1.7157115936279297, "learning_rate": 8.368553450638539e-07, "loss": 0.7863, "step": 15218 }, { "epoch": 0.87, "grad_norm": 1.7138679027557373, "learning_rate": 8.361116049979124e-07, "loss": 0.8739, "step": 15219 }, { "epoch": 0.87, "grad_norm": 3.0029304027557373, "learning_rate": 8.35368181152163e-07, "loss": 0.8971, "step": 15220 }, { "epoch": 0.87, "grad_norm": 1.6574784517288208, "learning_rate": 8.346250735522543e-07, "loss": 0.8902, "step": 15221 }, { "epoch": 0.87, "grad_norm": 1.8043324947357178, "learning_rate": 8.338822822238346e-07, "loss": 0.9497, "step": 15222 }, { "epoch": 0.87, "grad_norm": 1.6096408367156982, "learning_rate": 8.33139807192529e-07, "loss": 0.8967, "step": 15223 }, { "epoch": 0.87, "grad_norm": 1.7971632480621338, "learning_rate": 8.323976484839657e-07, "loss": 0.8081, "step": 15224 }, { "epoch": 0.87, "grad_norm": 1.8918967247009277, "learning_rate": 8.31655806123749e-07, "loss": 0.9406, "step": 15225 }, { "epoch": 0.87, "grad_norm": 2.0474677085876465, "learning_rate": 8.309142801374825e-07, "loss": 0.9861, "step": 15226 }, { "epoch": 0.87, "grad_norm": 1.6858329772949219, "learning_rate": 8.301730705507483e-07, "loss": 0.8984, "step": 15227 }, { "epoch": 0.87, "grad_norm": 1.6777770519256592, "learning_rate": 8.294321773891289e-07, "loss": 0.8557, "step": 15228 }, { "epoch": 0.87, "grad_norm": 1.7256790399551392, "learning_rate": 8.286916006781865e-07, "loss": 0.8595, "step": 15229 }, { "epoch": 0.87, "grad_norm": 1.7659342288970947, "learning_rate": 8.279513404434792e-07, "loss": 0.8736, "step": 15230 }, { "epoch": 0.87, "grad_norm": 1.8282809257507324, "learning_rate": 8.272113967105477e-07, "loss": 0.8682, "step": 15231 }, { "epoch": 0.87, "grad_norm": 1.0442488193511963, "learning_rate": 8.264717695049284e-07, "loss": 0.5211, "step": 15232 }, { "epoch": 0.87, "grad_norm": 1.88129460811615, "learning_rate": 8.257324588521454e-07, "loss": 0.9203, "step": 15233 }, { "epoch": 0.87, "grad_norm": 1.0429025888442993, "learning_rate": 8.249934647777058e-07, "loss": 0.5053, "step": 15234 }, { "epoch": 0.87, "grad_norm": 1.761221170425415, "learning_rate": 8.24254787307115e-07, "loss": 0.9238, "step": 15235 }, { "epoch": 0.87, "grad_norm": 1.5914571285247803, "learning_rate": 8.235164264658568e-07, "loss": 0.7877, "step": 15236 }, { "epoch": 0.87, "grad_norm": 1.6997902393341064, "learning_rate": 8.227783822794155e-07, "loss": 0.9062, "step": 15237 }, { "epoch": 0.87, "grad_norm": 1.7110528945922852, "learning_rate": 8.220406547732551e-07, "loss": 0.8518, "step": 15238 }, { "epoch": 0.87, "grad_norm": 1.6429951190948486, "learning_rate": 8.213032439728364e-07, "loss": 0.8685, "step": 15239 }, { "epoch": 0.87, "grad_norm": 1.8685827255249023, "learning_rate": 8.20566149903601e-07, "loss": 0.9036, "step": 15240 }, { "epoch": 0.87, "grad_norm": 1.7199651002883911, "learning_rate": 8.198293725909867e-07, "loss": 0.8735, "step": 15241 }, { "epoch": 0.87, "grad_norm": 1.9008992910385132, "learning_rate": 8.190929120604163e-07, "loss": 0.9161, "step": 15242 }, { "epoch": 0.87, "grad_norm": 1.880269169807434, "learning_rate": 8.183567683373062e-07, "loss": 0.9481, "step": 15243 }, { "epoch": 0.87, "grad_norm": 1.0379102230072021, "learning_rate": 8.176209414470526e-07, "loss": 0.5368, "step": 15244 }, { "epoch": 0.87, "grad_norm": 1.7062644958496094, "learning_rate": 8.16885431415052e-07, "loss": 0.8623, "step": 15245 }, { "epoch": 0.87, "grad_norm": 1.8536968231201172, "learning_rate": 8.161502382666841e-07, "loss": 0.8754, "step": 15246 }, { "epoch": 0.87, "grad_norm": 1.8501344919204712, "learning_rate": 8.154153620273153e-07, "loss": 0.897, "step": 15247 }, { "epoch": 0.87, "grad_norm": 1.8082139492034912, "learning_rate": 8.146808027223085e-07, "loss": 0.9115, "step": 15248 }, { "epoch": 0.87, "grad_norm": 1.7853705883026123, "learning_rate": 8.139465603770069e-07, "loss": 0.8958, "step": 15249 }, { "epoch": 0.87, "grad_norm": 1.682013988494873, "learning_rate": 8.132126350167513e-07, "loss": 0.8813, "step": 15250 }, { "epoch": 0.87, "grad_norm": 1.837856411933899, "learning_rate": 8.124790266668647e-07, "loss": 0.9178, "step": 15251 }, { "epoch": 0.87, "grad_norm": 1.8251163959503174, "learning_rate": 8.117457353526626e-07, "loss": 0.9806, "step": 15252 }, { "epoch": 0.87, "grad_norm": 1.8140475749969482, "learning_rate": 8.110127610994478e-07, "loss": 0.8961, "step": 15253 }, { "epoch": 0.87, "grad_norm": 1.8378630876541138, "learning_rate": 8.10280103932517e-07, "loss": 0.9992, "step": 15254 }, { "epoch": 0.87, "grad_norm": 1.7668818235397339, "learning_rate": 8.095477638771465e-07, "loss": 0.8274, "step": 15255 }, { "epoch": 0.87, "grad_norm": 1.733489751815796, "learning_rate": 8.088157409586094e-07, "loss": 0.8541, "step": 15256 }, { "epoch": 0.88, "grad_norm": 1.7906293869018555, "learning_rate": 8.080840352021702e-07, "loss": 0.8856, "step": 15257 }, { "epoch": 0.88, "grad_norm": 1.6362950801849365, "learning_rate": 8.073526466330716e-07, "loss": 0.9162, "step": 15258 }, { "epoch": 0.88, "grad_norm": 1.8058305978775024, "learning_rate": 8.06621575276556e-07, "loss": 0.9176, "step": 15259 }, { "epoch": 0.88, "grad_norm": 1.8573954105377197, "learning_rate": 8.058908211578476e-07, "loss": 0.9214, "step": 15260 }, { "epoch": 0.88, "grad_norm": 1.6094926595687866, "learning_rate": 8.05160384302166e-07, "loss": 0.9377, "step": 15261 }, { "epoch": 0.88, "grad_norm": 1.8726974725723267, "learning_rate": 8.044302647347135e-07, "loss": 0.8235, "step": 15262 }, { "epoch": 0.88, "grad_norm": 1.7805413007736206, "learning_rate": 8.037004624806866e-07, "loss": 0.9444, "step": 15263 }, { "epoch": 0.88, "grad_norm": 1.680458664894104, "learning_rate": 8.029709775652672e-07, "loss": 0.8491, "step": 15264 }, { "epoch": 0.88, "grad_norm": 1.614868402481079, "learning_rate": 8.022418100136298e-07, "loss": 0.8931, "step": 15265 }, { "epoch": 0.88, "grad_norm": 1.6888545751571655, "learning_rate": 8.01512959850933e-07, "loss": 0.8708, "step": 15266 }, { "epoch": 0.88, "grad_norm": 1.7408480644226074, "learning_rate": 8.007844271023301e-07, "loss": 0.8794, "step": 15267 }, { "epoch": 0.88, "grad_norm": 1.7664724588394165, "learning_rate": 8.000562117929589e-07, "loss": 0.8884, "step": 15268 }, { "epoch": 0.88, "grad_norm": 1.9032137393951416, "learning_rate": 7.993283139479479e-07, "loss": 0.8686, "step": 15269 }, { "epoch": 0.88, "grad_norm": 1.7440332174301147, "learning_rate": 7.986007335924184e-07, "loss": 0.8824, "step": 15270 }, { "epoch": 0.88, "grad_norm": 1.8190518617630005, "learning_rate": 7.978734707514724e-07, "loss": 0.9718, "step": 15271 }, { "epoch": 0.88, "grad_norm": 1.6874903440475464, "learning_rate": 7.971465254502097e-07, "loss": 0.9005, "step": 15272 }, { "epoch": 0.88, "grad_norm": 1.0436069965362549, "learning_rate": 7.964198977137116e-07, "loss": 0.513, "step": 15273 }, { "epoch": 0.88, "grad_norm": 1.8280973434448242, "learning_rate": 7.956935875670547e-07, "loss": 0.8382, "step": 15274 }, { "epoch": 0.88, "grad_norm": 1.0568522214889526, "learning_rate": 7.949675950352998e-07, "loss": 0.5608, "step": 15275 }, { "epoch": 0.88, "grad_norm": 1.754268765449524, "learning_rate": 7.942419201435014e-07, "loss": 0.8751, "step": 15276 }, { "epoch": 0.88, "grad_norm": 1.7185472249984741, "learning_rate": 7.935165629166974e-07, "loss": 0.8699, "step": 15277 }, { "epoch": 0.88, "grad_norm": 1.7946736812591553, "learning_rate": 7.927915233799221e-07, "loss": 0.8884, "step": 15278 }, { "epoch": 0.88, "grad_norm": 1.730396032333374, "learning_rate": 7.92066801558189e-07, "loss": 0.8469, "step": 15279 }, { "epoch": 0.88, "grad_norm": 1.8024253845214844, "learning_rate": 7.913423974765111e-07, "loss": 0.9022, "step": 15280 }, { "epoch": 0.88, "grad_norm": 1.9227482080459595, "learning_rate": 7.906183111598831e-07, "loss": 0.8795, "step": 15281 }, { "epoch": 0.88, "grad_norm": 1.9201732873916626, "learning_rate": 7.898945426332905e-07, "loss": 0.8677, "step": 15282 }, { "epoch": 0.88, "grad_norm": 0.964017391204834, "learning_rate": 7.891710919217133e-07, "loss": 0.5305, "step": 15283 }, { "epoch": 0.88, "grad_norm": 1.7004503011703491, "learning_rate": 7.884479590501093e-07, "loss": 0.9149, "step": 15284 }, { "epoch": 0.88, "grad_norm": 1.8003718852996826, "learning_rate": 7.877251440434363e-07, "loss": 0.9443, "step": 15285 }, { "epoch": 0.88, "grad_norm": 1.7350023984909058, "learning_rate": 7.870026469266334e-07, "loss": 0.8812, "step": 15286 }, { "epoch": 0.88, "grad_norm": 1.770645260810852, "learning_rate": 7.86280467724636e-07, "loss": 0.9295, "step": 15287 }, { "epoch": 0.88, "grad_norm": 1.8011971712112427, "learning_rate": 7.85558606462361e-07, "loss": 0.8509, "step": 15288 }, { "epoch": 0.88, "grad_norm": 1.7753578424453735, "learning_rate": 7.848370631647184e-07, "loss": 0.8769, "step": 15289 }, { "epoch": 0.88, "grad_norm": 1.6029423475265503, "learning_rate": 7.84115837856606e-07, "loss": 0.8389, "step": 15290 }, { "epoch": 0.88, "grad_norm": 1.6923832893371582, "learning_rate": 7.833949305629163e-07, "loss": 0.9069, "step": 15291 }, { "epoch": 0.88, "grad_norm": 2.0122928619384766, "learning_rate": 7.826743413085192e-07, "loss": 0.8846, "step": 15292 }, { "epoch": 0.88, "grad_norm": 1.7595362663269043, "learning_rate": 7.819540701182848e-07, "loss": 0.8581, "step": 15293 }, { "epoch": 0.88, "grad_norm": 1.6417657136917114, "learning_rate": 7.812341170170646e-07, "loss": 0.9159, "step": 15294 }, { "epoch": 0.88, "grad_norm": 1.5641648769378662, "learning_rate": 7.80514482029704e-07, "loss": 0.8807, "step": 15295 }, { "epoch": 0.88, "grad_norm": 1.8082481622695923, "learning_rate": 7.797951651810343e-07, "loss": 0.9057, "step": 15296 }, { "epoch": 0.88, "grad_norm": 1.857820749282837, "learning_rate": 7.790761664958791e-07, "loss": 0.8962, "step": 15297 }, { "epoch": 0.88, "grad_norm": 1.7280539274215698, "learning_rate": 7.783574859990461e-07, "loss": 0.9306, "step": 15298 }, { "epoch": 0.88, "grad_norm": 1.8621299266815186, "learning_rate": 7.776391237153369e-07, "loss": 0.894, "step": 15299 }, { "epoch": 0.88, "grad_norm": 2.017876148223877, "learning_rate": 7.769210796695415e-07, "loss": 0.9237, "step": 15300 }, { "epoch": 0.88, "grad_norm": 1.8639944791793823, "learning_rate": 7.762033538864344e-07, "loss": 0.783, "step": 15301 }, { "epoch": 0.88, "grad_norm": 1.844602108001709, "learning_rate": 7.75485946390786e-07, "loss": 0.8771, "step": 15302 }, { "epoch": 0.88, "grad_norm": 1.8975077867507935, "learning_rate": 7.747688572073475e-07, "loss": 0.9442, "step": 15303 }, { "epoch": 0.88, "grad_norm": 1.7101982831954956, "learning_rate": 7.740520863608681e-07, "loss": 0.8695, "step": 15304 }, { "epoch": 0.88, "grad_norm": 1.7556662559509277, "learning_rate": 7.733356338760778e-07, "loss": 0.8965, "step": 15305 }, { "epoch": 0.88, "grad_norm": 1.7240653038024902, "learning_rate": 7.726194997777036e-07, "loss": 0.8471, "step": 15306 }, { "epoch": 0.88, "grad_norm": 1.7658778429031372, "learning_rate": 7.719036840904525e-07, "loss": 0.9852, "step": 15307 }, { "epoch": 0.88, "grad_norm": 1.7187371253967285, "learning_rate": 7.711881868390292e-07, "loss": 0.8245, "step": 15308 }, { "epoch": 0.88, "grad_norm": 1.834443211555481, "learning_rate": 7.704730080481205e-07, "loss": 0.8566, "step": 15309 }, { "epoch": 0.88, "grad_norm": 1.628485083580017, "learning_rate": 7.697581477424055e-07, "loss": 1.0261, "step": 15310 }, { "epoch": 0.88, "grad_norm": 1.8690414428710938, "learning_rate": 7.690436059465567e-07, "loss": 0.9066, "step": 15311 }, { "epoch": 0.88, "grad_norm": 1.736380934715271, "learning_rate": 7.683293826852245e-07, "loss": 0.859, "step": 15312 }, { "epoch": 0.88, "grad_norm": 1.8093090057373047, "learning_rate": 7.676154779830591e-07, "loss": 0.8795, "step": 15313 }, { "epoch": 0.88, "grad_norm": 1.7221871614456177, "learning_rate": 7.669018918646932e-07, "loss": 0.8856, "step": 15314 }, { "epoch": 0.88, "grad_norm": 1.830049991607666, "learning_rate": 7.661886243547534e-07, "loss": 0.9, "step": 15315 }, { "epoch": 0.88, "grad_norm": 1.6073338985443115, "learning_rate": 7.654756754778481e-07, "loss": 0.8534, "step": 15316 }, { "epoch": 0.88, "grad_norm": 1.694115161895752, "learning_rate": 7.64763045258583e-07, "loss": 0.9407, "step": 15317 }, { "epoch": 0.88, "grad_norm": 1.664668321609497, "learning_rate": 7.640507337215463e-07, "loss": 0.8868, "step": 15318 }, { "epoch": 0.88, "grad_norm": 1.7007136344909668, "learning_rate": 7.633387408913207e-07, "loss": 0.9312, "step": 15319 }, { "epoch": 0.88, "grad_norm": 1.8537704944610596, "learning_rate": 7.626270667924728e-07, "loss": 0.9057, "step": 15320 }, { "epoch": 0.88, "grad_norm": 1.0101597309112549, "learning_rate": 7.619157114495623e-07, "loss": 0.4836, "step": 15321 }, { "epoch": 0.88, "grad_norm": 1.8031200170516968, "learning_rate": 7.612046748871327e-07, "loss": 0.8791, "step": 15322 }, { "epoch": 0.88, "grad_norm": 1.7335458993911743, "learning_rate": 7.604939571297232e-07, "loss": 0.8428, "step": 15323 }, { "epoch": 0.88, "grad_norm": 1.6624542474746704, "learning_rate": 7.597835582018586e-07, "loss": 0.8616, "step": 15324 }, { "epoch": 0.88, "grad_norm": 1.754930019378662, "learning_rate": 7.590734781280506e-07, "loss": 0.9545, "step": 15325 }, { "epoch": 0.88, "grad_norm": 1.8274152278900146, "learning_rate": 7.583637169328062e-07, "loss": 0.9444, "step": 15326 }, { "epoch": 0.88, "grad_norm": 1.8776918649673462, "learning_rate": 7.576542746406112e-07, "loss": 0.8457, "step": 15327 }, { "epoch": 0.88, "grad_norm": 1.7671899795532227, "learning_rate": 7.569451512759518e-07, "loss": 0.8489, "step": 15328 }, { "epoch": 0.88, "grad_norm": 1.9043998718261719, "learning_rate": 7.562363468632949e-07, "loss": 0.9008, "step": 15329 }, { "epoch": 0.88, "grad_norm": 1.903128743171692, "learning_rate": 7.555278614271011e-07, "loss": 0.8482, "step": 15330 }, { "epoch": 0.88, "grad_norm": 1.8257449865341187, "learning_rate": 7.548196949918152e-07, "loss": 0.8914, "step": 15331 }, { "epoch": 0.88, "grad_norm": 1.7120627164840698, "learning_rate": 7.541118475818787e-07, "loss": 0.8215, "step": 15332 }, { "epoch": 0.88, "grad_norm": 1.6204255819320679, "learning_rate": 7.534043192217133e-07, "loss": 0.9274, "step": 15333 }, { "epoch": 0.88, "grad_norm": 1.7735533714294434, "learning_rate": 7.526971099357372e-07, "loss": 0.8458, "step": 15334 }, { "epoch": 0.88, "grad_norm": 1.703644037246704, "learning_rate": 7.519902197483508e-07, "loss": 0.8486, "step": 15335 }, { "epoch": 0.88, "grad_norm": 1.8146334886550903, "learning_rate": 7.512836486839492e-07, "loss": 0.8954, "step": 15336 }, { "epoch": 0.88, "grad_norm": 2.004289150238037, "learning_rate": 7.50577396766915e-07, "loss": 0.8612, "step": 15337 }, { "epoch": 0.88, "grad_norm": 0.9826062917709351, "learning_rate": 7.498714640216154e-07, "loss": 0.5011, "step": 15338 }, { "epoch": 0.88, "grad_norm": 1.919113278388977, "learning_rate": 7.491658504724142e-07, "loss": 0.9769, "step": 15339 }, { "epoch": 0.88, "grad_norm": 1.7631869316101074, "learning_rate": 7.484605561436575e-07, "loss": 0.8657, "step": 15340 }, { "epoch": 0.88, "grad_norm": 1.0938211679458618, "learning_rate": 7.477555810596848e-07, "loss": 0.4978, "step": 15341 }, { "epoch": 0.88, "grad_norm": 1.8437411785125732, "learning_rate": 7.470509252448199e-07, "loss": 0.9147, "step": 15342 }, { "epoch": 0.88, "grad_norm": 1.836773157119751, "learning_rate": 7.463465887233834e-07, "loss": 0.8811, "step": 15343 }, { "epoch": 0.88, "grad_norm": 1.7091957330703735, "learning_rate": 7.456425715196747e-07, "loss": 0.8978, "step": 15344 }, { "epoch": 0.88, "grad_norm": 1.8344340324401855, "learning_rate": 7.44938873657991e-07, "loss": 0.8608, "step": 15345 }, { "epoch": 0.88, "grad_norm": 1.8668969869613647, "learning_rate": 7.44235495162613e-07, "loss": 0.8088, "step": 15346 }, { "epoch": 0.88, "grad_norm": 1.6947628259658813, "learning_rate": 7.435324360578122e-07, "loss": 0.9283, "step": 15347 }, { "epoch": 0.88, "grad_norm": 1.8333630561828613, "learning_rate": 7.428296963678527e-07, "loss": 0.888, "step": 15348 }, { "epoch": 0.88, "grad_norm": 1.9681718349456787, "learning_rate": 7.421272761169795e-07, "loss": 0.9329, "step": 15349 }, { "epoch": 0.88, "grad_norm": 1.6789928674697876, "learning_rate": 7.414251753294344e-07, "loss": 0.847, "step": 15350 }, { "epoch": 0.88, "grad_norm": 1.9197742938995361, "learning_rate": 7.407233940294422e-07, "loss": 0.8764, "step": 15351 }, { "epoch": 0.88, "grad_norm": 1.6077734231948853, "learning_rate": 7.400219322412239e-07, "loss": 0.9421, "step": 15352 }, { "epoch": 0.88, "grad_norm": 1.6395260095596313, "learning_rate": 7.393207899889787e-07, "loss": 0.822, "step": 15353 }, { "epoch": 0.88, "grad_norm": 1.7577667236328125, "learning_rate": 7.386199672969063e-07, "loss": 0.9119, "step": 15354 }, { "epoch": 0.88, "grad_norm": 1.912465214729309, "learning_rate": 7.379194641891874e-07, "loss": 0.8642, "step": 15355 }, { "epoch": 0.88, "grad_norm": 1.8100286722183228, "learning_rate": 7.372192806899947e-07, "loss": 0.8424, "step": 15356 }, { "epoch": 0.88, "grad_norm": 2.0536580085754395, "learning_rate": 7.365194168234902e-07, "loss": 0.8748, "step": 15357 }, { "epoch": 0.88, "grad_norm": 1.845513939857483, "learning_rate": 7.358198726138255e-07, "loss": 1.0209, "step": 15358 }, { "epoch": 0.88, "grad_norm": 1.7771759033203125, "learning_rate": 7.35120648085137e-07, "loss": 0.7919, "step": 15359 }, { "epoch": 0.88, "grad_norm": 1.8251824378967285, "learning_rate": 7.344217432615564e-07, "loss": 0.8886, "step": 15360 }, { "epoch": 0.88, "grad_norm": 1.6980317831039429, "learning_rate": 7.337231581671977e-07, "loss": 0.8138, "step": 15361 }, { "epoch": 0.88, "grad_norm": 1.7436755895614624, "learning_rate": 7.330248928261697e-07, "loss": 1.0136, "step": 15362 }, { "epoch": 0.88, "grad_norm": 1.0328305959701538, "learning_rate": 7.32326947262565e-07, "loss": 0.5768, "step": 15363 }, { "epoch": 0.88, "grad_norm": 1.8750241994857788, "learning_rate": 7.316293215004689e-07, "loss": 0.9608, "step": 15364 }, { "epoch": 0.88, "grad_norm": 1.871216893196106, "learning_rate": 7.309320155639565e-07, "loss": 0.9215, "step": 15365 }, { "epoch": 0.88, "grad_norm": 2.061514377593994, "learning_rate": 7.302350294770866e-07, "loss": 0.8723, "step": 15366 }, { "epoch": 0.88, "grad_norm": 1.7541924715042114, "learning_rate": 7.29538363263913e-07, "loss": 0.8933, "step": 15367 }, { "epoch": 0.88, "grad_norm": 1.8216084241867065, "learning_rate": 7.288420169484734e-07, "loss": 0.8812, "step": 15368 }, { "epoch": 0.88, "grad_norm": 1.685610294342041, "learning_rate": 7.281459905547994e-07, "loss": 0.8764, "step": 15369 }, { "epoch": 0.88, "grad_norm": 1.8366074562072754, "learning_rate": 7.274502841069053e-07, "loss": 0.8876, "step": 15370 }, { "epoch": 0.88, "grad_norm": 1.7516920566558838, "learning_rate": 7.267548976288019e-07, "loss": 0.8532, "step": 15371 }, { "epoch": 0.88, "grad_norm": 1.838331937789917, "learning_rate": 7.260598311444822e-07, "loss": 0.9454, "step": 15372 }, { "epoch": 0.88, "grad_norm": 1.7801814079284668, "learning_rate": 7.253650846779325e-07, "loss": 0.8898, "step": 15373 }, { "epoch": 0.88, "grad_norm": 1.8323547840118408, "learning_rate": 7.246706582531249e-07, "loss": 0.9131, "step": 15374 }, { "epoch": 0.88, "grad_norm": 1.8539369106292725, "learning_rate": 7.239765518940256e-07, "loss": 0.9305, "step": 15375 }, { "epoch": 0.88, "grad_norm": 1.696033000946045, "learning_rate": 7.232827656245823e-07, "loss": 0.9876, "step": 15376 }, { "epoch": 0.88, "grad_norm": 1.7499042749404907, "learning_rate": 7.225892994687367e-07, "loss": 0.9156, "step": 15377 }, { "epoch": 0.88, "grad_norm": 1.9029293060302734, "learning_rate": 7.218961534504209e-07, "loss": 0.8594, "step": 15378 }, { "epoch": 0.88, "grad_norm": 1.0207233428955078, "learning_rate": 7.212033275935493e-07, "loss": 0.5412, "step": 15379 }, { "epoch": 0.88, "grad_norm": 1.8558810949325562, "learning_rate": 7.205108219220336e-07, "loss": 0.9017, "step": 15380 }, { "epoch": 0.88, "grad_norm": 1.7028923034667969, "learning_rate": 7.19818636459767e-07, "loss": 0.891, "step": 15381 }, { "epoch": 0.88, "grad_norm": 1.8055140972137451, "learning_rate": 7.191267712306372e-07, "loss": 0.7845, "step": 15382 }, { "epoch": 0.88, "grad_norm": 1.0377800464630127, "learning_rate": 7.184352262585159e-07, "loss": 0.5333, "step": 15383 }, { "epoch": 0.88, "grad_norm": 1.8510552644729614, "learning_rate": 7.177440015672699e-07, "loss": 0.8386, "step": 15384 }, { "epoch": 0.88, "grad_norm": 1.8055551052093506, "learning_rate": 7.170530971807477e-07, "loss": 0.9669, "step": 15385 }, { "epoch": 0.88, "grad_norm": 1.8047500848770142, "learning_rate": 7.163625131227936e-07, "loss": 0.9222, "step": 15386 }, { "epoch": 0.88, "grad_norm": 1.619687557220459, "learning_rate": 7.156722494172352e-07, "loss": 0.8661, "step": 15387 }, { "epoch": 0.88, "grad_norm": 1.8669445514678955, "learning_rate": 7.149823060878946e-07, "loss": 0.9242, "step": 15388 }, { "epoch": 0.88, "grad_norm": 1.6497832536697388, "learning_rate": 7.142926831585761e-07, "loss": 0.9129, "step": 15389 }, { "epoch": 0.88, "grad_norm": 1.6180696487426758, "learning_rate": 7.136033806530784e-07, "loss": 0.8539, "step": 15390 }, { "epoch": 0.88, "grad_norm": 1.9061260223388672, "learning_rate": 7.129143985951892e-07, "loss": 0.9253, "step": 15391 }, { "epoch": 0.88, "grad_norm": 1.6800200939178467, "learning_rate": 7.122257370086793e-07, "loss": 0.8556, "step": 15392 }, { "epoch": 0.88, "grad_norm": 1.700907826423645, "learning_rate": 7.115373959173177e-07, "loss": 0.8428, "step": 15393 }, { "epoch": 0.88, "grad_norm": 1.8817559480667114, "learning_rate": 7.10849375344852e-07, "loss": 0.838, "step": 15394 }, { "epoch": 0.88, "grad_norm": 1.793295979499817, "learning_rate": 7.101616753150275e-07, "loss": 0.8888, "step": 15395 }, { "epoch": 0.88, "grad_norm": 1.8626763820648193, "learning_rate": 7.094742958515722e-07, "loss": 0.8945, "step": 15396 }, { "epoch": 0.88, "grad_norm": 1.722426176071167, "learning_rate": 7.08787236978209e-07, "loss": 0.8826, "step": 15397 }, { "epoch": 0.88, "grad_norm": 1.987001895904541, "learning_rate": 7.081004987186424e-07, "loss": 0.8784, "step": 15398 }, { "epoch": 0.88, "grad_norm": 1.7001419067382812, "learning_rate": 7.074140810965724e-07, "loss": 0.9045, "step": 15399 }, { "epoch": 0.88, "grad_norm": 1.9331961870193481, "learning_rate": 7.067279841356844e-07, "loss": 0.8307, "step": 15400 }, { "epoch": 0.88, "grad_norm": 1.7904248237609863, "learning_rate": 7.060422078596529e-07, "loss": 0.9443, "step": 15401 }, { "epoch": 0.88, "grad_norm": 1.7708702087402344, "learning_rate": 7.053567522921457e-07, "loss": 0.8315, "step": 15402 }, { "epoch": 0.88, "grad_norm": 1.8293017148971558, "learning_rate": 7.046716174568114e-07, "loss": 0.8291, "step": 15403 }, { "epoch": 0.88, "grad_norm": 1.6780261993408203, "learning_rate": 7.039868033772956e-07, "loss": 0.8253, "step": 15404 }, { "epoch": 0.88, "grad_norm": 1.8134949207305908, "learning_rate": 7.033023100772262e-07, "loss": 0.8511, "step": 15405 }, { "epoch": 0.88, "grad_norm": 1.6957945823669434, "learning_rate": 7.026181375802266e-07, "loss": 0.8699, "step": 15406 }, { "epoch": 0.88, "grad_norm": 1.9956048727035522, "learning_rate": 7.019342859099032e-07, "loss": 0.8516, "step": 15407 }, { "epoch": 0.88, "grad_norm": 1.7115371227264404, "learning_rate": 7.012507550898551e-07, "loss": 0.9003, "step": 15408 }, { "epoch": 0.88, "grad_norm": 1.6581414937973022, "learning_rate": 7.005675451436667e-07, "loss": 0.8825, "step": 15409 }, { "epoch": 0.88, "grad_norm": 1.609484314918518, "learning_rate": 6.998846560949168e-07, "loss": 0.8471, "step": 15410 }, { "epoch": 0.88, "grad_norm": 1.700181007385254, "learning_rate": 6.992020879671679e-07, "loss": 0.9138, "step": 15411 }, { "epoch": 0.88, "grad_norm": 1.7253769636154175, "learning_rate": 6.985198407839755e-07, "loss": 0.9419, "step": 15412 }, { "epoch": 0.88, "grad_norm": 1.7705073356628418, "learning_rate": 6.978379145688785e-07, "loss": 0.9007, "step": 15413 }, { "epoch": 0.88, "grad_norm": 1.8615552186965942, "learning_rate": 6.971563093454114e-07, "loss": 0.8839, "step": 15414 }, { "epoch": 0.88, "grad_norm": 1.7783173322677612, "learning_rate": 6.964750251370945e-07, "loss": 0.8425, "step": 15415 }, { "epoch": 0.88, "grad_norm": 1.7722424268722534, "learning_rate": 6.957940619674352e-07, "loss": 0.8919, "step": 15416 }, { "epoch": 0.88, "grad_norm": 1.8861054182052612, "learning_rate": 6.951134198599341e-07, "loss": 0.9458, "step": 15417 }, { "epoch": 0.88, "grad_norm": 1.8971647024154663, "learning_rate": 6.944330988380743e-07, "loss": 0.8908, "step": 15418 }, { "epoch": 0.88, "grad_norm": 1.8293219804763794, "learning_rate": 6.93753098925336e-07, "loss": 0.857, "step": 15419 }, { "epoch": 0.88, "grad_norm": 1.776326060295105, "learning_rate": 6.930734201451817e-07, "loss": 0.8946, "step": 15420 }, { "epoch": 0.88, "grad_norm": 2.0064682960510254, "learning_rate": 6.923940625210668e-07, "loss": 0.9297, "step": 15421 }, { "epoch": 0.88, "grad_norm": 1.7492276430130005, "learning_rate": 6.917150260764293e-07, "loss": 0.8724, "step": 15422 }, { "epoch": 0.88, "grad_norm": 1.7210890054702759, "learning_rate": 6.910363108347084e-07, "loss": 0.9525, "step": 15423 }, { "epoch": 0.88, "grad_norm": 1.9671790599822998, "learning_rate": 6.903579168193197e-07, "loss": 0.8913, "step": 15424 }, { "epoch": 0.88, "grad_norm": 1.7515835762023926, "learning_rate": 6.896798440536744e-07, "loss": 0.9254, "step": 15425 }, { "epoch": 0.88, "grad_norm": 1.7188644409179688, "learning_rate": 6.890020925611696e-07, "loss": 0.9043, "step": 15426 }, { "epoch": 0.88, "grad_norm": 1.7102930545806885, "learning_rate": 6.883246623651951e-07, "loss": 0.8841, "step": 15427 }, { "epoch": 0.88, "grad_norm": 1.7879194021224976, "learning_rate": 6.876475534891236e-07, "loss": 0.9297, "step": 15428 }, { "epoch": 0.88, "grad_norm": 1.7796441316604614, "learning_rate": 6.86970765956323e-07, "loss": 0.8203, "step": 15429 }, { "epoch": 0.88, "grad_norm": 1.8255034685134888, "learning_rate": 6.862942997901456e-07, "loss": 0.8743, "step": 15430 }, { "epoch": 0.88, "grad_norm": 1.7486450672149658, "learning_rate": 6.856181550139341e-07, "loss": 0.8166, "step": 15431 }, { "epoch": 0.89, "grad_norm": 1.8995726108551025, "learning_rate": 6.849423316510239e-07, "loss": 0.9474, "step": 15432 }, { "epoch": 0.89, "grad_norm": 1.7616682052612305, "learning_rate": 6.842668297247312e-07, "loss": 0.8631, "step": 15433 }, { "epoch": 0.89, "grad_norm": 1.1272461414337158, "learning_rate": 6.835916492583694e-07, "loss": 0.5383, "step": 15434 }, { "epoch": 0.89, "grad_norm": 1.7339755296707153, "learning_rate": 6.829167902752342e-07, "loss": 0.9032, "step": 15435 }, { "epoch": 0.89, "grad_norm": 3.1219072341918945, "learning_rate": 6.822422527986161e-07, "loss": 0.9946, "step": 15436 }, { "epoch": 0.89, "grad_norm": 1.712729811668396, "learning_rate": 6.815680368517874e-07, "loss": 0.8669, "step": 15437 }, { "epoch": 0.89, "grad_norm": 1.9047094583511353, "learning_rate": 6.808941424580184e-07, "loss": 0.9317, "step": 15438 }, { "epoch": 0.89, "grad_norm": 1.7539697885513306, "learning_rate": 6.802205696405584e-07, "loss": 0.8974, "step": 15439 }, { "epoch": 0.89, "grad_norm": 1.8198806047439575, "learning_rate": 6.795473184226542e-07, "loss": 0.9215, "step": 15440 }, { "epoch": 0.89, "grad_norm": 1.8968397378921509, "learning_rate": 6.788743888275351e-07, "loss": 0.927, "step": 15441 }, { "epoch": 0.89, "grad_norm": 1.8130605220794678, "learning_rate": 6.782017808784236e-07, "loss": 0.8549, "step": 15442 }, { "epoch": 0.89, "grad_norm": 1.68769371509552, "learning_rate": 6.7752949459853e-07, "loss": 0.8329, "step": 15443 }, { "epoch": 0.89, "grad_norm": 1.7522064447402954, "learning_rate": 6.768575300110514e-07, "loss": 0.8102, "step": 15444 }, { "epoch": 0.89, "grad_norm": 1.762490153312683, "learning_rate": 6.76185887139178e-07, "loss": 0.9177, "step": 15445 }, { "epoch": 0.89, "grad_norm": 1.8453413248062134, "learning_rate": 6.755145660060825e-07, "loss": 0.8769, "step": 15446 }, { "epoch": 0.89, "grad_norm": 1.6166960000991821, "learning_rate": 6.74843566634934e-07, "loss": 0.9805, "step": 15447 }, { "epoch": 0.89, "grad_norm": 1.856553316116333, "learning_rate": 6.741728890488841e-07, "loss": 0.8987, "step": 15448 }, { "epoch": 0.89, "grad_norm": 1.9598171710968018, "learning_rate": 6.735025332710776e-07, "loss": 0.9108, "step": 15449 }, { "epoch": 0.89, "grad_norm": 1.7338714599609375, "learning_rate": 6.72832499324646e-07, "loss": 0.9798, "step": 15450 }, { "epoch": 0.89, "grad_norm": 1.8061928749084473, "learning_rate": 6.721627872327119e-07, "loss": 0.9121, "step": 15451 }, { "epoch": 0.89, "grad_norm": 1.6309852600097656, "learning_rate": 6.714933970183813e-07, "loss": 0.9267, "step": 15452 }, { "epoch": 0.89, "grad_norm": 1.7296868562698364, "learning_rate": 6.708243287047578e-07, "loss": 0.8577, "step": 15453 }, { "epoch": 0.89, "grad_norm": 1.6793081760406494, "learning_rate": 6.701555823149242e-07, "loss": 0.9314, "step": 15454 }, { "epoch": 0.89, "grad_norm": 1.7906148433685303, "learning_rate": 6.694871578719608e-07, "loss": 0.8813, "step": 15455 }, { "epoch": 0.89, "grad_norm": 1.7418558597564697, "learning_rate": 6.688190553989327e-07, "loss": 0.9142, "step": 15456 }, { "epoch": 0.89, "grad_norm": 1.7237902879714966, "learning_rate": 6.681512749188923e-07, "loss": 0.8722, "step": 15457 }, { "epoch": 0.89, "grad_norm": 1.6942803859710693, "learning_rate": 6.674838164548847e-07, "loss": 0.9119, "step": 15458 }, { "epoch": 0.89, "grad_norm": 1.9457521438598633, "learning_rate": 6.668166800299402e-07, "loss": 0.8638, "step": 15459 }, { "epoch": 0.89, "grad_norm": 1.7421132326126099, "learning_rate": 6.661498656670828e-07, "loss": 0.8351, "step": 15460 }, { "epoch": 0.89, "grad_norm": 1.6366709470748901, "learning_rate": 6.654833733893184e-07, "loss": 0.8619, "step": 15461 }, { "epoch": 0.89, "grad_norm": 1.0346012115478516, "learning_rate": 6.648172032196487e-07, "loss": 0.4982, "step": 15462 }, { "epoch": 0.89, "grad_norm": 1.909983515739441, "learning_rate": 6.641513551810608e-07, "loss": 0.9558, "step": 15463 }, { "epoch": 0.89, "grad_norm": 1.6495444774627686, "learning_rate": 6.634858292965307e-07, "loss": 0.7992, "step": 15464 }, { "epoch": 0.89, "grad_norm": 1.8287988901138306, "learning_rate": 6.628206255890235e-07, "loss": 0.9156, "step": 15465 }, { "epoch": 0.89, "grad_norm": 1.6711454391479492, "learning_rate": 6.621557440814963e-07, "loss": 0.896, "step": 15466 }, { "epoch": 0.89, "grad_norm": 1.7442529201507568, "learning_rate": 6.614911847968875e-07, "loss": 0.907, "step": 15467 }, { "epoch": 0.89, "grad_norm": 1.6668674945831299, "learning_rate": 6.60826947758132e-07, "loss": 0.8805, "step": 15468 }, { "epoch": 0.89, "grad_norm": 1.871113657951355, "learning_rate": 6.601630329881525e-07, "loss": 0.8464, "step": 15469 }, { "epoch": 0.89, "grad_norm": 1.1420254707336426, "learning_rate": 6.594994405098554e-07, "loss": 0.6099, "step": 15470 }, { "epoch": 0.89, "grad_norm": 1.9286190271377563, "learning_rate": 6.588361703461433e-07, "loss": 0.9271, "step": 15471 }, { "epoch": 0.89, "grad_norm": 1.7409298419952393, "learning_rate": 6.581732225198989e-07, "loss": 0.9413, "step": 15472 }, { "epoch": 0.89, "grad_norm": 1.8043605089187622, "learning_rate": 6.57510597054003e-07, "loss": 0.89, "step": 15473 }, { "epoch": 0.89, "grad_norm": 1.6918832063674927, "learning_rate": 6.568482939713172e-07, "loss": 0.8051, "step": 15474 }, { "epoch": 0.89, "grad_norm": 1.8590641021728516, "learning_rate": 6.561863132947e-07, "loss": 0.9181, "step": 15475 }, { "epoch": 0.89, "grad_norm": 1.6623128652572632, "learning_rate": 6.555246550469907e-07, "loss": 0.9372, "step": 15476 }, { "epoch": 0.89, "grad_norm": 1.7385882139205933, "learning_rate": 6.548633192510234e-07, "loss": 0.9223, "step": 15477 }, { "epoch": 0.89, "grad_norm": 1.0345485210418701, "learning_rate": 6.542023059296176e-07, "loss": 0.5543, "step": 15478 }, { "epoch": 0.89, "grad_norm": 1.7842947244644165, "learning_rate": 6.53541615105584e-07, "loss": 0.8839, "step": 15479 }, { "epoch": 0.89, "grad_norm": 1.7564575672149658, "learning_rate": 6.528812468017221e-07, "loss": 0.9038, "step": 15480 }, { "epoch": 0.89, "grad_norm": 1.7557716369628906, "learning_rate": 6.522212010408168e-07, "loss": 0.934, "step": 15481 }, { "epoch": 0.89, "grad_norm": 1.8131500482559204, "learning_rate": 6.51561477845648e-07, "loss": 0.8901, "step": 15482 }, { "epoch": 0.89, "grad_norm": 1.0179673433303833, "learning_rate": 6.509020772389763e-07, "loss": 0.5193, "step": 15483 }, { "epoch": 0.89, "grad_norm": 1.9805660247802734, "learning_rate": 6.502429992435599e-07, "loss": 0.9208, "step": 15484 }, { "epoch": 0.89, "grad_norm": 1.8453505039215088, "learning_rate": 6.495842438821387e-07, "loss": 0.871, "step": 15485 }, { "epoch": 0.89, "grad_norm": 1.7040224075317383, "learning_rate": 6.489258111774477e-07, "loss": 0.8238, "step": 15486 }, { "epoch": 0.89, "grad_norm": 1.7427481412887573, "learning_rate": 6.482677011522042e-07, "loss": 0.9231, "step": 15487 }, { "epoch": 0.89, "grad_norm": 1.814207673072815, "learning_rate": 6.476099138291192e-07, "loss": 0.9063, "step": 15488 }, { "epoch": 0.89, "grad_norm": 1.8178634643554688, "learning_rate": 6.469524492308921e-07, "loss": 0.8599, "step": 15489 }, { "epoch": 0.89, "grad_norm": 1.7496860027313232, "learning_rate": 6.462953073802103e-07, "loss": 0.8705, "step": 15490 }, { "epoch": 0.89, "grad_norm": 1.7155566215515137, "learning_rate": 6.456384882997468e-07, "loss": 0.9304, "step": 15491 }, { "epoch": 0.89, "grad_norm": 1.6795132160186768, "learning_rate": 6.449819920121702e-07, "loss": 0.9265, "step": 15492 }, { "epoch": 0.89, "grad_norm": 1.7608604431152344, "learning_rate": 6.443258185401324e-07, "loss": 0.8753, "step": 15493 }, { "epoch": 0.89, "grad_norm": 2.008249282836914, "learning_rate": 6.436699679062775e-07, "loss": 0.9294, "step": 15494 }, { "epoch": 0.89, "grad_norm": 1.1016560792922974, "learning_rate": 6.430144401332338e-07, "loss": 0.524, "step": 15495 }, { "epoch": 0.89, "grad_norm": 1.6687204837799072, "learning_rate": 6.423592352436248e-07, "loss": 0.8754, "step": 15496 }, { "epoch": 0.89, "grad_norm": 1.9280526638031006, "learning_rate": 6.417043532600609e-07, "loss": 0.9332, "step": 15497 }, { "epoch": 0.89, "grad_norm": 1.694471836090088, "learning_rate": 6.410497942051363e-07, "loss": 0.9066, "step": 15498 }, { "epoch": 0.89, "grad_norm": 1.8368369340896606, "learning_rate": 6.403955581014421e-07, "loss": 0.8805, "step": 15499 }, { "epoch": 0.89, "grad_norm": 1.7158629894256592, "learning_rate": 6.39741644971551e-07, "loss": 0.869, "step": 15500 }, { "epoch": 0.89, "grad_norm": 1.7269920110702515, "learning_rate": 6.390880548380296e-07, "loss": 0.9283, "step": 15501 }, { "epoch": 0.89, "grad_norm": 1.9694809913635254, "learning_rate": 6.384347877234299e-07, "loss": 0.829, "step": 15502 }, { "epoch": 0.89, "grad_norm": 2.016885280609131, "learning_rate": 6.377818436502969e-07, "loss": 0.9519, "step": 15503 }, { "epoch": 0.89, "grad_norm": 1.8114701509475708, "learning_rate": 6.371292226411574e-07, "loss": 0.8159, "step": 15504 }, { "epoch": 0.89, "grad_norm": 1.6478521823883057, "learning_rate": 6.364769247185376e-07, "loss": 0.8758, "step": 15505 }, { "epoch": 0.89, "grad_norm": 1.737593650817871, "learning_rate": 6.358249499049407e-07, "loss": 0.852, "step": 15506 }, { "epoch": 0.89, "grad_norm": 1.7148728370666504, "learning_rate": 6.351732982228687e-07, "loss": 0.8892, "step": 15507 }, { "epoch": 0.89, "grad_norm": 1.7233200073242188, "learning_rate": 6.345219696948046e-07, "loss": 0.913, "step": 15508 }, { "epoch": 0.89, "grad_norm": 1.8590095043182373, "learning_rate": 6.338709643432261e-07, "loss": 0.9252, "step": 15509 }, { "epoch": 0.89, "grad_norm": 1.750510811805725, "learning_rate": 6.332202821905986e-07, "loss": 0.8538, "step": 15510 }, { "epoch": 0.89, "grad_norm": 1.114027500152588, "learning_rate": 6.32569923259373e-07, "loss": 0.5378, "step": 15511 }, { "epoch": 0.89, "grad_norm": 1.7650784254074097, "learning_rate": 6.319198875719945e-07, "loss": 0.8798, "step": 15512 }, { "epoch": 0.89, "grad_norm": 1.8563258647918701, "learning_rate": 6.312701751508898e-07, "loss": 0.8992, "step": 15513 }, { "epoch": 0.89, "grad_norm": 1.9289206266403198, "learning_rate": 6.306207860184832e-07, "loss": 0.9787, "step": 15514 }, { "epoch": 0.89, "grad_norm": 1.861488938331604, "learning_rate": 6.299717201971789e-07, "loss": 0.8757, "step": 15515 }, { "epoch": 0.89, "grad_norm": 1.8455007076263428, "learning_rate": 6.293229777093779e-07, "loss": 0.97, "step": 15516 }, { "epoch": 0.89, "grad_norm": 1.9264119863510132, "learning_rate": 6.286745585774634e-07, "loss": 0.9504, "step": 15517 }, { "epoch": 0.89, "grad_norm": 0.9496403336524963, "learning_rate": 6.28026462823813e-07, "loss": 0.4688, "step": 15518 }, { "epoch": 0.89, "grad_norm": 1.7758005857467651, "learning_rate": 6.273786904707901e-07, "loss": 0.9176, "step": 15519 }, { "epoch": 0.89, "grad_norm": 1.9176400899887085, "learning_rate": 6.267312415407478e-07, "loss": 0.905, "step": 15520 }, { "epoch": 0.89, "grad_norm": 1.701231837272644, "learning_rate": 6.260841160560249e-07, "loss": 0.882, "step": 15521 }, { "epoch": 0.89, "grad_norm": 1.010457158088684, "learning_rate": 6.254373140389546e-07, "loss": 0.4875, "step": 15522 }, { "epoch": 0.89, "grad_norm": 1.8519154787063599, "learning_rate": 6.247908355118581e-07, "loss": 0.8354, "step": 15523 }, { "epoch": 0.89, "grad_norm": 1.8883432149887085, "learning_rate": 6.241446804970397e-07, "loss": 0.9274, "step": 15524 }, { "epoch": 0.89, "grad_norm": 1.86014986038208, "learning_rate": 6.234988490167981e-07, "loss": 0.9595, "step": 15525 }, { "epoch": 0.89, "grad_norm": 1.6856034994125366, "learning_rate": 6.22853341093419e-07, "loss": 0.8962, "step": 15526 }, { "epoch": 0.89, "grad_norm": 1.8290272951126099, "learning_rate": 6.222081567491778e-07, "loss": 0.8819, "step": 15527 }, { "epoch": 0.89, "grad_norm": 1.7978019714355469, "learning_rate": 6.215632960063367e-07, "loss": 0.8613, "step": 15528 }, { "epoch": 0.89, "grad_norm": 1.7104436159133911, "learning_rate": 6.20918758887149e-07, "loss": 0.9095, "step": 15529 }, { "epoch": 0.89, "grad_norm": 1.8589082956314087, "learning_rate": 6.202745454138548e-07, "loss": 0.9629, "step": 15530 }, { "epoch": 0.89, "grad_norm": 1.744292974472046, "learning_rate": 6.196306556086862e-07, "loss": 0.9372, "step": 15531 }, { "epoch": 0.89, "grad_norm": 1.8318431377410889, "learning_rate": 6.189870894938587e-07, "loss": 0.8565, "step": 15532 }, { "epoch": 0.89, "grad_norm": 1.932905912399292, "learning_rate": 6.183438470915826e-07, "loss": 0.8697, "step": 15533 }, { "epoch": 0.89, "grad_norm": 0.9996007084846497, "learning_rate": 6.177009284240542e-07, "loss": 0.5517, "step": 15534 }, { "epoch": 0.89, "grad_norm": 2.324336528778076, "learning_rate": 6.170583335134584e-07, "loss": 0.9225, "step": 15535 }, { "epoch": 0.89, "grad_norm": 1.6341477632522583, "learning_rate": 6.164160623819693e-07, "loss": 0.8925, "step": 15536 }, { "epoch": 0.89, "grad_norm": 1.696211814880371, "learning_rate": 6.157741150517494e-07, "loss": 0.8662, "step": 15537 }, { "epoch": 0.89, "grad_norm": 1.6755071878433228, "learning_rate": 6.15132491544952e-07, "loss": 0.9652, "step": 15538 }, { "epoch": 0.89, "grad_norm": 1.8382385969161987, "learning_rate": 6.14491191883716e-07, "loss": 0.9468, "step": 15539 }, { "epoch": 0.89, "grad_norm": 1.6141189336776733, "learning_rate": 6.138502160901727e-07, "loss": 0.849, "step": 15540 }, { "epoch": 0.89, "grad_norm": 1.722029447555542, "learning_rate": 6.132095641864378e-07, "loss": 0.8759, "step": 15541 }, { "epoch": 0.89, "grad_norm": 1.6465224027633667, "learning_rate": 6.125692361946211e-07, "loss": 0.8796, "step": 15542 }, { "epoch": 0.89, "grad_norm": 1.679895281791687, "learning_rate": 6.119292321368153e-07, "loss": 0.8483, "step": 15543 }, { "epoch": 0.89, "grad_norm": 1.8032406568527222, "learning_rate": 6.112895520351103e-07, "loss": 0.9443, "step": 15544 }, { "epoch": 0.89, "grad_norm": 1.6419126987457275, "learning_rate": 6.10650195911574e-07, "loss": 0.8833, "step": 15545 }, { "epoch": 0.89, "grad_norm": 1.5212006568908691, "learning_rate": 6.100111637882711e-07, "loss": 0.765, "step": 15546 }, { "epoch": 0.89, "grad_norm": 1.759412407875061, "learning_rate": 6.093724556872549e-07, "loss": 0.9028, "step": 15547 }, { "epoch": 0.89, "grad_norm": 1.0550742149353027, "learning_rate": 6.087340716305623e-07, "loss": 0.5482, "step": 15548 }, { "epoch": 0.89, "grad_norm": 2.059455156326294, "learning_rate": 6.080960116402245e-07, "loss": 0.9912, "step": 15549 }, { "epoch": 0.89, "grad_norm": 1.888584017753601, "learning_rate": 6.074582757382575e-07, "loss": 0.8927, "step": 15550 }, { "epoch": 0.89, "grad_norm": 1.664782166481018, "learning_rate": 6.068208639466688e-07, "loss": 0.9334, "step": 15551 }, { "epoch": 0.89, "grad_norm": 1.723462700843811, "learning_rate": 6.061837762874523e-07, "loss": 0.8874, "step": 15552 }, { "epoch": 0.89, "grad_norm": 1.8481340408325195, "learning_rate": 6.055470127825946e-07, "loss": 0.88, "step": 15553 }, { "epoch": 0.89, "grad_norm": 1.0063462257385254, "learning_rate": 6.049105734540639e-07, "loss": 0.4903, "step": 15554 }, { "epoch": 0.89, "grad_norm": 1.6952629089355469, "learning_rate": 6.042744583238291e-07, "loss": 0.8773, "step": 15555 }, { "epoch": 0.89, "grad_norm": 1.717790961265564, "learning_rate": 6.036386674138339e-07, "loss": 0.9307, "step": 15556 }, { "epoch": 0.89, "grad_norm": 1.775532841682434, "learning_rate": 6.030032007460229e-07, "loss": 0.8684, "step": 15557 }, { "epoch": 0.89, "grad_norm": 1.68381667137146, "learning_rate": 6.023680583423209e-07, "loss": 0.8578, "step": 15558 }, { "epoch": 0.89, "grad_norm": 1.6865746974945068, "learning_rate": 6.017332402246468e-07, "loss": 0.8708, "step": 15559 }, { "epoch": 0.89, "grad_norm": 1.9333467483520508, "learning_rate": 6.010987464149043e-07, "loss": 0.8167, "step": 15560 }, { "epoch": 0.89, "grad_norm": 1.737566590309143, "learning_rate": 6.004645769349915e-07, "loss": 0.9618, "step": 15561 }, { "epoch": 0.89, "grad_norm": 1.7951821088790894, "learning_rate": 5.998307318067875e-07, "loss": 0.9258, "step": 15562 }, { "epoch": 0.89, "grad_norm": 1.781555414199829, "learning_rate": 5.99197211052166e-07, "loss": 0.8705, "step": 15563 }, { "epoch": 0.89, "grad_norm": 1.73175048828125, "learning_rate": 5.985640146929906e-07, "loss": 0.8543, "step": 15564 }, { "epoch": 0.89, "grad_norm": 1.5270600318908691, "learning_rate": 5.979311427511081e-07, "loss": 0.8526, "step": 15565 }, { "epoch": 0.89, "grad_norm": 1.8287838697433472, "learning_rate": 5.972985952483601e-07, "loss": 0.9412, "step": 15566 }, { "epoch": 0.89, "grad_norm": 1.6838566064834595, "learning_rate": 5.966663722065691e-07, "loss": 0.8423, "step": 15567 }, { "epoch": 0.89, "grad_norm": 1.7391963005065918, "learning_rate": 5.960344736475576e-07, "loss": 0.8679, "step": 15568 }, { "epoch": 0.89, "grad_norm": 1.6428356170654297, "learning_rate": 5.95402899593125e-07, "loss": 0.8845, "step": 15569 }, { "epoch": 0.89, "grad_norm": 1.8367490768432617, "learning_rate": 5.947716500650702e-07, "loss": 0.9521, "step": 15570 }, { "epoch": 0.89, "grad_norm": 1.7687405347824097, "learning_rate": 5.941407250851705e-07, "loss": 0.9225, "step": 15571 }, { "epoch": 0.89, "grad_norm": 1.8504630327224731, "learning_rate": 5.935101246752029e-07, "loss": 0.8998, "step": 15572 }, { "epoch": 0.89, "grad_norm": 1.8147363662719727, "learning_rate": 5.928798488569221e-07, "loss": 0.9059, "step": 15573 }, { "epoch": 0.89, "grad_norm": 1.8694202899932861, "learning_rate": 5.922498976520818e-07, "loss": 0.9067, "step": 15574 }, { "epoch": 0.89, "grad_norm": 1.6132831573486328, "learning_rate": 5.916202710824171e-07, "loss": 0.9781, "step": 15575 }, { "epoch": 0.89, "grad_norm": 1.8362089395523071, "learning_rate": 5.909909691696558e-07, "loss": 0.8844, "step": 15576 }, { "epoch": 0.89, "grad_norm": 1.705001711845398, "learning_rate": 5.903619919355141e-07, "loss": 0.8366, "step": 15577 }, { "epoch": 0.89, "grad_norm": 1.8806865215301514, "learning_rate": 5.897333394016935e-07, "loss": 0.8806, "step": 15578 }, { "epoch": 0.89, "grad_norm": 1.7190494537353516, "learning_rate": 5.891050115898911e-07, "loss": 0.8013, "step": 15579 }, { "epoch": 0.89, "grad_norm": 1.841902494430542, "learning_rate": 5.88477008521785e-07, "loss": 0.9714, "step": 15580 }, { "epoch": 0.89, "grad_norm": 1.7925434112548828, "learning_rate": 5.87849330219048e-07, "loss": 0.8926, "step": 15581 }, { "epoch": 0.89, "grad_norm": 1.7538598775863647, "learning_rate": 5.872219767033382e-07, "loss": 0.8815, "step": 15582 }, { "epoch": 0.89, "grad_norm": 1.8988202810287476, "learning_rate": 5.865949479963052e-07, "loss": 0.9301, "step": 15583 }, { "epoch": 0.89, "grad_norm": 1.6966590881347656, "learning_rate": 5.859682441195846e-07, "loss": 0.9732, "step": 15584 }, { "epoch": 0.89, "grad_norm": 1.8185322284698486, "learning_rate": 5.853418650948039e-07, "loss": 0.8626, "step": 15585 }, { "epoch": 0.89, "grad_norm": 1.0373144149780273, "learning_rate": 5.847158109435746e-07, "loss": 0.5108, "step": 15586 }, { "epoch": 0.89, "grad_norm": 1.8377195596694946, "learning_rate": 5.840900816875028e-07, "loss": 0.9463, "step": 15587 }, { "epoch": 0.89, "grad_norm": 1.6262853145599365, "learning_rate": 5.834646773481811e-07, "loss": 0.9424, "step": 15588 }, { "epoch": 0.89, "grad_norm": 1.780945062637329, "learning_rate": 5.82839597947189e-07, "loss": 0.916, "step": 15589 }, { "epoch": 0.89, "grad_norm": 1.8211147785186768, "learning_rate": 5.822148435060971e-07, "loss": 0.8665, "step": 15590 }, { "epoch": 0.89, "grad_norm": 1.5948418378829956, "learning_rate": 5.815904140464623e-07, "loss": 0.8741, "step": 15591 }, { "epoch": 0.89, "grad_norm": 1.6296415328979492, "learning_rate": 5.809663095898332e-07, "loss": 0.8931, "step": 15592 }, { "epoch": 0.89, "grad_norm": 1.8155616521835327, "learning_rate": 5.803425301577459e-07, "loss": 0.887, "step": 15593 }, { "epoch": 0.89, "grad_norm": 1.7767034769058228, "learning_rate": 5.797190757717264e-07, "loss": 0.8596, "step": 15594 }, { "epoch": 0.89, "grad_norm": 1.8372561931610107, "learning_rate": 5.790959464532852e-07, "loss": 0.9633, "step": 15595 }, { "epoch": 0.89, "grad_norm": 1.804306149482727, "learning_rate": 5.784731422239276e-07, "loss": 0.9006, "step": 15596 }, { "epoch": 0.89, "grad_norm": 1.8901151418685913, "learning_rate": 5.77850663105144e-07, "loss": 0.9367, "step": 15597 }, { "epoch": 0.89, "grad_norm": 1.6477718353271484, "learning_rate": 5.772285091184138e-07, "loss": 0.8859, "step": 15598 }, { "epoch": 0.89, "grad_norm": 1.7240761518478394, "learning_rate": 5.766066802852066e-07, "loss": 0.9518, "step": 15599 }, { "epoch": 0.89, "grad_norm": 1.9692060947418213, "learning_rate": 5.759851766269786e-07, "loss": 0.9452, "step": 15600 }, { "epoch": 0.89, "grad_norm": 1.8080519437789917, "learning_rate": 5.753639981651792e-07, "loss": 0.9087, "step": 15601 }, { "epoch": 0.89, "grad_norm": 1.6249909400939941, "learning_rate": 5.747431449212393e-07, "loss": 0.8644, "step": 15602 }, { "epoch": 0.89, "grad_norm": 1.7710704803466797, "learning_rate": 5.74122616916587e-07, "loss": 0.8791, "step": 15603 }, { "epoch": 0.89, "grad_norm": 1.1158372163772583, "learning_rate": 5.73502414172632e-07, "loss": 0.5356, "step": 15604 }, { "epoch": 0.89, "grad_norm": 1.8361241817474365, "learning_rate": 5.728825367107782e-07, "loss": 0.9456, "step": 15605 }, { "epoch": 0.9, "grad_norm": 1.802695393562317, "learning_rate": 5.722629845524131e-07, "loss": 1.0117, "step": 15606 }, { "epoch": 0.9, "grad_norm": 1.7006168365478516, "learning_rate": 5.716437577189182e-07, "loss": 0.8232, "step": 15607 }, { "epoch": 0.9, "grad_norm": 1.8938943147659302, "learning_rate": 5.710248562316589e-07, "loss": 0.9121, "step": 15608 }, { "epoch": 0.9, "grad_norm": 1.7587521076202393, "learning_rate": 5.704062801119947e-07, "loss": 0.8805, "step": 15609 }, { "epoch": 0.9, "grad_norm": 1.756843090057373, "learning_rate": 5.697880293812674e-07, "loss": 0.9176, "step": 15610 }, { "epoch": 0.9, "grad_norm": 1.7367825508117676, "learning_rate": 5.691701040608133e-07, "loss": 0.8907, "step": 15611 }, { "epoch": 0.9, "grad_norm": 1.7477002143859863, "learning_rate": 5.685525041719553e-07, "loss": 0.8997, "step": 15612 }, { "epoch": 0.9, "grad_norm": 1.8525936603546143, "learning_rate": 5.679352297360041e-07, "loss": 0.9056, "step": 15613 }, { "epoch": 0.9, "grad_norm": 1.9039686918258667, "learning_rate": 5.673182807742627e-07, "loss": 0.9175, "step": 15614 }, { "epoch": 0.9, "grad_norm": 1.80568265914917, "learning_rate": 5.667016573080164e-07, "loss": 0.9697, "step": 15615 }, { "epoch": 0.9, "grad_norm": 1.8497459888458252, "learning_rate": 5.660853593585458e-07, "loss": 0.8802, "step": 15616 }, { "epoch": 0.9, "grad_norm": 1.7925468683242798, "learning_rate": 5.654693869471162e-07, "loss": 0.8679, "step": 15617 }, { "epoch": 0.9, "grad_norm": 1.8453634977340698, "learning_rate": 5.64853740094985e-07, "loss": 0.9208, "step": 15618 }, { "epoch": 0.9, "grad_norm": 1.7078968286514282, "learning_rate": 5.64238418823394e-07, "loss": 0.8676, "step": 15619 }, { "epoch": 0.9, "grad_norm": 1.749570608139038, "learning_rate": 5.636234231535775e-07, "loss": 0.8422, "step": 15620 }, { "epoch": 0.9, "grad_norm": 1.775765299797058, "learning_rate": 5.630087531067574e-07, "loss": 0.7934, "step": 15621 }, { "epoch": 0.9, "grad_norm": 1.7809996604919434, "learning_rate": 5.623944087041444e-07, "loss": 0.8311, "step": 15622 }, { "epoch": 0.9, "grad_norm": 1.8752776384353638, "learning_rate": 5.617803899669372e-07, "loss": 0.9098, "step": 15623 }, { "epoch": 0.9, "grad_norm": 0.9358185529708862, "learning_rate": 5.611666969163243e-07, "loss": 0.5014, "step": 15624 }, { "epoch": 0.9, "grad_norm": 1.7852346897125244, "learning_rate": 5.605533295734822e-07, "loss": 0.957, "step": 15625 }, { "epoch": 0.9, "grad_norm": 1.7740525007247925, "learning_rate": 5.599402879595772e-07, "loss": 0.8256, "step": 15626 }, { "epoch": 0.9, "grad_norm": 1.8986852169036865, "learning_rate": 5.593275720957625e-07, "loss": 0.9134, "step": 15627 }, { "epoch": 0.9, "grad_norm": 1.7122132778167725, "learning_rate": 5.587151820031811e-07, "loss": 0.8797, "step": 15628 }, { "epoch": 0.9, "grad_norm": 1.8016579151153564, "learning_rate": 5.581031177029672e-07, "loss": 0.8761, "step": 15629 }, { "epoch": 0.9, "grad_norm": 1.7391622066497803, "learning_rate": 5.574913792162395e-07, "loss": 0.9061, "step": 15630 }, { "epoch": 0.9, "grad_norm": 1.8038105964660645, "learning_rate": 5.568799665641078e-07, "loss": 0.9224, "step": 15631 }, { "epoch": 0.9, "grad_norm": 1.9782445430755615, "learning_rate": 5.562688797676696e-07, "loss": 0.9664, "step": 15632 }, { "epoch": 0.9, "grad_norm": 1.905548334121704, "learning_rate": 5.556581188480126e-07, "loss": 0.8787, "step": 15633 }, { "epoch": 0.9, "grad_norm": 1.6789883375167847, "learning_rate": 5.55047683826212e-07, "loss": 0.8991, "step": 15634 }, { "epoch": 0.9, "grad_norm": 1.811604380607605, "learning_rate": 5.544375747233333e-07, "loss": 0.9344, "step": 15635 }, { "epoch": 0.9, "grad_norm": 1.6902881860733032, "learning_rate": 5.538277915604273e-07, "loss": 0.8849, "step": 15636 }, { "epoch": 0.9, "grad_norm": 1.715160846710205, "learning_rate": 5.532183343585396e-07, "loss": 0.9737, "step": 15637 }, { "epoch": 0.9, "grad_norm": 1.0077650547027588, "learning_rate": 5.526092031386965e-07, "loss": 0.5393, "step": 15638 }, { "epoch": 0.9, "grad_norm": 1.7462610006332397, "learning_rate": 5.520003979219202e-07, "loss": 0.9459, "step": 15639 }, { "epoch": 0.9, "grad_norm": 1.730873703956604, "learning_rate": 5.513919187292182e-07, "loss": 0.8546, "step": 15640 }, { "epoch": 0.9, "grad_norm": 1.6832468509674072, "learning_rate": 5.507837655815873e-07, "loss": 0.9624, "step": 15641 }, { "epoch": 0.9, "grad_norm": 2.0448713302612305, "learning_rate": 5.501759385000138e-07, "loss": 0.9672, "step": 15642 }, { "epoch": 0.9, "grad_norm": 1.6207120418548584, "learning_rate": 5.495684375054711e-07, "loss": 0.9207, "step": 15643 }, { "epoch": 0.9, "grad_norm": 1.773476004600525, "learning_rate": 5.489612626189245e-07, "loss": 0.8333, "step": 15644 }, { "epoch": 0.9, "grad_norm": 1.7257273197174072, "learning_rate": 5.483544138613217e-07, "loss": 0.8128, "step": 15645 }, { "epoch": 0.9, "grad_norm": 1.684701919555664, "learning_rate": 5.477478912536083e-07, "loss": 0.8832, "step": 15646 }, { "epoch": 0.9, "grad_norm": 1.9602268934249878, "learning_rate": 5.471416948167107e-07, "loss": 0.9757, "step": 15647 }, { "epoch": 0.9, "grad_norm": 1.7607377767562866, "learning_rate": 5.46535824571548e-07, "loss": 0.9789, "step": 15648 }, { "epoch": 0.9, "grad_norm": 2.098794460296631, "learning_rate": 5.459302805390254e-07, "loss": 0.9081, "step": 15649 }, { "epoch": 0.9, "grad_norm": 1.7969435453414917, "learning_rate": 5.453250627400419e-07, "loss": 0.9311, "step": 15650 }, { "epoch": 0.9, "grad_norm": 1.7865899801254272, "learning_rate": 5.447201711954775e-07, "loss": 0.9158, "step": 15651 }, { "epoch": 0.9, "grad_norm": 1.7946916818618774, "learning_rate": 5.441156059262109e-07, "loss": 0.8306, "step": 15652 }, { "epoch": 0.9, "grad_norm": 2.000335693359375, "learning_rate": 5.435113669530978e-07, "loss": 0.9067, "step": 15653 }, { "epoch": 0.9, "grad_norm": 1.7875988483428955, "learning_rate": 5.429074542969926e-07, "loss": 0.8493, "step": 15654 }, { "epoch": 0.9, "grad_norm": 1.774566411972046, "learning_rate": 5.423038679787352e-07, "loss": 0.8609, "step": 15655 }, { "epoch": 0.9, "grad_norm": 1.794792890548706, "learning_rate": 5.417006080191501e-07, "loss": 0.9016, "step": 15656 }, { "epoch": 0.9, "grad_norm": 1.8023427724838257, "learning_rate": 5.410976744390584e-07, "loss": 0.931, "step": 15657 }, { "epoch": 0.9, "grad_norm": 1.76619291305542, "learning_rate": 5.404950672592623e-07, "loss": 0.9598, "step": 15658 }, { "epoch": 0.9, "grad_norm": 1.7884960174560547, "learning_rate": 5.398927865005588e-07, "loss": 1.0358, "step": 15659 }, { "epoch": 0.9, "grad_norm": 1.6713100671768188, "learning_rate": 5.392908321837275e-07, "loss": 0.8963, "step": 15660 }, { "epoch": 0.9, "grad_norm": 1.721921682357788, "learning_rate": 5.386892043295433e-07, "loss": 0.8999, "step": 15661 }, { "epoch": 0.9, "grad_norm": 1.7572952508926392, "learning_rate": 5.380879029587649e-07, "loss": 0.8587, "step": 15662 }, { "epoch": 0.9, "grad_norm": 1.9742989540100098, "learning_rate": 5.374869280921436e-07, "loss": 0.8605, "step": 15663 }, { "epoch": 0.9, "grad_norm": 1.6886658668518066, "learning_rate": 5.368862797504149e-07, "loss": 0.8876, "step": 15664 }, { "epoch": 0.9, "grad_norm": 1.6170834302902222, "learning_rate": 5.362859579543056e-07, "loss": 0.8683, "step": 15665 }, { "epoch": 0.9, "grad_norm": 1.8819648027420044, "learning_rate": 5.356859627245337e-07, "loss": 0.8384, "step": 15666 }, { "epoch": 0.9, "grad_norm": 1.7028852701187134, "learning_rate": 5.350862940818014e-07, "loss": 0.9006, "step": 15667 }, { "epoch": 0.9, "grad_norm": 1.5484930276870728, "learning_rate": 5.344869520468021e-07, "loss": 0.9625, "step": 15668 }, { "epoch": 0.9, "grad_norm": 1.7978967428207397, "learning_rate": 5.338879366402161e-07, "loss": 0.9439, "step": 15669 }, { "epoch": 0.9, "grad_norm": 1.8412784337997437, "learning_rate": 5.332892478827168e-07, "loss": 0.8419, "step": 15670 }, { "epoch": 0.9, "grad_norm": 1.6158326864242554, "learning_rate": 5.326908857949586e-07, "loss": 0.9335, "step": 15671 }, { "epoch": 0.9, "grad_norm": 1.9648857116699219, "learning_rate": 5.320928503975953e-07, "loss": 0.9231, "step": 15672 }, { "epoch": 0.9, "grad_norm": 1.6637235879898071, "learning_rate": 5.31495141711258e-07, "loss": 0.8077, "step": 15673 }, { "epoch": 0.9, "grad_norm": 1.794452428817749, "learning_rate": 5.308977597565756e-07, "loss": 0.8253, "step": 15674 }, { "epoch": 0.9, "grad_norm": 1.7804847955703735, "learning_rate": 5.303007045541586e-07, "loss": 0.8895, "step": 15675 }, { "epoch": 0.9, "grad_norm": 1.9212371110916138, "learning_rate": 5.297039761246137e-07, "loss": 0.8571, "step": 15676 }, { "epoch": 0.9, "grad_norm": 1.0678497552871704, "learning_rate": 5.291075744885288e-07, "loss": 0.531, "step": 15677 }, { "epoch": 0.9, "grad_norm": 1.7733186483383179, "learning_rate": 5.285114996664864e-07, "loss": 0.9076, "step": 15678 }, { "epoch": 0.9, "grad_norm": 1.7166513204574585, "learning_rate": 5.279157516790545e-07, "loss": 0.8949, "step": 15679 }, { "epoch": 0.9, "grad_norm": 1.7937605381011963, "learning_rate": 5.273203305467911e-07, "loss": 0.8907, "step": 15680 }, { "epoch": 0.9, "grad_norm": 1.9153492450714111, "learning_rate": 5.267252362902431e-07, "loss": 0.936, "step": 15681 }, { "epoch": 0.9, "grad_norm": 1.749113917350769, "learning_rate": 5.261304689299429e-07, "loss": 0.9013, "step": 15682 }, { "epoch": 0.9, "grad_norm": 1.0417803525924683, "learning_rate": 5.255360284864175e-07, "loss": 0.5309, "step": 15683 }, { "epoch": 0.9, "grad_norm": 1.7653416395187378, "learning_rate": 5.24941914980176e-07, "loss": 0.8583, "step": 15684 }, { "epoch": 0.9, "grad_norm": 1.0027296543121338, "learning_rate": 5.243481284317232e-07, "loss": 0.5005, "step": 15685 }, { "epoch": 0.9, "grad_norm": 1.86319100856781, "learning_rate": 5.237546688615447e-07, "loss": 0.9205, "step": 15686 }, { "epoch": 0.9, "grad_norm": 1.840283989906311, "learning_rate": 5.231615362901255e-07, "loss": 0.9122, "step": 15687 }, { "epoch": 0.9, "grad_norm": 1.8656138181686401, "learning_rate": 5.225687307379268e-07, "loss": 0.8741, "step": 15688 }, { "epoch": 0.9, "grad_norm": 1.9472781419754028, "learning_rate": 5.219762522254079e-07, "loss": 0.9382, "step": 15689 }, { "epoch": 0.9, "grad_norm": 1.863309621810913, "learning_rate": 5.213841007730125e-07, "loss": 0.9011, "step": 15690 }, { "epoch": 0.9, "grad_norm": 1.8095654249191284, "learning_rate": 5.207922764011752e-07, "loss": 0.8874, "step": 15691 }, { "epoch": 0.9, "grad_norm": 1.7858742475509644, "learning_rate": 5.202007791303165e-07, "loss": 0.9017, "step": 15692 }, { "epoch": 0.9, "grad_norm": 1.7495330572128296, "learning_rate": 5.196096089808489e-07, "loss": 0.8601, "step": 15693 }, { "epoch": 0.9, "grad_norm": 1.9624909162521362, "learning_rate": 5.190187659731705e-07, "loss": 0.8931, "step": 15694 }, { "epoch": 0.9, "grad_norm": 1.7143641710281372, "learning_rate": 5.184282501276694e-07, "loss": 0.8443, "step": 15695 }, { "epoch": 0.9, "grad_norm": 1.853432059288025, "learning_rate": 5.17838061464726e-07, "loss": 0.9205, "step": 15696 }, { "epoch": 0.9, "grad_norm": 1.8027478456497192, "learning_rate": 5.172482000047019e-07, "loss": 0.9873, "step": 15697 }, { "epoch": 0.9, "grad_norm": 1.9007970094680786, "learning_rate": 5.166586657679551e-07, "loss": 0.8908, "step": 15698 }, { "epoch": 0.9, "grad_norm": 2.1118667125701904, "learning_rate": 5.16069458774825e-07, "loss": 1.0109, "step": 15699 }, { "epoch": 0.9, "grad_norm": 1.816728949546814, "learning_rate": 5.154805790456486e-07, "loss": 0.9295, "step": 15700 }, { "epoch": 0.9, "grad_norm": 1.8203498125076294, "learning_rate": 5.148920266007407e-07, "loss": 0.9413, "step": 15701 }, { "epoch": 0.9, "grad_norm": 1.8068729639053345, "learning_rate": 5.143038014604152e-07, "loss": 0.8825, "step": 15702 }, { "epoch": 0.9, "grad_norm": 1.86770498752594, "learning_rate": 5.137159036449668e-07, "loss": 0.9791, "step": 15703 }, { "epoch": 0.9, "grad_norm": 1.8436784744262695, "learning_rate": 5.131283331746851e-07, "loss": 0.9117, "step": 15704 }, { "epoch": 0.9, "grad_norm": 1.8742127418518066, "learning_rate": 5.125410900698425e-07, "loss": 0.9951, "step": 15705 }, { "epoch": 0.9, "grad_norm": 1.696946144104004, "learning_rate": 5.119541743507062e-07, "loss": 0.9304, "step": 15706 }, { "epoch": 0.9, "grad_norm": 1.7339062690734863, "learning_rate": 5.113675860375267e-07, "loss": 0.8848, "step": 15707 }, { "epoch": 0.9, "grad_norm": 1.8559589385986328, "learning_rate": 5.107813251505456e-07, "loss": 0.9666, "step": 15708 }, { "epoch": 0.9, "grad_norm": 1.7508392333984375, "learning_rate": 5.101953917099955e-07, "loss": 0.8681, "step": 15709 }, { "epoch": 0.9, "grad_norm": 1.6933382749557495, "learning_rate": 5.096097857360927e-07, "loss": 0.8133, "step": 15710 }, { "epoch": 0.9, "grad_norm": 1.6814154386520386, "learning_rate": 5.090245072490474e-07, "loss": 0.9545, "step": 15711 }, { "epoch": 0.9, "grad_norm": 1.718481183052063, "learning_rate": 5.084395562690525e-07, "loss": 0.8492, "step": 15712 }, { "epoch": 0.9, "grad_norm": 1.8031295537948608, "learning_rate": 5.078549328162963e-07, "loss": 0.882, "step": 15713 }, { "epoch": 0.9, "grad_norm": 1.9920603036880493, "learning_rate": 5.072706369109504e-07, "loss": 0.8628, "step": 15714 }, { "epoch": 0.9, "grad_norm": 2.0501608848571777, "learning_rate": 5.066866685731786e-07, "loss": 0.8813, "step": 15715 }, { "epoch": 0.9, "grad_norm": 1.6383816003799438, "learning_rate": 5.061030278231305e-07, "loss": 0.8585, "step": 15716 }, { "epoch": 0.9, "grad_norm": 1.768262267112732, "learning_rate": 5.055197146809476e-07, "loss": 0.9008, "step": 15717 }, { "epoch": 0.9, "grad_norm": 1.8149127960205078, "learning_rate": 5.049367291667572e-07, "loss": 0.8396, "step": 15718 }, { "epoch": 0.9, "grad_norm": 1.70863938331604, "learning_rate": 5.043540713006756e-07, "loss": 0.9565, "step": 15719 }, { "epoch": 0.9, "grad_norm": 1.7773997783660889, "learning_rate": 5.03771741102812e-07, "loss": 0.927, "step": 15720 }, { "epoch": 0.9, "grad_norm": 1.8717988729476929, "learning_rate": 5.031897385932582e-07, "loss": 0.8581, "step": 15721 }, { "epoch": 0.9, "grad_norm": 1.8253116607666016, "learning_rate": 5.026080637920983e-07, "loss": 0.8837, "step": 15722 }, { "epoch": 0.9, "grad_norm": 1.9104567766189575, "learning_rate": 5.020267167194038e-07, "loss": 0.9371, "step": 15723 }, { "epoch": 0.9, "grad_norm": 1.9085501432418823, "learning_rate": 5.014456973952375e-07, "loss": 0.8923, "step": 15724 }, { "epoch": 0.9, "grad_norm": 1.8560725450515747, "learning_rate": 5.008650058396448e-07, "loss": 0.8573, "step": 15725 }, { "epoch": 0.9, "grad_norm": 1.8208476305007935, "learning_rate": 5.00284642072667e-07, "loss": 0.8889, "step": 15726 }, { "epoch": 0.9, "grad_norm": 1.888777494430542, "learning_rate": 4.997046061143296e-07, "loss": 0.9014, "step": 15727 }, { "epoch": 0.9, "grad_norm": 1.78938889503479, "learning_rate": 4.991248979846486e-07, "loss": 0.8624, "step": 15728 }, { "epoch": 0.9, "grad_norm": 1.7296125888824463, "learning_rate": 4.985455177036269e-07, "loss": 0.8509, "step": 15729 }, { "epoch": 0.9, "grad_norm": 1.6556065082550049, "learning_rate": 4.979664652912597e-07, "loss": 0.9032, "step": 15730 }, { "epoch": 0.9, "grad_norm": 1.6765328645706177, "learning_rate": 4.973877407675253e-07, "loss": 0.9402, "step": 15731 }, { "epoch": 0.9, "grad_norm": 1.7139443159103394, "learning_rate": 4.968093441523958e-07, "loss": 0.8296, "step": 15732 }, { "epoch": 0.9, "grad_norm": 1.7192199230194092, "learning_rate": 4.962312754658305e-07, "loss": 0.8702, "step": 15733 }, { "epoch": 0.9, "grad_norm": 1.6450542211532593, "learning_rate": 4.956535347277758e-07, "loss": 0.9321, "step": 15734 }, { "epoch": 0.9, "grad_norm": 1.8065743446350098, "learning_rate": 4.95076121958169e-07, "loss": 0.9276, "step": 15735 }, { "epoch": 0.9, "grad_norm": 1.7082480192184448, "learning_rate": 4.944990371769331e-07, "loss": 0.8433, "step": 15736 }, { "epoch": 0.9, "grad_norm": 1.7138797044754028, "learning_rate": 4.939222804039833e-07, "loss": 0.859, "step": 15737 }, { "epoch": 0.9, "grad_norm": 1.725624680519104, "learning_rate": 4.933458516592216e-07, "loss": 0.9419, "step": 15738 }, { "epoch": 0.9, "grad_norm": 1.776052713394165, "learning_rate": 4.927697509625396e-07, "loss": 0.8684, "step": 15739 }, { "epoch": 0.9, "grad_norm": 1.7903958559036255, "learning_rate": 4.921939783338137e-07, "loss": 1.014, "step": 15740 }, { "epoch": 0.9, "grad_norm": 1.7034764289855957, "learning_rate": 4.916185337929169e-07, "loss": 0.8972, "step": 15741 }, { "epoch": 0.9, "grad_norm": 1.716565728187561, "learning_rate": 4.910434173597023e-07, "loss": 0.9402, "step": 15742 }, { "epoch": 0.9, "grad_norm": 1.8492597341537476, "learning_rate": 4.904686290540184e-07, "loss": 0.9752, "step": 15743 }, { "epoch": 0.9, "grad_norm": 1.7823865413665771, "learning_rate": 4.898941688956981e-07, "loss": 0.8729, "step": 15744 }, { "epoch": 0.9, "grad_norm": 1.696645975112915, "learning_rate": 4.893200369045636e-07, "loss": 0.9579, "step": 15745 }, { "epoch": 0.9, "grad_norm": 2.059925079345703, "learning_rate": 4.8874623310043e-07, "loss": 0.9037, "step": 15746 }, { "epoch": 0.9, "grad_norm": 1.6956720352172852, "learning_rate": 4.881727575030926e-07, "loss": 0.8757, "step": 15747 }, { "epoch": 0.9, "grad_norm": 1.6883294582366943, "learning_rate": 4.875996101323455e-07, "loss": 0.8769, "step": 15748 }, { "epoch": 0.9, "grad_norm": 1.7607452869415283, "learning_rate": 4.870267910079618e-07, "loss": 0.9245, "step": 15749 }, { "epoch": 0.9, "grad_norm": 1.8052239418029785, "learning_rate": 4.864543001497113e-07, "loss": 0.8465, "step": 15750 }, { "epoch": 0.9, "grad_norm": 1.9014909267425537, "learning_rate": 4.858821375773471e-07, "loss": 0.8528, "step": 15751 }, { "epoch": 0.9, "grad_norm": 1.7933992147445679, "learning_rate": 4.853103033106143e-07, "loss": 0.8965, "step": 15752 }, { "epoch": 0.9, "grad_norm": 1.7145254611968994, "learning_rate": 4.847387973692441e-07, "loss": 0.9, "step": 15753 }, { "epoch": 0.9, "grad_norm": 1.8281176090240479, "learning_rate": 4.841676197729594e-07, "loss": 0.7973, "step": 15754 }, { "epoch": 0.9, "grad_norm": 1.7307642698287964, "learning_rate": 4.835967705414679e-07, "loss": 0.956, "step": 15755 }, { "epoch": 0.9, "grad_norm": 0.9950353503227234, "learning_rate": 4.830262496944693e-07, "loss": 0.5799, "step": 15756 }, { "epoch": 0.9, "grad_norm": 1.8266314268112183, "learning_rate": 4.824560572516501e-07, "loss": 0.9225, "step": 15757 }, { "epoch": 0.9, "grad_norm": 2.0209341049194336, "learning_rate": 4.818861932326868e-07, "loss": 0.9647, "step": 15758 }, { "epoch": 0.9, "grad_norm": 1.9397742748260498, "learning_rate": 4.813166576572415e-07, "loss": 0.8734, "step": 15759 }, { "epoch": 0.9, "grad_norm": 1.6650595664978027, "learning_rate": 4.807474505449705e-07, "loss": 0.8131, "step": 15760 }, { "epoch": 0.9, "grad_norm": 1.929392695426941, "learning_rate": 4.801785719155128e-07, "loss": 0.9177, "step": 15761 }, { "epoch": 0.9, "grad_norm": 1.7827253341674805, "learning_rate": 4.796100217885003e-07, "loss": 0.8413, "step": 15762 }, { "epoch": 0.9, "grad_norm": 1.8335567712783813, "learning_rate": 4.790418001835529e-07, "loss": 0.8995, "step": 15763 }, { "epoch": 0.9, "grad_norm": 0.9441468715667725, "learning_rate": 4.78473907120276e-07, "loss": 0.5192, "step": 15764 }, { "epoch": 0.9, "grad_norm": 1.7356173992156982, "learning_rate": 4.779063426182684e-07, "loss": 0.8767, "step": 15765 }, { "epoch": 0.9, "grad_norm": 1.8008344173431396, "learning_rate": 4.773391066971134e-07, "loss": 0.9068, "step": 15766 }, { "epoch": 0.9, "grad_norm": 1.128982424736023, "learning_rate": 4.767721993763863e-07, "loss": 0.5993, "step": 15767 }, { "epoch": 0.9, "grad_norm": 1.8010064363479614, "learning_rate": 4.7620562067564715e-07, "loss": 0.8734, "step": 15768 }, { "epoch": 0.9, "grad_norm": 1.833927869796753, "learning_rate": 4.756393706144491e-07, "loss": 0.9064, "step": 15769 }, { "epoch": 0.9, "grad_norm": 1.9079430103302002, "learning_rate": 4.75073449212331e-07, "loss": 0.9044, "step": 15770 }, { "epoch": 0.9, "grad_norm": 1.869832158088684, "learning_rate": 4.745078564888217e-07, "loss": 0.8867, "step": 15771 }, { "epoch": 0.9, "grad_norm": 1.8267983198165894, "learning_rate": 4.7394259246343666e-07, "loss": 0.8493, "step": 15772 }, { "epoch": 0.9, "grad_norm": 1.6709568500518799, "learning_rate": 4.7337765715568364e-07, "loss": 0.8659, "step": 15773 }, { "epoch": 0.9, "grad_norm": 1.7146183252334595, "learning_rate": 4.728130505850559e-07, "loss": 0.9038, "step": 15774 }, { "epoch": 0.9, "grad_norm": 1.7966071367263794, "learning_rate": 4.7224877277103673e-07, "loss": 0.8676, "step": 15775 }, { "epoch": 0.9, "grad_norm": 1.967586636543274, "learning_rate": 4.716848237330984e-07, "loss": 0.9224, "step": 15776 }, { "epoch": 0.9, "grad_norm": 1.7624552249908447, "learning_rate": 4.7112120349069976e-07, "loss": 0.9134, "step": 15777 }, { "epoch": 0.9, "grad_norm": 1.844422459602356, "learning_rate": 4.7055791206329194e-07, "loss": 0.914, "step": 15778 }, { "epoch": 0.9, "grad_norm": 1.7868367433547974, "learning_rate": 4.699949494703093e-07, "loss": 0.8833, "step": 15779 }, { "epoch": 0.91, "grad_norm": 1.7061166763305664, "learning_rate": 4.694323157311809e-07, "loss": 0.9134, "step": 15780 }, { "epoch": 0.91, "grad_norm": 1.8165570497512817, "learning_rate": 4.6887001086531994e-07, "loss": 0.9239, "step": 15781 }, { "epoch": 0.91, "grad_norm": 2.007720470428467, "learning_rate": 4.6830803489213206e-07, "loss": 0.8854, "step": 15782 }, { "epoch": 0.91, "grad_norm": 1.8768231868743896, "learning_rate": 4.6774638783100625e-07, "loss": 0.9213, "step": 15783 }, { "epoch": 0.91, "grad_norm": 1.754347801208496, "learning_rate": 4.6718506970132803e-07, "loss": 0.8921, "step": 15784 }, { "epoch": 0.91, "grad_norm": 1.823531985282898, "learning_rate": 4.66624080522462e-07, "loss": 0.9453, "step": 15785 }, { "epoch": 0.91, "grad_norm": 1.76090407371521, "learning_rate": 4.6606342031376816e-07, "loss": 0.8468, "step": 15786 }, { "epoch": 0.91, "grad_norm": 1.830092430114746, "learning_rate": 4.6550308909459554e-07, "loss": 0.8878, "step": 15787 }, { "epoch": 0.91, "grad_norm": 0.9652304649353027, "learning_rate": 4.6494308688427635e-07, "loss": 0.5434, "step": 15788 }, { "epoch": 0.91, "grad_norm": 1.7110916376113892, "learning_rate": 4.6438341370213745e-07, "loss": 0.8519, "step": 15789 }, { "epoch": 0.91, "grad_norm": 0.9821134805679321, "learning_rate": 4.638240695674889e-07, "loss": 0.558, "step": 15790 }, { "epoch": 0.91, "grad_norm": 1.8891531229019165, "learning_rate": 4.6326505449963535e-07, "loss": 0.8531, "step": 15791 }, { "epoch": 0.91, "grad_norm": 1.8857909440994263, "learning_rate": 4.6270636851786234e-07, "loss": 0.9731, "step": 15792 }, { "epoch": 0.91, "grad_norm": 1.8339778184890747, "learning_rate": 4.621480116414534e-07, "loss": 0.8119, "step": 15793 }, { "epoch": 0.91, "grad_norm": 1.009507656097412, "learning_rate": 4.6158998388967315e-07, "loss": 0.5773, "step": 15794 }, { "epoch": 0.91, "grad_norm": 1.904154658317566, "learning_rate": 4.6103228528177834e-07, "loss": 0.9033, "step": 15795 }, { "epoch": 0.91, "grad_norm": 1.9575434923171997, "learning_rate": 4.6047491583701253e-07, "loss": 0.8975, "step": 15796 }, { "epoch": 0.91, "grad_norm": 1.7517492771148682, "learning_rate": 4.5991787557460923e-07, "loss": 0.8387, "step": 15797 }, { "epoch": 0.91, "grad_norm": 0.9346876740455627, "learning_rate": 4.5936116451379186e-07, "loss": 0.4791, "step": 15798 }, { "epoch": 0.91, "grad_norm": 1.6114987134933472, "learning_rate": 4.588047826737696e-07, "loss": 0.8701, "step": 15799 }, { "epoch": 0.91, "grad_norm": 1.7082452774047852, "learning_rate": 4.582487300737437e-07, "loss": 0.8997, "step": 15800 }, { "epoch": 0.91, "grad_norm": 1.760166049003601, "learning_rate": 4.5769300673289776e-07, "loss": 0.9715, "step": 15801 }, { "epoch": 0.91, "grad_norm": 1.8668776750564575, "learning_rate": 4.57137612670413e-07, "loss": 0.9953, "step": 15802 }, { "epoch": 0.91, "grad_norm": 2.0007483959198, "learning_rate": 4.5658254790545085e-07, "loss": 0.9302, "step": 15803 }, { "epoch": 0.91, "grad_norm": 1.6467692852020264, "learning_rate": 4.5602781245716707e-07, "loss": 0.7793, "step": 15804 }, { "epoch": 0.91, "grad_norm": 1.8213754892349243, "learning_rate": 4.5547340634470303e-07, "loss": 0.8517, "step": 15805 }, { "epoch": 0.91, "grad_norm": 1.7014737129211426, "learning_rate": 4.549193295871912e-07, "loss": 0.8727, "step": 15806 }, { "epoch": 0.91, "grad_norm": 1.9187073707580566, "learning_rate": 4.543655822037496e-07, "loss": 0.8901, "step": 15807 }, { "epoch": 0.91, "grad_norm": 1.7303494215011597, "learning_rate": 4.538121642134874e-07, "loss": 0.894, "step": 15808 }, { "epoch": 0.91, "grad_norm": 1.6147066354751587, "learning_rate": 4.5325907563550045e-07, "loss": 0.7855, "step": 15809 }, { "epoch": 0.91, "grad_norm": 1.8874821662902832, "learning_rate": 4.5270631648887455e-07, "loss": 0.9575, "step": 15810 }, { "epoch": 0.91, "grad_norm": 1.0284878015518188, "learning_rate": 4.5215388679268556e-07, "loss": 0.535, "step": 15811 }, { "epoch": 0.91, "grad_norm": 1.803733229637146, "learning_rate": 4.5160178656599495e-07, "loss": 0.8653, "step": 15812 }, { "epoch": 0.91, "grad_norm": 1.8952652215957642, "learning_rate": 4.510500158278541e-07, "loss": 0.8971, "step": 15813 }, { "epoch": 0.91, "grad_norm": 1.7323887348175049, "learning_rate": 4.504985745973034e-07, "loss": 0.8429, "step": 15814 }, { "epoch": 0.91, "grad_norm": 1.8402196168899536, "learning_rate": 4.49947462893372e-07, "loss": 0.8667, "step": 15815 }, { "epoch": 0.91, "grad_norm": 1.849502682685852, "learning_rate": 4.493966807350758e-07, "loss": 0.8452, "step": 15816 }, { "epoch": 0.91, "grad_norm": 1.7073493003845215, "learning_rate": 4.4884622814142187e-07, "loss": 0.8369, "step": 15817 }, { "epoch": 0.91, "grad_norm": 1.6291992664337158, "learning_rate": 4.482961051314028e-07, "loss": 0.8464, "step": 15818 }, { "epoch": 0.91, "grad_norm": 1.0040384531021118, "learning_rate": 4.4774631172400663e-07, "loss": 0.4712, "step": 15819 }, { "epoch": 0.91, "grad_norm": 1.9278392791748047, "learning_rate": 4.471968479381994e-07, "loss": 0.8817, "step": 15820 }, { "epoch": 0.91, "grad_norm": 1.801424503326416, "learning_rate": 4.4664771379294704e-07, "loss": 0.8682, "step": 15821 }, { "epoch": 0.91, "grad_norm": 1.7981007099151611, "learning_rate": 4.4609890930719324e-07, "loss": 0.8557, "step": 15822 }, { "epoch": 0.91, "grad_norm": 2.8545074462890625, "learning_rate": 4.455504344998807e-07, "loss": 0.9202, "step": 15823 }, { "epoch": 0.91, "grad_norm": 1.7270314693450928, "learning_rate": 4.4500228938993195e-07, "loss": 0.9089, "step": 15824 }, { "epoch": 0.91, "grad_norm": 0.9364813566207886, "learning_rate": 4.444544739962642e-07, "loss": 0.4811, "step": 15825 }, { "epoch": 0.91, "grad_norm": 1.7959704399108887, "learning_rate": 4.439069883377789e-07, "loss": 0.8656, "step": 15826 }, { "epoch": 0.91, "grad_norm": 1.761427402496338, "learning_rate": 4.433598324333699e-07, "loss": 0.9237, "step": 15827 }, { "epoch": 0.91, "grad_norm": 1.7590771913528442, "learning_rate": 4.428130063019187e-07, "loss": 0.7513, "step": 15828 }, { "epoch": 0.91, "grad_norm": 1.0862759351730347, "learning_rate": 4.4226650996229247e-07, "loss": 0.5485, "step": 15829 }, { "epoch": 0.91, "grad_norm": 1.6861910820007324, "learning_rate": 4.417203434333517e-07, "loss": 0.9668, "step": 15830 }, { "epoch": 0.91, "grad_norm": 1.7583849430084229, "learning_rate": 4.4117450673394125e-07, "loss": 0.9017, "step": 15831 }, { "epoch": 0.91, "grad_norm": 1.9644854068756104, "learning_rate": 4.406289998828972e-07, "loss": 0.8992, "step": 15832 }, { "epoch": 0.91, "grad_norm": 1.851729154586792, "learning_rate": 4.4008382289904337e-07, "loss": 0.9028, "step": 15833 }, { "epoch": 0.91, "grad_norm": 1.8522791862487793, "learning_rate": 4.3953897580119255e-07, "loss": 0.7929, "step": 15834 }, { "epoch": 0.91, "grad_norm": 1.827263593673706, "learning_rate": 4.3899445860814516e-07, "loss": 0.997, "step": 15835 }, { "epoch": 0.91, "grad_norm": 1.778059959411621, "learning_rate": 4.384502713386918e-07, "loss": 0.8277, "step": 15836 }, { "epoch": 0.91, "grad_norm": 1.747892141342163, "learning_rate": 4.379064140116096e-07, "loss": 0.91, "step": 15837 }, { "epoch": 0.91, "grad_norm": 1.7599698305130005, "learning_rate": 4.3736288664566805e-07, "loss": 0.9697, "step": 15838 }, { "epoch": 0.91, "grad_norm": 1.8990840911865234, "learning_rate": 4.3681968925961994e-07, "loss": 0.9091, "step": 15839 }, { "epoch": 0.91, "grad_norm": 1.856643795967102, "learning_rate": 4.362768218722102e-07, "loss": 0.8383, "step": 15840 }, { "epoch": 0.91, "grad_norm": 1.8139196634292603, "learning_rate": 4.3573428450217394e-07, "loss": 0.9175, "step": 15841 }, { "epoch": 0.91, "grad_norm": 1.7533811330795288, "learning_rate": 4.351920771682283e-07, "loss": 0.8531, "step": 15842 }, { "epoch": 0.91, "grad_norm": 1.5952285528182983, "learning_rate": 4.346501998890884e-07, "loss": 0.8685, "step": 15843 }, { "epoch": 0.91, "grad_norm": 1.837520956993103, "learning_rate": 4.341086526834493e-07, "loss": 0.9518, "step": 15844 }, { "epoch": 0.91, "grad_norm": 0.9442716836929321, "learning_rate": 4.335674355699992e-07, "loss": 0.4729, "step": 15845 }, { "epoch": 0.91, "grad_norm": 1.7076964378356934, "learning_rate": 4.3302654856741343e-07, "loss": 0.8808, "step": 15846 }, { "epoch": 0.91, "grad_norm": 1.8478697538375854, "learning_rate": 4.324859916943591e-07, "loss": 0.8889, "step": 15847 }, { "epoch": 0.91, "grad_norm": 1.7950849533081055, "learning_rate": 4.3194576496948584e-07, "loss": 0.7923, "step": 15848 }, { "epoch": 0.91, "grad_norm": 1.90106999874115, "learning_rate": 4.314058684114386e-07, "loss": 0.9557, "step": 15849 }, { "epoch": 0.91, "grad_norm": 1.8818327188491821, "learning_rate": 4.3086630203884374e-07, "loss": 0.7985, "step": 15850 }, { "epoch": 0.91, "grad_norm": 1.635120153427124, "learning_rate": 4.3032706587032403e-07, "loss": 0.8484, "step": 15851 }, { "epoch": 0.91, "grad_norm": 1.0191941261291504, "learning_rate": 4.297881599244857e-07, "loss": 0.516, "step": 15852 }, { "epoch": 0.91, "grad_norm": 1.6411631107330322, "learning_rate": 4.2924958421992293e-07, "loss": 0.8478, "step": 15853 }, { "epoch": 0.91, "grad_norm": 1.7756330966949463, "learning_rate": 4.28711338775224e-07, "loss": 0.8757, "step": 15854 }, { "epoch": 0.91, "grad_norm": 1.7151927947998047, "learning_rate": 4.2817342360895965e-07, "loss": 0.7637, "step": 15855 }, { "epoch": 0.91, "grad_norm": 1.9657961130142212, "learning_rate": 4.2763583873969394e-07, "loss": 0.8647, "step": 15856 }, { "epoch": 0.91, "grad_norm": 1.9946190118789673, "learning_rate": 4.270985841859743e-07, "loss": 0.893, "step": 15857 }, { "epoch": 0.91, "grad_norm": 1.854508638381958, "learning_rate": 4.2656165996634247e-07, "loss": 0.9895, "step": 15858 }, { "epoch": 0.91, "grad_norm": 1.8718602657318115, "learning_rate": 4.2602506609932484e-07, "loss": 0.9856, "step": 15859 }, { "epoch": 0.91, "grad_norm": 1.6180856227874756, "learning_rate": 4.2548880260343985e-07, "loss": 0.8654, "step": 15860 }, { "epoch": 0.91, "grad_norm": 1.9282160997390747, "learning_rate": 4.2495286949718937e-07, "loss": 0.9833, "step": 15861 }, { "epoch": 0.91, "grad_norm": 1.8110114336013794, "learning_rate": 4.244172667990698e-07, "loss": 0.8345, "step": 15862 }, { "epoch": 0.91, "grad_norm": 1.855578899383545, "learning_rate": 4.2388199452756075e-07, "loss": 0.928, "step": 15863 }, { "epoch": 0.91, "grad_norm": 1.8390268087387085, "learning_rate": 4.2334705270113405e-07, "loss": 0.8307, "step": 15864 }, { "epoch": 0.91, "grad_norm": 1.7110477685928345, "learning_rate": 4.2281244133825173e-07, "loss": 0.8727, "step": 15865 }, { "epoch": 0.91, "grad_norm": 1.7367172241210938, "learning_rate": 4.222781604573567e-07, "loss": 0.7846, "step": 15866 }, { "epoch": 0.91, "grad_norm": 1.9426498413085938, "learning_rate": 4.2174421007688983e-07, "loss": 0.9402, "step": 15867 }, { "epoch": 0.91, "grad_norm": 1.7359305620193481, "learning_rate": 4.21210590215273e-07, "loss": 0.8705, "step": 15868 }, { "epoch": 0.91, "grad_norm": 1.7773189544677734, "learning_rate": 4.206773008909226e-07, "loss": 0.9183, "step": 15869 }, { "epoch": 0.91, "grad_norm": 1.9586496353149414, "learning_rate": 4.201443421222384e-07, "loss": 0.8646, "step": 15870 }, { "epoch": 0.91, "grad_norm": 1.523102045059204, "learning_rate": 4.196117139276146e-07, "loss": 0.9036, "step": 15871 }, { "epoch": 0.91, "grad_norm": 1.7972440719604492, "learning_rate": 4.190794163254275e-07, "loss": 0.9298, "step": 15872 }, { "epoch": 0.91, "grad_norm": 1.8890737295150757, "learning_rate": 4.18547449334048e-07, "loss": 0.8857, "step": 15873 }, { "epoch": 0.91, "grad_norm": 1.9448047876358032, "learning_rate": 4.1801581297182926e-07, "loss": 0.9867, "step": 15874 }, { "epoch": 0.91, "grad_norm": 1.7496426105499268, "learning_rate": 4.1748450725711875e-07, "loss": 1.0168, "step": 15875 }, { "epoch": 0.91, "grad_norm": 1.9961042404174805, "learning_rate": 4.169535322082519e-07, "loss": 0.9143, "step": 15876 }, { "epoch": 0.91, "grad_norm": 1.7489299774169922, "learning_rate": 4.164228878435483e-07, "loss": 1.0009, "step": 15877 }, { "epoch": 0.91, "grad_norm": 1.8291044235229492, "learning_rate": 4.1589257418132135e-07, "loss": 0.9289, "step": 15878 }, { "epoch": 0.91, "grad_norm": 1.7482285499572754, "learning_rate": 4.1536259123986735e-07, "loss": 0.9399, "step": 15879 }, { "epoch": 0.91, "grad_norm": 2.015943765640259, "learning_rate": 4.1483293903747944e-07, "loss": 0.8717, "step": 15880 }, { "epoch": 0.91, "grad_norm": 1.93864107131958, "learning_rate": 4.1430361759242976e-07, "loss": 0.942, "step": 15881 }, { "epoch": 0.91, "grad_norm": 1.913555383682251, "learning_rate": 4.1377462692298695e-07, "loss": 0.8557, "step": 15882 }, { "epoch": 0.91, "grad_norm": 1.8105446100234985, "learning_rate": 4.1324596704740203e-07, "loss": 0.8742, "step": 15883 }, { "epoch": 0.91, "grad_norm": 1.0868711471557617, "learning_rate": 4.127176379839193e-07, "loss": 0.5959, "step": 15884 }, { "epoch": 0.91, "grad_norm": 1.9166744947433472, "learning_rate": 4.121896397507708e-07, "loss": 0.9479, "step": 15885 }, { "epoch": 0.91, "grad_norm": 1.0831048488616943, "learning_rate": 4.1166197236617634e-07, "loss": 0.541, "step": 15886 }, { "epoch": 0.91, "grad_norm": 1.7250031232833862, "learning_rate": 4.1113463584834144e-07, "loss": 0.8544, "step": 15887 }, { "epoch": 0.91, "grad_norm": 1.8878393173217773, "learning_rate": 4.106076302154671e-07, "loss": 0.8975, "step": 15888 }, { "epoch": 0.91, "grad_norm": 1.673466682434082, "learning_rate": 4.100809554857343e-07, "loss": 0.9465, "step": 15889 }, { "epoch": 0.91, "grad_norm": 1.6400001049041748, "learning_rate": 4.095546116773208e-07, "loss": 0.8602, "step": 15890 }, { "epoch": 0.91, "grad_norm": 1.9134602546691895, "learning_rate": 4.0902859880838643e-07, "loss": 0.8467, "step": 15891 }, { "epoch": 0.91, "grad_norm": 1.0014296770095825, "learning_rate": 4.0850291689708553e-07, "loss": 0.5115, "step": 15892 }, { "epoch": 0.91, "grad_norm": 1.702964425086975, "learning_rate": 4.079775659615548e-07, "loss": 0.926, "step": 15893 }, { "epoch": 0.91, "grad_norm": 0.9945602416992188, "learning_rate": 4.074525460199241e-07, "loss": 0.5498, "step": 15894 }, { "epoch": 0.91, "grad_norm": 1.774322509765625, "learning_rate": 4.0692785709031125e-07, "loss": 0.9044, "step": 15895 }, { "epoch": 0.91, "grad_norm": 1.690029501914978, "learning_rate": 4.0640349919082056e-07, "loss": 0.9713, "step": 15896 }, { "epoch": 0.91, "grad_norm": 1.5604135990142822, "learning_rate": 4.058794723395465e-07, "loss": 0.8533, "step": 15897 }, { "epoch": 0.91, "grad_norm": 1.8438425064086914, "learning_rate": 4.0535577655457127e-07, "loss": 0.8902, "step": 15898 }, { "epoch": 0.91, "grad_norm": 1.811793327331543, "learning_rate": 4.0483241185396815e-07, "loss": 0.9056, "step": 15899 }, { "epoch": 0.91, "grad_norm": 1.7385178804397583, "learning_rate": 4.0430937825579386e-07, "loss": 0.9308, "step": 15900 }, { "epoch": 0.91, "grad_norm": 1.0665388107299805, "learning_rate": 4.037866757780995e-07, "loss": 0.5866, "step": 15901 }, { "epoch": 0.91, "grad_norm": 2.056377649307251, "learning_rate": 4.032643044389195e-07, "loss": 0.9549, "step": 15902 }, { "epoch": 0.91, "grad_norm": 1.7054698467254639, "learning_rate": 4.027422642562828e-07, "loss": 0.8626, "step": 15903 }, { "epoch": 0.91, "grad_norm": 2.0850942134857178, "learning_rate": 4.0222055524820057e-07, "loss": 0.8922, "step": 15904 }, { "epoch": 0.91, "grad_norm": 1.7183587551116943, "learning_rate": 4.0169917743267616e-07, "loss": 0.8545, "step": 15905 }, { "epoch": 0.91, "grad_norm": 1.8127549886703491, "learning_rate": 4.0117813082770296e-07, "loss": 0.8847, "step": 15906 }, { "epoch": 0.91, "grad_norm": 1.7577332258224487, "learning_rate": 4.006574154512577e-07, "loss": 0.8403, "step": 15907 }, { "epoch": 0.91, "grad_norm": 1.8667739629745483, "learning_rate": 4.0013703132131153e-07, "loss": 0.8531, "step": 15908 }, { "epoch": 0.91, "grad_norm": 1.7228500843048096, "learning_rate": 3.9961697845581905e-07, "loss": 0.7923, "step": 15909 }, { "epoch": 0.91, "grad_norm": 1.6586765050888062, "learning_rate": 3.9909725687272914e-07, "loss": 0.7987, "step": 15910 }, { "epoch": 0.91, "grad_norm": 1.8758174180984497, "learning_rate": 3.98577866589972e-07, "loss": 0.9429, "step": 15911 }, { "epoch": 0.91, "grad_norm": 1.810036063194275, "learning_rate": 3.9805880762547323e-07, "loss": 0.8145, "step": 15912 }, { "epoch": 0.91, "grad_norm": 1.6515954732894897, "learning_rate": 3.975400799971418e-07, "loss": 0.8672, "step": 15913 }, { "epoch": 0.91, "grad_norm": 1.821568250656128, "learning_rate": 3.970216837228802e-07, "loss": 0.9317, "step": 15914 }, { "epoch": 0.91, "grad_norm": 1.8133752346038818, "learning_rate": 3.96503618820574e-07, "loss": 0.9489, "step": 15915 }, { "epoch": 0.91, "grad_norm": 1.899345874786377, "learning_rate": 3.9598588530810335e-07, "loss": 0.8526, "step": 15916 }, { "epoch": 0.91, "grad_norm": 1.6724337339401245, "learning_rate": 3.9546848320333067e-07, "loss": 0.7796, "step": 15917 }, { "epoch": 0.91, "grad_norm": 1.8203648328781128, "learning_rate": 3.949514125241116e-07, "loss": 0.8627, "step": 15918 }, { "epoch": 0.91, "grad_norm": 1.801805019378662, "learning_rate": 3.9443467328828976e-07, "loss": 0.8723, "step": 15919 }, { "epoch": 0.91, "grad_norm": 1.72738778591156, "learning_rate": 3.9391826551369417e-07, "loss": 0.9546, "step": 15920 }, { "epoch": 0.91, "grad_norm": 1.7988413572311401, "learning_rate": 3.934021892181461e-07, "loss": 0.8887, "step": 15921 }, { "epoch": 0.91, "grad_norm": 1.786757230758667, "learning_rate": 3.9288644441945356e-07, "loss": 0.8449, "step": 15922 }, { "epoch": 0.91, "grad_norm": 2.2779321670532227, "learning_rate": 3.923710311354134e-07, "loss": 0.9014, "step": 15923 }, { "epoch": 0.91, "grad_norm": 1.7580676078796387, "learning_rate": 3.918559493838114e-07, "loss": 0.9546, "step": 15924 }, { "epoch": 0.91, "grad_norm": 1.6649545431137085, "learning_rate": 3.913411991824212e-07, "loss": 0.8636, "step": 15925 }, { "epoch": 0.91, "grad_norm": 1.76620614528656, "learning_rate": 3.908267805490051e-07, "loss": 0.871, "step": 15926 }, { "epoch": 0.91, "grad_norm": 1.8247005939483643, "learning_rate": 3.9031269350131574e-07, "loss": 0.9103, "step": 15927 }, { "epoch": 0.91, "grad_norm": 1.7880696058273315, "learning_rate": 3.897989380570899e-07, "loss": 0.8994, "step": 15928 }, { "epoch": 0.91, "grad_norm": 1.7409021854400635, "learning_rate": 3.892855142340579e-07, "loss": 0.9078, "step": 15929 }, { "epoch": 0.91, "grad_norm": 1.879544734954834, "learning_rate": 3.8877242204993784e-07, "loss": 0.9704, "step": 15930 }, { "epoch": 0.91, "grad_norm": 1.7329720258712769, "learning_rate": 3.882596615224332e-07, "loss": 0.8924, "step": 15931 }, { "epoch": 0.91, "grad_norm": 1.8035959005355835, "learning_rate": 3.8774723266923886e-07, "loss": 0.9232, "step": 15932 }, { "epoch": 0.91, "grad_norm": 1.7096164226531982, "learning_rate": 3.8723513550803506e-07, "loss": 0.9052, "step": 15933 }, { "epoch": 0.91, "grad_norm": 1.8174573183059692, "learning_rate": 3.867233700564965e-07, "loss": 0.8994, "step": 15934 }, { "epoch": 0.91, "grad_norm": 1.7200968265533447, "learning_rate": 3.8621193633227916e-07, "loss": 0.8657, "step": 15935 }, { "epoch": 0.91, "grad_norm": 1.842258095741272, "learning_rate": 3.857008343530344e-07, "loss": 0.9125, "step": 15936 }, { "epoch": 0.91, "grad_norm": 1.8510268926620483, "learning_rate": 3.851900641363959e-07, "loss": 0.9554, "step": 15937 }, { "epoch": 0.91, "grad_norm": 1.0929793119430542, "learning_rate": 3.8467962569999183e-07, "loss": 0.5181, "step": 15938 }, { "epoch": 0.91, "grad_norm": 1.6862468719482422, "learning_rate": 3.8416951906143253e-07, "loss": 0.8226, "step": 15939 }, { "epoch": 0.91, "grad_norm": 1.798294186592102, "learning_rate": 3.8365974423832496e-07, "loss": 0.8832, "step": 15940 }, { "epoch": 0.91, "grad_norm": 1.7634085416793823, "learning_rate": 3.8315030124825516e-07, "loss": 0.8715, "step": 15941 }, { "epoch": 0.91, "grad_norm": 1.689590573310852, "learning_rate": 3.8264119010880564e-07, "loss": 0.9394, "step": 15942 }, { "epoch": 0.91, "grad_norm": 1.9379509687423706, "learning_rate": 3.821324108375446e-07, "loss": 0.9528, "step": 15943 }, { "epoch": 0.91, "grad_norm": 1.763135552406311, "learning_rate": 3.816239634520258e-07, "loss": 0.9559, "step": 15944 }, { "epoch": 0.91, "grad_norm": 2.3083832263946533, "learning_rate": 3.811158479697985e-07, "loss": 0.9314, "step": 15945 }, { "epoch": 0.91, "grad_norm": 1.6003978252410889, "learning_rate": 3.8060806440839205e-07, "loss": 0.8946, "step": 15946 }, { "epoch": 0.91, "grad_norm": 1.835839867591858, "learning_rate": 3.801006127853313e-07, "loss": 0.9251, "step": 15947 }, { "epoch": 0.91, "grad_norm": 2.002423048019409, "learning_rate": 3.795934931181267e-07, "loss": 0.9791, "step": 15948 }, { "epoch": 0.91, "grad_norm": 1.7746679782867432, "learning_rate": 3.7908670542427637e-07, "loss": 0.9778, "step": 15949 }, { "epoch": 0.91, "grad_norm": 1.7696954011917114, "learning_rate": 3.785802497212676e-07, "loss": 0.8341, "step": 15950 }, { "epoch": 0.91, "grad_norm": 1.8632299900054932, "learning_rate": 3.780741260265808e-07, "loss": 0.9339, "step": 15951 }, { "epoch": 0.91, "grad_norm": 1.8545212745666504, "learning_rate": 3.775683343576764e-07, "loss": 0.8383, "step": 15952 }, { "epoch": 0.91, "grad_norm": 1.8567663431167603, "learning_rate": 3.7706287473201155e-07, "loss": 0.9113, "step": 15953 }, { "epoch": 0.91, "grad_norm": 1.720462441444397, "learning_rate": 3.7655774716702453e-07, "loss": 0.8549, "step": 15954 }, { "epoch": 0.92, "grad_norm": 1.7620278596878052, "learning_rate": 3.7605295168014813e-07, "loss": 0.9328, "step": 15955 }, { "epoch": 0.92, "grad_norm": 1.7949262857437134, "learning_rate": 3.755484882888005e-07, "loss": 0.8645, "step": 15956 }, { "epoch": 0.92, "grad_norm": 1.8074122667312622, "learning_rate": 3.750443570103912e-07, "loss": 0.9107, "step": 15957 }, { "epoch": 0.92, "grad_norm": 1.808741807937622, "learning_rate": 3.745405578623129e-07, "loss": 0.9098, "step": 15958 }, { "epoch": 0.92, "grad_norm": 1.8140826225280762, "learning_rate": 3.740370908619528e-07, "loss": 0.897, "step": 15959 }, { "epoch": 0.92, "grad_norm": 1.8312243223190308, "learning_rate": 3.7353395602668486e-07, "loss": 0.9867, "step": 15960 }, { "epoch": 0.92, "grad_norm": 2.059182643890381, "learning_rate": 3.7303115337386843e-07, "loss": 0.8797, "step": 15961 }, { "epoch": 0.92, "grad_norm": 1.9641932249069214, "learning_rate": 3.725286829208563e-07, "loss": 0.8713, "step": 15962 }, { "epoch": 0.92, "grad_norm": 2.0012917518615723, "learning_rate": 3.720265446849858e-07, "loss": 0.8169, "step": 15963 }, { "epoch": 0.92, "grad_norm": 1.7301795482635498, "learning_rate": 3.715247386835841e-07, "loss": 0.902, "step": 15964 }, { "epoch": 0.92, "grad_norm": 1.7078012228012085, "learning_rate": 3.7102326493396736e-07, "loss": 0.8431, "step": 15965 }, { "epoch": 0.92, "grad_norm": 1.7636345624923706, "learning_rate": 3.7052212345344176e-07, "loss": 0.896, "step": 15966 }, { "epoch": 0.92, "grad_norm": 1.8149771690368652, "learning_rate": 3.7002131425929677e-07, "loss": 0.8703, "step": 15967 }, { "epoch": 0.92, "grad_norm": 1.6916556358337402, "learning_rate": 3.695208373688175e-07, "loss": 0.8962, "step": 15968 }, { "epoch": 0.92, "grad_norm": 1.9374797344207764, "learning_rate": 3.690206927992712e-07, "loss": 0.8997, "step": 15969 }, { "epoch": 0.92, "grad_norm": 1.5948518514633179, "learning_rate": 3.685208805679186e-07, "loss": 0.9563, "step": 15970 }, { "epoch": 0.92, "grad_norm": 1.761960744857788, "learning_rate": 3.680214006920058e-07, "loss": 0.8667, "step": 15971 }, { "epoch": 0.92, "grad_norm": 1.0561821460723877, "learning_rate": 3.675222531887679e-07, "loss": 0.5755, "step": 15972 }, { "epoch": 0.92, "grad_norm": 1.7220972776412964, "learning_rate": 3.670234380754312e-07, "loss": 0.8981, "step": 15973 }, { "epoch": 0.92, "grad_norm": 1.7780568599700928, "learning_rate": 3.665249553692052e-07, "loss": 0.8575, "step": 15974 }, { "epoch": 0.92, "grad_norm": 1.8450536727905273, "learning_rate": 3.6602680508729506e-07, "loss": 0.8799, "step": 15975 }, { "epoch": 0.92, "grad_norm": 1.8215750455856323, "learning_rate": 3.65528987246887e-07, "loss": 0.9125, "step": 15976 }, { "epoch": 0.92, "grad_norm": 1.6484339237213135, "learning_rate": 3.650315018651618e-07, "loss": 0.8873, "step": 15977 }, { "epoch": 0.92, "grad_norm": 1.6994446516036987, "learning_rate": 3.645343489592834e-07, "loss": 0.8745, "step": 15978 }, { "epoch": 0.92, "grad_norm": 1.7784489393234253, "learning_rate": 3.640375285464115e-07, "loss": 0.8452, "step": 15979 }, { "epoch": 0.92, "grad_norm": 1.6236860752105713, "learning_rate": 3.635410406436857e-07, "loss": 0.9537, "step": 15980 }, { "epoch": 0.92, "grad_norm": 1.762093424797058, "learning_rate": 3.6304488526824113e-07, "loss": 0.8743, "step": 15981 }, { "epoch": 0.92, "grad_norm": 1.8169455528259277, "learning_rate": 3.6254906243719744e-07, "loss": 0.8901, "step": 15982 }, { "epoch": 0.92, "grad_norm": 1.9161959886550903, "learning_rate": 3.620535721676643e-07, "loss": 0.8909, "step": 15983 }, { "epoch": 0.92, "grad_norm": 1.8205138444900513, "learning_rate": 3.6155841447674035e-07, "loss": 1.0179, "step": 15984 }, { "epoch": 0.92, "grad_norm": 1.7061212062835693, "learning_rate": 3.6106358938151065e-07, "loss": 0.8711, "step": 15985 }, { "epoch": 0.92, "grad_norm": 1.7559456825256348, "learning_rate": 3.6056909689905274e-07, "loss": 0.8928, "step": 15986 }, { "epoch": 0.92, "grad_norm": 1.8215025663375854, "learning_rate": 3.600749370464274e-07, "loss": 0.8903, "step": 15987 }, { "epoch": 0.92, "grad_norm": 1.8701554536819458, "learning_rate": 3.595811098406887e-07, "loss": 0.897, "step": 15988 }, { "epoch": 0.92, "grad_norm": 1.980040431022644, "learning_rate": 3.5908761529887536e-07, "loss": 0.8842, "step": 15989 }, { "epoch": 0.92, "grad_norm": 1.8623727560043335, "learning_rate": 3.5859445343801926e-07, "loss": 0.8437, "step": 15990 }, { "epoch": 0.92, "grad_norm": 1.7643805742263794, "learning_rate": 3.581016242751356e-07, "loss": 0.8693, "step": 15991 }, { "epoch": 0.92, "grad_norm": 1.9034552574157715, "learning_rate": 3.57609127827232e-07, "loss": 0.9462, "step": 15992 }, { "epoch": 0.92, "grad_norm": 1.9294816255569458, "learning_rate": 3.5711696411130035e-07, "loss": 0.9132, "step": 15993 }, { "epoch": 0.92, "grad_norm": 1.7443877458572388, "learning_rate": 3.5662513314432823e-07, "loss": 0.8514, "step": 15994 }, { "epoch": 0.92, "grad_norm": 1.7230364084243774, "learning_rate": 3.561336349432842e-07, "loss": 0.9014, "step": 15995 }, { "epoch": 0.92, "grad_norm": 1.8636956214904785, "learning_rate": 3.5564246952512817e-07, "loss": 0.9478, "step": 15996 }, { "epoch": 0.92, "grad_norm": 1.7242804765701294, "learning_rate": 3.55151636906812e-07, "loss": 0.9772, "step": 15997 }, { "epoch": 0.92, "grad_norm": 1.7394336462020874, "learning_rate": 3.5466113710526997e-07, "loss": 0.9589, "step": 15998 }, { "epoch": 0.92, "grad_norm": 1.7706475257873535, "learning_rate": 3.5417097013743075e-07, "loss": 0.8727, "step": 15999 }, { "epoch": 0.92, "grad_norm": 0.981211245059967, "learning_rate": 3.5368113602020414e-07, "loss": 0.5746, "step": 16000 }, { "epoch": 0.92, "grad_norm": 1.6546509265899658, "learning_rate": 3.531916347704978e-07, "loss": 0.796, "step": 16001 }, { "epoch": 0.92, "grad_norm": 1.804200530052185, "learning_rate": 3.5270246640519925e-07, "loss": 0.898, "step": 16002 }, { "epoch": 0.92, "grad_norm": 1.9052832126617432, "learning_rate": 3.5221363094119166e-07, "loss": 0.866, "step": 16003 }, { "epoch": 0.92, "grad_norm": 1.696407437324524, "learning_rate": 3.5172512839533934e-07, "loss": 0.8817, "step": 16004 }, { "epoch": 0.92, "grad_norm": 1.8722658157348633, "learning_rate": 3.5123695878450327e-07, "loss": 0.8628, "step": 16005 }, { "epoch": 0.92, "grad_norm": 1.7976242303848267, "learning_rate": 3.507491221255266e-07, "loss": 0.9414, "step": 16006 }, { "epoch": 0.92, "grad_norm": 1.7640900611877441, "learning_rate": 3.5026161843524254e-07, "loss": 0.9239, "step": 16007 }, { "epoch": 0.92, "grad_norm": 1.771964192390442, "learning_rate": 3.4977444773047653e-07, "loss": 0.8632, "step": 16008 }, { "epoch": 0.92, "grad_norm": 1.6969386339187622, "learning_rate": 3.4928761002803625e-07, "loss": 0.9189, "step": 16009 }, { "epoch": 0.92, "grad_norm": 1.8852514028549194, "learning_rate": 3.4880110534472265e-07, "loss": 0.8797, "step": 16010 }, { "epoch": 0.92, "grad_norm": 2.1289188861846924, "learning_rate": 3.483149336973235e-07, "loss": 0.9061, "step": 16011 }, { "epoch": 0.92, "grad_norm": 1.7376195192337036, "learning_rate": 3.478290951026153e-07, "loss": 0.9327, "step": 16012 }, { "epoch": 0.92, "grad_norm": 1.7414181232452393, "learning_rate": 3.4734358957736247e-07, "loss": 0.8543, "step": 16013 }, { "epoch": 0.92, "grad_norm": 1.8411325216293335, "learning_rate": 3.4685841713831937e-07, "loss": 0.871, "step": 16014 }, { "epoch": 0.92, "grad_norm": 1.9469294548034668, "learning_rate": 3.463735778022259e-07, "loss": 0.9475, "step": 16015 }, { "epoch": 0.92, "grad_norm": 1.6670212745666504, "learning_rate": 3.458890715858143e-07, "loss": 0.8557, "step": 16016 }, { "epoch": 0.92, "grad_norm": 1.0476583242416382, "learning_rate": 3.454048985058034e-07, "loss": 0.5805, "step": 16017 }, { "epoch": 0.92, "grad_norm": 1.0331312417984009, "learning_rate": 3.449210585789009e-07, "loss": 0.5023, "step": 16018 }, { "epoch": 0.92, "grad_norm": 1.045556664466858, "learning_rate": 3.4443755182180125e-07, "loss": 0.5286, "step": 16019 }, { "epoch": 0.92, "grad_norm": 1.6600151062011719, "learning_rate": 3.4395437825119117e-07, "loss": 0.8311, "step": 16020 }, { "epoch": 0.92, "grad_norm": 1.6164562702178955, "learning_rate": 3.4347153788374167e-07, "loss": 0.8996, "step": 16021 }, { "epoch": 0.92, "grad_norm": 1.805677890777588, "learning_rate": 3.429890307361161e-07, "loss": 0.9129, "step": 16022 }, { "epoch": 0.92, "grad_norm": 1.8209412097930908, "learning_rate": 3.4250685682496233e-07, "loss": 0.8245, "step": 16023 }, { "epoch": 0.92, "grad_norm": 1.7682663202285767, "learning_rate": 3.420250161669203e-07, "loss": 0.8792, "step": 16024 }, { "epoch": 0.92, "grad_norm": 1.8460445404052734, "learning_rate": 3.4154350877861565e-07, "loss": 0.8908, "step": 16025 }, { "epoch": 0.92, "grad_norm": 1.8630725145339966, "learning_rate": 3.4106233467666504e-07, "loss": 0.8475, "step": 16026 }, { "epoch": 0.92, "grad_norm": 1.7359724044799805, "learning_rate": 3.4058149387767305e-07, "loss": 0.931, "step": 16027 }, { "epoch": 0.92, "grad_norm": 1.7361557483673096, "learning_rate": 3.4010098639822964e-07, "loss": 0.8737, "step": 16028 }, { "epoch": 0.92, "grad_norm": 2.1061549186706543, "learning_rate": 3.396208122549194e-07, "loss": 0.868, "step": 16029 }, { "epoch": 0.92, "grad_norm": 1.253780484199524, "learning_rate": 3.391409714643079e-07, "loss": 0.5529, "step": 16030 }, { "epoch": 0.92, "grad_norm": 1.1058951616287231, "learning_rate": 3.386614640429553e-07, "loss": 0.5813, "step": 16031 }, { "epoch": 0.92, "grad_norm": 1.7368977069854736, "learning_rate": 3.3818229000740833e-07, "loss": 0.8316, "step": 16032 }, { "epoch": 0.92, "grad_norm": 1.8827120065689087, "learning_rate": 3.377034493742015e-07, "loss": 0.8448, "step": 16033 }, { "epoch": 0.92, "grad_norm": 1.536138892173767, "learning_rate": 3.372249421598572e-07, "loss": 0.8511, "step": 16034 }, { "epoch": 0.92, "grad_norm": 1.8005306720733643, "learning_rate": 3.3674676838088893e-07, "loss": 0.9824, "step": 16035 }, { "epoch": 0.92, "grad_norm": 1.8767318725585938, "learning_rate": 3.3626892805379565e-07, "loss": 0.8387, "step": 16036 }, { "epoch": 0.92, "grad_norm": 0.9783217310905457, "learning_rate": 3.357914211950675e-07, "loss": 0.5722, "step": 16037 }, { "epoch": 0.92, "grad_norm": 1.6194586753845215, "learning_rate": 3.353142478211824e-07, "loss": 0.9219, "step": 16038 }, { "epoch": 0.92, "grad_norm": 1.7989591360092163, "learning_rate": 3.3483740794860386e-07, "loss": 0.799, "step": 16039 }, { "epoch": 0.92, "grad_norm": 1.9079878330230713, "learning_rate": 3.3436090159378987e-07, "loss": 0.9051, "step": 16040 }, { "epoch": 0.92, "grad_norm": 1.8095263242721558, "learning_rate": 3.338847287731795e-07, "loss": 0.8747, "step": 16041 }, { "epoch": 0.92, "grad_norm": 1.8701649904251099, "learning_rate": 3.3340888950320725e-07, "loss": 0.8645, "step": 16042 }, { "epoch": 0.92, "grad_norm": 1.7751802206039429, "learning_rate": 3.3293338380029017e-07, "loss": 0.7998, "step": 16043 }, { "epoch": 0.92, "grad_norm": 1.7383900880813599, "learning_rate": 3.324582116808395e-07, "loss": 0.9046, "step": 16044 }, { "epoch": 0.92, "grad_norm": 1.743598222732544, "learning_rate": 3.3198337316124987e-07, "loss": 0.9032, "step": 16045 }, { "epoch": 0.92, "grad_norm": 1.7166293859481812, "learning_rate": 3.315088682579082e-07, "loss": 0.8994, "step": 16046 }, { "epoch": 0.92, "grad_norm": 1.7939342260360718, "learning_rate": 3.3103469698718694e-07, "loss": 0.9371, "step": 16047 }, { "epoch": 0.92, "grad_norm": 1.7819585800170898, "learning_rate": 3.305608593654508e-07, "loss": 0.8843, "step": 16048 }, { "epoch": 0.92, "grad_norm": 1.7243385314941406, "learning_rate": 3.3008735540904666e-07, "loss": 0.9563, "step": 16049 }, { "epoch": 0.92, "grad_norm": 1.7253384590148926, "learning_rate": 3.2961418513431596e-07, "loss": 0.8331, "step": 16050 }, { "epoch": 0.92, "grad_norm": 1.7176647186279297, "learning_rate": 3.2914134855758894e-07, "loss": 0.8971, "step": 16051 }, { "epoch": 0.92, "grad_norm": 1.9830152988433838, "learning_rate": 3.286688456951781e-07, "loss": 0.8723, "step": 16052 }, { "epoch": 0.92, "grad_norm": 1.8986012935638428, "learning_rate": 3.2819667656339036e-07, "loss": 0.8976, "step": 16053 }, { "epoch": 0.92, "grad_norm": 1.8135855197906494, "learning_rate": 3.2772484117851723e-07, "loss": 0.9289, "step": 16054 }, { "epoch": 0.92, "grad_norm": 1.6852836608886719, "learning_rate": 3.272533395568422e-07, "loss": 0.8648, "step": 16055 }, { "epoch": 0.92, "grad_norm": 1.769431233406067, "learning_rate": 3.2678217171463355e-07, "loss": 0.8704, "step": 16056 }, { "epoch": 0.92, "grad_norm": 1.6774264574050903, "learning_rate": 3.263113376681526e-07, "loss": 0.9187, "step": 16057 }, { "epoch": 0.92, "grad_norm": 1.7942821979522705, "learning_rate": 3.2584083743364416e-07, "loss": 0.9553, "step": 16058 }, { "epoch": 0.92, "grad_norm": 1.7275360822677612, "learning_rate": 3.253706710273452e-07, "loss": 0.8616, "step": 16059 }, { "epoch": 0.92, "grad_norm": 1.7733031511306763, "learning_rate": 3.2490083846547835e-07, "loss": 0.996, "step": 16060 }, { "epoch": 0.92, "grad_norm": 0.9808598160743713, "learning_rate": 3.2443133976425733e-07, "loss": 0.5259, "step": 16061 }, { "epoch": 0.92, "grad_norm": 1.8284358978271484, "learning_rate": 3.239621749398847e-07, "loss": 0.9432, "step": 16062 }, { "epoch": 0.92, "grad_norm": 1.7595874071121216, "learning_rate": 3.2349334400854746e-07, "loss": 0.8562, "step": 16063 }, { "epoch": 0.92, "grad_norm": 1.0034810304641724, "learning_rate": 3.23024846986425e-07, "loss": 0.5445, "step": 16064 }, { "epoch": 0.92, "grad_norm": 1.833794355392456, "learning_rate": 3.225566838896832e-07, "loss": 0.9258, "step": 16065 }, { "epoch": 0.92, "grad_norm": 1.6624916791915894, "learning_rate": 3.2208885473447806e-07, "loss": 0.9563, "step": 16066 }, { "epoch": 0.92, "grad_norm": 1.7818577289581299, "learning_rate": 3.216213595369522e-07, "loss": 0.9376, "step": 16067 }, { "epoch": 0.92, "grad_norm": 1.7648690938949585, "learning_rate": 3.2115419831323715e-07, "loss": 0.9309, "step": 16068 }, { "epoch": 0.92, "grad_norm": 1.7156198024749756, "learning_rate": 3.206873710794545e-07, "loss": 0.9243, "step": 16069 }, { "epoch": 0.92, "grad_norm": 1.6340789794921875, "learning_rate": 3.2022087785171243e-07, "loss": 0.9183, "step": 16070 }, { "epoch": 0.92, "grad_norm": 1.7318247556686401, "learning_rate": 3.1975471864610805e-07, "loss": 0.8846, "step": 16071 }, { "epoch": 0.92, "grad_norm": 1.781927227973938, "learning_rate": 3.192888934787286e-07, "loss": 0.9002, "step": 16072 }, { "epoch": 0.92, "grad_norm": 1.6848148107528687, "learning_rate": 3.188234023656467e-07, "loss": 0.8772, "step": 16073 }, { "epoch": 0.92, "grad_norm": 1.1297677755355835, "learning_rate": 3.1835824532292616e-07, "loss": 0.5209, "step": 16074 }, { "epoch": 0.92, "grad_norm": 1.8265327215194702, "learning_rate": 3.178934223666186e-07, "loss": 0.9265, "step": 16075 }, { "epoch": 0.92, "grad_norm": 1.7990341186523438, "learning_rate": 3.174289335127612e-07, "loss": 0.906, "step": 16076 }, { "epoch": 0.92, "grad_norm": 1.9134149551391602, "learning_rate": 3.1696477877738664e-07, "loss": 0.8379, "step": 16077 }, { "epoch": 0.92, "grad_norm": 2.0070412158966064, "learning_rate": 3.1650095817650773e-07, "loss": 0.9389, "step": 16078 }, { "epoch": 0.92, "grad_norm": 1.621281385421753, "learning_rate": 3.1603747172613165e-07, "loss": 0.8458, "step": 16079 }, { "epoch": 0.92, "grad_norm": 1.0595463514328003, "learning_rate": 3.1557431944225005e-07, "loss": 0.5352, "step": 16080 }, { "epoch": 0.92, "grad_norm": 1.9107943773269653, "learning_rate": 3.1511150134084787e-07, "loss": 0.8584, "step": 16081 }, { "epoch": 0.92, "grad_norm": 1.7533669471740723, "learning_rate": 3.146490174378913e-07, "loss": 0.8822, "step": 16082 }, { "epoch": 0.92, "grad_norm": 0.9546745419502258, "learning_rate": 3.141868677493454e-07, "loss": 0.5225, "step": 16083 }, { "epoch": 0.92, "grad_norm": 1.7685881853103638, "learning_rate": 3.137250522911528e-07, "loss": 0.8396, "step": 16084 }, { "epoch": 0.92, "grad_norm": 1.8378522396087646, "learning_rate": 3.132635710792531e-07, "loss": 0.9302, "step": 16085 }, { "epoch": 0.92, "grad_norm": 1.8987963199615479, "learning_rate": 3.1280242412956687e-07, "loss": 0.9008, "step": 16086 }, { "epoch": 0.92, "grad_norm": 1.6857706308364868, "learning_rate": 3.123416114580091e-07, "loss": 0.9239, "step": 16087 }, { "epoch": 0.92, "grad_norm": 1.8007545471191406, "learning_rate": 3.118811330804816e-07, "loss": 0.857, "step": 16088 }, { "epoch": 0.92, "grad_norm": 1.9181270599365234, "learning_rate": 3.114209890128739e-07, "loss": 0.8288, "step": 16089 }, { "epoch": 0.92, "grad_norm": 1.8000327348709106, "learning_rate": 3.1096117927106205e-07, "loss": 0.8175, "step": 16090 }, { "epoch": 0.92, "grad_norm": 1.8781778812408447, "learning_rate": 3.105017038709157e-07, "loss": 0.8544, "step": 16091 }, { "epoch": 0.92, "grad_norm": 1.9210681915283203, "learning_rate": 3.100425628282899e-07, "loss": 0.8478, "step": 16092 }, { "epoch": 0.92, "grad_norm": 1.7471415996551514, "learning_rate": 3.095837561590265e-07, "loss": 0.8533, "step": 16093 }, { "epoch": 0.92, "grad_norm": 1.9061459302902222, "learning_rate": 3.0912528387895937e-07, "loss": 0.9306, "step": 16094 }, { "epoch": 0.92, "grad_norm": 1.8363149166107178, "learning_rate": 3.0866714600390704e-07, "loss": 1.0026, "step": 16095 }, { "epoch": 0.92, "grad_norm": 1.7845964431762695, "learning_rate": 3.0820934254968126e-07, "loss": 0.9465, "step": 16096 }, { "epoch": 0.92, "grad_norm": 1.948388695716858, "learning_rate": 3.0775187353207614e-07, "loss": 0.874, "step": 16097 }, { "epoch": 0.92, "grad_norm": 1.740517497062683, "learning_rate": 3.072947389668823e-07, "loss": 0.8771, "step": 16098 }, { "epoch": 0.92, "grad_norm": 2.3168516159057617, "learning_rate": 3.0683793886986943e-07, "loss": 0.879, "step": 16099 }, { "epoch": 0.92, "grad_norm": 1.9168965816497803, "learning_rate": 3.063814732568038e-07, "loss": 0.9173, "step": 16100 }, { "epoch": 0.92, "grad_norm": 1.865510106086731, "learning_rate": 3.0592534214343495e-07, "loss": 0.8821, "step": 16101 }, { "epoch": 0.92, "grad_norm": 1.641528606414795, "learning_rate": 3.0546954554550366e-07, "loss": 0.8791, "step": 16102 }, { "epoch": 0.92, "grad_norm": 1.6778312921524048, "learning_rate": 3.050140834787374e-07, "loss": 0.909, "step": 16103 }, { "epoch": 0.92, "grad_norm": 1.6749008893966675, "learning_rate": 3.0455895595885246e-07, "loss": 0.9597, "step": 16104 }, { "epoch": 0.92, "grad_norm": 2.851684808731079, "learning_rate": 3.041041630015562e-07, "loss": 0.8651, "step": 16105 }, { "epoch": 0.92, "grad_norm": 1.7146273851394653, "learning_rate": 3.036497046225395e-07, "loss": 0.8744, "step": 16106 }, { "epoch": 0.92, "grad_norm": 1.7540463209152222, "learning_rate": 3.0319558083748754e-07, "loss": 0.9171, "step": 16107 }, { "epoch": 0.92, "grad_norm": 1.798244595527649, "learning_rate": 3.0274179166206785e-07, "loss": 0.9312, "step": 16108 }, { "epoch": 0.92, "grad_norm": 1.01286780834198, "learning_rate": 3.022883371119423e-07, "loss": 0.5771, "step": 16109 }, { "epoch": 0.92, "grad_norm": 1.6508347988128662, "learning_rate": 3.01835217202755e-07, "loss": 0.8731, "step": 16110 }, { "epoch": 0.92, "grad_norm": 1.8392674922943115, "learning_rate": 3.013824319501446e-07, "loss": 0.8802, "step": 16111 }, { "epoch": 0.92, "grad_norm": 1.656704068183899, "learning_rate": 3.009299813697331e-07, "loss": 0.8668, "step": 16112 }, { "epoch": 0.92, "grad_norm": 1.8883048295974731, "learning_rate": 3.0047786547713677e-07, "loss": 0.8809, "step": 16113 }, { "epoch": 0.92, "grad_norm": 1.662778377532959, "learning_rate": 3.000260842879532e-07, "loss": 0.8824, "step": 16114 }, { "epoch": 0.92, "grad_norm": 1.6404680013656616, "learning_rate": 2.9957463781777443e-07, "loss": 0.7754, "step": 16115 }, { "epoch": 0.92, "grad_norm": 1.8990052938461304, "learning_rate": 2.991235260821779e-07, "loss": 0.9018, "step": 16116 }, { "epoch": 0.92, "grad_norm": 1.8069746494293213, "learning_rate": 2.986727490967289e-07, "loss": 0.7892, "step": 16117 }, { "epoch": 0.92, "grad_norm": 1.6222139596939087, "learning_rate": 2.982223068769863e-07, "loss": 0.9439, "step": 16118 }, { "epoch": 0.92, "grad_norm": 1.5850636959075928, "learning_rate": 2.9777219943848856e-07, "loss": 0.8239, "step": 16119 }, { "epoch": 0.92, "grad_norm": 1.8803397417068481, "learning_rate": 2.9732242679677227e-07, "loss": 0.8058, "step": 16120 }, { "epoch": 0.92, "grad_norm": 1.6956595182418823, "learning_rate": 2.9687298896735384e-07, "loss": 0.9009, "step": 16121 }, { "epoch": 0.92, "grad_norm": 1.7697738409042358, "learning_rate": 2.964238859657453e-07, "loss": 0.941, "step": 16122 }, { "epoch": 0.92, "grad_norm": 1.8401726484298706, "learning_rate": 2.9597511780744104e-07, "loss": 0.8962, "step": 16123 }, { "epoch": 0.92, "grad_norm": 1.8243794441223145, "learning_rate": 2.9552668450792965e-07, "loss": 0.8883, "step": 16124 }, { "epoch": 0.92, "grad_norm": 1.6852229833602905, "learning_rate": 2.9507858608268325e-07, "loss": 0.9278, "step": 16125 }, { "epoch": 0.92, "grad_norm": 1.8400501012802124, "learning_rate": 2.9463082254716725e-07, "loss": 0.9365, "step": 16126 }, { "epoch": 0.92, "grad_norm": 0.9815301299095154, "learning_rate": 2.941833939168282e-07, "loss": 0.5137, "step": 16127 }, { "epoch": 0.92, "grad_norm": 1.8687422275543213, "learning_rate": 2.937363002071081e-07, "loss": 0.877, "step": 16128 }, { "epoch": 0.93, "grad_norm": 1.7938392162322998, "learning_rate": 2.932895414334369e-07, "loss": 0.8558, "step": 16129 }, { "epoch": 0.93, "grad_norm": 1.891542911529541, "learning_rate": 2.9284311761122787e-07, "loss": 0.8359, "step": 16130 }, { "epoch": 0.93, "grad_norm": 1.8307249546051025, "learning_rate": 2.923970287558875e-07, "loss": 0.8985, "step": 16131 }, { "epoch": 0.93, "grad_norm": 1.7825486660003662, "learning_rate": 2.9195127488280795e-07, "loss": 0.9025, "step": 16132 }, { "epoch": 0.93, "grad_norm": 1.8482294082641602, "learning_rate": 2.9150585600737247e-07, "loss": 0.9454, "step": 16133 }, { "epoch": 0.93, "grad_norm": 1.8095290660858154, "learning_rate": 2.910607721449488e-07, "loss": 0.9362, "step": 16134 }, { "epoch": 0.93, "grad_norm": 1.6416980028152466, "learning_rate": 2.90616023310899e-07, "loss": 0.891, "step": 16135 }, { "epoch": 0.93, "grad_norm": 1.6791030168533325, "learning_rate": 2.9017160952056646e-07, "loss": 0.9153, "step": 16136 }, { "epoch": 0.93, "grad_norm": 0.949495255947113, "learning_rate": 2.8972753078928994e-07, "loss": 0.5017, "step": 16137 }, { "epoch": 0.93, "grad_norm": 1.7111949920654297, "learning_rate": 2.892837871323906e-07, "loss": 0.931, "step": 16138 }, { "epoch": 0.93, "grad_norm": 1.4629085063934326, "learning_rate": 2.8884037856518277e-07, "loss": 0.8134, "step": 16139 }, { "epoch": 0.93, "grad_norm": 1.8181954622268677, "learning_rate": 2.8839730510296536e-07, "loss": 0.9739, "step": 16140 }, { "epoch": 0.93, "grad_norm": 1.7331819534301758, "learning_rate": 2.8795456676102837e-07, "loss": 0.89, "step": 16141 }, { "epoch": 0.93, "grad_norm": 1.8123481273651123, "learning_rate": 2.8751216355465075e-07, "loss": 0.809, "step": 16142 }, { "epoch": 0.93, "grad_norm": 1.7138338088989258, "learning_rate": 2.8707009549909793e-07, "loss": 0.8243, "step": 16143 }, { "epoch": 0.93, "grad_norm": 1.9347686767578125, "learning_rate": 2.8662836260962444e-07, "loss": 0.8708, "step": 16144 }, { "epoch": 0.93, "grad_norm": 1.954247236251831, "learning_rate": 2.861869649014715e-07, "loss": 0.871, "step": 16145 }, { "epoch": 0.93, "grad_norm": 1.6859766244888306, "learning_rate": 2.857459023898734e-07, "loss": 0.9496, "step": 16146 }, { "epoch": 0.93, "grad_norm": 1.7164487838745117, "learning_rate": 2.853051750900471e-07, "loss": 0.8696, "step": 16147 }, { "epoch": 0.93, "grad_norm": 1.8968310356140137, "learning_rate": 2.8486478301720246e-07, "loss": 0.9632, "step": 16148 }, { "epoch": 0.93, "grad_norm": 1.5730295181274414, "learning_rate": 2.844247261865363e-07, "loss": 0.8537, "step": 16149 }, { "epoch": 0.93, "grad_norm": 1.7843034267425537, "learning_rate": 2.839850046132342e-07, "loss": 0.8481, "step": 16150 }, { "epoch": 0.93, "grad_norm": 1.9352452754974365, "learning_rate": 2.835456183124685e-07, "loss": 0.9387, "step": 16151 }, { "epoch": 0.93, "grad_norm": 1.6448343992233276, "learning_rate": 2.8310656729940157e-07, "loss": 0.843, "step": 16152 }, { "epoch": 0.93, "grad_norm": 2.0222015380859375, "learning_rate": 2.8266785158918454e-07, "loss": 0.9801, "step": 16153 }, { "epoch": 0.93, "grad_norm": 1.8364253044128418, "learning_rate": 2.822294711969553e-07, "loss": 0.8992, "step": 16154 }, { "epoch": 0.93, "grad_norm": 1.9215871095657349, "learning_rate": 2.817914261378407e-07, "loss": 0.9338, "step": 16155 }, { "epoch": 0.93, "grad_norm": 1.8381654024124146, "learning_rate": 2.8135371642695865e-07, "loss": 0.9219, "step": 16156 }, { "epoch": 0.93, "grad_norm": 1.836000680923462, "learning_rate": 2.809163420794114e-07, "loss": 0.9067, "step": 16157 }, { "epoch": 0.93, "grad_norm": 1.0308854579925537, "learning_rate": 2.8047930311029147e-07, "loss": 0.5192, "step": 16158 }, { "epoch": 0.93, "grad_norm": 1.734331727027893, "learning_rate": 2.8004259953468115e-07, "loss": 0.8925, "step": 16159 }, { "epoch": 0.93, "grad_norm": 1.6828244924545288, "learning_rate": 2.7960623136764844e-07, "loss": 0.7999, "step": 16160 }, { "epoch": 0.93, "grad_norm": 1.671135663986206, "learning_rate": 2.7917019862425344e-07, "loss": 0.8644, "step": 16161 }, { "epoch": 0.93, "grad_norm": 1.6954333782196045, "learning_rate": 2.787345013195386e-07, "loss": 0.9731, "step": 16162 }, { "epoch": 0.93, "grad_norm": 1.7403264045715332, "learning_rate": 2.7829913946854305e-07, "loss": 0.9298, "step": 16163 }, { "epoch": 0.93, "grad_norm": 1.8312069177627563, "learning_rate": 2.778641130862858e-07, "loss": 0.8975, "step": 16164 }, { "epoch": 0.93, "grad_norm": 1.8255512714385986, "learning_rate": 2.774294221877816e-07, "loss": 0.8557, "step": 16165 }, { "epoch": 0.93, "grad_norm": 1.7426769733428955, "learning_rate": 2.7699506678802837e-07, "loss": 0.9236, "step": 16166 }, { "epoch": 0.93, "grad_norm": 1.88303804397583, "learning_rate": 2.7656104690201636e-07, "loss": 0.9352, "step": 16167 }, { "epoch": 0.93, "grad_norm": 1.7245970964431763, "learning_rate": 2.7612736254472026e-07, "loss": 0.878, "step": 16168 }, { "epoch": 0.93, "grad_norm": 1.8192698955535889, "learning_rate": 2.7569401373110595e-07, "loss": 0.8589, "step": 16169 }, { "epoch": 0.93, "grad_norm": 1.8533731698989868, "learning_rate": 2.7526100047612804e-07, "loss": 0.9104, "step": 16170 }, { "epoch": 0.93, "grad_norm": 1.6068453788757324, "learning_rate": 2.7482832279472796e-07, "loss": 0.852, "step": 16171 }, { "epoch": 0.93, "grad_norm": 2.0124881267547607, "learning_rate": 2.7439598070183705e-07, "loss": 0.8949, "step": 16172 }, { "epoch": 0.93, "grad_norm": 1.9334485530853271, "learning_rate": 2.739639742123723e-07, "loss": 0.866, "step": 16173 }, { "epoch": 0.93, "grad_norm": 1.7058124542236328, "learning_rate": 2.735323033412429e-07, "loss": 0.9091, "step": 16174 }, { "epoch": 0.93, "grad_norm": 1.7397733926773071, "learning_rate": 2.731009681033436e-07, "loss": 0.936, "step": 16175 }, { "epoch": 0.93, "grad_norm": 1.765415906906128, "learning_rate": 2.726699685135603e-07, "loss": 0.8738, "step": 16176 }, { "epoch": 0.93, "grad_norm": 1.8463367223739624, "learning_rate": 2.722393045867622e-07, "loss": 0.8949, "step": 16177 }, { "epoch": 0.93, "grad_norm": 1.8276586532592773, "learning_rate": 2.718089763378129e-07, "loss": 0.9126, "step": 16178 }, { "epoch": 0.93, "grad_norm": 1.7087610960006714, "learning_rate": 2.713789837815617e-07, "loss": 0.88, "step": 16179 }, { "epoch": 0.93, "grad_norm": 1.7833597660064697, "learning_rate": 2.709493269328456e-07, "loss": 0.7971, "step": 16180 }, { "epoch": 0.93, "grad_norm": 1.7066795825958252, "learning_rate": 2.705200058064916e-07, "loss": 0.9262, "step": 16181 }, { "epoch": 0.93, "grad_norm": 1.6766881942749023, "learning_rate": 2.700910204173124e-07, "loss": 0.8816, "step": 16182 }, { "epoch": 0.93, "grad_norm": 1.8699969053268433, "learning_rate": 2.696623707801149e-07, "loss": 0.9221, "step": 16183 }, { "epoch": 0.93, "grad_norm": 1.7617461681365967, "learning_rate": 2.692340569096874e-07, "loss": 0.9278, "step": 16184 }, { "epoch": 0.93, "grad_norm": 1.7738142013549805, "learning_rate": 2.6880607882081135e-07, "loss": 0.9778, "step": 16185 }, { "epoch": 0.93, "grad_norm": 1.884142279624939, "learning_rate": 2.683784365282527e-07, "loss": 0.8555, "step": 16186 }, { "epoch": 0.93, "grad_norm": 1.7024891376495361, "learning_rate": 2.6795113004677187e-07, "loss": 0.9031, "step": 16187 }, { "epoch": 0.93, "grad_norm": 1.7708393335342407, "learning_rate": 2.6752415939111154e-07, "loss": 0.9549, "step": 16188 }, { "epoch": 0.93, "grad_norm": 0.9742568731307983, "learning_rate": 2.6709752457600657e-07, "loss": 0.482, "step": 16189 }, { "epoch": 0.93, "grad_norm": 1.9434130191802979, "learning_rate": 2.6667122561617744e-07, "loss": 0.8558, "step": 16190 }, { "epoch": 0.93, "grad_norm": 1.8814247846603394, "learning_rate": 2.6624526252633564e-07, "loss": 0.9564, "step": 16191 }, { "epoch": 0.93, "grad_norm": 1.8548383712768555, "learning_rate": 2.6581963532117947e-07, "loss": 0.9453, "step": 16192 }, { "epoch": 0.93, "grad_norm": 1.7945811748504639, "learning_rate": 2.653943440153961e-07, "loss": 0.8389, "step": 16193 }, { "epoch": 0.93, "grad_norm": 1.7402881383895874, "learning_rate": 2.649693886236626e-07, "loss": 0.8446, "step": 16194 }, { "epoch": 0.93, "grad_norm": 1.6852415800094604, "learning_rate": 2.6454476916063953e-07, "loss": 0.8128, "step": 16195 }, { "epoch": 0.93, "grad_norm": 1.8853914737701416, "learning_rate": 2.64120485640984e-07, "loss": 0.9464, "step": 16196 }, { "epoch": 0.93, "grad_norm": 1.714269995689392, "learning_rate": 2.6369653807933327e-07, "loss": 0.8983, "step": 16197 }, { "epoch": 0.93, "grad_norm": 1.8072229623794556, "learning_rate": 2.6327292649031775e-07, "loss": 0.9442, "step": 16198 }, { "epoch": 0.93, "grad_norm": 1.723638653755188, "learning_rate": 2.6284965088855583e-07, "loss": 0.9605, "step": 16199 }, { "epoch": 0.93, "grad_norm": 1.6997871398925781, "learning_rate": 2.624267112886525e-07, "loss": 0.9037, "step": 16200 }, { "epoch": 0.93, "grad_norm": 1.8215404748916626, "learning_rate": 2.620041077052016e-07, "loss": 0.8945, "step": 16201 }, { "epoch": 0.93, "grad_norm": 1.7320815324783325, "learning_rate": 2.615818401527881e-07, "loss": 0.8379, "step": 16202 }, { "epoch": 0.93, "grad_norm": 1.7023767232894897, "learning_rate": 2.611599086459815e-07, "loss": 0.9388, "step": 16203 }, { "epoch": 0.93, "grad_norm": 1.7626385688781738, "learning_rate": 2.607383131993424e-07, "loss": 0.8888, "step": 16204 }, { "epoch": 0.93, "grad_norm": 1.9082413911819458, "learning_rate": 2.60317053827418e-07, "loss": 0.9471, "step": 16205 }, { "epoch": 0.93, "grad_norm": 1.6569337844848633, "learning_rate": 2.598961305447456e-07, "loss": 0.887, "step": 16206 }, { "epoch": 0.93, "grad_norm": 1.6226907968521118, "learning_rate": 2.5947554336585134e-07, "loss": 0.8509, "step": 16207 }, { "epoch": 0.93, "grad_norm": 1.0655992031097412, "learning_rate": 2.5905529230524475e-07, "loss": 0.5428, "step": 16208 }, { "epoch": 0.93, "grad_norm": 1.7991751432418823, "learning_rate": 2.5863537737743196e-07, "loss": 0.8528, "step": 16209 }, { "epoch": 0.93, "grad_norm": 1.5879261493682861, "learning_rate": 2.5821579859689914e-07, "loss": 0.8846, "step": 16210 }, { "epoch": 0.93, "grad_norm": 1.7403408288955688, "learning_rate": 2.57796555978127e-07, "loss": 0.9023, "step": 16211 }, { "epoch": 0.93, "grad_norm": 1.7279956340789795, "learning_rate": 2.573776495355818e-07, "loss": 0.8935, "step": 16212 }, { "epoch": 0.93, "grad_norm": 1.7943546772003174, "learning_rate": 2.5695907928371955e-07, "loss": 0.9984, "step": 16213 }, { "epoch": 0.93, "grad_norm": 1.8842931985855103, "learning_rate": 2.565408452369822e-07, "loss": 0.9385, "step": 16214 }, { "epoch": 0.93, "grad_norm": 1.0422879457473755, "learning_rate": 2.561229474098048e-07, "loss": 0.5418, "step": 16215 }, { "epoch": 0.93, "grad_norm": 1.6120043992996216, "learning_rate": 2.5570538581660476e-07, "loss": 0.8638, "step": 16216 }, { "epoch": 0.93, "grad_norm": 1.7499767541885376, "learning_rate": 2.5528816047179275e-07, "loss": 0.9719, "step": 16217 }, { "epoch": 0.93, "grad_norm": 1.8366278409957886, "learning_rate": 2.5487127138976497e-07, "loss": 0.8156, "step": 16218 }, { "epoch": 0.93, "grad_norm": 1.563151240348816, "learning_rate": 2.544547185849089e-07, "loss": 0.8917, "step": 16219 }, { "epoch": 0.93, "grad_norm": 1.81069815158844, "learning_rate": 2.540385020715963e-07, "loss": 0.912, "step": 16220 }, { "epoch": 0.93, "grad_norm": 1.9182506799697876, "learning_rate": 2.536226218641924e-07, "loss": 0.9845, "step": 16221 }, { "epoch": 0.93, "grad_norm": 1.7533453702926636, "learning_rate": 2.532070779770446e-07, "loss": 1.0208, "step": 16222 }, { "epoch": 0.93, "grad_norm": 1.6919492483139038, "learning_rate": 2.527918704244936e-07, "loss": 0.8924, "step": 16223 }, { "epoch": 0.93, "grad_norm": 1.9972357749938965, "learning_rate": 2.523769992208691e-07, "loss": 0.937, "step": 16224 }, { "epoch": 0.93, "grad_norm": 1.6524839401245117, "learning_rate": 2.519624643804852e-07, "loss": 0.9195, "step": 16225 }, { "epoch": 0.93, "grad_norm": 1.898565649986267, "learning_rate": 2.51548265917646e-07, "loss": 0.9371, "step": 16226 }, { "epoch": 0.93, "grad_norm": 1.650154709815979, "learning_rate": 2.511344038466457e-07, "loss": 0.9478, "step": 16227 }, { "epoch": 0.93, "grad_norm": 1.7800287008285522, "learning_rate": 2.507208781817638e-07, "loss": 0.9161, "step": 16228 }, { "epoch": 0.93, "grad_norm": 1.674424171447754, "learning_rate": 2.503076889372713e-07, "loss": 0.9083, "step": 16229 }, { "epoch": 0.93, "grad_norm": 1.8096734285354614, "learning_rate": 2.498948361274267e-07, "loss": 0.8262, "step": 16230 }, { "epoch": 0.93, "grad_norm": 1.7749285697937012, "learning_rate": 2.494823197664742e-07, "loss": 0.7951, "step": 16231 }, { "epoch": 0.93, "grad_norm": 1.7971230745315552, "learning_rate": 2.4907013986865015e-07, "loss": 0.8337, "step": 16232 }, { "epoch": 0.93, "grad_norm": 2.022780418395996, "learning_rate": 2.4865829644817764e-07, "loss": 0.8521, "step": 16233 }, { "epoch": 0.93, "grad_norm": 1.7016489505767822, "learning_rate": 2.4824678951926864e-07, "loss": 0.8447, "step": 16234 }, { "epoch": 0.93, "grad_norm": 1.6620415449142456, "learning_rate": 2.4783561909612063e-07, "loss": 0.9028, "step": 16235 }, { "epoch": 0.93, "grad_norm": 1.7939611673355103, "learning_rate": 2.474247851929246e-07, "loss": 0.7635, "step": 16236 }, { "epoch": 0.93, "grad_norm": 1.7018694877624512, "learning_rate": 2.4701428782385794e-07, "loss": 0.8986, "step": 16237 }, { "epoch": 0.93, "grad_norm": 1.8474808931350708, "learning_rate": 2.4660412700308276e-07, "loss": 0.8886, "step": 16238 }, { "epoch": 0.93, "grad_norm": 1.665451169013977, "learning_rate": 2.461943027447555e-07, "loss": 0.8525, "step": 16239 }, { "epoch": 0.93, "grad_norm": 1.752081274986267, "learning_rate": 2.457848150630149e-07, "loss": 0.8582, "step": 16240 }, { "epoch": 0.93, "grad_norm": 1.7202935218811035, "learning_rate": 2.4537566397199506e-07, "loss": 0.8369, "step": 16241 }, { "epoch": 0.93, "grad_norm": 1.8139984607696533, "learning_rate": 2.449668494858115e-07, "loss": 0.8824, "step": 16242 }, { "epoch": 0.93, "grad_norm": 1.9071663618087769, "learning_rate": 2.445583716185729e-07, "loss": 0.9147, "step": 16243 }, { "epoch": 0.93, "grad_norm": 1.819251298904419, "learning_rate": 2.4415023038437345e-07, "loss": 0.8814, "step": 16244 }, { "epoch": 0.93, "grad_norm": 1.9118560552597046, "learning_rate": 2.4374242579729866e-07, "loss": 0.8625, "step": 16245 }, { "epoch": 0.93, "grad_norm": 1.0151519775390625, "learning_rate": 2.4333495787141837e-07, "loss": 0.5282, "step": 16246 }, { "epoch": 0.93, "grad_norm": 1.732537865638733, "learning_rate": 2.429278266207946e-07, "loss": 0.8882, "step": 16247 }, { "epoch": 0.93, "grad_norm": 1.6146734952926636, "learning_rate": 2.425210320594773e-07, "loss": 0.8437, "step": 16248 }, { "epoch": 0.93, "grad_norm": 1.115344524383545, "learning_rate": 2.4211457420150184e-07, "loss": 0.5762, "step": 16249 }, { "epoch": 0.93, "grad_norm": 1.707578420639038, "learning_rate": 2.4170845306089596e-07, "loss": 0.8836, "step": 16250 }, { "epoch": 0.93, "grad_norm": 1.8316994905471802, "learning_rate": 2.4130266865167175e-07, "loss": 0.879, "step": 16251 }, { "epoch": 0.93, "grad_norm": 1.923264980316162, "learning_rate": 2.408972209878335e-07, "loss": 0.8821, "step": 16252 }, { "epoch": 0.93, "grad_norm": 1.8193572759628296, "learning_rate": 2.4049211008337127e-07, "loss": 0.9427, "step": 16253 }, { "epoch": 0.93, "grad_norm": 1.6619873046875, "learning_rate": 2.4008733595226376e-07, "loss": 0.8937, "step": 16254 }, { "epoch": 0.93, "grad_norm": 1.6813629865646362, "learning_rate": 2.3968289860847873e-07, "loss": 0.8757, "step": 16255 }, { "epoch": 0.93, "grad_norm": 1.7966097593307495, "learning_rate": 2.3927879806597274e-07, "loss": 1.0082, "step": 16256 }, { "epoch": 0.93, "grad_norm": 1.7698556184768677, "learning_rate": 2.388750343386903e-07, "loss": 0.8974, "step": 16257 }, { "epoch": 0.93, "grad_norm": 1.9665919542312622, "learning_rate": 2.3847160744056354e-07, "loss": 0.932, "step": 16258 }, { "epoch": 0.93, "grad_norm": 1.6363818645477295, "learning_rate": 2.380685173855135e-07, "loss": 0.8569, "step": 16259 }, { "epoch": 0.93, "grad_norm": 1.6899081468582153, "learning_rate": 2.3766576418745024e-07, "loss": 0.9221, "step": 16260 }, { "epoch": 0.93, "grad_norm": 1.9260165691375732, "learning_rate": 2.3726334786027261e-07, "loss": 0.8682, "step": 16261 }, { "epoch": 0.93, "grad_norm": 1.7516756057739258, "learning_rate": 2.3686126841786394e-07, "loss": 0.8974, "step": 16262 }, { "epoch": 0.93, "grad_norm": 2.018200159072876, "learning_rate": 2.3645952587410204e-07, "loss": 0.9216, "step": 16263 }, { "epoch": 0.93, "grad_norm": 1.8899859189987183, "learning_rate": 2.3605812024284802e-07, "loss": 0.7772, "step": 16264 }, { "epoch": 0.93, "grad_norm": 1.814500331878662, "learning_rate": 2.3565705153795415e-07, "loss": 0.883, "step": 16265 }, { "epoch": 0.93, "grad_norm": 1.633141279220581, "learning_rate": 2.3525631977325825e-07, "loss": 0.9266, "step": 16266 }, { "epoch": 0.93, "grad_norm": 1.062927007675171, "learning_rate": 2.3485592496259258e-07, "loss": 0.5335, "step": 16267 }, { "epoch": 0.93, "grad_norm": 1.648722529411316, "learning_rate": 2.344558671197694e-07, "loss": 0.8153, "step": 16268 }, { "epoch": 0.93, "grad_norm": 1.7400288581848145, "learning_rate": 2.3405614625859552e-07, "loss": 0.9327, "step": 16269 }, { "epoch": 0.93, "grad_norm": 1.6756274700164795, "learning_rate": 2.3365676239286428e-07, "loss": 0.8973, "step": 16270 }, { "epoch": 0.93, "grad_norm": 1.7360039949417114, "learning_rate": 2.332577155363569e-07, "loss": 0.8616, "step": 16271 }, { "epoch": 0.93, "grad_norm": 1.774493932723999, "learning_rate": 2.3285900570284348e-07, "loss": 0.8567, "step": 16272 }, { "epoch": 0.93, "grad_norm": 1.8044387102127075, "learning_rate": 2.3246063290608189e-07, "loss": 0.9097, "step": 16273 }, { "epoch": 0.93, "grad_norm": 1.749638557434082, "learning_rate": 2.3206259715982005e-07, "loss": 0.9074, "step": 16274 }, { "epoch": 0.93, "grad_norm": 1.6550493240356445, "learning_rate": 2.316648984777925e-07, "loss": 0.9375, "step": 16275 }, { "epoch": 0.93, "grad_norm": 1.8353968858718872, "learning_rate": 2.3126753687372273e-07, "loss": 0.9189, "step": 16276 }, { "epoch": 0.93, "grad_norm": 1.8457140922546387, "learning_rate": 2.3087051236132086e-07, "loss": 0.9426, "step": 16277 }, { "epoch": 0.93, "grad_norm": 1.9276691675186157, "learning_rate": 2.3047382495429037e-07, "loss": 0.9491, "step": 16278 }, { "epoch": 0.93, "grad_norm": 1.8845618963241577, "learning_rate": 2.3007747466631701e-07, "loss": 0.935, "step": 16279 }, { "epoch": 0.93, "grad_norm": 1.841586947441101, "learning_rate": 2.2968146151107872e-07, "loss": 0.872, "step": 16280 }, { "epoch": 0.93, "grad_norm": 0.984134316444397, "learning_rate": 2.2928578550224124e-07, "loss": 0.5051, "step": 16281 }, { "epoch": 0.93, "grad_norm": 1.753153681755066, "learning_rate": 2.2889044665345806e-07, "loss": 0.8885, "step": 16282 }, { "epoch": 0.93, "grad_norm": 1.0393688678741455, "learning_rate": 2.2849544497837052e-07, "loss": 0.5473, "step": 16283 }, { "epoch": 0.93, "grad_norm": 1.7542282342910767, "learning_rate": 2.2810078049061102e-07, "loss": 0.8775, "step": 16284 }, { "epoch": 0.93, "grad_norm": 1.7539726495742798, "learning_rate": 2.2770645320379538e-07, "loss": 0.8507, "step": 16285 }, { "epoch": 0.93, "grad_norm": 1.7383103370666504, "learning_rate": 2.2731246313153376e-07, "loss": 0.8126, "step": 16286 }, { "epoch": 0.93, "grad_norm": 1.8871041536331177, "learning_rate": 2.2691881028741868e-07, "loss": 0.8636, "step": 16287 }, { "epoch": 0.93, "grad_norm": 1.952820897102356, "learning_rate": 2.2652549468503593e-07, "loss": 0.9593, "step": 16288 }, { "epoch": 0.93, "grad_norm": 1.8409380912780762, "learning_rate": 2.2613251633795685e-07, "loss": 0.963, "step": 16289 }, { "epoch": 0.93, "grad_norm": 1.829919457435608, "learning_rate": 2.2573987525974284e-07, "loss": 0.8811, "step": 16290 }, { "epoch": 0.93, "grad_norm": 1.87303626537323, "learning_rate": 2.2534757146394305e-07, "loss": 0.9335, "step": 16291 }, { "epoch": 0.93, "grad_norm": 1.613935112953186, "learning_rate": 2.249556049640933e-07, "loss": 0.8567, "step": 16292 }, { "epoch": 0.93, "grad_norm": 1.8837584257125854, "learning_rate": 2.2456397577372057e-07, "loss": 0.94, "step": 16293 }, { "epoch": 0.93, "grad_norm": 1.8737622499465942, "learning_rate": 2.241726839063385e-07, "loss": 0.9638, "step": 16294 }, { "epoch": 0.93, "grad_norm": 1.6680828332901, "learning_rate": 2.237817293754496e-07, "loss": 0.8731, "step": 16295 }, { "epoch": 0.93, "grad_norm": 1.6804893016815186, "learning_rate": 2.2339111219454311e-07, "loss": 0.8865, "step": 16296 }, { "epoch": 0.93, "grad_norm": 1.7263448238372803, "learning_rate": 2.2300083237710158e-07, "loss": 0.8425, "step": 16297 }, { "epoch": 0.93, "grad_norm": 0.9436613321304321, "learning_rate": 2.2261088993658863e-07, "loss": 0.5149, "step": 16298 }, { "epoch": 0.93, "grad_norm": 1.8394290208816528, "learning_rate": 2.2222128488646356e-07, "loss": 0.9258, "step": 16299 }, { "epoch": 0.93, "grad_norm": 1.6884090900421143, "learning_rate": 2.2183201724016667e-07, "loss": 0.8742, "step": 16300 }, { "epoch": 0.93, "grad_norm": 1.8040472269058228, "learning_rate": 2.2144308701113393e-07, "loss": 0.9279, "step": 16301 }, { "epoch": 0.93, "grad_norm": 1.7878984212875366, "learning_rate": 2.210544942127857e-07, "loss": 0.9037, "step": 16302 }, { "epoch": 0.94, "grad_norm": 1.868920087814331, "learning_rate": 2.20666238858529e-07, "loss": 0.9256, "step": 16303 }, { "epoch": 0.94, "grad_norm": 1.658509612083435, "learning_rate": 2.2027832096176428e-07, "loss": 0.9527, "step": 16304 }, { "epoch": 0.94, "grad_norm": 1.728318452835083, "learning_rate": 2.1989074053587413e-07, "loss": 0.8891, "step": 16305 }, { "epoch": 0.94, "grad_norm": 1.7210944890975952, "learning_rate": 2.1950349759423674e-07, "loss": 0.7879, "step": 16306 }, { "epoch": 0.94, "grad_norm": 0.936753511428833, "learning_rate": 2.1911659215021252e-07, "loss": 0.4847, "step": 16307 }, { "epoch": 0.94, "grad_norm": 1.8152029514312744, "learning_rate": 2.1873002421715305e-07, "loss": 0.8708, "step": 16308 }, { "epoch": 0.94, "grad_norm": 1.8049331903457642, "learning_rate": 2.1834379380839655e-07, "loss": 0.9684, "step": 16309 }, { "epoch": 0.94, "grad_norm": 1.7111965417861938, "learning_rate": 2.1795790093727344e-07, "loss": 0.8844, "step": 16310 }, { "epoch": 0.94, "grad_norm": 1.756780743598938, "learning_rate": 2.175723456170964e-07, "loss": 0.839, "step": 16311 }, { "epoch": 0.94, "grad_norm": 1.860119104385376, "learning_rate": 2.1718712786117258e-07, "loss": 0.8265, "step": 16312 }, { "epoch": 0.94, "grad_norm": 1.7923767566680908, "learning_rate": 2.1680224768279356e-07, "loss": 0.9125, "step": 16313 }, { "epoch": 0.94, "grad_norm": 1.8300269842147827, "learning_rate": 2.1641770509524095e-07, "loss": 0.8816, "step": 16314 }, { "epoch": 0.94, "grad_norm": 1.6666057109832764, "learning_rate": 2.1603350011178416e-07, "loss": 0.9806, "step": 16315 }, { "epoch": 0.94, "grad_norm": 1.7954477071762085, "learning_rate": 2.1564963274568028e-07, "loss": 0.8959, "step": 16316 }, { "epoch": 0.94, "grad_norm": 1.8551993370056152, "learning_rate": 2.152661030101766e-07, "loss": 0.8808, "step": 16317 }, { "epoch": 0.94, "grad_norm": 1.796030044555664, "learning_rate": 2.1488291091850577e-07, "loss": 0.9032, "step": 16318 }, { "epoch": 0.94, "grad_norm": 1.741132140159607, "learning_rate": 2.1450005648389395e-07, "loss": 0.9399, "step": 16319 }, { "epoch": 0.94, "grad_norm": 1.7671053409576416, "learning_rate": 2.1411753971954941e-07, "loss": 0.9029, "step": 16320 }, { "epoch": 0.94, "grad_norm": 0.961742103099823, "learning_rate": 2.1373536063867384e-07, "loss": 0.5443, "step": 16321 }, { "epoch": 0.94, "grad_norm": 1.7057327032089233, "learning_rate": 2.1335351925445335e-07, "loss": 0.856, "step": 16322 }, { "epoch": 0.94, "grad_norm": 1.6808359622955322, "learning_rate": 2.129720155800652e-07, "loss": 0.9321, "step": 16323 }, { "epoch": 0.94, "grad_norm": 1.8065845966339111, "learning_rate": 2.1259084962867326e-07, "loss": 0.8852, "step": 16324 }, { "epoch": 0.94, "grad_norm": 1.7093238830566406, "learning_rate": 2.1221002141343261e-07, "loss": 0.9935, "step": 16325 }, { "epoch": 0.94, "grad_norm": 1.9793174266815186, "learning_rate": 2.118295309474816e-07, "loss": 0.8969, "step": 16326 }, { "epoch": 0.94, "grad_norm": 1.9372689723968506, "learning_rate": 2.1144937824395083e-07, "loss": 0.9584, "step": 16327 }, { "epoch": 0.94, "grad_norm": 1.7589476108551025, "learning_rate": 2.1106956331595986e-07, "loss": 0.9013, "step": 16328 }, { "epoch": 0.94, "grad_norm": 1.7151142358779907, "learning_rate": 2.1069008617661369e-07, "loss": 0.913, "step": 16329 }, { "epoch": 0.94, "grad_norm": 1.0841842889785767, "learning_rate": 2.1031094683900855e-07, "loss": 0.5228, "step": 16330 }, { "epoch": 0.94, "grad_norm": 1.9114629030227661, "learning_rate": 2.0993214531622397e-07, "loss": 0.848, "step": 16331 }, { "epoch": 0.94, "grad_norm": 1.725567102432251, "learning_rate": 2.0955368162133504e-07, "loss": 0.8853, "step": 16332 }, { "epoch": 0.94, "grad_norm": 1.6830344200134277, "learning_rate": 2.0917555576740022e-07, "loss": 0.7919, "step": 16333 }, { "epoch": 0.94, "grad_norm": 1.8170216083526611, "learning_rate": 2.0879776776746684e-07, "loss": 0.9222, "step": 16334 }, { "epoch": 0.94, "grad_norm": 1.7129426002502441, "learning_rate": 2.0842031763457228e-07, "loss": 0.8525, "step": 16335 }, { "epoch": 0.94, "grad_norm": 1.7161333560943604, "learning_rate": 2.080432053817405e-07, "loss": 0.9027, "step": 16336 }, { "epoch": 0.94, "grad_norm": 1.9157575368881226, "learning_rate": 2.0766643102198448e-07, "loss": 0.9308, "step": 16337 }, { "epoch": 0.94, "grad_norm": 1.728074073791504, "learning_rate": 2.0728999456830712e-07, "loss": 0.8365, "step": 16338 }, { "epoch": 0.94, "grad_norm": 1.8389438390731812, "learning_rate": 2.0691389603369695e-07, "loss": 0.8551, "step": 16339 }, { "epoch": 0.94, "grad_norm": 1.844509482383728, "learning_rate": 2.0653813543113133e-07, "loss": 0.8367, "step": 16340 }, { "epoch": 0.94, "grad_norm": 1.7989506721496582, "learning_rate": 2.061627127735799e-07, "loss": 0.991, "step": 16341 }, { "epoch": 0.94, "grad_norm": 1.7589361667633057, "learning_rate": 2.0578762807399343e-07, "loss": 0.846, "step": 16342 }, { "epoch": 0.94, "grad_norm": 1.569637417793274, "learning_rate": 2.0541288134531824e-07, "loss": 0.8799, "step": 16343 }, { "epoch": 0.94, "grad_norm": 1.63938570022583, "learning_rate": 2.050384726004828e-07, "loss": 0.9558, "step": 16344 }, { "epoch": 0.94, "grad_norm": 1.7267253398895264, "learning_rate": 2.0466440185241021e-07, "loss": 0.8851, "step": 16345 }, { "epoch": 0.94, "grad_norm": 1.8333081007003784, "learning_rate": 2.0429066911400452e-07, "loss": 0.8738, "step": 16346 }, { "epoch": 0.94, "grad_norm": 1.885105013847351, "learning_rate": 2.0391727439816655e-07, "loss": 0.9303, "step": 16347 }, { "epoch": 0.94, "grad_norm": 1.9842063188552856, "learning_rate": 2.0354421771777821e-07, "loss": 0.8577, "step": 16348 }, { "epoch": 0.94, "grad_norm": 1.8866575956344604, "learning_rate": 2.0317149908571475e-07, "loss": 0.8653, "step": 16349 }, { "epoch": 0.94, "grad_norm": 1.6680948734283447, "learning_rate": 2.027991185148359e-07, "loss": 0.8853, "step": 16350 }, { "epoch": 0.94, "grad_norm": 1.658965826034546, "learning_rate": 2.024270760179936e-07, "loss": 0.7819, "step": 16351 }, { "epoch": 0.94, "grad_norm": 1.595818042755127, "learning_rate": 2.0205537160802202e-07, "loss": 0.9195, "step": 16352 }, { "epoch": 0.94, "grad_norm": 1.7592500448226929, "learning_rate": 2.01684005297752e-07, "loss": 0.9689, "step": 16353 }, { "epoch": 0.94, "grad_norm": 1.786474585533142, "learning_rate": 2.0131297709999554e-07, "loss": 0.8934, "step": 16354 }, { "epoch": 0.94, "grad_norm": 1.7921550273895264, "learning_rate": 2.0094228702755568e-07, "loss": 0.9029, "step": 16355 }, { "epoch": 0.94, "grad_norm": 1.6838581562042236, "learning_rate": 2.005719350932267e-07, "loss": 0.8403, "step": 16356 }, { "epoch": 0.94, "grad_norm": 1.04280424118042, "learning_rate": 2.0020192130978611e-07, "loss": 0.534, "step": 16357 }, { "epoch": 0.94, "grad_norm": 1.028695821762085, "learning_rate": 1.998322456900026e-07, "loss": 0.5503, "step": 16358 }, { "epoch": 0.94, "grad_norm": 1.8095715045928955, "learning_rate": 1.9946290824663262e-07, "loss": 0.9712, "step": 16359 }, { "epoch": 0.94, "grad_norm": 1.777685523033142, "learning_rate": 1.9909390899242153e-07, "loss": 0.8318, "step": 16360 }, { "epoch": 0.94, "grad_norm": 1.6515487432479858, "learning_rate": 1.987252479401014e-07, "loss": 0.8473, "step": 16361 }, { "epoch": 0.94, "grad_norm": 1.7955330610275269, "learning_rate": 1.9835692510239424e-07, "loss": 0.8583, "step": 16362 }, { "epoch": 0.94, "grad_norm": 1.7963247299194336, "learning_rate": 1.9798894049200878e-07, "loss": 0.8396, "step": 16363 }, { "epoch": 0.94, "grad_norm": 1.807252287864685, "learning_rate": 1.97621294121646e-07, "loss": 0.8203, "step": 16364 }, { "epoch": 0.94, "grad_norm": 1.7473043203353882, "learning_rate": 1.9725398600398905e-07, "loss": 0.9325, "step": 16365 }, { "epoch": 0.94, "grad_norm": 1.8721873760223389, "learning_rate": 1.9688701615171558e-07, "loss": 0.9282, "step": 16366 }, { "epoch": 0.94, "grad_norm": 2.081289052963257, "learning_rate": 1.9652038457748547e-07, "loss": 0.9149, "step": 16367 }, { "epoch": 0.94, "grad_norm": 1.6495956182479858, "learning_rate": 1.9615409129395303e-07, "loss": 0.9168, "step": 16368 }, { "epoch": 0.94, "grad_norm": 1.8121864795684814, "learning_rate": 1.9578813631375704e-07, "loss": 0.9037, "step": 16369 }, { "epoch": 0.94, "grad_norm": 1.7582117319107056, "learning_rate": 1.9542251964952518e-07, "loss": 0.8961, "step": 16370 }, { "epoch": 0.94, "grad_norm": 1.8538521528244019, "learning_rate": 1.9505724131387515e-07, "loss": 0.9293, "step": 16371 }, { "epoch": 0.94, "grad_norm": 1.753697395324707, "learning_rate": 1.9469230131940907e-07, "loss": 0.8655, "step": 16372 }, { "epoch": 0.94, "grad_norm": 1.8507347106933594, "learning_rate": 1.9432769967872357e-07, "loss": 0.8575, "step": 16373 }, { "epoch": 0.94, "grad_norm": 1.8493958711624146, "learning_rate": 1.939634364043963e-07, "loss": 0.9094, "step": 16374 }, { "epoch": 0.94, "grad_norm": 1.6765323877334595, "learning_rate": 1.9359951150900059e-07, "loss": 0.841, "step": 16375 }, { "epoch": 0.94, "grad_norm": 1.8059134483337402, "learning_rate": 1.9323592500509082e-07, "loss": 0.8376, "step": 16376 }, { "epoch": 0.94, "grad_norm": 1.9414695501327515, "learning_rate": 1.9287267690521582e-07, "loss": 0.932, "step": 16377 }, { "epoch": 0.94, "grad_norm": 1.8842369318008423, "learning_rate": 1.9250976722191006e-07, "loss": 0.8829, "step": 16378 }, { "epoch": 0.94, "grad_norm": 1.8446831703186035, "learning_rate": 1.921471959676957e-07, "loss": 0.9176, "step": 16379 }, { "epoch": 0.94, "grad_norm": 1.861767292022705, "learning_rate": 1.91784963155085e-07, "loss": 0.9212, "step": 16380 }, { "epoch": 0.94, "grad_norm": 1.675605058670044, "learning_rate": 1.9142306879657569e-07, "loss": 0.9243, "step": 16381 }, { "epoch": 0.94, "grad_norm": 1.812416911125183, "learning_rate": 1.910615129046589e-07, "loss": 0.8695, "step": 16382 }, { "epoch": 0.94, "grad_norm": 1.0805389881134033, "learning_rate": 1.9070029549180802e-07, "loss": 0.5867, "step": 16383 }, { "epoch": 0.94, "grad_norm": 1.7600376605987549, "learning_rate": 1.903394165704897e-07, "loss": 0.9126, "step": 16384 }, { "epoch": 0.94, "grad_norm": 0.9993528723716736, "learning_rate": 1.8997887615315513e-07, "loss": 0.5213, "step": 16385 }, { "epoch": 0.94, "grad_norm": 1.7143893241882324, "learning_rate": 1.8961867425224655e-07, "loss": 0.8144, "step": 16386 }, { "epoch": 0.94, "grad_norm": 1.6862927675247192, "learning_rate": 1.8925881088019292e-07, "loss": 0.9937, "step": 16387 }, { "epoch": 0.94, "grad_norm": 1.7340646982192993, "learning_rate": 1.8889928604941432e-07, "loss": 0.9101, "step": 16388 }, { "epoch": 0.94, "grad_norm": 1.7498443126678467, "learning_rate": 1.8854009977231303e-07, "loss": 0.8884, "step": 16389 }, { "epoch": 0.94, "grad_norm": 1.8975346088409424, "learning_rate": 1.881812520612869e-07, "loss": 0.8528, "step": 16390 }, { "epoch": 0.94, "grad_norm": 1.7859089374542236, "learning_rate": 1.8782274292871717e-07, "loss": 0.9032, "step": 16391 }, { "epoch": 0.94, "grad_norm": 1.7822070121765137, "learning_rate": 1.874645723869739e-07, "loss": 0.9685, "step": 16392 }, { "epoch": 0.94, "grad_norm": 1.7800108194351196, "learning_rate": 1.8710674044842058e-07, "loss": 0.9029, "step": 16393 }, { "epoch": 0.94, "grad_norm": 1.650575041770935, "learning_rate": 1.8674924712540066e-07, "loss": 0.8885, "step": 16394 }, { "epoch": 0.94, "grad_norm": 1.7646385431289673, "learning_rate": 1.8639209243025315e-07, "loss": 0.9304, "step": 16395 }, { "epoch": 0.94, "grad_norm": 1.785351037979126, "learning_rate": 1.860352763753004e-07, "loss": 0.8679, "step": 16396 }, { "epoch": 0.94, "grad_norm": 1.7265164852142334, "learning_rate": 1.85678798972857e-07, "loss": 0.9392, "step": 16397 }, { "epoch": 0.94, "grad_norm": 1.689312219619751, "learning_rate": 1.8532266023522206e-07, "loss": 0.8759, "step": 16398 }, { "epoch": 0.94, "grad_norm": 1.7069814205169678, "learning_rate": 1.849668601746868e-07, "loss": 0.8919, "step": 16399 }, { "epoch": 0.94, "grad_norm": 1.6813130378723145, "learning_rate": 1.8461139880352695e-07, "loss": 0.9555, "step": 16400 }, { "epoch": 0.94, "grad_norm": 1.9528535604476929, "learning_rate": 1.8425627613401165e-07, "loss": 0.8984, "step": 16401 }, { "epoch": 0.94, "grad_norm": 2.0073697566986084, "learning_rate": 1.8390149217839103e-07, "loss": 0.8432, "step": 16402 }, { "epoch": 0.94, "grad_norm": 1.6456698179244995, "learning_rate": 1.835470469489109e-07, "loss": 0.8569, "step": 16403 }, { "epoch": 0.94, "grad_norm": 1.7223634719848633, "learning_rate": 1.8319294045779922e-07, "loss": 0.8948, "step": 16404 }, { "epoch": 0.94, "grad_norm": 1.747360110282898, "learning_rate": 1.8283917271727846e-07, "loss": 0.9302, "step": 16405 }, { "epoch": 0.94, "grad_norm": 1.7781614065170288, "learning_rate": 1.8248574373955442e-07, "loss": 0.9435, "step": 16406 }, { "epoch": 0.94, "grad_norm": 1.0016318559646606, "learning_rate": 1.8213265353682396e-07, "loss": 0.535, "step": 16407 }, { "epoch": 0.94, "grad_norm": 1.8278100490570068, "learning_rate": 1.8177990212126962e-07, "loss": 0.8483, "step": 16408 }, { "epoch": 0.94, "grad_norm": 1.8017596006393433, "learning_rate": 1.8142748950506494e-07, "loss": 0.8634, "step": 16409 }, { "epoch": 0.94, "grad_norm": 1.9471365213394165, "learning_rate": 1.8107541570037136e-07, "loss": 0.8854, "step": 16410 }, { "epoch": 0.94, "grad_norm": 1.760509729385376, "learning_rate": 1.8072368071933577e-07, "loss": 0.88, "step": 16411 }, { "epoch": 0.94, "grad_norm": 1.8247904777526855, "learning_rate": 1.803722845740985e-07, "loss": 0.8408, "step": 16412 }, { "epoch": 0.94, "grad_norm": 1.8561985492706299, "learning_rate": 1.8002122727678096e-07, "loss": 0.9246, "step": 16413 }, { "epoch": 0.94, "grad_norm": 1.6995141506195068, "learning_rate": 1.7967050883950233e-07, "loss": 0.8736, "step": 16414 }, { "epoch": 0.94, "grad_norm": 2.039583444595337, "learning_rate": 1.7932012927436182e-07, "loss": 0.8991, "step": 16415 }, { "epoch": 0.94, "grad_norm": 1.7881120443344116, "learning_rate": 1.789700885934509e-07, "loss": 0.9653, "step": 16416 }, { "epoch": 0.94, "grad_norm": 1.7039881944656372, "learning_rate": 1.7862038680884763e-07, "loss": 0.8661, "step": 16417 }, { "epoch": 0.94, "grad_norm": 1.7617466449737549, "learning_rate": 1.782710239326202e-07, "loss": 0.9392, "step": 16418 }, { "epoch": 0.94, "grad_norm": 1.8066859245300293, "learning_rate": 1.7792199997682335e-07, "loss": 0.8956, "step": 16419 }, { "epoch": 0.94, "grad_norm": 1.7197020053863525, "learning_rate": 1.7757331495350306e-07, "loss": 0.9227, "step": 16420 }, { "epoch": 0.94, "grad_norm": 2.004526376724243, "learning_rate": 1.7722496887468854e-07, "loss": 0.9252, "step": 16421 }, { "epoch": 0.94, "grad_norm": 1.8045226335525513, "learning_rate": 1.768769617524013e-07, "loss": 0.8541, "step": 16422 }, { "epoch": 0.94, "grad_norm": 1.7096847295761108, "learning_rate": 1.7652929359865068e-07, "loss": 0.9347, "step": 16423 }, { "epoch": 0.94, "grad_norm": 1.8708813190460205, "learning_rate": 1.7618196442543366e-07, "loss": 0.8864, "step": 16424 }, { "epoch": 0.94, "grad_norm": 1.799077033996582, "learning_rate": 1.7583497424473516e-07, "loss": 0.9929, "step": 16425 }, { "epoch": 0.94, "grad_norm": 1.9251153469085693, "learning_rate": 1.7548832306852893e-07, "loss": 0.8797, "step": 16426 }, { "epoch": 0.94, "grad_norm": 1.7964842319488525, "learning_rate": 1.7514201090877758e-07, "loss": 0.8982, "step": 16427 }, { "epoch": 0.94, "grad_norm": 1.82236909866333, "learning_rate": 1.7479603777742937e-07, "loss": 0.8563, "step": 16428 }, { "epoch": 0.94, "grad_norm": 1.5631624460220337, "learning_rate": 1.7445040368642585e-07, "loss": 0.9045, "step": 16429 }, { "epoch": 0.94, "grad_norm": 1.8029755353927612, "learning_rate": 1.7410510864769081e-07, "loss": 0.9283, "step": 16430 }, { "epoch": 0.94, "grad_norm": 1.9529526233673096, "learning_rate": 1.737601526731425e-07, "loss": 0.8742, "step": 16431 }, { "epoch": 0.94, "grad_norm": 1.764808177947998, "learning_rate": 1.7341553577468252e-07, "loss": 0.8237, "step": 16432 }, { "epoch": 0.94, "grad_norm": 1.8694605827331543, "learning_rate": 1.7307125796420133e-07, "loss": 1.0193, "step": 16433 }, { "epoch": 0.94, "grad_norm": 1.9062522649765015, "learning_rate": 1.7272731925358277e-07, "loss": 0.9472, "step": 16434 }, { "epoch": 0.94, "grad_norm": 1.9673916101455688, "learning_rate": 1.723837196546918e-07, "loss": 0.8847, "step": 16435 }, { "epoch": 0.94, "grad_norm": 1.846174955368042, "learning_rate": 1.7204045917938672e-07, "loss": 0.9184, "step": 16436 }, { "epoch": 0.94, "grad_norm": 1.642804503440857, "learning_rate": 1.7169753783951137e-07, "loss": 0.8761, "step": 16437 }, { "epoch": 0.94, "grad_norm": 1.61570143699646, "learning_rate": 1.7135495564690075e-07, "loss": 0.8591, "step": 16438 }, { "epoch": 0.94, "grad_norm": 1.7706712484359741, "learning_rate": 1.7101271261337537e-07, "loss": 0.8105, "step": 16439 }, { "epoch": 0.94, "grad_norm": 1.879559874534607, "learning_rate": 1.706708087507447e-07, "loss": 0.9938, "step": 16440 }, { "epoch": 0.94, "grad_norm": 1.7266027927398682, "learning_rate": 1.7032924407080709e-07, "loss": 0.8774, "step": 16441 }, { "epoch": 0.94, "grad_norm": 1.918654441833496, "learning_rate": 1.6998801858534975e-07, "loss": 0.9498, "step": 16442 }, { "epoch": 0.94, "grad_norm": 1.8942171335220337, "learning_rate": 1.696471323061466e-07, "loss": 0.9064, "step": 16443 }, { "epoch": 0.94, "grad_norm": 1.7254973649978638, "learning_rate": 1.6930658524496158e-07, "loss": 0.9206, "step": 16444 }, { "epoch": 0.94, "grad_norm": 1.9922207593917847, "learning_rate": 1.6896637741354417e-07, "loss": 0.912, "step": 16445 }, { "epoch": 0.94, "grad_norm": 1.7686071395874023, "learning_rate": 1.6862650882363497e-07, "loss": 0.8724, "step": 16446 }, { "epoch": 0.94, "grad_norm": 1.8459292650222778, "learning_rate": 1.6828697948696348e-07, "loss": 0.9444, "step": 16447 }, { "epoch": 0.94, "grad_norm": 1.7722623348236084, "learning_rate": 1.679477894152437e-07, "loss": 0.9586, "step": 16448 }, { "epoch": 0.94, "grad_norm": 1.5576446056365967, "learning_rate": 1.676089386201818e-07, "loss": 0.9035, "step": 16449 }, { "epoch": 0.94, "grad_norm": 1.6673877239227295, "learning_rate": 1.672704271134684e-07, "loss": 0.9382, "step": 16450 }, { "epoch": 0.94, "grad_norm": 1.7748799324035645, "learning_rate": 1.6693225490678755e-07, "loss": 0.8937, "step": 16451 }, { "epoch": 0.94, "grad_norm": 1.7299423217773438, "learning_rate": 1.6659442201180543e-07, "loss": 0.933, "step": 16452 }, { "epoch": 0.94, "grad_norm": 1.8745492696762085, "learning_rate": 1.662569284401827e-07, "loss": 0.8875, "step": 16453 }, { "epoch": 0.94, "grad_norm": 1.934075951576233, "learning_rate": 1.659197742035623e-07, "loss": 0.9163, "step": 16454 }, { "epoch": 0.94, "grad_norm": 1.8632644414901733, "learning_rate": 1.655829593135816e-07, "loss": 0.8921, "step": 16455 }, { "epoch": 0.94, "grad_norm": 1.5784367322921753, "learning_rate": 1.6524648378186125e-07, "loss": 0.863, "step": 16456 }, { "epoch": 0.94, "grad_norm": 1.8589651584625244, "learning_rate": 1.649103476200131e-07, "loss": 0.897, "step": 16457 }, { "epoch": 0.94, "grad_norm": 1.707099199295044, "learning_rate": 1.6457455083963346e-07, "loss": 0.8793, "step": 16458 }, { "epoch": 0.94, "grad_norm": 1.6957958936691284, "learning_rate": 1.6423909345231304e-07, "loss": 0.8291, "step": 16459 }, { "epoch": 0.94, "grad_norm": 1.764585256576538, "learning_rate": 1.639039754696281e-07, "loss": 0.9135, "step": 16460 }, { "epoch": 0.94, "grad_norm": 1.7115422487258911, "learning_rate": 1.6356919690313832e-07, "loss": 0.8875, "step": 16461 }, { "epoch": 0.94, "grad_norm": 1.8564659357070923, "learning_rate": 1.632347577644e-07, "loss": 0.8248, "step": 16462 }, { "epoch": 0.94, "grad_norm": 1.8585751056671143, "learning_rate": 1.629006580649528e-07, "loss": 0.8287, "step": 16463 }, { "epoch": 0.94, "grad_norm": 1.029334545135498, "learning_rate": 1.6256689781632416e-07, "loss": 0.5804, "step": 16464 }, { "epoch": 0.94, "grad_norm": 2.0663323402404785, "learning_rate": 1.6223347703003156e-07, "loss": 0.9443, "step": 16465 }, { "epoch": 0.94, "grad_norm": 1.7953803539276123, "learning_rate": 1.6190039571758243e-07, "loss": 0.9376, "step": 16466 }, { "epoch": 0.94, "grad_norm": 2.6454825401306152, "learning_rate": 1.6156765389046868e-07, "loss": 0.9291, "step": 16467 }, { "epoch": 0.94, "grad_norm": 1.9765831232070923, "learning_rate": 1.6123525156017228e-07, "loss": 0.935, "step": 16468 }, { "epoch": 0.94, "grad_norm": 1.6322416067123413, "learning_rate": 1.6090318873816292e-07, "loss": 0.8417, "step": 16469 }, { "epoch": 0.94, "grad_norm": 1.8629791736602783, "learning_rate": 1.605714654359014e-07, "loss": 0.8786, "step": 16470 }, { "epoch": 0.94, "grad_norm": 1.706268310546875, "learning_rate": 1.6024008166483308e-07, "loss": 0.8945, "step": 16471 }, { "epoch": 0.94, "grad_norm": 1.7280964851379395, "learning_rate": 1.5990903743639318e-07, "loss": 0.8963, "step": 16472 }, { "epoch": 0.94, "grad_norm": 1.7247207164764404, "learning_rate": 1.5957833276200486e-07, "loss": 0.9038, "step": 16473 }, { "epoch": 0.94, "grad_norm": 1.740746259689331, "learning_rate": 1.5924796765308004e-07, "loss": 0.9098, "step": 16474 }, { "epoch": 0.94, "grad_norm": 1.7239288091659546, "learning_rate": 1.5891794212102073e-07, "loss": 0.8278, "step": 16475 }, { "epoch": 0.94, "grad_norm": 1.7507599592208862, "learning_rate": 1.585882561772112e-07, "loss": 0.9142, "step": 16476 }, { "epoch": 0.94, "grad_norm": 1.9095275402069092, "learning_rate": 1.5825890983303115e-07, "loss": 0.9631, "step": 16477 }, { "epoch": 0.95, "grad_norm": 1.691354751586914, "learning_rate": 1.579299030998427e-07, "loss": 0.8627, "step": 16478 }, { "epoch": 0.95, "grad_norm": 1.7601280212402344, "learning_rate": 1.5760123598900222e-07, "loss": 0.8822, "step": 16479 }, { "epoch": 0.95, "grad_norm": 1.9213297367095947, "learning_rate": 1.572729085118485e-07, "loss": 0.7579, "step": 16480 }, { "epoch": 0.95, "grad_norm": 1.9090956449508667, "learning_rate": 1.5694492067971245e-07, "loss": 0.9249, "step": 16481 }, { "epoch": 0.95, "grad_norm": 1.7180875539779663, "learning_rate": 1.5661727250391167e-07, "loss": 0.8416, "step": 16482 }, { "epoch": 0.95, "grad_norm": 1.7013002634048462, "learning_rate": 1.562899639957538e-07, "loss": 0.8544, "step": 16483 }, { "epoch": 0.95, "grad_norm": 1.7529064416885376, "learning_rate": 1.559629951665298e-07, "loss": 0.8571, "step": 16484 }, { "epoch": 0.95, "grad_norm": 1.568910002708435, "learning_rate": 1.556363660275262e-07, "loss": 0.7811, "step": 16485 }, { "epoch": 0.95, "grad_norm": 1.8314090967178345, "learning_rate": 1.5531007659001175e-07, "loss": 0.9341, "step": 16486 }, { "epoch": 0.95, "grad_norm": 1.8949534893035889, "learning_rate": 1.5498412686524634e-07, "loss": 0.8667, "step": 16487 }, { "epoch": 0.95, "grad_norm": 1.0249289274215698, "learning_rate": 1.5465851686447876e-07, "loss": 0.5592, "step": 16488 }, { "epoch": 0.95, "grad_norm": 1.738390564918518, "learning_rate": 1.5433324659894333e-07, "loss": 0.9648, "step": 16489 }, { "epoch": 0.95, "grad_norm": 1.6103278398513794, "learning_rate": 1.5400831607986443e-07, "loss": 0.8457, "step": 16490 }, { "epoch": 0.95, "grad_norm": 1.7184944152832031, "learning_rate": 1.5368372531845532e-07, "loss": 0.9577, "step": 16491 }, { "epoch": 0.95, "grad_norm": 1.7703791856765747, "learning_rate": 1.53359474325917e-07, "loss": 0.8672, "step": 16492 }, { "epoch": 0.95, "grad_norm": 1.9693399667739868, "learning_rate": 1.5303556311343616e-07, "loss": 0.8843, "step": 16493 }, { "epoch": 0.95, "grad_norm": 1.8252665996551514, "learning_rate": 1.5271199169219264e-07, "loss": 0.8591, "step": 16494 }, { "epoch": 0.95, "grad_norm": 1.89171302318573, "learning_rate": 1.5238876007335091e-07, "loss": 0.8467, "step": 16495 }, { "epoch": 0.95, "grad_norm": 1.709255576133728, "learning_rate": 1.5206586826806536e-07, "loss": 0.8933, "step": 16496 }, { "epoch": 0.95, "grad_norm": 1.865009069442749, "learning_rate": 1.5174331628747596e-07, "loss": 0.8926, "step": 16497 }, { "epoch": 0.95, "grad_norm": 1.86410391330719, "learning_rate": 1.5142110414271716e-07, "loss": 0.9913, "step": 16498 }, { "epoch": 0.95, "grad_norm": 1.7553921937942505, "learning_rate": 1.510992318449034e-07, "loss": 0.9054, "step": 16499 }, { "epoch": 0.95, "grad_norm": 1.7745091915130615, "learning_rate": 1.5077769940514242e-07, "loss": 0.9481, "step": 16500 }, { "epoch": 0.95, "grad_norm": 2.043612241744995, "learning_rate": 1.5045650683453205e-07, "loss": 0.8476, "step": 16501 }, { "epoch": 0.95, "grad_norm": 1.9572433233261108, "learning_rate": 1.5013565414415342e-07, "loss": 0.9025, "step": 16502 }, { "epoch": 0.95, "grad_norm": 1.794930338859558, "learning_rate": 1.498151413450799e-07, "loss": 0.8519, "step": 16503 }, { "epoch": 0.95, "grad_norm": 1.702752709388733, "learning_rate": 1.494949684483693e-07, "loss": 0.9376, "step": 16504 }, { "epoch": 0.95, "grad_norm": 1.575696587562561, "learning_rate": 1.4917513546507167e-07, "loss": 0.8486, "step": 16505 }, { "epoch": 0.95, "grad_norm": 1.7192968130111694, "learning_rate": 1.4885564240622265e-07, "loss": 0.9022, "step": 16506 }, { "epoch": 0.95, "grad_norm": 1.6231508255004883, "learning_rate": 1.485364892828478e-07, "loss": 0.8916, "step": 16507 }, { "epoch": 0.95, "grad_norm": 1.7822118997573853, "learning_rate": 1.482176761059584e-07, "loss": 0.9637, "step": 16508 }, { "epoch": 0.95, "grad_norm": 1.7868828773498535, "learning_rate": 1.478992028865589e-07, "loss": 0.9137, "step": 16509 }, { "epoch": 0.95, "grad_norm": 1.8035506010055542, "learning_rate": 1.4758106963563612e-07, "loss": 0.9549, "step": 16510 }, { "epoch": 0.95, "grad_norm": 1.8400654792785645, "learning_rate": 1.4726327636417015e-07, "loss": 0.8627, "step": 16511 }, { "epoch": 0.95, "grad_norm": 1.8744951486587524, "learning_rate": 1.4694582308312444e-07, "loss": 0.866, "step": 16512 }, { "epoch": 0.95, "grad_norm": 1.7350282669067383, "learning_rate": 1.466287098034558e-07, "loss": 1.0024, "step": 16513 }, { "epoch": 0.95, "grad_norm": 1.10121750831604, "learning_rate": 1.4631193653610542e-07, "loss": 0.5384, "step": 16514 }, { "epoch": 0.95, "grad_norm": 2.0601329803466797, "learning_rate": 1.4599550329200574e-07, "loss": 0.928, "step": 16515 }, { "epoch": 0.95, "grad_norm": 1.8255534172058105, "learning_rate": 1.4567941008207466e-07, "loss": 1.0136, "step": 16516 }, { "epoch": 0.95, "grad_norm": 1.7194275856018066, "learning_rate": 1.4536365691722122e-07, "loss": 0.8817, "step": 16517 }, { "epoch": 0.95, "grad_norm": 1.8004549741744995, "learning_rate": 1.4504824380833892e-07, "loss": 0.9848, "step": 16518 }, { "epoch": 0.95, "grad_norm": 1.864165186882019, "learning_rate": 1.4473317076631355e-07, "loss": 0.9286, "step": 16519 }, { "epoch": 0.95, "grad_norm": 1.7690070867538452, "learning_rate": 1.4441843780201747e-07, "loss": 0.9085, "step": 16520 }, { "epoch": 0.95, "grad_norm": 1.7431583404541016, "learning_rate": 1.441040449263098e-07, "loss": 0.9473, "step": 16521 }, { "epoch": 0.95, "grad_norm": 1.7609999179840088, "learning_rate": 1.4378999215004076e-07, "loss": 0.9518, "step": 16522 }, { "epoch": 0.95, "grad_norm": 1.7276779413223267, "learning_rate": 1.4347627948404607e-07, "loss": 0.927, "step": 16523 }, { "epoch": 0.95, "grad_norm": 2.0218112468719482, "learning_rate": 1.431629069391516e-07, "loss": 0.927, "step": 16524 }, { "epoch": 0.95, "grad_norm": 1.705073356628418, "learning_rate": 1.4284987452617306e-07, "loss": 0.9891, "step": 16525 }, { "epoch": 0.95, "grad_norm": 1.9493002891540527, "learning_rate": 1.425371822559085e-07, "loss": 0.898, "step": 16526 }, { "epoch": 0.95, "grad_norm": 1.7836133241653442, "learning_rate": 1.4222483013915156e-07, "loss": 0.9047, "step": 16527 }, { "epoch": 0.95, "grad_norm": 1.9222790002822876, "learning_rate": 1.4191281818667914e-07, "loss": 0.9553, "step": 16528 }, { "epoch": 0.95, "grad_norm": 1.626082181930542, "learning_rate": 1.4160114640925704e-07, "loss": 0.8374, "step": 16529 }, { "epoch": 0.95, "grad_norm": 1.8756816387176514, "learning_rate": 1.4128981481764115e-07, "loss": 0.8733, "step": 16530 }, { "epoch": 0.95, "grad_norm": 1.7788617610931396, "learning_rate": 1.4097882342257508e-07, "loss": 0.9497, "step": 16531 }, { "epoch": 0.95, "grad_norm": 1.744699478149414, "learning_rate": 1.4066817223478913e-07, "loss": 0.8517, "step": 16532 }, { "epoch": 0.95, "grad_norm": 1.8373404741287231, "learning_rate": 1.4035786126500473e-07, "loss": 0.935, "step": 16533 }, { "epoch": 0.95, "grad_norm": 1.7422071695327759, "learning_rate": 1.4004789052392777e-07, "loss": 0.8591, "step": 16534 }, { "epoch": 0.95, "grad_norm": 1.9151790142059326, "learning_rate": 1.3973826002225631e-07, "loss": 0.8738, "step": 16535 }, { "epoch": 0.95, "grad_norm": 1.6773675680160522, "learning_rate": 1.39428969770673e-07, "loss": 0.8623, "step": 16536 }, { "epoch": 0.95, "grad_norm": 1.6714239120483398, "learning_rate": 1.3912001977985146e-07, "loss": 0.8179, "step": 16537 }, { "epoch": 0.95, "grad_norm": 2.038982629776001, "learning_rate": 1.3881141006045318e-07, "loss": 0.9548, "step": 16538 }, { "epoch": 0.95, "grad_norm": 1.7970119714736938, "learning_rate": 1.3850314062312742e-07, "loss": 0.9082, "step": 16539 }, { "epoch": 0.95, "grad_norm": 1.786732792854309, "learning_rate": 1.3819521147851122e-07, "loss": 0.8333, "step": 16540 }, { "epoch": 0.95, "grad_norm": 1.8275790214538574, "learning_rate": 1.3788762263722943e-07, "loss": 1.0284, "step": 16541 }, { "epoch": 0.95, "grad_norm": 1.7548185586929321, "learning_rate": 1.37580374109898e-07, "loss": 0.9106, "step": 16542 }, { "epoch": 0.95, "grad_norm": 1.8583842515945435, "learning_rate": 1.3727346590711843e-07, "loss": 0.8834, "step": 16543 }, { "epoch": 0.95, "grad_norm": 1.0873218774795532, "learning_rate": 1.3696689803948114e-07, "loss": 0.5724, "step": 16544 }, { "epoch": 0.95, "grad_norm": 1.8137444257736206, "learning_rate": 1.3666067051756326e-07, "loss": 0.8949, "step": 16545 }, { "epoch": 0.95, "grad_norm": 1.7303413152694702, "learning_rate": 1.363547833519352e-07, "loss": 0.8784, "step": 16546 }, { "epoch": 0.95, "grad_norm": 1.6444858312606812, "learning_rate": 1.360492365531496e-07, "loss": 0.8531, "step": 16547 }, { "epoch": 0.95, "grad_norm": 1.7975293397903442, "learning_rate": 1.357440301317525e-07, "loss": 0.8912, "step": 16548 }, { "epoch": 0.95, "grad_norm": 1.8063907623291016, "learning_rate": 1.3543916409827328e-07, "loss": 0.8986, "step": 16549 }, { "epoch": 0.95, "grad_norm": 1.8259648084640503, "learning_rate": 1.351346384632335e-07, "loss": 0.9029, "step": 16550 }, { "epoch": 0.95, "grad_norm": 1.8852148056030273, "learning_rate": 1.348304532371403e-07, "loss": 0.884, "step": 16551 }, { "epoch": 0.95, "grad_norm": 1.6538900136947632, "learning_rate": 1.3452660843049082e-07, "loss": 0.8997, "step": 16552 }, { "epoch": 0.95, "grad_norm": 1.6715694665908813, "learning_rate": 1.3422310405377003e-07, "loss": 0.9102, "step": 16553 }, { "epoch": 0.95, "grad_norm": 1.8713077306747437, "learning_rate": 1.339199401174507e-07, "loss": 0.885, "step": 16554 }, { "epoch": 0.95, "grad_norm": 1.7181580066680908, "learning_rate": 1.336171166319955e-07, "loss": 0.9071, "step": 16555 }, { "epoch": 0.95, "grad_norm": 1.732968807220459, "learning_rate": 1.333146336078528e-07, "loss": 0.8967, "step": 16556 }, { "epoch": 0.95, "grad_norm": 1.7870614528656006, "learning_rate": 1.3301249105546087e-07, "loss": 0.8683, "step": 16557 }, { "epoch": 0.95, "grad_norm": 1.8052188158035278, "learning_rate": 1.3271068898524475e-07, "loss": 0.8922, "step": 16558 }, { "epoch": 0.95, "grad_norm": 1.726914405822754, "learning_rate": 1.3240922740761942e-07, "loss": 0.8456, "step": 16559 }, { "epoch": 0.95, "grad_norm": 0.9734898209571838, "learning_rate": 1.3210810633298765e-07, "loss": 0.5558, "step": 16560 }, { "epoch": 0.95, "grad_norm": 1.9070172309875488, "learning_rate": 1.3180732577174117e-07, "loss": 0.8776, "step": 16561 }, { "epoch": 0.95, "grad_norm": 1.9358142614364624, "learning_rate": 1.3150688573425829e-07, "loss": 0.9322, "step": 16562 }, { "epoch": 0.95, "grad_norm": 1.6691879034042358, "learning_rate": 1.3120678623090521e-07, "loss": 0.9626, "step": 16563 }, { "epoch": 0.95, "grad_norm": 1.8167921304702759, "learning_rate": 1.3090702727203918e-07, "loss": 0.8886, "step": 16564 }, { "epoch": 0.95, "grad_norm": 1.7664705514907837, "learning_rate": 1.3060760886800417e-07, "loss": 0.9267, "step": 16565 }, { "epoch": 0.95, "grad_norm": 1.7893266677856445, "learning_rate": 1.3030853102912965e-07, "loss": 0.8985, "step": 16566 }, { "epoch": 0.95, "grad_norm": 1.8319549560546875, "learning_rate": 1.300097937657385e-07, "loss": 0.8323, "step": 16567 }, { "epoch": 0.95, "grad_norm": 1.7281087636947632, "learning_rate": 1.297113970881403e-07, "loss": 0.8651, "step": 16568 }, { "epoch": 0.95, "grad_norm": 1.7932853698730469, "learning_rate": 1.2941334100662784e-07, "loss": 0.971, "step": 16569 }, { "epoch": 0.95, "grad_norm": 0.9154644012451172, "learning_rate": 1.291156255314907e-07, "loss": 0.4918, "step": 16570 }, { "epoch": 0.95, "grad_norm": 1.6723839044570923, "learning_rate": 1.2881825067299848e-07, "loss": 0.8406, "step": 16571 }, { "epoch": 0.95, "grad_norm": 1.8287107944488525, "learning_rate": 1.2852121644141624e-07, "loss": 0.8858, "step": 16572 }, { "epoch": 0.95, "grad_norm": 1.8438234329223633, "learning_rate": 1.2822452284699026e-07, "loss": 0.9524, "step": 16573 }, { "epoch": 0.95, "grad_norm": 1.7729114294052124, "learning_rate": 1.2792816989996127e-07, "loss": 0.924, "step": 16574 }, { "epoch": 0.95, "grad_norm": 1.7965925931930542, "learning_rate": 1.2763215761055437e-07, "loss": 0.8552, "step": 16575 }, { "epoch": 0.95, "grad_norm": 1.7768440246582031, "learning_rate": 1.2733648598898475e-07, "loss": 0.8628, "step": 16576 }, { "epoch": 0.95, "grad_norm": 1.625126600265503, "learning_rate": 1.2704115504545312e-07, "loss": 0.8198, "step": 16577 }, { "epoch": 0.95, "grad_norm": 1.651566982269287, "learning_rate": 1.2674616479015355e-07, "loss": 0.9041, "step": 16578 }, { "epoch": 0.95, "grad_norm": 1.6268478631973267, "learning_rate": 1.264515152332646e-07, "loss": 0.9429, "step": 16579 }, { "epoch": 0.95, "grad_norm": 1.606135368347168, "learning_rate": 1.2615720638495142e-07, "loss": 0.8608, "step": 16580 }, { "epoch": 0.95, "grad_norm": 1.6384949684143066, "learning_rate": 1.2586323825537372e-07, "loss": 0.8969, "step": 16581 }, { "epoch": 0.95, "grad_norm": 1.7512062788009644, "learning_rate": 1.2556961085467223e-07, "loss": 0.8414, "step": 16582 }, { "epoch": 0.95, "grad_norm": 1.6920546293258667, "learning_rate": 1.2527632419297997e-07, "loss": 0.9094, "step": 16583 }, { "epoch": 0.95, "grad_norm": 1.5868085622787476, "learning_rate": 1.2498337828041886e-07, "loss": 0.7516, "step": 16584 }, { "epoch": 0.95, "grad_norm": 1.8788325786590576, "learning_rate": 1.2469077312709633e-07, "loss": 0.8882, "step": 16585 }, { "epoch": 0.95, "grad_norm": 1.8532047271728516, "learning_rate": 1.2439850874310877e-07, "loss": 0.8798, "step": 16586 }, { "epoch": 0.95, "grad_norm": 1.985978603363037, "learning_rate": 1.241065851385437e-07, "loss": 0.8682, "step": 16587 }, { "epoch": 0.95, "grad_norm": 1.7965304851531982, "learning_rate": 1.2381500232347188e-07, "loss": 0.8789, "step": 16588 }, { "epoch": 0.95, "grad_norm": 1.6530011892318726, "learning_rate": 1.2352376030795753e-07, "loss": 0.89, "step": 16589 }, { "epoch": 0.95, "grad_norm": 1.7447978258132935, "learning_rate": 1.232328591020482e-07, "loss": 0.8827, "step": 16590 }, { "epoch": 0.95, "grad_norm": 1.773024559020996, "learning_rate": 1.2294229871578356e-07, "loss": 0.8629, "step": 16591 }, { "epoch": 0.95, "grad_norm": 1.684670090675354, "learning_rate": 1.2265207915919008e-07, "loss": 0.9349, "step": 16592 }, { "epoch": 0.95, "grad_norm": 1.7269409894943237, "learning_rate": 1.2236220044228196e-07, "loss": 0.9854, "step": 16593 }, { "epoch": 0.95, "grad_norm": 1.6446491479873657, "learning_rate": 1.2207266257506234e-07, "loss": 0.92, "step": 16594 }, { "epoch": 0.95, "grad_norm": 1.7803030014038086, "learning_rate": 1.2178346556752207e-07, "loss": 0.8812, "step": 16595 }, { "epoch": 0.95, "grad_norm": 1.9164819717407227, "learning_rate": 1.2149460942964097e-07, "loss": 0.9484, "step": 16596 }, { "epoch": 0.95, "grad_norm": 1.79301917552948, "learning_rate": 1.212060941713855e-07, "loss": 0.9546, "step": 16597 }, { "epoch": 0.95, "grad_norm": 2.036015033721924, "learning_rate": 1.2091791980271438e-07, "loss": 0.9546, "step": 16598 }, { "epoch": 0.95, "grad_norm": 1.8012720346450806, "learning_rate": 1.2063008633356743e-07, "loss": 0.8533, "step": 16599 }, { "epoch": 0.95, "grad_norm": 1.7246770858764648, "learning_rate": 1.2034259377388113e-07, "loss": 0.9456, "step": 16600 }, { "epoch": 0.95, "grad_norm": 1.6861293315887451, "learning_rate": 1.200554421335731e-07, "loss": 0.9462, "step": 16601 }, { "epoch": 0.95, "grad_norm": 1.8792359828948975, "learning_rate": 1.1976863142255324e-07, "loss": 0.929, "step": 16602 }, { "epoch": 0.95, "grad_norm": 1.8180543184280396, "learning_rate": 1.1948216165071912e-07, "loss": 0.9709, "step": 16603 }, { "epoch": 0.95, "grad_norm": 1.9028102159500122, "learning_rate": 1.191960328279551e-07, "loss": 0.8795, "step": 16604 }, { "epoch": 0.95, "grad_norm": 1.5768588781356812, "learning_rate": 1.1891024496413661e-07, "loss": 0.8467, "step": 16605 }, { "epoch": 0.95, "grad_norm": 1.8773727416992188, "learning_rate": 1.186247980691213e-07, "loss": 0.881, "step": 16606 }, { "epoch": 0.95, "grad_norm": 1.9762147665023804, "learning_rate": 1.1833969215276352e-07, "loss": 0.8532, "step": 16607 }, { "epoch": 0.95, "grad_norm": 1.891180396080017, "learning_rate": 1.1805492722489876e-07, "loss": 0.9476, "step": 16608 }, { "epoch": 0.95, "grad_norm": 1.7515558004379272, "learning_rate": 1.1777050329535577e-07, "loss": 0.8814, "step": 16609 }, { "epoch": 0.95, "grad_norm": 1.96222984790802, "learning_rate": 1.1748642037394564e-07, "loss": 0.9238, "step": 16610 }, { "epoch": 0.95, "grad_norm": 1.8126362562179565, "learning_rate": 1.1720267847047495e-07, "loss": 0.9303, "step": 16611 }, { "epoch": 0.95, "grad_norm": 1.5835965871810913, "learning_rate": 1.1691927759473254e-07, "loss": 0.8836, "step": 16612 }, { "epoch": 0.95, "grad_norm": 2.0533149242401123, "learning_rate": 1.1663621775649946e-07, "loss": 0.9243, "step": 16613 }, { "epoch": 0.95, "grad_norm": 1.7795202732086182, "learning_rate": 1.1635349896554126e-07, "loss": 0.9172, "step": 16614 }, { "epoch": 0.95, "grad_norm": 1.817523717880249, "learning_rate": 1.1607112123161679e-07, "loss": 0.885, "step": 16615 }, { "epoch": 0.95, "grad_norm": 1.8167636394500732, "learning_rate": 1.1578908456446713e-07, "loss": 0.8674, "step": 16616 }, { "epoch": 0.95, "grad_norm": 2.2300267219543457, "learning_rate": 1.1550738897382563e-07, "loss": 0.8808, "step": 16617 }, { "epoch": 0.95, "grad_norm": 1.9196256399154663, "learning_rate": 1.1522603446941338e-07, "loss": 0.892, "step": 16618 }, { "epoch": 0.95, "grad_norm": 1.8228071928024292, "learning_rate": 1.1494502106093708e-07, "loss": 0.9416, "step": 16619 }, { "epoch": 0.95, "grad_norm": 1.7819263935089111, "learning_rate": 1.1466434875809784e-07, "loss": 0.9334, "step": 16620 }, { "epoch": 0.95, "grad_norm": 1.844464898109436, "learning_rate": 1.1438401757057682e-07, "loss": 0.9009, "step": 16621 }, { "epoch": 0.95, "grad_norm": 1.8060771226882935, "learning_rate": 1.1410402750804961e-07, "loss": 0.906, "step": 16622 }, { "epoch": 0.95, "grad_norm": 1.8951303958892822, "learning_rate": 1.1382437858017626e-07, "loss": 0.8879, "step": 16623 }, { "epoch": 0.95, "grad_norm": 1.7181979417800903, "learning_rate": 1.1354507079660904e-07, "loss": 0.7693, "step": 16624 }, { "epoch": 0.95, "grad_norm": 1.841902494430542, "learning_rate": 1.1326610416698358e-07, "loss": 1.0096, "step": 16625 }, { "epoch": 0.95, "grad_norm": 1.808828353881836, "learning_rate": 1.1298747870092886e-07, "loss": 0.8968, "step": 16626 }, { "epoch": 0.95, "grad_norm": 1.9094401597976685, "learning_rate": 1.1270919440805605e-07, "loss": 0.8973, "step": 16627 }, { "epoch": 0.95, "grad_norm": 1.7344419956207275, "learning_rate": 1.1243125129797194e-07, "loss": 0.8725, "step": 16628 }, { "epoch": 0.95, "grad_norm": 1.7809631824493408, "learning_rate": 1.121536493802644e-07, "loss": 0.9343, "step": 16629 }, { "epoch": 0.95, "grad_norm": 1.8784180879592896, "learning_rate": 1.1187638866451355e-07, "loss": 0.8436, "step": 16630 }, { "epoch": 0.95, "grad_norm": 1.8009629249572754, "learning_rate": 1.115994691602873e-07, "loss": 0.8634, "step": 16631 }, { "epoch": 0.95, "grad_norm": 1.7326936721801758, "learning_rate": 1.1132289087714132e-07, "loss": 0.8608, "step": 16632 }, { "epoch": 0.95, "grad_norm": 1.8138861656188965, "learning_rate": 1.110466538246202e-07, "loss": 0.8992, "step": 16633 }, { "epoch": 0.95, "grad_norm": 1.681660532951355, "learning_rate": 1.1077075801225412e-07, "loss": 0.924, "step": 16634 }, { "epoch": 0.95, "grad_norm": 1.7696179151535034, "learning_rate": 1.1049520344956654e-07, "loss": 0.9029, "step": 16635 }, { "epoch": 0.95, "grad_norm": 1.738041877746582, "learning_rate": 1.1021999014606322e-07, "loss": 0.9149, "step": 16636 }, { "epoch": 0.95, "grad_norm": 1.8930988311767578, "learning_rate": 1.099451181112421e-07, "loss": 0.8898, "step": 16637 }, { "epoch": 0.95, "grad_norm": 1.8724775314331055, "learning_rate": 1.096705873545878e-07, "loss": 0.8356, "step": 16638 }, { "epoch": 0.95, "grad_norm": 1.8593138456344604, "learning_rate": 1.0939639788557499e-07, "loss": 0.895, "step": 16639 }, { "epoch": 0.95, "grad_norm": 1.7877471446990967, "learning_rate": 1.0912254971366276e-07, "loss": 0.895, "step": 16640 }, { "epoch": 0.95, "grad_norm": 1.6651346683502197, "learning_rate": 1.0884904284830355e-07, "loss": 0.8845, "step": 16641 }, { "epoch": 0.95, "grad_norm": 1.0390714406967163, "learning_rate": 1.0857587729893316e-07, "loss": 0.5337, "step": 16642 }, { "epoch": 0.95, "grad_norm": 1.6564645767211914, "learning_rate": 1.0830305307497958e-07, "loss": 0.8884, "step": 16643 }, { "epoch": 0.95, "grad_norm": 1.1324530839920044, "learning_rate": 1.0803057018585528e-07, "loss": 0.5359, "step": 16644 }, { "epoch": 0.95, "grad_norm": 1.7464479207992554, "learning_rate": 1.0775842864096387e-07, "loss": 0.8361, "step": 16645 }, { "epoch": 0.95, "grad_norm": 1.7493220567703247, "learning_rate": 1.0748662844969781e-07, "loss": 0.8915, "step": 16646 }, { "epoch": 0.95, "grad_norm": 1.842637538909912, "learning_rate": 1.0721516962143296e-07, "loss": 0.9855, "step": 16647 }, { "epoch": 0.95, "grad_norm": 1.865507960319519, "learning_rate": 1.069440521655396e-07, "loss": 0.8383, "step": 16648 }, { "epoch": 0.95, "grad_norm": 1.5383331775665283, "learning_rate": 1.0667327609137024e-07, "loss": 0.8725, "step": 16649 }, { "epoch": 0.95, "grad_norm": 1.7621910572052002, "learning_rate": 1.0640284140827183e-07, "loss": 0.8831, "step": 16650 }, { "epoch": 0.95, "grad_norm": 2.097637891769409, "learning_rate": 1.0613274812557361e-07, "loss": 0.9048, "step": 16651 }, { "epoch": 0.96, "grad_norm": 1.6516413688659668, "learning_rate": 1.0586299625259699e-07, "loss": 0.8574, "step": 16652 }, { "epoch": 0.96, "grad_norm": 1.7174369096755981, "learning_rate": 1.055935857986512e-07, "loss": 0.8556, "step": 16653 }, { "epoch": 0.96, "grad_norm": 0.951137125492096, "learning_rate": 1.0532451677303102e-07, "loss": 0.5239, "step": 16654 }, { "epoch": 0.96, "grad_norm": 1.8946439027786255, "learning_rate": 1.0505578918502124e-07, "loss": 0.7897, "step": 16655 }, { "epoch": 0.96, "grad_norm": 1.584452748298645, "learning_rate": 1.0478740304389668e-07, "loss": 0.7435, "step": 16656 }, { "epoch": 0.96, "grad_norm": 1.8971904516220093, "learning_rate": 1.045193583589188e-07, "loss": 0.9921, "step": 16657 }, { "epoch": 0.96, "grad_norm": 1.718258261680603, "learning_rate": 1.0425165513933355e-07, "loss": 0.9481, "step": 16658 }, { "epoch": 0.96, "grad_norm": 1.7395762205123901, "learning_rate": 1.0398429339438353e-07, "loss": 0.9145, "step": 16659 }, { "epoch": 0.96, "grad_norm": 1.6841444969177246, "learning_rate": 1.0371727313329027e-07, "loss": 1.0403, "step": 16660 }, { "epoch": 0.96, "grad_norm": 1.7818424701690674, "learning_rate": 1.0345059436527082e-07, "loss": 0.8786, "step": 16661 }, { "epoch": 0.96, "grad_norm": 1.848825216293335, "learning_rate": 1.0318425709952562e-07, "loss": 0.8428, "step": 16662 }, { "epoch": 0.96, "grad_norm": 1.8447976112365723, "learning_rate": 1.029182613452473e-07, "loss": 0.8852, "step": 16663 }, { "epoch": 0.96, "grad_norm": 1.6873865127563477, "learning_rate": 1.0265260711161184e-07, "loss": 0.975, "step": 16664 }, { "epoch": 0.96, "grad_norm": 1.6499338150024414, "learning_rate": 1.023872944077886e-07, "loss": 0.9421, "step": 16665 }, { "epoch": 0.96, "grad_norm": 1.0688316822052002, "learning_rate": 1.0212232324293248e-07, "loss": 0.5359, "step": 16666 }, { "epoch": 0.96, "grad_norm": 1.56336510181427, "learning_rate": 1.0185769362618614e-07, "loss": 0.9357, "step": 16667 }, { "epoch": 0.96, "grad_norm": 1.8793442249298096, "learning_rate": 1.0159340556668007e-07, "loss": 0.8075, "step": 16668 }, { "epoch": 0.96, "grad_norm": 1.7353289127349854, "learning_rate": 1.0132945907353697e-07, "loss": 0.9387, "step": 16669 }, { "epoch": 0.96, "grad_norm": 1.71719229221344, "learning_rate": 1.010658541558629e-07, "loss": 0.8756, "step": 16670 }, { "epoch": 0.96, "grad_norm": 1.7425702810287476, "learning_rate": 1.0080259082275501e-07, "loss": 0.9315, "step": 16671 }, { "epoch": 0.96, "grad_norm": 1.883992314338684, "learning_rate": 1.0053966908329716e-07, "loss": 0.897, "step": 16672 }, { "epoch": 0.96, "grad_norm": 1.644033432006836, "learning_rate": 1.0027708894656208e-07, "loss": 0.8737, "step": 16673 }, { "epoch": 0.96, "grad_norm": 1.781711459159851, "learning_rate": 1.0001485042161141e-07, "loss": 0.843, "step": 16674 }, { "epoch": 0.96, "grad_norm": 0.9693657159805298, "learning_rate": 9.975295351749348e-08, "loss": 0.4812, "step": 16675 }, { "epoch": 0.96, "grad_norm": 1.780993938446045, "learning_rate": 9.949139824324661e-08, "loss": 0.8793, "step": 16676 }, { "epoch": 0.96, "grad_norm": 1.7134166955947876, "learning_rate": 9.923018460789358e-08, "loss": 0.8925, "step": 16677 }, { "epoch": 0.96, "grad_norm": 1.8056004047393799, "learning_rate": 9.896931262045162e-08, "loss": 0.8949, "step": 16678 }, { "epoch": 0.96, "grad_norm": 1.7053399085998535, "learning_rate": 9.870878228992132e-08, "loss": 0.8244, "step": 16679 }, { "epoch": 0.96, "grad_norm": 1.9220213890075684, "learning_rate": 9.844859362529324e-08, "loss": 0.8981, "step": 16680 }, { "epoch": 0.96, "grad_norm": 1.7998857498168945, "learning_rate": 9.818874663554356e-08, "loss": 0.8405, "step": 16681 }, { "epoch": 0.96, "grad_norm": 1.8952929973602295, "learning_rate": 9.792924132964287e-08, "loss": 0.9372, "step": 16682 }, { "epoch": 0.96, "grad_norm": 1.7541149854660034, "learning_rate": 9.76700777165418e-08, "loss": 0.8867, "step": 16683 }, { "epoch": 0.96, "grad_norm": 1.762635350227356, "learning_rate": 9.741125580518651e-08, "loss": 0.9163, "step": 16684 }, { "epoch": 0.96, "grad_norm": 1.7036212682724, "learning_rate": 9.715277560450653e-08, "loss": 0.8809, "step": 16685 }, { "epoch": 0.96, "grad_norm": 1.6715259552001953, "learning_rate": 9.689463712342251e-08, "loss": 0.8324, "step": 16686 }, { "epoch": 0.96, "grad_norm": 1.7778007984161377, "learning_rate": 9.663684037084064e-08, "loss": 0.8009, "step": 16687 }, { "epoch": 0.96, "grad_norm": 1.7729668617248535, "learning_rate": 9.637938535565716e-08, "loss": 0.847, "step": 16688 }, { "epoch": 0.96, "grad_norm": 1.8433982133865356, "learning_rate": 9.612227208675718e-08, "loss": 0.8444, "step": 16689 }, { "epoch": 0.96, "grad_norm": 1.8206764459609985, "learning_rate": 9.586550057301247e-08, "loss": 0.9588, "step": 16690 }, { "epoch": 0.96, "grad_norm": 1.8578920364379883, "learning_rate": 9.560907082328263e-08, "loss": 0.9852, "step": 16691 }, { "epoch": 0.96, "grad_norm": 1.6715120077133179, "learning_rate": 9.535298284641725e-08, "loss": 0.8814, "step": 16692 }, { "epoch": 0.96, "grad_norm": 1.7341216802597046, "learning_rate": 9.509723665125259e-08, "loss": 0.9187, "step": 16693 }, { "epoch": 0.96, "grad_norm": 1.7308123111724854, "learning_rate": 9.484183224661381e-08, "loss": 0.8959, "step": 16694 }, { "epoch": 0.96, "grad_norm": 1.8514095544815063, "learning_rate": 9.458676964131496e-08, "loss": 0.962, "step": 16695 }, { "epoch": 0.96, "grad_norm": 1.9360171556472778, "learning_rate": 9.433204884415681e-08, "loss": 0.8415, "step": 16696 }, { "epoch": 0.96, "grad_norm": 1.7884798049926758, "learning_rate": 9.407766986393007e-08, "loss": 0.8342, "step": 16697 }, { "epoch": 0.96, "grad_norm": 1.8000905513763428, "learning_rate": 9.382363270941108e-08, "loss": 0.8586, "step": 16698 }, { "epoch": 0.96, "grad_norm": 1.616525650024414, "learning_rate": 9.356993738936615e-08, "loss": 0.8999, "step": 16699 }, { "epoch": 0.96, "grad_norm": 1.9182251691818237, "learning_rate": 9.33165839125516e-08, "loss": 0.8527, "step": 16700 }, { "epoch": 0.96, "grad_norm": 1.6902246475219727, "learning_rate": 9.306357228770713e-08, "loss": 0.8372, "step": 16701 }, { "epoch": 0.96, "grad_norm": 0.8984614014625549, "learning_rate": 9.281090252356684e-08, "loss": 0.4854, "step": 16702 }, { "epoch": 0.96, "grad_norm": 2.1479718685150146, "learning_rate": 9.255857462884599e-08, "loss": 0.8815, "step": 16703 }, { "epoch": 0.96, "grad_norm": 1.8117250204086304, "learning_rate": 9.230658861225428e-08, "loss": 0.8504, "step": 16704 }, { "epoch": 0.96, "grad_norm": 1.733026146888733, "learning_rate": 9.205494448248476e-08, "loss": 0.8416, "step": 16705 }, { "epoch": 0.96, "grad_norm": 1.902415156364441, "learning_rate": 9.18036422482238e-08, "loss": 0.9298, "step": 16706 }, { "epoch": 0.96, "grad_norm": 1.6994664669036865, "learning_rate": 9.155268191814114e-08, "loss": 0.8525, "step": 16707 }, { "epoch": 0.96, "grad_norm": 0.9498339295387268, "learning_rate": 9.130206350089765e-08, "loss": 0.4791, "step": 16708 }, { "epoch": 0.96, "grad_norm": 1.7519595623016357, "learning_rate": 9.105178700514084e-08, "loss": 0.8754, "step": 16709 }, { "epoch": 0.96, "grad_norm": 1.7514469623565674, "learning_rate": 9.080185243950712e-08, "loss": 0.9439, "step": 16710 }, { "epoch": 0.96, "grad_norm": 0.9952343106269836, "learning_rate": 9.055225981262184e-08, "loss": 0.5427, "step": 16711 }, { "epoch": 0.96, "grad_norm": 1.599165916442871, "learning_rate": 9.030300913309698e-08, "loss": 0.8042, "step": 16712 }, { "epoch": 0.96, "grad_norm": 1.5810967683792114, "learning_rate": 9.005410040953344e-08, "loss": 0.9077, "step": 16713 }, { "epoch": 0.96, "grad_norm": 1.9196925163269043, "learning_rate": 8.980553365051992e-08, "loss": 0.8921, "step": 16714 }, { "epoch": 0.96, "grad_norm": 1.9037365913391113, "learning_rate": 8.95573088646362e-08, "loss": 0.8476, "step": 16715 }, { "epoch": 0.96, "grad_norm": 1.7106841802597046, "learning_rate": 8.930942606044434e-08, "loss": 0.9139, "step": 16716 }, { "epoch": 0.96, "grad_norm": 3.833024501800537, "learning_rate": 8.906188524650083e-08, "loss": 0.8717, "step": 16717 }, { "epoch": 0.96, "grad_norm": 1.6605820655822754, "learning_rate": 8.881468643134661e-08, "loss": 0.9245, "step": 16718 }, { "epoch": 0.96, "grad_norm": 1.6086612939834595, "learning_rate": 8.856782962351152e-08, "loss": 0.8964, "step": 16719 }, { "epoch": 0.96, "grad_norm": 1.8210073709487915, "learning_rate": 8.832131483151319e-08, "loss": 0.8985, "step": 16720 }, { "epoch": 0.96, "grad_norm": 1.0180044174194336, "learning_rate": 8.807514206386037e-08, "loss": 0.5604, "step": 16721 }, { "epoch": 0.96, "grad_norm": 1.7961909770965576, "learning_rate": 8.782931132904627e-08, "loss": 1.0014, "step": 16722 }, { "epoch": 0.96, "grad_norm": 1.779679298400879, "learning_rate": 8.758382263555299e-08, "loss": 0.8758, "step": 16723 }, { "epoch": 0.96, "grad_norm": 1.999839425086975, "learning_rate": 8.733867599185487e-08, "loss": 0.889, "step": 16724 }, { "epoch": 0.96, "grad_norm": 1.7902342081069946, "learning_rate": 8.709387140640736e-08, "loss": 0.9615, "step": 16725 }, { "epoch": 0.96, "grad_norm": 1.9254785776138306, "learning_rate": 8.684940888766036e-08, "loss": 0.9201, "step": 16726 }, { "epoch": 0.96, "grad_norm": 1.8096891641616821, "learning_rate": 8.660528844404936e-08, "loss": 0.913, "step": 16727 }, { "epoch": 0.96, "grad_norm": 1.7044142484664917, "learning_rate": 8.636151008399762e-08, "loss": 0.9629, "step": 16728 }, { "epoch": 0.96, "grad_norm": 1.5496708154678345, "learning_rate": 8.61180738159173e-08, "loss": 0.871, "step": 16729 }, { "epoch": 0.96, "grad_norm": 2.0486814975738525, "learning_rate": 8.587497964820946e-08, "loss": 0.8495, "step": 16730 }, { "epoch": 0.96, "grad_norm": 1.9881958961486816, "learning_rate": 8.563222758926181e-08, "loss": 0.8516, "step": 16731 }, { "epoch": 0.96, "grad_norm": 1.599428653717041, "learning_rate": 8.538981764745102e-08, "loss": 0.8877, "step": 16732 }, { "epoch": 0.96, "grad_norm": 1.0189226865768433, "learning_rate": 8.51477498311426e-08, "loss": 0.5455, "step": 16733 }, { "epoch": 0.96, "grad_norm": 1.7823212146759033, "learning_rate": 8.490602414868876e-08, "loss": 0.9574, "step": 16734 }, { "epoch": 0.96, "grad_norm": 1.8173750638961792, "learning_rate": 8.466464060843282e-08, "loss": 0.8808, "step": 16735 }, { "epoch": 0.96, "grad_norm": 1.5656057596206665, "learning_rate": 8.442359921870148e-08, "loss": 0.9062, "step": 16736 }, { "epoch": 0.96, "grad_norm": 1.764848232269287, "learning_rate": 8.418289998781359e-08, "loss": 0.8444, "step": 16737 }, { "epoch": 0.96, "grad_norm": 1.699743628501892, "learning_rate": 8.394254292407589e-08, "loss": 0.8085, "step": 16738 }, { "epoch": 0.96, "grad_norm": 1.8418811559677124, "learning_rate": 8.37025280357806e-08, "loss": 0.9323, "step": 16739 }, { "epoch": 0.96, "grad_norm": 1.830573320388794, "learning_rate": 8.346285533121224e-08, "loss": 0.9605, "step": 16740 }, { "epoch": 0.96, "grad_norm": 1.1512264013290405, "learning_rate": 8.322352481863971e-08, "loss": 0.5256, "step": 16741 }, { "epoch": 0.96, "grad_norm": 1.803523302078247, "learning_rate": 8.298453650632088e-08, "loss": 0.8608, "step": 16742 }, { "epoch": 0.96, "grad_norm": 1.7614006996154785, "learning_rate": 8.274589040250469e-08, "loss": 0.8161, "step": 16743 }, { "epoch": 0.96, "grad_norm": 1.7893513441085815, "learning_rate": 8.250758651542456e-08, "loss": 0.812, "step": 16744 }, { "epoch": 0.96, "grad_norm": 1.780419111251831, "learning_rate": 8.226962485330392e-08, "loss": 0.8996, "step": 16745 }, { "epoch": 0.96, "grad_norm": 1.713121771812439, "learning_rate": 8.203200542435507e-08, "loss": 0.9211, "step": 16746 }, { "epoch": 0.96, "grad_norm": 1.6605677604675293, "learning_rate": 8.179472823677703e-08, "loss": 0.848, "step": 16747 }, { "epoch": 0.96, "grad_norm": 1.686124324798584, "learning_rate": 8.155779329875768e-08, "loss": 0.9004, "step": 16748 }, { "epoch": 0.96, "grad_norm": 1.7454346418380737, "learning_rate": 8.13212006184727e-08, "loss": 0.8211, "step": 16749 }, { "epoch": 0.96, "grad_norm": 1.7194617986679077, "learning_rate": 8.108495020408558e-08, "loss": 0.8278, "step": 16750 }, { "epoch": 0.96, "grad_norm": 1.7110462188720703, "learning_rate": 8.084904206375088e-08, "loss": 0.9624, "step": 16751 }, { "epoch": 0.96, "grad_norm": 1.7371922731399536, "learning_rate": 8.061347620560656e-08, "loss": 0.854, "step": 16752 }, { "epoch": 0.96, "grad_norm": 1.9698975086212158, "learning_rate": 8.037825263778276e-08, "loss": 0.862, "step": 16753 }, { "epoch": 0.96, "grad_norm": 1.8095905780792236, "learning_rate": 8.014337136839633e-08, "loss": 0.9402, "step": 16754 }, { "epoch": 0.96, "grad_norm": 1.7126554250717163, "learning_rate": 7.990883240555191e-08, "loss": 0.9407, "step": 16755 }, { "epoch": 0.96, "grad_norm": 1.7946914434432983, "learning_rate": 7.967463575734413e-08, "loss": 0.8918, "step": 16756 }, { "epoch": 0.96, "grad_norm": 1.9343855381011963, "learning_rate": 7.944078143185207e-08, "loss": 0.8248, "step": 16757 }, { "epoch": 0.96, "grad_norm": 1.84098482131958, "learning_rate": 7.920726943714707e-08, "loss": 0.8504, "step": 16758 }, { "epoch": 0.96, "grad_norm": 2.9783902168273926, "learning_rate": 7.8974099781286e-08, "loss": 0.9248, "step": 16759 }, { "epoch": 0.96, "grad_norm": 1.650917410850525, "learning_rate": 7.874127247231688e-08, "loss": 0.9578, "step": 16760 }, { "epoch": 0.96, "grad_norm": 1.7563252449035645, "learning_rate": 7.850878751827107e-08, "loss": 0.8776, "step": 16761 }, { "epoch": 0.96, "grad_norm": 1.762316107749939, "learning_rate": 7.827664492717323e-08, "loss": 0.8346, "step": 16762 }, { "epoch": 0.96, "grad_norm": 1.75888991355896, "learning_rate": 7.804484470703255e-08, "loss": 0.8616, "step": 16763 }, { "epoch": 0.96, "grad_norm": 1.7647573947906494, "learning_rate": 7.781338686584928e-08, "loss": 0.8701, "step": 16764 }, { "epoch": 0.96, "grad_norm": 1.64426589012146, "learning_rate": 7.758227141160923e-08, "loss": 0.8792, "step": 16765 }, { "epoch": 0.96, "grad_norm": 1.8904253244400024, "learning_rate": 7.735149835228717e-08, "loss": 0.9248, "step": 16766 }, { "epoch": 0.96, "grad_norm": 1.821258544921875, "learning_rate": 7.712106769584782e-08, "loss": 0.9874, "step": 16767 }, { "epoch": 0.96, "grad_norm": 1.7079551219940186, "learning_rate": 7.689097945024149e-08, "loss": 0.9205, "step": 16768 }, { "epoch": 0.96, "grad_norm": 1.7915880680084229, "learning_rate": 7.66612336234096e-08, "loss": 0.8896, "step": 16769 }, { "epoch": 0.96, "grad_norm": 1.6094939708709717, "learning_rate": 7.643183022327694e-08, "loss": 0.8724, "step": 16770 }, { "epoch": 0.96, "grad_norm": 1.6901040077209473, "learning_rate": 7.620276925776271e-08, "loss": 0.8766, "step": 16771 }, { "epoch": 0.96, "grad_norm": 1.8767811059951782, "learning_rate": 7.597405073476949e-08, "loss": 0.8464, "step": 16772 }, { "epoch": 0.96, "grad_norm": 1.8378677368164062, "learning_rate": 7.574567466219096e-08, "loss": 0.935, "step": 16773 }, { "epoch": 0.96, "grad_norm": 0.9892394542694092, "learning_rate": 7.551764104790527e-08, "loss": 0.5458, "step": 16774 }, { "epoch": 0.96, "grad_norm": 1.7656549215316772, "learning_rate": 7.528994989978389e-08, "loss": 0.9198, "step": 16775 }, { "epoch": 0.96, "grad_norm": 1.6966168880462646, "learning_rate": 7.506260122568277e-08, "loss": 0.8504, "step": 16776 }, { "epoch": 0.96, "grad_norm": 1.6978414058685303, "learning_rate": 7.483559503344673e-08, "loss": 0.8553, "step": 16777 }, { "epoch": 0.96, "grad_norm": 1.6987836360931396, "learning_rate": 7.460893133090952e-08, "loss": 0.8564, "step": 16778 }, { "epoch": 0.96, "grad_norm": 1.874830961227417, "learning_rate": 7.438261012589265e-08, "loss": 0.923, "step": 16779 }, { "epoch": 0.96, "grad_norm": 1.7200143337249756, "learning_rate": 7.415663142620655e-08, "loss": 0.9686, "step": 16780 }, { "epoch": 0.96, "grad_norm": 1.067589282989502, "learning_rate": 7.393099523964719e-08, "loss": 0.5695, "step": 16781 }, { "epoch": 0.96, "grad_norm": 1.8484282493591309, "learning_rate": 7.370570157400281e-08, "loss": 0.8738, "step": 16782 }, { "epoch": 0.96, "grad_norm": 1.6711485385894775, "learning_rate": 7.348075043704605e-08, "loss": 0.8844, "step": 16783 }, { "epoch": 0.96, "grad_norm": 1.7487764358520508, "learning_rate": 7.325614183654072e-08, "loss": 0.8899, "step": 16784 }, { "epoch": 0.96, "grad_norm": 1.0184872150421143, "learning_rate": 7.303187578023618e-08, "loss": 0.5378, "step": 16785 }, { "epoch": 0.96, "grad_norm": 1.9166746139526367, "learning_rate": 7.280795227587179e-08, "loss": 0.9465, "step": 16786 }, { "epoch": 0.96, "grad_norm": 1.0317682027816772, "learning_rate": 7.258437133117468e-08, "loss": 0.5222, "step": 16787 }, { "epoch": 0.96, "grad_norm": 1.6860440969467163, "learning_rate": 7.236113295385983e-08, "loss": 0.8689, "step": 16788 }, { "epoch": 0.96, "grad_norm": 1.8362212181091309, "learning_rate": 7.213823715162993e-08, "loss": 0.8992, "step": 16789 }, { "epoch": 0.96, "grad_norm": 1.9014166593551636, "learning_rate": 7.191568393217774e-08, "loss": 0.8837, "step": 16790 }, { "epoch": 0.96, "grad_norm": 1.8883302211761475, "learning_rate": 7.169347330318155e-08, "loss": 0.914, "step": 16791 }, { "epoch": 0.96, "grad_norm": 1.8568795919418335, "learning_rate": 7.147160527231079e-08, "loss": 0.8594, "step": 16792 }, { "epoch": 0.96, "grad_norm": 1.8146347999572754, "learning_rate": 7.125007984722043e-08, "loss": 0.9158, "step": 16793 }, { "epoch": 0.96, "grad_norm": 1.6529438495635986, "learning_rate": 7.102889703555548e-08, "loss": 0.9224, "step": 16794 }, { "epoch": 0.96, "grad_norm": 1.5970295667648315, "learning_rate": 7.080805684494652e-08, "loss": 0.8878, "step": 16795 }, { "epoch": 0.96, "grad_norm": 1.7198632955551147, "learning_rate": 7.058755928301631e-08, "loss": 0.8553, "step": 16796 }, { "epoch": 0.96, "grad_norm": 1.7496482133865356, "learning_rate": 7.036740435737321e-08, "loss": 0.8263, "step": 16797 }, { "epoch": 0.96, "grad_norm": 1.6216895580291748, "learning_rate": 7.014759207561339e-08, "loss": 0.9321, "step": 16798 }, { "epoch": 0.96, "grad_norm": 1.854547381401062, "learning_rate": 6.992812244532188e-08, "loss": 0.8848, "step": 16799 }, { "epoch": 0.96, "grad_norm": 1.8922069072723389, "learning_rate": 6.97089954740715e-08, "loss": 0.9181, "step": 16800 }, { "epoch": 0.96, "grad_norm": 1.6890883445739746, "learning_rate": 6.949021116942622e-08, "loss": 0.8787, "step": 16801 }, { "epoch": 0.96, "grad_norm": 1.8593213558197021, "learning_rate": 6.927176953893334e-08, "loss": 0.9281, "step": 16802 }, { "epoch": 0.96, "grad_norm": 1.7434062957763672, "learning_rate": 6.905367059013013e-08, "loss": 0.8783, "step": 16803 }, { "epoch": 0.96, "grad_norm": 1.8309246301651, "learning_rate": 6.883591433054615e-08, "loss": 0.8093, "step": 16804 }, { "epoch": 0.96, "grad_norm": 0.945576548576355, "learning_rate": 6.861850076769095e-08, "loss": 0.511, "step": 16805 }, { "epoch": 0.96, "grad_norm": 1.8680192232131958, "learning_rate": 6.840142990907072e-08, "loss": 0.8819, "step": 16806 }, { "epoch": 0.96, "grad_norm": 1.9060392379760742, "learning_rate": 6.818470176217284e-08, "loss": 0.9007, "step": 16807 }, { "epoch": 0.96, "grad_norm": 1.8418883085250854, "learning_rate": 6.79683163344791e-08, "loss": 0.9405, "step": 16808 }, { "epoch": 0.96, "grad_norm": 1.6961679458618164, "learning_rate": 6.775227363345349e-08, "loss": 0.8422, "step": 16809 }, { "epoch": 0.96, "grad_norm": 1.9760373830795288, "learning_rate": 6.75365736665523e-08, "loss": 0.9317, "step": 16810 }, { "epoch": 0.96, "grad_norm": 1.9569554328918457, "learning_rate": 6.732121644121958e-08, "loss": 0.9044, "step": 16811 }, { "epoch": 0.96, "grad_norm": 1.8706907033920288, "learning_rate": 6.710620196488605e-08, "loss": 0.9505, "step": 16812 }, { "epoch": 0.96, "grad_norm": 1.7905306816101074, "learning_rate": 6.689153024497019e-08, "loss": 0.9122, "step": 16813 }, { "epoch": 0.96, "grad_norm": 1.8795119524002075, "learning_rate": 6.667720128888056e-08, "loss": 0.8574, "step": 16814 }, { "epoch": 0.96, "grad_norm": 1.8214315176010132, "learning_rate": 6.646321510401344e-08, "loss": 0.8992, "step": 16815 }, { "epoch": 0.96, "grad_norm": 1.8094909191131592, "learning_rate": 6.624957169775293e-08, "loss": 0.9237, "step": 16816 }, { "epoch": 0.96, "grad_norm": 1.7679553031921387, "learning_rate": 6.603627107746979e-08, "loss": 0.937, "step": 16817 }, { "epoch": 0.96, "grad_norm": 1.8337820768356323, "learning_rate": 6.582331325052704e-08, "loss": 0.8813, "step": 16818 }, { "epoch": 0.96, "grad_norm": 1.0413517951965332, "learning_rate": 6.561069822427103e-08, "loss": 0.5715, "step": 16819 }, { "epoch": 0.96, "grad_norm": 1.8462769985198975, "learning_rate": 6.539842600603918e-08, "loss": 0.8639, "step": 16820 }, { "epoch": 0.96, "grad_norm": 1.80979585647583, "learning_rate": 6.518649660315568e-08, "loss": 0.9913, "step": 16821 }, { "epoch": 0.96, "grad_norm": 1.728592038154602, "learning_rate": 6.497491002293576e-08, "loss": 0.9206, "step": 16822 }, { "epoch": 0.96, "grad_norm": 1.9578533172607422, "learning_rate": 6.476366627267917e-08, "loss": 0.934, "step": 16823 }, { "epoch": 0.96, "grad_norm": 1.8968058824539185, "learning_rate": 6.455276535967448e-08, "loss": 0.897, "step": 16824 }, { "epoch": 0.96, "grad_norm": 1.6480591297149658, "learning_rate": 6.434220729120145e-08, "loss": 0.8769, "step": 16825 }, { "epoch": 0.97, "grad_norm": 1.821168065071106, "learning_rate": 6.413199207452314e-08, "loss": 0.9271, "step": 16826 }, { "epoch": 0.97, "grad_norm": 1.8033379316329956, "learning_rate": 6.39221197168971e-08, "loss": 0.8754, "step": 16827 }, { "epoch": 0.97, "grad_norm": 1.7585662603378296, "learning_rate": 6.371259022556198e-08, "loss": 0.8326, "step": 16828 }, { "epoch": 0.97, "grad_norm": 1.6405359506607056, "learning_rate": 6.350340360774976e-08, "loss": 0.8723, "step": 16829 }, { "epoch": 0.97, "grad_norm": 1.6803693771362305, "learning_rate": 6.329455987067912e-08, "loss": 0.8518, "step": 16830 }, { "epoch": 0.97, "grad_norm": 0.963234543800354, "learning_rate": 6.30860590215554e-08, "loss": 0.4813, "step": 16831 }, { "epoch": 0.97, "grad_norm": 1.7980083227157593, "learning_rate": 6.287790106757396e-08, "loss": 0.8532, "step": 16832 }, { "epoch": 0.97, "grad_norm": 1.7277086973190308, "learning_rate": 6.267008601591906e-08, "loss": 0.942, "step": 16833 }, { "epoch": 0.97, "grad_norm": 1.7424904108047485, "learning_rate": 6.24626138737594e-08, "loss": 0.8639, "step": 16834 }, { "epoch": 0.97, "grad_norm": 1.754137396812439, "learning_rate": 6.225548464825592e-08, "loss": 0.8617, "step": 16835 }, { "epoch": 0.97, "grad_norm": 1.0378326177597046, "learning_rate": 6.204869834655624e-08, "loss": 0.6097, "step": 16836 }, { "epoch": 0.97, "grad_norm": 1.7844176292419434, "learning_rate": 6.184225497579577e-08, "loss": 0.9808, "step": 16837 }, { "epoch": 0.97, "grad_norm": 1.645247220993042, "learning_rate": 6.163615454309769e-08, "loss": 0.9077, "step": 16838 }, { "epoch": 0.97, "grad_norm": 1.7880001068115234, "learning_rate": 6.143039705557297e-08, "loss": 0.9124, "step": 16839 }, { "epoch": 0.97, "grad_norm": 1.861774206161499, "learning_rate": 6.122498252032483e-08, "loss": 0.9008, "step": 16840 }, { "epoch": 0.97, "grad_norm": 0.9493675827980042, "learning_rate": 6.10199109444376e-08, "loss": 0.5769, "step": 16841 }, { "epoch": 0.97, "grad_norm": 2.164233446121216, "learning_rate": 6.081518233499117e-08, "loss": 0.9231, "step": 16842 }, { "epoch": 0.97, "grad_norm": 1.9515260457992554, "learning_rate": 6.061079669904879e-08, "loss": 0.8373, "step": 16843 }, { "epoch": 0.97, "grad_norm": 1.0363835096359253, "learning_rate": 6.040675404366259e-08, "loss": 0.5166, "step": 16844 }, { "epoch": 0.97, "grad_norm": 1.9002394676208496, "learning_rate": 6.02030543758747e-08, "loss": 0.8562, "step": 16845 }, { "epoch": 0.97, "grad_norm": 1.804194450378418, "learning_rate": 5.999969770271397e-08, "loss": 0.8752, "step": 16846 }, { "epoch": 0.97, "grad_norm": 1.719359040260315, "learning_rate": 5.979668403119699e-08, "loss": 0.8818, "step": 16847 }, { "epoch": 0.97, "grad_norm": 1.6977514028549194, "learning_rate": 5.959401336833037e-08, "loss": 0.9692, "step": 16848 }, { "epoch": 0.97, "grad_norm": 1.8752810955047607, "learning_rate": 5.9391685721106315e-08, "loss": 0.8759, "step": 16849 }, { "epoch": 0.97, "grad_norm": 1.6643203496932983, "learning_rate": 5.918970109650701e-08, "loss": 0.8666, "step": 16850 }, { "epoch": 0.97, "grad_norm": 1.8345988988876343, "learning_rate": 5.8988059501502434e-08, "loss": 0.8879, "step": 16851 }, { "epoch": 0.97, "grad_norm": 1.8764899969100952, "learning_rate": 5.878676094305147e-08, "loss": 0.8845, "step": 16852 }, { "epoch": 0.97, "grad_norm": 1.644471287727356, "learning_rate": 5.858580542809966e-08, "loss": 0.8837, "step": 16853 }, { "epoch": 0.97, "grad_norm": 1.7819035053253174, "learning_rate": 5.8385192963580364e-08, "loss": 0.905, "step": 16854 }, { "epoch": 0.97, "grad_norm": 1.6999619007110596, "learning_rate": 5.818492355641803e-08, "loss": 0.941, "step": 16855 }, { "epoch": 0.97, "grad_norm": 1.8117866516113281, "learning_rate": 5.79849972135238e-08, "loss": 0.9184, "step": 16856 }, { "epoch": 0.97, "grad_norm": 1.6418768167495728, "learning_rate": 5.778541394179327e-08, "loss": 0.8844, "step": 16857 }, { "epoch": 0.97, "grad_norm": 1.6514885425567627, "learning_rate": 5.7586173748117594e-08, "loss": 0.8537, "step": 16858 }, { "epoch": 0.97, "grad_norm": 1.8319910764694214, "learning_rate": 5.738727663936905e-08, "loss": 0.9233, "step": 16859 }, { "epoch": 0.97, "grad_norm": 0.9716206789016724, "learning_rate": 5.718872262241215e-08, "loss": 0.4998, "step": 16860 }, { "epoch": 0.97, "grad_norm": 1.785876989364624, "learning_rate": 5.6990511704098086e-08, "loss": 0.8748, "step": 16861 }, { "epoch": 0.97, "grad_norm": 1.663503885269165, "learning_rate": 5.6792643891266927e-08, "loss": 0.9801, "step": 16862 }, { "epoch": 0.97, "grad_norm": 2.0122292041778564, "learning_rate": 5.659511919074656e-08, "loss": 0.8861, "step": 16863 }, { "epoch": 0.97, "grad_norm": 1.0279662609100342, "learning_rate": 5.6397937609353745e-08, "loss": 0.5392, "step": 16864 }, { "epoch": 0.97, "grad_norm": 2.028484582901001, "learning_rate": 5.620109915388972e-08, "loss": 0.9338, "step": 16865 }, { "epoch": 0.97, "grad_norm": 1.669344425201416, "learning_rate": 5.600460383115014e-08, "loss": 0.9576, "step": 16866 }, { "epoch": 0.97, "grad_norm": 1.8382127285003662, "learning_rate": 5.5808451647914044e-08, "loss": 0.9117, "step": 16867 }, { "epoch": 0.97, "grad_norm": 1.753487229347229, "learning_rate": 5.5612642610950454e-08, "loss": 0.9321, "step": 16868 }, { "epoch": 0.97, "grad_norm": 1.7272156476974487, "learning_rate": 5.541717672701619e-08, "loss": 0.8381, "step": 16869 }, { "epoch": 0.97, "grad_norm": 2.0990896224975586, "learning_rate": 5.522205400285585e-08, "loss": 0.905, "step": 16870 }, { "epoch": 0.97, "grad_norm": 1.7755687236785889, "learning_rate": 5.502727444520295e-08, "loss": 0.8522, "step": 16871 }, { "epoch": 0.97, "grad_norm": 1.8048200607299805, "learning_rate": 5.483283806077877e-08, "loss": 0.9164, "step": 16872 }, { "epoch": 0.97, "grad_norm": 1.7603212594985962, "learning_rate": 5.463874485629239e-08, "loss": 1.0092, "step": 16873 }, { "epoch": 0.97, "grad_norm": 1.7477320432662964, "learning_rate": 5.444499483844179e-08, "loss": 0.9092, "step": 16874 }, { "epoch": 0.97, "grad_norm": 1.979417324066162, "learning_rate": 5.425158801391162e-08, "loss": 0.8517, "step": 16875 }, { "epoch": 0.97, "grad_norm": 1.7972522974014282, "learning_rate": 5.405852438937764e-08, "loss": 0.9267, "step": 16876 }, { "epoch": 0.97, "grad_norm": 1.9225883483886719, "learning_rate": 5.386580397150121e-08, "loss": 0.9067, "step": 16877 }, { "epoch": 0.97, "grad_norm": 1.7164772748947144, "learning_rate": 5.3673426766932546e-08, "loss": 0.8054, "step": 16878 }, { "epoch": 0.97, "grad_norm": 1.678081750869751, "learning_rate": 5.3481392782309684e-08, "loss": 0.795, "step": 16879 }, { "epoch": 0.97, "grad_norm": 1.756480097770691, "learning_rate": 5.328970202425954e-08, "loss": 0.9606, "step": 16880 }, { "epoch": 0.97, "grad_norm": 1.753977656364441, "learning_rate": 5.309835449939793e-08, "loss": 0.8448, "step": 16881 }, { "epoch": 0.97, "grad_norm": 1.8222942352294922, "learning_rate": 5.2907350214325135e-08, "loss": 0.8967, "step": 16882 }, { "epoch": 0.97, "grad_norm": 1.868146300315857, "learning_rate": 5.271668917563366e-08, "loss": 0.8656, "step": 16883 }, { "epoch": 0.97, "grad_norm": 1.6813275814056396, "learning_rate": 5.2526371389902685e-08, "loss": 0.7733, "step": 16884 }, { "epoch": 0.97, "grad_norm": 1.9261870384216309, "learning_rate": 5.233639686370029e-08, "loss": 0.8399, "step": 16885 }, { "epoch": 0.97, "grad_norm": 1.9739835262298584, "learning_rate": 5.214676560358123e-08, "loss": 0.9294, "step": 16886 }, { "epoch": 0.97, "grad_norm": 1.7306627035140991, "learning_rate": 5.195747761608805e-08, "loss": 0.8975, "step": 16887 }, { "epoch": 0.97, "grad_norm": 1.8155744075775146, "learning_rate": 5.176853290775441e-08, "loss": 0.8849, "step": 16888 }, { "epoch": 0.97, "grad_norm": 1.8863637447357178, "learning_rate": 5.157993148509843e-08, "loss": 0.8946, "step": 16889 }, { "epoch": 0.97, "grad_norm": 2.2636477947235107, "learning_rate": 5.1391673354630467e-08, "loss": 0.9218, "step": 16890 }, { "epoch": 0.97, "grad_norm": 2.2207958698272705, "learning_rate": 5.1203758522844205e-08, "loss": 0.8633, "step": 16891 }, { "epoch": 0.97, "grad_norm": 1.8707972764968872, "learning_rate": 5.101618699622668e-08, "loss": 0.8653, "step": 16892 }, { "epoch": 0.97, "grad_norm": 1.85116708278656, "learning_rate": 5.082895878124827e-08, "loss": 0.9898, "step": 16893 }, { "epoch": 0.97, "grad_norm": 1.6779738664627075, "learning_rate": 5.064207388437159e-08, "loss": 0.8801, "step": 16894 }, { "epoch": 0.97, "grad_norm": 1.7194525003433228, "learning_rate": 5.04555323120437e-08, "loss": 0.9582, "step": 16895 }, { "epoch": 0.97, "grad_norm": 1.7836042642593384, "learning_rate": 5.026933407070167e-08, "loss": 0.8723, "step": 16896 }, { "epoch": 0.97, "grad_norm": 1.7618259191513062, "learning_rate": 5.0083479166773696e-08, "loss": 0.8302, "step": 16897 }, { "epoch": 0.97, "grad_norm": 1.7172775268554688, "learning_rate": 4.98979676066691e-08, "loss": 0.9439, "step": 16898 }, { "epoch": 0.97, "grad_norm": 0.9904892444610596, "learning_rate": 4.971279939679163e-08, "loss": 0.5308, "step": 16899 }, { "epoch": 0.97, "grad_norm": 1.9795794486999512, "learning_rate": 4.952797454353064e-08, "loss": 0.8302, "step": 16900 }, { "epoch": 0.97, "grad_norm": 1.7926768064498901, "learning_rate": 4.934349305326325e-08, "loss": 0.9091, "step": 16901 }, { "epoch": 0.97, "grad_norm": 1.7847084999084473, "learning_rate": 4.915935493235657e-08, "loss": 0.9821, "step": 16902 }, { "epoch": 0.97, "grad_norm": 1.694834589958191, "learning_rate": 4.897556018716443e-08, "loss": 0.9604, "step": 16903 }, { "epoch": 0.97, "grad_norm": 1.6887502670288086, "learning_rate": 4.879210882402729e-08, "loss": 0.9233, "step": 16904 }, { "epoch": 0.97, "grad_norm": 1.618674397468567, "learning_rate": 4.8609000849277884e-08, "loss": 0.8824, "step": 16905 }, { "epoch": 0.97, "grad_norm": 1.8002557754516602, "learning_rate": 4.8426236269233375e-08, "loss": 0.9415, "step": 16906 }, { "epoch": 0.97, "grad_norm": 1.7163259983062744, "learning_rate": 4.824381509020093e-08, "loss": 0.8355, "step": 16907 }, { "epoch": 0.97, "grad_norm": 2.139887571334839, "learning_rate": 4.806173731847441e-08, "loss": 0.8743, "step": 16908 }, { "epoch": 0.97, "grad_norm": 1.826765775680542, "learning_rate": 4.788000296033879e-08, "loss": 0.9019, "step": 16909 }, { "epoch": 0.97, "grad_norm": 1.677130937576294, "learning_rate": 4.769861202206349e-08, "loss": 0.9111, "step": 16910 }, { "epoch": 0.97, "grad_norm": 1.74350106716156, "learning_rate": 4.751756450990908e-08, "loss": 0.8496, "step": 16911 }, { "epoch": 0.97, "grad_norm": 1.7927353382110596, "learning_rate": 4.733686043012275e-08, "loss": 0.9233, "step": 16912 }, { "epoch": 0.97, "grad_norm": 1.712623953819275, "learning_rate": 4.715649978893844e-08, "loss": 0.8325, "step": 16913 }, { "epoch": 0.97, "grad_norm": 1.7005751132965088, "learning_rate": 4.697648259258225e-08, "loss": 0.8882, "step": 16914 }, { "epoch": 0.97, "grad_norm": 2.08018159866333, "learning_rate": 4.6796808847264787e-08, "loss": 0.9487, "step": 16915 }, { "epoch": 0.97, "grad_norm": 1.9266852140426636, "learning_rate": 4.6617478559186634e-08, "loss": 0.9103, "step": 16916 }, { "epoch": 0.97, "grad_norm": 1.7743932008743286, "learning_rate": 4.6438491734535077e-08, "loss": 0.9236, "step": 16917 }, { "epoch": 0.97, "grad_norm": 1.790971279144287, "learning_rate": 4.6259848379486275e-08, "loss": 0.9723, "step": 16918 }, { "epoch": 0.97, "grad_norm": 1.8152374029159546, "learning_rate": 4.608154850020641e-08, "loss": 0.9049, "step": 16919 }, { "epoch": 0.97, "grad_norm": 1.7499529123306274, "learning_rate": 4.5903592102847224e-08, "loss": 0.8602, "step": 16920 }, { "epoch": 0.97, "grad_norm": 1.8109657764434814, "learning_rate": 4.5725979193549376e-08, "loss": 0.896, "step": 16921 }, { "epoch": 0.97, "grad_norm": 1.8365999460220337, "learning_rate": 4.554870977844128e-08, "loss": 0.8922, "step": 16922 }, { "epoch": 0.97, "grad_norm": 1.6344904899597168, "learning_rate": 4.537178386364138e-08, "loss": 0.9225, "step": 16923 }, { "epoch": 0.97, "grad_norm": 1.8438054323196411, "learning_rate": 4.519520145525369e-08, "loss": 0.919, "step": 16924 }, { "epoch": 0.97, "grad_norm": 1.926115870475769, "learning_rate": 4.5018962559372216e-08, "loss": 0.9209, "step": 16925 }, { "epoch": 0.97, "grad_norm": 1.9236541986465454, "learning_rate": 4.484306718207765e-08, "loss": 0.9418, "step": 16926 }, { "epoch": 0.97, "grad_norm": 1.9134057760238647, "learning_rate": 4.4667515329440694e-08, "loss": 0.9301, "step": 16927 }, { "epoch": 0.97, "grad_norm": 1.7136832475662231, "learning_rate": 4.449230700751872e-08, "loss": 0.9425, "step": 16928 }, { "epoch": 0.97, "grad_norm": 1.6230510473251343, "learning_rate": 4.4317442222356896e-08, "loss": 0.8967, "step": 16929 }, { "epoch": 0.97, "grad_norm": 1.7166478633880615, "learning_rate": 4.41429209799904e-08, "loss": 0.9116, "step": 16930 }, { "epoch": 0.97, "grad_norm": 1.8351988792419434, "learning_rate": 4.3968743286442186e-08, "loss": 0.9177, "step": 16931 }, { "epoch": 0.97, "grad_norm": 1.733890175819397, "learning_rate": 4.3794909147720776e-08, "loss": 0.9268, "step": 16932 }, { "epoch": 0.97, "grad_norm": 1.6140780448913574, "learning_rate": 4.362141856982471e-08, "loss": 0.9459, "step": 16933 }, { "epoch": 0.97, "grad_norm": 1.906572699546814, "learning_rate": 4.344827155874254e-08, "loss": 0.9225, "step": 16934 }, { "epoch": 0.97, "grad_norm": 1.7609244585037231, "learning_rate": 4.327546812044836e-08, "loss": 0.8341, "step": 16935 }, { "epoch": 0.97, "grad_norm": 1.8001834154129028, "learning_rate": 4.3103008260904076e-08, "loss": 0.9166, "step": 16936 }, { "epoch": 0.97, "grad_norm": 1.7748472690582275, "learning_rate": 4.29308919860616e-08, "loss": 0.9032, "step": 16937 }, { "epoch": 0.97, "grad_norm": 1.9414796829223633, "learning_rate": 4.2759119301859495e-08, "loss": 0.938, "step": 16938 }, { "epoch": 0.97, "grad_norm": 1.7094727754592896, "learning_rate": 4.2587690214226376e-08, "loss": 0.8305, "step": 16939 }, { "epoch": 0.97, "grad_norm": 1.812376618385315, "learning_rate": 4.2416604729077496e-08, "loss": 0.8929, "step": 16940 }, { "epoch": 0.97, "grad_norm": 1.905362606048584, "learning_rate": 4.2245862852315914e-08, "loss": 0.9097, "step": 16941 }, { "epoch": 0.97, "grad_norm": 1.858384370803833, "learning_rate": 4.2075464589833583e-08, "loss": 0.9154, "step": 16942 }, { "epoch": 0.97, "grad_norm": 1.7335377931594849, "learning_rate": 4.1905409947510244e-08, "loss": 0.8718, "step": 16943 }, { "epoch": 0.97, "grad_norm": 1.9073083400726318, "learning_rate": 4.1735698931215655e-08, "loss": 0.9305, "step": 16944 }, { "epoch": 0.97, "grad_norm": 1.79978609085083, "learning_rate": 4.1566331546804004e-08, "loss": 0.9137, "step": 16945 }, { "epoch": 0.97, "grad_norm": 1.5958067178726196, "learning_rate": 4.1397307800120635e-08, "loss": 0.9178, "step": 16946 }, { "epoch": 0.97, "grad_norm": 1.730715036392212, "learning_rate": 4.122862769699753e-08, "loss": 0.8727, "step": 16947 }, { "epoch": 0.97, "grad_norm": 1.8058571815490723, "learning_rate": 4.1060291243255615e-08, "loss": 0.8657, "step": 16948 }, { "epoch": 0.97, "grad_norm": 1.7328755855560303, "learning_rate": 4.089229844470466e-08, "loss": 0.9298, "step": 16949 }, { "epoch": 0.97, "grad_norm": 1.7758475542068481, "learning_rate": 4.072464930714004e-08, "loss": 0.9433, "step": 16950 }, { "epoch": 0.97, "grad_norm": 1.676727294921875, "learning_rate": 4.055734383634824e-08, "loss": 0.9177, "step": 16951 }, { "epoch": 0.97, "grad_norm": 1.8348947763442993, "learning_rate": 4.0390382038102416e-08, "loss": 0.8619, "step": 16952 }, { "epoch": 0.97, "grad_norm": 1.7150864601135254, "learning_rate": 4.0223763918162404e-08, "loss": 0.8381, "step": 16953 }, { "epoch": 0.97, "grad_norm": 1.750909447669983, "learning_rate": 4.0057489482279166e-08, "loss": 0.849, "step": 16954 }, { "epoch": 0.97, "grad_norm": 1.809551477432251, "learning_rate": 3.989155873619033e-08, "loss": 0.8733, "step": 16955 }, { "epoch": 0.97, "grad_norm": 1.783695936203003, "learning_rate": 3.972597168562131e-08, "loss": 0.8814, "step": 16956 }, { "epoch": 0.97, "grad_norm": 1.7003097534179688, "learning_rate": 3.956072833628533e-08, "loss": 0.8092, "step": 16957 }, { "epoch": 0.97, "grad_norm": 1.7009940147399902, "learning_rate": 3.939582869388559e-08, "loss": 0.887, "step": 16958 }, { "epoch": 0.97, "grad_norm": 1.8587396144866943, "learning_rate": 3.923127276411309e-08, "loss": 0.8572, "step": 16959 }, { "epoch": 0.97, "grad_norm": 1.7167260646820068, "learning_rate": 3.906706055264331e-08, "loss": 0.9159, "step": 16960 }, { "epoch": 0.97, "grad_norm": 1.8860445022583008, "learning_rate": 3.890319206514615e-08, "loss": 0.9051, "step": 16961 }, { "epoch": 0.97, "grad_norm": 1.8863625526428223, "learning_rate": 3.873966730727374e-08, "loss": 0.9757, "step": 16962 }, { "epoch": 0.97, "grad_norm": 1.8373031616210938, "learning_rate": 3.857648628466937e-08, "loss": 0.8576, "step": 16963 }, { "epoch": 0.97, "grad_norm": 1.6365312337875366, "learning_rate": 3.8413649002965184e-08, "loss": 0.9164, "step": 16964 }, { "epoch": 0.97, "grad_norm": 1.8804470300674438, "learning_rate": 3.825115546777891e-08, "loss": 0.8722, "step": 16965 }, { "epoch": 0.97, "grad_norm": 1.8623290061950684, "learning_rate": 3.808900568471941e-08, "loss": 0.8616, "step": 16966 }, { "epoch": 0.97, "grad_norm": 1.488790512084961, "learning_rate": 3.792719965937885e-08, "loss": 0.9075, "step": 16967 }, { "epoch": 0.97, "grad_norm": 1.8464670181274414, "learning_rate": 3.7765737397343904e-08, "loss": 0.8954, "step": 16968 }, { "epoch": 0.97, "grad_norm": 1.6899328231811523, "learning_rate": 3.7604618904184545e-08, "loss": 0.9634, "step": 16969 }, { "epoch": 0.97, "grad_norm": 1.7534446716308594, "learning_rate": 3.744384418546188e-08, "loss": 0.8734, "step": 16970 }, { "epoch": 0.97, "grad_norm": 1.8187503814697266, "learning_rate": 3.728341324672147e-08, "loss": 0.9168, "step": 16971 }, { "epoch": 0.97, "grad_norm": 1.840823769569397, "learning_rate": 3.712332609350222e-08, "loss": 0.9104, "step": 16972 }, { "epoch": 0.97, "grad_norm": 1.7387958765029907, "learning_rate": 3.6963582731326386e-08, "loss": 0.9156, "step": 16973 }, { "epoch": 0.97, "grad_norm": 1.6884331703186035, "learning_rate": 3.680418316570622e-08, "loss": 0.8942, "step": 16974 }, { "epoch": 0.97, "grad_norm": 1.890742301940918, "learning_rate": 3.664512740214288e-08, "loss": 0.8811, "step": 16975 }, { "epoch": 0.97, "grad_norm": 1.8318909406661987, "learning_rate": 3.648641544612419e-08, "loss": 0.8581, "step": 16976 }, { "epoch": 0.97, "grad_norm": 1.6680599451065063, "learning_rate": 3.6328047303128e-08, "loss": 0.8599, "step": 16977 }, { "epoch": 0.97, "grad_norm": 1.918280839920044, "learning_rate": 3.6170022978617715e-08, "loss": 0.9324, "step": 16978 }, { "epoch": 0.97, "grad_norm": 1.7631083726882935, "learning_rate": 3.6012342478047854e-08, "loss": 0.9336, "step": 16979 }, { "epoch": 0.97, "grad_norm": 1.8177746534347534, "learning_rate": 3.585500580685852e-08, "loss": 0.9714, "step": 16980 }, { "epoch": 0.97, "grad_norm": 1.7385541200637817, "learning_rate": 3.569801297047981e-08, "loss": 0.8697, "step": 16981 }, { "epoch": 0.97, "grad_norm": 1.7808159589767456, "learning_rate": 3.5541363974327395e-08, "loss": 0.8847, "step": 16982 }, { "epoch": 0.97, "grad_norm": 1.7447491884231567, "learning_rate": 3.538505882380916e-08, "loss": 0.9111, "step": 16983 }, { "epoch": 0.97, "grad_norm": 1.592615008354187, "learning_rate": 3.522909752431636e-08, "loss": 0.9204, "step": 16984 }, { "epoch": 0.97, "grad_norm": 1.9057852029800415, "learning_rate": 3.507348008123246e-08, "loss": 0.8696, "step": 16985 }, { "epoch": 0.97, "grad_norm": 1.6352488994598389, "learning_rate": 3.49182064999265e-08, "loss": 0.9257, "step": 16986 }, { "epoch": 0.97, "grad_norm": 1.6955666542053223, "learning_rate": 3.4763276785757525e-08, "loss": 0.8545, "step": 16987 }, { "epoch": 0.97, "grad_norm": 1.7541472911834717, "learning_rate": 3.460869094407127e-08, "loss": 0.8645, "step": 16988 }, { "epoch": 0.97, "grad_norm": 1.832167387008667, "learning_rate": 3.4454448980201226e-08, "loss": 0.8578, "step": 16989 }, { "epoch": 0.97, "grad_norm": 1.7183431386947632, "learning_rate": 3.430055089947093e-08, "loss": 0.8699, "step": 16990 }, { "epoch": 0.97, "grad_norm": 1.7163522243499756, "learning_rate": 3.414699670718946e-08, "loss": 0.9084, "step": 16991 }, { "epoch": 0.97, "grad_norm": 1.7016727924346924, "learning_rate": 3.399378640865814e-08, "loss": 0.9657, "step": 16992 }, { "epoch": 0.97, "grad_norm": 1.8987613916397095, "learning_rate": 3.3840920009161614e-08, "loss": 0.8524, "step": 16993 }, { "epoch": 0.97, "grad_norm": 1.7800244092941284, "learning_rate": 3.3688397513975676e-08, "loss": 0.8475, "step": 16994 }, { "epoch": 0.97, "grad_norm": 1.732125997543335, "learning_rate": 3.353621892836389e-08, "loss": 0.897, "step": 16995 }, { "epoch": 0.97, "grad_norm": 1.7798832654953003, "learning_rate": 3.3384384257576505e-08, "loss": 0.9155, "step": 16996 }, { "epoch": 0.97, "grad_norm": 1.6733369827270508, "learning_rate": 3.3232893506852657e-08, "loss": 0.9714, "step": 16997 }, { "epoch": 0.97, "grad_norm": 1.6715399026870728, "learning_rate": 3.30817466814215e-08, "loss": 0.9048, "step": 16998 }, { "epoch": 0.97, "grad_norm": 1.7094799280166626, "learning_rate": 3.293094378649775e-08, "loss": 0.8183, "step": 16999 }, { "epoch": 0.97, "grad_norm": 1.740619421005249, "learning_rate": 3.278048482728502e-08, "loss": 0.8976, "step": 17000 }, { "epoch": 0.98, "grad_norm": 1.6426995992660522, "learning_rate": 3.2630369808975827e-08, "loss": 0.8611, "step": 17001 }, { "epoch": 0.98, "grad_norm": 1.9045161008834839, "learning_rate": 3.248059873675047e-08, "loss": 0.8481, "step": 17002 }, { "epoch": 0.98, "grad_norm": 1.8640708923339844, "learning_rate": 3.233117161577481e-08, "loss": 0.8529, "step": 17003 }, { "epoch": 0.98, "grad_norm": 1.7647262811660767, "learning_rate": 3.218208845120807e-08, "loss": 0.875, "step": 17004 }, { "epoch": 0.98, "grad_norm": 1.8356826305389404, "learning_rate": 3.2033349248193903e-08, "loss": 0.9043, "step": 17005 }, { "epoch": 0.98, "grad_norm": 1.8375816345214844, "learning_rate": 3.1884954011862646e-08, "loss": 0.9352, "step": 17006 }, { "epoch": 0.98, "grad_norm": 2.0403692722320557, "learning_rate": 3.173690274733798e-08, "loss": 0.8908, "step": 17007 }, { "epoch": 0.98, "grad_norm": 1.753091812133789, "learning_rate": 3.158919545972694e-08, "loss": 0.889, "step": 17008 }, { "epoch": 0.98, "grad_norm": 1.7871108055114746, "learning_rate": 3.144183215412877e-08, "loss": 0.8781, "step": 17009 }, { "epoch": 0.98, "grad_norm": 1.767849087715149, "learning_rate": 3.1294812835624965e-08, "loss": 0.9194, "step": 17010 }, { "epoch": 0.98, "grad_norm": 1.8572269678115845, "learning_rate": 3.114813750929258e-08, "loss": 0.867, "step": 17011 }, { "epoch": 0.98, "grad_norm": 1.6849069595336914, "learning_rate": 3.1001806180189775e-08, "loss": 0.9161, "step": 17012 }, { "epoch": 0.98, "grad_norm": 1.7427431344985962, "learning_rate": 3.085581885336808e-08, "loss": 0.8043, "step": 17013 }, { "epoch": 0.98, "grad_norm": 1.810930848121643, "learning_rate": 3.071017553386346e-08, "loss": 0.8816, "step": 17014 }, { "epoch": 0.98, "grad_norm": 1.6672090291976929, "learning_rate": 3.056487622670301e-08, "loss": 0.8447, "step": 17015 }, { "epoch": 0.98, "grad_norm": 1.0917577743530273, "learning_rate": 3.0419920936900494e-08, "loss": 0.5408, "step": 17016 }, { "epoch": 0.98, "grad_norm": 1.8725041151046753, "learning_rate": 3.027530966945747e-08, "loss": 0.8854, "step": 17017 }, { "epoch": 0.98, "grad_norm": 1.9404852390289307, "learning_rate": 3.0131042429364376e-08, "loss": 0.9232, "step": 17018 }, { "epoch": 0.98, "grad_norm": 0.9725698828697205, "learning_rate": 2.998711922159836e-08, "loss": 0.4745, "step": 17019 }, { "epoch": 0.98, "grad_norm": 1.7312403917312622, "learning_rate": 2.984354005112766e-08, "loss": 0.7786, "step": 17020 }, { "epoch": 0.98, "grad_norm": 1.7155433893203735, "learning_rate": 2.9700304922906098e-08, "loss": 0.9062, "step": 17021 }, { "epoch": 0.98, "grad_norm": 1.7692140340805054, "learning_rate": 2.955741384187527e-08, "loss": 0.8677, "step": 17022 }, { "epoch": 0.98, "grad_norm": 1.8503379821777344, "learning_rate": 2.9414866812967902e-08, "loss": 0.916, "step": 17023 }, { "epoch": 0.98, "grad_norm": 1.9333522319793701, "learning_rate": 2.9272663841101168e-08, "loss": 0.9184, "step": 17024 }, { "epoch": 0.98, "grad_norm": 1.113245964050293, "learning_rate": 2.9130804931182254e-08, "loss": 0.573, "step": 17025 }, { "epoch": 0.98, "grad_norm": 1.6996816396713257, "learning_rate": 2.8989290088107248e-08, "loss": 0.8168, "step": 17026 }, { "epoch": 0.98, "grad_norm": 1.7576831579208374, "learning_rate": 2.8848119316758905e-08, "loss": 0.9034, "step": 17027 }, { "epoch": 0.98, "grad_norm": 1.783080816268921, "learning_rate": 2.8707292622008888e-08, "loss": 0.8541, "step": 17028 }, { "epoch": 0.98, "grad_norm": 1.6134307384490967, "learning_rate": 2.856681000871664e-08, "loss": 0.8485, "step": 17029 }, { "epoch": 0.98, "grad_norm": 1.7848316431045532, "learning_rate": 2.8426671481728285e-08, "loss": 0.9054, "step": 17030 }, { "epoch": 0.98, "grad_norm": 1.7033088207244873, "learning_rate": 2.8286877045882178e-08, "loss": 0.8713, "step": 17031 }, { "epoch": 0.98, "grad_norm": 1.7085144519805908, "learning_rate": 2.8147426706001125e-08, "loss": 0.8415, "step": 17032 }, { "epoch": 0.98, "grad_norm": 1.7783198356628418, "learning_rate": 2.8008320466896834e-08, "loss": 0.8603, "step": 17033 }, { "epoch": 0.98, "grad_norm": 1.7809889316558838, "learning_rate": 2.7869558333369906e-08, "loss": 0.8824, "step": 17034 }, { "epoch": 0.98, "grad_norm": 1.7643349170684814, "learning_rate": 2.7731140310208738e-08, "loss": 0.9208, "step": 17035 }, { "epoch": 0.98, "grad_norm": 1.8451462984085083, "learning_rate": 2.7593066402189506e-08, "loss": 0.9005, "step": 17036 }, { "epoch": 0.98, "grad_norm": 1.7428935766220093, "learning_rate": 2.745533661407729e-08, "loss": 0.9347, "step": 17037 }, { "epoch": 0.98, "grad_norm": 1.8069937229156494, "learning_rate": 2.7317950950622728e-08, "loss": 0.8862, "step": 17038 }, { "epoch": 0.98, "grad_norm": 1.8138737678527832, "learning_rate": 2.71809094165687e-08, "loss": 0.92, "step": 17039 }, { "epoch": 0.98, "grad_norm": 1.8255681991577148, "learning_rate": 2.7044212016643643e-08, "loss": 0.7977, "step": 17040 }, { "epoch": 0.98, "grad_norm": 1.8366420269012451, "learning_rate": 2.6907858755564896e-08, "loss": 0.8944, "step": 17041 }, { "epoch": 0.98, "grad_norm": 1.9001367092132568, "learning_rate": 2.6771849638036473e-08, "loss": 0.8731, "step": 17042 }, { "epoch": 0.98, "grad_norm": 1.8538137674331665, "learning_rate": 2.6636184668753506e-08, "loss": 0.8772, "step": 17043 }, { "epoch": 0.98, "grad_norm": 1.7869664430618286, "learning_rate": 2.6500863852395585e-08, "loss": 0.8862, "step": 17044 }, { "epoch": 0.98, "grad_norm": 1.6638556718826294, "learning_rate": 2.6365887193633422e-08, "loss": 0.8804, "step": 17045 }, { "epoch": 0.98, "grad_norm": 1.8795177936553955, "learning_rate": 2.6231254697123286e-08, "loss": 0.8973, "step": 17046 }, { "epoch": 0.98, "grad_norm": 1.7164945602416992, "learning_rate": 2.609696636751258e-08, "loss": 0.8814, "step": 17047 }, { "epoch": 0.98, "grad_norm": 1.7197479009628296, "learning_rate": 2.596302220943536e-08, "loss": 0.9137, "step": 17048 }, { "epoch": 0.98, "grad_norm": 1.7311943769454956, "learning_rate": 2.582942222751239e-08, "loss": 0.897, "step": 17049 }, { "epoch": 0.98, "grad_norm": 1.7729771137237549, "learning_rate": 2.569616642635331e-08, "loss": 0.886, "step": 17050 }, { "epoch": 0.98, "grad_norm": 1.8523064851760864, "learning_rate": 2.5563254810558878e-08, "loss": 0.96, "step": 17051 }, { "epoch": 0.98, "grad_norm": 1.9374133348464966, "learning_rate": 2.5430687384713217e-08, "loss": 0.862, "step": 17052 }, { "epoch": 0.98, "grad_norm": 1.754805326461792, "learning_rate": 2.5298464153391544e-08, "loss": 0.9155, "step": 17053 }, { "epoch": 0.98, "grad_norm": 1.8759046792984009, "learning_rate": 2.5166585121156883e-08, "loss": 0.9156, "step": 17054 }, { "epoch": 0.98, "grad_norm": 1.7295581102371216, "learning_rate": 2.5035050292560036e-08, "loss": 0.8677, "step": 17055 }, { "epoch": 0.98, "grad_norm": 1.7976747751235962, "learning_rate": 2.4903859672139597e-08, "loss": 0.979, "step": 17056 }, { "epoch": 0.98, "grad_norm": 1.8166133165359497, "learning_rate": 2.477301326442305e-08, "loss": 0.8452, "step": 17057 }, { "epoch": 0.98, "grad_norm": 1.8200608491897583, "learning_rate": 2.464251107392457e-08, "loss": 0.8777, "step": 17058 }, { "epoch": 0.98, "grad_norm": 1.654309868812561, "learning_rate": 2.4512353105148322e-08, "loss": 0.7942, "step": 17059 }, { "epoch": 0.98, "grad_norm": 1.7202975749969482, "learning_rate": 2.4382539362585168e-08, "loss": 0.9671, "step": 17060 }, { "epoch": 0.98, "grad_norm": 1.6502807140350342, "learning_rate": 2.4253069850714848e-08, "loss": 0.8972, "step": 17061 }, { "epoch": 0.98, "grad_norm": 1.7878464460372925, "learning_rate": 2.412394457400491e-08, "loss": 0.9122, "step": 17062 }, { "epoch": 0.98, "grad_norm": 1.6978769302368164, "learning_rate": 2.3995163536911782e-08, "loss": 0.7872, "step": 17063 }, { "epoch": 0.98, "grad_norm": 1.6716961860656738, "learning_rate": 2.3866726743877467e-08, "loss": 0.8586, "step": 17064 }, { "epoch": 0.98, "grad_norm": 1.0738970041275024, "learning_rate": 2.37386341993362e-08, "loss": 0.5886, "step": 17065 }, { "epoch": 0.98, "grad_norm": 1.7975754737854004, "learning_rate": 2.361088590770666e-08, "loss": 0.7864, "step": 17066 }, { "epoch": 0.98, "grad_norm": 1.8742730617523193, "learning_rate": 2.348348187339755e-08, "loss": 0.8731, "step": 17067 }, { "epoch": 0.98, "grad_norm": 1.6703159809112549, "learning_rate": 2.3356422100805354e-08, "loss": 0.9394, "step": 17068 }, { "epoch": 0.98, "grad_norm": 2.0650947093963623, "learning_rate": 2.3229706594314337e-08, "loss": 0.9534, "step": 17069 }, { "epoch": 0.98, "grad_norm": 1.850548267364502, "learning_rate": 2.3103335358297672e-08, "loss": 0.9163, "step": 17070 }, { "epoch": 0.98, "grad_norm": 1.9106305837631226, "learning_rate": 2.2977308397115205e-08, "loss": 0.9422, "step": 17071 }, { "epoch": 0.98, "grad_norm": 1.8150694370269775, "learning_rate": 2.2851625715115677e-08, "loss": 0.854, "step": 17072 }, { "epoch": 0.98, "grad_norm": 2.1853013038635254, "learning_rate": 2.272628731663673e-08, "loss": 0.8768, "step": 17073 }, { "epoch": 0.98, "grad_norm": 1.7792284488677979, "learning_rate": 2.2601293206003795e-08, "loss": 0.9033, "step": 17074 }, { "epoch": 0.98, "grad_norm": 1.5997140407562256, "learning_rate": 2.2476643387528974e-08, "loss": 0.948, "step": 17075 }, { "epoch": 0.98, "grad_norm": 1.8332079648971558, "learning_rate": 2.2352337865514384e-08, "loss": 0.9094, "step": 17076 }, { "epoch": 0.98, "grad_norm": 1.7646775245666504, "learning_rate": 2.2228376644248816e-08, "loss": 0.8828, "step": 17077 }, { "epoch": 0.98, "grad_norm": 1.842896580696106, "learning_rate": 2.210475972800996e-08, "loss": 0.859, "step": 17078 }, { "epoch": 0.98, "grad_norm": 1.8573428392410278, "learning_rate": 2.1981487121064404e-08, "loss": 0.8593, "step": 17079 }, { "epoch": 0.98, "grad_norm": 1.9214030504226685, "learning_rate": 2.18585588276643e-08, "loss": 0.8884, "step": 17080 }, { "epoch": 0.98, "grad_norm": 1.898058533668518, "learning_rate": 2.173597485205403e-08, "loss": 0.9032, "step": 17081 }, { "epoch": 0.98, "grad_norm": 1.7773573398590088, "learning_rate": 2.1613735198460216e-08, "loss": 0.8774, "step": 17082 }, { "epoch": 0.98, "grad_norm": 1.8364392518997192, "learning_rate": 2.1491839871105034e-08, "loss": 0.8957, "step": 17083 }, { "epoch": 0.98, "grad_norm": 1.8194957971572876, "learning_rate": 2.137028887419068e-08, "loss": 0.9094, "step": 17084 }, { "epoch": 0.98, "grad_norm": 1.69001042842865, "learning_rate": 2.1249082211914905e-08, "loss": 0.9411, "step": 17085 }, { "epoch": 0.98, "grad_norm": 1.8862391710281372, "learning_rate": 2.1128219888457702e-08, "loss": 0.8236, "step": 17086 }, { "epoch": 0.98, "grad_norm": 1.803242564201355, "learning_rate": 2.1007701907991284e-08, "loss": 0.8839, "step": 17087 }, { "epoch": 0.98, "grad_norm": 1.7918617725372314, "learning_rate": 2.088752827467455e-08, "loss": 0.8937, "step": 17088 }, { "epoch": 0.98, "grad_norm": 1.8471431732177734, "learning_rate": 2.0767698992653072e-08, "loss": 0.8761, "step": 17089 }, { "epoch": 0.98, "grad_norm": 1.606785774230957, "learning_rate": 2.064821406606243e-08, "loss": 0.8914, "step": 17090 }, { "epoch": 0.98, "grad_norm": 1.7698849439620972, "learning_rate": 2.0529073499024886e-08, "loss": 0.9565, "step": 17091 }, { "epoch": 0.98, "grad_norm": 1.8024587631225586, "learning_rate": 2.0410277295653814e-08, "loss": 0.9026, "step": 17092 }, { "epoch": 0.98, "grad_norm": 1.7660603523254395, "learning_rate": 2.0291825460047044e-08, "loss": 0.8885, "step": 17093 }, { "epoch": 0.98, "grad_norm": 1.6082065105438232, "learning_rate": 2.0173717996291308e-08, "loss": 0.9053, "step": 17094 }, { "epoch": 0.98, "grad_norm": 1.676315426826477, "learning_rate": 2.005595490846335e-08, "loss": 0.9445, "step": 17095 }, { "epoch": 0.98, "grad_norm": 1.7303417921066284, "learning_rate": 1.993853620062769e-08, "loss": 0.8629, "step": 17096 }, { "epoch": 0.98, "grad_norm": 1.5428791046142578, "learning_rate": 1.9821461876833314e-08, "loss": 0.9133, "step": 17097 }, { "epoch": 0.98, "grad_norm": 1.6239371299743652, "learning_rate": 1.9704731941122545e-08, "loss": 0.7725, "step": 17098 }, { "epoch": 0.98, "grad_norm": 1.7116451263427734, "learning_rate": 1.9588346397523273e-08, "loss": 0.9502, "step": 17099 }, { "epoch": 0.98, "grad_norm": 1.0114779472351074, "learning_rate": 1.947230525005006e-08, "loss": 0.5726, "step": 17100 }, { "epoch": 0.98, "grad_norm": 1.896443486213684, "learning_rate": 1.9356608502708595e-08, "loss": 0.8814, "step": 17101 }, { "epoch": 0.98, "grad_norm": 1.716213583946228, "learning_rate": 1.924125615949013e-08, "loss": 0.8606, "step": 17102 }, { "epoch": 0.98, "grad_norm": 1.8351266384124756, "learning_rate": 1.912624822437592e-08, "loss": 0.9155, "step": 17103 }, { "epoch": 0.98, "grad_norm": 1.8435132503509521, "learning_rate": 1.9011584701335017e-08, "loss": 0.9169, "step": 17104 }, { "epoch": 0.98, "grad_norm": 1.8010510206222534, "learning_rate": 1.8897265594323145e-08, "loss": 0.9259, "step": 17105 }, { "epoch": 0.98, "grad_norm": 1.8487257957458496, "learning_rate": 1.8783290907286034e-08, "loss": 0.8832, "step": 17106 }, { "epoch": 0.98, "grad_norm": 1.7463833093643188, "learning_rate": 1.8669660644156094e-08, "loss": 0.9773, "step": 17107 }, { "epoch": 0.98, "grad_norm": 1.8983663320541382, "learning_rate": 1.8556374808853527e-08, "loss": 0.8695, "step": 17108 }, { "epoch": 0.98, "grad_norm": 1.585308313369751, "learning_rate": 1.8443433405289646e-08, "loss": 0.8921, "step": 17109 }, { "epoch": 0.98, "grad_norm": 1.7126920223236084, "learning_rate": 1.8330836437360222e-08, "loss": 0.8569, "step": 17110 }, { "epoch": 0.98, "grad_norm": 1.848032832145691, "learning_rate": 1.8218583908949926e-08, "loss": 0.8996, "step": 17111 }, { "epoch": 0.98, "grad_norm": 1.6332186460494995, "learning_rate": 1.810667582393455e-08, "loss": 0.8163, "step": 17112 }, { "epoch": 0.98, "grad_norm": 1.7423447370529175, "learning_rate": 1.7995112186174334e-08, "loss": 0.8045, "step": 17113 }, { "epoch": 0.98, "grad_norm": 2.0589439868927, "learning_rate": 1.7883892999518428e-08, "loss": 0.8867, "step": 17114 }, { "epoch": 0.98, "grad_norm": 1.6956969499588013, "learning_rate": 1.7773018267805976e-08, "loss": 0.9109, "step": 17115 }, { "epoch": 0.98, "grad_norm": 1.7583951950073242, "learning_rate": 1.7662487994862808e-08, "loss": 0.8571, "step": 17116 }, { "epoch": 0.98, "grad_norm": 1.729231357574463, "learning_rate": 1.7552302184502544e-08, "loss": 0.7545, "step": 17117 }, { "epoch": 0.98, "grad_norm": 1.80427885055542, "learning_rate": 1.744246084052659e-08, "loss": 0.9136, "step": 17118 }, { "epoch": 0.98, "grad_norm": 1.8782159090042114, "learning_rate": 1.7332963966726348e-08, "loss": 0.8671, "step": 17119 }, { "epoch": 0.98, "grad_norm": 1.652601957321167, "learning_rate": 1.722381156688102e-08, "loss": 0.9287, "step": 17120 }, { "epoch": 0.98, "grad_norm": 1.7854180335998535, "learning_rate": 1.711500364475538e-08, "loss": 0.9002, "step": 17121 }, { "epoch": 0.98, "grad_norm": 1.712899923324585, "learning_rate": 1.7006540204105304e-08, "loss": 0.9656, "step": 17122 }, { "epoch": 0.98, "grad_norm": 1.819166898727417, "learning_rate": 1.6898421248673357e-08, "loss": 0.8584, "step": 17123 }, { "epoch": 0.98, "grad_norm": 1.9423781633377075, "learning_rate": 1.679064678218989e-08, "loss": 0.9003, "step": 17124 }, { "epoch": 0.98, "grad_norm": 1.9733960628509521, "learning_rate": 1.668321680837415e-08, "loss": 0.8727, "step": 17125 }, { "epoch": 0.98, "grad_norm": 1.8883227109909058, "learning_rate": 1.6576131330933166e-08, "loss": 0.957, "step": 17126 }, { "epoch": 0.98, "grad_norm": 1.7456860542297363, "learning_rate": 1.6469390353562877e-08, "loss": 0.8811, "step": 17127 }, { "epoch": 0.98, "grad_norm": 1.9007147550582886, "learning_rate": 1.6362993879945888e-08, "loss": 0.9507, "step": 17128 }, { "epoch": 0.98, "grad_norm": 2.090998649597168, "learning_rate": 1.6256941913753708e-08, "loss": 0.9503, "step": 17129 }, { "epoch": 0.98, "grad_norm": 1.1105899810791016, "learning_rate": 1.615123445864564e-08, "loss": 0.5609, "step": 17130 }, { "epoch": 0.98, "grad_norm": 1.83977210521698, "learning_rate": 1.6045871518269863e-08, "loss": 0.8709, "step": 17131 }, { "epoch": 0.98, "grad_norm": 1.682094931602478, "learning_rate": 1.5940853096262366e-08, "loss": 0.9542, "step": 17132 }, { "epoch": 0.98, "grad_norm": 1.648603916168213, "learning_rate": 1.5836179196246914e-08, "loss": 0.8645, "step": 17133 }, { "epoch": 0.98, "grad_norm": 1.9157813787460327, "learning_rate": 1.5731849821833955e-08, "loss": 0.8548, "step": 17134 }, { "epoch": 0.98, "grad_norm": 1.7474228143692017, "learning_rate": 1.562786497662616e-08, "loss": 0.8565, "step": 17135 }, { "epoch": 0.98, "grad_norm": 1.7426587343215942, "learning_rate": 1.5524224664210662e-08, "loss": 0.9368, "step": 17136 }, { "epoch": 0.98, "grad_norm": 1.7092564105987549, "learning_rate": 1.5420928888163487e-08, "loss": 0.8422, "step": 17137 }, { "epoch": 0.98, "grad_norm": 2.0438761711120605, "learning_rate": 1.5317977652048455e-08, "loss": 0.8994, "step": 17138 }, { "epoch": 0.98, "grad_norm": 1.7591665983200073, "learning_rate": 1.5215370959419383e-08, "loss": 0.9301, "step": 17139 }, { "epoch": 0.98, "grad_norm": 1.7031729221343994, "learning_rate": 1.5113108813816778e-08, "loss": 0.8402, "step": 17140 }, { "epoch": 0.98, "grad_norm": 1.6974835395812988, "learning_rate": 1.501119121876893e-08, "loss": 0.8846, "step": 17141 }, { "epoch": 0.98, "grad_norm": 2.09260892868042, "learning_rate": 1.4909618177793016e-08, "loss": 0.9626, "step": 17142 }, { "epoch": 0.98, "grad_norm": 1.9540131092071533, "learning_rate": 1.4808389694394021e-08, "loss": 0.8754, "step": 17143 }, { "epoch": 0.98, "grad_norm": 1.8488633632659912, "learning_rate": 1.4707505772064701e-08, "loss": 0.9224, "step": 17144 }, { "epoch": 0.98, "grad_norm": 1.8815505504608154, "learning_rate": 1.4606966414286717e-08, "loss": 0.9979, "step": 17145 }, { "epoch": 0.98, "grad_norm": 2.037135124206543, "learning_rate": 1.4506771624529514e-08, "loss": 0.9023, "step": 17146 }, { "epoch": 0.98, "grad_norm": 1.694784164428711, "learning_rate": 1.440692140624922e-08, "loss": 0.8502, "step": 17147 }, { "epoch": 0.98, "grad_norm": 1.7239001989364624, "learning_rate": 1.4307415762893074e-08, "loss": 0.8968, "step": 17148 }, { "epoch": 0.98, "grad_norm": 1.7371407747268677, "learning_rate": 1.4208254697894996e-08, "loss": 0.8871, "step": 17149 }, { "epoch": 0.98, "grad_norm": 1.7894316911697388, "learning_rate": 1.4109438214674475e-08, "loss": 0.8576, "step": 17150 }, { "epoch": 0.98, "grad_norm": 1.828436255455017, "learning_rate": 1.4010966316643226e-08, "loss": 0.8415, "step": 17151 }, { "epoch": 0.98, "grad_norm": 1.6152567863464355, "learning_rate": 1.3912839007199641e-08, "loss": 0.8744, "step": 17152 }, { "epoch": 0.98, "grad_norm": 1.729296326637268, "learning_rate": 1.381505628972879e-08, "loss": 0.875, "step": 17153 }, { "epoch": 0.98, "grad_norm": 1.7571072578430176, "learning_rate": 1.371761816760464e-08, "loss": 0.8943, "step": 17154 }, { "epoch": 0.98, "grad_norm": 1.803439974784851, "learning_rate": 1.3620524644188949e-08, "loss": 0.9129, "step": 17155 }, { "epoch": 0.98, "grad_norm": 1.8950973749160767, "learning_rate": 1.3523775722834586e-08, "loss": 0.9404, "step": 17156 }, { "epoch": 0.98, "grad_norm": 1.7568846940994263, "learning_rate": 1.3427371406877776e-08, "loss": 0.8778, "step": 17157 }, { "epoch": 0.98, "grad_norm": 1.6658481359481812, "learning_rate": 1.3331311699646965e-08, "loss": 0.8916, "step": 17158 }, { "epoch": 0.98, "grad_norm": 1.9940531253814697, "learning_rate": 1.3235596604455058e-08, "loss": 0.9097, "step": 17159 }, { "epoch": 0.98, "grad_norm": 1.8355598449707031, "learning_rate": 1.3140226124606082e-08, "loss": 0.8967, "step": 17160 }, { "epoch": 0.98, "grad_norm": 1.6796530485153198, "learning_rate": 1.3045200263390734e-08, "loss": 0.7783, "step": 17161 }, { "epoch": 0.98, "grad_norm": 1.6376535892486572, "learning_rate": 1.2950519024088615e-08, "loss": 0.8064, "step": 17162 }, { "epoch": 0.98, "grad_norm": 1.9003660678863525, "learning_rate": 1.2856182409966e-08, "loss": 0.8903, "step": 17163 }, { "epoch": 0.98, "grad_norm": 1.8660941123962402, "learning_rate": 1.2762190424278065e-08, "loss": 0.9566, "step": 17164 }, { "epoch": 0.98, "grad_norm": 1.6976171731948853, "learning_rate": 1.266854307026888e-08, "loss": 0.86, "step": 17165 }, { "epoch": 0.98, "grad_norm": 1.8287798166275024, "learning_rate": 1.2575240351170303e-08, "loss": 0.9357, "step": 17166 }, { "epoch": 0.98, "grad_norm": 1.7769935131072998, "learning_rate": 1.2482282270200874e-08, "loss": 0.9407, "step": 17167 }, { "epoch": 0.98, "grad_norm": 2.1053519248962402, "learning_rate": 1.2389668830569135e-08, "loss": 0.8631, "step": 17168 }, { "epoch": 0.98, "grad_norm": 1.8490961790084839, "learning_rate": 1.2297400035471418e-08, "loss": 0.8591, "step": 17169 }, { "epoch": 0.98, "grad_norm": 1.7782566547393799, "learning_rate": 1.2205475888089623e-08, "loss": 0.8729, "step": 17170 }, { "epoch": 0.98, "grad_norm": 1.9943373203277588, "learning_rate": 1.2113896391597878e-08, "loss": 0.9313, "step": 17171 }, { "epoch": 0.98, "grad_norm": 1.9776736497879028, "learning_rate": 1.2022661549154769e-08, "loss": 0.8921, "step": 17172 }, { "epoch": 0.98, "grad_norm": 1.773078203201294, "learning_rate": 1.1931771363909995e-08, "loss": 0.8297, "step": 17173 }, { "epoch": 0.98, "grad_norm": 1.7074309587478638, "learning_rate": 1.1841225838998827e-08, "loss": 0.9223, "step": 17174 }, { "epoch": 0.99, "grad_norm": 1.8111144304275513, "learning_rate": 1.1751024977546543e-08, "loss": 0.885, "step": 17175 }, { "epoch": 0.99, "grad_norm": 1.7480214834213257, "learning_rate": 1.16611687826651e-08, "loss": 0.9442, "step": 17176 }, { "epoch": 0.99, "grad_norm": 1.824697732925415, "learning_rate": 1.1571657257455349e-08, "loss": 0.8634, "step": 17177 }, { "epoch": 0.99, "grad_norm": 1.7207027673721313, "learning_rate": 1.148249040500704e-08, "loss": 0.8902, "step": 17178 }, { "epoch": 0.99, "grad_norm": 1.7518699169158936, "learning_rate": 1.1393668228395494e-08, "loss": 0.8756, "step": 17179 }, { "epoch": 0.99, "grad_norm": 1.871133804321289, "learning_rate": 1.1305190730686034e-08, "loss": 0.8652, "step": 17180 }, { "epoch": 0.99, "grad_norm": 1.7560901641845703, "learning_rate": 1.1217057914932882e-08, "loss": 0.9319, "step": 17181 }, { "epoch": 0.99, "grad_norm": 1.8585501909255981, "learning_rate": 1.1129269784175833e-08, "loss": 0.9514, "step": 17182 }, { "epoch": 0.99, "grad_norm": 1.7219775915145874, "learning_rate": 1.1041826341445793e-08, "loss": 0.8165, "step": 17183 }, { "epoch": 0.99, "grad_norm": 1.6961294412612915, "learning_rate": 1.095472758975813e-08, "loss": 0.8941, "step": 17184 }, { "epoch": 0.99, "grad_norm": 1.8639365434646606, "learning_rate": 1.0867973532120436e-08, "loss": 0.9362, "step": 17185 }, { "epoch": 0.99, "grad_norm": 1.8855323791503906, "learning_rate": 1.0781564171524761e-08, "loss": 0.8431, "step": 17186 }, { "epoch": 0.99, "grad_norm": 1.7122552394866943, "learning_rate": 1.0695499510954276e-08, "loss": 0.8716, "step": 17187 }, { "epoch": 0.99, "grad_norm": 1.8580491542816162, "learning_rate": 1.060977955337772e-08, "loss": 0.8543, "step": 17188 }, { "epoch": 0.99, "grad_norm": 1.8506311178207397, "learning_rate": 1.0524404301753832e-08, "loss": 0.8649, "step": 17189 }, { "epoch": 0.99, "grad_norm": 1.7915905714035034, "learning_rate": 1.0439373759028037e-08, "loss": 0.8932, "step": 17190 }, { "epoch": 0.99, "grad_norm": 1.7270772457122803, "learning_rate": 1.0354687928134655e-08, "loss": 0.7863, "step": 17191 }, { "epoch": 0.99, "grad_norm": 1.7215646505355835, "learning_rate": 1.027034681199579e-08, "loss": 0.892, "step": 17192 }, { "epoch": 0.99, "grad_norm": 1.7147690057754517, "learning_rate": 1.018635041352245e-08, "loss": 0.9039, "step": 17193 }, { "epoch": 0.99, "grad_norm": 1.8432629108428955, "learning_rate": 1.0102698735612315e-08, "loss": 0.8989, "step": 17194 }, { "epoch": 0.99, "grad_norm": 1.7604790925979614, "learning_rate": 1.0019391781153076e-08, "loss": 0.8715, "step": 17195 }, { "epoch": 0.99, "grad_norm": 1.8725320100784302, "learning_rate": 9.93642955301799e-09, "loss": 0.8321, "step": 17196 }, { "epoch": 0.99, "grad_norm": 1.7183960676193237, "learning_rate": 9.853812054071432e-09, "loss": 0.9397, "step": 17197 }, { "epoch": 0.99, "grad_norm": 1.798171043395996, "learning_rate": 9.771539287163345e-09, "loss": 0.8991, "step": 17198 }, { "epoch": 0.99, "grad_norm": 1.7045540809631348, "learning_rate": 9.68961125513257e-09, "loss": 0.8625, "step": 17199 }, { "epoch": 0.99, "grad_norm": 1.686225175857544, "learning_rate": 9.608027960805732e-09, "loss": 0.9079, "step": 17200 }, { "epoch": 0.99, "grad_norm": 1.7695108652114868, "learning_rate": 9.52678940699947e-09, "loss": 0.9661, "step": 17201 }, { "epoch": 0.99, "grad_norm": 1.8676689863204956, "learning_rate": 9.445895596517097e-09, "loss": 0.8829, "step": 17202 }, { "epoch": 0.99, "grad_norm": 1.7154688835144043, "learning_rate": 9.3653465321486e-09, "loss": 0.8037, "step": 17203 }, { "epoch": 0.99, "grad_norm": 1.7137490510940552, "learning_rate": 9.285142216675092e-09, "loss": 0.8875, "step": 17204 }, { "epoch": 0.99, "grad_norm": 1.7711985111236572, "learning_rate": 9.205282652862135e-09, "loss": 0.8764, "step": 17205 }, { "epoch": 0.99, "grad_norm": 1.8837743997573853, "learning_rate": 9.125767843467526e-09, "loss": 0.8796, "step": 17206 }, { "epoch": 0.99, "grad_norm": 1.1066405773162842, "learning_rate": 9.046597791234624e-09, "loss": 0.5435, "step": 17207 }, { "epoch": 0.99, "grad_norm": 1.7785933017730713, "learning_rate": 8.967772498894577e-09, "loss": 0.8934, "step": 17208 }, { "epoch": 0.99, "grad_norm": 1.7028650045394897, "learning_rate": 8.889291969167435e-09, "loss": 0.92, "step": 17209 }, { "epoch": 0.99, "grad_norm": 1.9259285926818848, "learning_rate": 8.811156204762139e-09, "loss": 0.9289, "step": 17210 }, { "epoch": 0.99, "grad_norm": 1.9579336643218994, "learning_rate": 8.733365208374312e-09, "loss": 0.9149, "step": 17211 }, { "epoch": 0.99, "grad_norm": 1.8345398902893066, "learning_rate": 8.655918982689582e-09, "loss": 0.8363, "step": 17212 }, { "epoch": 0.99, "grad_norm": 1.728969931602478, "learning_rate": 8.578817530378036e-09, "loss": 0.9045, "step": 17213 }, { "epoch": 0.99, "grad_norm": 1.7376890182495117, "learning_rate": 8.5020608541031e-09, "loss": 0.7658, "step": 17214 }, { "epoch": 0.99, "grad_norm": 1.830384373664856, "learning_rate": 8.425648956510434e-09, "loss": 1.0337, "step": 17215 }, { "epoch": 0.99, "grad_norm": 1.7596899271011353, "learning_rate": 8.34958184023904e-09, "loss": 0.8931, "step": 17216 }, { "epoch": 0.99, "grad_norm": 1.8630993366241455, "learning_rate": 8.273859507913485e-09, "loss": 0.9106, "step": 17217 }, { "epoch": 0.99, "grad_norm": 1.742850422859192, "learning_rate": 8.19848196214501e-09, "loss": 0.9151, "step": 17218 }, { "epoch": 0.99, "grad_norm": 1.7061578035354614, "learning_rate": 8.123449205537093e-09, "loss": 0.8896, "step": 17219 }, { "epoch": 0.99, "grad_norm": 1.7565820217132568, "learning_rate": 8.04876124067766e-09, "loss": 0.8627, "step": 17220 }, { "epoch": 0.99, "grad_norm": 1.7931569814682007, "learning_rate": 7.974418070143541e-09, "loss": 0.9215, "step": 17221 }, { "epoch": 0.99, "grad_norm": 1.763895869255066, "learning_rate": 7.900419696500461e-09, "loss": 0.9329, "step": 17222 }, { "epoch": 0.99, "grad_norm": 1.82549250125885, "learning_rate": 7.826766122303042e-09, "loss": 0.8678, "step": 17223 }, { "epoch": 0.99, "grad_norm": 1.936253309249878, "learning_rate": 7.753457350091476e-09, "loss": 0.8659, "step": 17224 }, { "epoch": 0.99, "grad_norm": 1.8933451175689697, "learning_rate": 7.680493382395959e-09, "loss": 1.0177, "step": 17225 }, { "epoch": 0.99, "grad_norm": 1.8975743055343628, "learning_rate": 7.607874221733369e-09, "loss": 0.8707, "step": 17226 }, { "epoch": 0.99, "grad_norm": 1.8239986896514893, "learning_rate": 7.5355998706117e-09, "loss": 0.9135, "step": 17227 }, { "epoch": 0.99, "grad_norm": 1.7205934524536133, "learning_rate": 7.463670331523399e-09, "loss": 0.8547, "step": 17228 }, { "epoch": 0.99, "grad_norm": 1.576512336730957, "learning_rate": 7.392085606949817e-09, "loss": 0.8946, "step": 17229 }, { "epoch": 0.99, "grad_norm": 1.7212175130844116, "learning_rate": 7.320845699363421e-09, "loss": 0.8746, "step": 17230 }, { "epoch": 0.99, "grad_norm": 1.6207765340805054, "learning_rate": 7.249950611220025e-09, "loss": 0.8747, "step": 17231 }, { "epoch": 0.99, "grad_norm": 1.6379351615905762, "learning_rate": 7.1794003449676684e-09, "loss": 0.8458, "step": 17232 }, { "epoch": 0.99, "grad_norm": 1.757816195487976, "learning_rate": 7.109194903041073e-09, "loss": 0.9361, "step": 17233 }, { "epoch": 0.99, "grad_norm": 1.1014716625213623, "learning_rate": 7.0393342878616325e-09, "loss": 0.5234, "step": 17234 }, { "epoch": 0.99, "grad_norm": 1.861232876777649, "learning_rate": 6.969818501839643e-09, "loss": 0.8701, "step": 17235 }, { "epoch": 0.99, "grad_norm": 1.7594435214996338, "learning_rate": 6.900647547376516e-09, "loss": 0.8218, "step": 17236 }, { "epoch": 0.99, "grad_norm": 1.704912543296814, "learning_rate": 6.831821426855901e-09, "loss": 0.8592, "step": 17237 }, { "epoch": 0.99, "grad_norm": 1.8003016710281372, "learning_rate": 6.763340142654784e-09, "loss": 0.9959, "step": 17238 }, { "epoch": 0.99, "grad_norm": 1.7108994722366333, "learning_rate": 6.695203697136832e-09, "loss": 0.8467, "step": 17239 }, { "epoch": 0.99, "grad_norm": 1.994081735610962, "learning_rate": 6.6274120926512755e-09, "loss": 0.8885, "step": 17240 }, { "epoch": 0.99, "grad_norm": 1.7196333408355713, "learning_rate": 6.559965331538465e-09, "loss": 0.8546, "step": 17241 }, { "epoch": 0.99, "grad_norm": 1.8456125259399414, "learning_rate": 6.492863416125428e-09, "loss": 0.9658, "step": 17242 }, { "epoch": 0.99, "grad_norm": 1.581201434135437, "learning_rate": 6.4261063487292e-09, "loss": 0.8655, "step": 17243 }, { "epoch": 0.99, "grad_norm": 1.8731552362442017, "learning_rate": 6.3596941316501624e-09, "loss": 0.8043, "step": 17244 }, { "epoch": 0.99, "grad_norm": 1.7131097316741943, "learning_rate": 6.293626767183148e-09, "loss": 0.9556, "step": 17245 }, { "epoch": 0.99, "grad_norm": 1.699301838874817, "learning_rate": 6.2279042576074425e-09, "loss": 0.9659, "step": 17246 }, { "epoch": 0.99, "grad_norm": 1.8515162467956543, "learning_rate": 6.162526605189012e-09, "loss": 0.9216, "step": 17247 }, { "epoch": 0.99, "grad_norm": 1.698272705078125, "learning_rate": 6.097493812186051e-09, "loss": 0.9046, "step": 17248 }, { "epoch": 0.99, "grad_norm": 2.0736775398254395, "learning_rate": 6.032805880841209e-09, "loss": 0.8813, "step": 17249 }, { "epoch": 0.99, "grad_norm": 1.8179266452789307, "learning_rate": 5.9684628133871435e-09, "loss": 0.8279, "step": 17250 }, { "epoch": 0.99, "grad_norm": 1.759022831916809, "learning_rate": 5.904464612044303e-09, "loss": 0.8102, "step": 17251 }, { "epoch": 0.99, "grad_norm": 1.745347261428833, "learning_rate": 5.840811279020919e-09, "loss": 0.8959, "step": 17252 }, { "epoch": 0.99, "grad_norm": 1.802207589149475, "learning_rate": 5.777502816514124e-09, "loss": 0.8725, "step": 17253 }, { "epoch": 0.99, "grad_norm": 1.6186901330947876, "learning_rate": 5.714539226707727e-09, "loss": 0.906, "step": 17254 }, { "epoch": 0.99, "grad_norm": 1.7447986602783203, "learning_rate": 5.651920511774433e-09, "loss": 0.8402, "step": 17255 }, { "epoch": 0.99, "grad_norm": 1.7617939710617065, "learning_rate": 5.5896466738758485e-09, "loss": 0.8709, "step": 17256 }, { "epoch": 0.99, "grad_norm": 1.8744162321090698, "learning_rate": 5.527717715159142e-09, "loss": 0.8727, "step": 17257 }, { "epoch": 0.99, "grad_norm": 1.0165075063705444, "learning_rate": 5.466133637763715e-09, "loss": 0.5246, "step": 17258 }, { "epoch": 0.99, "grad_norm": 1.9690394401550293, "learning_rate": 5.404894443812314e-09, "loss": 0.9476, "step": 17259 }, { "epoch": 0.99, "grad_norm": 1.9371414184570312, "learning_rate": 5.344000135419913e-09, "loss": 0.863, "step": 17260 }, { "epoch": 0.99, "grad_norm": 1.7504358291625977, "learning_rate": 5.283450714687055e-09, "loss": 0.9737, "step": 17261 }, { "epoch": 0.99, "grad_norm": 1.7477630376815796, "learning_rate": 5.223246183703179e-09, "loss": 0.881, "step": 17262 }, { "epoch": 0.99, "grad_norm": 1.6652309894561768, "learning_rate": 5.163386544545512e-09, "loss": 0.9266, "step": 17263 }, { "epoch": 0.99, "grad_norm": 1.9076701402664185, "learning_rate": 5.10387179928018e-09, "loss": 0.9125, "step": 17264 }, { "epoch": 0.99, "grad_norm": 1.5830901861190796, "learning_rate": 5.044701949961095e-09, "loss": 0.851, "step": 17265 }, { "epoch": 0.99, "grad_norm": 1.8062587976455688, "learning_rate": 4.985876998628847e-09, "loss": 0.9222, "step": 17266 }, { "epoch": 0.99, "grad_norm": 1.8255336284637451, "learning_rate": 4.927396947315144e-09, "loss": 0.8615, "step": 17267 }, { "epoch": 0.99, "grad_norm": 1.905593752861023, "learning_rate": 4.869261798035041e-09, "loss": 0.8791, "step": 17268 }, { "epoch": 0.99, "grad_norm": 1.7742645740509033, "learning_rate": 4.811471552798042e-09, "loss": 0.93, "step": 17269 }, { "epoch": 0.99, "grad_norm": 1.9172112941741943, "learning_rate": 4.754026213595886e-09, "loss": 0.8941, "step": 17270 }, { "epoch": 0.99, "grad_norm": 1.6594692468643188, "learning_rate": 4.696925782411432e-09, "loss": 0.875, "step": 17271 }, { "epoch": 0.99, "grad_norm": 1.7498916387557983, "learning_rate": 4.640170261216437e-09, "loss": 0.9888, "step": 17272 }, { "epoch": 0.99, "grad_norm": 1.7695399522781372, "learning_rate": 4.583759651967113e-09, "loss": 0.9271, "step": 17273 }, { "epoch": 0.99, "grad_norm": 1.7547574043273926, "learning_rate": 4.5276939566119004e-09, "loss": 0.9253, "step": 17274 }, { "epoch": 0.99, "grad_norm": 2.035147190093994, "learning_rate": 4.47197317708481e-09, "loss": 0.8427, "step": 17275 }, { "epoch": 0.99, "grad_norm": 1.783646583557129, "learning_rate": 4.4165973153076355e-09, "loss": 0.9137, "step": 17276 }, { "epoch": 0.99, "grad_norm": 1.8231968879699707, "learning_rate": 4.3615663731932936e-09, "loss": 0.9847, "step": 17277 }, { "epoch": 0.99, "grad_norm": 2.018293857574463, "learning_rate": 4.306880352639154e-09, "loss": 0.8412, "step": 17278 }, { "epoch": 0.99, "grad_norm": 1.7986867427825928, "learning_rate": 4.252539255532595e-09, "loss": 0.9551, "step": 17279 }, { "epoch": 0.99, "grad_norm": 1.831531286239624, "learning_rate": 4.198543083748785e-09, "loss": 0.962, "step": 17280 }, { "epoch": 0.99, "grad_norm": 1.8445018529891968, "learning_rate": 4.144891839150678e-09, "loss": 0.9136, "step": 17281 }, { "epoch": 0.99, "grad_norm": 1.7745819091796875, "learning_rate": 4.091585523591235e-09, "loss": 0.976, "step": 17282 }, { "epoch": 0.99, "grad_norm": 1.8485870361328125, "learning_rate": 4.038624138907876e-09, "loss": 0.9329, "step": 17283 }, { "epoch": 0.99, "grad_norm": 1.834294319152832, "learning_rate": 3.9860076869291385e-09, "loss": 0.9365, "step": 17284 }, { "epoch": 0.99, "grad_norm": 1.7621880769729614, "learning_rate": 3.933736169471347e-09, "loss": 0.9385, "step": 17285 }, { "epoch": 0.99, "grad_norm": 1.733346700668335, "learning_rate": 3.881809588336394e-09, "loss": 0.9959, "step": 17286 }, { "epoch": 0.99, "grad_norm": 1.7475131750106812, "learning_rate": 3.8302279453172885e-09, "loss": 0.9017, "step": 17287 }, { "epoch": 0.99, "grad_norm": 0.9727720022201538, "learning_rate": 3.77899124219483e-09, "loss": 0.5222, "step": 17288 }, { "epoch": 0.99, "grad_norm": 1.8779473304748535, "learning_rate": 3.728099480735381e-09, "loss": 0.8489, "step": 17289 }, { "epoch": 0.99, "grad_norm": 1.8390697240829468, "learning_rate": 3.677552662695316e-09, "loss": 0.9777, "step": 17290 }, { "epoch": 0.99, "grad_norm": 1.7679295539855957, "learning_rate": 3.6273507898199057e-09, "loss": 0.8226, "step": 17291 }, { "epoch": 0.99, "grad_norm": 1.679634690284729, "learning_rate": 3.577493863841097e-09, "loss": 0.9521, "step": 17292 }, { "epoch": 0.99, "grad_norm": 1.8505855798721313, "learning_rate": 3.5279818864786264e-09, "loss": 0.9067, "step": 17293 }, { "epoch": 0.99, "grad_norm": 1.7914808988571167, "learning_rate": 3.4788148594411262e-09, "loss": 0.8174, "step": 17294 }, { "epoch": 0.99, "grad_norm": 1.918171763420105, "learning_rate": 3.429992784426128e-09, "loss": 0.9265, "step": 17295 }, { "epoch": 0.99, "grad_norm": 1.758527159690857, "learning_rate": 3.3815156631178404e-09, "loss": 0.9308, "step": 17296 }, { "epoch": 0.99, "grad_norm": 1.7671966552734375, "learning_rate": 3.3333834971882582e-09, "loss": 0.8315, "step": 17297 }, { "epoch": 0.99, "grad_norm": 1.8305507898330688, "learning_rate": 3.2855962883004968e-09, "loss": 0.9141, "step": 17298 }, { "epoch": 0.99, "grad_norm": 1.8487319946289062, "learning_rate": 3.2381540381010157e-09, "loss": 0.8533, "step": 17299 }, { "epoch": 0.99, "grad_norm": 1.8038307428359985, "learning_rate": 3.1910567482285048e-09, "loss": 0.8889, "step": 17300 }, { "epoch": 0.99, "grad_norm": 1.8449538946151733, "learning_rate": 3.1443044203072205e-09, "loss": 0.9464, "step": 17301 }, { "epoch": 0.99, "grad_norm": 1.7164820432662964, "learning_rate": 3.0978970559503164e-09, "loss": 0.8779, "step": 17302 }, { "epoch": 0.99, "grad_norm": 1.8161195516586304, "learning_rate": 3.0518346567609546e-09, "loss": 0.864, "step": 17303 }, { "epoch": 0.99, "grad_norm": 1.8048014640808105, "learning_rate": 3.0061172243267543e-09, "loss": 0.925, "step": 17304 }, { "epoch": 0.99, "grad_norm": 1.6757572889328003, "learning_rate": 2.9607447602264526e-09, "loss": 0.8084, "step": 17305 }, { "epoch": 0.99, "grad_norm": 1.8629672527313232, "learning_rate": 2.915717266025464e-09, "loss": 0.9228, "step": 17306 }, { "epoch": 0.99, "grad_norm": 1.797763466835022, "learning_rate": 2.8710347432769903e-09, "loss": 0.8502, "step": 17307 }, { "epoch": 0.99, "grad_norm": 1.6682815551757812, "learning_rate": 2.8266971935231312e-09, "loss": 0.9116, "step": 17308 }, { "epoch": 0.99, "grad_norm": 1.71448814868927, "learning_rate": 2.7827046182937744e-09, "loss": 0.8901, "step": 17309 }, { "epoch": 0.99, "grad_norm": 1.8518085479736328, "learning_rate": 2.7390570191077048e-09, "loss": 0.9126, "step": 17310 }, { "epoch": 0.99, "grad_norm": 1.6976794004440308, "learning_rate": 2.695754397470385e-09, "loss": 0.9096, "step": 17311 }, { "epoch": 0.99, "grad_norm": 1.6713060140609741, "learning_rate": 2.6527967548761747e-09, "loss": 0.9112, "step": 17312 }, { "epoch": 0.99, "grad_norm": 1.8100568056106567, "learning_rate": 2.610184092807222e-09, "loss": 0.9074, "step": 17313 }, { "epoch": 0.99, "grad_norm": 1.8403061628341675, "learning_rate": 2.5679164127345726e-09, "loss": 0.9149, "step": 17314 }, { "epoch": 0.99, "grad_norm": 1.8030495643615723, "learning_rate": 2.5259937161159485e-09, "loss": 0.8348, "step": 17315 }, { "epoch": 0.99, "grad_norm": 1.9048752784729004, "learning_rate": 2.4844160043990817e-09, "loss": 0.8158, "step": 17316 }, { "epoch": 0.99, "grad_norm": 1.5969892740249634, "learning_rate": 2.4431832790172693e-09, "loss": 0.8478, "step": 17317 }, { "epoch": 0.99, "grad_norm": 1.7424389123916626, "learning_rate": 2.402295541394928e-09, "loss": 0.9026, "step": 17318 }, { "epoch": 0.99, "grad_norm": 1.7886524200439453, "learning_rate": 2.36175279294204e-09, "loss": 0.844, "step": 17319 }, { "epoch": 0.99, "grad_norm": 1.6108866930007935, "learning_rate": 2.3215550350574877e-09, "loss": 0.868, "step": 17320 }, { "epoch": 0.99, "grad_norm": 1.8164024353027344, "learning_rate": 2.281702269129049e-09, "loss": 0.9111, "step": 17321 }, { "epoch": 0.99, "grad_norm": 1.7763748168945312, "learning_rate": 2.2421944965311803e-09, "loss": 0.9436, "step": 17322 }, { "epoch": 0.99, "grad_norm": 1.7680420875549316, "learning_rate": 2.2030317186283457e-09, "loss": 0.9507, "step": 17323 }, { "epoch": 0.99, "grad_norm": 1.6039468050003052, "learning_rate": 2.164213936770576e-09, "loss": 0.846, "step": 17324 }, { "epoch": 0.99, "grad_norm": 1.952896237373352, "learning_rate": 2.125741152297911e-09, "loss": 0.9217, "step": 17325 }, { "epoch": 0.99, "grad_norm": 2.07783579826355, "learning_rate": 2.087613366538177e-09, "loss": 0.8864, "step": 17326 }, { "epoch": 0.99, "grad_norm": 1.6380054950714111, "learning_rate": 2.0498305808069886e-09, "loss": 0.8736, "step": 17327 }, { "epoch": 0.99, "grad_norm": 1.9388900995254517, "learning_rate": 2.0123927964077473e-09, "loss": 0.8911, "step": 17328 }, { "epoch": 0.99, "grad_norm": 1.9390250444412231, "learning_rate": 1.9753000146327527e-09, "loss": 0.8948, "step": 17329 }, { "epoch": 0.99, "grad_norm": 1.7530548572540283, "learning_rate": 1.9385522367620922e-09, "loss": 0.925, "step": 17330 }, { "epoch": 0.99, "grad_norm": 1.6160852909088135, "learning_rate": 1.9021494640625302e-09, "loss": 0.7857, "step": 17331 }, { "epoch": 0.99, "grad_norm": 1.7491726875305176, "learning_rate": 1.8660916977919495e-09, "loss": 0.8592, "step": 17332 }, { "epoch": 0.99, "grad_norm": 1.7526048421859741, "learning_rate": 1.8303789391937999e-09, "loss": 0.8532, "step": 17333 }, { "epoch": 0.99, "grad_norm": 1.6760650873184204, "learning_rate": 1.7950111895004285e-09, "loss": 0.8776, "step": 17334 }, { "epoch": 0.99, "grad_norm": 1.8032807111740112, "learning_rate": 1.7599884499319708e-09, "loss": 0.8643, "step": 17335 }, { "epoch": 0.99, "grad_norm": 1.9885274171829224, "learning_rate": 1.72531072169857e-09, "loss": 0.8011, "step": 17336 }, { "epoch": 0.99, "grad_norm": 1.9964971542358398, "learning_rate": 1.6909780059948255e-09, "loss": 0.8789, "step": 17337 }, { "epoch": 0.99, "grad_norm": 1.8368868827819824, "learning_rate": 1.656990304006456e-09, "loss": 0.8164, "step": 17338 }, { "epoch": 0.99, "grad_norm": 0.9991557598114014, "learning_rate": 1.623347616905857e-09, "loss": 0.5255, "step": 17339 }, { "epoch": 0.99, "grad_norm": 1.8376662731170654, "learning_rate": 1.5900499458543216e-09, "loss": 0.921, "step": 17340 }, { "epoch": 0.99, "grad_norm": 1.7481489181518555, "learning_rate": 1.557097292000931e-09, "loss": 0.8681, "step": 17341 }, { "epoch": 0.99, "grad_norm": 1.8019217252731323, "learning_rate": 1.524489656482553e-09, "loss": 0.898, "step": 17342 }, { "epoch": 0.99, "grad_norm": 1.7518693208694458, "learning_rate": 1.4922270404238436e-09, "loss": 0.904, "step": 17343 }, { "epoch": 0.99, "grad_norm": 1.019370198249817, "learning_rate": 1.4603094449394673e-09, "loss": 0.551, "step": 17344 }, { "epoch": 0.99, "grad_norm": 1.8118783235549927, "learning_rate": 1.4287368711296546e-09, "loss": 0.9583, "step": 17345 }, { "epoch": 0.99, "grad_norm": 1.884177565574646, "learning_rate": 1.3975093200835344e-09, "loss": 0.847, "step": 17346 }, { "epoch": 0.99, "grad_norm": 1.8861427307128906, "learning_rate": 1.3666267928802435e-09, "loss": 0.9287, "step": 17347 }, { "epoch": 0.99, "grad_norm": 1.8120076656341553, "learning_rate": 1.3360892905844858e-09, "loss": 0.9, "step": 17348 }, { "epoch": 0.99, "grad_norm": 1.8624743223190308, "learning_rate": 1.305896814249863e-09, "loss": 0.8985, "step": 17349 }, { "epoch": 1.0, "grad_norm": 1.8163299560546875, "learning_rate": 1.2760493649188744e-09, "loss": 0.923, "step": 17350 }, { "epoch": 1.0, "grad_norm": 1.7511534690856934, "learning_rate": 1.246546943621807e-09, "loss": 0.8586, "step": 17351 }, { "epoch": 1.0, "grad_norm": 1.720291018486023, "learning_rate": 1.2173895513745148e-09, "loss": 0.8695, "step": 17352 }, { "epoch": 1.0, "grad_norm": 2.0976109504699707, "learning_rate": 1.188577189186191e-09, "loss": 0.9732, "step": 17353 }, { "epoch": 1.0, "grad_norm": 1.7866488695144653, "learning_rate": 1.1601098580482639e-09, "loss": 0.9215, "step": 17354 }, { "epoch": 1.0, "grad_norm": 1.6885319948196411, "learning_rate": 1.1319875589443918e-09, "loss": 0.8194, "step": 17355 }, { "epoch": 1.0, "grad_norm": 0.9684853553771973, "learning_rate": 1.1042102928460196e-09, "loss": 0.4855, "step": 17356 }, { "epoch": 1.0, "grad_norm": 1.9425514936447144, "learning_rate": 1.0767780607090494e-09, "loss": 0.8542, "step": 17357 }, { "epoch": 1.0, "grad_norm": 1.6254544258117676, "learning_rate": 1.0496908634827218e-09, "loss": 0.9099, "step": 17358 }, { "epoch": 1.0, "grad_norm": 1.193384051322937, "learning_rate": 1.0229487020996243e-09, "loss": 0.4737, "step": 17359 }, { "epoch": 1.0, "grad_norm": 1.7835793495178223, "learning_rate": 9.965515774845723e-10, "loss": 0.8662, "step": 17360 }, { "epoch": 1.0, "grad_norm": 1.7936466932296753, "learning_rate": 9.70499490546839e-10, "loss": 0.9262, "step": 17361 }, { "epoch": 1.0, "grad_norm": 1.8469680547714233, "learning_rate": 9.447924421868148e-10, "loss": 0.9731, "step": 17362 }, { "epoch": 1.0, "grad_norm": 1.8499726057052612, "learning_rate": 9.19430433290458e-10, "loss": 0.9678, "step": 17363 }, { "epoch": 1.0, "grad_norm": 1.8740984201431274, "learning_rate": 8.944134647326241e-10, "loss": 0.9736, "step": 17364 }, { "epoch": 1.0, "grad_norm": 1.848207712173462, "learning_rate": 8.697415373770668e-10, "loss": 0.9566, "step": 17365 }, { "epoch": 1.0, "grad_norm": 1.7580366134643555, "learning_rate": 8.454146520764373e-10, "loss": 0.9085, "step": 17366 }, { "epoch": 1.0, "grad_norm": 1.8541340827941895, "learning_rate": 8.214328096678437e-10, "loss": 0.857, "step": 17367 }, { "epoch": 1.0, "grad_norm": 1.8808695077896118, "learning_rate": 7.977960109806226e-10, "loss": 0.8506, "step": 17368 }, { "epoch": 1.0, "grad_norm": 1.6626999378204346, "learning_rate": 7.745042568296779e-10, "loss": 0.863, "step": 17369 }, { "epoch": 1.0, "grad_norm": 1.829413652420044, "learning_rate": 7.515575480188109e-10, "loss": 0.9216, "step": 17370 }, { "epoch": 1.0, "grad_norm": 1.7048414945602417, "learning_rate": 7.289558853407208e-10, "loss": 0.8215, "step": 17371 }, { "epoch": 1.0, "grad_norm": 1.8696891069412231, "learning_rate": 7.066992695736741e-10, "loss": 0.9779, "step": 17372 }, { "epoch": 1.0, "grad_norm": 1.8192112445831299, "learning_rate": 6.847877014870552e-10, "loss": 0.9278, "step": 17373 }, { "epoch": 1.0, "grad_norm": 1.810707688331604, "learning_rate": 6.632211818358159e-10, "loss": 0.8488, "step": 17374 }, { "epoch": 1.0, "grad_norm": 1.922389030456543, "learning_rate": 6.419997113649156e-10, "loss": 0.9139, "step": 17375 }, { "epoch": 1.0, "grad_norm": 1.8385673761367798, "learning_rate": 6.211232908071018e-10, "loss": 0.8446, "step": 17376 }, { "epoch": 1.0, "grad_norm": 1.6024255752563477, "learning_rate": 6.00591920881799e-10, "loss": 0.9176, "step": 17377 }, { "epoch": 1.0, "grad_norm": 1.808584213256836, "learning_rate": 5.804056022973292e-10, "loss": 0.935, "step": 17378 }, { "epoch": 1.0, "grad_norm": 2.0808799266815186, "learning_rate": 5.605643357520229e-10, "loss": 0.9457, "step": 17379 }, { "epoch": 1.0, "grad_norm": 0.9911454916000366, "learning_rate": 5.410681219286673e-10, "loss": 0.5222, "step": 17380 }, { "epoch": 1.0, "grad_norm": 1.739420771598816, "learning_rate": 5.219169615000574e-10, "loss": 0.9829, "step": 17381 }, { "epoch": 1.0, "grad_norm": 1.7970499992370605, "learning_rate": 5.031108551289965e-10, "loss": 0.8973, "step": 17382 }, { "epoch": 1.0, "grad_norm": 1.724595308303833, "learning_rate": 4.846498034616342e-10, "loss": 0.9242, "step": 17383 }, { "epoch": 1.0, "grad_norm": 1.6912459135055542, "learning_rate": 4.665338071374592e-10, "loss": 0.9215, "step": 17384 }, { "epoch": 1.0, "grad_norm": 1.6548596620559692, "learning_rate": 4.487628667804167e-10, "loss": 0.8796, "step": 17385 }, { "epoch": 1.0, "grad_norm": 1.742425799369812, "learning_rate": 4.3133698300446e-10, "loss": 0.7722, "step": 17386 }, { "epoch": 1.0, "grad_norm": 1.6803466081619263, "learning_rate": 4.1425615641021986e-10, "loss": 0.8334, "step": 17387 }, { "epoch": 1.0, "grad_norm": 1.7410744428634644, "learning_rate": 3.975203875861144e-10, "loss": 0.8724, "step": 17388 }, { "epoch": 1.0, "grad_norm": 1.7138065099716187, "learning_rate": 3.8112967711168013e-10, "loss": 0.9301, "step": 17389 }, { "epoch": 1.0, "grad_norm": 1.581705093383789, "learning_rate": 3.650840255520205e-10, "loss": 0.8427, "step": 17390 }, { "epoch": 1.0, "grad_norm": 1.812705397605896, "learning_rate": 3.4938343346002657e-10, "loss": 0.9257, "step": 17391 }, { "epoch": 1.0, "grad_norm": 1.7295976877212524, "learning_rate": 3.3402790137859744e-10, "loss": 0.9071, "step": 17392 }, { "epoch": 1.0, "grad_norm": 1.7323287725448608, "learning_rate": 3.190174298361992e-10, "loss": 0.9106, "step": 17393 }, { "epoch": 1.0, "grad_norm": 1.8838635683059692, "learning_rate": 3.043520193513061e-10, "loss": 0.8679, "step": 17394 }, { "epoch": 1.0, "grad_norm": 1.7170557975769043, "learning_rate": 2.9003167043017975e-10, "loss": 0.7926, "step": 17395 }, { "epoch": 1.0, "grad_norm": 1.724204182624817, "learning_rate": 2.760563835679797e-10, "loss": 0.8397, "step": 17396 }, { "epoch": 1.0, "grad_norm": 1.85691237449646, "learning_rate": 2.6242615924543245e-10, "loss": 0.9537, "step": 17397 }, { "epoch": 1.0, "grad_norm": 1.871584415435791, "learning_rate": 2.4914099793327263e-10, "loss": 0.9361, "step": 17398 }, { "epoch": 1.0, "grad_norm": 1.8563990592956543, "learning_rate": 2.3620090009002227e-10, "loss": 0.843, "step": 17399 }, { "epoch": 1.0, "grad_norm": 1.7285387516021729, "learning_rate": 2.236058661619911e-10, "loss": 0.8923, "step": 17400 }, { "epoch": 1.0, "grad_norm": 1.770787000656128, "learning_rate": 2.113558965843865e-10, "loss": 0.8035, "step": 17401 }, { "epoch": 1.0, "grad_norm": 1.6742857694625854, "learning_rate": 1.9945099178020345e-10, "loss": 0.8655, "step": 17402 }, { "epoch": 1.0, "grad_norm": 1.7817342281341553, "learning_rate": 1.8789115215911424e-10, "loss": 0.8919, "step": 17403 }, { "epoch": 1.0, "grad_norm": 1.7028727531433105, "learning_rate": 1.7667637812079918e-10, "loss": 0.8223, "step": 17404 }, { "epoch": 1.0, "grad_norm": 1.6603620052337646, "learning_rate": 1.6580667005161589e-10, "loss": 0.8239, "step": 17405 }, { "epoch": 1.0, "grad_norm": 1.749579668045044, "learning_rate": 1.5528202832681972e-10, "loss": 0.8359, "step": 17406 }, { "epoch": 1.0, "grad_norm": 1.78286874294281, "learning_rate": 1.4510245331056383e-10, "loss": 0.8856, "step": 17407 }, { "epoch": 1.0, "grad_norm": 1.8148283958435059, "learning_rate": 1.3526794535256848e-10, "loss": 0.9131, "step": 17408 }, { "epoch": 1.0, "grad_norm": 2.525618076324463, "learning_rate": 1.2577850479367214e-10, "loss": 0.8512, "step": 17409 }, { "epoch": 1.0, "grad_norm": 1.7967215776443481, "learning_rate": 1.1663413196028039e-10, "loss": 0.9035, "step": 17410 }, { "epoch": 1.0, "grad_norm": 1.115170955657959, "learning_rate": 1.0783482716880677e-10, "loss": 0.5575, "step": 17411 }, { "epoch": 1.0, "grad_norm": 1.8722461462020874, "learning_rate": 9.938059072123196e-11, "loss": 0.9315, "step": 17412 }, { "epoch": 1.0, "grad_norm": 1.7386447191238403, "learning_rate": 9.127142291176506e-11, "loss": 0.8546, "step": 17413 }, { "epoch": 1.0, "grad_norm": 1.6093006134033203, "learning_rate": 8.35073240179618e-11, "loss": 0.9021, "step": 17414 }, { "epoch": 1.0, "grad_norm": 1.8057047128677368, "learning_rate": 7.60882943096064e-11, "loss": 0.887, "step": 17415 }, { "epoch": 1.0, "grad_norm": 2.009599208831787, "learning_rate": 6.901433404093993e-11, "loss": 0.933, "step": 17416 }, { "epoch": 1.0, "grad_norm": 1.808866024017334, "learning_rate": 6.228544345732168e-11, "loss": 0.8222, "step": 17417 }, { "epoch": 1.0, "grad_norm": 1.743621587753296, "learning_rate": 5.5901622789678035e-11, "loss": 0.9033, "step": 17418 }, { "epoch": 1.0, "grad_norm": 1.8964340686798096, "learning_rate": 4.98628722600536e-11, "loss": 0.8462, "step": 17419 }, { "epoch": 1.0, "grad_norm": 1.7574759721755981, "learning_rate": 4.416919207606007e-11, "loss": 0.9225, "step": 17420 }, { "epoch": 1.0, "grad_norm": 1.6010372638702393, "learning_rate": 3.8820582434206944e-11, "loss": 0.7561, "step": 17421 }, { "epoch": 1.0, "grad_norm": 1.743086576461792, "learning_rate": 3.3817043517681e-11, "loss": 0.8319, "step": 17422 }, { "epoch": 1.0, "grad_norm": 1.7555707693099976, "learning_rate": 2.915857550078727e-11, "loss": 0.8599, "step": 17423 }, { "epoch": 1.0, "grad_norm": 1.6431801319122314, "learning_rate": 2.4845178544508074e-11, "loss": 0.914, "step": 17424 }, { "epoch": 1.0, "grad_norm": 1.865052342414856, "learning_rate": 2.087685279650309e-11, "loss": 0.8689, "step": 17425 }, { "epoch": 1.0, "grad_norm": 1.6582229137420654, "learning_rate": 1.7253598394439965e-11, "loss": 0.8424, "step": 17426 }, { "epoch": 1.0, "grad_norm": 3.0003459453582764, "learning_rate": 1.3975415462663678e-11, "loss": 0.8006, "step": 17427 }, { "epoch": 1.0, "grad_norm": 1.7288761138916016, "learning_rate": 1.1042304115527203e-11, "loss": 0.8445, "step": 17428 }, { "epoch": 1.0, "grad_norm": 1.8551204204559326, "learning_rate": 8.454264452950612e-12, "loss": 0.9486, "step": 17429 }, { "epoch": 1.0, "grad_norm": 1.8679002523422241, "learning_rate": 6.211296564861969e-12, "loss": 0.8834, "step": 17430 }, { "epoch": 1.0, "grad_norm": 1.8377928733825684, "learning_rate": 4.313400528976886e-12, "loss": 0.9727, "step": 17431 }, { "epoch": 1.0, "grad_norm": 1.751551628112793, "learning_rate": 2.7605764096882983e-12, "loss": 0.8406, "step": 17432 }, { "epoch": 1.0, "grad_norm": 1.9908881187438965, "learning_rate": 1.5528242613971345e-12, "loss": 0.9007, "step": 17433 }, { "epoch": 1.0, "grad_norm": 1.6909549236297607, "learning_rate": 6.901441262918696e-13, "loss": 0.9673, "step": 17434 }, { "epoch": 1.0, "grad_norm": 1.9202371835708618, "learning_rate": 1.7253603323830192e-13, "loss": 0.9763, "step": 17435 }, { "epoch": 1.0, "grad_norm": 2.094381332397461, "learning_rate": 0.0, "loss": 0.7425, "step": 17436 }, { "epoch": 1.0, "step": 17436, "total_flos": 6.360551734822673e+19, "train_loss": 0.9591581591121454, "train_runtime": 243882.3408, "train_samples_per_second": 9.151, "train_steps_per_second": 0.071 } ], "logging_steps": 1.0, "max_steps": 17436, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 6.360551734822673e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }