{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 9831, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 25.774307163187583, "learning_rate": 6.779661016949153e-08, "loss": 2.4261, "step": 1 }, { "epoch": 0.0, "grad_norm": 26.21884882896487, "learning_rate": 1.3559322033898305e-07, "loss": 2.4695, "step": 2 }, { "epoch": 0.0, "grad_norm": 16.482975314694887, "learning_rate": 2.0338983050847458e-07, "loss": 1.8327, "step": 3 }, { "epoch": 0.0, "grad_norm": 21.85451201104805, "learning_rate": 2.711864406779661e-07, "loss": 2.0734, "step": 4 }, { "epoch": 0.0, "grad_norm": 23.270952415339217, "learning_rate": 3.3898305084745766e-07, "loss": 2.236, "step": 5 }, { "epoch": 0.0, "grad_norm": 22.87877942813057, "learning_rate": 4.0677966101694916e-07, "loss": 2.2076, "step": 6 }, { "epoch": 0.0, "grad_norm": 21.776727913056515, "learning_rate": 4.745762711864407e-07, "loss": 2.1196, "step": 7 }, { "epoch": 0.0, "grad_norm": 23.25224020480987, "learning_rate": 5.423728813559322e-07, "loss": 2.345, "step": 8 }, { "epoch": 0.0, "grad_norm": 27.33668116834911, "learning_rate": 6.101694915254238e-07, "loss": 2.3076, "step": 9 }, { "epoch": 0.0, "grad_norm": 19.181907330178706, "learning_rate": 6.779661016949153e-07, "loss": 2.141, "step": 10 }, { "epoch": 0.0, "grad_norm": 23.615130567617697, "learning_rate": 7.457627118644069e-07, "loss": 2.2639, "step": 11 }, { "epoch": 0.0, "grad_norm": 26.579337894350388, "learning_rate": 8.135593220338983e-07, "loss": 2.2511, "step": 12 }, { "epoch": 0.0, "grad_norm": 23.051745481180966, "learning_rate": 8.813559322033899e-07, "loss": 2.0932, "step": 13 }, { "epoch": 0.0, "grad_norm": 23.07477432582864, "learning_rate": 9.491525423728814e-07, "loss": 2.1369, "step": 14 }, { "epoch": 0.0, "grad_norm": 21.56621520785092, "learning_rate": 1.016949152542373e-06, "loss": 2.1692, "step": 15 }, { "epoch": 0.0, "grad_norm": 21.349853220880675, "learning_rate": 1.0847457627118644e-06, "loss": 1.9588, "step": 16 }, { "epoch": 0.0, "grad_norm": 13.978393855508065, "learning_rate": 1.152542372881356e-06, "loss": 1.7003, "step": 17 }, { "epoch": 0.0, "grad_norm": 15.129076262634308, "learning_rate": 1.2203389830508477e-06, "loss": 1.9801, "step": 18 }, { "epoch": 0.0, "grad_norm": 13.20744557420552, "learning_rate": 1.288135593220339e-06, "loss": 1.7939, "step": 19 }, { "epoch": 0.0, "grad_norm": 11.203685104742611, "learning_rate": 1.3559322033898307e-06, "loss": 1.5948, "step": 20 }, { "epoch": 0.0, "grad_norm": 10.654869222087148, "learning_rate": 1.4237288135593222e-06, "loss": 1.6056, "step": 21 }, { "epoch": 0.0, "grad_norm": 11.814571724474261, "learning_rate": 1.4915254237288139e-06, "loss": 1.7941, "step": 22 }, { "epoch": 0.0, "grad_norm": 8.530069326697427, "learning_rate": 1.5593220338983054e-06, "loss": 1.2375, "step": 23 }, { "epoch": 0.0, "grad_norm": 10.32660567477036, "learning_rate": 1.6271186440677967e-06, "loss": 1.2946, "step": 24 }, { "epoch": 0.0, "grad_norm": 10.990179490365163, "learning_rate": 1.6949152542372882e-06, "loss": 1.1305, "step": 25 }, { "epoch": 0.0, "grad_norm": 6.380671795900115, "learning_rate": 1.7627118644067799e-06, "loss": 0.9932, "step": 26 }, { "epoch": 0.0, "grad_norm": 7.844307063285582, "learning_rate": 1.8305084745762714e-06, "loss": 1.0589, "step": 27 }, { "epoch": 0.0, "grad_norm": 5.994303435945046, "learning_rate": 1.8983050847457629e-06, "loss": 0.9073, "step": 28 }, { "epoch": 0.0, "grad_norm": 6.551427417951352, "learning_rate": 1.9661016949152544e-06, "loss": 1.0221, "step": 29 }, { "epoch": 0.0, "grad_norm": 5.42154176934472, "learning_rate": 2.033898305084746e-06, "loss": 0.9045, "step": 30 }, { "epoch": 0.0, "grad_norm": 3.6207875594724706, "learning_rate": 2.1016949152542374e-06, "loss": 0.8139, "step": 31 }, { "epoch": 0.0, "grad_norm": 2.1824465930868917, "learning_rate": 2.169491525423729e-06, "loss": 0.669, "step": 32 }, { "epoch": 0.0, "grad_norm": 2.706012292777276, "learning_rate": 2.2372881355932204e-06, "loss": 0.8022, "step": 33 }, { "epoch": 0.0, "grad_norm": 3.8251403517668487, "learning_rate": 2.305084745762712e-06, "loss": 0.8254, "step": 34 }, { "epoch": 0.0, "grad_norm": 2.984073023660238, "learning_rate": 2.372881355932204e-06, "loss": 0.9779, "step": 35 }, { "epoch": 0.0, "grad_norm": 2.4188064951429475, "learning_rate": 2.4406779661016953e-06, "loss": 0.7361, "step": 36 }, { "epoch": 0.0, "grad_norm": 2.3516733355792843, "learning_rate": 2.5084745762711864e-06, "loss": 0.8315, "step": 37 }, { "epoch": 0.0, "grad_norm": 2.2496309758042417, "learning_rate": 2.576271186440678e-06, "loss": 0.8022, "step": 38 }, { "epoch": 0.0, "grad_norm": 2.420808155220749, "learning_rate": 2.64406779661017e-06, "loss": 0.8303, "step": 39 }, { "epoch": 0.0, "grad_norm": 2.235898259469345, "learning_rate": 2.7118644067796613e-06, "loss": 0.8828, "step": 40 }, { "epoch": 0.0, "grad_norm": 2.259722995408095, "learning_rate": 2.779661016949153e-06, "loss": 0.7345, "step": 41 }, { "epoch": 0.0, "grad_norm": 2.5220575903145717, "learning_rate": 2.8474576271186443e-06, "loss": 0.8884, "step": 42 }, { "epoch": 0.0, "grad_norm": 2.01432745835319, "learning_rate": 2.915254237288136e-06, "loss": 0.716, "step": 43 }, { "epoch": 0.0, "grad_norm": 2.3912698546304783, "learning_rate": 2.9830508474576277e-06, "loss": 0.8186, "step": 44 }, { "epoch": 0.0, "grad_norm": 2.2176167549704413, "learning_rate": 3.0508474576271192e-06, "loss": 0.8315, "step": 45 }, { "epoch": 0.0, "grad_norm": 2.3542559647297248, "learning_rate": 3.1186440677966107e-06, "loss": 0.8938, "step": 46 }, { "epoch": 0.0, "grad_norm": 2.408850666287898, "learning_rate": 3.186440677966102e-06, "loss": 0.7443, "step": 47 }, { "epoch": 0.0, "grad_norm": 1.9403993418379064, "learning_rate": 3.2542372881355933e-06, "loss": 0.7316, "step": 48 }, { "epoch": 0.0, "grad_norm": 2.2266120044077855, "learning_rate": 3.322033898305085e-06, "loss": 0.823, "step": 49 }, { "epoch": 0.01, "grad_norm": 2.2149796287947052, "learning_rate": 3.3898305084745763e-06, "loss": 0.8493, "step": 50 }, { "epoch": 0.01, "grad_norm": 1.964248261933294, "learning_rate": 3.457627118644068e-06, "loss": 0.7657, "step": 51 }, { "epoch": 0.01, "grad_norm": 2.1369395484319402, "learning_rate": 3.5254237288135597e-06, "loss": 0.7717, "step": 52 }, { "epoch": 0.01, "grad_norm": 1.9324624073386334, "learning_rate": 3.5932203389830512e-06, "loss": 0.7885, "step": 53 }, { "epoch": 0.01, "grad_norm": 1.7645971001845546, "learning_rate": 3.6610169491525427e-06, "loss": 0.6827, "step": 54 }, { "epoch": 0.01, "grad_norm": 1.9794442176736449, "learning_rate": 3.7288135593220342e-06, "loss": 0.7341, "step": 55 }, { "epoch": 0.01, "grad_norm": 2.205253413175247, "learning_rate": 3.7966101694915257e-06, "loss": 0.8477, "step": 56 }, { "epoch": 0.01, "grad_norm": 1.9368774918084077, "learning_rate": 3.864406779661018e-06, "loss": 0.7182, "step": 57 }, { "epoch": 0.01, "grad_norm": 1.8331552352287894, "learning_rate": 3.932203389830509e-06, "loss": 0.6232, "step": 58 }, { "epoch": 0.01, "grad_norm": 2.1664795950928943, "learning_rate": 4.000000000000001e-06, "loss": 0.7919, "step": 59 }, { "epoch": 0.01, "grad_norm": 1.9510344143430243, "learning_rate": 4.067796610169492e-06, "loss": 0.7199, "step": 60 }, { "epoch": 0.01, "grad_norm": 2.1394368935709833, "learning_rate": 4.135593220338983e-06, "loss": 0.846, "step": 61 }, { "epoch": 0.01, "grad_norm": 1.9579721708838056, "learning_rate": 4.203389830508475e-06, "loss": 0.7141, "step": 62 }, { "epoch": 0.01, "grad_norm": 2.257998109160227, "learning_rate": 4.271186440677967e-06, "loss": 0.7374, "step": 63 }, { "epoch": 0.01, "grad_norm": 1.9561971563274394, "learning_rate": 4.338983050847458e-06, "loss": 0.6967, "step": 64 }, { "epoch": 0.01, "grad_norm": 1.961780054829146, "learning_rate": 4.40677966101695e-06, "loss": 0.8134, "step": 65 }, { "epoch": 0.01, "grad_norm": 1.9708755542520096, "learning_rate": 4.474576271186441e-06, "loss": 0.6124, "step": 66 }, { "epoch": 0.01, "grad_norm": 2.1606992603731294, "learning_rate": 4.542372881355933e-06, "loss": 0.729, "step": 67 }, { "epoch": 0.01, "grad_norm": 2.0759273723089726, "learning_rate": 4.610169491525424e-06, "loss": 0.7483, "step": 68 }, { "epoch": 0.01, "grad_norm": 2.19293232912226, "learning_rate": 4.677966101694916e-06, "loss": 0.7522, "step": 69 }, { "epoch": 0.01, "grad_norm": 1.8359859772491813, "learning_rate": 4.745762711864408e-06, "loss": 0.7899, "step": 70 }, { "epoch": 0.01, "grad_norm": 2.3845629041250707, "learning_rate": 4.813559322033899e-06, "loss": 0.7246, "step": 71 }, { "epoch": 0.01, "grad_norm": 2.1085590957497846, "learning_rate": 4.881355932203391e-06, "loss": 0.7739, "step": 72 }, { "epoch": 0.01, "grad_norm": 1.8918037708327846, "learning_rate": 4.949152542372882e-06, "loss": 0.7698, "step": 73 }, { "epoch": 0.01, "grad_norm": 2.0067874509675687, "learning_rate": 5.016949152542373e-06, "loss": 0.7346, "step": 74 }, { "epoch": 0.01, "grad_norm": 2.119502972604516, "learning_rate": 5.084745762711865e-06, "loss": 0.8479, "step": 75 }, { "epoch": 0.01, "grad_norm": 1.861663786810975, "learning_rate": 5.152542372881356e-06, "loss": 0.6784, "step": 76 }, { "epoch": 0.01, "grad_norm": 1.9695486836646774, "learning_rate": 5.220338983050848e-06, "loss": 0.7107, "step": 77 }, { "epoch": 0.01, "grad_norm": 2.171762088984204, "learning_rate": 5.28813559322034e-06, "loss": 0.7572, "step": 78 }, { "epoch": 0.01, "grad_norm": 2.1380903015319648, "learning_rate": 5.355932203389831e-06, "loss": 0.7518, "step": 79 }, { "epoch": 0.01, "grad_norm": 2.020115592796084, "learning_rate": 5.423728813559323e-06, "loss": 0.7172, "step": 80 }, { "epoch": 0.01, "grad_norm": 2.3246993299029914, "learning_rate": 5.491525423728814e-06, "loss": 0.7228, "step": 81 }, { "epoch": 0.01, "grad_norm": 2.0860284304875973, "learning_rate": 5.559322033898306e-06, "loss": 0.7337, "step": 82 }, { "epoch": 0.01, "grad_norm": 2.105468963098628, "learning_rate": 5.6271186440677975e-06, "loss": 0.8502, "step": 83 }, { "epoch": 0.01, "grad_norm": 1.9810307968538619, "learning_rate": 5.694915254237289e-06, "loss": 0.7639, "step": 84 }, { "epoch": 0.01, "grad_norm": 2.42547864581517, "learning_rate": 5.7627118644067805e-06, "loss": 0.8198, "step": 85 }, { "epoch": 0.01, "grad_norm": 1.9843324749634157, "learning_rate": 5.830508474576272e-06, "loss": 0.6272, "step": 86 }, { "epoch": 0.01, "grad_norm": 1.8295596514924568, "learning_rate": 5.8983050847457635e-06, "loss": 0.6614, "step": 87 }, { "epoch": 0.01, "grad_norm": 2.2018819268618697, "learning_rate": 5.9661016949152555e-06, "loss": 0.8396, "step": 88 }, { "epoch": 0.01, "grad_norm": 2.014841623849869, "learning_rate": 6.0338983050847465e-06, "loss": 0.7825, "step": 89 }, { "epoch": 0.01, "grad_norm": 2.1391290710966953, "learning_rate": 6.1016949152542385e-06, "loss": 0.6737, "step": 90 }, { "epoch": 0.01, "grad_norm": 1.844927353698065, "learning_rate": 6.1694915254237295e-06, "loss": 0.7386, "step": 91 }, { "epoch": 0.01, "grad_norm": 2.4827254198905573, "learning_rate": 6.2372881355932215e-06, "loss": 0.9183, "step": 92 }, { "epoch": 0.01, "grad_norm": 2.1865083435662775, "learning_rate": 6.3050847457627125e-06, "loss": 0.6657, "step": 93 }, { "epoch": 0.01, "grad_norm": 2.22914258959008, "learning_rate": 6.372881355932204e-06, "loss": 0.7878, "step": 94 }, { "epoch": 0.01, "grad_norm": 2.0465478945502737, "learning_rate": 6.440677966101695e-06, "loss": 0.7384, "step": 95 }, { "epoch": 0.01, "grad_norm": 2.150182007226782, "learning_rate": 6.508474576271187e-06, "loss": 0.7675, "step": 96 }, { "epoch": 0.01, "grad_norm": 2.20138713129849, "learning_rate": 6.576271186440678e-06, "loss": 0.7957, "step": 97 }, { "epoch": 0.01, "grad_norm": 2.1717654881518405, "learning_rate": 6.64406779661017e-06, "loss": 0.7656, "step": 98 }, { "epoch": 0.01, "grad_norm": 2.1766599305935683, "learning_rate": 6.7118644067796615e-06, "loss": 0.8074, "step": 99 }, { "epoch": 0.01, "grad_norm": 2.250178867344064, "learning_rate": 6.779661016949153e-06, "loss": 0.7486, "step": 100 }, { "epoch": 0.01, "grad_norm": 2.1036428186662457, "learning_rate": 6.8474576271186445e-06, "loss": 0.7276, "step": 101 }, { "epoch": 0.01, "grad_norm": 2.231120104885962, "learning_rate": 6.915254237288136e-06, "loss": 0.7694, "step": 102 }, { "epoch": 0.01, "grad_norm": 1.8320331250168351, "learning_rate": 6.9830508474576275e-06, "loss": 0.7384, "step": 103 }, { "epoch": 0.01, "grad_norm": 2.07040534417232, "learning_rate": 7.0508474576271195e-06, "loss": 0.6566, "step": 104 }, { "epoch": 0.01, "grad_norm": 2.052812058503903, "learning_rate": 7.1186440677966106e-06, "loss": 0.761, "step": 105 }, { "epoch": 0.01, "grad_norm": 2.1483979394263417, "learning_rate": 7.1864406779661025e-06, "loss": 0.7733, "step": 106 }, { "epoch": 0.01, "grad_norm": 2.029441020419923, "learning_rate": 7.2542372881355936e-06, "loss": 0.6775, "step": 107 }, { "epoch": 0.01, "grad_norm": 2.0568628127678794, "learning_rate": 7.3220338983050855e-06, "loss": 0.6425, "step": 108 }, { "epoch": 0.01, "grad_norm": 2.179569873526594, "learning_rate": 7.3898305084745766e-06, "loss": 0.7068, "step": 109 }, { "epoch": 0.01, "grad_norm": 1.9555856066898845, "learning_rate": 7.4576271186440685e-06, "loss": 0.7595, "step": 110 }, { "epoch": 0.01, "grad_norm": 2.3202948797607554, "learning_rate": 7.52542372881356e-06, "loss": 0.8602, "step": 111 }, { "epoch": 0.01, "grad_norm": 2.0643122915003107, "learning_rate": 7.5932203389830515e-06, "loss": 0.791, "step": 112 }, { "epoch": 0.01, "grad_norm": 2.2126598533146677, "learning_rate": 7.661016949152543e-06, "loss": 0.7064, "step": 113 }, { "epoch": 0.01, "grad_norm": 2.4298478880835463, "learning_rate": 7.728813559322035e-06, "loss": 0.7869, "step": 114 }, { "epoch": 0.01, "grad_norm": 2.288531605902557, "learning_rate": 7.796610169491526e-06, "loss": 0.896, "step": 115 }, { "epoch": 0.01, "grad_norm": 2.2745017660805855, "learning_rate": 7.864406779661017e-06, "loss": 0.6802, "step": 116 }, { "epoch": 0.01, "grad_norm": 1.6570737256402928, "learning_rate": 7.93220338983051e-06, "loss": 0.6276, "step": 117 }, { "epoch": 0.01, "grad_norm": 2.1733978454998923, "learning_rate": 8.000000000000001e-06, "loss": 0.7266, "step": 118 }, { "epoch": 0.01, "grad_norm": 2.142950183320776, "learning_rate": 8.067796610169492e-06, "loss": 0.7994, "step": 119 }, { "epoch": 0.01, "grad_norm": 2.1131892760021556, "learning_rate": 8.135593220338983e-06, "loss": 0.7306, "step": 120 }, { "epoch": 0.01, "grad_norm": 1.9104523566550864, "learning_rate": 8.203389830508475e-06, "loss": 0.7925, "step": 121 }, { "epoch": 0.01, "grad_norm": 1.9144516822731146, "learning_rate": 8.271186440677966e-06, "loss": 0.7236, "step": 122 }, { "epoch": 0.01, "grad_norm": 2.1479589283189173, "learning_rate": 8.338983050847458e-06, "loss": 0.7482, "step": 123 }, { "epoch": 0.01, "grad_norm": 2.154494552349858, "learning_rate": 8.40677966101695e-06, "loss": 0.689, "step": 124 }, { "epoch": 0.01, "grad_norm": 1.9714017351330468, "learning_rate": 8.47457627118644e-06, "loss": 0.7369, "step": 125 }, { "epoch": 0.01, "grad_norm": 1.988551787371157, "learning_rate": 8.542372881355933e-06, "loss": 0.7643, "step": 126 }, { "epoch": 0.01, "grad_norm": 2.325041578076891, "learning_rate": 8.610169491525424e-06, "loss": 0.7964, "step": 127 }, { "epoch": 0.01, "grad_norm": 2.176814754004798, "learning_rate": 8.677966101694915e-06, "loss": 0.7643, "step": 128 }, { "epoch": 0.01, "grad_norm": 2.0824812157544645, "learning_rate": 8.745762711864407e-06, "loss": 0.7471, "step": 129 }, { "epoch": 0.01, "grad_norm": 1.9684242759673636, "learning_rate": 8.8135593220339e-06, "loss": 0.8099, "step": 130 }, { "epoch": 0.01, "grad_norm": 2.0678745627639197, "learning_rate": 8.88135593220339e-06, "loss": 0.8663, "step": 131 }, { "epoch": 0.01, "grad_norm": 2.060252010222962, "learning_rate": 8.949152542372881e-06, "loss": 0.7509, "step": 132 }, { "epoch": 0.01, "grad_norm": 1.8677177304612413, "learning_rate": 9.016949152542374e-06, "loss": 0.719, "step": 133 }, { "epoch": 0.01, "grad_norm": 2.0650299093701965, "learning_rate": 9.084745762711865e-06, "loss": 0.6587, "step": 134 }, { "epoch": 0.01, "grad_norm": 2.157297646435543, "learning_rate": 9.152542372881356e-06, "loss": 0.7644, "step": 135 }, { "epoch": 0.01, "grad_norm": 1.9208876885040105, "learning_rate": 9.220338983050847e-06, "loss": 0.7299, "step": 136 }, { "epoch": 0.01, "grad_norm": 2.1636200591275476, "learning_rate": 9.28813559322034e-06, "loss": 0.8205, "step": 137 }, { "epoch": 0.01, "grad_norm": 2.240165955523122, "learning_rate": 9.355932203389831e-06, "loss": 0.7958, "step": 138 }, { "epoch": 0.01, "grad_norm": 2.3038562909608458, "learning_rate": 9.423728813559322e-06, "loss": 0.7342, "step": 139 }, { "epoch": 0.01, "grad_norm": 1.8045909570080936, "learning_rate": 9.491525423728815e-06, "loss": 0.6016, "step": 140 }, { "epoch": 0.01, "grad_norm": 1.8861330770480502, "learning_rate": 9.559322033898306e-06, "loss": 0.7197, "step": 141 }, { "epoch": 0.01, "grad_norm": 1.973267688604536, "learning_rate": 9.627118644067797e-06, "loss": 0.7593, "step": 142 }, { "epoch": 0.01, "grad_norm": 1.8698474554660958, "learning_rate": 9.69491525423729e-06, "loss": 0.6507, "step": 143 }, { "epoch": 0.01, "grad_norm": 2.0010518197546987, "learning_rate": 9.762711864406781e-06, "loss": 0.7075, "step": 144 }, { "epoch": 0.01, "grad_norm": 1.9194243510439397, "learning_rate": 9.830508474576272e-06, "loss": 0.802, "step": 145 }, { "epoch": 0.01, "grad_norm": 2.168846656888089, "learning_rate": 9.898305084745763e-06, "loss": 0.7935, "step": 146 }, { "epoch": 0.01, "grad_norm": 2.125833442803335, "learning_rate": 9.966101694915256e-06, "loss": 0.7864, "step": 147 }, { "epoch": 0.02, "grad_norm": 2.102996254972869, "learning_rate": 1.0033898305084746e-05, "loss": 0.7078, "step": 148 }, { "epoch": 0.02, "grad_norm": 2.1987112323642615, "learning_rate": 1.0101694915254238e-05, "loss": 0.7534, "step": 149 }, { "epoch": 0.02, "grad_norm": 1.815570288794822, "learning_rate": 1.016949152542373e-05, "loss": 0.8229, "step": 150 }, { "epoch": 0.02, "grad_norm": 2.1419109745500293, "learning_rate": 1.0237288135593222e-05, "loss": 0.7117, "step": 151 }, { "epoch": 0.02, "grad_norm": 2.1575782595640476, "learning_rate": 1.0305084745762712e-05, "loss": 0.8649, "step": 152 }, { "epoch": 0.02, "grad_norm": 2.3369718017851002, "learning_rate": 1.0372881355932204e-05, "loss": 0.841, "step": 153 }, { "epoch": 0.02, "grad_norm": 1.89934679393495, "learning_rate": 1.0440677966101695e-05, "loss": 0.6265, "step": 154 }, { "epoch": 0.02, "grad_norm": 2.116565213986925, "learning_rate": 1.0508474576271188e-05, "loss": 0.724, "step": 155 }, { "epoch": 0.02, "grad_norm": 1.982021753415292, "learning_rate": 1.057627118644068e-05, "loss": 0.789, "step": 156 }, { "epoch": 0.02, "grad_norm": 2.1139135488892666, "learning_rate": 1.0644067796610172e-05, "loss": 0.8074, "step": 157 }, { "epoch": 0.02, "grad_norm": 2.118524566834072, "learning_rate": 1.0711864406779661e-05, "loss": 0.7203, "step": 158 }, { "epoch": 0.02, "grad_norm": 2.12820506314938, "learning_rate": 1.0779661016949154e-05, "loss": 0.7984, "step": 159 }, { "epoch": 0.02, "grad_norm": 1.9423385020630755, "learning_rate": 1.0847457627118645e-05, "loss": 0.6801, "step": 160 }, { "epoch": 0.02, "grad_norm": 2.1069618355533097, "learning_rate": 1.0915254237288135e-05, "loss": 0.6958, "step": 161 }, { "epoch": 0.02, "grad_norm": 2.2371552107869843, "learning_rate": 1.0983050847457627e-05, "loss": 0.5289, "step": 162 }, { "epoch": 0.02, "grad_norm": 1.904024031354274, "learning_rate": 1.1050847457627118e-05, "loss": 0.6713, "step": 163 }, { "epoch": 0.02, "grad_norm": 2.127282166318957, "learning_rate": 1.1118644067796611e-05, "loss": 0.7227, "step": 164 }, { "epoch": 0.02, "grad_norm": 2.142497901612626, "learning_rate": 1.1186440677966102e-05, "loss": 0.5906, "step": 165 }, { "epoch": 0.02, "grad_norm": 2.057330453800443, "learning_rate": 1.1254237288135595e-05, "loss": 0.6901, "step": 166 }, { "epoch": 0.02, "grad_norm": 2.093010939189739, "learning_rate": 1.1322033898305084e-05, "loss": 0.7951, "step": 167 }, { "epoch": 0.02, "grad_norm": 1.9107896944652738, "learning_rate": 1.1389830508474577e-05, "loss": 0.7762, "step": 168 }, { "epoch": 0.02, "grad_norm": 2.27565600735753, "learning_rate": 1.1457627118644068e-05, "loss": 0.7201, "step": 169 }, { "epoch": 0.02, "grad_norm": 2.078256587268634, "learning_rate": 1.1525423728813561e-05, "loss": 0.7271, "step": 170 }, { "epoch": 0.02, "grad_norm": 1.976498895581692, "learning_rate": 1.159322033898305e-05, "loss": 0.7205, "step": 171 }, { "epoch": 0.02, "grad_norm": 2.0274252035867497, "learning_rate": 1.1661016949152543e-05, "loss": 0.7542, "step": 172 }, { "epoch": 0.02, "grad_norm": 2.248882635591796, "learning_rate": 1.1728813559322034e-05, "loss": 0.7563, "step": 173 }, { "epoch": 0.02, "grad_norm": 2.152946416343347, "learning_rate": 1.1796610169491527e-05, "loss": 0.8891, "step": 174 }, { "epoch": 0.02, "grad_norm": 1.9614188493029023, "learning_rate": 1.1864406779661018e-05, "loss": 0.6927, "step": 175 }, { "epoch": 0.02, "grad_norm": 1.9561016698575437, "learning_rate": 1.1932203389830511e-05, "loss": 0.7323, "step": 176 }, { "epoch": 0.02, "grad_norm": 2.2060339927740333, "learning_rate": 1.2e-05, "loss": 0.7635, "step": 177 }, { "epoch": 0.02, "grad_norm": 2.361699969994298, "learning_rate": 1.2067796610169493e-05, "loss": 0.7074, "step": 178 }, { "epoch": 0.02, "grad_norm": 2.2826526992705576, "learning_rate": 1.2135593220338984e-05, "loss": 0.8152, "step": 179 }, { "epoch": 0.02, "grad_norm": 2.3383409650671783, "learning_rate": 1.2203389830508477e-05, "loss": 0.7099, "step": 180 }, { "epoch": 0.02, "grad_norm": 2.314928794545433, "learning_rate": 1.2271186440677966e-05, "loss": 0.8114, "step": 181 }, { "epoch": 0.02, "grad_norm": 2.1182102763924564, "learning_rate": 1.2338983050847459e-05, "loss": 0.7584, "step": 182 }, { "epoch": 0.02, "grad_norm": 1.9427488854499315, "learning_rate": 1.240677966101695e-05, "loss": 0.6497, "step": 183 }, { "epoch": 0.02, "grad_norm": 2.044035298732104, "learning_rate": 1.2474576271186443e-05, "loss": 0.8107, "step": 184 }, { "epoch": 0.02, "grad_norm": 2.000921224831445, "learning_rate": 1.2542372881355932e-05, "loss": 0.7334, "step": 185 }, { "epoch": 0.02, "grad_norm": 2.148842701328798, "learning_rate": 1.2610169491525425e-05, "loss": 0.8103, "step": 186 }, { "epoch": 0.02, "grad_norm": 2.2665184439248676, "learning_rate": 1.2677966101694916e-05, "loss": 0.8093, "step": 187 }, { "epoch": 0.02, "grad_norm": 1.9357363672299526, "learning_rate": 1.2745762711864407e-05, "loss": 0.7027, "step": 188 }, { "epoch": 0.02, "grad_norm": 1.7570604626310313, "learning_rate": 1.28135593220339e-05, "loss": 0.6212, "step": 189 }, { "epoch": 0.02, "grad_norm": 2.2773162054308607, "learning_rate": 1.288135593220339e-05, "loss": 0.7666, "step": 190 }, { "epoch": 0.02, "grad_norm": 1.7761063488705422, "learning_rate": 1.2949152542372882e-05, "loss": 0.6996, "step": 191 }, { "epoch": 0.02, "grad_norm": 1.8767194093067885, "learning_rate": 1.3016949152542373e-05, "loss": 0.7106, "step": 192 }, { "epoch": 0.02, "grad_norm": 2.0091109182168436, "learning_rate": 1.3084745762711866e-05, "loss": 0.6987, "step": 193 }, { "epoch": 0.02, "grad_norm": 1.8537469691014754, "learning_rate": 1.3152542372881355e-05, "loss": 0.6861, "step": 194 }, { "epoch": 0.02, "grad_norm": 2.017545763530024, "learning_rate": 1.3220338983050848e-05, "loss": 0.7587, "step": 195 }, { "epoch": 0.02, "grad_norm": 2.1514998030012236, "learning_rate": 1.328813559322034e-05, "loss": 0.828, "step": 196 }, { "epoch": 0.02, "grad_norm": 1.9143327758731226, "learning_rate": 1.3355932203389832e-05, "loss": 0.7598, "step": 197 }, { "epoch": 0.02, "grad_norm": 1.9042594463315574, "learning_rate": 1.3423728813559323e-05, "loss": 0.7754, "step": 198 }, { "epoch": 0.02, "grad_norm": 1.8556994255207242, "learning_rate": 1.3491525423728816e-05, "loss": 0.7414, "step": 199 }, { "epoch": 0.02, "grad_norm": 2.1774086553474232, "learning_rate": 1.3559322033898305e-05, "loss": 0.7429, "step": 200 }, { "epoch": 0.02, "grad_norm": 1.9337554264476329, "learning_rate": 1.3627118644067798e-05, "loss": 0.6986, "step": 201 }, { "epoch": 0.02, "grad_norm": 1.718424449386759, "learning_rate": 1.3694915254237289e-05, "loss": 0.68, "step": 202 }, { "epoch": 0.02, "grad_norm": 1.906706540088502, "learning_rate": 1.3762711864406782e-05, "loss": 0.822, "step": 203 }, { "epoch": 0.02, "grad_norm": 2.098824628782942, "learning_rate": 1.3830508474576271e-05, "loss": 0.7159, "step": 204 }, { "epoch": 0.02, "grad_norm": 1.798103139341794, "learning_rate": 1.3898305084745764e-05, "loss": 0.817, "step": 205 }, { "epoch": 0.02, "grad_norm": 1.6708618914113391, "learning_rate": 1.3966101694915255e-05, "loss": 0.7206, "step": 206 }, { "epoch": 0.02, "grad_norm": 1.8960631122303189, "learning_rate": 1.4033898305084748e-05, "loss": 0.7623, "step": 207 }, { "epoch": 0.02, "grad_norm": 1.941715727270628, "learning_rate": 1.4101694915254239e-05, "loss": 0.664, "step": 208 }, { "epoch": 0.02, "grad_norm": 1.964233432476786, "learning_rate": 1.416949152542373e-05, "loss": 0.6237, "step": 209 }, { "epoch": 0.02, "grad_norm": 1.7513382787513723, "learning_rate": 1.4237288135593221e-05, "loss": 0.6544, "step": 210 }, { "epoch": 0.02, "grad_norm": 1.9443552456303226, "learning_rate": 1.4305084745762714e-05, "loss": 0.6672, "step": 211 }, { "epoch": 0.02, "grad_norm": 1.9412006314237946, "learning_rate": 1.4372881355932205e-05, "loss": 0.8645, "step": 212 }, { "epoch": 0.02, "grad_norm": 2.117636061000563, "learning_rate": 1.4440677966101698e-05, "loss": 0.686, "step": 213 }, { "epoch": 0.02, "grad_norm": 2.0391785941599845, "learning_rate": 1.4508474576271187e-05, "loss": 0.8448, "step": 214 }, { "epoch": 0.02, "grad_norm": 1.950652111059504, "learning_rate": 1.4576271186440678e-05, "loss": 0.6526, "step": 215 }, { "epoch": 0.02, "grad_norm": 1.987977720735387, "learning_rate": 1.4644067796610171e-05, "loss": 0.7114, "step": 216 }, { "epoch": 0.02, "grad_norm": 2.173671919832719, "learning_rate": 1.4711864406779662e-05, "loss": 0.706, "step": 217 }, { "epoch": 0.02, "grad_norm": 2.0170009363485395, "learning_rate": 1.4779661016949153e-05, "loss": 0.6986, "step": 218 }, { "epoch": 0.02, "grad_norm": 1.7783761996339809, "learning_rate": 1.4847457627118644e-05, "loss": 0.7819, "step": 219 }, { "epoch": 0.02, "grad_norm": 2.120675334171391, "learning_rate": 1.4915254237288137e-05, "loss": 0.7375, "step": 220 }, { "epoch": 0.02, "grad_norm": 1.8559328669266935, "learning_rate": 1.4983050847457628e-05, "loss": 0.7511, "step": 221 }, { "epoch": 0.02, "grad_norm": 1.9057535498740088, "learning_rate": 1.505084745762712e-05, "loss": 0.7453, "step": 222 }, { "epoch": 0.02, "grad_norm": 1.9176427912162302, "learning_rate": 1.511864406779661e-05, "loss": 0.65, "step": 223 }, { "epoch": 0.02, "grad_norm": 1.967862907768235, "learning_rate": 1.5186440677966103e-05, "loss": 0.7469, "step": 224 }, { "epoch": 0.02, "grad_norm": 1.8613239875337846, "learning_rate": 1.5254237288135594e-05, "loss": 0.8355, "step": 225 }, { "epoch": 0.02, "grad_norm": 1.6798452172801586, "learning_rate": 1.5322033898305085e-05, "loss": 0.7247, "step": 226 }, { "epoch": 0.02, "grad_norm": 1.9657751054133528, "learning_rate": 1.5389830508474578e-05, "loss": 0.6851, "step": 227 }, { "epoch": 0.02, "grad_norm": 1.8171542649675818, "learning_rate": 1.545762711864407e-05, "loss": 0.7742, "step": 228 }, { "epoch": 0.02, "grad_norm": 2.010455167366286, "learning_rate": 1.552542372881356e-05, "loss": 0.8376, "step": 229 }, { "epoch": 0.02, "grad_norm": 1.891522214459134, "learning_rate": 1.5593220338983053e-05, "loss": 0.8006, "step": 230 }, { "epoch": 0.02, "grad_norm": 1.8244670277804427, "learning_rate": 1.5661016949152542e-05, "loss": 0.6862, "step": 231 }, { "epoch": 0.02, "grad_norm": 2.035296790223969, "learning_rate": 1.5728813559322035e-05, "loss": 0.7342, "step": 232 }, { "epoch": 0.02, "grad_norm": 2.036716471677796, "learning_rate": 1.5796610169491528e-05, "loss": 0.7172, "step": 233 }, { "epoch": 0.02, "grad_norm": 1.966447074428311, "learning_rate": 1.586440677966102e-05, "loss": 0.778, "step": 234 }, { "epoch": 0.02, "grad_norm": 1.8457730222414168, "learning_rate": 1.593220338983051e-05, "loss": 0.628, "step": 235 }, { "epoch": 0.02, "grad_norm": 1.8050930389522784, "learning_rate": 1.6000000000000003e-05, "loss": 0.7722, "step": 236 }, { "epoch": 0.02, "grad_norm": 2.531322108969687, "learning_rate": 1.6067796610169492e-05, "loss": 0.8066, "step": 237 }, { "epoch": 0.02, "grad_norm": 2.0927576851100174, "learning_rate": 1.6135593220338985e-05, "loss": 0.7249, "step": 238 }, { "epoch": 0.02, "grad_norm": 2.029523839237554, "learning_rate": 1.6203389830508474e-05, "loss": 0.8032, "step": 239 }, { "epoch": 0.02, "grad_norm": 2.0478471468460615, "learning_rate": 1.6271186440677967e-05, "loss": 0.7326, "step": 240 }, { "epoch": 0.02, "grad_norm": 1.829411642842363, "learning_rate": 1.633898305084746e-05, "loss": 0.817, "step": 241 }, { "epoch": 0.02, "grad_norm": 1.8488495872202233, "learning_rate": 1.640677966101695e-05, "loss": 0.6522, "step": 242 }, { "epoch": 0.02, "grad_norm": 1.9832710283695125, "learning_rate": 1.6474576271186442e-05, "loss": 0.7512, "step": 243 }, { "epoch": 0.02, "grad_norm": 2.100562680767454, "learning_rate": 1.654237288135593e-05, "loss": 0.8037, "step": 244 }, { "epoch": 0.02, "grad_norm": 2.035367004658631, "learning_rate": 1.6610169491525424e-05, "loss": 0.8428, "step": 245 }, { "epoch": 0.03, "grad_norm": 1.7483550173167273, "learning_rate": 1.6677966101694917e-05, "loss": 0.75, "step": 246 }, { "epoch": 0.03, "grad_norm": 1.930898045070027, "learning_rate": 1.674576271186441e-05, "loss": 0.6774, "step": 247 }, { "epoch": 0.03, "grad_norm": 1.8365756306413494, "learning_rate": 1.68135593220339e-05, "loss": 0.723, "step": 248 }, { "epoch": 0.03, "grad_norm": 1.701138383260526, "learning_rate": 1.6881355932203392e-05, "loss": 0.7625, "step": 249 }, { "epoch": 0.03, "grad_norm": 1.7105626859044865, "learning_rate": 1.694915254237288e-05, "loss": 0.6939, "step": 250 }, { "epoch": 0.03, "grad_norm": 2.059412608350948, "learning_rate": 1.7016949152542374e-05, "loss": 0.6739, "step": 251 }, { "epoch": 0.03, "grad_norm": 1.712149442270916, "learning_rate": 1.7084745762711867e-05, "loss": 0.744, "step": 252 }, { "epoch": 0.03, "grad_norm": 2.015776887334701, "learning_rate": 1.715254237288136e-05, "loss": 0.7223, "step": 253 }, { "epoch": 0.03, "grad_norm": 1.685804526129291, "learning_rate": 1.722033898305085e-05, "loss": 0.7029, "step": 254 }, { "epoch": 0.03, "grad_norm": 1.9446296967732069, "learning_rate": 1.728813559322034e-05, "loss": 0.7818, "step": 255 }, { "epoch": 0.03, "grad_norm": 1.791749565441212, "learning_rate": 1.735593220338983e-05, "loss": 0.6184, "step": 256 }, { "epoch": 0.03, "grad_norm": 1.7154113226867052, "learning_rate": 1.7423728813559324e-05, "loss": 0.7036, "step": 257 }, { "epoch": 0.03, "grad_norm": 1.883567618364835, "learning_rate": 1.7491525423728813e-05, "loss": 0.7517, "step": 258 }, { "epoch": 0.03, "grad_norm": 1.8940636144580816, "learning_rate": 1.7559322033898306e-05, "loss": 0.7078, "step": 259 }, { "epoch": 0.03, "grad_norm": 1.9268538873409744, "learning_rate": 1.76271186440678e-05, "loss": 0.7563, "step": 260 }, { "epoch": 0.03, "grad_norm": 1.7748203123085324, "learning_rate": 1.769491525423729e-05, "loss": 0.7079, "step": 261 }, { "epoch": 0.03, "grad_norm": 1.76578312180332, "learning_rate": 1.776271186440678e-05, "loss": 0.6875, "step": 262 }, { "epoch": 0.03, "grad_norm": 1.8139793782816562, "learning_rate": 1.7830508474576274e-05, "loss": 0.7254, "step": 263 }, { "epoch": 0.03, "grad_norm": 1.7496909186973184, "learning_rate": 1.7898305084745763e-05, "loss": 0.7714, "step": 264 }, { "epoch": 0.03, "grad_norm": 1.9619860178080115, "learning_rate": 1.7966101694915256e-05, "loss": 0.6743, "step": 265 }, { "epoch": 0.03, "grad_norm": 1.9633822472809355, "learning_rate": 1.803389830508475e-05, "loss": 0.7198, "step": 266 }, { "epoch": 0.03, "grad_norm": 1.8938463788084676, "learning_rate": 1.810169491525424e-05, "loss": 0.8072, "step": 267 }, { "epoch": 0.03, "grad_norm": 1.7910793721294165, "learning_rate": 1.816949152542373e-05, "loss": 0.7417, "step": 268 }, { "epoch": 0.03, "grad_norm": 1.8385244007491037, "learning_rate": 1.823728813559322e-05, "loss": 0.7399, "step": 269 }, { "epoch": 0.03, "grad_norm": 1.8299310983186288, "learning_rate": 1.8305084745762713e-05, "loss": 0.8197, "step": 270 }, { "epoch": 0.03, "grad_norm": 1.6938430742018853, "learning_rate": 1.8372881355932202e-05, "loss": 0.7432, "step": 271 }, { "epoch": 0.03, "grad_norm": 1.97932361867903, "learning_rate": 1.8440677966101695e-05, "loss": 0.7969, "step": 272 }, { "epoch": 0.03, "grad_norm": 1.7016617079758896, "learning_rate": 1.8508474576271188e-05, "loss": 0.7311, "step": 273 }, { "epoch": 0.03, "grad_norm": 1.7402288852531422, "learning_rate": 1.857627118644068e-05, "loss": 0.8583, "step": 274 }, { "epoch": 0.03, "grad_norm": 1.8009302958397473, "learning_rate": 1.864406779661017e-05, "loss": 0.7303, "step": 275 }, { "epoch": 0.03, "grad_norm": 1.8701338309527233, "learning_rate": 1.8711864406779663e-05, "loss": 0.7549, "step": 276 }, { "epoch": 0.03, "grad_norm": 2.0312194476891876, "learning_rate": 1.8779661016949152e-05, "loss": 0.7222, "step": 277 }, { "epoch": 0.03, "grad_norm": 1.8677846365652164, "learning_rate": 1.8847457627118645e-05, "loss": 0.7295, "step": 278 }, { "epoch": 0.03, "grad_norm": 1.949694415094439, "learning_rate": 1.8915254237288138e-05, "loss": 0.7984, "step": 279 }, { "epoch": 0.03, "grad_norm": 1.7111163200955561, "learning_rate": 1.898305084745763e-05, "loss": 0.7203, "step": 280 }, { "epoch": 0.03, "grad_norm": 1.9717239024751367, "learning_rate": 1.905084745762712e-05, "loss": 0.6803, "step": 281 }, { "epoch": 0.03, "grad_norm": 1.579686370395743, "learning_rate": 1.9118644067796613e-05, "loss": 0.7824, "step": 282 }, { "epoch": 0.03, "grad_norm": 1.678149645995694, "learning_rate": 1.9186440677966102e-05, "loss": 0.6746, "step": 283 }, { "epoch": 0.03, "grad_norm": 1.8213188558433615, "learning_rate": 1.9254237288135595e-05, "loss": 0.8051, "step": 284 }, { "epoch": 0.03, "grad_norm": 1.7725977550117176, "learning_rate": 1.9322033898305087e-05, "loss": 0.7281, "step": 285 }, { "epoch": 0.03, "grad_norm": 1.9372054768281428, "learning_rate": 1.938983050847458e-05, "loss": 0.6533, "step": 286 }, { "epoch": 0.03, "grad_norm": 1.7535765837740318, "learning_rate": 1.945762711864407e-05, "loss": 0.8288, "step": 287 }, { "epoch": 0.03, "grad_norm": 1.859536473734018, "learning_rate": 1.9525423728813562e-05, "loss": 0.8189, "step": 288 }, { "epoch": 0.03, "grad_norm": 1.843961540210964, "learning_rate": 1.9593220338983052e-05, "loss": 0.7668, "step": 289 }, { "epoch": 0.03, "grad_norm": 1.8461464381997479, "learning_rate": 1.9661016949152545e-05, "loss": 0.7714, "step": 290 }, { "epoch": 0.03, "grad_norm": 1.7238192387959088, "learning_rate": 1.9728813559322034e-05, "loss": 0.7324, "step": 291 }, { "epoch": 0.03, "grad_norm": 1.6427586720320793, "learning_rate": 1.9796610169491527e-05, "loss": 0.7335, "step": 292 }, { "epoch": 0.03, "grad_norm": 1.6800342906853636, "learning_rate": 1.986440677966102e-05, "loss": 0.7549, "step": 293 }, { "epoch": 0.03, "grad_norm": 1.7832389725065179, "learning_rate": 1.9932203389830512e-05, "loss": 0.8058, "step": 294 }, { "epoch": 0.03, "grad_norm": 1.727775985861393, "learning_rate": 2e-05, "loss": 0.6759, "step": 295 }, { "epoch": 0.03, "grad_norm": 1.7977238751907165, "learning_rate": 1.999999945732819e-05, "loss": 0.7674, "step": 296 }, { "epoch": 0.03, "grad_norm": 1.7390933788321141, "learning_rate": 1.9999997829312825e-05, "loss": 0.8395, "step": 297 }, { "epoch": 0.03, "grad_norm": 1.7764290549311015, "learning_rate": 1.9999995115954075e-05, "loss": 0.711, "step": 298 }, { "epoch": 0.03, "grad_norm": 1.6826307129531393, "learning_rate": 1.999999131725224e-05, "loss": 0.6686, "step": 299 }, { "epoch": 0.03, "grad_norm": 1.664467172792983, "learning_rate": 1.9999986433207727e-05, "loss": 0.6859, "step": 300 }, { "epoch": 0.03, "grad_norm": 1.91798902000967, "learning_rate": 1.9999980463821073e-05, "loss": 0.7042, "step": 301 }, { "epoch": 0.03, "grad_norm": 1.6449618209624421, "learning_rate": 1.9999973409092916e-05, "loss": 0.6718, "step": 302 }, { "epoch": 0.03, "grad_norm": 1.7583224606564245, "learning_rate": 1.999996526902403e-05, "loss": 0.7526, "step": 303 }, { "epoch": 0.03, "grad_norm": 1.7418882410175618, "learning_rate": 1.99999560436153e-05, "loss": 0.8077, "step": 304 }, { "epoch": 0.03, "grad_norm": 1.6483278016364271, "learning_rate": 1.9999945732867717e-05, "loss": 0.6834, "step": 305 }, { "epoch": 0.03, "grad_norm": 1.8758827011232249, "learning_rate": 1.999993433678241e-05, "loss": 0.6097, "step": 306 }, { "epoch": 0.03, "grad_norm": 1.9950744094762918, "learning_rate": 1.999992185536061e-05, "loss": 0.8323, "step": 307 }, { "epoch": 0.03, "grad_norm": 1.9600308793601466, "learning_rate": 1.9999908288603678e-05, "loss": 0.7898, "step": 308 }, { "epoch": 0.03, "grad_norm": 1.6362825731715926, "learning_rate": 1.9999893636513078e-05, "loss": 0.715, "step": 309 }, { "epoch": 0.03, "grad_norm": 1.9191632580569564, "learning_rate": 1.9999877899090408e-05, "loss": 0.8983, "step": 310 }, { "epoch": 0.03, "grad_norm": 2.2077252767571283, "learning_rate": 1.999986107633737e-05, "loss": 0.8502, "step": 311 }, { "epoch": 0.03, "grad_norm": 1.7211750642056591, "learning_rate": 1.9999843168255792e-05, "loss": 0.6766, "step": 312 }, { "epoch": 0.03, "grad_norm": 1.8405066522264266, "learning_rate": 1.999982417484762e-05, "loss": 0.7717, "step": 313 }, { "epoch": 0.03, "grad_norm": 1.7584962556190842, "learning_rate": 1.9999804096114914e-05, "loss": 0.7634, "step": 314 }, { "epoch": 0.03, "grad_norm": 1.9067370561909576, "learning_rate": 1.999978293205985e-05, "loss": 0.7854, "step": 315 }, { "epoch": 0.03, "grad_norm": 1.650263611438605, "learning_rate": 1.9999760682684732e-05, "loss": 0.8554, "step": 316 }, { "epoch": 0.03, "grad_norm": 1.9239042342081574, "learning_rate": 1.999973734799197e-05, "loss": 0.7182, "step": 317 }, { "epoch": 0.03, "grad_norm": 1.914889513011852, "learning_rate": 1.9999712927984097e-05, "loss": 0.8431, "step": 318 }, { "epoch": 0.03, "grad_norm": 1.7596818561456582, "learning_rate": 1.999968742266376e-05, "loss": 0.7526, "step": 319 }, { "epoch": 0.03, "grad_norm": 1.9152360275383022, "learning_rate": 1.9999660832033733e-05, "loss": 0.8691, "step": 320 }, { "epoch": 0.03, "grad_norm": 1.7674570266140242, "learning_rate": 1.99996331560969e-05, "loss": 0.7009, "step": 321 }, { "epoch": 0.03, "grad_norm": 1.8346671747992218, "learning_rate": 1.9999604394856265e-05, "loss": 0.7458, "step": 322 }, { "epoch": 0.03, "grad_norm": 1.7024441673305173, "learning_rate": 1.999957454831495e-05, "loss": 0.6926, "step": 323 }, { "epoch": 0.03, "grad_norm": 1.8858862630231061, "learning_rate": 1.9999543616476195e-05, "loss": 0.7739, "step": 324 }, { "epoch": 0.03, "grad_norm": 1.918639765230697, "learning_rate": 1.999951159934335e-05, "loss": 0.7835, "step": 325 }, { "epoch": 0.03, "grad_norm": 1.713705802835458, "learning_rate": 1.99994784969199e-05, "loss": 0.8373, "step": 326 }, { "epoch": 0.03, "grad_norm": 2.125556618290798, "learning_rate": 1.9999444309209432e-05, "loss": 0.7237, "step": 327 }, { "epoch": 0.03, "grad_norm": 1.8747153782294466, "learning_rate": 1.999940903621566e-05, "loss": 0.8242, "step": 328 }, { "epoch": 0.03, "grad_norm": 1.8541269896488926, "learning_rate": 1.999937267794241e-05, "loss": 0.8078, "step": 329 }, { "epoch": 0.03, "grad_norm": 1.8097939569533714, "learning_rate": 1.999933523439363e-05, "loss": 0.6798, "step": 330 }, { "epoch": 0.03, "grad_norm": 1.6707764941842447, "learning_rate": 1.999929670557338e-05, "loss": 0.7481, "step": 331 }, { "epoch": 0.03, "grad_norm": 1.5408400944729324, "learning_rate": 1.9999257091485842e-05, "loss": 0.6834, "step": 332 }, { "epoch": 0.03, "grad_norm": 1.8105877361881173, "learning_rate": 1.999921639213532e-05, "loss": 0.7798, "step": 333 }, { "epoch": 0.03, "grad_norm": 2.0245928171672127, "learning_rate": 1.9999174607526232e-05, "loss": 0.8071, "step": 334 }, { "epoch": 0.03, "grad_norm": 1.7126779698650814, "learning_rate": 1.9999131737663106e-05, "loss": 0.7318, "step": 335 }, { "epoch": 0.03, "grad_norm": 1.8799372221054982, "learning_rate": 1.9999087782550596e-05, "loss": 0.7915, "step": 336 }, { "epoch": 0.03, "grad_norm": 1.8302157825287524, "learning_rate": 1.999904274219348e-05, "loss": 0.8132, "step": 337 }, { "epoch": 0.03, "grad_norm": 1.8873097603497921, "learning_rate": 1.9998996616596643e-05, "loss": 0.818, "step": 338 }, { "epoch": 0.03, "grad_norm": 1.7616359225670495, "learning_rate": 1.9998949405765086e-05, "loss": 0.6288, "step": 339 }, { "epoch": 0.03, "grad_norm": 1.9961891473127835, "learning_rate": 1.9998901109703942e-05, "loss": 0.7461, "step": 340 }, { "epoch": 0.03, "grad_norm": 1.9590109916459193, "learning_rate": 1.9998851728418443e-05, "loss": 0.7262, "step": 341 }, { "epoch": 0.03, "grad_norm": 1.7017357772857418, "learning_rate": 1.9998801261913956e-05, "loss": 0.697, "step": 342 }, { "epoch": 0.03, "grad_norm": 1.8180055536057091, "learning_rate": 1.9998749710195957e-05, "loss": 0.7893, "step": 343 }, { "epoch": 0.03, "grad_norm": 2.0549880336112016, "learning_rate": 1.9998697073270038e-05, "loss": 0.7717, "step": 344 }, { "epoch": 0.04, "grad_norm": 1.6411992346494544, "learning_rate": 1.9998643351141912e-05, "loss": 0.7945, "step": 345 }, { "epoch": 0.04, "grad_norm": 1.8553473962687779, "learning_rate": 1.9998588543817415e-05, "loss": 0.8276, "step": 346 }, { "epoch": 0.04, "grad_norm": 1.8223722140799334, "learning_rate": 1.999853265130249e-05, "loss": 0.842, "step": 347 }, { "epoch": 0.04, "grad_norm": 1.7858229071600773, "learning_rate": 1.9998475673603205e-05, "loss": 0.6635, "step": 348 }, { "epoch": 0.04, "grad_norm": 1.5519682746904044, "learning_rate": 1.9998417610725745e-05, "loss": 0.6665, "step": 349 }, { "epoch": 0.04, "grad_norm": 1.9122831115958798, "learning_rate": 1.999835846267641e-05, "loss": 0.7812, "step": 350 }, { "epoch": 0.04, "grad_norm": 1.7934216535596592, "learning_rate": 1.9998298229461624e-05, "loss": 0.7411, "step": 351 }, { "epoch": 0.04, "grad_norm": 1.714151647311422, "learning_rate": 1.999823691108792e-05, "loss": 0.7088, "step": 352 }, { "epoch": 0.04, "grad_norm": 1.8445691735054452, "learning_rate": 1.9998174507561952e-05, "loss": 0.8371, "step": 353 }, { "epoch": 0.04, "grad_norm": 1.946917458223072, "learning_rate": 1.9998111018890496e-05, "loss": 0.8158, "step": 354 }, { "epoch": 0.04, "grad_norm": 1.9627402550609958, "learning_rate": 1.999804644508044e-05, "loss": 0.816, "step": 355 }, { "epoch": 0.04, "grad_norm": 1.684490996381524, "learning_rate": 1.99979807861388e-05, "loss": 0.7906, "step": 356 }, { "epoch": 0.04, "grad_norm": 1.7292982143939393, "learning_rate": 1.999791404207269e-05, "loss": 0.8777, "step": 357 }, { "epoch": 0.04, "grad_norm": 1.6990863259567761, "learning_rate": 1.9997846212889367e-05, "loss": 0.7051, "step": 358 }, { "epoch": 0.04, "grad_norm": 1.6806371114663146, "learning_rate": 1.999777729859618e-05, "loss": 0.6675, "step": 359 }, { "epoch": 0.04, "grad_norm": 1.891811437481674, "learning_rate": 1.9997707299200622e-05, "loss": 0.8882, "step": 360 }, { "epoch": 0.04, "grad_norm": 1.7243799607340802, "learning_rate": 1.999763621471028e-05, "loss": 0.7244, "step": 361 }, { "epoch": 0.04, "grad_norm": 1.8932374823398974, "learning_rate": 1.999756404513287e-05, "loss": 0.7592, "step": 362 }, { "epoch": 0.04, "grad_norm": 1.7673934736131074, "learning_rate": 1.999749079047623e-05, "loss": 0.7581, "step": 363 }, { "epoch": 0.04, "grad_norm": 1.688678447260584, "learning_rate": 1.9997416450748308e-05, "loss": 0.7357, "step": 364 }, { "epoch": 0.04, "grad_norm": 1.8030241010019676, "learning_rate": 1.999734102595717e-05, "loss": 0.8179, "step": 365 }, { "epoch": 0.04, "grad_norm": 1.6616197218353204, "learning_rate": 1.9997264516111006e-05, "loss": 0.6571, "step": 366 }, { "epoch": 0.04, "grad_norm": 1.7328480272335298, "learning_rate": 1.9997186921218123e-05, "loss": 0.8508, "step": 367 }, { "epoch": 0.04, "grad_norm": 1.591325686668673, "learning_rate": 1.999710824128693e-05, "loss": 0.7019, "step": 368 }, { "epoch": 0.04, "grad_norm": 1.7290334221881392, "learning_rate": 1.9997028476325978e-05, "loss": 0.8135, "step": 369 }, { "epoch": 0.04, "grad_norm": 1.9651039231608671, "learning_rate": 1.9996947626343924e-05, "loss": 0.8036, "step": 370 }, { "epoch": 0.04, "grad_norm": 1.8616771729798702, "learning_rate": 1.9996865691349538e-05, "loss": 0.7986, "step": 371 }, { "epoch": 0.04, "grad_norm": 1.656935611345808, "learning_rate": 1.9996782671351713e-05, "loss": 0.7317, "step": 372 }, { "epoch": 0.04, "grad_norm": 1.8377731073141668, "learning_rate": 1.9996698566359462e-05, "loss": 0.7378, "step": 373 }, { "epoch": 0.04, "grad_norm": 1.8112813049650685, "learning_rate": 1.9996613376381916e-05, "loss": 0.7948, "step": 374 }, { "epoch": 0.04, "grad_norm": 2.0118770931741934, "learning_rate": 1.999652710142831e-05, "loss": 0.8042, "step": 375 }, { "epoch": 0.04, "grad_norm": 1.7478808769772007, "learning_rate": 1.999643974150802e-05, "loss": 0.7676, "step": 376 }, { "epoch": 0.04, "grad_norm": 1.7676338042836226, "learning_rate": 1.9996351296630525e-05, "loss": 0.7365, "step": 377 }, { "epoch": 0.04, "grad_norm": 1.7180183778756248, "learning_rate": 1.999626176680542e-05, "loss": 0.6893, "step": 378 }, { "epoch": 0.04, "grad_norm": 1.50083822299388, "learning_rate": 1.9996171152042425e-05, "loss": 0.6976, "step": 379 }, { "epoch": 0.04, "grad_norm": 2.003558525467704, "learning_rate": 1.999607945235137e-05, "loss": 0.8067, "step": 380 }, { "epoch": 0.04, "grad_norm": 1.8041757070189628, "learning_rate": 1.9995986667742217e-05, "loss": 0.7977, "step": 381 }, { "epoch": 0.04, "grad_norm": 1.5599959651379252, "learning_rate": 1.999589279822503e-05, "loss": 0.6672, "step": 382 }, { "epoch": 0.04, "grad_norm": 1.7795825546643322, "learning_rate": 1.9995797843809998e-05, "loss": 0.7974, "step": 383 }, { "epoch": 0.04, "grad_norm": 1.802976071998248, "learning_rate": 1.9995701804507423e-05, "loss": 0.7719, "step": 384 }, { "epoch": 0.04, "grad_norm": 1.5941440848302881, "learning_rate": 1.9995604680327735e-05, "loss": 0.7913, "step": 385 }, { "epoch": 0.04, "grad_norm": 1.753323779430216, "learning_rate": 1.9995506471281473e-05, "loss": 0.77, "step": 386 }, { "epoch": 0.04, "grad_norm": 1.6683717471946458, "learning_rate": 1.9995407177379295e-05, "loss": 0.8528, "step": 387 }, { "epoch": 0.04, "grad_norm": 1.6751255034212773, "learning_rate": 1.9995306798631978e-05, "loss": 0.7806, "step": 388 }, { "epoch": 0.04, "grad_norm": 1.7968947270822646, "learning_rate": 1.999520533505042e-05, "loss": 0.7733, "step": 389 }, { "epoch": 0.04, "grad_norm": 2.025330454366504, "learning_rate": 1.999510278664563e-05, "loss": 0.7973, "step": 390 }, { "epoch": 0.04, "grad_norm": 1.5378582514197812, "learning_rate": 1.9994999153428737e-05, "loss": 0.6657, "step": 391 }, { "epoch": 0.04, "grad_norm": 1.583531116731558, "learning_rate": 1.999489443541099e-05, "loss": 0.7282, "step": 392 }, { "epoch": 0.04, "grad_norm": 1.6735520561192077, "learning_rate": 1.9994788632603756e-05, "loss": 0.7124, "step": 393 }, { "epoch": 0.04, "grad_norm": 1.7029836297903527, "learning_rate": 1.9994681745018516e-05, "loss": 0.734, "step": 394 }, { "epoch": 0.04, "grad_norm": 1.664386176676482, "learning_rate": 1.999457377266687e-05, "loss": 0.6538, "step": 395 }, { "epoch": 0.04, "grad_norm": 1.7530802960298322, "learning_rate": 1.9994464715560543e-05, "loss": 0.6985, "step": 396 }, { "epoch": 0.04, "grad_norm": 1.6860590312819281, "learning_rate": 1.9994354573711363e-05, "loss": 0.7206, "step": 397 }, { "epoch": 0.04, "grad_norm": 2.0901210609686722, "learning_rate": 1.999424334713129e-05, "loss": 0.7572, "step": 398 }, { "epoch": 0.04, "grad_norm": 1.8874014333140492, "learning_rate": 1.9994131035832394e-05, "loss": 0.782, "step": 399 }, { "epoch": 0.04, "grad_norm": 1.6553991557065093, "learning_rate": 1.9994017639826867e-05, "loss": 0.9557, "step": 400 }, { "epoch": 0.04, "grad_norm": 1.6718485788810615, "learning_rate": 1.999390315912701e-05, "loss": 0.7127, "step": 401 }, { "epoch": 0.04, "grad_norm": 1.7304276426396776, "learning_rate": 1.9993787593745254e-05, "loss": 0.7887, "step": 402 }, { "epoch": 0.04, "grad_norm": 1.8547453550237238, "learning_rate": 1.999367094369414e-05, "loss": 0.7698, "step": 403 }, { "epoch": 0.04, "grad_norm": 2.0608110132252917, "learning_rate": 1.999355320898633e-05, "loss": 0.7771, "step": 404 }, { "epoch": 0.04, "grad_norm": 1.6305518417499105, "learning_rate": 1.99934343896346e-05, "loss": 0.6427, "step": 405 }, { "epoch": 0.04, "grad_norm": 1.6217405497359352, "learning_rate": 1.9993314485651848e-05, "loss": 0.7834, "step": 406 }, { "epoch": 0.04, "grad_norm": 1.620804556556247, "learning_rate": 1.9993193497051084e-05, "loss": 0.8264, "step": 407 }, { "epoch": 0.04, "grad_norm": 1.7909076263383104, "learning_rate": 1.999307142384544e-05, "loss": 0.8092, "step": 408 }, { "epoch": 0.04, "grad_norm": 1.70064491469979, "learning_rate": 1.9992948266048174e-05, "loss": 0.821, "step": 409 }, { "epoch": 0.04, "grad_norm": 1.7059610993511671, "learning_rate": 1.999282402367264e-05, "loss": 0.7432, "step": 410 }, { "epoch": 0.04, "grad_norm": 1.5692469342874094, "learning_rate": 1.999269869673233e-05, "loss": 0.6727, "step": 411 }, { "epoch": 0.04, "grad_norm": 1.7378065054799623, "learning_rate": 1.9992572285240848e-05, "loss": 0.841, "step": 412 }, { "epoch": 0.04, "grad_norm": 1.8668129805207883, "learning_rate": 1.999244478921191e-05, "loss": 0.7503, "step": 413 }, { "epoch": 0.04, "grad_norm": 1.9060504553017656, "learning_rate": 1.9992316208659352e-05, "loss": 0.7054, "step": 414 }, { "epoch": 0.04, "grad_norm": 1.8212929976354297, "learning_rate": 1.9992186543597133e-05, "loss": 0.8107, "step": 415 }, { "epoch": 0.04, "grad_norm": 1.809192974400173, "learning_rate": 1.9992055794039325e-05, "loss": 0.7067, "step": 416 }, { "epoch": 0.04, "grad_norm": 1.5465827774015835, "learning_rate": 1.9991923960000116e-05, "loss": 0.6662, "step": 417 }, { "epoch": 0.04, "grad_norm": 1.5850884512167587, "learning_rate": 1.999179104149382e-05, "loss": 0.7838, "step": 418 }, { "epoch": 0.04, "grad_norm": 1.6622022535939016, "learning_rate": 1.999165703853486e-05, "loss": 0.7488, "step": 419 }, { "epoch": 0.04, "grad_norm": 1.6903905078454566, "learning_rate": 1.9991521951137783e-05, "loss": 0.7443, "step": 420 }, { "epoch": 0.04, "grad_norm": 1.6687306292307453, "learning_rate": 1.9991385779317246e-05, "loss": 0.723, "step": 421 }, { "epoch": 0.04, "grad_norm": 1.7219109927338176, "learning_rate": 1.999124852308803e-05, "loss": 0.78, "step": 422 }, { "epoch": 0.04, "grad_norm": 1.8243785936412005, "learning_rate": 1.9991110182465032e-05, "loss": 0.6784, "step": 423 }, { "epoch": 0.04, "grad_norm": 1.6490323132123654, "learning_rate": 1.999097075746327e-05, "loss": 0.6888, "step": 424 }, { "epoch": 0.04, "grad_norm": 1.7346088865508633, "learning_rate": 1.999083024809787e-05, "loss": 0.785, "step": 425 }, { "epoch": 0.04, "grad_norm": 1.6767037997897496, "learning_rate": 1.999068865438409e-05, "loss": 0.746, "step": 426 }, { "epoch": 0.04, "grad_norm": 1.7701958205287336, "learning_rate": 1.999054597633729e-05, "loss": 0.7477, "step": 427 }, { "epoch": 0.04, "grad_norm": 1.6835840347014255, "learning_rate": 1.999040221397296e-05, "loss": 0.7634, "step": 428 }, { "epoch": 0.04, "grad_norm": 1.7769492248134329, "learning_rate": 1.99902573673067e-05, "loss": 0.7342, "step": 429 }, { "epoch": 0.04, "grad_norm": 1.7048105843168924, "learning_rate": 1.9990111436354237e-05, "loss": 0.6987, "step": 430 }, { "epoch": 0.04, "grad_norm": 1.5761305862194244, "learning_rate": 1.99899644211314e-05, "loss": 0.7776, "step": 431 }, { "epoch": 0.04, "grad_norm": 1.74082648576072, "learning_rate": 1.9989816321654155e-05, "loss": 0.751, "step": 432 }, { "epoch": 0.04, "grad_norm": 1.8126076461111138, "learning_rate": 1.9989667137938573e-05, "loss": 0.7662, "step": 433 }, { "epoch": 0.04, "grad_norm": 1.6477682572902752, "learning_rate": 1.998951687000084e-05, "loss": 0.7225, "step": 434 }, { "epoch": 0.04, "grad_norm": 1.7702013292110663, "learning_rate": 1.998936551785727e-05, "loss": 0.828, "step": 435 }, { "epoch": 0.04, "grad_norm": 1.7479554654572946, "learning_rate": 1.9989213081524293e-05, "loss": 0.7605, "step": 436 }, { "epoch": 0.04, "grad_norm": 1.7333751540111162, "learning_rate": 1.9989059561018448e-05, "loss": 0.7445, "step": 437 }, { "epoch": 0.04, "grad_norm": 1.8159978064450342, "learning_rate": 1.9988904956356395e-05, "loss": 0.7398, "step": 438 }, { "epoch": 0.04, "grad_norm": 1.6659874749639998, "learning_rate": 1.998874926755492e-05, "loss": 0.8073, "step": 439 }, { "epoch": 0.04, "grad_norm": 1.7995914872407046, "learning_rate": 1.9988592494630922e-05, "loss": 0.9021, "step": 440 }, { "epoch": 0.04, "grad_norm": 1.7213002946596805, "learning_rate": 1.998843463760141e-05, "loss": 0.7034, "step": 441 }, { "epoch": 0.04, "grad_norm": 1.7732611112379235, "learning_rate": 1.998827569648352e-05, "loss": 0.738, "step": 442 }, { "epoch": 0.05, "grad_norm": 2.0593757365062655, "learning_rate": 1.9988115671294502e-05, "loss": 0.8003, "step": 443 }, { "epoch": 0.05, "grad_norm": 1.5199583294558163, "learning_rate": 1.9987954562051724e-05, "loss": 0.7742, "step": 444 }, { "epoch": 0.05, "grad_norm": 1.6694614497357176, "learning_rate": 1.9987792368772677e-05, "loss": 0.7096, "step": 445 }, { "epoch": 0.05, "grad_norm": 1.757071457535054, "learning_rate": 1.9987629091474955e-05, "loss": 0.7648, "step": 446 }, { "epoch": 0.05, "grad_norm": 1.4926165767089656, "learning_rate": 1.9987464730176285e-05, "loss": 0.6738, "step": 447 }, { "epoch": 0.05, "grad_norm": 1.6278470687032995, "learning_rate": 1.9987299284894505e-05, "loss": 0.6468, "step": 448 }, { "epoch": 0.05, "grad_norm": 1.5928480530517506, "learning_rate": 1.9987132755647574e-05, "loss": 0.733, "step": 449 }, { "epoch": 0.05, "grad_norm": 1.954406246297614, "learning_rate": 1.9986965142453562e-05, "loss": 0.7253, "step": 450 }, { "epoch": 0.05, "grad_norm": 1.677578009697896, "learning_rate": 1.998679644533066e-05, "loss": 0.7313, "step": 451 }, { "epoch": 0.05, "grad_norm": 1.756632357254643, "learning_rate": 1.998662666429718e-05, "loss": 0.7917, "step": 452 }, { "epoch": 0.05, "grad_norm": 1.6989063531350133, "learning_rate": 1.9986455799371555e-05, "loss": 0.7377, "step": 453 }, { "epoch": 0.05, "grad_norm": 1.5734631073088516, "learning_rate": 1.9986283850572317e-05, "loss": 0.7689, "step": 454 }, { "epoch": 0.05, "grad_norm": 1.6438195775524247, "learning_rate": 1.998611081791814e-05, "loss": 0.6276, "step": 455 }, { "epoch": 0.05, "grad_norm": 1.7470830793024756, "learning_rate": 1.9985936701427797e-05, "loss": 0.8255, "step": 456 }, { "epoch": 0.05, "grad_norm": 1.627502692382246, "learning_rate": 1.998576150112019e-05, "loss": 0.8686, "step": 457 }, { "epoch": 0.05, "grad_norm": 1.6742927725469114, "learning_rate": 1.9985585217014326e-05, "loss": 0.756, "step": 458 }, { "epoch": 0.05, "grad_norm": 1.806975380520805, "learning_rate": 1.9985407849129346e-05, "loss": 0.8736, "step": 459 }, { "epoch": 0.05, "grad_norm": 1.6601591312990973, "learning_rate": 1.9985229397484504e-05, "loss": 0.7521, "step": 460 }, { "epoch": 0.05, "grad_norm": 1.793242802316679, "learning_rate": 1.9985049862099156e-05, "loss": 0.7921, "step": 461 }, { "epoch": 0.05, "grad_norm": 1.6875996812673835, "learning_rate": 1.9984869242992798e-05, "loss": 0.8031, "step": 462 }, { "epoch": 0.05, "grad_norm": 1.5964038305874215, "learning_rate": 1.9984687540185026e-05, "loss": 0.63, "step": 463 }, { "epoch": 0.05, "grad_norm": 1.7710598946573337, "learning_rate": 1.9984504753695567e-05, "loss": 0.7269, "step": 464 }, { "epoch": 0.05, "grad_norm": 1.5836628328169005, "learning_rate": 1.9984320883544254e-05, "loss": 0.7293, "step": 465 }, { "epoch": 0.05, "grad_norm": 1.5838641530722382, "learning_rate": 1.998413592975105e-05, "loss": 0.6755, "step": 466 }, { "epoch": 0.05, "grad_norm": 1.4998685973317545, "learning_rate": 1.9983949892336024e-05, "loss": 0.6839, "step": 467 }, { "epoch": 0.05, "grad_norm": 1.8559871084679145, "learning_rate": 1.998376277131937e-05, "loss": 0.8468, "step": 468 }, { "epoch": 0.05, "grad_norm": 1.794841563871742, "learning_rate": 1.998357456672139e-05, "loss": 0.7864, "step": 469 }, { "epoch": 0.05, "grad_norm": 1.6493411230580337, "learning_rate": 1.9983385278562524e-05, "loss": 0.7253, "step": 470 }, { "epoch": 0.05, "grad_norm": 1.8178196745533752, "learning_rate": 1.9983194906863305e-05, "loss": 0.8109, "step": 471 }, { "epoch": 0.05, "grad_norm": 1.6890060467396713, "learning_rate": 1.99830034516444e-05, "loss": 0.744, "step": 472 }, { "epoch": 0.05, "grad_norm": 1.7568201017705833, "learning_rate": 1.9982810912926586e-05, "loss": 0.8342, "step": 473 }, { "epoch": 0.05, "grad_norm": 1.6989251017754607, "learning_rate": 1.998261729073076e-05, "loss": 0.77, "step": 474 }, { "epoch": 0.05, "grad_norm": 1.673235968940527, "learning_rate": 1.998242258507794e-05, "loss": 0.8354, "step": 475 }, { "epoch": 0.05, "grad_norm": 1.777489272545835, "learning_rate": 1.9982226795989255e-05, "loss": 0.7293, "step": 476 }, { "epoch": 0.05, "grad_norm": 1.9007324938155739, "learning_rate": 1.9982029923485957e-05, "loss": 0.8945, "step": 477 }, { "epoch": 0.05, "grad_norm": 1.695960720362699, "learning_rate": 1.9981831967589412e-05, "loss": 0.7604, "step": 478 }, { "epoch": 0.05, "grad_norm": 1.546497291074308, "learning_rate": 1.9981632928321103e-05, "loss": 0.747, "step": 479 }, { "epoch": 0.05, "grad_norm": 1.7995388123021736, "learning_rate": 1.9981432805702638e-05, "loss": 0.7974, "step": 480 }, { "epoch": 0.05, "grad_norm": 1.8867834481521102, "learning_rate": 1.998123159975573e-05, "loss": 0.7991, "step": 481 }, { "epoch": 0.05, "grad_norm": 1.8366610120829818, "learning_rate": 1.9981029310502227e-05, "loss": 0.762, "step": 482 }, { "epoch": 0.05, "grad_norm": 1.5535863887360595, "learning_rate": 1.9980825937964074e-05, "loss": 0.8253, "step": 483 }, { "epoch": 0.05, "grad_norm": 1.6730007484155052, "learning_rate": 1.9980621482163348e-05, "loss": 0.6544, "step": 484 }, { "epoch": 0.05, "grad_norm": 1.828936812673665, "learning_rate": 1.998041594312224e-05, "loss": 0.7863, "step": 485 }, { "epoch": 0.05, "grad_norm": 1.669970570478486, "learning_rate": 1.998020932086306e-05, "loss": 0.8412, "step": 486 }, { "epoch": 0.05, "grad_norm": 1.730258104372564, "learning_rate": 1.9980001615408228e-05, "loss": 0.8279, "step": 487 }, { "epoch": 0.05, "grad_norm": 1.7588302456093907, "learning_rate": 1.997979282678029e-05, "loss": 0.691, "step": 488 }, { "epoch": 0.05, "grad_norm": 1.5050103571137134, "learning_rate": 1.9979582955001908e-05, "loss": 0.6758, "step": 489 }, { "epoch": 0.05, "grad_norm": 1.8806382255625589, "learning_rate": 1.997937200009586e-05, "loss": 0.7779, "step": 490 }, { "epoch": 0.05, "grad_norm": 1.7261954107457222, "learning_rate": 1.9979159962085044e-05, "loss": 0.7572, "step": 491 }, { "epoch": 0.05, "grad_norm": 1.7225866766844808, "learning_rate": 1.997894684099247e-05, "loss": 0.8224, "step": 492 }, { "epoch": 0.05, "grad_norm": 1.6829420585393222, "learning_rate": 1.9978732636841264e-05, "loss": 0.6425, "step": 493 }, { "epoch": 0.05, "grad_norm": 1.7874342491112851, "learning_rate": 1.9978517349654684e-05, "loss": 0.7899, "step": 494 }, { "epoch": 0.05, "grad_norm": 1.94124149740623, "learning_rate": 1.9978300979456094e-05, "loss": 0.7637, "step": 495 }, { "epoch": 0.05, "grad_norm": 1.798068985507405, "learning_rate": 1.9978083526268974e-05, "loss": 0.9313, "step": 496 }, { "epoch": 0.05, "grad_norm": 1.7847619085647561, "learning_rate": 1.9977864990116926e-05, "loss": 0.8486, "step": 497 }, { "epoch": 0.05, "grad_norm": 1.6565790972074341, "learning_rate": 1.9977645371023673e-05, "loss": 0.807, "step": 498 }, { "epoch": 0.05, "grad_norm": 1.654235902274819, "learning_rate": 1.9977424669013044e-05, "loss": 0.724, "step": 499 }, { "epoch": 0.05, "grad_norm": 1.6431609076499223, "learning_rate": 1.9977202884109e-05, "loss": 0.6685, "step": 500 }, { "epoch": 0.05, "grad_norm": 1.643346718045025, "learning_rate": 1.9976980016335607e-05, "loss": 0.7285, "step": 501 }, { "epoch": 0.05, "grad_norm": 1.833324399036458, "learning_rate": 1.9976756065717056e-05, "loss": 0.8175, "step": 502 }, { "epoch": 0.05, "grad_norm": 1.6383228878544507, "learning_rate": 1.9976531032277653e-05, "loss": 0.736, "step": 503 }, { "epoch": 0.05, "grad_norm": 1.9735369455703395, "learning_rate": 1.9976304916041824e-05, "loss": 0.6961, "step": 504 }, { "epoch": 0.05, "grad_norm": 1.7905953530647531, "learning_rate": 1.9976077717034105e-05, "loss": 0.8517, "step": 505 }, { "epoch": 0.05, "grad_norm": 1.588535210734372, "learning_rate": 1.997584943527916e-05, "loss": 0.804, "step": 506 }, { "epoch": 0.05, "grad_norm": 1.627547386451281, "learning_rate": 1.9975620070801763e-05, "loss": 0.7761, "step": 507 }, { "epoch": 0.05, "grad_norm": 1.7265931965712278, "learning_rate": 1.997538962362681e-05, "loss": 0.7825, "step": 508 }, { "epoch": 0.05, "grad_norm": 1.847361586714884, "learning_rate": 1.997515809377931e-05, "loss": 0.7746, "step": 509 }, { "epoch": 0.05, "grad_norm": 1.759148489609264, "learning_rate": 1.997492548128439e-05, "loss": 0.8152, "step": 510 }, { "epoch": 0.05, "grad_norm": 1.822969884000541, "learning_rate": 1.9974691786167303e-05, "loss": 0.7323, "step": 511 }, { "epoch": 0.05, "grad_norm": 1.9328903420977737, "learning_rate": 1.9974457008453408e-05, "loss": 0.791, "step": 512 }, { "epoch": 0.05, "grad_norm": 1.687975853272304, "learning_rate": 1.997422114816819e-05, "loss": 0.7652, "step": 513 }, { "epoch": 0.05, "grad_norm": 1.8072781375412006, "learning_rate": 1.9973984205337244e-05, "loss": 0.7159, "step": 514 }, { "epoch": 0.05, "grad_norm": 1.7775120206705086, "learning_rate": 1.997374617998629e-05, "loss": 0.8137, "step": 515 }, { "epoch": 0.05, "grad_norm": 1.6592282302457346, "learning_rate": 1.997350707214116e-05, "loss": 0.7775, "step": 516 }, { "epoch": 0.05, "grad_norm": 1.6719635951730791, "learning_rate": 1.99732668818278e-05, "loss": 0.827, "step": 517 }, { "epoch": 0.05, "grad_norm": 1.65300911080359, "learning_rate": 1.997302560907229e-05, "loss": 0.7261, "step": 518 }, { "epoch": 0.05, "grad_norm": 1.76543409775746, "learning_rate": 1.997278325390081e-05, "loss": 0.8259, "step": 519 }, { "epoch": 0.05, "grad_norm": 1.6797023411856506, "learning_rate": 1.9972539816339664e-05, "loss": 0.6834, "step": 520 }, { "epoch": 0.05, "grad_norm": 1.816213182694269, "learning_rate": 1.997229529641527e-05, "loss": 0.8897, "step": 521 }, { "epoch": 0.05, "grad_norm": 1.583562441314376, "learning_rate": 1.9972049694154175e-05, "loss": 0.7364, "step": 522 }, { "epoch": 0.05, "grad_norm": 1.5629931433036204, "learning_rate": 1.997180300958303e-05, "loss": 0.7036, "step": 523 }, { "epoch": 0.05, "grad_norm": 1.7244295918749826, "learning_rate": 1.997155524272861e-05, "loss": 0.8148, "step": 524 }, { "epoch": 0.05, "grad_norm": 1.9409193617437879, "learning_rate": 1.997130639361781e-05, "loss": 0.8473, "step": 525 }, { "epoch": 0.05, "grad_norm": 1.7157934465546114, "learning_rate": 1.9971056462277632e-05, "loss": 0.7863, "step": 526 }, { "epoch": 0.05, "grad_norm": 1.646802444838358, "learning_rate": 1.9970805448735205e-05, "loss": 0.7295, "step": 527 }, { "epoch": 0.05, "grad_norm": 1.625090042380927, "learning_rate": 1.9970553353017772e-05, "loss": 0.7481, "step": 528 }, { "epoch": 0.05, "grad_norm": 1.7295431401631154, "learning_rate": 1.9970300175152696e-05, "loss": 0.8117, "step": 529 }, { "epoch": 0.05, "grad_norm": 1.6140197901048001, "learning_rate": 1.997004591516745e-05, "loss": 0.7754, "step": 530 }, { "epoch": 0.05, "grad_norm": 1.6560281350322912, "learning_rate": 1.9969790573089637e-05, "loss": 0.6868, "step": 531 }, { "epoch": 0.05, "grad_norm": 1.7529556342448862, "learning_rate": 1.9969534148946965e-05, "loss": 0.6683, "step": 532 }, { "epoch": 0.05, "grad_norm": 1.7068734128772407, "learning_rate": 1.996927664276727e-05, "loss": 0.6886, "step": 533 }, { "epoch": 0.05, "grad_norm": 1.636282086916384, "learning_rate": 1.9969018054578497e-05, "loss": 0.707, "step": 534 }, { "epoch": 0.05, "grad_norm": 1.703668964082737, "learning_rate": 1.9968758384408715e-05, "loss": 0.8091, "step": 535 }, { "epoch": 0.05, "grad_norm": 1.6294150121655904, "learning_rate": 1.99684976322861e-05, "loss": 0.6922, "step": 536 }, { "epoch": 0.05, "grad_norm": 1.7678562963178264, "learning_rate": 1.9968235798238956e-05, "loss": 0.8531, "step": 537 }, { "epoch": 0.05, "grad_norm": 1.7785400771636022, "learning_rate": 1.9967972882295704e-05, "loss": 0.759, "step": 538 }, { "epoch": 0.05, "grad_norm": 2.028098506449123, "learning_rate": 1.9967708884484875e-05, "loss": 0.9118, "step": 539 }, { "epoch": 0.05, "grad_norm": 1.5347148064228826, "learning_rate": 1.996744380483513e-05, "loss": 0.6544, "step": 540 }, { "epoch": 0.06, "grad_norm": 1.820079444384609, "learning_rate": 1.9967177643375227e-05, "loss": 0.8159, "step": 541 }, { "epoch": 0.06, "grad_norm": 1.6671394749171597, "learning_rate": 1.9966910400134062e-05, "loss": 0.7221, "step": 542 }, { "epoch": 0.06, "grad_norm": 1.6788173634731034, "learning_rate": 1.996664207514064e-05, "loss": 0.7436, "step": 543 }, { "epoch": 0.06, "grad_norm": 1.5098704414985629, "learning_rate": 1.9966372668424083e-05, "loss": 0.6714, "step": 544 }, { "epoch": 0.06, "grad_norm": 1.5418486733686516, "learning_rate": 1.9966102180013625e-05, "loss": 0.758, "step": 545 }, { "epoch": 0.06, "grad_norm": 1.7685637436623223, "learning_rate": 1.9965830609938627e-05, "loss": 0.7747, "step": 546 }, { "epoch": 0.06, "grad_norm": 1.7274011670181888, "learning_rate": 1.996555795822857e-05, "loss": 0.8325, "step": 547 }, { "epoch": 0.06, "grad_norm": 1.6679061402459576, "learning_rate": 1.9965284224913034e-05, "loss": 0.8263, "step": 548 }, { "epoch": 0.06, "grad_norm": 1.573807291130018, "learning_rate": 1.9965009410021742e-05, "loss": 0.713, "step": 549 }, { "epoch": 0.06, "grad_norm": 1.8231463328916808, "learning_rate": 1.9964733513584508e-05, "loss": 0.7005, "step": 550 }, { "epoch": 0.06, "grad_norm": 1.6741907583067228, "learning_rate": 1.9964456535631287e-05, "loss": 0.7334, "step": 551 }, { "epoch": 0.06, "grad_norm": 1.79918584488658, "learning_rate": 1.9964178476192132e-05, "loss": 0.7676, "step": 552 }, { "epoch": 0.06, "grad_norm": 1.811502444265952, "learning_rate": 1.9963899335297227e-05, "loss": 0.7321, "step": 553 }, { "epoch": 0.06, "grad_norm": 1.7755800902867478, "learning_rate": 1.9963619112976867e-05, "loss": 0.7925, "step": 554 }, { "epoch": 0.06, "grad_norm": 1.5597918230704464, "learning_rate": 1.9963337809261465e-05, "loss": 0.7272, "step": 555 }, { "epoch": 0.06, "grad_norm": 1.864093123130176, "learning_rate": 1.9963055424181556e-05, "loss": 0.7669, "step": 556 }, { "epoch": 0.06, "grad_norm": 2.389358668807301, "learning_rate": 1.9962771957767782e-05, "loss": 0.7292, "step": 557 }, { "epoch": 0.06, "grad_norm": 1.74596830181998, "learning_rate": 1.9962487410050915e-05, "loss": 0.7526, "step": 558 }, { "epoch": 0.06, "grad_norm": 1.8385868687010367, "learning_rate": 1.9962201781061833e-05, "loss": 0.8057, "step": 559 }, { "epoch": 0.06, "grad_norm": 1.8668615928182508, "learning_rate": 1.996191507083154e-05, "loss": 0.792, "step": 560 }, { "epoch": 0.06, "grad_norm": 1.7652860661908676, "learning_rate": 1.9961627279391154e-05, "loss": 0.7552, "step": 561 }, { "epoch": 0.06, "grad_norm": 1.739278918825244, "learning_rate": 1.9961338406771908e-05, "loss": 0.8162, "step": 562 }, { "epoch": 0.06, "grad_norm": 1.6244770953157561, "learning_rate": 1.996104845300516e-05, "loss": 0.7304, "step": 563 }, { "epoch": 0.06, "grad_norm": 1.7582338674779954, "learning_rate": 1.996075741812237e-05, "loss": 0.7494, "step": 564 }, { "epoch": 0.06, "grad_norm": 1.7029228717071425, "learning_rate": 1.9960465302155132e-05, "loss": 0.7869, "step": 565 }, { "epoch": 0.06, "grad_norm": 1.6435160351803817, "learning_rate": 1.996017210513515e-05, "loss": 0.7812, "step": 566 }, { "epoch": 0.06, "grad_norm": 1.6801997517535343, "learning_rate": 1.9959877827094248e-05, "loss": 0.765, "step": 567 }, { "epoch": 0.06, "grad_norm": 1.835359427744583, "learning_rate": 1.995958246806436e-05, "loss": 0.8702, "step": 568 }, { "epoch": 0.06, "grad_norm": 1.7548856870112344, "learning_rate": 1.995928602807755e-05, "loss": 0.891, "step": 569 }, { "epoch": 0.06, "grad_norm": 1.6249171481144946, "learning_rate": 1.9958988507165985e-05, "loss": 0.7405, "step": 570 }, { "epoch": 0.06, "grad_norm": 1.8747074662765073, "learning_rate": 1.9958689905361956e-05, "loss": 0.8896, "step": 571 }, { "epoch": 0.06, "grad_norm": 1.685467232191064, "learning_rate": 1.9958390222697875e-05, "loss": 0.8109, "step": 572 }, { "epoch": 0.06, "grad_norm": 1.5586476170652768, "learning_rate": 1.9958089459206272e-05, "loss": 0.8215, "step": 573 }, { "epoch": 0.06, "grad_norm": 1.9110312441730324, "learning_rate": 1.9957787614919782e-05, "loss": 0.683, "step": 574 }, { "epoch": 0.06, "grad_norm": 1.795968523861863, "learning_rate": 1.9957484689871167e-05, "loss": 0.7298, "step": 575 }, { "epoch": 0.06, "grad_norm": 1.619585532371068, "learning_rate": 1.995718068409331e-05, "loss": 0.8326, "step": 576 }, { "epoch": 0.06, "grad_norm": 1.686148657322329, "learning_rate": 1.99568755976192e-05, "loss": 0.7904, "step": 577 }, { "epoch": 0.06, "grad_norm": 1.8980209427585717, "learning_rate": 1.9956569430481954e-05, "loss": 0.8522, "step": 578 }, { "epoch": 0.06, "grad_norm": 1.8223249537789892, "learning_rate": 1.99562621827148e-05, "loss": 0.7409, "step": 579 }, { "epoch": 0.06, "grad_norm": 1.6249663857855754, "learning_rate": 1.9955953854351083e-05, "loss": 0.7908, "step": 580 }, { "epoch": 0.06, "grad_norm": 1.6680154013641537, "learning_rate": 1.995564444542427e-05, "loss": 0.8155, "step": 581 }, { "epoch": 0.06, "grad_norm": 1.7437358002377406, "learning_rate": 1.9955333955967936e-05, "loss": 0.7572, "step": 582 }, { "epoch": 0.06, "grad_norm": 1.7442385392988993, "learning_rate": 1.9955022386015792e-05, "loss": 0.7381, "step": 583 }, { "epoch": 0.06, "grad_norm": 1.6455585855805404, "learning_rate": 1.995470973560164e-05, "loss": 0.7625, "step": 584 }, { "epoch": 0.06, "grad_norm": 1.5922361876502813, "learning_rate": 1.995439600475943e-05, "loss": 0.7293, "step": 585 }, { "epoch": 0.06, "grad_norm": 1.5827693035694907, "learning_rate": 1.9954081193523197e-05, "loss": 0.8177, "step": 586 }, { "epoch": 0.06, "grad_norm": 1.7369402384800399, "learning_rate": 1.9953765301927116e-05, "loss": 0.761, "step": 587 }, { "epoch": 0.06, "grad_norm": 1.7496504255649807, "learning_rate": 1.9953448330005472e-05, "loss": 0.7633, "step": 588 }, { "epoch": 0.06, "grad_norm": 1.6992475664068267, "learning_rate": 1.9953130277792664e-05, "loss": 0.7109, "step": 589 }, { "epoch": 0.06, "grad_norm": 1.7758089119133673, "learning_rate": 1.9952811145323213e-05, "loss": 0.8003, "step": 590 }, { "epoch": 0.06, "grad_norm": 1.5105006477745304, "learning_rate": 1.995249093263176e-05, "loss": 0.6333, "step": 591 }, { "epoch": 0.06, "grad_norm": 1.5506652898849551, "learning_rate": 1.9952169639753055e-05, "loss": 0.7387, "step": 592 }, { "epoch": 0.06, "grad_norm": 1.6406097273031046, "learning_rate": 1.995184726672197e-05, "loss": 0.754, "step": 593 }, { "epoch": 0.06, "grad_norm": 1.6937146704480206, "learning_rate": 1.9951523813573496e-05, "loss": 0.6996, "step": 594 }, { "epoch": 0.06, "grad_norm": 1.6863369020459251, "learning_rate": 1.9951199280342732e-05, "loss": 0.7959, "step": 595 }, { "epoch": 0.06, "grad_norm": 1.601016536813647, "learning_rate": 1.9950873667064906e-05, "loss": 0.7021, "step": 596 }, { "epoch": 0.06, "grad_norm": 1.7116189222241849, "learning_rate": 1.995054697377536e-05, "loss": 0.7783, "step": 597 }, { "epoch": 0.06, "grad_norm": 1.7443096421429085, "learning_rate": 1.9950219200509547e-05, "loss": 0.6914, "step": 598 }, { "epoch": 0.06, "grad_norm": 1.8021785056836013, "learning_rate": 1.9949890347303047e-05, "loss": 0.7945, "step": 599 }, { "epoch": 0.06, "grad_norm": 1.6555910571792856, "learning_rate": 1.9949560414191546e-05, "loss": 0.7046, "step": 600 }, { "epoch": 0.06, "grad_norm": 1.7326941535085139, "learning_rate": 1.9949229401210855e-05, "loss": 0.8461, "step": 601 }, { "epoch": 0.06, "grad_norm": 1.6370922161012353, "learning_rate": 1.9948897308396903e-05, "loss": 0.6904, "step": 602 }, { "epoch": 0.06, "grad_norm": 1.6119600425769653, "learning_rate": 1.9948564135785733e-05, "loss": 0.8214, "step": 603 }, { "epoch": 0.06, "grad_norm": 1.7636336681905245, "learning_rate": 1.9948229883413503e-05, "loss": 0.8049, "step": 604 }, { "epoch": 0.06, "grad_norm": 1.766219115503329, "learning_rate": 1.994789455131649e-05, "loss": 0.6965, "step": 605 }, { "epoch": 0.06, "grad_norm": 1.7177562818571706, "learning_rate": 1.994755813953109e-05, "loss": 0.8001, "step": 606 }, { "epoch": 0.06, "grad_norm": 1.720211764446672, "learning_rate": 1.994722064809382e-05, "loss": 0.8151, "step": 607 }, { "epoch": 0.06, "grad_norm": 1.7469479452575274, "learning_rate": 1.9946882077041304e-05, "loss": 0.7806, "step": 608 }, { "epoch": 0.06, "grad_norm": 1.85793386115385, "learning_rate": 1.9946542426410295e-05, "loss": 0.7966, "step": 609 }, { "epoch": 0.06, "grad_norm": 1.7629818781691207, "learning_rate": 1.9946201696237645e-05, "loss": 0.7873, "step": 610 }, { "epoch": 0.06, "grad_norm": 1.6830394408562603, "learning_rate": 1.9945859886560346e-05, "loss": 0.7506, "step": 611 }, { "epoch": 0.06, "grad_norm": 1.7860842728889232, "learning_rate": 1.9945516997415493e-05, "loss": 0.7918, "step": 612 }, { "epoch": 0.06, "grad_norm": 1.6513821786445477, "learning_rate": 1.9945173028840296e-05, "loss": 0.8549, "step": 613 }, { "epoch": 0.06, "grad_norm": 1.9218388455242161, "learning_rate": 1.9944827980872094e-05, "loss": 0.8566, "step": 614 }, { "epoch": 0.06, "grad_norm": 1.7239023872462365, "learning_rate": 1.9944481853548335e-05, "loss": 0.7107, "step": 615 }, { "epoch": 0.06, "grad_norm": 1.7916304919841761, "learning_rate": 1.9944134646906588e-05, "loss": 0.8648, "step": 616 }, { "epoch": 0.06, "grad_norm": 1.4579672832201422, "learning_rate": 1.994378636098453e-05, "loss": 0.7886, "step": 617 }, { "epoch": 0.06, "grad_norm": 1.5409736030803547, "learning_rate": 1.9943436995819968e-05, "loss": 0.6151, "step": 618 }, { "epoch": 0.06, "grad_norm": 1.7127356435439465, "learning_rate": 1.9943086551450816e-05, "loss": 0.7307, "step": 619 }, { "epoch": 0.06, "grad_norm": 1.5792128695492997, "learning_rate": 1.9942735027915113e-05, "loss": 0.7638, "step": 620 }, { "epoch": 0.06, "grad_norm": 1.7555416285049137, "learning_rate": 1.994238242525101e-05, "loss": 0.8322, "step": 621 }, { "epoch": 0.06, "grad_norm": 1.805621761550607, "learning_rate": 1.9942028743496773e-05, "loss": 0.744, "step": 622 }, { "epoch": 0.06, "grad_norm": 1.8466682699690558, "learning_rate": 1.9941673982690795e-05, "loss": 0.8617, "step": 623 }, { "epoch": 0.06, "grad_norm": 1.6590526970977735, "learning_rate": 1.9941318142871576e-05, "loss": 0.6979, "step": 624 }, { "epoch": 0.06, "grad_norm": 1.7786400594149645, "learning_rate": 1.9940961224077736e-05, "loss": 0.7248, "step": 625 }, { "epoch": 0.06, "grad_norm": 1.6788965960428635, "learning_rate": 1.9940603226348015e-05, "loss": 0.7699, "step": 626 }, { "epoch": 0.06, "grad_norm": 1.9265619143980974, "learning_rate": 1.994024414972127e-05, "loss": 0.8173, "step": 627 }, { "epoch": 0.06, "grad_norm": 1.715611931609657, "learning_rate": 1.9939883994236468e-05, "loss": 0.6624, "step": 628 }, { "epoch": 0.06, "grad_norm": 1.5987849831467449, "learning_rate": 1.99395227599327e-05, "loss": 0.8136, "step": 629 }, { "epoch": 0.06, "grad_norm": 1.6986730658985771, "learning_rate": 1.9939160446849176e-05, "loss": 0.7376, "step": 630 }, { "epoch": 0.06, "grad_norm": 1.6545697033122497, "learning_rate": 1.9938797055025213e-05, "loss": 0.7149, "step": 631 }, { "epoch": 0.06, "grad_norm": 1.994617700256768, "learning_rate": 1.993843258450026e-05, "loss": 0.8161, "step": 632 }, { "epoch": 0.06, "grad_norm": 1.8732615461651878, "learning_rate": 1.9938067035313865e-05, "loss": 0.7736, "step": 633 }, { "epoch": 0.06, "grad_norm": 1.6800973995399204, "learning_rate": 1.993770040750571e-05, "loss": 0.8434, "step": 634 }, { "epoch": 0.06, "grad_norm": 1.6925647661553942, "learning_rate": 1.9937332701115585e-05, "loss": 0.788, "step": 635 }, { "epoch": 0.06, "grad_norm": 1.7217431415270548, "learning_rate": 1.993696391618339e-05, "loss": 0.6426, "step": 636 }, { "epoch": 0.06, "grad_norm": 1.6743741825967342, "learning_rate": 1.993659405274917e-05, "loss": 0.7904, "step": 637 }, { "epoch": 0.06, "grad_norm": 1.7108692982520572, "learning_rate": 1.993622311085305e-05, "loss": 0.6979, "step": 638 }, { "epoch": 0.06, "grad_norm": 1.6256885715656448, "learning_rate": 1.9935851090535295e-05, "loss": 0.7019, "step": 639 }, { "epoch": 0.07, "grad_norm": 1.6229470491886617, "learning_rate": 1.993547799183629e-05, "loss": 0.7252, "step": 640 }, { "epoch": 0.07, "grad_norm": 1.5160911920409792, "learning_rate": 1.9935103814796516e-05, "loss": 0.7873, "step": 641 }, { "epoch": 0.07, "grad_norm": 1.5804423279608668, "learning_rate": 1.9934728559456592e-05, "loss": 0.8012, "step": 642 }, { "epoch": 0.07, "grad_norm": 1.4519598120061032, "learning_rate": 1.9934352225857245e-05, "loss": 0.7251, "step": 643 }, { "epoch": 0.07, "grad_norm": 1.7263353286073957, "learning_rate": 1.993397481403932e-05, "loss": 0.8427, "step": 644 }, { "epoch": 0.07, "grad_norm": 1.637774815064037, "learning_rate": 1.993359632404378e-05, "loss": 0.7888, "step": 645 }, { "epoch": 0.07, "grad_norm": 1.7467785786117918, "learning_rate": 1.9933216755911706e-05, "loss": 0.7857, "step": 646 }, { "epoch": 0.07, "grad_norm": 1.5876144203148541, "learning_rate": 1.9932836109684287e-05, "loss": 0.7362, "step": 647 }, { "epoch": 0.07, "grad_norm": 1.697451802836231, "learning_rate": 1.993245438540284e-05, "loss": 0.7894, "step": 648 }, { "epoch": 0.07, "grad_norm": 1.7166828340233518, "learning_rate": 1.9932071583108796e-05, "loss": 0.7418, "step": 649 }, { "epoch": 0.07, "grad_norm": 1.4878913435621886, "learning_rate": 1.9931687702843706e-05, "loss": 0.7117, "step": 650 }, { "epoch": 0.07, "grad_norm": 1.9177940612995616, "learning_rate": 1.9931302744649224e-05, "loss": 0.8025, "step": 651 }, { "epoch": 0.07, "grad_norm": 1.4139266215501305, "learning_rate": 1.993091670856714e-05, "loss": 0.7023, "step": 652 }, { "epoch": 0.07, "grad_norm": 1.6439224416147458, "learning_rate": 1.993052959463935e-05, "loss": 0.7698, "step": 653 }, { "epoch": 0.07, "grad_norm": 1.5886834224506095, "learning_rate": 1.9930141402907867e-05, "loss": 0.7759, "step": 654 }, { "epoch": 0.07, "grad_norm": 1.5458636504703784, "learning_rate": 1.9929752133414827e-05, "loss": 0.7419, "step": 655 }, { "epoch": 0.07, "grad_norm": 1.5464792132429968, "learning_rate": 1.9929361786202476e-05, "loss": 0.6632, "step": 656 }, { "epoch": 0.07, "grad_norm": 1.525133701113065, "learning_rate": 1.9928970361313182e-05, "loss": 0.8723, "step": 657 }, { "epoch": 0.07, "grad_norm": 1.6124831659371417, "learning_rate": 1.9928577858789424e-05, "loss": 0.806, "step": 658 }, { "epoch": 0.07, "grad_norm": 1.4701753543539475, "learning_rate": 1.992818427867381e-05, "loss": 0.6918, "step": 659 }, { "epoch": 0.07, "grad_norm": 1.6849551194071541, "learning_rate": 1.9927789621009045e-05, "loss": 0.8426, "step": 660 }, { "epoch": 0.07, "grad_norm": 1.8884047795845, "learning_rate": 1.9927393885837975e-05, "loss": 0.695, "step": 661 }, { "epoch": 0.07, "grad_norm": 1.8116947046420835, "learning_rate": 1.9926997073203544e-05, "loss": 0.8473, "step": 662 }, { "epoch": 0.07, "grad_norm": 1.8689748277748406, "learning_rate": 1.9926599183148822e-05, "loss": 0.8169, "step": 663 }, { "epoch": 0.07, "grad_norm": 1.994878462441299, "learning_rate": 1.9926200215716993e-05, "loss": 0.8405, "step": 664 }, { "epoch": 0.07, "grad_norm": 1.7987595227302102, "learning_rate": 1.992580017095136e-05, "loss": 0.7267, "step": 665 }, { "epoch": 0.07, "grad_norm": 1.5843737449052286, "learning_rate": 1.992539904889534e-05, "loss": 0.8127, "step": 666 }, { "epoch": 0.07, "grad_norm": 1.8387810874585708, "learning_rate": 1.992499684959247e-05, "loss": 0.7091, "step": 667 }, { "epoch": 0.07, "grad_norm": 1.7093730466761763, "learning_rate": 1.9924593573086398e-05, "loss": 0.7642, "step": 668 }, { "epoch": 0.07, "grad_norm": 1.7669829291305252, "learning_rate": 1.99241892194209e-05, "loss": 0.7177, "step": 669 }, { "epoch": 0.07, "grad_norm": 1.7859363978876226, "learning_rate": 1.9923783788639862e-05, "loss": 0.7864, "step": 670 }, { "epoch": 0.07, "grad_norm": 1.5913637995967829, "learning_rate": 1.992337728078728e-05, "loss": 0.7669, "step": 671 }, { "epoch": 0.07, "grad_norm": 1.9006919289403583, "learning_rate": 1.9922969695907278e-05, "loss": 0.8227, "step": 672 }, { "epoch": 0.07, "grad_norm": 1.5707279920402772, "learning_rate": 1.9922561034044095e-05, "loss": 0.7132, "step": 673 }, { "epoch": 0.07, "grad_norm": 2.0511635474237364, "learning_rate": 1.9922151295242085e-05, "loss": 0.777, "step": 674 }, { "epoch": 0.07, "grad_norm": 1.378420795935105, "learning_rate": 1.9921740479545716e-05, "loss": 0.6924, "step": 675 }, { "epoch": 0.07, "grad_norm": 1.3751596448402028, "learning_rate": 1.9921328586999574e-05, "loss": 0.7471, "step": 676 }, { "epoch": 0.07, "grad_norm": 1.6308793722944452, "learning_rate": 1.992091561764837e-05, "loss": 0.7539, "step": 677 }, { "epoch": 0.07, "grad_norm": 1.6200136862142986, "learning_rate": 1.9920501571536917e-05, "loss": 0.71, "step": 678 }, { "epoch": 0.07, "grad_norm": 1.6981092405574794, "learning_rate": 1.9920086448710162e-05, "loss": 0.7512, "step": 679 }, { "epoch": 0.07, "grad_norm": 1.731471177175868, "learning_rate": 1.9919670249213152e-05, "loss": 0.8579, "step": 680 }, { "epoch": 0.07, "grad_norm": 1.6461346547255145, "learning_rate": 1.9919252973091067e-05, "loss": 0.7589, "step": 681 }, { "epoch": 0.07, "grad_norm": 1.7313768240322136, "learning_rate": 1.991883462038919e-05, "loss": 0.8429, "step": 682 }, { "epoch": 0.07, "grad_norm": 1.6033143635975582, "learning_rate": 1.9918415191152927e-05, "loss": 0.7439, "step": 683 }, { "epoch": 0.07, "grad_norm": 1.6283606197499367, "learning_rate": 1.99179946854278e-05, "loss": 0.7209, "step": 684 }, { "epoch": 0.07, "grad_norm": 1.6895382664512686, "learning_rate": 1.9917573103259452e-05, "loss": 0.8396, "step": 685 }, { "epoch": 0.07, "grad_norm": 1.5021274979284158, "learning_rate": 1.9917150444693635e-05, "loss": 0.809, "step": 686 }, { "epoch": 0.07, "grad_norm": 1.5898326262688758, "learning_rate": 1.9916726709776228e-05, "loss": 0.7391, "step": 687 }, { "epoch": 0.07, "grad_norm": 1.7143323205939232, "learning_rate": 1.9916301898553215e-05, "loss": 0.8389, "step": 688 }, { "epoch": 0.07, "grad_norm": 1.6601964607064166, "learning_rate": 1.9915876011070705e-05, "loss": 0.7384, "step": 689 }, { "epoch": 0.07, "grad_norm": 1.6997295899037483, "learning_rate": 1.991544904737492e-05, "loss": 0.6985, "step": 690 }, { "epoch": 0.07, "grad_norm": 1.6338386462513637, "learning_rate": 1.9915021007512202e-05, "loss": 0.8418, "step": 691 }, { "epoch": 0.07, "grad_norm": 1.6619689013467975, "learning_rate": 1.991459189152901e-05, "loss": 0.828, "step": 692 }, { "epoch": 0.07, "grad_norm": 1.5415233753313728, "learning_rate": 1.991416169947191e-05, "loss": 0.7842, "step": 693 }, { "epoch": 0.07, "grad_norm": 1.7594928368716753, "learning_rate": 1.9913730431387603e-05, "loss": 0.7791, "step": 694 }, { "epoch": 0.07, "grad_norm": 1.8285621513513672, "learning_rate": 1.9913298087322886e-05, "loss": 0.8216, "step": 695 }, { "epoch": 0.07, "grad_norm": 1.6189878588401247, "learning_rate": 1.991286466732469e-05, "loss": 0.7305, "step": 696 }, { "epoch": 0.07, "grad_norm": 1.6523217899208054, "learning_rate": 1.9912430171440053e-05, "loss": 0.8201, "step": 697 }, { "epoch": 0.07, "grad_norm": 1.6392344267199934, "learning_rate": 1.9911994599716137e-05, "loss": 0.7456, "step": 698 }, { "epoch": 0.07, "grad_norm": 1.687768478994549, "learning_rate": 1.9911557952200212e-05, "loss": 0.7575, "step": 699 }, { "epoch": 0.07, "grad_norm": 1.7800920776702511, "learning_rate": 1.9911120228939668e-05, "loss": 0.7296, "step": 700 }, { "epoch": 0.07, "grad_norm": 1.7376436911092747, "learning_rate": 1.991068142998202e-05, "loss": 0.8336, "step": 701 }, { "epoch": 0.07, "grad_norm": 1.6366755441602803, "learning_rate": 1.991024155537489e-05, "loss": 0.6899, "step": 702 }, { "epoch": 0.07, "grad_norm": 1.6311542633007308, "learning_rate": 1.9909800605166013e-05, "loss": 0.7963, "step": 703 }, { "epoch": 0.07, "grad_norm": 1.6271019133596079, "learning_rate": 1.990935857940325e-05, "loss": 0.7225, "step": 704 }, { "epoch": 0.07, "grad_norm": 1.730023263842185, "learning_rate": 1.9908915478134584e-05, "loss": 0.8107, "step": 705 }, { "epoch": 0.07, "grad_norm": 1.7223462659250526, "learning_rate": 1.9908471301408097e-05, "loss": 0.8464, "step": 706 }, { "epoch": 0.07, "grad_norm": 1.766101862175367, "learning_rate": 1.9908026049272006e-05, "loss": 0.7489, "step": 707 }, { "epoch": 0.07, "grad_norm": 1.5577791461925623, "learning_rate": 1.990757972177463e-05, "loss": 0.7676, "step": 708 }, { "epoch": 0.07, "grad_norm": 1.7987244153202877, "learning_rate": 1.990713231896441e-05, "loss": 0.7746, "step": 709 }, { "epoch": 0.07, "grad_norm": 1.7553912474460873, "learning_rate": 1.990668384088991e-05, "loss": 0.8975, "step": 710 }, { "epoch": 0.07, "grad_norm": 1.7169730984255511, "learning_rate": 1.99062342875998e-05, "loss": 0.7652, "step": 711 }, { "epoch": 0.07, "grad_norm": 1.826842707928925, "learning_rate": 1.9905783659142878e-05, "loss": 0.7199, "step": 712 }, { "epoch": 0.07, "grad_norm": 1.7082093614480132, "learning_rate": 1.9905331955568045e-05, "loss": 0.694, "step": 713 }, { "epoch": 0.07, "grad_norm": 1.5829316805497582, "learning_rate": 1.990487917692433e-05, "loss": 0.7717, "step": 714 }, { "epoch": 0.07, "grad_norm": 1.6451440728127658, "learning_rate": 1.9904425323260875e-05, "loss": 0.7858, "step": 715 }, { "epoch": 0.07, "grad_norm": 1.652820322856562, "learning_rate": 1.990397039462694e-05, "loss": 0.7959, "step": 716 }, { "epoch": 0.07, "grad_norm": 1.7584740049172933, "learning_rate": 1.99035143910719e-05, "loss": 0.7561, "step": 717 }, { "epoch": 0.07, "grad_norm": 1.7493478475508397, "learning_rate": 1.990305731264525e-05, "loss": 0.7176, "step": 718 }, { "epoch": 0.07, "grad_norm": 1.618316837166389, "learning_rate": 1.990259915939659e-05, "loss": 0.7485, "step": 719 }, { "epoch": 0.07, "grad_norm": 1.6385372900613149, "learning_rate": 1.9902139931375654e-05, "loss": 0.8057, "step": 720 }, { "epoch": 0.07, "grad_norm": 1.5803614398859005, "learning_rate": 1.9901679628632277e-05, "loss": 0.6364, "step": 721 }, { "epoch": 0.07, "grad_norm": 1.620284724869963, "learning_rate": 1.9901218251216424e-05, "loss": 0.7402, "step": 722 }, { "epoch": 0.07, "grad_norm": 1.762040099161046, "learning_rate": 1.9900755799178165e-05, "loss": 0.7896, "step": 723 }, { "epoch": 0.07, "grad_norm": 1.6331588276201974, "learning_rate": 1.99002922725677e-05, "loss": 0.7898, "step": 724 }, { "epoch": 0.07, "grad_norm": 1.650574167609646, "learning_rate": 1.989982767143533e-05, "loss": 0.7415, "step": 725 }, { "epoch": 0.07, "grad_norm": 1.8140348040677017, "learning_rate": 1.9899361995831477e-05, "loss": 0.9059, "step": 726 }, { "epoch": 0.07, "grad_norm": 1.5401630519652945, "learning_rate": 1.989889524580669e-05, "loss": 0.6807, "step": 727 }, { "epoch": 0.07, "grad_norm": 1.6906440729063186, "learning_rate": 1.9898427421411627e-05, "loss": 0.6651, "step": 728 }, { "epoch": 0.07, "grad_norm": 1.7156359455773944, "learning_rate": 1.9897958522697066e-05, "loss": 0.7354, "step": 729 }, { "epoch": 0.07, "grad_norm": 1.6778297011427266, "learning_rate": 1.9897488549713887e-05, "loss": 0.853, "step": 730 }, { "epoch": 0.07, "grad_norm": 1.5822425468702135, "learning_rate": 1.9897017502513107e-05, "loss": 0.8437, "step": 731 }, { "epoch": 0.07, "grad_norm": 1.5697794417278896, "learning_rate": 1.9896545381145854e-05, "loss": 0.6893, "step": 732 }, { "epoch": 0.07, "grad_norm": 1.6609595195224915, "learning_rate": 1.9896072185663358e-05, "loss": 0.8496, "step": 733 }, { "epoch": 0.07, "grad_norm": 1.7210583504941446, "learning_rate": 1.989559791611699e-05, "loss": 0.7671, "step": 734 }, { "epoch": 0.07, "grad_norm": 1.6351024582932636, "learning_rate": 1.989512257255821e-05, "loss": 0.7643, "step": 735 }, { "epoch": 0.07, "grad_norm": 1.5373211139173386, "learning_rate": 1.9894646155038624e-05, "loss": 0.7101, "step": 736 }, { "epoch": 0.07, "grad_norm": 1.6909968283815922, "learning_rate": 1.989416866360993e-05, "loss": 0.7985, "step": 737 }, { "epoch": 0.08, "grad_norm": 1.5172315909593315, "learning_rate": 1.9893690098323955e-05, "loss": 0.6585, "step": 738 }, { "epoch": 0.08, "grad_norm": 1.6726015484769146, "learning_rate": 1.989321045923264e-05, "loss": 0.7843, "step": 739 }, { "epoch": 0.08, "grad_norm": 1.5799583071761338, "learning_rate": 1.989272974638804e-05, "loss": 0.8198, "step": 740 }, { "epoch": 0.08, "grad_norm": 1.6190253159018553, "learning_rate": 1.9892247959842338e-05, "loss": 0.7887, "step": 741 }, { "epoch": 0.08, "grad_norm": 1.6597510473945443, "learning_rate": 1.989176509964781e-05, "loss": 0.8144, "step": 742 }, { "epoch": 0.08, "grad_norm": 1.535999847235126, "learning_rate": 1.9891281165856876e-05, "loss": 0.6152, "step": 743 }, { "epoch": 0.08, "grad_norm": 1.766018263265675, "learning_rate": 1.989079615852205e-05, "loss": 0.7636, "step": 744 }, { "epoch": 0.08, "grad_norm": 1.5858962987014023, "learning_rate": 1.9890310077695976e-05, "loss": 0.7285, "step": 745 }, { "epoch": 0.08, "grad_norm": 1.5638476394295513, "learning_rate": 1.988982292343141e-05, "loss": 0.7238, "step": 746 }, { "epoch": 0.08, "grad_norm": 1.5665371791225642, "learning_rate": 1.9889334695781227e-05, "loss": 0.6451, "step": 747 }, { "epoch": 0.08, "grad_norm": 1.629557149390182, "learning_rate": 1.9888845394798416e-05, "loss": 0.7883, "step": 748 }, { "epoch": 0.08, "grad_norm": 1.7545548160299196, "learning_rate": 1.9888355020536078e-05, "loss": 0.7631, "step": 749 }, { "epoch": 0.08, "grad_norm": 1.9006888455912045, "learning_rate": 1.988786357304744e-05, "loss": 0.7871, "step": 750 }, { "epoch": 0.08, "grad_norm": 1.7168503900092866, "learning_rate": 1.9887371052385843e-05, "loss": 0.8515, "step": 751 }, { "epoch": 0.08, "grad_norm": 1.8927518735252673, "learning_rate": 1.9886877458604737e-05, "loss": 0.8392, "step": 752 }, { "epoch": 0.08, "grad_norm": 1.7011730949471735, "learning_rate": 1.9886382791757695e-05, "loss": 0.7202, "step": 753 }, { "epoch": 0.08, "grad_norm": 1.637575948589387, "learning_rate": 1.9885887051898407e-05, "loss": 0.74, "step": 754 }, { "epoch": 0.08, "grad_norm": 1.8131790572190998, "learning_rate": 1.988539023908068e-05, "loss": 0.7383, "step": 755 }, { "epoch": 0.08, "grad_norm": 1.6681631940285306, "learning_rate": 1.988489235335843e-05, "loss": 0.7128, "step": 756 }, { "epoch": 0.08, "grad_norm": 1.7593396966304202, "learning_rate": 1.98843933947857e-05, "loss": 0.8314, "step": 757 }, { "epoch": 0.08, "grad_norm": 1.7039767523301115, "learning_rate": 1.988389336341664e-05, "loss": 0.7352, "step": 758 }, { "epoch": 0.08, "grad_norm": 1.679183477039409, "learning_rate": 1.988339225930552e-05, "loss": 0.617, "step": 759 }, { "epoch": 0.08, "grad_norm": 1.6711954196811725, "learning_rate": 1.9882890082506733e-05, "loss": 0.7709, "step": 760 }, { "epoch": 0.08, "grad_norm": 1.7192406625162853, "learning_rate": 1.988238683307478e-05, "loss": 0.9232, "step": 761 }, { "epoch": 0.08, "grad_norm": 1.5921764606177538, "learning_rate": 1.9881882511064275e-05, "loss": 0.7469, "step": 762 }, { "epoch": 0.08, "grad_norm": 1.6184206958606004, "learning_rate": 1.9881377116529964e-05, "loss": 0.681, "step": 763 }, { "epoch": 0.08, "grad_norm": 1.6011320408604426, "learning_rate": 1.988087064952669e-05, "loss": 0.7478, "step": 764 }, { "epoch": 0.08, "grad_norm": 1.4149338242204557, "learning_rate": 1.9880363110109427e-05, "loss": 0.7489, "step": 765 }, { "epoch": 0.08, "grad_norm": 1.6112484116223573, "learning_rate": 1.987985449833326e-05, "loss": 0.7996, "step": 766 }, { "epoch": 0.08, "grad_norm": 1.7167345566634746, "learning_rate": 1.987934481425339e-05, "loss": 0.8087, "step": 767 }, { "epoch": 0.08, "grad_norm": 1.5677451466214758, "learning_rate": 1.987883405792514e-05, "loss": 0.7465, "step": 768 }, { "epoch": 0.08, "grad_norm": 1.5804647890281336, "learning_rate": 1.9878322229403938e-05, "loss": 0.626, "step": 769 }, { "epoch": 0.08, "grad_norm": 1.616554155661927, "learning_rate": 1.9877809328745338e-05, "loss": 0.6658, "step": 770 }, { "epoch": 0.08, "grad_norm": 1.5867376321137878, "learning_rate": 1.987729535600501e-05, "loss": 0.7254, "step": 771 }, { "epoch": 0.08, "grad_norm": 1.7469693174480554, "learning_rate": 1.9876780311238727e-05, "loss": 0.7281, "step": 772 }, { "epoch": 0.08, "grad_norm": 1.6938507016832554, "learning_rate": 1.9876264194502403e-05, "loss": 0.7879, "step": 773 }, { "epoch": 0.08, "grad_norm": 1.6085928749683316, "learning_rate": 1.9875747005852048e-05, "loss": 0.8294, "step": 774 }, { "epoch": 0.08, "grad_norm": 1.6144461068818, "learning_rate": 1.987522874534379e-05, "loss": 0.6786, "step": 775 }, { "epoch": 0.08, "grad_norm": 1.6544216920426305, "learning_rate": 1.987470941303389e-05, "loss": 0.7309, "step": 776 }, { "epoch": 0.08, "grad_norm": 1.7767428032177268, "learning_rate": 1.9874189008978702e-05, "loss": 0.7362, "step": 777 }, { "epoch": 0.08, "grad_norm": 1.735644721679841, "learning_rate": 1.9873667533234714e-05, "loss": 0.8189, "step": 778 }, { "epoch": 0.08, "grad_norm": 1.7759896504317103, "learning_rate": 1.987314498585852e-05, "loss": 0.7809, "step": 779 }, { "epoch": 0.08, "grad_norm": 1.6443310875392836, "learning_rate": 1.987262136690684e-05, "loss": 0.7772, "step": 780 }, { "epoch": 0.08, "grad_norm": 1.6914137805454288, "learning_rate": 1.9872096676436498e-05, "loss": 0.6338, "step": 781 }, { "epoch": 0.08, "grad_norm": 1.9227888727294091, "learning_rate": 1.9871570914504447e-05, "loss": 0.8005, "step": 782 }, { "epoch": 0.08, "grad_norm": 1.795882620074113, "learning_rate": 1.9871044081167742e-05, "loss": 0.7707, "step": 783 }, { "epoch": 0.08, "grad_norm": 1.6930358644809467, "learning_rate": 1.9870516176483573e-05, "loss": 0.8259, "step": 784 }, { "epoch": 0.08, "grad_norm": 1.7819340442518423, "learning_rate": 1.9869987200509228e-05, "loss": 0.6661, "step": 785 }, { "epoch": 0.08, "grad_norm": 1.790035538160907, "learning_rate": 1.9869457153302124e-05, "loss": 0.829, "step": 786 }, { "epoch": 0.08, "grad_norm": 1.5202353379476825, "learning_rate": 1.9868926034919787e-05, "loss": 0.8845, "step": 787 }, { "epoch": 0.08, "grad_norm": 1.863990678141754, "learning_rate": 1.986839384541986e-05, "loss": 0.7813, "step": 788 }, { "epoch": 0.08, "grad_norm": 1.5088174748801766, "learning_rate": 1.9867860584860106e-05, "loss": 0.8197, "step": 789 }, { "epoch": 0.08, "grad_norm": 1.569596018469031, "learning_rate": 1.98673262532984e-05, "loss": 0.8397, "step": 790 }, { "epoch": 0.08, "grad_norm": 1.76684943786916, "learning_rate": 1.986679085079274e-05, "loss": 0.8621, "step": 791 }, { "epoch": 0.08, "grad_norm": 1.909957245712091, "learning_rate": 1.986625437740123e-05, "loss": 0.9255, "step": 792 }, { "epoch": 0.08, "grad_norm": 1.51112619591038, "learning_rate": 1.98657168331821e-05, "loss": 0.7438, "step": 793 }, { "epoch": 0.08, "grad_norm": 1.7600103242211305, "learning_rate": 1.986517821819369e-05, "loss": 0.762, "step": 794 }, { "epoch": 0.08, "grad_norm": 1.629549944021964, "learning_rate": 1.986463853249446e-05, "loss": 0.8457, "step": 795 }, { "epoch": 0.08, "grad_norm": 1.6322779733282207, "learning_rate": 1.9864097776142978e-05, "loss": 0.7376, "step": 796 }, { "epoch": 0.08, "grad_norm": 1.6427720947747648, "learning_rate": 1.9863555949197942e-05, "loss": 0.7868, "step": 797 }, { "epoch": 0.08, "grad_norm": 1.5070344335988768, "learning_rate": 1.986301305171816e-05, "loss": 0.7177, "step": 798 }, { "epoch": 0.08, "grad_norm": 1.6517460173464724, "learning_rate": 1.9862469083762546e-05, "loss": 0.83, "step": 799 }, { "epoch": 0.08, "grad_norm": 1.862254713203288, "learning_rate": 1.9861924045390147e-05, "loss": 0.8391, "step": 800 }, { "epoch": 0.08, "grad_norm": 1.840773968509843, "learning_rate": 1.986137793666012e-05, "loss": 0.7189, "step": 801 }, { "epoch": 0.08, "grad_norm": 1.7091886103939837, "learning_rate": 1.9860830757631727e-05, "loss": 0.7196, "step": 802 }, { "epoch": 0.08, "grad_norm": 1.939228414732462, "learning_rate": 1.9860282508364365e-05, "loss": 0.7769, "step": 803 }, { "epoch": 0.08, "grad_norm": 1.7527357282829756, "learning_rate": 1.9859733188917532e-05, "loss": 0.7744, "step": 804 }, { "epoch": 0.08, "grad_norm": 1.8700242525423052, "learning_rate": 1.9859182799350852e-05, "loss": 0.7007, "step": 805 }, { "epoch": 0.08, "grad_norm": 1.595136671416076, "learning_rate": 1.985863133972406e-05, "loss": 0.7394, "step": 806 }, { "epoch": 0.08, "grad_norm": 1.666270275523058, "learning_rate": 1.9858078810097004e-05, "loss": 0.7128, "step": 807 }, { "epoch": 0.08, "grad_norm": 1.6526420386804836, "learning_rate": 1.985752521052966e-05, "loss": 0.793, "step": 808 }, { "epoch": 0.08, "grad_norm": 2.0164674424468023, "learning_rate": 1.9856970541082107e-05, "loss": 0.7952, "step": 809 }, { "epoch": 0.08, "grad_norm": 1.7422692497713812, "learning_rate": 1.985641480181455e-05, "loss": 0.7611, "step": 810 }, { "epoch": 0.08, "grad_norm": 1.8988225567936234, "learning_rate": 1.98558579927873e-05, "loss": 0.8972, "step": 811 }, { "epoch": 0.08, "grad_norm": 1.5986381889416141, "learning_rate": 1.9855300114060795e-05, "loss": 0.7646, "step": 812 }, { "epoch": 0.08, "grad_norm": 1.6483180495163923, "learning_rate": 1.9854741165695583e-05, "loss": 0.7311, "step": 813 }, { "epoch": 0.08, "grad_norm": 1.8063581944455203, "learning_rate": 1.9854181147752326e-05, "loss": 0.9311, "step": 814 }, { "epoch": 0.08, "grad_norm": 1.6504812770449069, "learning_rate": 1.9853620060291812e-05, "loss": 0.767, "step": 815 }, { "epoch": 0.08, "grad_norm": 1.4128816928019157, "learning_rate": 1.985305790337493e-05, "loss": 0.669, "step": 816 }, { "epoch": 0.08, "grad_norm": 1.733825753404519, "learning_rate": 1.98524946770627e-05, "loss": 0.8098, "step": 817 }, { "epoch": 0.08, "grad_norm": 1.6299749868856428, "learning_rate": 1.9851930381416243e-05, "loss": 0.748, "step": 818 }, { "epoch": 0.08, "grad_norm": 1.6517357971409825, "learning_rate": 1.9851365016496812e-05, "loss": 0.8209, "step": 819 }, { "epoch": 0.08, "grad_norm": 1.583252522074743, "learning_rate": 1.985079858236577e-05, "loss": 0.6843, "step": 820 }, { "epoch": 0.08, "grad_norm": 1.6536302219640018, "learning_rate": 1.9850231079084593e-05, "loss": 0.7518, "step": 821 }, { "epoch": 0.08, "grad_norm": 1.7454823923954026, "learning_rate": 1.9849662506714865e-05, "loss": 0.7312, "step": 822 }, { "epoch": 0.08, "grad_norm": 1.914739489812267, "learning_rate": 1.984909286531831e-05, "loss": 0.8605, "step": 823 }, { "epoch": 0.08, "grad_norm": 1.6639931309404836, "learning_rate": 1.9848522154956744e-05, "loss": 0.751, "step": 824 }, { "epoch": 0.08, "grad_norm": 1.5651952261950397, "learning_rate": 1.984795037569211e-05, "loss": 0.7202, "step": 825 }, { "epoch": 0.08, "grad_norm": 1.5688629583006275, "learning_rate": 1.984737752758647e-05, "loss": 0.665, "step": 826 }, { "epoch": 0.08, "grad_norm": 1.4482723619902158, "learning_rate": 1.9846803610701996e-05, "loss": 0.861, "step": 827 }, { "epoch": 0.08, "grad_norm": 1.7229690482399078, "learning_rate": 1.9846228625100977e-05, "loss": 0.756, "step": 828 }, { "epoch": 0.08, "grad_norm": 1.5180781551592952, "learning_rate": 1.9845652570845818e-05, "loss": 0.8151, "step": 829 }, { "epoch": 0.08, "grad_norm": 1.655586251600461, "learning_rate": 1.9845075447999042e-05, "loss": 0.8247, "step": 830 }, { "epoch": 0.08, "grad_norm": 1.6566059901082226, "learning_rate": 1.9844497256623283e-05, "loss": 0.7896, "step": 831 }, { "epoch": 0.08, "grad_norm": 1.6097210602345795, "learning_rate": 1.98439179967813e-05, "loss": 0.7025, "step": 832 }, { "epoch": 0.08, "grad_norm": 1.6245002946759295, "learning_rate": 1.9843337668535958e-05, "loss": 0.7641, "step": 833 }, { "epoch": 0.08, "grad_norm": 1.5594374812846619, "learning_rate": 1.9842756271950247e-05, "loss": 0.6164, "step": 834 }, { "epoch": 0.08, "grad_norm": 1.662029547316825, "learning_rate": 1.9842173807087264e-05, "loss": 0.7241, "step": 835 }, { "epoch": 0.09, "grad_norm": 1.7204550225697404, "learning_rate": 1.9841590274010228e-05, "loss": 0.8256, "step": 836 }, { "epoch": 0.09, "grad_norm": 1.712325268991625, "learning_rate": 1.9841005672782473e-05, "loss": 0.8094, "step": 837 }, { "epoch": 0.09, "grad_norm": 1.6558537995946654, "learning_rate": 1.984042000346745e-05, "loss": 0.8182, "step": 838 }, { "epoch": 0.09, "grad_norm": 1.4574707909678533, "learning_rate": 1.9839833266128726e-05, "loss": 0.7029, "step": 839 }, { "epoch": 0.09, "grad_norm": 1.5151941195461256, "learning_rate": 1.9839245460829974e-05, "loss": 0.8322, "step": 840 }, { "epoch": 0.09, "grad_norm": 1.578641414260665, "learning_rate": 1.9838656587634996e-05, "loss": 0.6918, "step": 841 }, { "epoch": 0.09, "grad_norm": 1.583223765478341, "learning_rate": 1.983806664660771e-05, "loss": 0.6128, "step": 842 }, { "epoch": 0.09, "grad_norm": 1.35378598549046, "learning_rate": 1.9837475637812132e-05, "loss": 0.6721, "step": 843 }, { "epoch": 0.09, "grad_norm": 1.5124064601223453, "learning_rate": 1.983688356131242e-05, "loss": 0.7138, "step": 844 }, { "epoch": 0.09, "grad_norm": 1.6007621242000454, "learning_rate": 1.9836290417172825e-05, "loss": 0.7293, "step": 845 }, { "epoch": 0.09, "grad_norm": 1.532861934209684, "learning_rate": 1.983569620545773e-05, "loss": 0.7525, "step": 846 }, { "epoch": 0.09, "grad_norm": 1.5919170282152117, "learning_rate": 1.9835100926231625e-05, "loss": 0.7039, "step": 847 }, { "epoch": 0.09, "grad_norm": 1.613310000105187, "learning_rate": 1.9834504579559118e-05, "loss": 0.7526, "step": 848 }, { "epoch": 0.09, "grad_norm": 1.5091381434192657, "learning_rate": 1.9833907165504935e-05, "loss": 0.674, "step": 849 }, { "epoch": 0.09, "grad_norm": 1.6288949471182157, "learning_rate": 1.9833308684133913e-05, "loss": 0.6881, "step": 850 }, { "epoch": 0.09, "grad_norm": 1.7035249813071198, "learning_rate": 1.9832709135511006e-05, "loss": 0.7646, "step": 851 }, { "epoch": 0.09, "grad_norm": 1.6085841109244907, "learning_rate": 1.983210851970129e-05, "loss": 0.7064, "step": 852 }, { "epoch": 0.09, "grad_norm": 1.5416377825068288, "learning_rate": 1.9831506836769955e-05, "loss": 0.6925, "step": 853 }, { "epoch": 0.09, "grad_norm": 1.6407129976757333, "learning_rate": 1.9830904086782298e-05, "loss": 0.8426, "step": 854 }, { "epoch": 0.09, "grad_norm": 1.711869418845939, "learning_rate": 1.983030026980374e-05, "loss": 0.8236, "step": 855 }, { "epoch": 0.09, "grad_norm": 1.511105480844578, "learning_rate": 1.9829695385899816e-05, "loss": 0.7293, "step": 856 }, { "epoch": 0.09, "grad_norm": 1.727960597652724, "learning_rate": 1.9829089435136176e-05, "loss": 0.7806, "step": 857 }, { "epoch": 0.09, "grad_norm": 1.8756242422118425, "learning_rate": 1.982848241757859e-05, "loss": 0.8235, "step": 858 }, { "epoch": 0.09, "grad_norm": 1.751065062669428, "learning_rate": 1.982787433329294e-05, "loss": 0.8057, "step": 859 }, { "epoch": 0.09, "grad_norm": 1.5458333978687038, "learning_rate": 1.9827265182345218e-05, "loss": 0.7308, "step": 860 }, { "epoch": 0.09, "grad_norm": 1.5226977001977733, "learning_rate": 1.9826654964801544e-05, "loss": 0.7669, "step": 861 }, { "epoch": 0.09, "grad_norm": 1.6839225185773306, "learning_rate": 1.9826043680728142e-05, "loss": 0.8051, "step": 862 }, { "epoch": 0.09, "grad_norm": 1.5793830832920863, "learning_rate": 1.9825431330191366e-05, "loss": 0.8132, "step": 863 }, { "epoch": 0.09, "grad_norm": 1.6937724188454104, "learning_rate": 1.9824817913257666e-05, "loss": 0.6887, "step": 864 }, { "epoch": 0.09, "grad_norm": 1.873772989542665, "learning_rate": 1.9824203429993627e-05, "loss": 0.8393, "step": 865 }, { "epoch": 0.09, "grad_norm": 1.6477348786228707, "learning_rate": 1.982358788046594e-05, "loss": 0.8163, "step": 866 }, { "epoch": 0.09, "grad_norm": 1.52524445412708, "learning_rate": 1.9822971264741412e-05, "loss": 0.7488, "step": 867 }, { "epoch": 0.09, "grad_norm": 1.6276327048556951, "learning_rate": 1.9822353582886963e-05, "loss": 0.6686, "step": 868 }, { "epoch": 0.09, "grad_norm": 1.634620053938659, "learning_rate": 1.9821734834969643e-05, "loss": 0.8702, "step": 869 }, { "epoch": 0.09, "grad_norm": 1.5984651127834377, "learning_rate": 1.98211150210566e-05, "loss": 0.6718, "step": 870 }, { "epoch": 0.09, "grad_norm": 1.5710084200353578, "learning_rate": 1.98204941412151e-05, "loss": 0.6835, "step": 871 }, { "epoch": 0.09, "grad_norm": 1.6624153524943914, "learning_rate": 1.9819872195512545e-05, "loss": 0.7358, "step": 872 }, { "epoch": 0.09, "grad_norm": 1.590680428823555, "learning_rate": 1.9819249184016426e-05, "loss": 0.7424, "step": 873 }, { "epoch": 0.09, "grad_norm": 1.6679560046276667, "learning_rate": 1.9818625106794363e-05, "loss": 0.7768, "step": 874 }, { "epoch": 0.09, "grad_norm": 1.576313274431883, "learning_rate": 1.981799996391409e-05, "loss": 0.758, "step": 875 }, { "epoch": 0.09, "grad_norm": 1.6518281038340517, "learning_rate": 1.981737375544346e-05, "loss": 0.7233, "step": 876 }, { "epoch": 0.09, "grad_norm": 1.4274151645512967, "learning_rate": 1.9816746481450436e-05, "loss": 0.7168, "step": 877 }, { "epoch": 0.09, "grad_norm": 1.939294035861291, "learning_rate": 1.9816118142003096e-05, "loss": 0.8503, "step": 878 }, { "epoch": 0.09, "grad_norm": 1.5387673753810336, "learning_rate": 1.981548873716964e-05, "loss": 0.7362, "step": 879 }, { "epoch": 0.09, "grad_norm": 1.4805195280344556, "learning_rate": 1.9814858267018376e-05, "loss": 0.6436, "step": 880 }, { "epoch": 0.09, "grad_norm": 1.5446391199954803, "learning_rate": 1.9814226731617734e-05, "loss": 0.6923, "step": 881 }, { "epoch": 0.09, "grad_norm": 1.4813312189994743, "learning_rate": 1.981359413103626e-05, "loss": 0.7571, "step": 882 }, { "epoch": 0.09, "grad_norm": 1.6640726456178598, "learning_rate": 1.981296046534261e-05, "loss": 0.851, "step": 883 }, { "epoch": 0.09, "grad_norm": 1.6633157436957888, "learning_rate": 1.981232573460556e-05, "loss": 0.7917, "step": 884 }, { "epoch": 0.09, "grad_norm": 1.6746676607899385, "learning_rate": 1.9811689938893998e-05, "loss": 0.7473, "step": 885 }, { "epoch": 0.09, "grad_norm": 1.7031648068997371, "learning_rate": 1.9811053078276933e-05, "loss": 0.7654, "step": 886 }, { "epoch": 0.09, "grad_norm": 1.5023261382337536, "learning_rate": 1.981041515282348e-05, "loss": 0.6898, "step": 887 }, { "epoch": 0.09, "grad_norm": 1.5128621826976616, "learning_rate": 1.980977616260288e-05, "loss": 0.6342, "step": 888 }, { "epoch": 0.09, "grad_norm": 1.7107190654157496, "learning_rate": 1.980913610768449e-05, "loss": 0.8767, "step": 889 }, { "epoch": 0.09, "grad_norm": 1.7018440255203298, "learning_rate": 1.9808494988137766e-05, "loss": 0.8087, "step": 890 }, { "epoch": 0.09, "grad_norm": 1.748811212451861, "learning_rate": 1.9807852804032306e-05, "loss": 0.7345, "step": 891 }, { "epoch": 0.09, "grad_norm": 1.7272712489265187, "learning_rate": 1.98072095554378e-05, "loss": 0.8743, "step": 892 }, { "epoch": 0.09, "grad_norm": 1.8069453963982343, "learning_rate": 1.9806565242424064e-05, "loss": 0.8003, "step": 893 }, { "epoch": 0.09, "grad_norm": 1.5389870763989857, "learning_rate": 1.980591986506103e-05, "loss": 0.7899, "step": 894 }, { "epoch": 0.09, "grad_norm": 1.3762396601862672, "learning_rate": 1.9805273423418737e-05, "loss": 0.7024, "step": 895 }, { "epoch": 0.09, "grad_norm": 1.46629201856903, "learning_rate": 1.980462591756735e-05, "loss": 0.7164, "step": 896 }, { "epoch": 0.09, "grad_norm": 1.6408767957620132, "learning_rate": 1.9803977347577154e-05, "loss": 0.7415, "step": 897 }, { "epoch": 0.09, "grad_norm": 1.5881132334026928, "learning_rate": 1.980332771351853e-05, "loss": 0.8025, "step": 898 }, { "epoch": 0.09, "grad_norm": 1.5763669524638544, "learning_rate": 1.980267701546199e-05, "loss": 0.8883, "step": 899 }, { "epoch": 0.09, "grad_norm": 1.4978780824228621, "learning_rate": 1.980202525347816e-05, "loss": 0.7626, "step": 900 }, { "epoch": 0.09, "grad_norm": 1.4887098145165154, "learning_rate": 1.980137242763777e-05, "loss": 0.8167, "step": 901 }, { "epoch": 0.09, "grad_norm": 1.7711545128982842, "learning_rate": 1.9800718538011683e-05, "loss": 0.7537, "step": 902 }, { "epoch": 0.09, "grad_norm": 1.5460885788878576, "learning_rate": 1.9800063584670864e-05, "loss": 0.7343, "step": 903 }, { "epoch": 0.09, "grad_norm": 1.7386866230563274, "learning_rate": 1.97994075676864e-05, "loss": 0.8492, "step": 904 }, { "epoch": 0.09, "grad_norm": 1.5807749512434859, "learning_rate": 1.9798750487129486e-05, "loss": 0.6922, "step": 905 }, { "epoch": 0.09, "grad_norm": 1.5324171436351097, "learning_rate": 1.9798092343071445e-05, "loss": 0.7796, "step": 906 }, { "epoch": 0.09, "grad_norm": 1.7562281371712432, "learning_rate": 1.9797433135583705e-05, "loss": 0.7693, "step": 907 }, { "epoch": 0.09, "grad_norm": 1.6217028713794182, "learning_rate": 1.979677286473781e-05, "loss": 0.7588, "step": 908 }, { "epoch": 0.09, "grad_norm": 1.565090135593504, "learning_rate": 1.9796111530605428e-05, "loss": 0.7376, "step": 909 }, { "epoch": 0.09, "grad_norm": 1.603068989358705, "learning_rate": 1.9795449133258335e-05, "loss": 0.7798, "step": 910 }, { "epoch": 0.09, "grad_norm": 1.6396418489523752, "learning_rate": 1.979478567276842e-05, "loss": 0.6676, "step": 911 }, { "epoch": 0.09, "grad_norm": 1.6372104385022896, "learning_rate": 1.979412114920769e-05, "loss": 0.7513, "step": 912 }, { "epoch": 0.09, "grad_norm": 1.675396810421105, "learning_rate": 1.9793455562648276e-05, "loss": 0.7631, "step": 913 }, { "epoch": 0.09, "grad_norm": 1.897849675524275, "learning_rate": 1.9792788913162414e-05, "loss": 0.7721, "step": 914 }, { "epoch": 0.09, "grad_norm": 1.5554235700494816, "learning_rate": 1.9792121200822456e-05, "loss": 0.81, "step": 915 }, { "epoch": 0.09, "grad_norm": 1.4373647032594903, "learning_rate": 1.9791452425700874e-05, "loss": 0.5976, "step": 916 }, { "epoch": 0.09, "grad_norm": 1.7491234619263865, "learning_rate": 1.9790782587870252e-05, "loss": 0.7904, "step": 917 }, { "epoch": 0.09, "grad_norm": 1.6486451154873125, "learning_rate": 1.9790111687403292e-05, "loss": 0.7869, "step": 918 }, { "epoch": 0.09, "grad_norm": 1.5310571807878468, "learning_rate": 1.978943972437281e-05, "loss": 0.7273, "step": 919 }, { "epoch": 0.09, "grad_norm": 1.7047828597241863, "learning_rate": 1.978876669885173e-05, "loss": 0.682, "step": 920 }, { "epoch": 0.09, "grad_norm": 1.6301078272480116, "learning_rate": 1.9788092610913107e-05, "loss": 0.799, "step": 921 }, { "epoch": 0.09, "grad_norm": 1.5601134386377202, "learning_rate": 1.97874174606301e-05, "loss": 0.8432, "step": 922 }, { "epoch": 0.09, "grad_norm": 1.7210324057395259, "learning_rate": 1.9786741248075984e-05, "loss": 0.8726, "step": 923 }, { "epoch": 0.09, "grad_norm": 1.394576608403927, "learning_rate": 1.9786063973324156e-05, "loss": 0.7333, "step": 924 }, { "epoch": 0.09, "grad_norm": 1.5976279760603629, "learning_rate": 1.978538563644812e-05, "loss": 0.7835, "step": 925 }, { "epoch": 0.09, "grad_norm": 1.5422919699984112, "learning_rate": 1.97847062375215e-05, "loss": 0.772, "step": 926 }, { "epoch": 0.09, "grad_norm": 1.6557365404347686, "learning_rate": 1.978402577661803e-05, "loss": 0.7868, "step": 927 }, { "epoch": 0.09, "grad_norm": 1.674019352909184, "learning_rate": 1.978334425381157e-05, "loss": 0.7464, "step": 928 }, { "epoch": 0.09, "grad_norm": 1.6255566180377932, "learning_rate": 1.9782661669176086e-05, "loss": 0.7936, "step": 929 }, { "epoch": 0.09, "grad_norm": 1.6899360192659847, "learning_rate": 1.978197802278566e-05, "loss": 0.8837, "step": 930 }, { "epoch": 0.09, "grad_norm": 1.5427340894121977, "learning_rate": 1.9781293314714493e-05, "loss": 0.7157, "step": 931 }, { "epoch": 0.09, "grad_norm": 1.5486328001346643, "learning_rate": 1.97806075450369e-05, "loss": 0.6634, "step": 932 }, { "epoch": 0.09, "grad_norm": 1.676171028632494, "learning_rate": 1.9779920713827307e-05, "loss": 0.6629, "step": 933 }, { "epoch": 0.1, "grad_norm": 1.6169928978186605, "learning_rate": 1.9779232821160264e-05, "loss": 0.7855, "step": 934 }, { "epoch": 0.1, "grad_norm": 1.6601303576342932, "learning_rate": 1.9778543867110428e-05, "loss": 0.7458, "step": 935 }, { "epoch": 0.1, "grad_norm": 1.572233190447626, "learning_rate": 1.9777853851752575e-05, "loss": 0.7343, "step": 936 }, { "epoch": 0.1, "grad_norm": 1.5786433225350176, "learning_rate": 1.9777162775161592e-05, "loss": 0.7364, "step": 937 }, { "epoch": 0.1, "grad_norm": 1.6711676113294462, "learning_rate": 1.977647063741249e-05, "loss": 0.6002, "step": 938 }, { "epoch": 0.1, "grad_norm": 1.6719456821034266, "learning_rate": 1.9775777438580387e-05, "loss": 0.7643, "step": 939 }, { "epoch": 0.1, "grad_norm": 1.6821802539166844, "learning_rate": 1.977508317874052e-05, "loss": 0.8292, "step": 940 }, { "epoch": 0.1, "grad_norm": 1.5626244212813953, "learning_rate": 1.9774387857968237e-05, "loss": 0.6857, "step": 941 }, { "epoch": 0.1, "grad_norm": 1.621394503878368, "learning_rate": 1.977369147633901e-05, "loss": 0.6925, "step": 942 }, { "epoch": 0.1, "grad_norm": 1.5671624337676704, "learning_rate": 1.977299403392841e-05, "loss": 0.7112, "step": 943 }, { "epoch": 0.1, "grad_norm": 2.004929948433543, "learning_rate": 1.9772295530812143e-05, "loss": 0.7504, "step": 944 }, { "epoch": 0.1, "grad_norm": 1.5805032467684188, "learning_rate": 1.977159596706602e-05, "loss": 0.7705, "step": 945 }, { "epoch": 0.1, "grad_norm": 1.564866358149519, "learning_rate": 1.9770895342765964e-05, "loss": 0.6859, "step": 946 }, { "epoch": 0.1, "grad_norm": 1.5104736001360075, "learning_rate": 1.977019365798802e-05, "loss": 0.6725, "step": 947 }, { "epoch": 0.1, "grad_norm": 1.7279061948601635, "learning_rate": 1.976949091280834e-05, "loss": 0.8203, "step": 948 }, { "epoch": 0.1, "grad_norm": 1.6987389794038148, "learning_rate": 1.97687871073032e-05, "loss": 0.7356, "step": 949 }, { "epoch": 0.1, "grad_norm": 1.5683557461074862, "learning_rate": 1.976808224154899e-05, "loss": 0.7154, "step": 950 }, { "epoch": 0.1, "grad_norm": 1.6291551489710554, "learning_rate": 1.9767376315622204e-05, "loss": 0.6979, "step": 951 }, { "epoch": 0.1, "grad_norm": 1.7429785759185206, "learning_rate": 1.9766669329599466e-05, "loss": 0.9519, "step": 952 }, { "epoch": 0.1, "grad_norm": 1.7201420018698663, "learning_rate": 1.9765961283557503e-05, "loss": 0.8157, "step": 953 }, { "epoch": 0.1, "grad_norm": 1.6340952247661684, "learning_rate": 1.976525217757317e-05, "loss": 0.8053, "step": 954 }, { "epoch": 0.1, "grad_norm": 1.7497449502423785, "learning_rate": 1.976454201172342e-05, "loss": 0.6892, "step": 955 }, { "epoch": 0.1, "grad_norm": 1.8490925913199021, "learning_rate": 1.976383078608534e-05, "loss": 0.7282, "step": 956 }, { "epoch": 0.1, "grad_norm": 1.571142660205738, "learning_rate": 1.976311850073611e-05, "loss": 0.7532, "step": 957 }, { "epoch": 0.1, "grad_norm": 1.490756284704002, "learning_rate": 1.976240515575305e-05, "loss": 0.7327, "step": 958 }, { "epoch": 0.1, "grad_norm": 1.6843788399658988, "learning_rate": 1.976169075121358e-05, "loss": 0.7708, "step": 959 }, { "epoch": 0.1, "grad_norm": 1.7748135897626862, "learning_rate": 1.976097528719523e-05, "loss": 0.8446, "step": 960 }, { "epoch": 0.1, "grad_norm": 1.6707936681278435, "learning_rate": 1.9760258763775656e-05, "loss": 0.7498, "step": 961 }, { "epoch": 0.1, "grad_norm": 1.7033717941830635, "learning_rate": 1.975954118103263e-05, "loss": 0.6827, "step": 962 }, { "epoch": 0.1, "grad_norm": 1.6250184779189945, "learning_rate": 1.975882253904403e-05, "loss": 0.8026, "step": 963 }, { "epoch": 0.1, "grad_norm": 1.6361132028766527, "learning_rate": 1.9758102837887853e-05, "loss": 0.6545, "step": 964 }, { "epoch": 0.1, "grad_norm": 1.6965149086218916, "learning_rate": 1.9757382077642214e-05, "loss": 0.8264, "step": 965 }, { "epoch": 0.1, "grad_norm": 1.6633583574231603, "learning_rate": 1.9756660258385338e-05, "loss": 0.8511, "step": 966 }, { "epoch": 0.1, "grad_norm": 1.5977541239908342, "learning_rate": 1.975593738019557e-05, "loss": 0.7447, "step": 967 }, { "epoch": 0.1, "grad_norm": 1.56727108125653, "learning_rate": 1.975521344315136e-05, "loss": 0.7605, "step": 968 }, { "epoch": 0.1, "grad_norm": 1.622962864888057, "learning_rate": 1.9754488447331292e-05, "loss": 0.7268, "step": 969 }, { "epoch": 0.1, "grad_norm": 1.5541156726666745, "learning_rate": 1.9753762392814043e-05, "loss": 0.7134, "step": 970 }, { "epoch": 0.1, "grad_norm": 1.610775363823247, "learning_rate": 1.975303527967842e-05, "loss": 0.7554, "step": 971 }, { "epoch": 0.1, "grad_norm": 1.7499914820423736, "learning_rate": 1.9752307108003334e-05, "loss": 0.8552, "step": 972 }, { "epoch": 0.1, "grad_norm": 1.5534873957880497, "learning_rate": 1.9751577877867823e-05, "loss": 0.753, "step": 973 }, { "epoch": 0.1, "grad_norm": 1.808776873744801, "learning_rate": 1.975084758935103e-05, "loss": 0.7819, "step": 974 }, { "epoch": 0.1, "grad_norm": 1.7700864769108424, "learning_rate": 1.9750116242532217e-05, "loss": 0.9487, "step": 975 }, { "epoch": 0.1, "grad_norm": 1.6943439977730848, "learning_rate": 1.974938383749076e-05, "loss": 0.7968, "step": 976 }, { "epoch": 0.1, "grad_norm": 1.6407491012547737, "learning_rate": 1.9748650374306156e-05, "loss": 0.7705, "step": 977 }, { "epoch": 0.1, "grad_norm": 1.70469858711636, "learning_rate": 1.9747915853058003e-05, "loss": 0.7693, "step": 978 }, { "epoch": 0.1, "grad_norm": 1.582707324059266, "learning_rate": 1.974718027382602e-05, "loss": 0.7874, "step": 979 }, { "epoch": 0.1, "grad_norm": 1.6073999705030813, "learning_rate": 1.974644363669005e-05, "loss": 0.799, "step": 980 }, { "epoch": 0.1, "grad_norm": 1.6479941375707847, "learning_rate": 1.974570594173004e-05, "loss": 0.7785, "step": 981 }, { "epoch": 0.1, "grad_norm": 1.6646530813302085, "learning_rate": 1.974496718902606e-05, "loss": 0.8484, "step": 982 }, { "epoch": 0.1, "grad_norm": 1.506444754411822, "learning_rate": 1.9744227378658283e-05, "loss": 0.7749, "step": 983 }, { "epoch": 0.1, "grad_norm": 1.6978448175283514, "learning_rate": 1.9743486510707006e-05, "loss": 0.9159, "step": 984 }, { "epoch": 0.1, "grad_norm": 1.542376548702729, "learning_rate": 1.974274458525264e-05, "loss": 0.7079, "step": 985 }, { "epoch": 0.1, "grad_norm": 1.6363243613427414, "learning_rate": 1.9742001602375708e-05, "loss": 0.802, "step": 986 }, { "epoch": 0.1, "grad_norm": 1.596726103662305, "learning_rate": 1.9741257562156854e-05, "loss": 0.8283, "step": 987 }, { "epoch": 0.1, "grad_norm": 1.552072210203679, "learning_rate": 1.974051246467682e-05, "loss": 0.7828, "step": 988 }, { "epoch": 0.1, "grad_norm": 1.7601591894326951, "learning_rate": 1.973976631001649e-05, "loss": 0.6845, "step": 989 }, { "epoch": 0.1, "grad_norm": 1.751601699761324, "learning_rate": 1.9739019098256835e-05, "loss": 0.7943, "step": 990 }, { "epoch": 0.1, "grad_norm": 1.5746857081818262, "learning_rate": 1.973827082947896e-05, "loss": 0.7584, "step": 991 }, { "epoch": 0.1, "grad_norm": 1.6786168546308895, "learning_rate": 1.9737521503764076e-05, "loss": 0.704, "step": 992 }, { "epoch": 0.1, "grad_norm": 1.7689656599100716, "learning_rate": 1.9736771121193513e-05, "loss": 0.8223, "step": 993 }, { "epoch": 0.1, "grad_norm": 1.6628613066396887, "learning_rate": 1.9736019681848706e-05, "loss": 0.7639, "step": 994 }, { "epoch": 0.1, "grad_norm": 1.6507086963589646, "learning_rate": 1.973526718581122e-05, "loss": 0.7448, "step": 995 }, { "epoch": 0.1, "grad_norm": 1.4891511625875535, "learning_rate": 1.9734513633162723e-05, "loss": 0.7129, "step": 996 }, { "epoch": 0.1, "grad_norm": 1.801981972115585, "learning_rate": 1.9733759023985e-05, "loss": 0.7863, "step": 997 }, { "epoch": 0.1, "grad_norm": 1.6483362524488605, "learning_rate": 1.9733003358359955e-05, "loss": 0.6735, "step": 998 }, { "epoch": 0.1, "grad_norm": 1.5831381134582354, "learning_rate": 1.9732246636369605e-05, "loss": 0.74, "step": 999 }, { "epoch": 0.1, "grad_norm": 1.7761677747689284, "learning_rate": 1.9731488858096078e-05, "loss": 0.7121, "step": 1000 }, { "epoch": 0.1, "grad_norm": 1.5907097418790517, "learning_rate": 1.9730730023621617e-05, "loss": 0.7825, "step": 1001 }, { "epoch": 0.1, "grad_norm": 1.6103489754656453, "learning_rate": 1.972997013302858e-05, "loss": 0.8179, "step": 1002 }, { "epoch": 0.1, "grad_norm": 1.5474773072539885, "learning_rate": 1.9729209186399448e-05, "loss": 0.7446, "step": 1003 }, { "epoch": 0.1, "grad_norm": 1.522546952331723, "learning_rate": 1.972844718381681e-05, "loss": 0.7528, "step": 1004 }, { "epoch": 0.1, "grad_norm": 1.78490021522784, "learning_rate": 1.9727684125363364e-05, "loss": 0.887, "step": 1005 }, { "epoch": 0.1, "grad_norm": 1.5860785797184678, "learning_rate": 1.972692001112193e-05, "loss": 0.71, "step": 1006 }, { "epoch": 0.1, "grad_norm": 1.5648733823815915, "learning_rate": 1.972615484117544e-05, "loss": 0.7583, "step": 1007 }, { "epoch": 0.1, "grad_norm": 1.707739890292607, "learning_rate": 1.972538861560694e-05, "loss": 0.7862, "step": 1008 }, { "epoch": 0.1, "grad_norm": 1.659828837091845, "learning_rate": 1.9724621334499597e-05, "loss": 0.7663, "step": 1009 }, { "epoch": 0.1, "grad_norm": 1.6810106139429293, "learning_rate": 1.9723852997936683e-05, "loss": 0.7745, "step": 1010 }, { "epoch": 0.1, "grad_norm": 1.5375832455600957, "learning_rate": 1.972308360600159e-05, "loss": 0.7442, "step": 1011 }, { "epoch": 0.1, "grad_norm": 1.8275172545359188, "learning_rate": 1.9722313158777825e-05, "loss": 0.8308, "step": 1012 }, { "epoch": 0.1, "grad_norm": 1.5558075737762853, "learning_rate": 1.9721541656349005e-05, "loss": 0.7765, "step": 1013 }, { "epoch": 0.1, "grad_norm": 1.7634963149801517, "learning_rate": 1.9720769098798866e-05, "loss": 0.7983, "step": 1014 }, { "epoch": 0.1, "grad_norm": 1.5644790225602312, "learning_rate": 1.9719995486211258e-05, "loss": 0.7695, "step": 1015 }, { "epoch": 0.1, "grad_norm": 1.5350140296556833, "learning_rate": 1.9719220818670144e-05, "loss": 0.8209, "step": 1016 }, { "epoch": 0.1, "grad_norm": 1.716896406028085, "learning_rate": 1.97184450962596e-05, "loss": 0.8061, "step": 1017 }, { "epoch": 0.1, "grad_norm": 1.5783961891694578, "learning_rate": 1.971766831906382e-05, "loss": 0.6355, "step": 1018 }, { "epoch": 0.1, "grad_norm": 1.8176065622672664, "learning_rate": 1.9716890487167114e-05, "loss": 0.7767, "step": 1019 }, { "epoch": 0.1, "grad_norm": 1.5373269041490218, "learning_rate": 1.97161116006539e-05, "loss": 0.8069, "step": 1020 }, { "epoch": 0.1, "grad_norm": 1.779558739717626, "learning_rate": 1.9715331659608716e-05, "loss": 0.9195, "step": 1021 }, { "epoch": 0.1, "grad_norm": 1.5333585600073345, "learning_rate": 1.971455066411621e-05, "loss": 0.762, "step": 1022 }, { "epoch": 0.1, "grad_norm": 1.568751380846371, "learning_rate": 1.9713768614261144e-05, "loss": 0.7352, "step": 1023 }, { "epoch": 0.1, "grad_norm": 1.4626406800636604, "learning_rate": 1.9712985510128406e-05, "loss": 0.7571, "step": 1024 }, { "epoch": 0.1, "grad_norm": 1.6115763573627133, "learning_rate": 1.9712201351802985e-05, "loss": 0.8721, "step": 1025 }, { "epoch": 0.1, "grad_norm": 1.4664746057207612, "learning_rate": 1.9711416139369984e-05, "loss": 0.6534, "step": 1026 }, { "epoch": 0.1, "grad_norm": 1.834484523255519, "learning_rate": 1.971062987291464e-05, "loss": 0.7143, "step": 1027 }, { "epoch": 0.1, "grad_norm": 1.5306636799352586, "learning_rate": 1.9709842552522272e-05, "loss": 0.7261, "step": 1028 }, { "epoch": 0.1, "grad_norm": 1.480494949221711, "learning_rate": 1.9709054178278343e-05, "loss": 0.7775, "step": 1029 }, { "epoch": 0.1, "grad_norm": 1.4159653760073085, "learning_rate": 1.9708264750268416e-05, "loss": 0.6398, "step": 1030 }, { "epoch": 0.1, "grad_norm": 1.4677707044356991, "learning_rate": 1.9707474268578172e-05, "loss": 0.8, "step": 1031 }, { "epoch": 0.1, "grad_norm": 1.6534741622675229, "learning_rate": 1.97066827332934e-05, "loss": 0.802, "step": 1032 }, { "epoch": 0.11, "grad_norm": 1.5389456821966196, "learning_rate": 1.9705890144500012e-05, "loss": 0.764, "step": 1033 }, { "epoch": 0.11, "grad_norm": 1.58608861866648, "learning_rate": 1.9705096502284037e-05, "loss": 0.6967, "step": 1034 }, { "epoch": 0.11, "grad_norm": 1.7745355167995958, "learning_rate": 1.9704301806731604e-05, "loss": 0.7554, "step": 1035 }, { "epoch": 0.11, "grad_norm": 1.6537554040171483, "learning_rate": 1.970350605792897e-05, "loss": 0.7989, "step": 1036 }, { "epoch": 0.11, "grad_norm": 1.49864081600279, "learning_rate": 1.97027092559625e-05, "loss": 0.6163, "step": 1037 }, { "epoch": 0.11, "grad_norm": 1.6541958709909896, "learning_rate": 1.9701911400918673e-05, "loss": 0.7903, "step": 1038 }, { "epoch": 0.11, "grad_norm": 1.7003266196100684, "learning_rate": 1.9701112492884084e-05, "loss": 0.8901, "step": 1039 }, { "epoch": 0.11, "grad_norm": 1.6807657449170552, "learning_rate": 1.9700312531945444e-05, "loss": 0.6864, "step": 1040 }, { "epoch": 0.11, "grad_norm": 1.614703409405987, "learning_rate": 1.969951151818957e-05, "loss": 0.839, "step": 1041 }, { "epoch": 0.11, "grad_norm": 1.7806689503190112, "learning_rate": 1.9698709451703405e-05, "loss": 0.7984, "step": 1042 }, { "epoch": 0.11, "grad_norm": 1.8040362335997895, "learning_rate": 1.9697906332574005e-05, "loss": 0.8109, "step": 1043 }, { "epoch": 0.11, "grad_norm": 1.7385609277311922, "learning_rate": 1.9697102160888525e-05, "loss": 0.7235, "step": 1044 }, { "epoch": 0.11, "grad_norm": 1.6972535125809556, "learning_rate": 1.9696296936734254e-05, "loss": 0.7874, "step": 1045 }, { "epoch": 0.11, "grad_norm": 1.8012082058922911, "learning_rate": 1.9695490660198584e-05, "loss": 0.7251, "step": 1046 }, { "epoch": 0.11, "grad_norm": 1.7649284575263353, "learning_rate": 1.9694683331369023e-05, "loss": 0.7602, "step": 1047 }, { "epoch": 0.11, "grad_norm": 1.6764194604703575, "learning_rate": 1.9693874950333196e-05, "loss": 0.8882, "step": 1048 }, { "epoch": 0.11, "grad_norm": 1.6350072171486167, "learning_rate": 1.9693065517178836e-05, "loss": 0.6515, "step": 1049 }, { "epoch": 0.11, "grad_norm": 1.4989978096130447, "learning_rate": 1.9692255031993794e-05, "loss": 0.7783, "step": 1050 }, { "epoch": 0.11, "grad_norm": 1.3890840164822351, "learning_rate": 1.9691443494866043e-05, "loss": 0.5872, "step": 1051 }, { "epoch": 0.11, "grad_norm": 1.63102381931439, "learning_rate": 1.9690630905883658e-05, "loss": 0.7515, "step": 1052 }, { "epoch": 0.11, "grad_norm": 1.6734613888623586, "learning_rate": 1.968981726513483e-05, "loss": 0.7333, "step": 1053 }, { "epoch": 0.11, "grad_norm": 1.643404966679851, "learning_rate": 1.9689002572707873e-05, "loss": 0.7412, "step": 1054 }, { "epoch": 0.11, "grad_norm": 1.601773214091372, "learning_rate": 1.96881868286912e-05, "loss": 0.7213, "step": 1055 }, { "epoch": 0.11, "grad_norm": 1.6130619273018145, "learning_rate": 1.9687370033173356e-05, "loss": 0.7485, "step": 1056 }, { "epoch": 0.11, "grad_norm": 1.7587685900576053, "learning_rate": 1.968655218624299e-05, "loss": 0.767, "step": 1057 }, { "epoch": 0.11, "grad_norm": 1.6537766624915502, "learning_rate": 1.968573328798886e-05, "loss": 0.6481, "step": 1058 }, { "epoch": 0.11, "grad_norm": 1.7185152830818846, "learning_rate": 1.9684913338499855e-05, "loss": 0.8093, "step": 1059 }, { "epoch": 0.11, "grad_norm": 1.9319626256997462, "learning_rate": 1.968409233786496e-05, "loss": 0.6413, "step": 1060 }, { "epoch": 0.11, "grad_norm": 1.547975529846523, "learning_rate": 1.9683270286173286e-05, "loss": 0.749, "step": 1061 }, { "epoch": 0.11, "grad_norm": 1.6573271155926854, "learning_rate": 1.968244718351405e-05, "loss": 0.644, "step": 1062 }, { "epoch": 0.11, "grad_norm": 1.5989053921550134, "learning_rate": 1.968162302997659e-05, "loss": 0.818, "step": 1063 }, { "epoch": 0.11, "grad_norm": 1.7268069657969647, "learning_rate": 1.968079782565035e-05, "loss": 0.7992, "step": 1064 }, { "epoch": 0.11, "grad_norm": 1.862673950648877, "learning_rate": 1.9679971570624902e-05, "loss": 0.7751, "step": 1065 }, { "epoch": 0.11, "grad_norm": 1.6890329705049392, "learning_rate": 1.9679144264989914e-05, "loss": 0.7416, "step": 1066 }, { "epoch": 0.11, "grad_norm": 1.6130513551926382, "learning_rate": 1.967831590883518e-05, "loss": 0.7199, "step": 1067 }, { "epoch": 0.11, "grad_norm": 1.653095405272077, "learning_rate": 1.967748650225061e-05, "loss": 0.7696, "step": 1068 }, { "epoch": 0.11, "grad_norm": 1.445353967928603, "learning_rate": 1.9676656045326217e-05, "loss": 0.7718, "step": 1069 }, { "epoch": 0.11, "grad_norm": 1.6023578260531115, "learning_rate": 1.967582453815214e-05, "loss": 0.8398, "step": 1070 }, { "epoch": 0.11, "grad_norm": 1.6055120340482538, "learning_rate": 1.967499198081862e-05, "loss": 0.8376, "step": 1071 }, { "epoch": 0.11, "grad_norm": 1.5238960023480306, "learning_rate": 1.9674158373416017e-05, "loss": 0.619, "step": 1072 }, { "epoch": 0.11, "grad_norm": 1.7324188075148437, "learning_rate": 1.9673323716034812e-05, "loss": 0.9763, "step": 1073 }, { "epoch": 0.11, "grad_norm": 1.5746141870115677, "learning_rate": 1.967248800876559e-05, "loss": 0.7339, "step": 1074 }, { "epoch": 0.11, "grad_norm": 1.4853638045612656, "learning_rate": 1.967165125169906e-05, "loss": 0.7768, "step": 1075 }, { "epoch": 0.11, "grad_norm": 1.7272333668866946, "learning_rate": 1.9670813444926028e-05, "loss": 0.8803, "step": 1076 }, { "epoch": 0.11, "grad_norm": 1.7776062101599435, "learning_rate": 1.9669974588537437e-05, "loss": 0.7372, "step": 1077 }, { "epoch": 0.11, "grad_norm": 1.7740405473527927, "learning_rate": 1.9669134682624324e-05, "loss": 0.7725, "step": 1078 }, { "epoch": 0.11, "grad_norm": 1.6327544110262224, "learning_rate": 1.9668293727277847e-05, "loss": 0.8023, "step": 1079 }, { "epoch": 0.11, "grad_norm": 1.6830258720590778, "learning_rate": 1.9667451722589283e-05, "loss": 0.8351, "step": 1080 }, { "epoch": 0.11, "grad_norm": 1.632865983465764, "learning_rate": 1.966660866865002e-05, "loss": 0.8502, "step": 1081 }, { "epoch": 0.11, "grad_norm": 1.8501165148542782, "learning_rate": 1.9665764565551548e-05, "loss": 0.8346, "step": 1082 }, { "epoch": 0.11, "grad_norm": 1.643792627111494, "learning_rate": 1.966491941338549e-05, "loss": 0.6841, "step": 1083 }, { "epoch": 0.11, "grad_norm": 1.7869989049285269, "learning_rate": 1.9664073212243576e-05, "loss": 0.7794, "step": 1084 }, { "epoch": 0.11, "grad_norm": 1.5272749364519826, "learning_rate": 1.966322596221764e-05, "loss": 0.6827, "step": 1085 }, { "epoch": 0.11, "grad_norm": 1.548034982280869, "learning_rate": 1.9662377663399647e-05, "loss": 0.7078, "step": 1086 }, { "epoch": 0.11, "grad_norm": 1.7707301221752616, "learning_rate": 1.9661528315881654e-05, "loss": 0.8737, "step": 1087 }, { "epoch": 0.11, "grad_norm": 1.738629441499874, "learning_rate": 1.9660677919755855e-05, "loss": 0.9194, "step": 1088 }, { "epoch": 0.11, "grad_norm": 1.6275322704385589, "learning_rate": 1.9659826475114548e-05, "loss": 0.7833, "step": 1089 }, { "epoch": 0.11, "grad_norm": 1.6804171948269853, "learning_rate": 1.9658973982050133e-05, "loss": 0.776, "step": 1090 }, { "epoch": 0.11, "grad_norm": 1.575697569245944, "learning_rate": 1.9658120440655147e-05, "loss": 0.7907, "step": 1091 }, { "epoch": 0.11, "grad_norm": 1.570696801563854, "learning_rate": 1.965726585102222e-05, "loss": 0.7582, "step": 1092 }, { "epoch": 0.11, "grad_norm": 1.6602049450510845, "learning_rate": 1.965641021324411e-05, "loss": 0.718, "step": 1093 }, { "epoch": 0.11, "grad_norm": 1.6935903967630492, "learning_rate": 1.965555352741368e-05, "loss": 0.8315, "step": 1094 }, { "epoch": 0.11, "grad_norm": 1.795982914953094, "learning_rate": 1.9654695793623908e-05, "loss": 0.7485, "step": 1095 }, { "epoch": 0.11, "grad_norm": 1.5328332724743226, "learning_rate": 1.9653837011967894e-05, "loss": 0.7178, "step": 1096 }, { "epoch": 0.11, "grad_norm": 1.5640940025238694, "learning_rate": 1.9652977182538836e-05, "loss": 0.7228, "step": 1097 }, { "epoch": 0.11, "grad_norm": 1.556543305513743, "learning_rate": 1.9652116305430067e-05, "loss": 0.8408, "step": 1098 }, { "epoch": 0.11, "grad_norm": 1.5417627207071147, "learning_rate": 1.965125438073501e-05, "loss": 0.7884, "step": 1099 }, { "epoch": 0.11, "grad_norm": 1.526098291521299, "learning_rate": 1.9650391408547222e-05, "loss": 0.8267, "step": 1100 }, { "epoch": 0.11, "grad_norm": 1.7507073343126842, "learning_rate": 1.9649527388960365e-05, "loss": 0.891, "step": 1101 }, { "epoch": 0.11, "grad_norm": 1.5063836112608697, "learning_rate": 1.9648662322068205e-05, "loss": 0.8313, "step": 1102 }, { "epoch": 0.11, "grad_norm": 1.6884409011943324, "learning_rate": 1.964779620796464e-05, "loss": 0.7544, "step": 1103 }, { "epoch": 0.11, "grad_norm": 1.7060068858942647, "learning_rate": 1.9646929046743675e-05, "loss": 0.727, "step": 1104 }, { "epoch": 0.11, "grad_norm": 1.3902547802147753, "learning_rate": 1.9646060838499418e-05, "loss": 0.7339, "step": 1105 }, { "epoch": 0.11, "grad_norm": 1.4974045504752067, "learning_rate": 1.964519158332611e-05, "loss": 0.8947, "step": 1106 }, { "epoch": 0.11, "grad_norm": 1.627185223593396, "learning_rate": 1.9644321281318085e-05, "loss": 0.7223, "step": 1107 }, { "epoch": 0.11, "grad_norm": 1.8650431831681953, "learning_rate": 1.9643449932569803e-05, "loss": 0.8473, "step": 1108 }, { "epoch": 0.11, "grad_norm": 1.5295919963150972, "learning_rate": 1.9642577537175845e-05, "loss": 0.6756, "step": 1109 }, { "epoch": 0.11, "grad_norm": 1.827817218480238, "learning_rate": 1.9641704095230884e-05, "loss": 0.7595, "step": 1110 }, { "epoch": 0.11, "grad_norm": 1.8572345935296157, "learning_rate": 1.9640829606829724e-05, "loss": 0.8726, "step": 1111 }, { "epoch": 0.11, "grad_norm": 1.9229900375324138, "learning_rate": 1.9639954072067273e-05, "loss": 0.6821, "step": 1112 }, { "epoch": 0.11, "grad_norm": 1.529709612228805, "learning_rate": 1.9639077491038562e-05, "loss": 0.7728, "step": 1113 }, { "epoch": 0.11, "grad_norm": 1.7816911587916253, "learning_rate": 1.9638199863838726e-05, "loss": 0.6983, "step": 1114 }, { "epoch": 0.11, "grad_norm": 1.5904056602001073, "learning_rate": 1.9637321190563023e-05, "loss": 0.7543, "step": 1115 }, { "epoch": 0.11, "grad_norm": 1.764043464464645, "learning_rate": 1.963644147130681e-05, "loss": 0.8764, "step": 1116 }, { "epoch": 0.11, "grad_norm": 1.581725970823919, "learning_rate": 1.9635560706165577e-05, "loss": 0.7722, "step": 1117 }, { "epoch": 0.11, "grad_norm": 1.483170971872246, "learning_rate": 1.9634678895234912e-05, "loss": 0.6994, "step": 1118 }, { "epoch": 0.11, "grad_norm": 1.607073912269842, "learning_rate": 1.963379603861052e-05, "loss": 0.7012, "step": 1119 }, { "epoch": 0.11, "grad_norm": 1.5788678057053491, "learning_rate": 1.9632912136388228e-05, "loss": 0.6462, "step": 1120 }, { "epoch": 0.11, "grad_norm": 1.736077450846907, "learning_rate": 1.9632027188663963e-05, "loss": 0.7634, "step": 1121 }, { "epoch": 0.11, "grad_norm": 1.553299674016011, "learning_rate": 1.9631141195533773e-05, "loss": 0.7819, "step": 1122 }, { "epoch": 0.11, "grad_norm": 1.4890833928925393, "learning_rate": 1.9630254157093822e-05, "loss": 0.6707, "step": 1123 }, { "epoch": 0.11, "grad_norm": 1.7022152845461826, "learning_rate": 1.9629366073440385e-05, "loss": 0.7831, "step": 1124 }, { "epoch": 0.11, "grad_norm": 1.574794997754779, "learning_rate": 1.9628476944669847e-05, "loss": 0.6835, "step": 1125 }, { "epoch": 0.11, "grad_norm": 1.6830359629247464, "learning_rate": 1.9627586770878705e-05, "loss": 0.7132, "step": 1126 }, { "epoch": 0.11, "grad_norm": 1.6489366566650268, "learning_rate": 1.962669555216358e-05, "loss": 0.8291, "step": 1127 }, { "epoch": 0.11, "grad_norm": 1.5845939269600287, "learning_rate": 1.9625803288621194e-05, "loss": 0.7917, "step": 1128 }, { "epoch": 0.11, "grad_norm": 1.4958457966941456, "learning_rate": 1.96249099803484e-05, "loss": 0.7724, "step": 1129 }, { "epoch": 0.11, "grad_norm": 1.6052748434750355, "learning_rate": 1.9624015627442134e-05, "loss": 0.7444, "step": 1130 }, { "epoch": 0.12, "grad_norm": 1.5071294567920583, "learning_rate": 1.962312022999948e-05, "loss": 0.6666, "step": 1131 }, { "epoch": 0.12, "grad_norm": 1.7281784144036083, "learning_rate": 1.9622223788117614e-05, "loss": 0.7376, "step": 1132 }, { "epoch": 0.12, "grad_norm": 1.6185475613897287, "learning_rate": 1.962132630189383e-05, "loss": 0.7193, "step": 1133 }, { "epoch": 0.12, "grad_norm": 1.7260084083877627, "learning_rate": 1.962042777142554e-05, "loss": 0.7864, "step": 1134 }, { "epoch": 0.12, "grad_norm": 1.5682532904630293, "learning_rate": 1.9619528196810254e-05, "loss": 0.7225, "step": 1135 }, { "epoch": 0.12, "grad_norm": 1.6391159674374254, "learning_rate": 1.9618627578145624e-05, "loss": 0.7729, "step": 1136 }, { "epoch": 0.12, "grad_norm": 1.5784690928165177, "learning_rate": 1.9617725915529383e-05, "loss": 0.8187, "step": 1137 }, { "epoch": 0.12, "grad_norm": 1.7088910412836227, "learning_rate": 1.9616823209059398e-05, "loss": 0.784, "step": 1138 }, { "epoch": 0.12, "grad_norm": 1.6571345846430185, "learning_rate": 1.9615919458833646e-05, "loss": 0.6829, "step": 1139 }, { "epoch": 0.12, "grad_norm": 1.6319202664587704, "learning_rate": 1.9615014664950214e-05, "loss": 0.75, "step": 1140 }, { "epoch": 0.12, "grad_norm": 1.4303103285508485, "learning_rate": 1.9614108827507304e-05, "loss": 0.6818, "step": 1141 }, { "epoch": 0.12, "grad_norm": 1.5585376987437913, "learning_rate": 1.9613201946603225e-05, "loss": 0.8776, "step": 1142 }, { "epoch": 0.12, "grad_norm": 1.6595202034800935, "learning_rate": 1.9612294022336407e-05, "loss": 0.8057, "step": 1143 }, { "epoch": 0.12, "grad_norm": 1.8047651958531572, "learning_rate": 1.9611385054805396e-05, "loss": 0.8138, "step": 1144 }, { "epoch": 0.12, "grad_norm": 1.5737996224499786, "learning_rate": 1.9610475044108837e-05, "loss": 0.6541, "step": 1145 }, { "epoch": 0.12, "grad_norm": 1.5709415443250103, "learning_rate": 1.960956399034551e-05, "loss": 0.7679, "step": 1146 }, { "epoch": 0.12, "grad_norm": 1.58783726351291, "learning_rate": 1.9608651893614284e-05, "loss": 0.772, "step": 1147 }, { "epoch": 0.12, "grad_norm": 1.6528514722614565, "learning_rate": 1.960773875401416e-05, "loss": 0.6852, "step": 1148 }, { "epoch": 0.12, "grad_norm": 1.6218521782436417, "learning_rate": 1.960682457164424e-05, "loss": 0.7883, "step": 1149 }, { "epoch": 0.12, "grad_norm": 1.4396576356085227, "learning_rate": 1.960590934660375e-05, "loss": 0.6858, "step": 1150 }, { "epoch": 0.12, "grad_norm": 1.6058391146974962, "learning_rate": 1.9604993078992016e-05, "loss": 0.6883, "step": 1151 }, { "epoch": 0.12, "grad_norm": 1.752370009104151, "learning_rate": 1.960407576890849e-05, "loss": 0.8184, "step": 1152 }, { "epoch": 0.12, "grad_norm": 1.4172074465364604, "learning_rate": 1.9603157416452732e-05, "loss": 0.6789, "step": 1153 }, { "epoch": 0.12, "grad_norm": 1.843728995407731, "learning_rate": 1.960223802172441e-05, "loss": 0.7007, "step": 1154 }, { "epoch": 0.12, "grad_norm": 1.631743660794764, "learning_rate": 1.9601317584823315e-05, "loss": 0.764, "step": 1155 }, { "epoch": 0.12, "grad_norm": 1.484817707471324, "learning_rate": 1.9600396105849346e-05, "loss": 0.7767, "step": 1156 }, { "epoch": 0.12, "grad_norm": 1.6070133386997218, "learning_rate": 1.9599473584902512e-05, "loss": 0.8539, "step": 1157 }, { "epoch": 0.12, "grad_norm": 1.695249667390617, "learning_rate": 1.9598550022082937e-05, "loss": 0.7938, "step": 1158 }, { "epoch": 0.12, "grad_norm": 1.6234052698473143, "learning_rate": 1.9597625417490863e-05, "loss": 0.757, "step": 1159 }, { "epoch": 0.12, "grad_norm": 1.6293595346347398, "learning_rate": 1.9596699771226638e-05, "loss": 0.7461, "step": 1160 }, { "epoch": 0.12, "grad_norm": 1.662704230882139, "learning_rate": 1.959577308339073e-05, "loss": 0.926, "step": 1161 }, { "epoch": 0.12, "grad_norm": 1.756056604825178, "learning_rate": 1.9594845354083716e-05, "loss": 0.8179, "step": 1162 }, { "epoch": 0.12, "grad_norm": 1.6719212295165158, "learning_rate": 1.9593916583406285e-05, "loss": 0.7861, "step": 1163 }, { "epoch": 0.12, "grad_norm": 1.6184719245446453, "learning_rate": 1.959298677145924e-05, "loss": 0.8487, "step": 1164 }, { "epoch": 0.12, "grad_norm": 1.7206162783405348, "learning_rate": 1.95920559183435e-05, "loss": 0.7311, "step": 1165 }, { "epoch": 0.12, "grad_norm": 1.8359997424317043, "learning_rate": 1.9591124024160092e-05, "loss": 0.7866, "step": 1166 }, { "epoch": 0.12, "grad_norm": 1.624456920621825, "learning_rate": 1.959019108901016e-05, "loss": 0.747, "step": 1167 }, { "epoch": 0.12, "grad_norm": 1.6260806359564066, "learning_rate": 1.9589257112994955e-05, "loss": 0.69, "step": 1168 }, { "epoch": 0.12, "grad_norm": 1.667073327058875, "learning_rate": 1.958832209621586e-05, "loss": 0.7738, "step": 1169 }, { "epoch": 0.12, "grad_norm": 1.6441841038248202, "learning_rate": 1.9587386038774336e-05, "loss": 0.7193, "step": 1170 }, { "epoch": 0.12, "grad_norm": 1.621706152411868, "learning_rate": 1.958644894077199e-05, "loss": 0.8084, "step": 1171 }, { "epoch": 0.12, "grad_norm": 1.586482320038094, "learning_rate": 1.9585510802310527e-05, "loss": 0.7456, "step": 1172 }, { "epoch": 0.12, "grad_norm": 1.692881629428406, "learning_rate": 1.9584571623491767e-05, "loss": 0.8248, "step": 1173 }, { "epoch": 0.12, "grad_norm": 1.7294424016547736, "learning_rate": 1.9583631404417644e-05, "loss": 0.7689, "step": 1174 }, { "epoch": 0.12, "grad_norm": 1.6193899899241604, "learning_rate": 1.9582690145190203e-05, "loss": 0.8272, "step": 1175 }, { "epoch": 0.12, "grad_norm": 1.7799529369465272, "learning_rate": 1.9581747845911606e-05, "loss": 0.8271, "step": 1176 }, { "epoch": 0.12, "grad_norm": 1.493448273391503, "learning_rate": 1.958080450668412e-05, "loss": 0.6895, "step": 1177 }, { "epoch": 0.12, "grad_norm": 1.5369375142395612, "learning_rate": 1.9579860127610127e-05, "loss": 0.7262, "step": 1178 }, { "epoch": 0.12, "grad_norm": 1.5421982874414422, "learning_rate": 1.9578914708792137e-05, "loss": 0.7941, "step": 1179 }, { "epoch": 0.12, "grad_norm": 1.595212877359824, "learning_rate": 1.9577968250332748e-05, "loss": 0.8563, "step": 1180 }, { "epoch": 0.12, "grad_norm": 1.55371549416413, "learning_rate": 1.957702075233469e-05, "loss": 0.7626, "step": 1181 }, { "epoch": 0.12, "grad_norm": 1.4194941949887323, "learning_rate": 1.9576072214900797e-05, "loss": 0.783, "step": 1182 }, { "epoch": 0.12, "grad_norm": 1.5571594964699573, "learning_rate": 1.9575122638134018e-05, "loss": 0.6418, "step": 1183 }, { "epoch": 0.12, "grad_norm": 1.6443867895825042, "learning_rate": 1.9574172022137416e-05, "loss": 0.7109, "step": 1184 }, { "epoch": 0.12, "grad_norm": 1.5964204320215944, "learning_rate": 1.9573220367014164e-05, "loss": 0.6518, "step": 1185 }, { "epoch": 0.12, "grad_norm": 1.593349684628786, "learning_rate": 1.9572267672867546e-05, "loss": 0.7596, "step": 1186 }, { "epoch": 0.12, "grad_norm": 1.616352916585672, "learning_rate": 1.957131393980097e-05, "loss": 0.6699, "step": 1187 }, { "epoch": 0.12, "grad_norm": 1.623342101148859, "learning_rate": 1.9570359167917942e-05, "loss": 0.753, "step": 1188 }, { "epoch": 0.12, "grad_norm": 1.5982626120798151, "learning_rate": 1.956940335732209e-05, "loss": 0.774, "step": 1189 }, { "epoch": 0.12, "grad_norm": 1.5780506501342173, "learning_rate": 1.9568446508117155e-05, "loss": 0.6698, "step": 1190 }, { "epoch": 0.12, "grad_norm": 1.6191888496783715, "learning_rate": 1.9567488620406984e-05, "loss": 0.7069, "step": 1191 }, { "epoch": 0.12, "grad_norm": 3.128084240737797, "learning_rate": 1.956652969429554e-05, "loss": 0.8099, "step": 1192 }, { "epoch": 0.12, "grad_norm": 1.654400131567404, "learning_rate": 1.95655697298869e-05, "loss": 0.8374, "step": 1193 }, { "epoch": 0.12, "grad_norm": 1.5711157493370986, "learning_rate": 1.9564608727285258e-05, "loss": 0.6901, "step": 1194 }, { "epoch": 0.12, "grad_norm": 1.458281055944847, "learning_rate": 1.956364668659491e-05, "loss": 0.6599, "step": 1195 }, { "epoch": 0.12, "grad_norm": 1.6305506241103176, "learning_rate": 1.9562683607920278e-05, "loss": 0.8076, "step": 1196 }, { "epoch": 0.12, "grad_norm": 1.6685254009668504, "learning_rate": 1.956171949136588e-05, "loss": 0.7673, "step": 1197 }, { "epoch": 0.12, "grad_norm": 1.5926444627846572, "learning_rate": 1.956075433703636e-05, "loss": 0.6434, "step": 1198 }, { "epoch": 0.12, "grad_norm": 1.5334041797983586, "learning_rate": 1.955978814503647e-05, "loss": 0.6648, "step": 1199 }, { "epoch": 0.12, "grad_norm": 1.6348793124114884, "learning_rate": 1.9558820915471076e-05, "loss": 0.8082, "step": 1200 }, { "epoch": 0.12, "grad_norm": 1.6650518092286242, "learning_rate": 1.9557852648445155e-05, "loss": 0.7144, "step": 1201 }, { "epoch": 0.12, "grad_norm": 1.7730364636444929, "learning_rate": 1.95568833440638e-05, "loss": 0.8038, "step": 1202 }, { "epoch": 0.12, "grad_norm": 1.4817291628359017, "learning_rate": 1.9555913002432207e-05, "loss": 0.7215, "step": 1203 }, { "epoch": 0.12, "grad_norm": 1.5437673000618928, "learning_rate": 1.9554941623655695e-05, "loss": 0.7794, "step": 1204 }, { "epoch": 0.12, "grad_norm": 1.4958564172666966, "learning_rate": 1.9553969207839694e-05, "loss": 0.7292, "step": 1205 }, { "epoch": 0.12, "grad_norm": 1.4671382704484248, "learning_rate": 1.955299575508974e-05, "loss": 0.718, "step": 1206 }, { "epoch": 0.12, "grad_norm": 1.5760005500160656, "learning_rate": 1.9552021265511492e-05, "loss": 0.7143, "step": 1207 }, { "epoch": 0.12, "grad_norm": 1.5823479034445453, "learning_rate": 1.955104573921071e-05, "loss": 0.7174, "step": 1208 }, { "epoch": 0.12, "grad_norm": 1.7896892822695007, "learning_rate": 1.955006917629328e-05, "loss": 0.8204, "step": 1209 }, { "epoch": 0.12, "grad_norm": 1.609040548686237, "learning_rate": 1.9549091576865186e-05, "loss": 0.7619, "step": 1210 }, { "epoch": 0.12, "grad_norm": 1.5210302834438783, "learning_rate": 1.954811294103253e-05, "loss": 0.6917, "step": 1211 }, { "epoch": 0.12, "grad_norm": 1.668606437182086, "learning_rate": 1.9547133268901527e-05, "loss": 0.7154, "step": 1212 }, { "epoch": 0.12, "grad_norm": 1.7072837999942514, "learning_rate": 1.954615256057851e-05, "loss": 0.7634, "step": 1213 }, { "epoch": 0.12, "grad_norm": 1.8353300138895317, "learning_rate": 1.954517081616992e-05, "loss": 0.7178, "step": 1214 }, { "epoch": 0.12, "grad_norm": 1.5972113260052339, "learning_rate": 1.954418803578231e-05, "loss": 0.7412, "step": 1215 }, { "epoch": 0.12, "grad_norm": 1.6412684612277957, "learning_rate": 1.9543204219522338e-05, "loss": 0.728, "step": 1216 }, { "epoch": 0.12, "grad_norm": 1.609735472945734, "learning_rate": 1.954221936749679e-05, "loss": 0.8719, "step": 1217 }, { "epoch": 0.12, "grad_norm": 1.5573591820655903, "learning_rate": 1.9541233479812552e-05, "loss": 0.7166, "step": 1218 }, { "epoch": 0.12, "grad_norm": 1.7781813815209297, "learning_rate": 1.9540246556576624e-05, "loss": 0.7628, "step": 1219 }, { "epoch": 0.12, "grad_norm": 1.683038497796432, "learning_rate": 1.9539258597896132e-05, "loss": 0.9519, "step": 1220 }, { "epoch": 0.12, "grad_norm": 1.6477867242414523, "learning_rate": 1.9538269603878293e-05, "loss": 0.7761, "step": 1221 }, { "epoch": 0.12, "grad_norm": 1.6247177428236637, "learning_rate": 1.953727957463045e-05, "loss": 0.8392, "step": 1222 }, { "epoch": 0.12, "grad_norm": 1.7641582896967056, "learning_rate": 1.953628851026006e-05, "loss": 0.8251, "step": 1223 }, { "epoch": 0.12, "grad_norm": 1.651347180107763, "learning_rate": 1.9535296410874678e-05, "loss": 0.7091, "step": 1224 }, { "epoch": 0.12, "grad_norm": 1.596795753990509, "learning_rate": 1.953430327658199e-05, "loss": 0.6776, "step": 1225 }, { "epoch": 0.12, "grad_norm": 1.72023293687916, "learning_rate": 1.9533309107489775e-05, "loss": 0.7708, "step": 1226 }, { "epoch": 0.12, "grad_norm": 1.8641043621843565, "learning_rate": 1.9532313903705945e-05, "loss": 0.8081, "step": 1227 }, { "epoch": 0.12, "grad_norm": 1.6892750119679534, "learning_rate": 1.953131766533851e-05, "loss": 0.8025, "step": 1228 }, { "epoch": 0.13, "grad_norm": 1.6979908514604627, "learning_rate": 1.9530320392495592e-05, "loss": 0.8839, "step": 1229 }, { "epoch": 0.13, "grad_norm": 1.5614345333542738, "learning_rate": 1.9529322085285436e-05, "loss": 0.8074, "step": 1230 }, { "epoch": 0.13, "grad_norm": 1.5462404519794781, "learning_rate": 1.952832274381639e-05, "loss": 0.7008, "step": 1231 }, { "epoch": 0.13, "grad_norm": 1.6801337840039725, "learning_rate": 1.9527322368196913e-05, "loss": 0.7842, "step": 1232 }, { "epoch": 0.13, "grad_norm": 1.4877774460670745, "learning_rate": 1.952632095853559e-05, "loss": 0.7126, "step": 1233 }, { "epoch": 0.13, "grad_norm": 1.470108974836532, "learning_rate": 1.9525318514941097e-05, "loss": 0.7865, "step": 1234 }, { "epoch": 0.13, "grad_norm": 1.6233458319969059, "learning_rate": 1.9524315037522238e-05, "loss": 0.7215, "step": 1235 }, { "epoch": 0.13, "grad_norm": 1.6854954491546525, "learning_rate": 1.9523310526387928e-05, "loss": 0.7766, "step": 1236 }, { "epoch": 0.13, "grad_norm": 1.71560710244287, "learning_rate": 1.9522304981647186e-05, "loss": 0.8284, "step": 1237 }, { "epoch": 0.13, "grad_norm": 1.5165468607991677, "learning_rate": 1.952129840340915e-05, "loss": 0.7682, "step": 1238 }, { "epoch": 0.13, "grad_norm": 1.5311283978080743, "learning_rate": 1.952029079178307e-05, "loss": 0.7429, "step": 1239 }, { "epoch": 0.13, "grad_norm": 1.5454786126577658, "learning_rate": 1.951928214687831e-05, "loss": 0.7592, "step": 1240 }, { "epoch": 0.13, "grad_norm": 1.7566208659424567, "learning_rate": 1.9518272468804333e-05, "loss": 0.7453, "step": 1241 }, { "epoch": 0.13, "grad_norm": 1.6901545265480546, "learning_rate": 1.9517261757670728e-05, "loss": 0.8635, "step": 1242 }, { "epoch": 0.13, "grad_norm": 1.7770640309026147, "learning_rate": 1.9516250013587193e-05, "loss": 0.7617, "step": 1243 }, { "epoch": 0.13, "grad_norm": 1.5424774446781067, "learning_rate": 1.9515237236663537e-05, "loss": 0.7539, "step": 1244 }, { "epoch": 0.13, "grad_norm": 1.6504202538179722, "learning_rate": 1.951422342700968e-05, "loss": 0.7253, "step": 1245 }, { "epoch": 0.13, "grad_norm": 1.479830693335698, "learning_rate": 1.9513208584735656e-05, "loss": 0.7184, "step": 1246 }, { "epoch": 0.13, "grad_norm": 1.610053631904774, "learning_rate": 1.9512192709951613e-05, "loss": 0.8248, "step": 1247 }, { "epoch": 0.13, "grad_norm": 1.647736990225513, "learning_rate": 1.95111758027678e-05, "loss": 0.7153, "step": 1248 }, { "epoch": 0.13, "grad_norm": 1.6762468882847545, "learning_rate": 1.9510157863294595e-05, "loss": 0.8218, "step": 1249 }, { "epoch": 0.13, "grad_norm": 1.5976749648522812, "learning_rate": 1.9509138891642476e-05, "loss": 0.7339, "step": 1250 }, { "epoch": 0.13, "grad_norm": 1.6550024682871, "learning_rate": 1.950811888792204e-05, "loss": 0.754, "step": 1251 }, { "epoch": 0.13, "grad_norm": 1.8202097251988651, "learning_rate": 1.9507097852243982e-05, "loss": 0.7897, "step": 1252 }, { "epoch": 0.13, "grad_norm": 1.5527467626965263, "learning_rate": 1.950607578471913e-05, "loss": 0.7427, "step": 1253 }, { "epoch": 0.13, "grad_norm": 1.431786500229432, "learning_rate": 1.950505268545841e-05, "loss": 0.6891, "step": 1254 }, { "epoch": 0.13, "grad_norm": 1.5255211241113107, "learning_rate": 1.9504028554572865e-05, "loss": 0.6895, "step": 1255 }, { "epoch": 0.13, "grad_norm": 1.573716102171307, "learning_rate": 1.9503003392173643e-05, "loss": 0.7392, "step": 1256 }, { "epoch": 0.13, "grad_norm": 1.7500915942835833, "learning_rate": 1.9501977198372017e-05, "loss": 0.8491, "step": 1257 }, { "epoch": 0.13, "grad_norm": 1.6546658273474368, "learning_rate": 1.9500949973279358e-05, "loss": 0.6988, "step": 1258 }, { "epoch": 0.13, "grad_norm": 1.6133269018317669, "learning_rate": 1.949992171700716e-05, "loss": 0.6442, "step": 1259 }, { "epoch": 0.13, "grad_norm": 1.6354850724656633, "learning_rate": 1.949889242966702e-05, "loss": 0.8259, "step": 1260 }, { "epoch": 0.13, "grad_norm": 1.7682967900115991, "learning_rate": 1.9497862111370654e-05, "loss": 0.8345, "step": 1261 }, { "epoch": 0.13, "grad_norm": 1.6029947100999593, "learning_rate": 1.9496830762229884e-05, "loss": 0.8014, "step": 1262 }, { "epoch": 0.13, "grad_norm": 1.6075492800959517, "learning_rate": 1.949579838235665e-05, "loss": 0.7311, "step": 1263 }, { "epoch": 0.13, "grad_norm": 1.6998815244012917, "learning_rate": 1.9494764971862998e-05, "loss": 0.7245, "step": 1264 }, { "epoch": 0.13, "grad_norm": 1.4951211694261486, "learning_rate": 1.949373053086109e-05, "loss": 0.8232, "step": 1265 }, { "epoch": 0.13, "grad_norm": 1.5067111018633088, "learning_rate": 1.9492695059463197e-05, "loss": 0.7533, "step": 1266 }, { "epoch": 0.13, "grad_norm": 1.6037628206146815, "learning_rate": 1.949165855778171e-05, "loss": 0.7363, "step": 1267 }, { "epoch": 0.13, "grad_norm": 1.5768105452349963, "learning_rate": 1.9490621025929112e-05, "loss": 0.7573, "step": 1268 }, { "epoch": 0.13, "grad_norm": 2.01346038116964, "learning_rate": 1.9489582464018023e-05, "loss": 0.8341, "step": 1269 }, { "epoch": 0.13, "grad_norm": 1.7239135570187674, "learning_rate": 1.948854287216116e-05, "loss": 0.7889, "step": 1270 }, { "epoch": 0.13, "grad_norm": 1.4433082279559541, "learning_rate": 1.9487502250471347e-05, "loss": 0.6745, "step": 1271 }, { "epoch": 0.13, "grad_norm": 1.6967479347098455, "learning_rate": 1.9486460599061536e-05, "loss": 0.6947, "step": 1272 }, { "epoch": 0.13, "grad_norm": 1.6368410087098855, "learning_rate": 1.948541791804478e-05, "loss": 0.7861, "step": 1273 }, { "epoch": 0.13, "grad_norm": 1.7263202574572991, "learning_rate": 1.948437420753424e-05, "loss": 0.7657, "step": 1274 }, { "epoch": 0.13, "grad_norm": 1.4448531385811203, "learning_rate": 1.94833294676432e-05, "loss": 0.5998, "step": 1275 }, { "epoch": 0.13, "grad_norm": 1.5124154039368294, "learning_rate": 1.948228369848505e-05, "loss": 0.7219, "step": 1276 }, { "epoch": 0.13, "grad_norm": 1.702412187559942, "learning_rate": 1.948123690017329e-05, "loss": 0.781, "step": 1277 }, { "epoch": 0.13, "grad_norm": 1.555786535491567, "learning_rate": 1.9480189072821537e-05, "loss": 0.7099, "step": 1278 }, { "epoch": 0.13, "grad_norm": 1.6032400807672151, "learning_rate": 1.947914021654351e-05, "loss": 0.6508, "step": 1279 }, { "epoch": 0.13, "grad_norm": 1.3784201916828709, "learning_rate": 1.9478090331453054e-05, "loss": 0.7026, "step": 1280 }, { "epoch": 0.13, "grad_norm": 1.5784513045776165, "learning_rate": 1.947703941766411e-05, "loss": 0.8092, "step": 1281 }, { "epoch": 0.13, "grad_norm": 1.5666128770715946, "learning_rate": 1.947598747529074e-05, "loss": 0.7278, "step": 1282 }, { "epoch": 0.13, "grad_norm": 1.6376187966620492, "learning_rate": 1.947493450444712e-05, "loss": 0.7695, "step": 1283 }, { "epoch": 0.13, "grad_norm": 1.406052287009246, "learning_rate": 1.9473880505247532e-05, "loss": 0.7641, "step": 1284 }, { "epoch": 0.13, "grad_norm": 1.7269431831784723, "learning_rate": 1.947282547780637e-05, "loss": 0.8017, "step": 1285 }, { "epoch": 0.13, "grad_norm": 1.5161138349707055, "learning_rate": 1.947176942223814e-05, "loss": 0.7157, "step": 1286 }, { "epoch": 0.13, "grad_norm": 1.5678540961023142, "learning_rate": 1.947071233865746e-05, "loss": 0.767, "step": 1287 }, { "epoch": 0.13, "grad_norm": 1.5527071981054203, "learning_rate": 1.946965422717906e-05, "loss": 0.8231, "step": 1288 }, { "epoch": 0.13, "grad_norm": 1.5907878060262768, "learning_rate": 1.9468595087917786e-05, "loss": 0.6807, "step": 1289 }, { "epoch": 0.13, "grad_norm": 1.5158981766324053, "learning_rate": 1.9467534920988586e-05, "loss": 0.697, "step": 1290 }, { "epoch": 0.13, "grad_norm": 1.59870920201292, "learning_rate": 1.9466473726506522e-05, "loss": 0.7714, "step": 1291 }, { "epoch": 0.13, "grad_norm": 1.7794305184289632, "learning_rate": 1.9465411504586778e-05, "loss": 0.734, "step": 1292 }, { "epoch": 0.13, "grad_norm": 1.6392493833071595, "learning_rate": 1.9464348255344642e-05, "loss": 0.7791, "step": 1293 }, { "epoch": 0.13, "grad_norm": 1.6509845458827024, "learning_rate": 1.9463283978895503e-05, "loss": 0.7268, "step": 1294 }, { "epoch": 0.13, "grad_norm": 1.7374581302193655, "learning_rate": 1.9462218675354877e-05, "loss": 0.8593, "step": 1295 }, { "epoch": 0.13, "grad_norm": 1.5227355426200806, "learning_rate": 1.9461152344838386e-05, "loss": 0.766, "step": 1296 }, { "epoch": 0.13, "grad_norm": 1.546737758110503, "learning_rate": 1.9460084987461767e-05, "loss": 0.7758, "step": 1297 }, { "epoch": 0.13, "grad_norm": 1.5361827428965045, "learning_rate": 1.945901660334086e-05, "loss": 0.7971, "step": 1298 }, { "epoch": 0.13, "grad_norm": 1.7399009926320979, "learning_rate": 1.9457947192591623e-05, "loss": 0.6787, "step": 1299 }, { "epoch": 0.13, "grad_norm": 1.5621018072678603, "learning_rate": 1.945687675533013e-05, "loss": 0.7643, "step": 1300 }, { "epoch": 0.13, "grad_norm": 1.5677030577309845, "learning_rate": 1.9455805291672544e-05, "loss": 0.7244, "step": 1301 }, { "epoch": 0.13, "grad_norm": 1.8324847460117326, "learning_rate": 1.9454732801735173e-05, "loss": 0.8323, "step": 1302 }, { "epoch": 0.13, "grad_norm": 1.8054740103348892, "learning_rate": 1.945365928563441e-05, "loss": 0.86, "step": 1303 }, { "epoch": 0.13, "grad_norm": 1.5267902312222363, "learning_rate": 1.9452584743486768e-05, "loss": 0.7199, "step": 1304 }, { "epoch": 0.13, "grad_norm": 1.6207235099791248, "learning_rate": 1.9451509175408875e-05, "loss": 0.8241, "step": 1305 }, { "epoch": 0.13, "grad_norm": 1.6311789356149866, "learning_rate": 1.9450432581517466e-05, "loss": 0.8855, "step": 1306 }, { "epoch": 0.13, "grad_norm": 1.6012394050190988, "learning_rate": 1.944935496192939e-05, "loss": 0.7032, "step": 1307 }, { "epoch": 0.13, "grad_norm": 1.6940041699409758, "learning_rate": 1.9448276316761604e-05, "loss": 0.795, "step": 1308 }, { "epoch": 0.13, "grad_norm": 1.6439835630134267, "learning_rate": 1.9447196646131177e-05, "loss": 0.8516, "step": 1309 }, { "epoch": 0.13, "grad_norm": 1.563633479841595, "learning_rate": 1.9446115950155293e-05, "loss": 0.855, "step": 1310 }, { "epoch": 0.13, "grad_norm": 1.6668575947195545, "learning_rate": 1.9445034228951245e-05, "loss": 0.7242, "step": 1311 }, { "epoch": 0.13, "grad_norm": 1.573876033648897, "learning_rate": 1.944395148263643e-05, "loss": 0.7852, "step": 1312 }, { "epoch": 0.13, "grad_norm": 1.77770874892001, "learning_rate": 1.9442867711328372e-05, "loss": 0.8421, "step": 1313 }, { "epoch": 0.13, "grad_norm": 1.6470437342507795, "learning_rate": 1.9441782915144694e-05, "loss": 0.762, "step": 1314 }, { "epoch": 0.13, "grad_norm": 1.5012094606088324, "learning_rate": 1.9440697094203135e-05, "loss": 0.7325, "step": 1315 }, { "epoch": 0.13, "grad_norm": 1.624074833699309, "learning_rate": 1.943961024862154e-05, "loss": 0.7439, "step": 1316 }, { "epoch": 0.13, "grad_norm": 1.853394164880661, "learning_rate": 1.943852237851787e-05, "loss": 0.7825, "step": 1317 }, { "epoch": 0.13, "grad_norm": 1.8797453479583488, "learning_rate": 1.9437433484010203e-05, "loss": 0.769, "step": 1318 }, { "epoch": 0.13, "grad_norm": 1.6375299522173172, "learning_rate": 1.943634356521671e-05, "loss": 0.7554, "step": 1319 }, { "epoch": 0.13, "grad_norm": 1.5538090573787875, "learning_rate": 1.9435252622255694e-05, "loss": 0.8113, "step": 1320 }, { "epoch": 0.13, "grad_norm": 1.5303415868657724, "learning_rate": 1.9434160655245557e-05, "loss": 0.777, "step": 1321 }, { "epoch": 0.13, "grad_norm": 1.6464673627238309, "learning_rate": 1.9433067664304818e-05, "loss": 0.7287, "step": 1322 }, { "epoch": 0.13, "grad_norm": 1.687666449674919, "learning_rate": 1.9431973649552094e-05, "loss": 0.7274, "step": 1323 }, { "epoch": 0.13, "grad_norm": 1.8097115392477354, "learning_rate": 1.9430878611106133e-05, "loss": 0.7362, "step": 1324 }, { "epoch": 0.13, "grad_norm": 1.5710783797225716, "learning_rate": 1.942978254908578e-05, "loss": 0.73, "step": 1325 }, { "epoch": 0.13, "grad_norm": 1.4434197275009946, "learning_rate": 1.942868546361e-05, "loss": 0.6966, "step": 1326 }, { "epoch": 0.13, "grad_norm": 1.7744289801449897, "learning_rate": 1.9427587354797857e-05, "loss": 0.8461, "step": 1327 }, { "epoch": 0.14, "grad_norm": 1.5746501758483216, "learning_rate": 1.9426488222768542e-05, "loss": 0.7306, "step": 1328 }, { "epoch": 0.14, "grad_norm": 1.6972017007527762, "learning_rate": 1.942538806764134e-05, "loss": 0.9094, "step": 1329 }, { "epoch": 0.14, "grad_norm": 1.6418540267949544, "learning_rate": 1.9424286889535663e-05, "loss": 0.7503, "step": 1330 }, { "epoch": 0.14, "grad_norm": 1.5414145441351583, "learning_rate": 1.942318468857102e-05, "loss": 0.809, "step": 1331 }, { "epoch": 0.14, "grad_norm": 1.4873908203642654, "learning_rate": 1.9422081464867043e-05, "loss": 0.7576, "step": 1332 }, { "epoch": 0.14, "grad_norm": 1.5279253204351688, "learning_rate": 1.942097721854347e-05, "loss": 0.7851, "step": 1333 }, { "epoch": 0.14, "grad_norm": 1.687191963317494, "learning_rate": 1.9419871949720145e-05, "loss": 0.8089, "step": 1334 }, { "epoch": 0.14, "grad_norm": 1.4740624992934195, "learning_rate": 1.9418765658517032e-05, "loss": 0.8128, "step": 1335 }, { "epoch": 0.14, "grad_norm": 1.62078576616334, "learning_rate": 1.9417658345054196e-05, "loss": 0.7414, "step": 1336 }, { "epoch": 0.14, "grad_norm": 1.4588705309609198, "learning_rate": 1.9416550009451827e-05, "loss": 0.6019, "step": 1337 }, { "epoch": 0.14, "grad_norm": 1.5758310763195056, "learning_rate": 1.941544065183021e-05, "loss": 0.7815, "step": 1338 }, { "epoch": 0.14, "grad_norm": 1.5708714704534004, "learning_rate": 1.9414330272309753e-05, "loss": 0.6871, "step": 1339 }, { "epoch": 0.14, "grad_norm": 1.5364979929950109, "learning_rate": 1.9413218871010964e-05, "loss": 0.7195, "step": 1340 }, { "epoch": 0.14, "grad_norm": 1.7385744275514265, "learning_rate": 1.9412106448054476e-05, "loss": 0.8279, "step": 1341 }, { "epoch": 0.14, "grad_norm": 1.731954304641849, "learning_rate": 1.9410993003561027e-05, "loss": 0.8354, "step": 1342 }, { "epoch": 0.14, "grad_norm": 1.6744216970597119, "learning_rate": 1.940987853765145e-05, "loss": 0.7414, "step": 1343 }, { "epoch": 0.14, "grad_norm": 1.6391277634441834, "learning_rate": 1.9408763050446716e-05, "loss": 0.7927, "step": 1344 }, { "epoch": 0.14, "grad_norm": 1.7315724333267457, "learning_rate": 1.9407646542067892e-05, "loss": 0.8221, "step": 1345 }, { "epoch": 0.14, "grad_norm": 1.6881225736292127, "learning_rate": 1.9406529012636153e-05, "loss": 0.7886, "step": 1346 }, { "epoch": 0.14, "grad_norm": 1.581344862065677, "learning_rate": 1.940541046227279e-05, "loss": 0.6217, "step": 1347 }, { "epoch": 0.14, "grad_norm": 1.589644667566598, "learning_rate": 1.9404290891099205e-05, "loss": 0.8205, "step": 1348 }, { "epoch": 0.14, "grad_norm": 1.5314874491401882, "learning_rate": 1.940317029923691e-05, "loss": 0.7064, "step": 1349 }, { "epoch": 0.14, "grad_norm": 1.6299195296123066, "learning_rate": 1.9402048686807534e-05, "loss": 0.6926, "step": 1350 }, { "epoch": 0.14, "grad_norm": 1.6570600149638093, "learning_rate": 1.94009260539328e-05, "loss": 0.7882, "step": 1351 }, { "epoch": 0.14, "grad_norm": 1.4966120899156123, "learning_rate": 1.939980240073456e-05, "loss": 0.8876, "step": 1352 }, { "epoch": 0.14, "grad_norm": 1.5556698302348833, "learning_rate": 1.939867772733476e-05, "loss": 0.731, "step": 1353 }, { "epoch": 0.14, "grad_norm": 1.733950508696902, "learning_rate": 1.9397552033855474e-05, "loss": 0.8104, "step": 1354 }, { "epoch": 0.14, "grad_norm": 1.6408561640363308, "learning_rate": 1.939642532041888e-05, "loss": 0.7891, "step": 1355 }, { "epoch": 0.14, "grad_norm": 1.6736879527988986, "learning_rate": 1.9395297587147256e-05, "loss": 0.8729, "step": 1356 }, { "epoch": 0.14, "grad_norm": 1.6896580180672505, "learning_rate": 1.9394168834163007e-05, "loss": 0.9141, "step": 1357 }, { "epoch": 0.14, "grad_norm": 1.6713377955139828, "learning_rate": 1.939303906158864e-05, "loss": 0.8107, "step": 1358 }, { "epoch": 0.14, "grad_norm": 1.6503043741757688, "learning_rate": 1.9391908269546773e-05, "loss": 0.8003, "step": 1359 }, { "epoch": 0.14, "grad_norm": 1.47962070900276, "learning_rate": 1.9390776458160137e-05, "loss": 0.6367, "step": 1360 }, { "epoch": 0.14, "grad_norm": 1.7702743050628662, "learning_rate": 1.938964362755157e-05, "loss": 0.8017, "step": 1361 }, { "epoch": 0.14, "grad_norm": 1.7824582581508035, "learning_rate": 1.9388509777844027e-05, "loss": 0.8004, "step": 1362 }, { "epoch": 0.14, "grad_norm": 1.5668170478152565, "learning_rate": 1.9387374909160567e-05, "loss": 0.7007, "step": 1363 }, { "epoch": 0.14, "grad_norm": 1.560949917574267, "learning_rate": 1.9386239021624362e-05, "loss": 0.7871, "step": 1364 }, { "epoch": 0.14, "grad_norm": 1.7083113146742042, "learning_rate": 1.9385102115358695e-05, "loss": 0.7337, "step": 1365 }, { "epoch": 0.14, "grad_norm": 1.6388928807175198, "learning_rate": 1.938396419048696e-05, "loss": 0.7306, "step": 1366 }, { "epoch": 0.14, "grad_norm": 1.8150081650050511, "learning_rate": 1.9382825247132662e-05, "loss": 0.7636, "step": 1367 }, { "epoch": 0.14, "grad_norm": 1.6574189204465755, "learning_rate": 1.9381685285419417e-05, "loss": 0.7559, "step": 1368 }, { "epoch": 0.14, "grad_norm": 1.6478090214963608, "learning_rate": 1.9380544305470942e-05, "loss": 0.868, "step": 1369 }, { "epoch": 0.14, "grad_norm": 1.585684010108357, "learning_rate": 1.937940230741108e-05, "loss": 0.6967, "step": 1370 }, { "epoch": 0.14, "grad_norm": 1.5524270780817286, "learning_rate": 1.9378259291363774e-05, "loss": 0.7274, "step": 1371 }, { "epoch": 0.14, "grad_norm": 1.7849853674264955, "learning_rate": 1.9377115257453085e-05, "loss": 0.7452, "step": 1372 }, { "epoch": 0.14, "grad_norm": 1.5268152637742454, "learning_rate": 1.9375970205803175e-05, "loss": 0.7099, "step": 1373 }, { "epoch": 0.14, "grad_norm": 1.5911324780952263, "learning_rate": 1.937482413653832e-05, "loss": 0.8437, "step": 1374 }, { "epoch": 0.14, "grad_norm": 1.4835029992058266, "learning_rate": 1.9373677049782916e-05, "loss": 0.5901, "step": 1375 }, { "epoch": 0.14, "grad_norm": 1.5101360380804878, "learning_rate": 1.937252894566145e-05, "loss": 0.7386, "step": 1376 }, { "epoch": 0.14, "grad_norm": 1.4448014915967933, "learning_rate": 1.937137982429854e-05, "loss": 0.7943, "step": 1377 }, { "epoch": 0.14, "grad_norm": 1.5081393545005264, "learning_rate": 1.9370229685818903e-05, "loss": 0.663, "step": 1378 }, { "epoch": 0.14, "grad_norm": 1.5379845123993552, "learning_rate": 1.936907853034737e-05, "loss": 0.7651, "step": 1379 }, { "epoch": 0.14, "grad_norm": 1.7229609662472323, "learning_rate": 1.9367926358008872e-05, "loss": 0.624, "step": 1380 }, { "epoch": 0.14, "grad_norm": 1.5857171073643888, "learning_rate": 1.936677316892847e-05, "loss": 0.7273, "step": 1381 }, { "epoch": 0.14, "grad_norm": 1.8265782778745185, "learning_rate": 1.936561896323132e-05, "loss": 0.831, "step": 1382 }, { "epoch": 0.14, "grad_norm": 1.5858906216474162, "learning_rate": 1.9364463741042694e-05, "loss": 0.8427, "step": 1383 }, { "epoch": 0.14, "grad_norm": 1.6444591766781325, "learning_rate": 1.936330750248797e-05, "loss": 0.7696, "step": 1384 }, { "epoch": 0.14, "grad_norm": 1.7707256531790037, "learning_rate": 1.9362150247692646e-05, "loss": 0.7645, "step": 1385 }, { "epoch": 0.14, "grad_norm": 1.510474176362667, "learning_rate": 1.9360991976782317e-05, "loss": 0.5463, "step": 1386 }, { "epoch": 0.14, "grad_norm": 1.616219735344741, "learning_rate": 1.93598326898827e-05, "loss": 0.6828, "step": 1387 }, { "epoch": 0.14, "grad_norm": 1.5900808236080102, "learning_rate": 1.935867238711962e-05, "loss": 0.7644, "step": 1388 }, { "epoch": 0.14, "grad_norm": 1.5447266911351711, "learning_rate": 1.9357511068619e-05, "loss": 0.7823, "step": 1389 }, { "epoch": 0.14, "grad_norm": 1.443662221459821, "learning_rate": 1.9356348734506888e-05, "loss": 0.6705, "step": 1390 }, { "epoch": 0.14, "grad_norm": 1.6858155970178428, "learning_rate": 1.935518538490944e-05, "loss": 0.7775, "step": 1391 }, { "epoch": 0.14, "grad_norm": 1.6260463865702544, "learning_rate": 1.9354021019952917e-05, "loss": 0.6997, "step": 1392 }, { "epoch": 0.14, "grad_norm": 1.6151462386185187, "learning_rate": 1.9352855639763693e-05, "loss": 0.7388, "step": 1393 }, { "epoch": 0.14, "grad_norm": 1.6226745231864121, "learning_rate": 1.935168924446825e-05, "loss": 0.7637, "step": 1394 }, { "epoch": 0.14, "grad_norm": 1.736869739132547, "learning_rate": 1.935052183419319e-05, "loss": 0.8231, "step": 1395 }, { "epoch": 0.14, "grad_norm": 1.5521494478169477, "learning_rate": 1.9349353409065203e-05, "loss": 0.8538, "step": 1396 }, { "epoch": 0.14, "grad_norm": 1.4936808683766414, "learning_rate": 1.9348183969211113e-05, "loss": 0.7137, "step": 1397 }, { "epoch": 0.14, "grad_norm": 1.6405737267346916, "learning_rate": 1.9347013514757845e-05, "loss": 0.7905, "step": 1398 }, { "epoch": 0.14, "grad_norm": 1.621940251119487, "learning_rate": 1.934584204583243e-05, "loss": 0.828, "step": 1399 }, { "epoch": 0.14, "grad_norm": 1.6037284742563462, "learning_rate": 1.934466956256201e-05, "loss": 0.6796, "step": 1400 }, { "epoch": 0.14, "grad_norm": 1.567145940022025, "learning_rate": 1.9343496065073846e-05, "loss": 0.7317, "step": 1401 }, { "epoch": 0.14, "grad_norm": 1.9771936077668393, "learning_rate": 1.93423215534953e-05, "loss": 0.7907, "step": 1402 }, { "epoch": 0.14, "grad_norm": 1.6312584772073946, "learning_rate": 1.934114602795385e-05, "loss": 0.6874, "step": 1403 }, { "epoch": 0.14, "grad_norm": 1.4670109880400073, "learning_rate": 1.9339969488577074e-05, "loss": 0.7118, "step": 1404 }, { "epoch": 0.14, "grad_norm": 1.5236031544065936, "learning_rate": 1.933879193549267e-05, "loss": 0.7507, "step": 1405 }, { "epoch": 0.14, "grad_norm": 1.6547095135909067, "learning_rate": 1.9337613368828443e-05, "loss": 0.7915, "step": 1406 }, { "epoch": 0.14, "grad_norm": 1.6994751897935607, "learning_rate": 1.9336433788712313e-05, "loss": 0.7358, "step": 1407 }, { "epoch": 0.14, "grad_norm": 1.6317326459822472, "learning_rate": 1.9335253195272298e-05, "loss": 0.7229, "step": 1408 }, { "epoch": 0.14, "grad_norm": 1.4262997590885145, "learning_rate": 1.9334071588636537e-05, "loss": 0.6226, "step": 1409 }, { "epoch": 0.14, "grad_norm": 1.650951039532934, "learning_rate": 1.9332888968933273e-05, "loss": 0.7597, "step": 1410 }, { "epoch": 0.14, "grad_norm": 1.431463986073521, "learning_rate": 1.933170533629086e-05, "loss": 0.7399, "step": 1411 }, { "epoch": 0.14, "grad_norm": 1.7834899114222995, "learning_rate": 1.933052069083777e-05, "loss": 0.7878, "step": 1412 }, { "epoch": 0.14, "grad_norm": 1.6821249271791365, "learning_rate": 1.9329335032702564e-05, "loss": 0.7578, "step": 1413 }, { "epoch": 0.14, "grad_norm": 1.6168283852908498, "learning_rate": 1.932814836201394e-05, "loss": 0.8011, "step": 1414 }, { "epoch": 0.14, "grad_norm": 1.5598551085056578, "learning_rate": 1.932696067890069e-05, "loss": 0.7733, "step": 1415 }, { "epoch": 0.14, "grad_norm": 1.4618766523100533, "learning_rate": 1.9325771983491708e-05, "loss": 0.6826, "step": 1416 }, { "epoch": 0.14, "grad_norm": 1.3812200411588107, "learning_rate": 1.932458227591602e-05, "loss": 0.7218, "step": 1417 }, { "epoch": 0.14, "grad_norm": 1.5380172482866934, "learning_rate": 1.9323391556302743e-05, "loss": 0.8418, "step": 1418 }, { "epoch": 0.14, "grad_norm": 1.6412778862474402, "learning_rate": 1.9322199824781117e-05, "loss": 0.826, "step": 1419 }, { "epoch": 0.14, "grad_norm": 1.5275748605652015, "learning_rate": 1.9321007081480486e-05, "loss": 0.7854, "step": 1420 }, { "epoch": 0.14, "grad_norm": 1.4810290185183532, "learning_rate": 1.9319813326530296e-05, "loss": 0.7001, "step": 1421 }, { "epoch": 0.14, "grad_norm": 1.5757069867975266, "learning_rate": 1.9318618560060117e-05, "loss": 0.7207, "step": 1422 }, { "epoch": 0.14, "grad_norm": 1.4581775676110735, "learning_rate": 1.931742278219962e-05, "loss": 0.7473, "step": 1423 }, { "epoch": 0.14, "grad_norm": 1.5452482775653402, "learning_rate": 1.9316225993078586e-05, "loss": 0.7197, "step": 1424 }, { "epoch": 0.14, "grad_norm": 1.5196283043899141, "learning_rate": 1.9315028192826912e-05, "loss": 0.7792, "step": 1425 }, { "epoch": 0.15, "grad_norm": 1.490523064687223, "learning_rate": 1.9313829381574594e-05, "loss": 0.714, "step": 1426 }, { "epoch": 0.15, "grad_norm": 1.5986736451592876, "learning_rate": 1.9312629559451755e-05, "loss": 0.8192, "step": 1427 }, { "epoch": 0.15, "grad_norm": 1.6455596522866387, "learning_rate": 1.931142872658861e-05, "loss": 0.8674, "step": 1428 }, { "epoch": 0.15, "grad_norm": 1.690847570583257, "learning_rate": 1.9310226883115488e-05, "loss": 0.7916, "step": 1429 }, { "epoch": 0.15, "grad_norm": 1.7743202508358853, "learning_rate": 1.9309024029162833e-05, "loss": 0.5859, "step": 1430 }, { "epoch": 0.15, "grad_norm": 1.5466754395817315, "learning_rate": 1.93078201648612e-05, "loss": 0.7705, "step": 1431 }, { "epoch": 0.15, "grad_norm": 1.5247144984670906, "learning_rate": 1.9306615290341244e-05, "loss": 0.7685, "step": 1432 }, { "epoch": 0.15, "grad_norm": 1.6077323413863682, "learning_rate": 1.9305409405733736e-05, "loss": 0.7231, "step": 1433 }, { "epoch": 0.15, "grad_norm": 1.6702697638591097, "learning_rate": 1.930420251116956e-05, "loss": 0.7152, "step": 1434 }, { "epoch": 0.15, "grad_norm": 1.4667964469637342, "learning_rate": 1.9302994606779704e-05, "loss": 0.6875, "step": 1435 }, { "epoch": 0.15, "grad_norm": 1.525503816062669, "learning_rate": 1.930178569269526e-05, "loss": 0.684, "step": 1436 }, { "epoch": 0.15, "grad_norm": 1.8024932861274463, "learning_rate": 1.9300575769047448e-05, "loss": 0.7442, "step": 1437 }, { "epoch": 0.15, "grad_norm": 1.562052851367724, "learning_rate": 1.929936483596758e-05, "loss": 0.7246, "step": 1438 }, { "epoch": 0.15, "grad_norm": 1.6614152158765623, "learning_rate": 1.9298152893587084e-05, "loss": 0.7837, "step": 1439 }, { "epoch": 0.15, "grad_norm": 1.7276983476208452, "learning_rate": 1.92969399420375e-05, "loss": 0.732, "step": 1440 }, { "epoch": 0.15, "grad_norm": 1.5647712919266812, "learning_rate": 1.9295725981450473e-05, "loss": 0.7849, "step": 1441 }, { "epoch": 0.15, "grad_norm": 1.6277974927232288, "learning_rate": 1.9294511011957757e-05, "loss": 0.7406, "step": 1442 }, { "epoch": 0.15, "grad_norm": 1.6582286211160586, "learning_rate": 1.9293295033691223e-05, "loss": 0.7728, "step": 1443 }, { "epoch": 0.15, "grad_norm": 1.581468131712894, "learning_rate": 1.9292078046782844e-05, "loss": 0.7779, "step": 1444 }, { "epoch": 0.15, "grad_norm": 1.7121934073770229, "learning_rate": 1.9290860051364704e-05, "loss": 0.8609, "step": 1445 }, { "epoch": 0.15, "grad_norm": 1.549302609178488, "learning_rate": 1.9289641047569e-05, "loss": 0.6958, "step": 1446 }, { "epoch": 0.15, "grad_norm": 1.6043051626537128, "learning_rate": 1.928842103552803e-05, "loss": 0.7551, "step": 1447 }, { "epoch": 0.15, "grad_norm": 1.6362496985196373, "learning_rate": 1.928720001537421e-05, "loss": 0.8159, "step": 1448 }, { "epoch": 0.15, "grad_norm": 1.8244786394221024, "learning_rate": 1.928597798724007e-05, "loss": 0.8213, "step": 1449 }, { "epoch": 0.15, "grad_norm": 1.670704556030598, "learning_rate": 1.9284754951258233e-05, "loss": 0.8299, "step": 1450 }, { "epoch": 0.15, "grad_norm": 1.511410122208224, "learning_rate": 1.9283530907561445e-05, "loss": 0.7247, "step": 1451 }, { "epoch": 0.15, "grad_norm": 1.6005491815394173, "learning_rate": 1.9282305856282554e-05, "loss": 0.7206, "step": 1452 }, { "epoch": 0.15, "grad_norm": 1.623120296961341, "learning_rate": 1.928107979755452e-05, "loss": 0.669, "step": 1453 }, { "epoch": 0.15, "grad_norm": 1.7108125044473872, "learning_rate": 1.9279852731510417e-05, "loss": 0.752, "step": 1454 }, { "epoch": 0.15, "grad_norm": 1.5740653272856195, "learning_rate": 1.927862465828342e-05, "loss": 0.7685, "step": 1455 }, { "epoch": 0.15, "grad_norm": 1.5311910116271827, "learning_rate": 1.9277395578006815e-05, "loss": 0.7808, "step": 1456 }, { "epoch": 0.15, "grad_norm": 1.7568559916243593, "learning_rate": 1.9276165490814e-05, "loss": 0.7578, "step": 1457 }, { "epoch": 0.15, "grad_norm": 1.6287705974238318, "learning_rate": 1.927493439683849e-05, "loss": 0.7119, "step": 1458 }, { "epoch": 0.15, "grad_norm": 1.7208541800835253, "learning_rate": 1.9273702296213893e-05, "loss": 0.8291, "step": 1459 }, { "epoch": 0.15, "grad_norm": 1.5970064188840418, "learning_rate": 1.9272469189073936e-05, "loss": 0.7491, "step": 1460 }, { "epoch": 0.15, "grad_norm": 1.591605260808786, "learning_rate": 1.9271235075552454e-05, "loss": 0.6758, "step": 1461 }, { "epoch": 0.15, "grad_norm": 1.5906047294247754, "learning_rate": 1.926999995578339e-05, "loss": 0.7827, "step": 1462 }, { "epoch": 0.15, "grad_norm": 1.8630511855892309, "learning_rate": 1.9268763829900798e-05, "loss": 0.7945, "step": 1463 }, { "epoch": 0.15, "grad_norm": 1.793429611608883, "learning_rate": 1.9267526698038838e-05, "loss": 0.8592, "step": 1464 }, { "epoch": 0.15, "grad_norm": 1.803266576136559, "learning_rate": 1.9266288560331782e-05, "loss": 0.7465, "step": 1465 }, { "epoch": 0.15, "grad_norm": 1.5244435320341803, "learning_rate": 1.9265049416914015e-05, "loss": 0.7006, "step": 1466 }, { "epoch": 0.15, "grad_norm": 1.7823386142447417, "learning_rate": 1.9263809267920023e-05, "loss": 0.8138, "step": 1467 }, { "epoch": 0.15, "grad_norm": 1.546354254225523, "learning_rate": 1.92625681134844e-05, "loss": 0.6401, "step": 1468 }, { "epoch": 0.15, "grad_norm": 1.5043994110416876, "learning_rate": 1.9261325953741863e-05, "loss": 0.8051, "step": 1469 }, { "epoch": 0.15, "grad_norm": 1.6999701502405948, "learning_rate": 1.9260082788827225e-05, "loss": 0.8232, "step": 1470 }, { "epoch": 0.15, "grad_norm": 1.6116709705664414, "learning_rate": 1.9258838618875407e-05, "loss": 0.7465, "step": 1471 }, { "epoch": 0.15, "grad_norm": 1.5700867235311593, "learning_rate": 1.9257593444021455e-05, "loss": 0.6726, "step": 1472 }, { "epoch": 0.15, "grad_norm": 1.6404232682439661, "learning_rate": 1.92563472644005e-05, "loss": 0.8057, "step": 1473 }, { "epoch": 0.15, "grad_norm": 1.609198643059564, "learning_rate": 1.9255100080147807e-05, "loss": 0.8679, "step": 1474 }, { "epoch": 0.15, "grad_norm": 1.61013255267516, "learning_rate": 1.9253851891398735e-05, "loss": 0.732, "step": 1475 }, { "epoch": 0.15, "grad_norm": 1.5979956087776157, "learning_rate": 1.9252602698288752e-05, "loss": 0.7424, "step": 1476 }, { "epoch": 0.15, "grad_norm": 1.5554224156137497, "learning_rate": 1.925135250095344e-05, "loss": 0.7008, "step": 1477 }, { "epoch": 0.15, "grad_norm": 1.6167967536593908, "learning_rate": 1.9250101299528495e-05, "loss": 0.8089, "step": 1478 }, { "epoch": 0.15, "grad_norm": 1.6375834796003947, "learning_rate": 1.9248849094149703e-05, "loss": 0.7496, "step": 1479 }, { "epoch": 0.15, "grad_norm": 1.5682760543403722, "learning_rate": 1.9247595884952977e-05, "loss": 0.8036, "step": 1480 }, { "epoch": 0.15, "grad_norm": 1.6028509146470817, "learning_rate": 1.924634167207434e-05, "loss": 0.7174, "step": 1481 }, { "epoch": 0.15, "grad_norm": 1.6240045575203728, "learning_rate": 1.9245086455649903e-05, "loss": 0.7451, "step": 1482 }, { "epoch": 0.15, "grad_norm": 1.619210407596221, "learning_rate": 1.9243830235815913e-05, "loss": 0.7987, "step": 1483 }, { "epoch": 0.15, "grad_norm": 1.4698223414285452, "learning_rate": 1.924257301270871e-05, "loss": 0.7055, "step": 1484 }, { "epoch": 0.15, "grad_norm": 1.5073455061374148, "learning_rate": 1.924131478646474e-05, "loss": 0.8317, "step": 1485 }, { "epoch": 0.15, "grad_norm": 1.6407134053525472, "learning_rate": 1.9240055557220573e-05, "loss": 0.7548, "step": 1486 }, { "epoch": 0.15, "grad_norm": 1.8031573777028704, "learning_rate": 1.9238795325112867e-05, "loss": 0.7956, "step": 1487 }, { "epoch": 0.15, "grad_norm": 1.582598949302052, "learning_rate": 1.923753409027841e-05, "loss": 0.7401, "step": 1488 }, { "epoch": 0.15, "grad_norm": 1.4703010435992137, "learning_rate": 1.9236271852854088e-05, "loss": 0.8035, "step": 1489 }, { "epoch": 0.15, "grad_norm": 1.4954150702770912, "learning_rate": 1.9235008612976897e-05, "loss": 0.7635, "step": 1490 }, { "epoch": 0.15, "grad_norm": 1.9250287248373963, "learning_rate": 1.9233744370783935e-05, "loss": 0.7855, "step": 1491 }, { "epoch": 0.15, "grad_norm": 1.6460987951004957, "learning_rate": 1.9232479126412425e-05, "loss": 0.8874, "step": 1492 }, { "epoch": 0.15, "grad_norm": 1.563877453344683, "learning_rate": 1.9231212879999686e-05, "loss": 0.8022, "step": 1493 }, { "epoch": 0.15, "grad_norm": 1.7544917369182025, "learning_rate": 1.922994563168315e-05, "loss": 0.7461, "step": 1494 }, { "epoch": 0.15, "grad_norm": 1.6902299666720255, "learning_rate": 1.9228677381600352e-05, "loss": 0.7271, "step": 1495 }, { "epoch": 0.15, "grad_norm": 1.672354406035893, "learning_rate": 1.9227408129888945e-05, "loss": 0.7699, "step": 1496 }, { "epoch": 0.15, "grad_norm": 1.396315478816546, "learning_rate": 1.9226137876686686e-05, "loss": 0.7364, "step": 1497 }, { "epoch": 0.15, "grad_norm": 1.7579916394018387, "learning_rate": 1.9224866622131442e-05, "loss": 0.7713, "step": 1498 }, { "epoch": 0.15, "grad_norm": 1.5314102029803376, "learning_rate": 1.9223594366361188e-05, "loss": 0.8544, "step": 1499 }, { "epoch": 0.15, "grad_norm": 1.5980160334993416, "learning_rate": 1.9222321109514006e-05, "loss": 0.8614, "step": 1500 }, { "epoch": 0.15, "grad_norm": 1.5790504981956814, "learning_rate": 1.922104685172809e-05, "loss": 0.6682, "step": 1501 }, { "epoch": 0.15, "grad_norm": 1.506221154074456, "learning_rate": 1.9219771593141736e-05, "loss": 0.827, "step": 1502 }, { "epoch": 0.15, "grad_norm": 1.6994180118450408, "learning_rate": 1.921849533389336e-05, "loss": 0.7882, "step": 1503 }, { "epoch": 0.15, "grad_norm": 1.5998130523759928, "learning_rate": 1.9217218074121474e-05, "loss": 0.6596, "step": 1504 }, { "epoch": 0.15, "grad_norm": 1.524449341063304, "learning_rate": 1.9215939813964707e-05, "loss": 0.7048, "step": 1505 }, { "epoch": 0.15, "grad_norm": 1.5581478522339887, "learning_rate": 1.9214660553561796e-05, "loss": 0.7535, "step": 1506 }, { "epoch": 0.15, "grad_norm": 1.616475689899037, "learning_rate": 1.921338029305158e-05, "loss": 0.8237, "step": 1507 }, { "epoch": 0.15, "grad_norm": 1.540256961106526, "learning_rate": 1.9212099032573017e-05, "loss": 0.7855, "step": 1508 }, { "epoch": 0.15, "grad_norm": 1.6530635115493926, "learning_rate": 1.9210816772265166e-05, "loss": 0.7577, "step": 1509 }, { "epoch": 0.15, "grad_norm": 1.6069626479514616, "learning_rate": 1.9209533512267193e-05, "loss": 0.793, "step": 1510 }, { "epoch": 0.15, "grad_norm": 1.726638072569584, "learning_rate": 1.920824925271838e-05, "loss": 0.8787, "step": 1511 }, { "epoch": 0.15, "grad_norm": 1.622170668313893, "learning_rate": 1.920696399375811e-05, "loss": 0.6719, "step": 1512 }, { "epoch": 0.15, "grad_norm": 1.5849164974116032, "learning_rate": 1.9205677735525883e-05, "loss": 0.7252, "step": 1513 }, { "epoch": 0.15, "grad_norm": 1.6194613219944933, "learning_rate": 1.9204390478161294e-05, "loss": 0.8939, "step": 1514 }, { "epoch": 0.15, "grad_norm": 1.5088878684059277, "learning_rate": 1.9203102221804062e-05, "loss": 0.7617, "step": 1515 }, { "epoch": 0.15, "grad_norm": 1.7035212373349042, "learning_rate": 1.9201812966594e-05, "loss": 0.7593, "step": 1516 }, { "epoch": 0.15, "grad_norm": 1.6703938590394398, "learning_rate": 1.920052271267105e-05, "loss": 0.6431, "step": 1517 }, { "epoch": 0.15, "grad_norm": 1.5769045147068257, "learning_rate": 1.9199231460175232e-05, "loss": 0.6884, "step": 1518 }, { "epoch": 0.15, "grad_norm": 1.5809397366575757, "learning_rate": 1.9197939209246697e-05, "loss": 0.7772, "step": 1519 }, { "epoch": 0.15, "grad_norm": 1.6391801398503676, "learning_rate": 1.9196645960025707e-05, "loss": 0.7642, "step": 1520 }, { "epoch": 0.15, "grad_norm": 1.8299512941006664, "learning_rate": 1.9195351712652615e-05, "loss": 0.7433, "step": 1521 }, { "epoch": 0.15, "grad_norm": 1.5095893517785566, "learning_rate": 1.9194056467267893e-05, "loss": 0.7138, "step": 1522 }, { "epoch": 0.15, "grad_norm": 1.6093122134592646, "learning_rate": 1.919276022401212e-05, "loss": 0.8123, "step": 1523 }, { "epoch": 0.16, "grad_norm": 1.6161664662430497, "learning_rate": 1.9191462983025984e-05, "loss": 0.7022, "step": 1524 }, { "epoch": 0.16, "grad_norm": 1.761508380087029, "learning_rate": 1.919016474445028e-05, "loss": 0.7325, "step": 1525 }, { "epoch": 0.16, "grad_norm": 1.6202857694097255, "learning_rate": 1.9188865508425912e-05, "loss": 0.7897, "step": 1526 }, { "epoch": 0.16, "grad_norm": 1.566351498765691, "learning_rate": 1.918756527509389e-05, "loss": 0.7244, "step": 1527 }, { "epoch": 0.16, "grad_norm": 1.5388170986926715, "learning_rate": 1.9186264044595334e-05, "loss": 0.7408, "step": 1528 }, { "epoch": 0.16, "grad_norm": 2.0411475883686667, "learning_rate": 1.9184961817071474e-05, "loss": 0.8856, "step": 1529 }, { "epoch": 0.16, "grad_norm": 1.537923172996495, "learning_rate": 1.9183658592663646e-05, "loss": 0.7629, "step": 1530 }, { "epoch": 0.16, "grad_norm": 1.5649059968966519, "learning_rate": 1.918235437151329e-05, "loss": 0.8007, "step": 1531 }, { "epoch": 0.16, "grad_norm": 1.4841845007351928, "learning_rate": 1.9181049153761968e-05, "loss": 0.7615, "step": 1532 }, { "epoch": 0.16, "grad_norm": 1.5810852008291814, "learning_rate": 1.9179742939551334e-05, "loss": 0.7283, "step": 1533 }, { "epoch": 0.16, "grad_norm": 1.6433971912621534, "learning_rate": 1.917843572902316e-05, "loss": 0.7719, "step": 1534 }, { "epoch": 0.16, "grad_norm": 1.6686771191429626, "learning_rate": 1.917712752231932e-05, "loss": 0.7082, "step": 1535 }, { "epoch": 0.16, "grad_norm": 1.5992105637180958, "learning_rate": 1.9175818319581804e-05, "loss": 0.8333, "step": 1536 }, { "epoch": 0.16, "grad_norm": 1.5199822157361877, "learning_rate": 1.9174508120952703e-05, "loss": 0.6218, "step": 1537 }, { "epoch": 0.16, "grad_norm": 1.696677016341126, "learning_rate": 1.9173196926574217e-05, "loss": 0.774, "step": 1538 }, { "epoch": 0.16, "grad_norm": 1.5505851914086217, "learning_rate": 1.9171884736588658e-05, "loss": 0.8452, "step": 1539 }, { "epoch": 0.16, "grad_norm": 1.6196310318726466, "learning_rate": 1.9170571551138443e-05, "loss": 0.7345, "step": 1540 }, { "epoch": 0.16, "grad_norm": 1.711535964064723, "learning_rate": 1.91692573703661e-05, "loss": 0.8005, "step": 1541 }, { "epoch": 0.16, "grad_norm": 1.602974087584989, "learning_rate": 1.916794219441426e-05, "loss": 0.7703, "step": 1542 }, { "epoch": 0.16, "grad_norm": 1.803421607568332, "learning_rate": 1.9166626023425663e-05, "loss": 0.7639, "step": 1543 }, { "epoch": 0.16, "grad_norm": 1.6301320994866801, "learning_rate": 1.9165308857543162e-05, "loss": 0.7741, "step": 1544 }, { "epoch": 0.16, "grad_norm": 1.923659935329634, "learning_rate": 1.9163990696909714e-05, "loss": 0.83, "step": 1545 }, { "epoch": 0.16, "grad_norm": 1.833057279021541, "learning_rate": 1.9162671541668384e-05, "loss": 0.8549, "step": 1546 }, { "epoch": 0.16, "grad_norm": 1.5499426415563595, "learning_rate": 1.9161351391962347e-05, "loss": 0.7802, "step": 1547 }, { "epoch": 0.16, "grad_norm": 1.61928782607135, "learning_rate": 1.9160030247934887e-05, "loss": 0.8191, "step": 1548 }, { "epoch": 0.16, "grad_norm": 1.454897568051772, "learning_rate": 1.9158708109729387e-05, "loss": 0.6563, "step": 1549 }, { "epoch": 0.16, "grad_norm": 1.6785044647038443, "learning_rate": 1.9157384977489347e-05, "loss": 0.7766, "step": 1550 }, { "epoch": 0.16, "grad_norm": 1.7302652329541985, "learning_rate": 1.9156060851358377e-05, "loss": 0.709, "step": 1551 }, { "epoch": 0.16, "grad_norm": 1.4544831620456349, "learning_rate": 1.9154735731480185e-05, "loss": 0.6414, "step": 1552 }, { "epoch": 0.16, "grad_norm": 1.6795340410621473, "learning_rate": 1.915340961799859e-05, "loss": 0.8081, "step": 1553 }, { "epoch": 0.16, "grad_norm": 1.4616387300706588, "learning_rate": 1.9152082511057525e-05, "loss": 0.8863, "step": 1554 }, { "epoch": 0.16, "grad_norm": 1.522043264691204, "learning_rate": 1.915075441080103e-05, "loss": 0.7951, "step": 1555 }, { "epoch": 0.16, "grad_norm": 1.567166649488013, "learning_rate": 1.9149425317373242e-05, "loss": 0.8348, "step": 1556 }, { "epoch": 0.16, "grad_norm": 1.651202931472597, "learning_rate": 1.914809523091842e-05, "loss": 0.7499, "step": 1557 }, { "epoch": 0.16, "grad_norm": 1.6437075117820903, "learning_rate": 1.9146764151580916e-05, "loss": 0.7034, "step": 1558 }, { "epoch": 0.16, "grad_norm": 1.955079683704659, "learning_rate": 1.9145432079505205e-05, "loss": 0.802, "step": 1559 }, { "epoch": 0.16, "grad_norm": 1.5064482039439775, "learning_rate": 1.9144099014835863e-05, "loss": 0.7824, "step": 1560 }, { "epoch": 0.16, "grad_norm": 1.5106371142000838, "learning_rate": 1.914276495771757e-05, "loss": 0.6852, "step": 1561 }, { "epoch": 0.16, "grad_norm": 1.4133764252186334, "learning_rate": 1.9141429908295115e-05, "loss": 0.7393, "step": 1562 }, { "epoch": 0.16, "grad_norm": 1.5175862671123022, "learning_rate": 1.91400938667134e-05, "loss": 0.652, "step": 1563 }, { "epoch": 0.16, "grad_norm": 1.8668591354206043, "learning_rate": 1.9138756833117433e-05, "loss": 0.8072, "step": 1564 }, { "epoch": 0.16, "grad_norm": 1.7148653540172474, "learning_rate": 1.9137418807652322e-05, "loss": 0.7976, "step": 1565 }, { "epoch": 0.16, "grad_norm": 1.3971811107739365, "learning_rate": 1.9136079790463297e-05, "loss": 0.6894, "step": 1566 }, { "epoch": 0.16, "grad_norm": 1.558703271920691, "learning_rate": 1.913473978169568e-05, "loss": 0.7824, "step": 1567 }, { "epoch": 0.16, "grad_norm": 1.6105428293661352, "learning_rate": 1.9133398781494915e-05, "loss": 0.8303, "step": 1568 }, { "epoch": 0.16, "grad_norm": 1.443076696573244, "learning_rate": 1.9132056790006538e-05, "loss": 0.6699, "step": 1569 }, { "epoch": 0.16, "grad_norm": 1.5389324497104773, "learning_rate": 1.9130713807376208e-05, "loss": 0.7501, "step": 1570 }, { "epoch": 0.16, "grad_norm": 1.418175886216392, "learning_rate": 1.9129369833749684e-05, "loss": 0.7416, "step": 1571 }, { "epoch": 0.16, "grad_norm": 1.5503745273715697, "learning_rate": 1.9128024869272827e-05, "loss": 0.8613, "step": 1572 }, { "epoch": 0.16, "grad_norm": 1.5373599502009552, "learning_rate": 1.912667891409162e-05, "loss": 0.6855, "step": 1573 }, { "epoch": 0.16, "grad_norm": 1.778854879435199, "learning_rate": 1.9125331968352144e-05, "loss": 0.838, "step": 1574 }, { "epoch": 0.16, "grad_norm": 1.6617599172340474, "learning_rate": 1.9123984032200586e-05, "loss": 0.8029, "step": 1575 }, { "epoch": 0.16, "grad_norm": 1.5626142896457125, "learning_rate": 1.9122635105783246e-05, "loss": 0.7714, "step": 1576 }, { "epoch": 0.16, "grad_norm": 1.5953250529453096, "learning_rate": 1.9121285189246522e-05, "loss": 0.7895, "step": 1577 }, { "epoch": 0.16, "grad_norm": 1.8414569875633697, "learning_rate": 1.9119934282736937e-05, "loss": 0.9218, "step": 1578 }, { "epoch": 0.16, "grad_norm": 1.4706065042563319, "learning_rate": 1.91185823864011e-05, "loss": 0.6972, "step": 1579 }, { "epoch": 0.16, "grad_norm": 1.5299081578716949, "learning_rate": 1.9117229500385747e-05, "loss": 0.7931, "step": 1580 }, { "epoch": 0.16, "grad_norm": 1.3789090369504793, "learning_rate": 1.9115875624837712e-05, "loss": 0.7851, "step": 1581 }, { "epoch": 0.16, "grad_norm": 1.4746021435408434, "learning_rate": 1.911452075990393e-05, "loss": 0.8104, "step": 1582 }, { "epoch": 0.16, "grad_norm": 1.6450850823417302, "learning_rate": 1.9113164905731456e-05, "loss": 0.7881, "step": 1583 }, { "epoch": 0.16, "grad_norm": 1.5010962716226026, "learning_rate": 1.9111808062467444e-05, "loss": 0.7363, "step": 1584 }, { "epoch": 0.16, "grad_norm": 1.6404627346035592, "learning_rate": 1.9110450230259164e-05, "loss": 0.7816, "step": 1585 }, { "epoch": 0.16, "grad_norm": 1.6060822901587557, "learning_rate": 1.910909140925398e-05, "loss": 0.8176, "step": 1586 }, { "epoch": 0.16, "grad_norm": 1.51717087233064, "learning_rate": 1.910773159959937e-05, "loss": 0.7383, "step": 1587 }, { "epoch": 0.16, "grad_norm": 1.61362295729002, "learning_rate": 1.9106370801442932e-05, "loss": 0.7415, "step": 1588 }, { "epoch": 0.16, "grad_norm": 1.4683775876196694, "learning_rate": 1.9105009014932346e-05, "loss": 0.6588, "step": 1589 }, { "epoch": 0.16, "grad_norm": 1.770666499944069, "learning_rate": 1.9103646240215417e-05, "loss": 0.7953, "step": 1590 }, { "epoch": 0.16, "grad_norm": 1.738033048483459, "learning_rate": 1.9102282477440054e-05, "loss": 0.7054, "step": 1591 }, { "epoch": 0.16, "grad_norm": 1.5252116881641011, "learning_rate": 1.9100917726754274e-05, "loss": 0.8349, "step": 1592 }, { "epoch": 0.16, "grad_norm": 1.628771819360274, "learning_rate": 1.9099551988306196e-05, "loss": 0.7939, "step": 1593 }, { "epoch": 0.16, "grad_norm": 1.5997324728092637, "learning_rate": 1.9098185262244052e-05, "loss": 0.7922, "step": 1594 }, { "epoch": 0.16, "grad_norm": 1.577128392588311, "learning_rate": 1.9096817548716176e-05, "loss": 0.7836, "step": 1595 }, { "epoch": 0.16, "grad_norm": 1.7620820504603598, "learning_rate": 1.9095448847871017e-05, "loss": 0.6764, "step": 1596 }, { "epoch": 0.16, "grad_norm": 1.4430637543169647, "learning_rate": 1.909407915985712e-05, "loss": 0.7046, "step": 1597 }, { "epoch": 0.16, "grad_norm": 1.5525390305375857, "learning_rate": 1.9092708484823146e-05, "loss": 0.778, "step": 1598 }, { "epoch": 0.16, "grad_norm": 1.6033162887065793, "learning_rate": 1.9091336822917858e-05, "loss": 0.734, "step": 1599 }, { "epoch": 0.16, "grad_norm": 1.373049548183506, "learning_rate": 1.908996417429013e-05, "loss": 0.7226, "step": 1600 }, { "epoch": 0.16, "grad_norm": 1.515664936780327, "learning_rate": 1.9088590539088944e-05, "loss": 0.8194, "step": 1601 }, { "epoch": 0.16, "grad_norm": 1.676295501712208, "learning_rate": 1.9087215917463388e-05, "loss": 0.7845, "step": 1602 }, { "epoch": 0.16, "grad_norm": 1.7349913040217095, "learning_rate": 1.9085840309562647e-05, "loss": 0.7594, "step": 1603 }, { "epoch": 0.16, "grad_norm": 1.4118887464545329, "learning_rate": 1.9084463715536028e-05, "loss": 0.7238, "step": 1604 }, { "epoch": 0.16, "grad_norm": 1.5514927958243494, "learning_rate": 1.908308613553294e-05, "loss": 0.8336, "step": 1605 }, { "epoch": 0.16, "grad_norm": 1.7937303832748122, "learning_rate": 1.9081707569702895e-05, "loss": 0.8283, "step": 1606 }, { "epoch": 0.16, "grad_norm": 1.6048560592421042, "learning_rate": 1.9080328018195512e-05, "loss": 0.9193, "step": 1607 }, { "epoch": 0.16, "grad_norm": 1.5251342315015357, "learning_rate": 1.9078947481160525e-05, "loss": 0.7064, "step": 1608 }, { "epoch": 0.16, "grad_norm": 1.5915408336271724, "learning_rate": 1.907756595874777e-05, "loss": 0.8293, "step": 1609 }, { "epoch": 0.16, "grad_norm": 1.5361474379210114, "learning_rate": 1.9076183451107185e-05, "loss": 0.7543, "step": 1610 }, { "epoch": 0.16, "grad_norm": 1.6348641221810145, "learning_rate": 1.9074799958388824e-05, "loss": 0.8217, "step": 1611 }, { "epoch": 0.16, "grad_norm": 1.6079219437209098, "learning_rate": 1.907341548074284e-05, "loss": 0.8163, "step": 1612 }, { "epoch": 0.16, "grad_norm": 1.5291210741782348, "learning_rate": 1.9072030018319498e-05, "loss": 0.8774, "step": 1613 }, { "epoch": 0.16, "grad_norm": 1.6444665406797316, "learning_rate": 1.9070643571269168e-05, "loss": 0.7758, "step": 1614 }, { "epoch": 0.16, "grad_norm": 1.4431843276318772, "learning_rate": 1.906925613974233e-05, "loss": 0.6653, "step": 1615 }, { "epoch": 0.16, "grad_norm": 1.567521467193932, "learning_rate": 1.9067867723889563e-05, "loss": 0.8517, "step": 1616 }, { "epoch": 0.16, "grad_norm": 1.5714406321988241, "learning_rate": 1.9066478323861558e-05, "loss": 0.7835, "step": 1617 }, { "epoch": 0.16, "grad_norm": 1.5959228661639966, "learning_rate": 1.906508793980912e-05, "loss": 0.7141, "step": 1618 }, { "epoch": 0.16, "grad_norm": 1.4847876302277403, "learning_rate": 1.9063696571883145e-05, "loss": 0.7785, "step": 1619 }, { "epoch": 0.16, "grad_norm": 1.5503075253886878, "learning_rate": 1.9062304220234648e-05, "loss": 0.7935, "step": 1620 }, { "epoch": 0.16, "grad_norm": 1.6556741264913504, "learning_rate": 1.9060910885014745e-05, "loss": 0.6974, "step": 1621 }, { "epoch": 0.16, "grad_norm": 1.4290189452587512, "learning_rate": 1.9059516566374662e-05, "loss": 0.6936, "step": 1622 }, { "epoch": 0.17, "grad_norm": 1.5478177845334817, "learning_rate": 1.9058121264465733e-05, "loss": 0.7785, "step": 1623 }, { "epoch": 0.17, "grad_norm": 1.5658364846871906, "learning_rate": 1.9056724979439395e-05, "loss": 0.8645, "step": 1624 }, { "epoch": 0.17, "grad_norm": 1.5477169944101723, "learning_rate": 1.9055327711447188e-05, "loss": 0.7858, "step": 1625 }, { "epoch": 0.17, "grad_norm": 1.4657396785160544, "learning_rate": 1.905392946064077e-05, "loss": 0.7117, "step": 1626 }, { "epoch": 0.17, "grad_norm": 1.5708545119734512, "learning_rate": 1.90525302271719e-05, "loss": 0.7021, "step": 1627 }, { "epoch": 0.17, "grad_norm": 1.6476076431039985, "learning_rate": 1.9051130011192432e-05, "loss": 0.7394, "step": 1628 }, { "epoch": 0.17, "grad_norm": 1.4400462499040607, "learning_rate": 1.904972881285435e-05, "loss": 0.8131, "step": 1629 }, { "epoch": 0.17, "grad_norm": 1.6174406456486523, "learning_rate": 1.9048326632309725e-05, "loss": 0.7308, "step": 1630 }, { "epoch": 0.17, "grad_norm": 1.5193393003943425, "learning_rate": 1.9046923469710745e-05, "loss": 0.7133, "step": 1631 }, { "epoch": 0.17, "grad_norm": 1.7774121870566917, "learning_rate": 1.90455193252097e-05, "loss": 0.8559, "step": 1632 }, { "epoch": 0.17, "grad_norm": 1.526356917860105, "learning_rate": 1.904411419895899e-05, "loss": 0.6728, "step": 1633 }, { "epoch": 0.17, "grad_norm": 1.869316998443361, "learning_rate": 1.9042708091111118e-05, "loss": 0.8109, "step": 1634 }, { "epoch": 0.17, "grad_norm": 1.6306679434589662, "learning_rate": 1.9041301001818694e-05, "loss": 0.7945, "step": 1635 }, { "epoch": 0.17, "grad_norm": 1.757448536351054, "learning_rate": 1.9039892931234434e-05, "loss": 0.7686, "step": 1636 }, { "epoch": 0.17, "grad_norm": 1.544620177922369, "learning_rate": 1.9038483879511166e-05, "loss": 0.6817, "step": 1637 }, { "epoch": 0.17, "grad_norm": 1.7468142129124404, "learning_rate": 1.903707384680182e-05, "loss": 0.7455, "step": 1638 }, { "epoch": 0.17, "grad_norm": 1.6848255747831735, "learning_rate": 1.9035662833259433e-05, "loss": 0.8196, "step": 1639 }, { "epoch": 0.17, "grad_norm": 1.5329377292764257, "learning_rate": 1.9034250839037144e-05, "loss": 0.7906, "step": 1640 }, { "epoch": 0.17, "grad_norm": 1.5415068985564901, "learning_rate": 1.903283786428821e-05, "loss": 0.6799, "step": 1641 }, { "epoch": 0.17, "grad_norm": 1.4666601543044273, "learning_rate": 1.903142390916598e-05, "loss": 0.7268, "step": 1642 }, { "epoch": 0.17, "grad_norm": 1.575405053556528, "learning_rate": 1.903000897382392e-05, "loss": 0.7831, "step": 1643 }, { "epoch": 0.17, "grad_norm": 1.6124679892613942, "learning_rate": 1.9028593058415604e-05, "loss": 0.7744, "step": 1644 }, { "epoch": 0.17, "grad_norm": 1.4810041986168434, "learning_rate": 1.90271761630947e-05, "loss": 0.7654, "step": 1645 }, { "epoch": 0.17, "grad_norm": 1.5817930470576074, "learning_rate": 1.9025758288014992e-05, "loss": 0.7471, "step": 1646 }, { "epoch": 0.17, "grad_norm": 1.7806556950470092, "learning_rate": 1.9024339433330374e-05, "loss": 0.7575, "step": 1647 }, { "epoch": 0.17, "grad_norm": 1.7388350084069517, "learning_rate": 1.902291959919483e-05, "loss": 0.7252, "step": 1648 }, { "epoch": 0.17, "grad_norm": 1.6049185515811917, "learning_rate": 1.9021498785762465e-05, "loss": 0.71, "step": 1649 }, { "epoch": 0.17, "grad_norm": 1.5496687398901938, "learning_rate": 1.902007699318749e-05, "loss": 0.8395, "step": 1650 }, { "epoch": 0.17, "grad_norm": 1.5859664455327087, "learning_rate": 1.9018654221624215e-05, "loss": 0.8465, "step": 1651 }, { "epoch": 0.17, "grad_norm": 1.6860476567662297, "learning_rate": 1.9017230471227065e-05, "loss": 0.8093, "step": 1652 }, { "epoch": 0.17, "grad_norm": 1.5255507157557995, "learning_rate": 1.9015805742150555e-05, "loss": 0.7641, "step": 1653 }, { "epoch": 0.17, "grad_norm": 1.5576595287920239, "learning_rate": 1.9014380034549326e-05, "loss": 0.7399, "step": 1654 }, { "epoch": 0.17, "grad_norm": 1.5716277986521965, "learning_rate": 1.901295334857811e-05, "loss": 0.7475, "step": 1655 }, { "epoch": 0.17, "grad_norm": 1.5940348916804257, "learning_rate": 1.901152568439176e-05, "loss": 0.8035, "step": 1656 }, { "epoch": 0.17, "grad_norm": 1.652806276890107, "learning_rate": 1.9010097042145217e-05, "loss": 0.7669, "step": 1657 }, { "epoch": 0.17, "grad_norm": 1.5652094056502321, "learning_rate": 1.9008667421993542e-05, "loss": 0.7551, "step": 1658 }, { "epoch": 0.17, "grad_norm": 1.4934518396928138, "learning_rate": 1.9007236824091902e-05, "loss": 0.8183, "step": 1659 }, { "epoch": 0.17, "grad_norm": 1.4623729202198175, "learning_rate": 1.9005805248595558e-05, "loss": 0.7898, "step": 1660 }, { "epoch": 0.17, "grad_norm": 1.572320496115777, "learning_rate": 1.9004372695659893e-05, "loss": 0.7187, "step": 1661 }, { "epoch": 0.17, "grad_norm": 1.5236632692717045, "learning_rate": 1.9002939165440382e-05, "loss": 0.7906, "step": 1662 }, { "epoch": 0.17, "grad_norm": 1.6567306846632446, "learning_rate": 1.9001504658092614e-05, "loss": 0.7019, "step": 1663 }, { "epoch": 0.17, "grad_norm": 1.7033509945844678, "learning_rate": 1.900006917377229e-05, "loss": 0.8409, "step": 1664 }, { "epoch": 0.17, "grad_norm": 1.5112174633108457, "learning_rate": 1.8998632712635197e-05, "loss": 0.6675, "step": 1665 }, { "epoch": 0.17, "grad_norm": 1.6077579566638796, "learning_rate": 1.8997195274837246e-05, "loss": 0.7659, "step": 1666 }, { "epoch": 0.17, "grad_norm": 1.6200552954296608, "learning_rate": 1.8995756860534447e-05, "loss": 0.7704, "step": 1667 }, { "epoch": 0.17, "grad_norm": 1.7219390404142412, "learning_rate": 1.899431746988292e-05, "loss": 0.6929, "step": 1668 }, { "epoch": 0.17, "grad_norm": 1.5052480569089988, "learning_rate": 1.8992877103038888e-05, "loss": 0.7769, "step": 1669 }, { "epoch": 0.17, "grad_norm": 1.7106222019790918, "learning_rate": 1.8991435760158678e-05, "loss": 0.7939, "step": 1670 }, { "epoch": 0.17, "grad_norm": 1.6093050462228884, "learning_rate": 1.8989993441398725e-05, "loss": 0.7808, "step": 1671 }, { "epoch": 0.17, "grad_norm": 1.6456229403185043, "learning_rate": 1.8988550146915577e-05, "loss": 0.7685, "step": 1672 }, { "epoch": 0.17, "grad_norm": 1.6032421563969286, "learning_rate": 1.8987105876865875e-05, "loss": 0.769, "step": 1673 }, { "epoch": 0.17, "grad_norm": 1.583807363251475, "learning_rate": 1.898566063140637e-05, "loss": 0.7032, "step": 1674 }, { "epoch": 0.17, "grad_norm": 1.493731079727449, "learning_rate": 1.898421441069392e-05, "loss": 0.7107, "step": 1675 }, { "epoch": 0.17, "grad_norm": 1.5749307014664113, "learning_rate": 1.89827672148855e-05, "loss": 0.8638, "step": 1676 }, { "epoch": 0.17, "grad_norm": 1.6771481017193612, "learning_rate": 1.898131904413817e-05, "loss": 0.7711, "step": 1677 }, { "epoch": 0.17, "grad_norm": 1.4599515555865261, "learning_rate": 1.897986989860911e-05, "loss": 0.7718, "step": 1678 }, { "epoch": 0.17, "grad_norm": 1.748238922772858, "learning_rate": 1.8978419778455604e-05, "loss": 0.7038, "step": 1679 }, { "epoch": 0.17, "grad_norm": 1.7105558563254843, "learning_rate": 1.8976968683835035e-05, "loss": 0.7343, "step": 1680 }, { "epoch": 0.17, "grad_norm": 1.53133673055251, "learning_rate": 1.89755166149049e-05, "loss": 0.7529, "step": 1681 }, { "epoch": 0.17, "grad_norm": 1.5783781823685694, "learning_rate": 1.8974063571822802e-05, "loss": 0.8669, "step": 1682 }, { "epoch": 0.17, "grad_norm": 1.657796324266244, "learning_rate": 1.8972609554746438e-05, "loss": 0.8415, "step": 1683 }, { "epoch": 0.17, "grad_norm": 1.864844220133073, "learning_rate": 1.8971154563833624e-05, "loss": 0.7533, "step": 1684 }, { "epoch": 0.17, "grad_norm": 1.6854041586824038, "learning_rate": 1.896969859924227e-05, "loss": 0.7676, "step": 1685 }, { "epoch": 0.17, "grad_norm": 1.5650090207778384, "learning_rate": 1.896824166113041e-05, "loss": 0.8221, "step": 1686 }, { "epoch": 0.17, "grad_norm": 1.7722938808951092, "learning_rate": 1.8966783749656162e-05, "loss": 0.6842, "step": 1687 }, { "epoch": 0.17, "grad_norm": 1.4914344699971838, "learning_rate": 1.8965324864977766e-05, "loss": 0.6963, "step": 1688 }, { "epoch": 0.17, "grad_norm": 1.5272008983944905, "learning_rate": 1.8963865007253557e-05, "loss": 0.8026, "step": 1689 }, { "epoch": 0.17, "grad_norm": 1.482223785650939, "learning_rate": 1.8962404176641976e-05, "loss": 0.8048, "step": 1690 }, { "epoch": 0.17, "grad_norm": 1.5593816026928582, "learning_rate": 1.8960942373301584e-05, "loss": 0.6894, "step": 1691 }, { "epoch": 0.17, "grad_norm": 1.5617523696611526, "learning_rate": 1.895947959739103e-05, "loss": 0.8203, "step": 1692 }, { "epoch": 0.17, "grad_norm": 1.814077619268731, "learning_rate": 1.8958015849069074e-05, "loss": 0.8205, "step": 1693 }, { "epoch": 0.17, "grad_norm": 1.6045784749385548, "learning_rate": 1.8956551128494583e-05, "loss": 0.7982, "step": 1694 }, { "epoch": 0.17, "grad_norm": 1.7260765769995472, "learning_rate": 1.8955085435826535e-05, "loss": 0.6918, "step": 1695 }, { "epoch": 0.17, "grad_norm": 1.5504805127465078, "learning_rate": 1.8953618771224003e-05, "loss": 0.7217, "step": 1696 }, { "epoch": 0.17, "grad_norm": 1.720269284682822, "learning_rate": 1.895215113484618e-05, "loss": 0.9367, "step": 1697 }, { "epoch": 0.17, "grad_norm": 1.5714592051560299, "learning_rate": 1.895068252685234e-05, "loss": 0.7853, "step": 1698 }, { "epoch": 0.17, "grad_norm": 1.7019456115766567, "learning_rate": 1.8949212947401884e-05, "loss": 0.8011, "step": 1699 }, { "epoch": 0.17, "grad_norm": 1.884939067526523, "learning_rate": 1.8947742396654318e-05, "loss": 0.8446, "step": 1700 }, { "epoch": 0.17, "grad_norm": 1.6279141034164122, "learning_rate": 1.8946270874769234e-05, "loss": 0.6527, "step": 1701 }, { "epoch": 0.17, "grad_norm": 1.530506408713068, "learning_rate": 1.8944798381906356e-05, "loss": 0.7613, "step": 1702 }, { "epoch": 0.17, "grad_norm": 1.6168212969929507, "learning_rate": 1.8943324918225495e-05, "loss": 0.7585, "step": 1703 }, { "epoch": 0.17, "grad_norm": 1.6534578465353755, "learning_rate": 1.894185048388657e-05, "loss": 0.7905, "step": 1704 }, { "epoch": 0.17, "grad_norm": 1.5523776193348853, "learning_rate": 1.894037507904961e-05, "loss": 0.8521, "step": 1705 }, { "epoch": 0.17, "grad_norm": 1.551937987120993, "learning_rate": 1.8938898703874747e-05, "loss": 0.8972, "step": 1706 }, { "epoch": 0.17, "grad_norm": 1.69364613713285, "learning_rate": 1.893742135852222e-05, "loss": 0.767, "step": 1707 }, { "epoch": 0.17, "grad_norm": 1.6210580238903587, "learning_rate": 1.893594304315237e-05, "loss": 0.6865, "step": 1708 }, { "epoch": 0.17, "grad_norm": 1.451273066156775, "learning_rate": 1.8934463757925642e-05, "loss": 0.7857, "step": 1709 }, { "epoch": 0.17, "grad_norm": 1.683278709031471, "learning_rate": 1.8932983503002598e-05, "loss": 0.7911, "step": 1710 }, { "epoch": 0.17, "grad_norm": 1.5290904299097812, "learning_rate": 1.8931502278543887e-05, "loss": 0.6458, "step": 1711 }, { "epoch": 0.17, "grad_norm": 1.5786586347808211, "learning_rate": 1.8930020084710276e-05, "loss": 0.727, "step": 1712 }, { "epoch": 0.17, "grad_norm": 1.4938119669135754, "learning_rate": 1.8928536921662637e-05, "loss": 0.6879, "step": 1713 }, { "epoch": 0.17, "grad_norm": 1.5091020987453612, "learning_rate": 1.892705278956194e-05, "loss": 0.6316, "step": 1714 }, { "epoch": 0.17, "grad_norm": 1.5694048656046364, "learning_rate": 1.892556768856927e-05, "loss": 0.8505, "step": 1715 }, { "epoch": 0.17, "grad_norm": 1.4532551755755019, "learning_rate": 1.8924081618845804e-05, "loss": 0.6874, "step": 1716 }, { "epoch": 0.17, "grad_norm": 1.4379667556058273, "learning_rate": 1.892259458055284e-05, "loss": 0.6783, "step": 1717 }, { "epoch": 0.17, "grad_norm": 1.5265403642367932, "learning_rate": 1.892110657385176e-05, "loss": 0.7493, "step": 1718 }, { "epoch": 0.17, "grad_norm": 1.5479382570256124, "learning_rate": 1.891961759890408e-05, "loss": 0.7337, "step": 1719 }, { "epoch": 0.17, "grad_norm": 1.559889119676712, "learning_rate": 1.891812765587139e-05, "loss": 0.8697, "step": 1720 }, { "epoch": 0.18, "grad_norm": 1.593218655173124, "learning_rate": 1.891663674491541e-05, "loss": 0.7259, "step": 1721 }, { "epoch": 0.18, "grad_norm": 1.4592818229059854, "learning_rate": 1.8915144866197954e-05, "loss": 0.7734, "step": 1722 }, { "epoch": 0.18, "grad_norm": 1.648324646449686, "learning_rate": 1.8913652019880938e-05, "loss": 0.8242, "step": 1723 }, { "epoch": 0.18, "grad_norm": 1.4729500894677385, "learning_rate": 1.8912158206126386e-05, "loss": 0.6893, "step": 1724 }, { "epoch": 0.18, "grad_norm": 1.6400655969345412, "learning_rate": 1.8910663425096435e-05, "loss": 0.8344, "step": 1725 }, { "epoch": 0.18, "grad_norm": 1.497654805250299, "learning_rate": 1.8909167676953317e-05, "loss": 0.7566, "step": 1726 }, { "epoch": 0.18, "grad_norm": 1.6871982575991455, "learning_rate": 1.890767096185937e-05, "loss": 0.7531, "step": 1727 }, { "epoch": 0.18, "grad_norm": 1.5138489141587188, "learning_rate": 1.8906173279977042e-05, "loss": 0.6804, "step": 1728 }, { "epoch": 0.18, "grad_norm": 1.6122397020645634, "learning_rate": 1.890467463146888e-05, "loss": 0.804, "step": 1729 }, { "epoch": 0.18, "grad_norm": 1.6764245244750045, "learning_rate": 1.890317501649754e-05, "loss": 0.8541, "step": 1730 }, { "epoch": 0.18, "grad_norm": 1.5251612599991828, "learning_rate": 1.8901674435225784e-05, "loss": 0.7366, "step": 1731 }, { "epoch": 0.18, "grad_norm": 1.719713507717278, "learning_rate": 1.890017288781647e-05, "loss": 0.7156, "step": 1732 }, { "epoch": 0.18, "grad_norm": 1.5326149533609268, "learning_rate": 1.889867037443258e-05, "loss": 0.8467, "step": 1733 }, { "epoch": 0.18, "grad_norm": 1.686840873303595, "learning_rate": 1.8897166895237172e-05, "loss": 0.7844, "step": 1734 }, { "epoch": 0.18, "grad_norm": 1.6371997798982931, "learning_rate": 1.889566245039344e-05, "loss": 0.7474, "step": 1735 }, { "epoch": 0.18, "grad_norm": 1.611010263494007, "learning_rate": 1.8894157040064657e-05, "loss": 0.6348, "step": 1736 }, { "epoch": 0.18, "grad_norm": 1.6232898835545204, "learning_rate": 1.889265066441422e-05, "loss": 0.7371, "step": 1737 }, { "epoch": 0.18, "grad_norm": 1.6149901594670062, "learning_rate": 1.8891143323605618e-05, "loss": 0.7951, "step": 1738 }, { "epoch": 0.18, "grad_norm": 1.4939178979273533, "learning_rate": 1.888963501780245e-05, "loss": 0.7611, "step": 1739 }, { "epoch": 0.18, "grad_norm": 1.569457200983501, "learning_rate": 1.8888125747168418e-05, "loss": 0.7955, "step": 1740 }, { "epoch": 0.18, "grad_norm": 1.7213407027487273, "learning_rate": 1.8886615511867334e-05, "loss": 0.607, "step": 1741 }, { "epoch": 0.18, "grad_norm": 1.5760183368741416, "learning_rate": 1.88851043120631e-05, "loss": 0.8016, "step": 1742 }, { "epoch": 0.18, "grad_norm": 1.7611761834273227, "learning_rate": 1.888359214791975e-05, "loss": 0.8343, "step": 1743 }, { "epoch": 0.18, "grad_norm": 1.5535745253840973, "learning_rate": 1.888207901960139e-05, "loss": 0.7834, "step": 1744 }, { "epoch": 0.18, "grad_norm": 1.6699865529676274, "learning_rate": 1.8880564927272255e-05, "loss": 0.7145, "step": 1745 }, { "epoch": 0.18, "grad_norm": 1.5987541438124577, "learning_rate": 1.887904987109667e-05, "loss": 0.8974, "step": 1746 }, { "epoch": 0.18, "grad_norm": 1.5848509287747299, "learning_rate": 1.887753385123908e-05, "loss": 0.8937, "step": 1747 }, { "epoch": 0.18, "grad_norm": 1.5965473543295567, "learning_rate": 1.8876016867864012e-05, "loss": 0.643, "step": 1748 }, { "epoch": 0.18, "grad_norm": 1.474355510667469, "learning_rate": 1.8874498921136128e-05, "loss": 0.7083, "step": 1749 }, { "epoch": 0.18, "grad_norm": 1.6473044578139473, "learning_rate": 1.887298001122016e-05, "loss": 0.8028, "step": 1750 }, { "epoch": 0.18, "grad_norm": 1.6837953191941109, "learning_rate": 1.8871460138280972e-05, "loss": 0.6944, "step": 1751 }, { "epoch": 0.18, "grad_norm": 1.6999399008927014, "learning_rate": 1.8869939302483523e-05, "loss": 0.7527, "step": 1752 }, { "epoch": 0.18, "grad_norm": 1.580640208318216, "learning_rate": 1.8868417503992868e-05, "loss": 0.7821, "step": 1753 }, { "epoch": 0.18, "grad_norm": 1.7608845706069354, "learning_rate": 1.8866894742974185e-05, "loss": 0.735, "step": 1754 }, { "epoch": 0.18, "grad_norm": 1.783871397792197, "learning_rate": 1.8865371019592734e-05, "loss": 0.6882, "step": 1755 }, { "epoch": 0.18, "grad_norm": 4.106234520745424, "learning_rate": 1.8863846334013903e-05, "loss": 0.9047, "step": 1756 }, { "epoch": 0.18, "grad_norm": 1.6261963712487317, "learning_rate": 1.8862320686403163e-05, "loss": 0.8233, "step": 1757 }, { "epoch": 0.18, "grad_norm": 1.5592849660065935, "learning_rate": 1.886079407692611e-05, "loss": 0.7271, "step": 1758 }, { "epoch": 0.18, "grad_norm": 1.696634596163249, "learning_rate": 1.8859266505748422e-05, "loss": 0.8247, "step": 1759 }, { "epoch": 0.18, "grad_norm": 1.5583376186257532, "learning_rate": 1.88577379730359e-05, "loss": 0.753, "step": 1760 }, { "epoch": 0.18, "grad_norm": 1.6329960586391725, "learning_rate": 1.885620847895444e-05, "loss": 0.8578, "step": 1761 }, { "epoch": 0.18, "grad_norm": 1.669480616922605, "learning_rate": 1.885467802367005e-05, "loss": 0.7505, "step": 1762 }, { "epoch": 0.18, "grad_norm": 1.657482857375443, "learning_rate": 1.8853146607348824e-05, "loss": 0.769, "step": 1763 }, { "epoch": 0.18, "grad_norm": 1.5154300129562621, "learning_rate": 1.8851614230156984e-05, "loss": 0.749, "step": 1764 }, { "epoch": 0.18, "grad_norm": 1.6318632109908695, "learning_rate": 1.8850080892260844e-05, "loss": 0.7529, "step": 1765 }, { "epoch": 0.18, "grad_norm": 1.7001848525559118, "learning_rate": 1.8848546593826827e-05, "loss": 0.8489, "step": 1766 }, { "epoch": 0.18, "grad_norm": 1.589134711427665, "learning_rate": 1.8847011335021447e-05, "loss": 0.848, "step": 1767 }, { "epoch": 0.18, "grad_norm": 1.5385234932969856, "learning_rate": 1.884547511601134e-05, "loss": 0.7197, "step": 1768 }, { "epoch": 0.18, "grad_norm": 1.5689035447257587, "learning_rate": 1.884393793696324e-05, "loss": 0.7098, "step": 1769 }, { "epoch": 0.18, "grad_norm": 1.6426536233783764, "learning_rate": 1.884239979804398e-05, "loss": 0.7266, "step": 1770 }, { "epoch": 0.18, "grad_norm": 1.677291392580847, "learning_rate": 1.8840860699420497e-05, "loss": 0.8098, "step": 1771 }, { "epoch": 0.18, "grad_norm": 1.7249955607522947, "learning_rate": 1.8839320641259844e-05, "loss": 0.8916, "step": 1772 }, { "epoch": 0.18, "grad_norm": 1.6209383073724422, "learning_rate": 1.8837779623729167e-05, "loss": 0.8117, "step": 1773 }, { "epoch": 0.18, "grad_norm": 1.593803315763351, "learning_rate": 1.8836237646995717e-05, "loss": 0.8198, "step": 1774 }, { "epoch": 0.18, "grad_norm": 1.6593476747026599, "learning_rate": 1.883469471122686e-05, "loss": 0.721, "step": 1775 }, { "epoch": 0.18, "grad_norm": 1.5354092753657136, "learning_rate": 1.8833150816590042e-05, "loss": 0.774, "step": 1776 }, { "epoch": 0.18, "grad_norm": 1.4254979394593619, "learning_rate": 1.8831605963252845e-05, "loss": 0.6664, "step": 1777 }, { "epoch": 0.18, "grad_norm": 1.7161205041025949, "learning_rate": 1.883006015138293e-05, "loss": 0.8563, "step": 1778 }, { "epoch": 0.18, "grad_norm": 1.7088321727886966, "learning_rate": 1.8828513381148072e-05, "loss": 0.8043, "step": 1779 }, { "epoch": 0.18, "grad_norm": 1.5486816559944219, "learning_rate": 1.8826965652716148e-05, "loss": 0.7855, "step": 1780 }, { "epoch": 0.18, "grad_norm": 1.3385051756675685, "learning_rate": 1.882541696625514e-05, "loss": 0.7483, "step": 1781 }, { "epoch": 0.18, "grad_norm": 1.498188058539235, "learning_rate": 1.8823867321933135e-05, "loss": 0.7488, "step": 1782 }, { "epoch": 0.18, "grad_norm": 1.736701746116013, "learning_rate": 1.882231671991832e-05, "loss": 0.805, "step": 1783 }, { "epoch": 0.18, "grad_norm": 1.7067130208010808, "learning_rate": 1.8820765160378996e-05, "loss": 0.7416, "step": 1784 }, { "epoch": 0.18, "grad_norm": 1.5944026426589217, "learning_rate": 1.881921264348355e-05, "loss": 0.863, "step": 1785 }, { "epoch": 0.18, "grad_norm": 1.63057640894758, "learning_rate": 1.8817659169400493e-05, "loss": 0.8167, "step": 1786 }, { "epoch": 0.18, "grad_norm": 1.4701737654650993, "learning_rate": 1.8816104738298425e-05, "loss": 0.7732, "step": 1787 }, { "epoch": 0.18, "grad_norm": 1.7659716645257424, "learning_rate": 1.8814549350346052e-05, "loss": 0.8413, "step": 1788 }, { "epoch": 0.18, "grad_norm": 1.6703190187917394, "learning_rate": 1.8812993005712196e-05, "loss": 0.8808, "step": 1789 }, { "epoch": 0.18, "grad_norm": 1.5804918076804337, "learning_rate": 1.881143570456577e-05, "loss": 0.8396, "step": 1790 }, { "epoch": 0.18, "grad_norm": 1.502015060915155, "learning_rate": 1.880987744707579e-05, "loss": 0.7767, "step": 1791 }, { "epoch": 0.18, "grad_norm": 1.5100557185000367, "learning_rate": 1.8808318233411384e-05, "loss": 0.837, "step": 1792 }, { "epoch": 0.18, "grad_norm": 1.4980699724605326, "learning_rate": 1.880675806374178e-05, "loss": 0.8265, "step": 1793 }, { "epoch": 0.18, "grad_norm": 1.6145024202929508, "learning_rate": 1.880519693823631e-05, "loss": 0.8522, "step": 1794 }, { "epoch": 0.18, "grad_norm": 1.7060963762719894, "learning_rate": 1.8803634857064413e-05, "loss": 0.6858, "step": 1795 }, { "epoch": 0.18, "grad_norm": 1.66699583908313, "learning_rate": 1.8802071820395626e-05, "loss": 0.8265, "step": 1796 }, { "epoch": 0.18, "grad_norm": 1.5088819587635958, "learning_rate": 1.880050782839959e-05, "loss": 0.8256, "step": 1797 }, { "epoch": 0.18, "grad_norm": 1.8248098806211075, "learning_rate": 1.8798942881246057e-05, "loss": 0.7441, "step": 1798 }, { "epoch": 0.18, "grad_norm": 1.497823717757942, "learning_rate": 1.8797376979104874e-05, "loss": 0.6212, "step": 1799 }, { "epoch": 0.18, "grad_norm": 1.5313709881390287, "learning_rate": 1.879581012214599e-05, "loss": 0.7585, "step": 1800 }, { "epoch": 0.18, "grad_norm": 1.6533852967630702, "learning_rate": 1.8794242310539475e-05, "loss": 0.706, "step": 1801 }, { "epoch": 0.18, "grad_norm": 1.57322286965956, "learning_rate": 1.879267354445548e-05, "loss": 0.7315, "step": 1802 }, { "epoch": 0.18, "grad_norm": 1.4688226723378812, "learning_rate": 1.8791103824064275e-05, "loss": 0.6824, "step": 1803 }, { "epoch": 0.18, "grad_norm": 1.6606304786948904, "learning_rate": 1.8789533149536227e-05, "loss": 0.774, "step": 1804 }, { "epoch": 0.18, "grad_norm": 1.4170226955215965, "learning_rate": 1.8787961521041808e-05, "loss": 0.7616, "step": 1805 }, { "epoch": 0.18, "grad_norm": 1.6319528691892806, "learning_rate": 1.8786388938751593e-05, "loss": 0.7277, "step": 1806 }, { "epoch": 0.18, "grad_norm": 1.7368883126240444, "learning_rate": 1.8784815402836264e-05, "loss": 0.7857, "step": 1807 }, { "epoch": 0.18, "grad_norm": 1.6956446906840685, "learning_rate": 1.87832409134666e-05, "loss": 0.8243, "step": 1808 }, { "epoch": 0.18, "grad_norm": 1.5661135765652143, "learning_rate": 1.8781665470813493e-05, "loss": 0.7309, "step": 1809 }, { "epoch": 0.18, "grad_norm": 1.6488344290849406, "learning_rate": 1.878008907504793e-05, "loss": 0.741, "step": 1810 }, { "epoch": 0.18, "grad_norm": 1.748004174675965, "learning_rate": 1.8778511726341e-05, "loss": 0.812, "step": 1811 }, { "epoch": 0.18, "grad_norm": 1.6500412921236254, "learning_rate": 1.8776933424863903e-05, "loss": 0.75, "step": 1812 }, { "epoch": 0.18, "grad_norm": 1.5783811495293991, "learning_rate": 1.8775354170787938e-05, "loss": 0.7833, "step": 1813 }, { "epoch": 0.18, "grad_norm": 1.6438030612284624, "learning_rate": 1.8773773964284512e-05, "loss": 0.8352, "step": 1814 }, { "epoch": 0.18, "grad_norm": 1.7254538324598188, "learning_rate": 1.8772192805525125e-05, "loss": 0.7913, "step": 1815 }, { "epoch": 0.18, "grad_norm": 1.4609001559387844, "learning_rate": 1.8770610694681393e-05, "loss": 0.7164, "step": 1816 }, { "epoch": 0.18, "grad_norm": 1.5638189485710678, "learning_rate": 1.8769027631925027e-05, "loss": 0.741, "step": 1817 }, { "epoch": 0.18, "grad_norm": 1.5970980794499663, "learning_rate": 1.876744361742784e-05, "loss": 0.752, "step": 1818 }, { "epoch": 0.19, "grad_norm": 1.3691889410857434, "learning_rate": 1.876585865136176e-05, "loss": 0.7374, "step": 1819 }, { "epoch": 0.19, "grad_norm": 1.7025448759231918, "learning_rate": 1.8764272733898808e-05, "loss": 0.7367, "step": 1820 }, { "epoch": 0.19, "grad_norm": 1.6286596973353795, "learning_rate": 1.8762685865211106e-05, "loss": 0.7936, "step": 1821 }, { "epoch": 0.19, "grad_norm": 1.8002277034115608, "learning_rate": 1.8761098045470887e-05, "loss": 0.8227, "step": 1822 }, { "epoch": 0.19, "grad_norm": 1.6901918456825955, "learning_rate": 1.875950927485048e-05, "loss": 0.7645, "step": 1823 }, { "epoch": 0.19, "grad_norm": 1.5603243747675015, "learning_rate": 1.875791955352233e-05, "loss": 0.7471, "step": 1824 }, { "epoch": 0.19, "grad_norm": 1.638513369552182, "learning_rate": 1.8756328881658968e-05, "loss": 0.86, "step": 1825 }, { "epoch": 0.19, "grad_norm": 1.4728899465231686, "learning_rate": 1.875473725943304e-05, "loss": 0.7207, "step": 1826 }, { "epoch": 0.19, "grad_norm": 1.6494863709917857, "learning_rate": 1.8753144687017292e-05, "loss": 0.755, "step": 1827 }, { "epoch": 0.19, "grad_norm": 1.4154536873183075, "learning_rate": 1.8751551164584568e-05, "loss": 0.7739, "step": 1828 }, { "epoch": 0.19, "grad_norm": 1.58099440095728, "learning_rate": 1.8749956692307825e-05, "loss": 0.7177, "step": 1829 }, { "epoch": 0.19, "grad_norm": 1.526424937636552, "learning_rate": 1.874836127036012e-05, "loss": 0.6252, "step": 1830 }, { "epoch": 0.19, "grad_norm": 1.7385896248430377, "learning_rate": 1.874676489891461e-05, "loss": 0.75, "step": 1831 }, { "epoch": 0.19, "grad_norm": 1.6335448597236224, "learning_rate": 1.8745167578144552e-05, "loss": 0.8474, "step": 1832 }, { "epoch": 0.19, "grad_norm": 1.5534145226465848, "learning_rate": 1.8743569308223312e-05, "loss": 0.7913, "step": 1833 }, { "epoch": 0.19, "grad_norm": 1.5581638165153364, "learning_rate": 1.8741970089324357e-05, "loss": 0.6453, "step": 1834 }, { "epoch": 0.19, "grad_norm": 1.6011044528243155, "learning_rate": 1.874036992162126e-05, "loss": 0.7914, "step": 1835 }, { "epoch": 0.19, "grad_norm": 1.768395282832198, "learning_rate": 1.873876880528769e-05, "loss": 0.7309, "step": 1836 }, { "epoch": 0.19, "grad_norm": 1.5493951226784552, "learning_rate": 1.8737166740497427e-05, "loss": 0.7553, "step": 1837 }, { "epoch": 0.19, "grad_norm": 1.5239649939874127, "learning_rate": 1.873556372742435e-05, "loss": 0.7589, "step": 1838 }, { "epoch": 0.19, "grad_norm": 1.634050189294019, "learning_rate": 1.8733959766242435e-05, "loss": 0.897, "step": 1839 }, { "epoch": 0.19, "grad_norm": 1.6207931351883975, "learning_rate": 1.8732354857125773e-05, "loss": 0.814, "step": 1840 }, { "epoch": 0.19, "grad_norm": 1.507728990025924, "learning_rate": 1.873074900024855e-05, "loss": 0.6828, "step": 1841 }, { "epoch": 0.19, "grad_norm": 1.5626504219753878, "learning_rate": 1.8729142195785057e-05, "loss": 0.8172, "step": 1842 }, { "epoch": 0.19, "grad_norm": 1.6739510888151425, "learning_rate": 1.8727534443909686e-05, "loss": 0.7512, "step": 1843 }, { "epoch": 0.19, "grad_norm": 1.5598058732542022, "learning_rate": 1.872592574479694e-05, "loss": 0.7194, "step": 1844 }, { "epoch": 0.19, "grad_norm": 1.6005303628723884, "learning_rate": 1.8724316098621405e-05, "loss": 0.7972, "step": 1845 }, { "epoch": 0.19, "grad_norm": 1.600654059697692, "learning_rate": 1.872270550555779e-05, "loss": 0.9066, "step": 1846 }, { "epoch": 0.19, "grad_norm": 1.5518927935254088, "learning_rate": 1.872109396578091e-05, "loss": 0.6793, "step": 1847 }, { "epoch": 0.19, "grad_norm": 1.5799506697248105, "learning_rate": 1.8719481479465657e-05, "loss": 0.6856, "step": 1848 }, { "epoch": 0.19, "grad_norm": 1.6159705842129906, "learning_rate": 1.8717868046787046e-05, "loss": 0.8038, "step": 1849 }, { "epoch": 0.19, "grad_norm": 1.4684918042317172, "learning_rate": 1.871625366792019e-05, "loss": 0.6801, "step": 1850 }, { "epoch": 0.19, "grad_norm": 1.6412423287402154, "learning_rate": 1.8714638343040306e-05, "loss": 0.7523, "step": 1851 }, { "epoch": 0.19, "grad_norm": 1.6437306891728323, "learning_rate": 1.871302207232271e-05, "loss": 0.7218, "step": 1852 }, { "epoch": 0.19, "grad_norm": 1.4601714173931502, "learning_rate": 1.871140485594283e-05, "loss": 0.7227, "step": 1853 }, { "epoch": 0.19, "grad_norm": 1.6513526696248817, "learning_rate": 1.8709786694076178e-05, "loss": 0.8656, "step": 1854 }, { "epoch": 0.19, "grad_norm": 1.4853149556610323, "learning_rate": 1.870816758689839e-05, "loss": 0.7308, "step": 1855 }, { "epoch": 0.19, "grad_norm": 1.6802466029990593, "learning_rate": 1.870654753458519e-05, "loss": 0.8212, "step": 1856 }, { "epoch": 0.19, "grad_norm": 1.8500457105225216, "learning_rate": 1.8704926537312408e-05, "loss": 0.8009, "step": 1857 }, { "epoch": 0.19, "grad_norm": 1.5972154532993916, "learning_rate": 1.8703304595255983e-05, "loss": 0.6746, "step": 1858 }, { "epoch": 0.19, "grad_norm": 1.6662701350599647, "learning_rate": 1.8701681708591947e-05, "loss": 0.8041, "step": 1859 }, { "epoch": 0.19, "grad_norm": 1.5730426403183335, "learning_rate": 1.8700057877496443e-05, "loss": 0.9202, "step": 1860 }, { "epoch": 0.19, "grad_norm": 1.4563945916902619, "learning_rate": 1.8698433102145706e-05, "loss": 0.7369, "step": 1861 }, { "epoch": 0.19, "grad_norm": 1.4702139366238451, "learning_rate": 1.8696807382716085e-05, "loss": 0.7116, "step": 1862 }, { "epoch": 0.19, "grad_norm": 1.5621107416457056, "learning_rate": 1.869518071938403e-05, "loss": 0.7758, "step": 1863 }, { "epoch": 0.19, "grad_norm": 1.5895626444726687, "learning_rate": 1.8693553112326084e-05, "loss": 0.8106, "step": 1864 }, { "epoch": 0.19, "grad_norm": 1.7607266030575186, "learning_rate": 1.8691924561718897e-05, "loss": 0.844, "step": 1865 }, { "epoch": 0.19, "grad_norm": 1.4933064378757956, "learning_rate": 1.8690295067739226e-05, "loss": 0.7138, "step": 1866 }, { "epoch": 0.19, "grad_norm": 1.712265806011729, "learning_rate": 1.8688664630563928e-05, "loss": 0.8637, "step": 1867 }, { "epoch": 0.19, "grad_norm": 1.6019635887840478, "learning_rate": 1.8687033250369955e-05, "loss": 0.7653, "step": 1868 }, { "epoch": 0.19, "grad_norm": 1.612276802897441, "learning_rate": 1.8685400927334377e-05, "loss": 0.8336, "step": 1869 }, { "epoch": 0.19, "grad_norm": 1.670180632071749, "learning_rate": 1.8683767661634354e-05, "loss": 0.8118, "step": 1870 }, { "epoch": 0.19, "grad_norm": 1.3743265251696855, "learning_rate": 1.8682133453447147e-05, "loss": 0.82, "step": 1871 }, { "epoch": 0.19, "grad_norm": 1.683875985160119, "learning_rate": 1.868049830295013e-05, "loss": 0.9025, "step": 1872 }, { "epoch": 0.19, "grad_norm": 1.6648127443357852, "learning_rate": 1.867886221032077e-05, "loss": 0.8204, "step": 1873 }, { "epoch": 0.19, "grad_norm": 1.6458275270596272, "learning_rate": 1.8677225175736636e-05, "loss": 0.7923, "step": 1874 }, { "epoch": 0.19, "grad_norm": 1.5987414009006289, "learning_rate": 1.8675587199375407e-05, "loss": 0.6926, "step": 1875 }, { "epoch": 0.19, "grad_norm": 1.801676353452884, "learning_rate": 1.8673948281414857e-05, "loss": 0.8069, "step": 1876 }, { "epoch": 0.19, "grad_norm": 1.5174117497935236, "learning_rate": 1.867230842203287e-05, "loss": 0.6795, "step": 1877 }, { "epoch": 0.19, "grad_norm": 1.447289592777005, "learning_rate": 1.8670667621407423e-05, "loss": 0.7696, "step": 1878 }, { "epoch": 0.19, "grad_norm": 1.543875258571506, "learning_rate": 1.8669025879716597e-05, "loss": 0.7519, "step": 1879 }, { "epoch": 0.19, "grad_norm": 1.6415775747071424, "learning_rate": 1.866738319713858e-05, "loss": 0.7268, "step": 1880 }, { "epoch": 0.19, "grad_norm": 1.7785108692805012, "learning_rate": 1.8665739573851662e-05, "loss": 0.7699, "step": 1881 }, { "epoch": 0.19, "grad_norm": 1.7857674261403411, "learning_rate": 1.866409501003423e-05, "loss": 0.8229, "step": 1882 }, { "epoch": 0.19, "grad_norm": 1.9847860216254964, "learning_rate": 1.8662449505864776e-05, "loss": 0.7704, "step": 1883 }, { "epoch": 0.19, "grad_norm": 1.7227834593591063, "learning_rate": 1.8660803061521894e-05, "loss": 0.7932, "step": 1884 }, { "epoch": 0.19, "grad_norm": 1.6255355103927502, "learning_rate": 1.865915567718428e-05, "loss": 0.682, "step": 1885 }, { "epoch": 0.19, "grad_norm": 1.5679640202331104, "learning_rate": 1.865750735303073e-05, "loss": 0.812, "step": 1886 }, { "epoch": 0.19, "grad_norm": 1.5657907851284936, "learning_rate": 1.8655858089240143e-05, "loss": 0.7679, "step": 1887 }, { "epoch": 0.19, "grad_norm": 1.3940385718601986, "learning_rate": 1.8654207885991527e-05, "loss": 0.6367, "step": 1888 }, { "epoch": 0.19, "grad_norm": 1.570281967703175, "learning_rate": 1.865255674346398e-05, "loss": 0.7764, "step": 1889 }, { "epoch": 0.19, "grad_norm": 1.6446730773550722, "learning_rate": 1.8650904661836707e-05, "loss": 0.7509, "step": 1890 }, { "epoch": 0.19, "grad_norm": 1.7466286168189615, "learning_rate": 1.864925164128902e-05, "loss": 0.7999, "step": 1891 }, { "epoch": 0.19, "grad_norm": 1.5994431575638575, "learning_rate": 1.864759768200033e-05, "loss": 0.7346, "step": 1892 }, { "epoch": 0.19, "grad_norm": 1.6980032406179928, "learning_rate": 1.864594278415014e-05, "loss": 0.8921, "step": 1893 }, { "epoch": 0.19, "grad_norm": 1.6641202788730192, "learning_rate": 1.864428694791807e-05, "loss": 0.7413, "step": 1894 }, { "epoch": 0.19, "grad_norm": 1.4534612756573948, "learning_rate": 1.8642630173483832e-05, "loss": 0.7873, "step": 1895 }, { "epoch": 0.19, "grad_norm": 1.6806786748699862, "learning_rate": 1.8640972461027246e-05, "loss": 0.6926, "step": 1896 }, { "epoch": 0.19, "grad_norm": 1.7567742933825417, "learning_rate": 1.863931381072823e-05, "loss": 0.806, "step": 1897 }, { "epoch": 0.19, "grad_norm": 1.5208842194437813, "learning_rate": 1.8637654222766802e-05, "loss": 0.7438, "step": 1898 }, { "epoch": 0.19, "grad_norm": 1.5697337897387504, "learning_rate": 1.8635993697323086e-05, "loss": 0.8382, "step": 1899 }, { "epoch": 0.19, "grad_norm": 1.5281416666058911, "learning_rate": 1.8634332234577307e-05, "loss": 0.7687, "step": 1900 }, { "epoch": 0.19, "grad_norm": 1.5959906743154828, "learning_rate": 1.8632669834709787e-05, "loss": 0.7747, "step": 1901 }, { "epoch": 0.19, "grad_norm": 1.553663965084361, "learning_rate": 1.8631006497900957e-05, "loss": 0.7009, "step": 1902 }, { "epoch": 0.19, "grad_norm": 1.5721802721916782, "learning_rate": 1.862934222433135e-05, "loss": 0.7484, "step": 1903 }, { "epoch": 0.19, "grad_norm": 1.523293199579708, "learning_rate": 1.8627677014181586e-05, "loss": 0.6594, "step": 1904 }, { "epoch": 0.19, "grad_norm": 1.615424841576138, "learning_rate": 1.862601086763241e-05, "loss": 0.7287, "step": 1905 }, { "epoch": 0.19, "grad_norm": 1.4620235693583123, "learning_rate": 1.8624343784864644e-05, "loss": 0.6631, "step": 1906 }, { "epoch": 0.19, "grad_norm": 1.609922327362643, "learning_rate": 1.8622675766059232e-05, "loss": 0.6958, "step": 1907 }, { "epoch": 0.19, "grad_norm": 1.7415547413413075, "learning_rate": 1.862100681139721e-05, "loss": 0.8045, "step": 1908 }, { "epoch": 0.19, "grad_norm": 1.642979877233161, "learning_rate": 1.861933692105972e-05, "loss": 0.8172, "step": 1909 }, { "epoch": 0.19, "grad_norm": 1.5667632726209306, "learning_rate": 1.8617666095227994e-05, "loss": 0.7247, "step": 1910 }, { "epoch": 0.19, "grad_norm": 1.4583686501782427, "learning_rate": 1.8615994334083377e-05, "loss": 0.81, "step": 1911 }, { "epoch": 0.19, "grad_norm": 1.4396267855158282, "learning_rate": 1.8614321637807315e-05, "loss": 0.7329, "step": 1912 }, { "epoch": 0.19, "grad_norm": 1.6642235119219537, "learning_rate": 1.8612648006581354e-05, "loss": 0.8072, "step": 1913 }, { "epoch": 0.19, "grad_norm": 1.754213812581633, "learning_rate": 1.861097344058714e-05, "loss": 0.8149, "step": 1914 }, { "epoch": 0.19, "grad_norm": 1.6761790132590595, "learning_rate": 1.8609297940006418e-05, "loss": 0.8206, "step": 1915 }, { "epoch": 0.19, "grad_norm": 1.560191341344239, "learning_rate": 1.8607621505021035e-05, "loss": 0.7154, "step": 1916 }, { "epoch": 0.19, "grad_norm": 1.4918257116316898, "learning_rate": 1.860594413581295e-05, "loss": 0.7098, "step": 1917 }, { "epoch": 0.2, "grad_norm": 1.5958708627476872, "learning_rate": 1.860426583256421e-05, "loss": 0.8249, "step": 1918 }, { "epoch": 0.2, "grad_norm": 1.4247394782586766, "learning_rate": 1.8602586595456974e-05, "loss": 0.8818, "step": 1919 }, { "epoch": 0.2, "grad_norm": 1.7522243225471708, "learning_rate": 1.8600906424673487e-05, "loss": 0.8321, "step": 1920 }, { "epoch": 0.2, "grad_norm": 1.5649963909661708, "learning_rate": 1.8599225320396113e-05, "loss": 0.7594, "step": 1921 }, { "epoch": 0.2, "grad_norm": 1.6616037284068703, "learning_rate": 1.8597543282807303e-05, "loss": 0.808, "step": 1922 }, { "epoch": 0.2, "grad_norm": 1.6696455666594916, "learning_rate": 1.8595860312089625e-05, "loss": 0.7538, "step": 1923 }, { "epoch": 0.2, "grad_norm": 1.6723167707677005, "learning_rate": 1.859417640842573e-05, "loss": 0.768, "step": 1924 }, { "epoch": 0.2, "grad_norm": 1.6174241472922297, "learning_rate": 1.859249157199839e-05, "loss": 0.7958, "step": 1925 }, { "epoch": 0.2, "grad_norm": 1.4890001345357784, "learning_rate": 1.8590805802990458e-05, "loss": 0.797, "step": 1926 }, { "epoch": 0.2, "grad_norm": 1.5636449533652899, "learning_rate": 1.8589119101584902e-05, "loss": 0.7467, "step": 1927 }, { "epoch": 0.2, "grad_norm": 1.7449087745880458, "learning_rate": 1.8587431467964784e-05, "loss": 0.8519, "step": 1928 }, { "epoch": 0.2, "grad_norm": 1.6217747543112395, "learning_rate": 1.8585742902313274e-05, "loss": 0.7736, "step": 1929 }, { "epoch": 0.2, "grad_norm": 1.5118301003504073, "learning_rate": 1.858405340481364e-05, "loss": 0.6824, "step": 1930 }, { "epoch": 0.2, "grad_norm": 1.4925272292019434, "learning_rate": 1.8582362975649245e-05, "loss": 0.678, "step": 1931 }, { "epoch": 0.2, "grad_norm": 1.6573874004589289, "learning_rate": 1.8580671615003566e-05, "loss": 0.8158, "step": 1932 }, { "epoch": 0.2, "grad_norm": 1.5882708936318732, "learning_rate": 1.8578979323060164e-05, "loss": 0.703, "step": 1933 }, { "epoch": 0.2, "grad_norm": 1.747937980589991, "learning_rate": 1.8577286100002723e-05, "loss": 0.8432, "step": 1934 }, { "epoch": 0.2, "grad_norm": 1.5880321948553646, "learning_rate": 1.8575591946015006e-05, "loss": 0.8102, "step": 1935 }, { "epoch": 0.2, "grad_norm": 1.67263280842425, "learning_rate": 1.8573896861280893e-05, "loss": 0.8489, "step": 1936 }, { "epoch": 0.2, "grad_norm": 1.616183957035531, "learning_rate": 1.8572200845984352e-05, "loss": 0.6341, "step": 1937 }, { "epoch": 0.2, "grad_norm": 1.6906920012154358, "learning_rate": 1.857050390030947e-05, "loss": 0.8366, "step": 1938 }, { "epoch": 0.2, "grad_norm": 1.5695251712587637, "learning_rate": 1.8568806024440415e-05, "loss": 0.6616, "step": 1939 }, { "epoch": 0.2, "grad_norm": 1.531032808164511, "learning_rate": 1.8567107218561463e-05, "loss": 0.7847, "step": 1940 }, { "epoch": 0.2, "grad_norm": 1.5605863172120555, "learning_rate": 1.8565407482857e-05, "loss": 0.7061, "step": 1941 }, { "epoch": 0.2, "grad_norm": 1.6497185295835817, "learning_rate": 1.85637068175115e-05, "loss": 0.7621, "step": 1942 }, { "epoch": 0.2, "grad_norm": 1.6176729022633565, "learning_rate": 1.856200522270955e-05, "loss": 0.7856, "step": 1943 }, { "epoch": 0.2, "grad_norm": 1.9537736544492905, "learning_rate": 1.856030269863583e-05, "loss": 0.88, "step": 1944 }, { "epoch": 0.2, "grad_norm": 1.5430187767275976, "learning_rate": 1.8558599245475115e-05, "loss": 0.6248, "step": 1945 }, { "epoch": 0.2, "grad_norm": 1.3672398031794841, "learning_rate": 1.8556894863412297e-05, "loss": 0.6678, "step": 1946 }, { "epoch": 0.2, "grad_norm": 1.6851967538360875, "learning_rate": 1.8555189552632353e-05, "loss": 0.7628, "step": 1947 }, { "epoch": 0.2, "grad_norm": 1.308925529168711, "learning_rate": 1.8553483313320372e-05, "loss": 0.7125, "step": 1948 }, { "epoch": 0.2, "grad_norm": 1.5482666375476917, "learning_rate": 1.855177614566154e-05, "loss": 0.7105, "step": 1949 }, { "epoch": 0.2, "grad_norm": 1.6451222261319363, "learning_rate": 1.8550068049841143e-05, "loss": 0.754, "step": 1950 }, { "epoch": 0.2, "grad_norm": 1.530070822063962, "learning_rate": 1.8548359026044567e-05, "loss": 0.8445, "step": 1951 }, { "epoch": 0.2, "grad_norm": 1.6759016729134624, "learning_rate": 1.85466490744573e-05, "loss": 0.8776, "step": 1952 }, { "epoch": 0.2, "grad_norm": 1.6564499377083384, "learning_rate": 1.854493819526493e-05, "loss": 0.8513, "step": 1953 }, { "epoch": 0.2, "grad_norm": 1.6158099814548028, "learning_rate": 1.854322638865315e-05, "loss": 0.8304, "step": 1954 }, { "epoch": 0.2, "grad_norm": 1.6128948769079847, "learning_rate": 1.854151365480774e-05, "loss": 0.7835, "step": 1955 }, { "epoch": 0.2, "grad_norm": 1.6422859708402293, "learning_rate": 1.8539799993914602e-05, "loss": 0.7945, "step": 1956 }, { "epoch": 0.2, "grad_norm": 1.5442670681243345, "learning_rate": 1.8538085406159722e-05, "loss": 0.7909, "step": 1957 }, { "epoch": 0.2, "grad_norm": 1.6477871595113218, "learning_rate": 1.8536369891729188e-05, "loss": 0.7269, "step": 1958 }, { "epoch": 0.2, "grad_norm": 1.6111243464475271, "learning_rate": 1.85346534508092e-05, "loss": 0.7188, "step": 1959 }, { "epoch": 0.2, "grad_norm": 1.528940302181452, "learning_rate": 1.8532936083586047e-05, "loss": 0.757, "step": 1960 }, { "epoch": 0.2, "grad_norm": 1.6760006322163905, "learning_rate": 1.853121779024612e-05, "loss": 0.6481, "step": 1961 }, { "epoch": 0.2, "grad_norm": 1.5850301760975005, "learning_rate": 1.8529498570975918e-05, "loss": 0.8071, "step": 1962 }, { "epoch": 0.2, "grad_norm": 1.439301043406329, "learning_rate": 1.852777842596203e-05, "loss": 0.6926, "step": 1963 }, { "epoch": 0.2, "grad_norm": 1.4913418883840095, "learning_rate": 1.8526057355391153e-05, "loss": 0.7353, "step": 1964 }, { "epoch": 0.2, "grad_norm": 1.4632243798516378, "learning_rate": 1.8524335359450084e-05, "loss": 0.6683, "step": 1965 }, { "epoch": 0.2, "grad_norm": 1.6979225750276388, "learning_rate": 1.852261243832572e-05, "loss": 0.7964, "step": 1966 }, { "epoch": 0.2, "grad_norm": 1.6935771936979542, "learning_rate": 1.852088859220505e-05, "loss": 0.8466, "step": 1967 }, { "epoch": 0.2, "grad_norm": 1.6235813178922434, "learning_rate": 1.851916382127518e-05, "loss": 0.7163, "step": 1968 }, { "epoch": 0.2, "grad_norm": 1.6952450683272433, "learning_rate": 1.85174381257233e-05, "loss": 0.8911, "step": 1969 }, { "epoch": 0.2, "grad_norm": 1.6217638700128432, "learning_rate": 1.8515711505736708e-05, "loss": 0.8231, "step": 1970 }, { "epoch": 0.2, "grad_norm": 1.5461320142762094, "learning_rate": 1.8513983961502802e-05, "loss": 0.6862, "step": 1971 }, { "epoch": 0.2, "grad_norm": 1.8345041401665254, "learning_rate": 1.851225549320908e-05, "loss": 0.7441, "step": 1972 }, { "epoch": 0.2, "grad_norm": 1.5711831067423128, "learning_rate": 1.8510526101043146e-05, "loss": 0.869, "step": 1973 }, { "epoch": 0.2, "grad_norm": 1.5191116397427378, "learning_rate": 1.850879578519269e-05, "loss": 0.7696, "step": 1974 }, { "epoch": 0.2, "grad_norm": 1.5090778444514343, "learning_rate": 1.8507064545845513e-05, "loss": 0.7709, "step": 1975 }, { "epoch": 0.2, "grad_norm": 1.5043235651481828, "learning_rate": 1.8505332383189518e-05, "loss": 0.6482, "step": 1976 }, { "epoch": 0.2, "grad_norm": 1.7494370121789027, "learning_rate": 1.85035992974127e-05, "loss": 0.659, "step": 1977 }, { "epoch": 0.2, "grad_norm": 1.6977092603202606, "learning_rate": 1.850186528870316e-05, "loss": 0.8217, "step": 1978 }, { "epoch": 0.2, "grad_norm": 1.4858417843522007, "learning_rate": 1.85001303572491e-05, "loss": 0.7733, "step": 1979 }, { "epoch": 0.2, "grad_norm": 1.3671668705036, "learning_rate": 1.8498394503238814e-05, "loss": 0.7278, "step": 1980 }, { "epoch": 0.2, "grad_norm": 1.6825392973325513, "learning_rate": 1.84966577268607e-05, "loss": 0.7464, "step": 1981 }, { "epoch": 0.2, "grad_norm": 1.555401729857369, "learning_rate": 1.849492002830327e-05, "loss": 0.766, "step": 1982 }, { "epoch": 0.2, "grad_norm": 1.6489193347651052, "learning_rate": 1.8493181407755117e-05, "loss": 0.8089, "step": 1983 }, { "epoch": 0.2, "grad_norm": 1.555459995400608, "learning_rate": 1.849144186540494e-05, "loss": 0.7948, "step": 1984 }, { "epoch": 0.2, "grad_norm": 1.6696539094363, "learning_rate": 1.8489701401441534e-05, "loss": 0.7851, "step": 1985 }, { "epoch": 0.2, "grad_norm": 1.599201865291812, "learning_rate": 1.848796001605381e-05, "loss": 0.8093, "step": 1986 }, { "epoch": 0.2, "grad_norm": 1.6274974091557912, "learning_rate": 1.8486217709430757e-05, "loss": 0.7926, "step": 1987 }, { "epoch": 0.2, "grad_norm": 1.3623804172978458, "learning_rate": 1.848447448176149e-05, "loss": 0.7262, "step": 1988 }, { "epoch": 0.2, "grad_norm": 1.629211820742282, "learning_rate": 1.8482730333235196e-05, "loss": 0.8294, "step": 1989 }, { "epoch": 0.2, "grad_norm": 1.7541029673914632, "learning_rate": 1.8480985264041176e-05, "loss": 0.8159, "step": 1990 }, { "epoch": 0.2, "grad_norm": 1.586614609609062, "learning_rate": 1.847923927436884e-05, "loss": 0.6823, "step": 1991 }, { "epoch": 0.2, "grad_norm": 1.5419850233142978, "learning_rate": 1.8477492364407677e-05, "loss": 0.6992, "step": 1992 }, { "epoch": 0.2, "grad_norm": 1.472491430080609, "learning_rate": 1.8475744534347293e-05, "loss": 0.7535, "step": 1993 }, { "epoch": 0.2, "grad_norm": 1.5645681274063772, "learning_rate": 1.8473995784377384e-05, "loss": 0.7991, "step": 1994 }, { "epoch": 0.2, "grad_norm": 1.6389601170363615, "learning_rate": 1.8472246114687754e-05, "loss": 0.7141, "step": 1995 }, { "epoch": 0.2, "grad_norm": 1.6616292369048578, "learning_rate": 1.8470495525468295e-05, "loss": 0.7879, "step": 1996 }, { "epoch": 0.2, "grad_norm": 1.4737315897382532, "learning_rate": 1.8468744016909012e-05, "loss": 0.7577, "step": 1997 }, { "epoch": 0.2, "grad_norm": 1.549146981818495, "learning_rate": 1.8466991589200004e-05, "loss": 0.6605, "step": 1998 }, { "epoch": 0.2, "grad_norm": 1.4417487564219365, "learning_rate": 1.8465238242531467e-05, "loss": 0.7124, "step": 1999 }, { "epoch": 0.2, "grad_norm": 1.6835586343638251, "learning_rate": 1.84634839770937e-05, "loss": 0.7829, "step": 2000 }, { "epoch": 0.2, "grad_norm": 1.7115119058166468, "learning_rate": 1.8461728793077104e-05, "loss": 0.7586, "step": 2001 }, { "epoch": 0.2, "grad_norm": 1.4947857186720843, "learning_rate": 1.8459972690672172e-05, "loss": 0.7328, "step": 2002 }, { "epoch": 0.2, "grad_norm": 1.4280227740430842, "learning_rate": 1.8458215670069502e-05, "loss": 0.7671, "step": 2003 }, { "epoch": 0.2, "grad_norm": 1.6151555542402964, "learning_rate": 1.8456457731459795e-05, "loss": 0.848, "step": 2004 }, { "epoch": 0.2, "grad_norm": 1.479691535263727, "learning_rate": 1.8454698875033843e-05, "loss": 0.8455, "step": 2005 }, { "epoch": 0.2, "grad_norm": 1.4177021763508002, "learning_rate": 1.8452939100982547e-05, "loss": 0.7119, "step": 2006 }, { "epoch": 0.2, "grad_norm": 1.6199388078799257, "learning_rate": 1.8451178409496903e-05, "loss": 0.6344, "step": 2007 }, { "epoch": 0.2, "grad_norm": 1.5825791681632486, "learning_rate": 1.8449416800767998e-05, "loss": 0.7744, "step": 2008 }, { "epoch": 0.2, "grad_norm": 1.5318746508145062, "learning_rate": 1.8447654274987038e-05, "loss": 0.8559, "step": 2009 }, { "epoch": 0.2, "grad_norm": 1.6233848458956928, "learning_rate": 1.844589083234531e-05, "loss": 0.7345, "step": 2010 }, { "epoch": 0.2, "grad_norm": 1.994553363314384, "learning_rate": 1.8444126473034212e-05, "loss": 0.7971, "step": 2011 }, { "epoch": 0.2, "grad_norm": 1.6031955233721904, "learning_rate": 1.844236119724524e-05, "loss": 0.7934, "step": 2012 }, { "epoch": 0.2, "grad_norm": 1.4580459780767323, "learning_rate": 1.8440595005169985e-05, "loss": 0.7894, "step": 2013 }, { "epoch": 0.2, "grad_norm": 1.7207833492032534, "learning_rate": 1.843882789700013e-05, "loss": 0.8159, "step": 2014 }, { "epoch": 0.2, "grad_norm": 1.538572416456644, "learning_rate": 1.843705987292748e-05, "loss": 0.8205, "step": 2015 }, { "epoch": 0.21, "grad_norm": 1.6077079023161813, "learning_rate": 1.8435290933143925e-05, "loss": 0.712, "step": 2016 }, { "epoch": 0.21, "grad_norm": 1.4914180648119735, "learning_rate": 1.8433521077841447e-05, "loss": 0.705, "step": 2017 }, { "epoch": 0.21, "grad_norm": 1.5844200282792549, "learning_rate": 1.8431750307212143e-05, "loss": 0.747, "step": 2018 }, { "epoch": 0.21, "grad_norm": 1.5512967626223364, "learning_rate": 1.84299786214482e-05, "loss": 0.7392, "step": 2019 }, { "epoch": 0.21, "grad_norm": 1.6445683986761288, "learning_rate": 1.8428206020741913e-05, "loss": 0.8185, "step": 2020 }, { "epoch": 0.21, "grad_norm": 1.5331188402336782, "learning_rate": 1.8426432505285658e-05, "loss": 0.6476, "step": 2021 }, { "epoch": 0.21, "grad_norm": 1.5679496580351822, "learning_rate": 1.8424658075271934e-05, "loss": 0.6425, "step": 2022 }, { "epoch": 0.21, "grad_norm": 1.5806644443693592, "learning_rate": 1.8422882730893323e-05, "loss": 0.815, "step": 2023 }, { "epoch": 0.21, "grad_norm": 1.8126513267054625, "learning_rate": 1.8421106472342507e-05, "loss": 0.7919, "step": 2024 }, { "epoch": 0.21, "grad_norm": 1.8010077865252379, "learning_rate": 1.841932929981228e-05, "loss": 0.792, "step": 2025 }, { "epoch": 0.21, "grad_norm": 1.6176071870755673, "learning_rate": 1.8417551213495516e-05, "loss": 0.681, "step": 2026 }, { "epoch": 0.21, "grad_norm": 1.6194212121101104, "learning_rate": 1.8415772213585206e-05, "loss": 0.781, "step": 2027 }, { "epoch": 0.21, "grad_norm": 1.4615424765257328, "learning_rate": 1.8413992300274432e-05, "loss": 0.7386, "step": 2028 }, { "epoch": 0.21, "grad_norm": 1.345972388499806, "learning_rate": 1.8412211473756366e-05, "loss": 0.6836, "step": 2029 }, { "epoch": 0.21, "grad_norm": 1.420925984429223, "learning_rate": 1.8410429734224305e-05, "loss": 0.7123, "step": 2030 }, { "epoch": 0.21, "grad_norm": 1.4714918647469502, "learning_rate": 1.8408647081871617e-05, "loss": 0.7548, "step": 2031 }, { "epoch": 0.21, "grad_norm": 1.2880187247176165, "learning_rate": 1.8406863516891787e-05, "loss": 0.6705, "step": 2032 }, { "epoch": 0.21, "grad_norm": 1.5561890649523946, "learning_rate": 1.840507903947839e-05, "loss": 0.8041, "step": 2033 }, { "epoch": 0.21, "grad_norm": 1.8240869988638713, "learning_rate": 1.8403293649825105e-05, "loss": 0.8352, "step": 2034 }, { "epoch": 0.21, "grad_norm": 1.6843780737067835, "learning_rate": 1.8401507348125706e-05, "loss": 0.8211, "step": 2035 }, { "epoch": 0.21, "grad_norm": 1.8174509816114721, "learning_rate": 1.8399720134574068e-05, "loss": 0.8882, "step": 2036 }, { "epoch": 0.21, "grad_norm": 1.8972402872295238, "learning_rate": 1.839793200936417e-05, "loss": 0.8591, "step": 2037 }, { "epoch": 0.21, "grad_norm": 1.7514471330221437, "learning_rate": 1.8396142972690075e-05, "loss": 0.6459, "step": 2038 }, { "epoch": 0.21, "grad_norm": 1.8239195890199404, "learning_rate": 1.8394353024745965e-05, "loss": 0.7782, "step": 2039 }, { "epoch": 0.21, "grad_norm": 1.5675086725687128, "learning_rate": 1.839256216572611e-05, "loss": 0.8358, "step": 2040 }, { "epoch": 0.21, "grad_norm": 1.5085805660216012, "learning_rate": 1.8390770395824874e-05, "loss": 0.735, "step": 2041 }, { "epoch": 0.21, "grad_norm": 1.547175912059753, "learning_rate": 1.8388977715236728e-05, "loss": 0.7459, "step": 2042 }, { "epoch": 0.21, "grad_norm": 1.7356081253311848, "learning_rate": 1.838718412415624e-05, "loss": 0.8296, "step": 2043 }, { "epoch": 0.21, "grad_norm": 1.655669317138079, "learning_rate": 1.8385389622778076e-05, "loss": 0.9693, "step": 2044 }, { "epoch": 0.21, "grad_norm": 1.6362615478232747, "learning_rate": 1.8383594211297002e-05, "loss": 0.8121, "step": 2045 }, { "epoch": 0.21, "grad_norm": 1.5372469664900423, "learning_rate": 1.838179788990788e-05, "loss": 0.7378, "step": 2046 }, { "epoch": 0.21, "grad_norm": 1.7855729505540894, "learning_rate": 1.838000065880568e-05, "loss": 0.8149, "step": 2047 }, { "epoch": 0.21, "grad_norm": 1.6119552242718482, "learning_rate": 1.837820251818545e-05, "loss": 0.7797, "step": 2048 }, { "epoch": 0.21, "grad_norm": 1.6814490296118239, "learning_rate": 1.837640346824236e-05, "loss": 0.6792, "step": 2049 }, { "epoch": 0.21, "grad_norm": 1.5907274351368677, "learning_rate": 1.837460350917166e-05, "loss": 0.6379, "step": 2050 }, { "epoch": 0.21, "grad_norm": 1.582028706018697, "learning_rate": 1.837280264116872e-05, "loss": 0.87, "step": 2051 }, { "epoch": 0.21, "grad_norm": 1.5896426368682781, "learning_rate": 1.837100086442899e-05, "loss": 0.7454, "step": 2052 }, { "epoch": 0.21, "grad_norm": 1.515693029722567, "learning_rate": 1.8369198179148022e-05, "loss": 0.5782, "step": 2053 }, { "epoch": 0.21, "grad_norm": 1.6823168519943408, "learning_rate": 1.836739458552147e-05, "loss": 0.7646, "step": 2054 }, { "epoch": 0.21, "grad_norm": 1.5807384127341129, "learning_rate": 1.8365590083745085e-05, "loss": 0.7864, "step": 2055 }, { "epoch": 0.21, "grad_norm": 1.5014690904678236, "learning_rate": 1.8363784674014726e-05, "loss": 0.7983, "step": 2056 }, { "epoch": 0.21, "grad_norm": 1.527163447694033, "learning_rate": 1.836197835652633e-05, "loss": 0.7369, "step": 2057 }, { "epoch": 0.21, "grad_norm": 1.4761009440956374, "learning_rate": 1.8360171131475954e-05, "loss": 0.7562, "step": 2058 }, { "epoch": 0.21, "grad_norm": 1.5807536940605051, "learning_rate": 1.8358362999059738e-05, "loss": 0.7055, "step": 2059 }, { "epoch": 0.21, "grad_norm": 1.7363606674901761, "learning_rate": 1.835655395947393e-05, "loss": 0.8579, "step": 2060 }, { "epoch": 0.21, "grad_norm": 1.5892285709310214, "learning_rate": 1.835474401291487e-05, "loss": 0.76, "step": 2061 }, { "epoch": 0.21, "grad_norm": 1.96710212615678, "learning_rate": 1.8352933159579e-05, "loss": 0.7318, "step": 2062 }, { "epoch": 0.21, "grad_norm": 1.656354040552658, "learning_rate": 1.8351121399662862e-05, "loss": 0.7117, "step": 2063 }, { "epoch": 0.21, "grad_norm": 1.4360631305910538, "learning_rate": 1.8349308733363093e-05, "loss": 0.6603, "step": 2064 }, { "epoch": 0.21, "grad_norm": 1.646228813200624, "learning_rate": 1.8347495160876432e-05, "loss": 0.7216, "step": 2065 }, { "epoch": 0.21, "grad_norm": 1.446053013317963, "learning_rate": 1.834568068239971e-05, "loss": 0.7162, "step": 2066 }, { "epoch": 0.21, "grad_norm": 1.4323835655155246, "learning_rate": 1.8343865298129858e-05, "loss": 0.7047, "step": 2067 }, { "epoch": 0.21, "grad_norm": 1.5035102946772407, "learning_rate": 1.8342049008263917e-05, "loss": 0.8284, "step": 2068 }, { "epoch": 0.21, "grad_norm": 1.6444469347806443, "learning_rate": 1.8340231812999007e-05, "loss": 0.7184, "step": 2069 }, { "epoch": 0.21, "grad_norm": 1.6596641180910732, "learning_rate": 1.8338413712532365e-05, "loss": 0.8087, "step": 2070 }, { "epoch": 0.21, "grad_norm": 1.5875512346960872, "learning_rate": 1.833659470706131e-05, "loss": 0.7649, "step": 2071 }, { "epoch": 0.21, "grad_norm": 1.5843463610866617, "learning_rate": 1.8334774796783268e-05, "loss": 0.7858, "step": 2072 }, { "epoch": 0.21, "grad_norm": 1.4945176583565407, "learning_rate": 1.833295398189576e-05, "loss": 0.7108, "step": 2073 }, { "epoch": 0.21, "grad_norm": 1.812069632882091, "learning_rate": 1.8331132262596418e-05, "loss": 0.8371, "step": 2074 }, { "epoch": 0.21, "grad_norm": 1.4764660495567634, "learning_rate": 1.832930963908295e-05, "loss": 0.6777, "step": 2075 }, { "epoch": 0.21, "grad_norm": 1.5515759927728716, "learning_rate": 1.8327486111553174e-05, "loss": 0.8229, "step": 2076 }, { "epoch": 0.21, "grad_norm": 1.4658649863388442, "learning_rate": 1.832566168020501e-05, "loss": 0.7533, "step": 2077 }, { "epoch": 0.21, "grad_norm": 1.5142633214554027, "learning_rate": 1.832383634523647e-05, "loss": 0.6924, "step": 2078 }, { "epoch": 0.21, "grad_norm": 1.5895282587876711, "learning_rate": 1.8322010106845663e-05, "loss": 0.8646, "step": 2079 }, { "epoch": 0.21, "grad_norm": 1.4819718119255312, "learning_rate": 1.8320182965230803e-05, "loss": 0.6515, "step": 2080 }, { "epoch": 0.21, "grad_norm": 1.5681585082866136, "learning_rate": 1.8318354920590195e-05, "loss": 0.8381, "step": 2081 }, { "epoch": 0.21, "grad_norm": 1.4810855763092055, "learning_rate": 1.8316525973122243e-05, "loss": 0.769, "step": 2082 }, { "epoch": 0.21, "grad_norm": 1.7651158078259417, "learning_rate": 1.8314696123025456e-05, "loss": 0.8944, "step": 2083 }, { "epoch": 0.21, "grad_norm": 1.3824607726643825, "learning_rate": 1.8312865370498428e-05, "loss": 0.7574, "step": 2084 }, { "epoch": 0.21, "grad_norm": 1.723480098849916, "learning_rate": 1.8311033715739864e-05, "loss": 0.7932, "step": 2085 }, { "epoch": 0.21, "grad_norm": 1.7779798532759492, "learning_rate": 1.830920115894856e-05, "loss": 0.7958, "step": 2086 }, { "epoch": 0.21, "grad_norm": 1.5820203885718955, "learning_rate": 1.8307367700323412e-05, "loss": 0.6929, "step": 2087 }, { "epoch": 0.21, "grad_norm": 1.6031859652237008, "learning_rate": 1.8305533340063416e-05, "loss": 0.7878, "step": 2088 }, { "epoch": 0.21, "grad_norm": 1.5935160645728348, "learning_rate": 1.8303698078367654e-05, "loss": 0.7755, "step": 2089 }, { "epoch": 0.21, "grad_norm": 1.5403578121776387, "learning_rate": 1.8301861915435325e-05, "loss": 0.8191, "step": 2090 }, { "epoch": 0.21, "grad_norm": 1.8028348646006056, "learning_rate": 1.830002485146571e-05, "loss": 0.8042, "step": 2091 }, { "epoch": 0.21, "grad_norm": 1.553546451754014, "learning_rate": 1.8298186886658194e-05, "loss": 0.8077, "step": 2092 }, { "epoch": 0.21, "grad_norm": 1.5730451787934852, "learning_rate": 1.8296348021212264e-05, "loss": 0.8645, "step": 2093 }, { "epoch": 0.21, "grad_norm": 1.635196346349874, "learning_rate": 1.8294508255327495e-05, "loss": 0.8325, "step": 2094 }, { "epoch": 0.21, "grad_norm": 1.7375633128416428, "learning_rate": 1.8292667589203567e-05, "loss": 0.7546, "step": 2095 }, { "epoch": 0.21, "grad_norm": 1.4748248166175115, "learning_rate": 1.8290826023040257e-05, "loss": 0.8443, "step": 2096 }, { "epoch": 0.21, "grad_norm": 1.8397460998187334, "learning_rate": 1.8288983557037432e-05, "loss": 0.7881, "step": 2097 }, { "epoch": 0.21, "grad_norm": 1.4051773976204776, "learning_rate": 1.8287140191395066e-05, "loss": 0.7506, "step": 2098 }, { "epoch": 0.21, "grad_norm": 1.5543239613177329, "learning_rate": 1.8285295926313234e-05, "loss": 0.7805, "step": 2099 }, { "epoch": 0.21, "grad_norm": 1.6267215636888006, "learning_rate": 1.8283450761992095e-05, "loss": 0.7499, "step": 2100 }, { "epoch": 0.21, "grad_norm": 1.6634829018566752, "learning_rate": 1.8281604698631913e-05, "loss": 0.702, "step": 2101 }, { "epoch": 0.21, "grad_norm": 1.458900948499807, "learning_rate": 1.827975773643305e-05, "loss": 0.7928, "step": 2102 }, { "epoch": 0.21, "grad_norm": 1.7096776108046234, "learning_rate": 1.8277909875595967e-05, "loss": 0.7865, "step": 2103 }, { "epoch": 0.21, "grad_norm": 1.4549477637499242, "learning_rate": 1.827606111632122e-05, "loss": 0.7086, "step": 2104 }, { "epoch": 0.21, "grad_norm": 1.6106913000137875, "learning_rate": 1.827421145880946e-05, "loss": 0.7765, "step": 2105 }, { "epoch": 0.21, "grad_norm": 1.522056592191057, "learning_rate": 1.8272360903261443e-05, "loss": 0.7613, "step": 2106 }, { "epoch": 0.21, "grad_norm": 1.539075161245667, "learning_rate": 1.8270509449878015e-05, "loss": 0.724, "step": 2107 }, { "epoch": 0.21, "grad_norm": 1.4949035601058638, "learning_rate": 1.8268657098860118e-05, "loss": 0.8125, "step": 2108 }, { "epoch": 0.21, "grad_norm": 1.4720359002309913, "learning_rate": 1.82668038504088e-05, "loss": 0.6817, "step": 2109 }, { "epoch": 0.21, "grad_norm": 1.733004339510359, "learning_rate": 1.826494970472521e-05, "loss": 0.8364, "step": 2110 }, { "epoch": 0.21, "grad_norm": 1.5290000198905853, "learning_rate": 1.8263094662010575e-05, "loss": 0.7266, "step": 2111 }, { "epoch": 0.21, "grad_norm": 1.6943982181306592, "learning_rate": 1.8261238722466233e-05, "loss": 0.6214, "step": 2112 }, { "epoch": 0.21, "grad_norm": 1.7396644301300892, "learning_rate": 1.825938188629362e-05, "loss": 0.7582, "step": 2113 }, { "epoch": 0.22, "grad_norm": 1.756893544882042, "learning_rate": 1.8257524153694265e-05, "loss": 0.7242, "step": 2114 }, { "epoch": 0.22, "grad_norm": 1.6405496596283244, "learning_rate": 1.82556655248698e-05, "loss": 0.8054, "step": 2115 }, { "epoch": 0.22, "grad_norm": 1.6438093570265306, "learning_rate": 1.8253806000021943e-05, "loss": 0.726, "step": 2116 }, { "epoch": 0.22, "grad_norm": 1.564672755650021, "learning_rate": 1.825194557935252e-05, "loss": 0.6933, "step": 2117 }, { "epoch": 0.22, "grad_norm": 1.7318313364847981, "learning_rate": 1.825008426306345e-05, "loss": 0.7744, "step": 2118 }, { "epoch": 0.22, "grad_norm": 1.6157523650895218, "learning_rate": 1.8248222051356756e-05, "loss": 0.7796, "step": 2119 }, { "epoch": 0.22, "grad_norm": 1.382492070976764, "learning_rate": 1.824635894443454e-05, "loss": 0.7445, "step": 2120 }, { "epoch": 0.22, "grad_norm": 1.6583871301219149, "learning_rate": 1.8244494942499017e-05, "loss": 0.7904, "step": 2121 }, { "epoch": 0.22, "grad_norm": 1.4231276239769954, "learning_rate": 1.8242630045752504e-05, "loss": 0.6558, "step": 2122 }, { "epoch": 0.22, "grad_norm": 1.455061423838774, "learning_rate": 1.8240764254397392e-05, "loss": 0.7436, "step": 2123 }, { "epoch": 0.22, "grad_norm": 1.703724941352681, "learning_rate": 1.8238897568636197e-05, "loss": 0.6682, "step": 2124 }, { "epoch": 0.22, "grad_norm": 1.6735807021258513, "learning_rate": 1.8237029988671514e-05, "loss": 0.763, "step": 2125 }, { "epoch": 0.22, "grad_norm": 1.548686319328144, "learning_rate": 1.8235161514706036e-05, "loss": 0.7035, "step": 2126 }, { "epoch": 0.22, "grad_norm": 1.6797818763836223, "learning_rate": 1.823329214694256e-05, "loss": 0.6695, "step": 2127 }, { "epoch": 0.22, "grad_norm": 1.6404203109199544, "learning_rate": 1.8231421885583972e-05, "loss": 0.8632, "step": 2128 }, { "epoch": 0.22, "grad_norm": 1.5372973402500991, "learning_rate": 1.822955073083327e-05, "loss": 0.7308, "step": 2129 }, { "epoch": 0.22, "grad_norm": 1.6315504556007998, "learning_rate": 1.822767868289353e-05, "loss": 0.8769, "step": 2130 }, { "epoch": 0.22, "grad_norm": 1.6043084151034726, "learning_rate": 1.8225805741967934e-05, "loss": 0.7148, "step": 2131 }, { "epoch": 0.22, "grad_norm": 1.5401015017426092, "learning_rate": 1.822393190825976e-05, "loss": 0.7348, "step": 2132 }, { "epoch": 0.22, "grad_norm": 1.4478526089039574, "learning_rate": 1.8222057181972386e-05, "loss": 0.7506, "step": 2133 }, { "epoch": 0.22, "grad_norm": 1.7731832083632584, "learning_rate": 1.8220181563309284e-05, "loss": 0.7261, "step": 2134 }, { "epoch": 0.22, "grad_norm": 1.510479871553798, "learning_rate": 1.8218305052474025e-05, "loss": 0.6962, "step": 2135 }, { "epoch": 0.22, "grad_norm": 1.6077436722899787, "learning_rate": 1.821642764967027e-05, "loss": 0.7813, "step": 2136 }, { "epoch": 0.22, "grad_norm": 1.6144631090233805, "learning_rate": 1.8214549355101786e-05, "loss": 0.7784, "step": 2137 }, { "epoch": 0.22, "grad_norm": 1.9735813719971183, "learning_rate": 1.8212670168972428e-05, "loss": 0.7713, "step": 2138 }, { "epoch": 0.22, "grad_norm": 1.6638275430587353, "learning_rate": 1.8210790091486156e-05, "loss": 0.7142, "step": 2139 }, { "epoch": 0.22, "grad_norm": 1.5424127528689335, "learning_rate": 1.8208909122847024e-05, "loss": 0.7102, "step": 2140 }, { "epoch": 0.22, "grad_norm": 1.597958992848855, "learning_rate": 1.8207027263259176e-05, "loss": 0.8374, "step": 2141 }, { "epoch": 0.22, "grad_norm": 1.565074333708335, "learning_rate": 1.8205144512926866e-05, "loss": 0.754, "step": 2142 }, { "epoch": 0.22, "grad_norm": 1.5897706230321191, "learning_rate": 1.8203260872054432e-05, "loss": 0.8083, "step": 2143 }, { "epoch": 0.22, "grad_norm": 1.4999155604503212, "learning_rate": 1.8201376340846315e-05, "loss": 0.6513, "step": 2144 }, { "epoch": 0.22, "grad_norm": 1.4601575616164244, "learning_rate": 1.819949091950705e-05, "loss": 0.6396, "step": 2145 }, { "epoch": 0.22, "grad_norm": 1.5653866484142276, "learning_rate": 1.8197604608241272e-05, "loss": 0.6518, "step": 2146 }, { "epoch": 0.22, "grad_norm": 1.6287065297840895, "learning_rate": 1.819571740725371e-05, "loss": 0.7207, "step": 2147 }, { "epoch": 0.22, "grad_norm": 1.5469126251614622, "learning_rate": 1.819382931674919e-05, "loss": 0.6479, "step": 2148 }, { "epoch": 0.22, "grad_norm": 1.6232718164950395, "learning_rate": 1.8191940336932638e-05, "loss": 0.8188, "step": 2149 }, { "epoch": 0.22, "grad_norm": 1.8874968240656473, "learning_rate": 1.8190050468009065e-05, "loss": 0.8426, "step": 2150 }, { "epoch": 0.22, "grad_norm": 1.6375878574874134, "learning_rate": 1.8188159710183595e-05, "loss": 0.831, "step": 2151 }, { "epoch": 0.22, "grad_norm": 1.6805671634638826, "learning_rate": 1.8186268063661432e-05, "loss": 0.7414, "step": 2152 }, { "epoch": 0.22, "grad_norm": 1.6881636311339634, "learning_rate": 1.8184375528647896e-05, "loss": 0.785, "step": 2153 }, { "epoch": 0.22, "grad_norm": 1.5103174375177986, "learning_rate": 1.8182482105348383e-05, "loss": 0.789, "step": 2154 }, { "epoch": 0.22, "grad_norm": 1.5687022363735545, "learning_rate": 1.8180587793968398e-05, "loss": 0.7227, "step": 2155 }, { "epoch": 0.22, "grad_norm": 1.6928307729934189, "learning_rate": 1.817869259471354e-05, "loss": 0.7008, "step": 2156 }, { "epoch": 0.22, "grad_norm": 1.2934833517593187, "learning_rate": 1.8176796507789497e-05, "loss": 0.6371, "step": 2157 }, { "epoch": 0.22, "grad_norm": 1.5592241220325302, "learning_rate": 1.8174899533402066e-05, "loss": 0.7327, "step": 2158 }, { "epoch": 0.22, "grad_norm": 1.8484178493317083, "learning_rate": 1.8173001671757127e-05, "loss": 0.8249, "step": 2159 }, { "epoch": 0.22, "grad_norm": 1.688186929197883, "learning_rate": 1.8171102923060675e-05, "loss": 0.8771, "step": 2160 }, { "epoch": 0.22, "grad_norm": 1.5226220759645985, "learning_rate": 1.8169203287518778e-05, "loss": 0.7016, "step": 2161 }, { "epoch": 0.22, "grad_norm": 1.5748867957539676, "learning_rate": 1.816730276533762e-05, "loss": 0.7682, "step": 2162 }, { "epoch": 0.22, "grad_norm": 1.5209777167596974, "learning_rate": 1.8165401356723467e-05, "loss": 0.7089, "step": 2163 }, { "epoch": 0.22, "grad_norm": 1.5028113310786382, "learning_rate": 1.816349906188269e-05, "loss": 0.6731, "step": 2164 }, { "epoch": 0.22, "grad_norm": 1.572176249001475, "learning_rate": 1.8161595881021756e-05, "loss": 0.8238, "step": 2165 }, { "epoch": 0.22, "grad_norm": 1.5882665731586676, "learning_rate": 1.8159691814347217e-05, "loss": 0.8786, "step": 2166 }, { "epoch": 0.22, "grad_norm": 1.5656827837258642, "learning_rate": 1.8157786862065735e-05, "loss": 0.7759, "step": 2167 }, { "epoch": 0.22, "grad_norm": 1.4514364376988993, "learning_rate": 1.8155881024384064e-05, "loss": 0.835, "step": 2168 }, { "epoch": 0.22, "grad_norm": 1.588171207975576, "learning_rate": 1.8153974301509053e-05, "loss": 0.7176, "step": 2169 }, { "epoch": 0.22, "grad_norm": 1.7084577912367054, "learning_rate": 1.815206669364764e-05, "loss": 0.8163, "step": 2170 }, { "epoch": 0.22, "grad_norm": 1.5668159027685062, "learning_rate": 1.8150158201006878e-05, "loss": 0.8995, "step": 2171 }, { "epoch": 0.22, "grad_norm": 1.8305587750700543, "learning_rate": 1.8148248823793895e-05, "loss": 0.7976, "step": 2172 }, { "epoch": 0.22, "grad_norm": 1.5635711829004972, "learning_rate": 1.8146338562215927e-05, "loss": 0.8354, "step": 2173 }, { "epoch": 0.22, "grad_norm": 1.569805517173065, "learning_rate": 1.81444274164803e-05, "loss": 0.8663, "step": 2174 }, { "epoch": 0.22, "grad_norm": 1.583628255647632, "learning_rate": 1.8142515386794443e-05, "loss": 0.7238, "step": 2175 }, { "epoch": 0.22, "grad_norm": 1.5298895392029468, "learning_rate": 1.814060247336588e-05, "loss": 0.7605, "step": 2176 }, { "epoch": 0.22, "grad_norm": 1.6327355633469391, "learning_rate": 1.8138688676402212e-05, "loss": 0.7277, "step": 2177 }, { "epoch": 0.22, "grad_norm": 1.4512353475385125, "learning_rate": 1.8136773996111175e-05, "loss": 0.6717, "step": 2178 }, { "epoch": 0.22, "grad_norm": 1.7808283545662742, "learning_rate": 1.813485843270056e-05, "loss": 0.8072, "step": 2179 }, { "epoch": 0.22, "grad_norm": 1.6894019878441329, "learning_rate": 1.8132941986378276e-05, "loss": 0.6918, "step": 2180 }, { "epoch": 0.22, "grad_norm": 1.2985355768151867, "learning_rate": 1.8131024657352328e-05, "loss": 0.7033, "step": 2181 }, { "epoch": 0.22, "grad_norm": 1.3873095945563672, "learning_rate": 1.8129106445830807e-05, "loss": 0.8286, "step": 2182 }, { "epoch": 0.22, "grad_norm": 1.7586578364971428, "learning_rate": 1.8127187352021908e-05, "loss": 0.8945, "step": 2183 }, { "epoch": 0.22, "grad_norm": 1.6220564789546164, "learning_rate": 1.8125267376133912e-05, "loss": 0.81, "step": 2184 }, { "epoch": 0.22, "grad_norm": 1.6101531248573264, "learning_rate": 1.812334651837521e-05, "loss": 0.7452, "step": 2185 }, { "epoch": 0.22, "grad_norm": 1.5883461039783242, "learning_rate": 1.8121424778954278e-05, "loss": 0.8675, "step": 2186 }, { "epoch": 0.22, "grad_norm": 1.4514916706150318, "learning_rate": 1.8119502158079693e-05, "loss": 0.7217, "step": 2187 }, { "epoch": 0.22, "grad_norm": 1.6195550361174957, "learning_rate": 1.8117578655960123e-05, "loss": 0.9525, "step": 2188 }, { "epoch": 0.22, "grad_norm": 1.5460650451901794, "learning_rate": 1.8115654272804333e-05, "loss": 0.6632, "step": 2189 }, { "epoch": 0.22, "grad_norm": 1.5250281746069403, "learning_rate": 1.811372900882119e-05, "loss": 0.6274, "step": 2190 }, { "epoch": 0.22, "grad_norm": 1.5412167993310468, "learning_rate": 1.8111802864219643e-05, "loss": 0.7328, "step": 2191 }, { "epoch": 0.22, "grad_norm": 1.637075226598729, "learning_rate": 1.8109875839208754e-05, "loss": 0.7426, "step": 2192 }, { "epoch": 0.22, "grad_norm": 1.6717734330324852, "learning_rate": 1.8107947933997663e-05, "loss": 0.8137, "step": 2193 }, { "epoch": 0.22, "grad_norm": 1.530759629670791, "learning_rate": 1.810601914879562e-05, "loss": 0.7175, "step": 2194 }, { "epoch": 0.22, "grad_norm": 1.526483843980927, "learning_rate": 1.8104089483811963e-05, "loss": 0.7751, "step": 2195 }, { "epoch": 0.22, "grad_norm": 1.5835310478088402, "learning_rate": 1.8102158939256122e-05, "loss": 0.7437, "step": 2196 }, { "epoch": 0.22, "grad_norm": 1.6780789997117656, "learning_rate": 1.8100227515337634e-05, "loss": 0.7519, "step": 2197 }, { "epoch": 0.22, "grad_norm": 1.6293693684949369, "learning_rate": 1.8098295212266123e-05, "loss": 0.7471, "step": 2198 }, { "epoch": 0.22, "grad_norm": 1.5963838193341875, "learning_rate": 1.8096362030251312e-05, "loss": 0.8278, "step": 2199 }, { "epoch": 0.22, "grad_norm": 1.641469573490994, "learning_rate": 1.8094427969503013e-05, "loss": 0.772, "step": 2200 }, { "epoch": 0.22, "grad_norm": 1.634557774448412, "learning_rate": 1.8092493030231142e-05, "loss": 0.8353, "step": 2201 }, { "epoch": 0.22, "grad_norm": 1.565183800023239, "learning_rate": 1.8090557212645702e-05, "loss": 0.7399, "step": 2202 }, { "epoch": 0.22, "grad_norm": 1.7881324029802745, "learning_rate": 1.8088620516956804e-05, "loss": 0.8626, "step": 2203 }, { "epoch": 0.22, "grad_norm": 1.4944277512903883, "learning_rate": 1.808668294337464e-05, "loss": 0.7774, "step": 2204 }, { "epoch": 0.22, "grad_norm": 1.527912053127469, "learning_rate": 1.8084744492109497e-05, "loss": 0.7545, "step": 2205 }, { "epoch": 0.22, "grad_norm": 1.592145236674976, "learning_rate": 1.8082805163371777e-05, "loss": 0.7721, "step": 2206 }, { "epoch": 0.22, "grad_norm": 1.6812339574309858, "learning_rate": 1.8080864957371958e-05, "loss": 0.7945, "step": 2207 }, { "epoch": 0.22, "grad_norm": 1.6211980361220193, "learning_rate": 1.8078923874320615e-05, "loss": 0.7518, "step": 2208 }, { "epoch": 0.22, "grad_norm": 1.614036868443832, "learning_rate": 1.8076981914428426e-05, "loss": 0.868, "step": 2209 }, { "epoch": 0.22, "grad_norm": 1.6792941191195823, "learning_rate": 1.8075039077906163e-05, "loss": 0.8067, "step": 2210 }, { "epoch": 0.22, "grad_norm": 1.5699987593174205, "learning_rate": 1.8073095364964682e-05, "loss": 0.7725, "step": 2211 }, { "epoch": 0.23, "grad_norm": 1.5710191725635028, "learning_rate": 1.8071150775814956e-05, "loss": 0.8535, "step": 2212 }, { "epoch": 0.23, "grad_norm": 1.6947804590122968, "learning_rate": 1.8069205310668026e-05, "loss": 0.7741, "step": 2213 }, { "epoch": 0.23, "grad_norm": 1.4980756469697138, "learning_rate": 1.8067258969735052e-05, "loss": 0.727, "step": 2214 }, { "epoch": 0.23, "grad_norm": 1.452475006388767, "learning_rate": 1.8065311753227272e-05, "loss": 0.6818, "step": 2215 }, { "epoch": 0.23, "grad_norm": 1.5605286651538655, "learning_rate": 1.8063363661356027e-05, "loss": 0.7793, "step": 2216 }, { "epoch": 0.23, "grad_norm": 1.6668571113325545, "learning_rate": 1.8061414694332756e-05, "loss": 0.6445, "step": 2217 }, { "epoch": 0.23, "grad_norm": 1.5436010718479045, "learning_rate": 1.8059464852368987e-05, "loss": 0.8266, "step": 2218 }, { "epoch": 0.23, "grad_norm": 1.5529862587308387, "learning_rate": 1.8057514135676344e-05, "loss": 0.6925, "step": 2219 }, { "epoch": 0.23, "grad_norm": 1.673046218654071, "learning_rate": 1.8055562544466544e-05, "loss": 0.7221, "step": 2220 }, { "epoch": 0.23, "grad_norm": 1.6071811351863372, "learning_rate": 1.8053610078951406e-05, "loss": 0.7382, "step": 2221 }, { "epoch": 0.23, "grad_norm": 1.697397817656138, "learning_rate": 1.805165673934284e-05, "loss": 0.8095, "step": 2222 }, { "epoch": 0.23, "grad_norm": 1.494326366333854, "learning_rate": 1.804970252585285e-05, "loss": 0.7678, "step": 2223 }, { "epoch": 0.23, "grad_norm": 1.460052000381731, "learning_rate": 1.804774743869353e-05, "loss": 0.6809, "step": 2224 }, { "epoch": 0.23, "grad_norm": 1.6463863753897041, "learning_rate": 1.804579147807708e-05, "loss": 0.9014, "step": 2225 }, { "epoch": 0.23, "grad_norm": 1.4916566202877668, "learning_rate": 1.8043834644215788e-05, "loss": 0.7232, "step": 2226 }, { "epoch": 0.23, "grad_norm": 1.4792196205038426, "learning_rate": 1.8041876937322037e-05, "loss": 0.7147, "step": 2227 }, { "epoch": 0.23, "grad_norm": 1.6681977826488052, "learning_rate": 1.8039918357608304e-05, "loss": 0.7517, "step": 2228 }, { "epoch": 0.23, "grad_norm": 1.6038026551810642, "learning_rate": 1.803795890528716e-05, "loss": 0.7602, "step": 2229 }, { "epoch": 0.23, "grad_norm": 1.6556802077399546, "learning_rate": 1.8035998580571282e-05, "loss": 0.6506, "step": 2230 }, { "epoch": 0.23, "grad_norm": 1.738570205110402, "learning_rate": 1.8034037383673428e-05, "loss": 0.7763, "step": 2231 }, { "epoch": 0.23, "grad_norm": 1.5473612742580263, "learning_rate": 1.803207531480645e-05, "loss": 0.6568, "step": 2232 }, { "epoch": 0.23, "grad_norm": 1.6939965516749653, "learning_rate": 1.8030112374183308e-05, "loss": 0.7322, "step": 2233 }, { "epoch": 0.23, "grad_norm": 1.528914442618967, "learning_rate": 1.802814856201704e-05, "loss": 0.7585, "step": 2234 }, { "epoch": 0.23, "grad_norm": 1.4757461250675585, "learning_rate": 1.8026183878520794e-05, "loss": 0.7902, "step": 2235 }, { "epoch": 0.23, "grad_norm": 1.5769224453410504, "learning_rate": 1.8024218323907807e-05, "loss": 0.6148, "step": 2236 }, { "epoch": 0.23, "grad_norm": 1.4609111499910967, "learning_rate": 1.8022251898391402e-05, "loss": 0.6959, "step": 2237 }, { "epoch": 0.23, "grad_norm": 1.434729618799995, "learning_rate": 1.8020284602185006e-05, "loss": 0.7305, "step": 2238 }, { "epoch": 0.23, "grad_norm": 1.4238232846508847, "learning_rate": 1.8018316435502143e-05, "loss": 0.6863, "step": 2239 }, { "epoch": 0.23, "grad_norm": 1.5818552847046934, "learning_rate": 1.8016347398556423e-05, "loss": 0.7345, "step": 2240 }, { "epoch": 0.23, "grad_norm": 1.7039010405467567, "learning_rate": 1.8014377491561553e-05, "loss": 0.7224, "step": 2241 }, { "epoch": 0.23, "grad_norm": 1.6340705954608221, "learning_rate": 1.8012406714731338e-05, "loss": 0.8247, "step": 2242 }, { "epoch": 0.23, "grad_norm": 1.5826030085527707, "learning_rate": 1.8010435068279673e-05, "loss": 0.8059, "step": 2243 }, { "epoch": 0.23, "grad_norm": 1.5847457915547827, "learning_rate": 1.800846255242055e-05, "loss": 0.7552, "step": 2244 }, { "epoch": 0.23, "grad_norm": 1.7361257789569242, "learning_rate": 1.800648916736806e-05, "loss": 0.7518, "step": 2245 }, { "epoch": 0.23, "grad_norm": 1.7586600117640085, "learning_rate": 1.8004514913336374e-05, "loss": 0.9412, "step": 2246 }, { "epoch": 0.23, "grad_norm": 1.6696786408496551, "learning_rate": 1.8002539790539772e-05, "loss": 0.8753, "step": 2247 }, { "epoch": 0.23, "grad_norm": 1.559155976771375, "learning_rate": 1.8000563799192622e-05, "loss": 0.7045, "step": 2248 }, { "epoch": 0.23, "grad_norm": 1.5477534172888618, "learning_rate": 1.7998586939509385e-05, "loss": 0.7251, "step": 2249 }, { "epoch": 0.23, "grad_norm": 1.7697877319548974, "learning_rate": 1.799660921170462e-05, "loss": 0.8147, "step": 2250 }, { "epoch": 0.23, "grad_norm": 1.629767247929377, "learning_rate": 1.799463061599298e-05, "loss": 0.9108, "step": 2251 }, { "epoch": 0.23, "grad_norm": 1.596274521142993, "learning_rate": 1.7992651152589205e-05, "loss": 0.7334, "step": 2252 }, { "epoch": 0.23, "grad_norm": 1.6516715835230245, "learning_rate": 1.799067082170814e-05, "loss": 0.7745, "step": 2253 }, { "epoch": 0.23, "grad_norm": 1.8505464121141373, "learning_rate": 1.798868962356472e-05, "loss": 0.7958, "step": 2254 }, { "epoch": 0.23, "grad_norm": 1.5896082359240096, "learning_rate": 1.798670755837397e-05, "loss": 0.7953, "step": 2255 }, { "epoch": 0.23, "grad_norm": 1.574272206265239, "learning_rate": 1.7984724626351008e-05, "loss": 0.8106, "step": 2256 }, { "epoch": 0.23, "grad_norm": 1.558125079934259, "learning_rate": 1.7982740827711058e-05, "loss": 0.6858, "step": 2257 }, { "epoch": 0.23, "grad_norm": 1.5459963520716786, "learning_rate": 1.7980756162669427e-05, "loss": 0.7438, "step": 2258 }, { "epoch": 0.23, "grad_norm": 1.5282081192747206, "learning_rate": 1.797877063144152e-05, "loss": 0.7825, "step": 2259 }, { "epoch": 0.23, "grad_norm": 1.523370901061267, "learning_rate": 1.7976784234242838e-05, "loss": 0.6955, "step": 2260 }, { "epoch": 0.23, "grad_norm": 1.5460023446118578, "learning_rate": 1.7974796971288966e-05, "loss": 0.7258, "step": 2261 }, { "epoch": 0.23, "grad_norm": 1.3836104999840484, "learning_rate": 1.7972808842795594e-05, "loss": 0.8183, "step": 2262 }, { "epoch": 0.23, "grad_norm": 1.6467488935328332, "learning_rate": 1.7970819848978503e-05, "loss": 0.7974, "step": 2263 }, { "epoch": 0.23, "grad_norm": 1.513920497223266, "learning_rate": 1.7968829990053572e-05, "loss": 0.7312, "step": 2264 }, { "epoch": 0.23, "grad_norm": 1.5416125278602086, "learning_rate": 1.796683926623676e-05, "loss": 0.8179, "step": 2265 }, { "epoch": 0.23, "grad_norm": 1.6108320417733, "learning_rate": 1.7964847677744136e-05, "loss": 0.719, "step": 2266 }, { "epoch": 0.23, "grad_norm": 1.5754808386958055, "learning_rate": 1.7962855224791852e-05, "loss": 0.7511, "step": 2267 }, { "epoch": 0.23, "grad_norm": 1.4482906664508932, "learning_rate": 1.7960861907596158e-05, "loss": 0.7618, "step": 2268 }, { "epoch": 0.23, "grad_norm": 1.5903384355437244, "learning_rate": 1.7958867726373393e-05, "loss": 0.7502, "step": 2269 }, { "epoch": 0.23, "grad_norm": 1.3129301228336305, "learning_rate": 1.7956872681340005e-05, "loss": 0.7442, "step": 2270 }, { "epoch": 0.23, "grad_norm": 1.60673569630705, "learning_rate": 1.795487677271252e-05, "loss": 0.7776, "step": 2271 }, { "epoch": 0.23, "grad_norm": 1.655070352685222, "learning_rate": 1.7952880000707558e-05, "loss": 0.6787, "step": 2272 }, { "epoch": 0.23, "grad_norm": 1.5340526168662216, "learning_rate": 1.795088236554184e-05, "loss": 0.7754, "step": 2273 }, { "epoch": 0.23, "grad_norm": 1.525057887506206, "learning_rate": 1.7948883867432184e-05, "loss": 0.8401, "step": 2274 }, { "epoch": 0.23, "grad_norm": 1.6525453707981543, "learning_rate": 1.7946884506595487e-05, "loss": 0.7541, "step": 2275 }, { "epoch": 0.23, "grad_norm": 1.6554115210034996, "learning_rate": 1.794488428324875e-05, "loss": 0.7034, "step": 2276 }, { "epoch": 0.23, "grad_norm": 1.4414860762539807, "learning_rate": 1.7942883197609074e-05, "loss": 0.6702, "step": 2277 }, { "epoch": 0.23, "grad_norm": 1.594305792624561, "learning_rate": 1.7940881249893636e-05, "loss": 0.7537, "step": 2278 }, { "epoch": 0.23, "grad_norm": 1.6752368175096224, "learning_rate": 1.7938878440319722e-05, "loss": 0.8536, "step": 2279 }, { "epoch": 0.23, "grad_norm": 1.3880591018903257, "learning_rate": 1.7936874769104703e-05, "loss": 0.5761, "step": 2280 }, { "epoch": 0.23, "grad_norm": 1.6830662990808718, "learning_rate": 1.7934870236466047e-05, "loss": 0.7947, "step": 2281 }, { "epoch": 0.23, "grad_norm": 1.5170592285262723, "learning_rate": 1.7932864842621312e-05, "loss": 0.7356, "step": 2282 }, { "epoch": 0.23, "grad_norm": 1.590286210609126, "learning_rate": 1.7930858587788156e-05, "loss": 0.7456, "step": 2283 }, { "epoch": 0.23, "grad_norm": 1.5899046321067252, "learning_rate": 1.7928851472184323e-05, "loss": 0.7471, "step": 2284 }, { "epoch": 0.23, "grad_norm": 1.830304910240319, "learning_rate": 1.792684349602766e-05, "loss": 0.8573, "step": 2285 }, { "epoch": 0.23, "grad_norm": 1.5360155713568722, "learning_rate": 1.7924834659536093e-05, "loss": 0.899, "step": 2286 }, { "epoch": 0.23, "grad_norm": 1.503268262966366, "learning_rate": 1.7922824962927657e-05, "loss": 0.808, "step": 2287 }, { "epoch": 0.23, "grad_norm": 1.6637303000309078, "learning_rate": 1.792081440642047e-05, "loss": 0.8694, "step": 2288 }, { "epoch": 0.23, "grad_norm": 1.4930490566181709, "learning_rate": 1.7918802990232748e-05, "loss": 0.7238, "step": 2289 }, { "epoch": 0.23, "grad_norm": 1.6580291703719623, "learning_rate": 1.7916790714582795e-05, "loss": 0.7738, "step": 2290 }, { "epoch": 0.23, "grad_norm": 1.5967606389216442, "learning_rate": 1.7914777579689017e-05, "loss": 0.7241, "step": 2291 }, { "epoch": 0.23, "grad_norm": 1.6330398542636713, "learning_rate": 1.7912763585769905e-05, "loss": 0.8487, "step": 2292 }, { "epoch": 0.23, "grad_norm": 1.4311002448815942, "learning_rate": 1.791074873304405e-05, "loss": 0.7219, "step": 2293 }, { "epoch": 0.23, "grad_norm": 1.738071137245182, "learning_rate": 1.790873302173013e-05, "loss": 0.7444, "step": 2294 }, { "epoch": 0.23, "grad_norm": 1.5850740120003257, "learning_rate": 1.7906716452046915e-05, "loss": 0.7367, "step": 2295 }, { "epoch": 0.23, "grad_norm": 1.5592420662588853, "learning_rate": 1.790469902421328e-05, "loss": 0.7313, "step": 2296 }, { "epoch": 0.23, "grad_norm": 1.4806536703150404, "learning_rate": 1.7902680738448178e-05, "loss": 0.8097, "step": 2297 }, { "epoch": 0.23, "grad_norm": 1.489296277259454, "learning_rate": 1.790066159497067e-05, "loss": 0.7146, "step": 2298 }, { "epoch": 0.23, "grad_norm": 1.558413346803114, "learning_rate": 1.7898641593999893e-05, "loss": 0.7599, "step": 2299 }, { "epoch": 0.23, "grad_norm": 1.4735137697579415, "learning_rate": 1.78966207357551e-05, "loss": 0.8103, "step": 2300 }, { "epoch": 0.23, "grad_norm": 1.4504295184011107, "learning_rate": 1.789459902045561e-05, "loss": 0.6457, "step": 2301 }, { "epoch": 0.23, "grad_norm": 1.4983891556182687, "learning_rate": 1.7892576448320854e-05, "loss": 0.7301, "step": 2302 }, { "epoch": 0.23, "grad_norm": 1.635941334049674, "learning_rate": 1.7890553019570353e-05, "loss": 0.7768, "step": 2303 }, { "epoch": 0.23, "grad_norm": 1.3864888565813458, "learning_rate": 1.7888528734423715e-05, "loss": 0.6055, "step": 2304 }, { "epoch": 0.23, "grad_norm": 1.4703550242509769, "learning_rate": 1.7886503593100646e-05, "loss": 0.7608, "step": 2305 }, { "epoch": 0.23, "grad_norm": 1.6246563516863506, "learning_rate": 1.788447759582094e-05, "loss": 0.7621, "step": 2306 }, { "epoch": 0.23, "grad_norm": 1.577237508115445, "learning_rate": 1.7882450742804493e-05, "loss": 0.7709, "step": 2307 }, { "epoch": 0.23, "grad_norm": 1.690658860404219, "learning_rate": 1.7880423034271287e-05, "loss": 0.7846, "step": 2308 }, { "epoch": 0.23, "grad_norm": 1.6931476627904156, "learning_rate": 1.7878394470441396e-05, "loss": 0.6558, "step": 2309 }, { "epoch": 0.23, "grad_norm": 1.6204463011198105, "learning_rate": 1.7876365051534987e-05, "loss": 0.8227, "step": 2310 }, { "epoch": 0.24, "grad_norm": 1.522142949127196, "learning_rate": 1.7874334777772326e-05, "loss": 0.7449, "step": 2311 }, { "epoch": 0.24, "grad_norm": 1.6461329249430225, "learning_rate": 1.7872303649373767e-05, "loss": 0.7682, "step": 2312 }, { "epoch": 0.24, "grad_norm": 1.5824110708824122, "learning_rate": 1.7870271666559755e-05, "loss": 0.7866, "step": 2313 }, { "epoch": 0.24, "grad_norm": 1.6400074273266572, "learning_rate": 1.786823882955083e-05, "loss": 0.8164, "step": 2314 }, { "epoch": 0.24, "grad_norm": 1.502158375278795, "learning_rate": 1.7866205138567626e-05, "loss": 0.7833, "step": 2315 }, { "epoch": 0.24, "grad_norm": 1.4295665349426534, "learning_rate": 1.7864170593830868e-05, "loss": 0.7662, "step": 2316 }, { "epoch": 0.24, "grad_norm": 1.516678854068183, "learning_rate": 1.7862135195561373e-05, "loss": 0.6889, "step": 2317 }, { "epoch": 0.24, "grad_norm": 1.546924044213888, "learning_rate": 1.7860098943980056e-05, "loss": 0.6845, "step": 2318 }, { "epoch": 0.24, "grad_norm": 1.5686659936839316, "learning_rate": 1.7858061839307913e-05, "loss": 0.7132, "step": 2319 }, { "epoch": 0.24, "grad_norm": 1.5550197845969858, "learning_rate": 1.7856023881766048e-05, "loss": 0.7091, "step": 2320 }, { "epoch": 0.24, "grad_norm": 1.529551763484397, "learning_rate": 1.785398507157564e-05, "loss": 0.7918, "step": 2321 }, { "epoch": 0.24, "grad_norm": 1.6061716867769542, "learning_rate": 1.7851945408957977e-05, "loss": 0.788, "step": 2322 }, { "epoch": 0.24, "grad_norm": 1.556076449285782, "learning_rate": 1.784990489413443e-05, "loss": 0.8446, "step": 2323 }, { "epoch": 0.24, "grad_norm": 1.6842789691197135, "learning_rate": 1.7847863527326468e-05, "loss": 0.7131, "step": 2324 }, { "epoch": 0.24, "grad_norm": 1.5732131853316018, "learning_rate": 1.7845821308755644e-05, "loss": 0.7275, "step": 2325 }, { "epoch": 0.24, "grad_norm": 1.6710459182156756, "learning_rate": 1.7843778238643615e-05, "loss": 0.7791, "step": 2326 }, { "epoch": 0.24, "grad_norm": 1.767202341467178, "learning_rate": 1.784173431721212e-05, "loss": 0.801, "step": 2327 }, { "epoch": 0.24, "grad_norm": 1.5185138048879634, "learning_rate": 1.7839689544682993e-05, "loss": 0.8229, "step": 2328 }, { "epoch": 0.24, "grad_norm": 1.6198534151063857, "learning_rate": 1.7837643921278168e-05, "loss": 0.763, "step": 2329 }, { "epoch": 0.24, "grad_norm": 1.3949632519487039, "learning_rate": 1.783559744721966e-05, "loss": 0.7574, "step": 2330 }, { "epoch": 0.24, "grad_norm": 1.6805417262534503, "learning_rate": 1.7833550122729587e-05, "loss": 0.728, "step": 2331 }, { "epoch": 0.24, "grad_norm": 1.5531944312041643, "learning_rate": 1.783150194803015e-05, "loss": 0.7335, "step": 2332 }, { "epoch": 0.24, "grad_norm": 1.7159353418847874, "learning_rate": 1.7829452923343648e-05, "loss": 0.632, "step": 2333 }, { "epoch": 0.24, "grad_norm": 1.491814756386104, "learning_rate": 1.782740304889247e-05, "loss": 0.7295, "step": 2334 }, { "epoch": 0.24, "grad_norm": 1.6304216530256852, "learning_rate": 1.7825352324899093e-05, "loss": 0.7432, "step": 2335 }, { "epoch": 0.24, "grad_norm": 1.5059743877915686, "learning_rate": 1.7823300751586103e-05, "loss": 0.8248, "step": 2336 }, { "epoch": 0.24, "grad_norm": 1.5372983039722445, "learning_rate": 1.7821248329176156e-05, "loss": 0.7279, "step": 2337 }, { "epoch": 0.24, "grad_norm": 1.634906198136157, "learning_rate": 1.7819195057892015e-05, "loss": 0.8099, "step": 2338 }, { "epoch": 0.24, "grad_norm": 1.395598233074499, "learning_rate": 1.781714093795653e-05, "loss": 0.7467, "step": 2339 }, { "epoch": 0.24, "grad_norm": 1.387677729120744, "learning_rate": 1.781508596959264e-05, "loss": 0.6657, "step": 2340 }, { "epoch": 0.24, "grad_norm": 1.665462662562675, "learning_rate": 1.7813030153023382e-05, "loss": 0.7382, "step": 2341 }, { "epoch": 0.24, "grad_norm": 1.4919226889245358, "learning_rate": 1.7810973488471882e-05, "loss": 0.8811, "step": 2342 }, { "epoch": 0.24, "grad_norm": 1.570893636073215, "learning_rate": 1.7808915976161364e-05, "loss": 0.6923, "step": 2343 }, { "epoch": 0.24, "grad_norm": 1.574635587846645, "learning_rate": 1.780685761631513e-05, "loss": 0.7827, "step": 2344 }, { "epoch": 0.24, "grad_norm": 1.6669870967013605, "learning_rate": 1.7804798409156592e-05, "loss": 0.8147, "step": 2345 }, { "epoch": 0.24, "grad_norm": 1.7019734330167111, "learning_rate": 1.7802738354909236e-05, "loss": 0.8692, "step": 2346 }, { "epoch": 0.24, "grad_norm": 1.5679652330468707, "learning_rate": 1.7800677453796656e-05, "loss": 0.7307, "step": 2347 }, { "epoch": 0.24, "grad_norm": 1.5464299583191818, "learning_rate": 1.7798615706042525e-05, "loss": 0.6894, "step": 2348 }, { "epoch": 0.24, "grad_norm": 1.5136573641374516, "learning_rate": 1.7796553111870616e-05, "loss": 0.7219, "step": 2349 }, { "epoch": 0.24, "grad_norm": 1.4193934300294413, "learning_rate": 1.7794489671504793e-05, "loss": 0.7493, "step": 2350 }, { "epoch": 0.24, "grad_norm": 1.4403542637862665, "learning_rate": 1.7792425385169006e-05, "loss": 0.7527, "step": 2351 }, { "epoch": 0.24, "grad_norm": 1.7784718114779616, "learning_rate": 1.7790360253087304e-05, "loss": 0.6761, "step": 2352 }, { "epoch": 0.24, "grad_norm": 1.5741525022384846, "learning_rate": 1.7788294275483826e-05, "loss": 0.7737, "step": 2353 }, { "epoch": 0.24, "grad_norm": 1.5300340210520655, "learning_rate": 1.77862274525828e-05, "loss": 0.7701, "step": 2354 }, { "epoch": 0.24, "grad_norm": 1.668962554632878, "learning_rate": 1.7784159784608543e-05, "loss": 0.7207, "step": 2355 }, { "epoch": 0.24, "grad_norm": 1.4913381646840573, "learning_rate": 1.7782091271785475e-05, "loss": 0.6585, "step": 2356 }, { "epoch": 0.24, "grad_norm": 1.6479277771557135, "learning_rate": 1.7780021914338097e-05, "loss": 0.7769, "step": 2357 }, { "epoch": 0.24, "grad_norm": 1.5844220072583104, "learning_rate": 1.7777951712491007e-05, "loss": 0.7385, "step": 2358 }, { "epoch": 0.24, "grad_norm": 1.5974427028035825, "learning_rate": 1.7775880666468893e-05, "loss": 0.7192, "step": 2359 }, { "epoch": 0.24, "grad_norm": 1.5697418827414715, "learning_rate": 1.777380877649653e-05, "loss": 0.7989, "step": 2360 }, { "epoch": 0.24, "grad_norm": 1.6440250115104977, "learning_rate": 1.7771736042798796e-05, "loss": 0.8351, "step": 2361 }, { "epoch": 0.24, "grad_norm": 1.641336175166559, "learning_rate": 1.7769662465600648e-05, "loss": 0.9197, "step": 2362 }, { "epoch": 0.24, "grad_norm": 1.4982313511845302, "learning_rate": 1.776758804512715e-05, "loss": 0.783, "step": 2363 }, { "epoch": 0.24, "grad_norm": 1.6681412333189058, "learning_rate": 1.7765512781603434e-05, "loss": 0.8278, "step": 2364 }, { "epoch": 0.24, "grad_norm": 1.5493644062862706, "learning_rate": 1.776343667525475e-05, "loss": 0.6787, "step": 2365 }, { "epoch": 0.24, "grad_norm": 1.6440060561290857, "learning_rate": 1.7761359726306416e-05, "loss": 0.682, "step": 2366 }, { "epoch": 0.24, "grad_norm": 1.6511456873028476, "learning_rate": 1.775928193498386e-05, "loss": 0.8085, "step": 2367 }, { "epoch": 0.24, "grad_norm": 1.6045551984799362, "learning_rate": 1.775720330151259e-05, "loss": 0.7526, "step": 2368 }, { "epoch": 0.24, "grad_norm": 1.5439942621819842, "learning_rate": 1.7755123826118215e-05, "loss": 0.8021, "step": 2369 }, { "epoch": 0.24, "grad_norm": 1.4369959230768674, "learning_rate": 1.7753043509026423e-05, "loss": 0.7637, "step": 2370 }, { "epoch": 0.24, "grad_norm": 1.6486282948075461, "learning_rate": 1.7750962350463005e-05, "loss": 0.8355, "step": 2371 }, { "epoch": 0.24, "grad_norm": 1.6284762608456098, "learning_rate": 1.774888035065383e-05, "loss": 0.8257, "step": 2372 }, { "epoch": 0.24, "grad_norm": 1.5193088564965311, "learning_rate": 1.7746797509824875e-05, "loss": 0.7028, "step": 2373 }, { "epoch": 0.24, "grad_norm": 1.4951644348597182, "learning_rate": 1.7744713828202197e-05, "loss": 0.7792, "step": 2374 }, { "epoch": 0.24, "grad_norm": 1.6373593823681687, "learning_rate": 1.7742629306011944e-05, "loss": 0.7483, "step": 2375 }, { "epoch": 0.24, "grad_norm": 1.5588424733699444, "learning_rate": 1.7740543943480366e-05, "loss": 0.7492, "step": 2376 }, { "epoch": 0.24, "grad_norm": 1.5793556324853542, "learning_rate": 1.7738457740833785e-05, "loss": 0.7173, "step": 2377 }, { "epoch": 0.24, "grad_norm": 1.5671484025185511, "learning_rate": 1.7736370698298637e-05, "loss": 0.7549, "step": 2378 }, { "epoch": 0.24, "grad_norm": 1.6090619943711126, "learning_rate": 1.773428281610143e-05, "loss": 0.8098, "step": 2379 }, { "epoch": 0.24, "grad_norm": 1.5578260997102495, "learning_rate": 1.7732194094468774e-05, "loss": 0.7443, "step": 2380 }, { "epoch": 0.24, "grad_norm": 1.4643681684258554, "learning_rate": 1.773010453362737e-05, "loss": 0.6952, "step": 2381 }, { "epoch": 0.24, "grad_norm": 1.6443043569457474, "learning_rate": 1.7728014133804004e-05, "loss": 0.7982, "step": 2382 }, { "epoch": 0.24, "grad_norm": 1.7237283774975798, "learning_rate": 1.7725922895225554e-05, "loss": 0.7161, "step": 2383 }, { "epoch": 0.24, "grad_norm": 1.5062277487135245, "learning_rate": 1.7723830818118997e-05, "loss": 0.8251, "step": 2384 }, { "epoch": 0.24, "grad_norm": 1.5078731457102554, "learning_rate": 1.772173790271139e-05, "loss": 0.6404, "step": 2385 }, { "epoch": 0.24, "grad_norm": 1.5321573853321941, "learning_rate": 1.7719644149229888e-05, "loss": 0.7963, "step": 2386 }, { "epoch": 0.24, "grad_norm": 1.4672533371132384, "learning_rate": 1.7717549557901735e-05, "loss": 0.6917, "step": 2387 }, { "epoch": 0.24, "grad_norm": 1.6271594580379487, "learning_rate": 1.7715454128954265e-05, "loss": 0.8336, "step": 2388 }, { "epoch": 0.24, "grad_norm": 1.5602583919191477, "learning_rate": 1.771335786261491e-05, "loss": 0.7374, "step": 2389 }, { "epoch": 0.24, "grad_norm": 1.5818657874167723, "learning_rate": 1.7711260759111176e-05, "loss": 0.828, "step": 2390 }, { "epoch": 0.24, "grad_norm": 1.558116204908089, "learning_rate": 1.7709162818670682e-05, "loss": 0.6835, "step": 2391 }, { "epoch": 0.24, "grad_norm": 1.7432452033050492, "learning_rate": 1.770706404152112e-05, "loss": 0.8202, "step": 2392 }, { "epoch": 0.24, "grad_norm": 1.5211509608944633, "learning_rate": 1.7704964427890283e-05, "loss": 0.7055, "step": 2393 }, { "epoch": 0.24, "grad_norm": 1.5952756302093576, "learning_rate": 1.770286397800605e-05, "loss": 0.6794, "step": 2394 }, { "epoch": 0.24, "grad_norm": 1.5888107698469187, "learning_rate": 1.770076269209639e-05, "loss": 0.7322, "step": 2395 }, { "epoch": 0.24, "grad_norm": 1.5430566750829608, "learning_rate": 1.7698660570389362e-05, "loss": 0.79, "step": 2396 }, { "epoch": 0.24, "grad_norm": 1.657367611936941, "learning_rate": 1.769655761311313e-05, "loss": 0.7225, "step": 2397 }, { "epoch": 0.24, "grad_norm": 1.5974172027320854, "learning_rate": 1.7694453820495926e-05, "loss": 0.8598, "step": 2398 }, { "epoch": 0.24, "grad_norm": 1.5034867091666928, "learning_rate": 1.769234919276609e-05, "loss": 0.7784, "step": 2399 }, { "epoch": 0.24, "grad_norm": 1.5627926366672371, "learning_rate": 1.7690243730152043e-05, "loss": 0.8084, "step": 2400 }, { "epoch": 0.24, "grad_norm": 1.5433255789976013, "learning_rate": 1.76881374328823e-05, "loss": 0.7259, "step": 2401 }, { "epoch": 0.24, "grad_norm": 1.4802198654414553, "learning_rate": 1.7686030301185467e-05, "loss": 0.7845, "step": 2402 }, { "epoch": 0.24, "grad_norm": 1.5150080798011463, "learning_rate": 1.768392233529024e-05, "loss": 0.8206, "step": 2403 }, { "epoch": 0.24, "grad_norm": 1.443068138380811, "learning_rate": 1.768181353542541e-05, "loss": 0.7574, "step": 2404 }, { "epoch": 0.24, "grad_norm": 1.6784448140480828, "learning_rate": 1.7679703901819846e-05, "loss": 0.7368, "step": 2405 }, { "epoch": 0.24, "grad_norm": 1.6090442224123616, "learning_rate": 1.7677593434702525e-05, "loss": 0.7393, "step": 2406 }, { "epoch": 0.24, "grad_norm": 1.5542583164937114, "learning_rate": 1.7675482134302503e-05, "loss": 0.8111, "step": 2407 }, { "epoch": 0.24, "grad_norm": 1.471717033418376, "learning_rate": 1.767337000084892e-05, "loss": 0.8439, "step": 2408 }, { "epoch": 0.25, "grad_norm": 1.7737988262489541, "learning_rate": 1.7671257034571027e-05, "loss": 0.726, "step": 2409 }, { "epoch": 0.25, "grad_norm": 1.5441726699663372, "learning_rate": 1.7669143235698147e-05, "loss": 0.7305, "step": 2410 }, { "epoch": 0.25, "grad_norm": 1.6664292775600074, "learning_rate": 1.7667028604459698e-05, "loss": 0.8451, "step": 2411 }, { "epoch": 0.25, "grad_norm": 1.6825398377815297, "learning_rate": 1.7664913141085192e-05, "loss": 0.8154, "step": 2412 }, { "epoch": 0.25, "grad_norm": 1.6625355468496055, "learning_rate": 1.7662796845804237e-05, "loss": 0.7677, "step": 2413 }, { "epoch": 0.25, "grad_norm": 1.6267473749888721, "learning_rate": 1.766067971884651e-05, "loss": 0.7411, "step": 2414 }, { "epoch": 0.25, "grad_norm": 1.5825823341562621, "learning_rate": 1.7658561760441803e-05, "loss": 0.7802, "step": 2415 }, { "epoch": 0.25, "grad_norm": 1.6240718740048647, "learning_rate": 1.765644297081998e-05, "loss": 0.7313, "step": 2416 }, { "epoch": 0.25, "grad_norm": 1.6696875876375654, "learning_rate": 1.7654323350211008e-05, "loss": 0.7813, "step": 2417 }, { "epoch": 0.25, "grad_norm": 1.5501823094293559, "learning_rate": 1.7652202898844935e-05, "loss": 0.7431, "step": 2418 }, { "epoch": 0.25, "grad_norm": 1.50758234605498, "learning_rate": 1.765008161695191e-05, "loss": 0.6745, "step": 2419 }, { "epoch": 0.25, "grad_norm": 1.4204801932106466, "learning_rate": 1.7647959504762155e-05, "loss": 0.6802, "step": 2420 }, { "epoch": 0.25, "grad_norm": 1.5743749498683806, "learning_rate": 1.7645836562506e-05, "loss": 0.7562, "step": 2421 }, { "epoch": 0.25, "grad_norm": 1.3687332132205343, "learning_rate": 1.7643712790413848e-05, "loss": 0.5944, "step": 2422 }, { "epoch": 0.25, "grad_norm": 1.7182075046051772, "learning_rate": 1.764158818871621e-05, "loss": 0.7734, "step": 2423 }, { "epoch": 0.25, "grad_norm": 1.5626744617095825, "learning_rate": 1.7639462757643672e-05, "loss": 0.75, "step": 2424 }, { "epoch": 0.25, "grad_norm": 1.5197050336323787, "learning_rate": 1.7637336497426925e-05, "loss": 0.7943, "step": 2425 }, { "epoch": 0.25, "grad_norm": 1.3619574222041446, "learning_rate": 1.7635209408296733e-05, "loss": 0.6312, "step": 2426 }, { "epoch": 0.25, "grad_norm": 1.6305415758528246, "learning_rate": 1.763308149048396e-05, "loss": 0.888, "step": 2427 }, { "epoch": 0.25, "grad_norm": 1.5546492520862543, "learning_rate": 1.763095274421956e-05, "loss": 0.8019, "step": 2428 }, { "epoch": 0.25, "grad_norm": 1.555215142648058, "learning_rate": 1.7628823169734573e-05, "loss": 0.7817, "step": 2429 }, { "epoch": 0.25, "grad_norm": 1.511727865036425, "learning_rate": 1.7626692767260136e-05, "loss": 0.7247, "step": 2430 }, { "epoch": 0.25, "grad_norm": 1.804382355917543, "learning_rate": 1.7624561537027465e-05, "loss": 0.8425, "step": 2431 }, { "epoch": 0.25, "grad_norm": 1.6648886739946853, "learning_rate": 1.7622429479267876e-05, "loss": 0.8356, "step": 2432 }, { "epoch": 0.25, "grad_norm": 1.7135054130486669, "learning_rate": 1.7620296594212766e-05, "loss": 0.7348, "step": 2433 }, { "epoch": 0.25, "grad_norm": 1.4794522885096486, "learning_rate": 1.761816288209363e-05, "loss": 0.7983, "step": 2434 }, { "epoch": 0.25, "grad_norm": 1.7887338748656285, "learning_rate": 1.7616028343142048e-05, "loss": 0.8899, "step": 2435 }, { "epoch": 0.25, "grad_norm": 1.630679636137063, "learning_rate": 1.7613892977589692e-05, "loss": 0.8243, "step": 2436 }, { "epoch": 0.25, "grad_norm": 1.625913634437561, "learning_rate": 1.7611756785668313e-05, "loss": 0.823, "step": 2437 }, { "epoch": 0.25, "grad_norm": 1.4996521582223123, "learning_rate": 1.7609619767609782e-05, "loss": 0.7152, "step": 2438 }, { "epoch": 0.25, "grad_norm": 1.6813915627504108, "learning_rate": 1.760748192364602e-05, "loss": 0.8351, "step": 2439 }, { "epoch": 0.25, "grad_norm": 1.563039082175078, "learning_rate": 1.7605343254009063e-05, "loss": 0.82, "step": 2440 }, { "epoch": 0.25, "grad_norm": 1.5668253334856597, "learning_rate": 1.7603203758931027e-05, "loss": 0.6734, "step": 2441 }, { "epoch": 0.25, "grad_norm": 1.64218612244895, "learning_rate": 1.760106343864413e-05, "loss": 0.7117, "step": 2442 }, { "epoch": 0.25, "grad_norm": 1.4172340689668361, "learning_rate": 1.759892229338066e-05, "loss": 0.7138, "step": 2443 }, { "epoch": 0.25, "grad_norm": 1.5161626935295773, "learning_rate": 1.759678032337301e-05, "loss": 0.7911, "step": 2444 }, { "epoch": 0.25, "grad_norm": 1.6555376454889454, "learning_rate": 1.7594637528853654e-05, "loss": 0.7448, "step": 2445 }, { "epoch": 0.25, "grad_norm": 1.6448450279141316, "learning_rate": 1.7592493910055165e-05, "loss": 0.8029, "step": 2446 }, { "epoch": 0.25, "grad_norm": 1.6811458558176975, "learning_rate": 1.759034946721019e-05, "loss": 0.8106, "step": 2447 }, { "epoch": 0.25, "grad_norm": 1.4864518399180122, "learning_rate": 1.7588204200551486e-05, "loss": 0.8088, "step": 2448 }, { "epoch": 0.25, "grad_norm": 1.685284783331612, "learning_rate": 1.7586058110311882e-05, "loss": 0.7551, "step": 2449 }, { "epoch": 0.25, "grad_norm": 1.7535255994342807, "learning_rate": 1.75839111967243e-05, "loss": 0.7497, "step": 2450 }, { "epoch": 0.25, "grad_norm": 1.519266239438577, "learning_rate": 1.7581763460021758e-05, "loss": 0.7114, "step": 2451 }, { "epoch": 0.25, "grad_norm": 1.5205905936797746, "learning_rate": 1.757961490043736e-05, "loss": 0.7277, "step": 2452 }, { "epoch": 0.25, "grad_norm": 1.4832968313005075, "learning_rate": 1.757746551820429e-05, "loss": 0.6313, "step": 2453 }, { "epoch": 0.25, "grad_norm": 1.5906751619604762, "learning_rate": 1.7575315313555844e-05, "loss": 0.7671, "step": 2454 }, { "epoch": 0.25, "grad_norm": 1.5306138592197545, "learning_rate": 1.757316428672538e-05, "loss": 0.8233, "step": 2455 }, { "epoch": 0.25, "grad_norm": 1.5420758259052774, "learning_rate": 1.7571012437946365e-05, "loss": 0.8207, "step": 2456 }, { "epoch": 0.25, "grad_norm": 1.4863632131027524, "learning_rate": 1.7568859767452347e-05, "loss": 0.6853, "step": 2457 }, { "epoch": 0.25, "grad_norm": 1.5257917166953734, "learning_rate": 1.7566706275476967e-05, "loss": 0.6817, "step": 2458 }, { "epoch": 0.25, "grad_norm": 1.4743887619029097, "learning_rate": 1.7564551962253952e-05, "loss": 0.7811, "step": 2459 }, { "epoch": 0.25, "grad_norm": 1.5472620487297755, "learning_rate": 1.756239682801711e-05, "loss": 0.7322, "step": 2460 }, { "epoch": 0.25, "grad_norm": 1.4691655852007381, "learning_rate": 1.7560240873000363e-05, "loss": 0.7562, "step": 2461 }, { "epoch": 0.25, "grad_norm": 1.4818530953336322, "learning_rate": 1.7558084097437697e-05, "loss": 0.6782, "step": 2462 }, { "epoch": 0.25, "grad_norm": 1.5238066160364558, "learning_rate": 1.7555926501563198e-05, "loss": 0.749, "step": 2463 }, { "epoch": 0.25, "grad_norm": 1.7467509201649687, "learning_rate": 1.7553768085611033e-05, "loss": 0.6228, "step": 2464 }, { "epoch": 0.25, "grad_norm": 1.6127320429072924, "learning_rate": 1.7551608849815473e-05, "loss": 0.796, "step": 2465 }, { "epoch": 0.25, "grad_norm": 1.4307340600762173, "learning_rate": 1.754944879441087e-05, "loss": 0.7548, "step": 2466 }, { "epoch": 0.25, "grad_norm": 1.7693917555197356, "learning_rate": 1.7547287919631655e-05, "loss": 0.8992, "step": 2467 }, { "epoch": 0.25, "grad_norm": 1.4706406131606666, "learning_rate": 1.7545126225712366e-05, "loss": 0.7496, "step": 2468 }, { "epoch": 0.25, "grad_norm": 1.6957896599223548, "learning_rate": 1.7542963712887618e-05, "loss": 0.6873, "step": 2469 }, { "epoch": 0.25, "grad_norm": 1.5260931443330856, "learning_rate": 1.7540800381392116e-05, "loss": 0.7522, "step": 2470 }, { "epoch": 0.25, "grad_norm": 1.543857764791894, "learning_rate": 1.753863623146066e-05, "loss": 0.6656, "step": 2471 }, { "epoch": 0.25, "grad_norm": 1.4427949153903654, "learning_rate": 1.753647126332813e-05, "loss": 0.6484, "step": 2472 }, { "epoch": 0.25, "grad_norm": 1.541434903268351, "learning_rate": 1.7534305477229502e-05, "loss": 0.6468, "step": 2473 }, { "epoch": 0.25, "grad_norm": 1.5649722223214777, "learning_rate": 1.7532138873399838e-05, "loss": 0.7066, "step": 2474 }, { "epoch": 0.25, "grad_norm": 1.453940354232897, "learning_rate": 1.7529971452074288e-05, "loss": 0.7706, "step": 2475 }, { "epoch": 0.25, "grad_norm": 1.4539802973508147, "learning_rate": 1.752780321348809e-05, "loss": 0.635, "step": 2476 }, { "epoch": 0.25, "grad_norm": 1.5904603944217826, "learning_rate": 1.752563415787658e-05, "loss": 0.8014, "step": 2477 }, { "epoch": 0.25, "grad_norm": 1.6679468593874847, "learning_rate": 1.752346428547517e-05, "loss": 0.8044, "step": 2478 }, { "epoch": 0.25, "grad_norm": 1.5745163149114132, "learning_rate": 1.7521293596519364e-05, "loss": 0.835, "step": 2479 }, { "epoch": 0.25, "grad_norm": 1.6925493416084263, "learning_rate": 1.7519122091244757e-05, "loss": 0.7898, "step": 2480 }, { "epoch": 0.25, "grad_norm": 1.511155784957144, "learning_rate": 1.7516949769887032e-05, "loss": 0.7317, "step": 2481 }, { "epoch": 0.25, "grad_norm": 1.6708420971092535, "learning_rate": 1.7514776632681964e-05, "loss": 0.8107, "step": 2482 }, { "epoch": 0.25, "grad_norm": 1.6091074751165642, "learning_rate": 1.751260267986541e-05, "loss": 0.7963, "step": 2483 }, { "epoch": 0.25, "grad_norm": 1.6040076390401958, "learning_rate": 1.751042791167332e-05, "loss": 0.7685, "step": 2484 }, { "epoch": 0.25, "grad_norm": 1.6669016961142142, "learning_rate": 1.7508252328341726e-05, "loss": 0.8528, "step": 2485 }, { "epoch": 0.25, "grad_norm": 1.5555218509357984, "learning_rate": 1.7506075930106757e-05, "loss": 0.7461, "step": 2486 }, { "epoch": 0.25, "grad_norm": 1.5085029479729102, "learning_rate": 1.750389871720463e-05, "loss": 0.8095, "step": 2487 }, { "epoch": 0.25, "grad_norm": 1.647440048436809, "learning_rate": 1.750172068987165e-05, "loss": 0.8776, "step": 2488 }, { "epoch": 0.25, "grad_norm": 1.5388843511885144, "learning_rate": 1.7499541848344198e-05, "loss": 0.7754, "step": 2489 }, { "epoch": 0.25, "grad_norm": 1.608332838142146, "learning_rate": 1.749736219285876e-05, "loss": 0.8276, "step": 2490 }, { "epoch": 0.25, "grad_norm": 1.4894710731185878, "learning_rate": 1.7495181723651898e-05, "loss": 0.7, "step": 2491 }, { "epoch": 0.25, "grad_norm": 1.4209838347335078, "learning_rate": 1.7493000440960277e-05, "loss": 0.6782, "step": 2492 }, { "epoch": 0.25, "grad_norm": 1.506093175873103, "learning_rate": 1.749081834502063e-05, "loss": 0.6989, "step": 2493 }, { "epoch": 0.25, "grad_norm": 1.5791699181274723, "learning_rate": 1.74886354360698e-05, "loss": 0.7106, "step": 2494 }, { "epoch": 0.25, "grad_norm": 1.5764451475291832, "learning_rate": 1.74864517143447e-05, "loss": 0.7963, "step": 2495 }, { "epoch": 0.25, "grad_norm": 1.5289101233823514, "learning_rate": 1.7484267180082343e-05, "loss": 0.7024, "step": 2496 }, { "epoch": 0.25, "grad_norm": 1.605164422817265, "learning_rate": 1.7482081833519822e-05, "loss": 0.847, "step": 2497 }, { "epoch": 0.25, "grad_norm": 1.730092743675462, "learning_rate": 1.7479895674894326e-05, "loss": 0.6953, "step": 2498 }, { "epoch": 0.25, "grad_norm": 1.6454716877649915, "learning_rate": 1.7477708704443125e-05, "loss": 0.7739, "step": 2499 }, { "epoch": 0.25, "grad_norm": 1.8375789056356429, "learning_rate": 1.7475520922403582e-05, "loss": 0.877, "step": 2500 }, { "epoch": 0.25, "grad_norm": 1.5146743080412426, "learning_rate": 1.7473332329013152e-05, "loss": 0.7364, "step": 2501 }, { "epoch": 0.25, "grad_norm": 1.4101525134673571, "learning_rate": 1.7471142924509365e-05, "loss": 0.6998, "step": 2502 }, { "epoch": 0.25, "grad_norm": 1.4413863461289211, "learning_rate": 1.7468952709129848e-05, "loss": 0.7836, "step": 2503 }, { "epoch": 0.25, "grad_norm": 1.4701548661669526, "learning_rate": 1.7466761683112316e-05, "loss": 0.6526, "step": 2504 }, { "epoch": 0.25, "grad_norm": 1.529401860380887, "learning_rate": 1.746456984669457e-05, "loss": 0.7717, "step": 2505 }, { "epoch": 0.25, "grad_norm": 1.3710047924443467, "learning_rate": 1.7462377200114503e-05, "loss": 0.6129, "step": 2506 }, { "epoch": 0.26, "grad_norm": 1.7241252121013668, "learning_rate": 1.7460183743610085e-05, "loss": 0.7578, "step": 2507 }, { "epoch": 0.26, "grad_norm": 1.6915304109956375, "learning_rate": 1.745798947741939e-05, "loss": 0.7747, "step": 2508 }, { "epoch": 0.26, "grad_norm": 1.5089746597984957, "learning_rate": 1.7455794401780565e-05, "loss": 0.7323, "step": 2509 }, { "epoch": 0.26, "grad_norm": 1.4422751178761635, "learning_rate": 1.745359851693185e-05, "loss": 0.7499, "step": 2510 }, { "epoch": 0.26, "grad_norm": 1.5399503613199372, "learning_rate": 1.7451401823111585e-05, "loss": 0.7636, "step": 2511 }, { "epoch": 0.26, "grad_norm": 1.606901545574598, "learning_rate": 1.7449204320558172e-05, "loss": 0.7686, "step": 2512 }, { "epoch": 0.26, "grad_norm": 1.5511141257902685, "learning_rate": 1.7447006009510127e-05, "loss": 0.6931, "step": 2513 }, { "epoch": 0.26, "grad_norm": 1.425351819376338, "learning_rate": 1.7444806890206036e-05, "loss": 0.7941, "step": 2514 }, { "epoch": 0.26, "grad_norm": 1.3726498678601882, "learning_rate": 1.7442606962884582e-05, "loss": 0.7392, "step": 2515 }, { "epoch": 0.26, "grad_norm": 1.430601606264957, "learning_rate": 1.744040622778453e-05, "loss": 0.6102, "step": 2516 }, { "epoch": 0.26, "grad_norm": 1.5338334677524086, "learning_rate": 1.7438204685144733e-05, "loss": 0.8332, "step": 2517 }, { "epoch": 0.26, "grad_norm": 1.5960453889328998, "learning_rate": 1.7436002335204144e-05, "loss": 0.8608, "step": 2518 }, { "epoch": 0.26, "grad_norm": 1.7945166823710936, "learning_rate": 1.7433799178201786e-05, "loss": 0.8269, "step": 2519 }, { "epoch": 0.26, "grad_norm": 1.630442107761875, "learning_rate": 1.743159521437678e-05, "loss": 0.8059, "step": 2520 }, { "epoch": 0.26, "grad_norm": 1.5696632337208958, "learning_rate": 1.7429390443968327e-05, "loss": 0.847, "step": 2521 }, { "epoch": 0.26, "grad_norm": 1.7247206084773075, "learning_rate": 1.742718486721573e-05, "loss": 0.868, "step": 2522 }, { "epoch": 0.26, "grad_norm": 1.6226798368416193, "learning_rate": 1.742497848435836e-05, "loss": 0.8623, "step": 2523 }, { "epoch": 0.26, "grad_norm": 1.5800909522421087, "learning_rate": 1.742277129563569e-05, "loss": 0.7399, "step": 2524 }, { "epoch": 0.26, "grad_norm": 2.6787372496634005, "learning_rate": 1.742056330128727e-05, "loss": 0.6171, "step": 2525 }, { "epoch": 0.26, "grad_norm": 1.6313396899700607, "learning_rate": 1.7418354501552755e-05, "loss": 0.8866, "step": 2526 }, { "epoch": 0.26, "grad_norm": 1.6581527447564648, "learning_rate": 1.7416144896671868e-05, "loss": 0.7926, "step": 2527 }, { "epoch": 0.26, "grad_norm": 1.4389725304207477, "learning_rate": 1.7413934486884424e-05, "loss": 0.751, "step": 2528 }, { "epoch": 0.26, "grad_norm": 1.626069977682484, "learning_rate": 1.7411723272430337e-05, "loss": 0.7008, "step": 2529 }, { "epoch": 0.26, "grad_norm": 1.5656126801302932, "learning_rate": 1.7409511253549592e-05, "loss": 0.7889, "step": 2530 }, { "epoch": 0.26, "grad_norm": 1.5549251618392606, "learning_rate": 1.7407298430482272e-05, "loss": 0.702, "step": 2531 }, { "epoch": 0.26, "grad_norm": 1.7298135822967657, "learning_rate": 1.7405084803468545e-05, "loss": 0.7869, "step": 2532 }, { "epoch": 0.26, "grad_norm": 1.5801692734885895, "learning_rate": 1.7402870372748667e-05, "loss": 0.7014, "step": 2533 }, { "epoch": 0.26, "grad_norm": 1.585716907263995, "learning_rate": 1.7400655138562977e-05, "loss": 0.7708, "step": 2534 }, { "epoch": 0.26, "grad_norm": 1.6201214371053156, "learning_rate": 1.7398439101151908e-05, "loss": 0.7475, "step": 2535 }, { "epoch": 0.26, "grad_norm": 1.5630473760032266, "learning_rate": 1.7396222260755967e-05, "loss": 0.7259, "step": 2536 }, { "epoch": 0.26, "grad_norm": 1.6177675004741257, "learning_rate": 1.739400461761577e-05, "loss": 0.8223, "step": 2537 }, { "epoch": 0.26, "grad_norm": 1.5956924130781553, "learning_rate": 1.7391786171971995e-05, "loss": 0.8004, "step": 2538 }, { "epoch": 0.26, "grad_norm": 1.548599637727936, "learning_rate": 1.738956692406543e-05, "loss": 0.7373, "step": 2539 }, { "epoch": 0.26, "grad_norm": 1.8006817203863534, "learning_rate": 1.7387346874136936e-05, "loss": 0.8084, "step": 2540 }, { "epoch": 0.26, "grad_norm": 1.5053069784925723, "learning_rate": 1.7385126022427462e-05, "loss": 0.7325, "step": 2541 }, { "epoch": 0.26, "grad_norm": 1.6085547890387042, "learning_rate": 1.738290436917805e-05, "loss": 0.8145, "step": 2542 }, { "epoch": 0.26, "grad_norm": 1.5814550489473806, "learning_rate": 1.738068191462982e-05, "loss": 0.7558, "step": 2543 }, { "epoch": 0.26, "grad_norm": 1.5549657373912378, "learning_rate": 1.7378458659023995e-05, "loss": 0.7423, "step": 2544 }, { "epoch": 0.26, "grad_norm": 1.5182984340635106, "learning_rate": 1.7376234602601862e-05, "loss": 0.7716, "step": 2545 }, { "epoch": 0.26, "grad_norm": 1.6084968643569708, "learning_rate": 1.737400974560482e-05, "loss": 0.8375, "step": 2546 }, { "epoch": 0.26, "grad_norm": 1.562666012399755, "learning_rate": 1.7371784088274336e-05, "loss": 0.7752, "step": 2547 }, { "epoch": 0.26, "grad_norm": 1.4105680133899585, "learning_rate": 1.7369557630851968e-05, "loss": 0.7848, "step": 2548 }, { "epoch": 0.26, "grad_norm": 1.6011744787383677, "learning_rate": 1.736733037357937e-05, "loss": 0.8372, "step": 2549 }, { "epoch": 0.26, "grad_norm": 1.5803871758323422, "learning_rate": 1.736510231669827e-05, "loss": 0.7333, "step": 2550 }, { "epoch": 0.26, "grad_norm": 1.5883478183700042, "learning_rate": 1.736287346045049e-05, "loss": 0.8632, "step": 2551 }, { "epoch": 0.26, "grad_norm": 1.6146855441975676, "learning_rate": 1.736064380507794e-05, "loss": 0.7427, "step": 2552 }, { "epoch": 0.26, "grad_norm": 1.8021113066776384, "learning_rate": 1.735841335082261e-05, "loss": 0.8252, "step": 2553 }, { "epoch": 0.26, "grad_norm": 1.5866143850955474, "learning_rate": 1.7356182097926586e-05, "loss": 0.7464, "step": 2554 }, { "epoch": 0.26, "grad_norm": 1.4130768308137258, "learning_rate": 1.7353950046632034e-05, "loss": 0.7745, "step": 2555 }, { "epoch": 0.26, "grad_norm": 1.707661990330429, "learning_rate": 1.7351717197181205e-05, "loss": 0.7891, "step": 2556 }, { "epoch": 0.26, "grad_norm": 1.7951534499962143, "learning_rate": 1.7349483549816442e-05, "loss": 0.7286, "step": 2557 }, { "epoch": 0.26, "grad_norm": 1.499878405477149, "learning_rate": 1.7347249104780174e-05, "loss": 0.6814, "step": 2558 }, { "epoch": 0.26, "grad_norm": 1.441729542410146, "learning_rate": 1.7345013862314915e-05, "loss": 0.7117, "step": 2559 }, { "epoch": 0.26, "grad_norm": 1.485469000960217, "learning_rate": 1.734277782266326e-05, "loss": 0.7257, "step": 2560 }, { "epoch": 0.26, "grad_norm": 1.6231370649812686, "learning_rate": 1.7340540986067907e-05, "loss": 0.8767, "step": 2561 }, { "epoch": 0.26, "grad_norm": 1.5556532622432997, "learning_rate": 1.733830335277162e-05, "loss": 0.6962, "step": 2562 }, { "epoch": 0.26, "grad_norm": 1.5465549577351825, "learning_rate": 1.733606492301726e-05, "loss": 0.6955, "step": 2563 }, { "epoch": 0.26, "grad_norm": 1.5457692336086464, "learning_rate": 1.7333825697047778e-05, "loss": 0.7819, "step": 2564 }, { "epoch": 0.26, "grad_norm": 1.5236657144163215, "learning_rate": 1.7331585675106205e-05, "loss": 0.8064, "step": 2565 }, { "epoch": 0.26, "grad_norm": 1.482979384172734, "learning_rate": 1.732934485743566e-05, "loss": 0.6854, "step": 2566 }, { "epoch": 0.26, "grad_norm": 1.5310665557094747, "learning_rate": 1.732710324427935e-05, "loss": 0.7453, "step": 2567 }, { "epoch": 0.26, "grad_norm": 1.5265915332766808, "learning_rate": 1.7324860835880564e-05, "loss": 0.7083, "step": 2568 }, { "epoch": 0.26, "grad_norm": 1.5640457528269724, "learning_rate": 1.7322617632482682e-05, "loss": 0.8216, "step": 2569 }, { "epoch": 0.26, "grad_norm": 1.5862358089141146, "learning_rate": 1.732037363432917e-05, "loss": 0.7527, "step": 2570 }, { "epoch": 0.26, "grad_norm": 1.5229343485201696, "learning_rate": 1.731812884166358e-05, "loss": 0.7896, "step": 2571 }, { "epoch": 0.26, "grad_norm": 1.4177542192456623, "learning_rate": 1.7315883254729545e-05, "loss": 0.7879, "step": 2572 }, { "epoch": 0.26, "grad_norm": 1.6326061490316859, "learning_rate": 1.731363687377079e-05, "loss": 0.7037, "step": 2573 }, { "epoch": 0.26, "grad_norm": 1.601629614231217, "learning_rate": 1.7311389699031123e-05, "loss": 0.6777, "step": 2574 }, { "epoch": 0.26, "grad_norm": 1.6520831590575529, "learning_rate": 1.7309141730754445e-05, "loss": 0.7052, "step": 2575 }, { "epoch": 0.26, "grad_norm": 1.619518747814848, "learning_rate": 1.730689296918473e-05, "loss": 0.7202, "step": 2576 }, { "epoch": 0.26, "grad_norm": 1.4296269965485597, "learning_rate": 1.7304643414566054e-05, "loss": 0.6845, "step": 2577 }, { "epoch": 0.26, "grad_norm": 1.489642595553068, "learning_rate": 1.7302393067142567e-05, "loss": 0.7439, "step": 2578 }, { "epoch": 0.26, "grad_norm": 1.5089579003609654, "learning_rate": 1.7300141927158506e-05, "loss": 0.6531, "step": 2579 }, { "epoch": 0.26, "grad_norm": 1.5543973690671014, "learning_rate": 1.7297889994858207e-05, "loss": 0.7204, "step": 2580 }, { "epoch": 0.26, "grad_norm": 1.5813435578709207, "learning_rate": 1.729563727048607e-05, "loss": 0.7328, "step": 2581 }, { "epoch": 0.26, "grad_norm": 1.79196216418814, "learning_rate": 1.7293383754286597e-05, "loss": 0.8422, "step": 2582 }, { "epoch": 0.26, "grad_norm": 1.4605108163184226, "learning_rate": 1.7291129446504376e-05, "loss": 0.7435, "step": 2583 }, { "epoch": 0.26, "grad_norm": 1.5836072342624712, "learning_rate": 1.7288874347384074e-05, "loss": 0.7172, "step": 2584 }, { "epoch": 0.26, "grad_norm": 1.5132036770627308, "learning_rate": 1.7286618457170445e-05, "loss": 0.733, "step": 2585 }, { "epoch": 0.26, "grad_norm": 1.4698388154169335, "learning_rate": 1.7284361776108334e-05, "loss": 0.7593, "step": 2586 }, { "epoch": 0.26, "grad_norm": 1.6601052501100197, "learning_rate": 1.7282104304442665e-05, "loss": 0.7853, "step": 2587 }, { "epoch": 0.26, "grad_norm": 1.6290132199377816, "learning_rate": 1.7279846042418457e-05, "loss": 0.8818, "step": 2588 }, { "epoch": 0.26, "grad_norm": 1.5030303854427125, "learning_rate": 1.72775869902808e-05, "loss": 0.7677, "step": 2589 }, { "epoch": 0.26, "grad_norm": 1.7115445257183575, "learning_rate": 1.727532714827489e-05, "loss": 0.7636, "step": 2590 }, { "epoch": 0.26, "grad_norm": 1.6349488084588633, "learning_rate": 1.727306651664599e-05, "loss": 0.85, "step": 2591 }, { "epoch": 0.26, "grad_norm": 1.5229765706901655, "learning_rate": 1.7270805095639453e-05, "loss": 0.7667, "step": 2592 }, { "epoch": 0.26, "grad_norm": 1.4680567877934216, "learning_rate": 1.726854288550073e-05, "loss": 0.741, "step": 2593 }, { "epoch": 0.26, "grad_norm": 1.5454570479760592, "learning_rate": 1.726627988647534e-05, "loss": 0.7619, "step": 2594 }, { "epoch": 0.26, "grad_norm": 1.4331477716947236, "learning_rate": 1.7264016098808904e-05, "loss": 0.7499, "step": 2595 }, { "epoch": 0.26, "grad_norm": 1.6029816212533754, "learning_rate": 1.7261751522747116e-05, "loss": 0.735, "step": 2596 }, { "epoch": 0.26, "grad_norm": 1.6261320064664497, "learning_rate": 1.725948615853576e-05, "loss": 0.8171, "step": 2597 }, { "epoch": 0.26, "grad_norm": 1.5537449429626073, "learning_rate": 1.725722000642071e-05, "loss": 0.7344, "step": 2598 }, { "epoch": 0.26, "grad_norm": 1.7498220500226265, "learning_rate": 1.7254953066647915e-05, "loss": 0.8478, "step": 2599 }, { "epoch": 0.26, "grad_norm": 1.4349222353392714, "learning_rate": 1.7252685339463423e-05, "loss": 0.7856, "step": 2600 }, { "epoch": 0.26, "grad_norm": 1.5870205146425946, "learning_rate": 1.7250416825113355e-05, "loss": 0.7084, "step": 2601 }, { "epoch": 0.26, "grad_norm": 1.5105277145464684, "learning_rate": 1.7248147523843925e-05, "loss": 0.7371, "step": 2602 }, { "epoch": 0.26, "grad_norm": 1.5539051554492644, "learning_rate": 1.7245877435901428e-05, "loss": 0.7413, "step": 2603 }, { "epoch": 0.26, "grad_norm": 1.5027853678248158, "learning_rate": 1.7243606561532247e-05, "loss": 0.8116, "step": 2604 }, { "epoch": 0.26, "grad_norm": 1.6730095843999155, "learning_rate": 1.7241334900982854e-05, "loss": 0.7399, "step": 2605 }, { "epoch": 0.27, "grad_norm": 1.582166574687787, "learning_rate": 1.72390624544998e-05, "loss": 0.7094, "step": 2606 }, { "epoch": 0.27, "grad_norm": 1.6502380774099639, "learning_rate": 1.723678922232972e-05, "loss": 0.7626, "step": 2607 }, { "epoch": 0.27, "grad_norm": 1.666325386956416, "learning_rate": 1.7234515204719342e-05, "loss": 0.7526, "step": 2608 }, { "epoch": 0.27, "grad_norm": 1.5806104358843491, "learning_rate": 1.7232240401915473e-05, "loss": 0.7783, "step": 2609 }, { "epoch": 0.27, "grad_norm": 1.5524286203165254, "learning_rate": 1.722996481416501e-05, "loss": 0.7713, "step": 2610 }, { "epoch": 0.27, "grad_norm": 1.5354166257297275, "learning_rate": 1.722768844171493e-05, "loss": 0.7797, "step": 2611 }, { "epoch": 0.27, "grad_norm": 1.648540577003657, "learning_rate": 1.7225411284812294e-05, "loss": 0.8315, "step": 2612 }, { "epoch": 0.27, "grad_norm": 1.4579288968385267, "learning_rate": 1.722313334370426e-05, "loss": 0.8666, "step": 2613 }, { "epoch": 0.27, "grad_norm": 1.4702550601601378, "learning_rate": 1.722085461863806e-05, "loss": 0.6591, "step": 2614 }, { "epoch": 0.27, "grad_norm": 1.6201922996801192, "learning_rate": 1.721857510986101e-05, "loss": 0.7371, "step": 2615 }, { "epoch": 0.27, "grad_norm": 1.5990364634319834, "learning_rate": 1.7216294817620518e-05, "loss": 0.8072, "step": 2616 }, { "epoch": 0.27, "grad_norm": 1.5437191298534145, "learning_rate": 1.7214013742164072e-05, "loss": 0.846, "step": 2617 }, { "epoch": 0.27, "grad_norm": 1.5671733942399428, "learning_rate": 1.7211731883739254e-05, "loss": 0.7216, "step": 2618 }, { "epoch": 0.27, "grad_norm": 1.5207270159074848, "learning_rate": 1.7209449242593715e-05, "loss": 0.8302, "step": 2619 }, { "epoch": 0.27, "grad_norm": 1.4966851389973288, "learning_rate": 1.7207165818975206e-05, "loss": 0.7077, "step": 2620 }, { "epoch": 0.27, "grad_norm": 1.6020631870179807, "learning_rate": 1.720488161313155e-05, "loss": 0.7177, "step": 2621 }, { "epoch": 0.27, "grad_norm": 1.6527642932203361, "learning_rate": 1.720259662531067e-05, "loss": 0.8524, "step": 2622 }, { "epoch": 0.27, "grad_norm": 1.5292848541794821, "learning_rate": 1.7200310855760564e-05, "loss": 0.6932, "step": 2623 }, { "epoch": 0.27, "grad_norm": 1.5040037411224556, "learning_rate": 1.7198024304729314e-05, "loss": 0.7989, "step": 2624 }, { "epoch": 0.27, "grad_norm": 1.6330275027530194, "learning_rate": 1.7195736972465087e-05, "loss": 0.7362, "step": 2625 }, { "epoch": 0.27, "grad_norm": 1.4973184520025868, "learning_rate": 1.7193448859216146e-05, "loss": 0.7976, "step": 2626 }, { "epoch": 0.27, "grad_norm": 1.5210898000562127, "learning_rate": 1.719115996523082e-05, "loss": 0.8209, "step": 2627 }, { "epoch": 0.27, "grad_norm": 2.823776645172069, "learning_rate": 1.7188870290757536e-05, "loss": 0.6101, "step": 2628 }, { "epoch": 0.27, "grad_norm": 1.3714876037564456, "learning_rate": 1.7186579836044804e-05, "loss": 0.7237, "step": 2629 }, { "epoch": 0.27, "grad_norm": 1.5517520331930001, "learning_rate": 1.718428860134122e-05, "loss": 0.7674, "step": 2630 }, { "epoch": 0.27, "grad_norm": 1.5261112683689515, "learning_rate": 1.7181996586895456e-05, "loss": 0.7371, "step": 2631 }, { "epoch": 0.27, "grad_norm": 1.546182807578598, "learning_rate": 1.7179703792956276e-05, "loss": 0.6897, "step": 2632 }, { "epoch": 0.27, "grad_norm": 1.6211807619267422, "learning_rate": 1.7177410219772527e-05, "loss": 0.7503, "step": 2633 }, { "epoch": 0.27, "grad_norm": 1.6640469296407736, "learning_rate": 1.7175115867593143e-05, "loss": 0.8524, "step": 2634 }, { "epoch": 0.27, "grad_norm": 1.5480571031288723, "learning_rate": 1.7172820736667133e-05, "loss": 0.7468, "step": 2635 }, { "epoch": 0.27, "grad_norm": 1.685157331961948, "learning_rate": 1.7170524827243608e-05, "loss": 0.7039, "step": 2636 }, { "epoch": 0.27, "grad_norm": 1.5492580721226499, "learning_rate": 1.7168228139571744e-05, "loss": 0.8109, "step": 2637 }, { "epoch": 0.27, "grad_norm": 1.4887599314971207, "learning_rate": 1.7165930673900812e-05, "loss": 0.7612, "step": 2638 }, { "epoch": 0.27, "grad_norm": 1.6865716116863976, "learning_rate": 1.7163632430480172e-05, "loss": 0.7688, "step": 2639 }, { "epoch": 0.27, "grad_norm": 1.6204207493635825, "learning_rate": 1.7161333409559256e-05, "loss": 0.7999, "step": 2640 }, { "epoch": 0.27, "grad_norm": 1.5023197048656975, "learning_rate": 1.715903361138759e-05, "loss": 0.7147, "step": 2641 }, { "epoch": 0.27, "grad_norm": 1.4427185908059887, "learning_rate": 1.715673303621478e-05, "loss": 0.6728, "step": 2642 }, { "epoch": 0.27, "grad_norm": 1.4589691465012895, "learning_rate": 1.715443168429052e-05, "loss": 0.8163, "step": 2643 }, { "epoch": 0.27, "grad_norm": 1.622314886789072, "learning_rate": 1.7152129555864583e-05, "loss": 0.7668, "step": 2644 }, { "epoch": 0.27, "grad_norm": 1.5875141396662342, "learning_rate": 1.7149826651186828e-05, "loss": 0.7138, "step": 2645 }, { "epoch": 0.27, "grad_norm": 1.5819705613379111, "learning_rate": 1.71475229705072e-05, "loss": 0.7547, "step": 2646 }, { "epoch": 0.27, "grad_norm": 1.5286856655549894, "learning_rate": 1.714521851407573e-05, "loss": 0.6983, "step": 2647 }, { "epoch": 0.27, "grad_norm": 1.6680424585241245, "learning_rate": 1.7142913282142528e-05, "loss": 0.8304, "step": 2648 }, { "epoch": 0.27, "grad_norm": 1.3949753968844358, "learning_rate": 1.7140607274957793e-05, "loss": 0.7176, "step": 2649 }, { "epoch": 0.27, "grad_norm": 1.7513081690313692, "learning_rate": 1.7138300492771803e-05, "loss": 0.7674, "step": 2650 }, { "epoch": 0.27, "grad_norm": 1.5134706541956857, "learning_rate": 1.7135992935834927e-05, "loss": 0.6783, "step": 2651 }, { "epoch": 0.27, "grad_norm": 1.749163817172862, "learning_rate": 1.7133684604397613e-05, "loss": 0.7608, "step": 2652 }, { "epoch": 0.27, "grad_norm": 1.4252020954515696, "learning_rate": 1.713137549871039e-05, "loss": 0.764, "step": 2653 }, { "epoch": 0.27, "grad_norm": 1.6358995582805185, "learning_rate": 1.712906561902388e-05, "loss": 0.7249, "step": 2654 }, { "epoch": 0.27, "grad_norm": 1.535740650980999, "learning_rate": 1.7126754965588785e-05, "loss": 0.6888, "step": 2655 }, { "epoch": 0.27, "grad_norm": 1.2716763178954538, "learning_rate": 1.7124443538655887e-05, "loss": 0.6396, "step": 2656 }, { "epoch": 0.27, "grad_norm": 1.5575463059511079, "learning_rate": 1.7122131338476058e-05, "loss": 0.6889, "step": 2657 }, { "epoch": 0.27, "grad_norm": 1.5997207855607138, "learning_rate": 1.7119818365300246e-05, "loss": 0.735, "step": 2658 }, { "epoch": 0.27, "grad_norm": 1.65220121537614, "learning_rate": 1.7117504619379498e-05, "loss": 0.7919, "step": 2659 }, { "epoch": 0.27, "grad_norm": 1.5541586302274732, "learning_rate": 1.7115190100964926e-05, "loss": 0.7166, "step": 2660 }, { "epoch": 0.27, "grad_norm": 1.4452623372732751, "learning_rate": 1.7112874810307736e-05, "loss": 0.68, "step": 2661 }, { "epoch": 0.27, "grad_norm": 1.5159176357495028, "learning_rate": 1.711055874765922e-05, "loss": 0.8157, "step": 2662 }, { "epoch": 0.27, "grad_norm": 1.5363560553715614, "learning_rate": 1.710824191327075e-05, "loss": 0.7492, "step": 2663 }, { "epoch": 0.27, "grad_norm": 1.7141688792271914, "learning_rate": 1.7105924307393776e-05, "loss": 0.7959, "step": 2664 }, { "epoch": 0.27, "grad_norm": 1.6963032092016188, "learning_rate": 1.7103605930279847e-05, "loss": 0.789, "step": 2665 }, { "epoch": 0.27, "grad_norm": 1.570563090841888, "learning_rate": 1.7101286782180585e-05, "loss": 0.7145, "step": 2666 }, { "epoch": 0.27, "grad_norm": 1.5015102151992468, "learning_rate": 1.7098966863347687e-05, "loss": 0.7126, "step": 2667 }, { "epoch": 0.27, "grad_norm": 1.523283582923896, "learning_rate": 1.7096646174032955e-05, "loss": 0.7493, "step": 2668 }, { "epoch": 0.27, "grad_norm": 1.6155367999750465, "learning_rate": 1.7094324714488258e-05, "loss": 0.7459, "step": 2669 }, { "epoch": 0.27, "grad_norm": 1.4925118200717544, "learning_rate": 1.709200248496556e-05, "loss": 0.8062, "step": 2670 }, { "epoch": 0.27, "grad_norm": 1.4778170137928905, "learning_rate": 1.7089679485716897e-05, "loss": 0.7196, "step": 2671 }, { "epoch": 0.27, "grad_norm": 1.4904397859832723, "learning_rate": 1.7087355716994398e-05, "loss": 0.6981, "step": 2672 }, { "epoch": 0.27, "grad_norm": 1.7317550031169902, "learning_rate": 1.7085031179050268e-05, "loss": 0.7211, "step": 2673 }, { "epoch": 0.27, "grad_norm": 1.489607713671086, "learning_rate": 1.7082705872136797e-05, "loss": 0.7409, "step": 2674 }, { "epoch": 0.27, "grad_norm": 1.516574391694995, "learning_rate": 1.708037979650637e-05, "loss": 0.7761, "step": 2675 }, { "epoch": 0.27, "grad_norm": 1.6355128601376654, "learning_rate": 1.707805295241144e-05, "loss": 0.8915, "step": 2676 }, { "epoch": 0.27, "grad_norm": 1.5304123605760016, "learning_rate": 1.707572534010455e-05, "loss": 0.849, "step": 2677 }, { "epoch": 0.27, "grad_norm": 1.5575421176033872, "learning_rate": 1.7073396959838326e-05, "loss": 0.724, "step": 2678 }, { "epoch": 0.27, "grad_norm": 1.696531813138858, "learning_rate": 1.7071067811865477e-05, "loss": 0.7578, "step": 2679 }, { "epoch": 0.27, "grad_norm": 1.6790816568746896, "learning_rate": 1.7068737896438796e-05, "loss": 0.8616, "step": 2680 }, { "epoch": 0.27, "grad_norm": 1.7031711209522635, "learning_rate": 1.706640721381116e-05, "loss": 0.7563, "step": 2681 }, { "epoch": 0.27, "grad_norm": 1.4519377943836558, "learning_rate": 1.7064075764235527e-05, "loss": 0.6649, "step": 2682 }, { "epoch": 0.27, "grad_norm": 1.6649570253386337, "learning_rate": 1.706174354796494e-05, "loss": 0.8154, "step": 2683 }, { "epoch": 0.27, "grad_norm": 1.620028413852496, "learning_rate": 1.7059410565252525e-05, "loss": 0.7565, "step": 2684 }, { "epoch": 0.27, "grad_norm": 1.355628318623081, "learning_rate": 1.7057076816351487e-05, "loss": 0.5766, "step": 2685 }, { "epoch": 0.27, "grad_norm": 1.4905400260550263, "learning_rate": 1.7054742301515123e-05, "loss": 0.6522, "step": 2686 }, { "epoch": 0.27, "grad_norm": 1.6314227432654287, "learning_rate": 1.7052407020996804e-05, "loss": 0.7687, "step": 2687 }, { "epoch": 0.27, "grad_norm": 1.6353452003075364, "learning_rate": 1.7050070975049993e-05, "loss": 0.7895, "step": 2688 }, { "epoch": 0.27, "grad_norm": 1.3951006709312348, "learning_rate": 1.7047734163928227e-05, "loss": 0.7829, "step": 2689 }, { "epoch": 0.27, "grad_norm": 1.6440774612884574, "learning_rate": 1.704539658788513e-05, "loss": 0.8283, "step": 2690 }, { "epoch": 0.27, "grad_norm": 1.4537194274268685, "learning_rate": 1.7043058247174414e-05, "loss": 0.639, "step": 2691 }, { "epoch": 0.27, "grad_norm": 1.6083947454220908, "learning_rate": 1.7040719142049864e-05, "loss": 0.6641, "step": 2692 }, { "epoch": 0.27, "grad_norm": 1.6615547545200424, "learning_rate": 1.7038379272765357e-05, "loss": 0.7293, "step": 2693 }, { "epoch": 0.27, "grad_norm": 1.5466180268091978, "learning_rate": 1.703603863957485e-05, "loss": 0.7146, "step": 2694 }, { "epoch": 0.27, "grad_norm": 1.5668440062466291, "learning_rate": 1.7033697242732376e-05, "loss": 0.8263, "step": 2695 }, { "epoch": 0.27, "grad_norm": 1.7239135169274935, "learning_rate": 1.7031355082492066e-05, "loss": 0.8717, "step": 2696 }, { "epoch": 0.27, "grad_norm": 1.484778336143205, "learning_rate": 1.7029012159108114e-05, "loss": 0.7309, "step": 2697 }, { "epoch": 0.27, "grad_norm": 1.550338169624176, "learning_rate": 1.7026668472834818e-05, "loss": 0.7148, "step": 2698 }, { "epoch": 0.27, "grad_norm": 1.4474445970065117, "learning_rate": 1.702432402392654e-05, "loss": 0.7862, "step": 2699 }, { "epoch": 0.27, "grad_norm": 1.61847983913502, "learning_rate": 1.7021978812637742e-05, "loss": 0.7613, "step": 2700 }, { "epoch": 0.27, "grad_norm": 1.5619155946150487, "learning_rate": 1.7019632839222954e-05, "loss": 0.8718, "step": 2701 }, { "epoch": 0.27, "grad_norm": 1.5338774499992636, "learning_rate": 1.7017286103936793e-05, "loss": 0.8118, "step": 2702 }, { "epoch": 0.27, "grad_norm": 1.3798929312784296, "learning_rate": 1.7014938607033967e-05, "loss": 0.7618, "step": 2703 }, { "epoch": 0.28, "grad_norm": 1.6002922978466065, "learning_rate": 1.7012590348769255e-05, "loss": 0.7193, "step": 2704 }, { "epoch": 0.28, "grad_norm": 1.4831134418629623, "learning_rate": 1.7010241329397524e-05, "loss": 0.7847, "step": 2705 }, { "epoch": 0.28, "grad_norm": 1.4187030989217948, "learning_rate": 1.700789154917373e-05, "loss": 0.6872, "step": 2706 }, { "epoch": 0.28, "grad_norm": 1.5502049567007963, "learning_rate": 1.7005541008352894e-05, "loss": 0.6629, "step": 2707 }, { "epoch": 0.28, "grad_norm": 1.562381202598881, "learning_rate": 1.700318970719014e-05, "loss": 0.7573, "step": 2708 }, { "epoch": 0.28, "grad_norm": 1.4838134273125427, "learning_rate": 1.7000837645940654e-05, "loss": 0.7006, "step": 2709 }, { "epoch": 0.28, "grad_norm": 1.562643798332321, "learning_rate": 1.6998484824859728e-05, "loss": 0.8275, "step": 2710 }, { "epoch": 0.28, "grad_norm": 1.4447304429418308, "learning_rate": 1.699613124420272e-05, "loss": 0.7452, "step": 2711 }, { "epoch": 0.28, "grad_norm": 1.4520248132390094, "learning_rate": 1.699377690422507e-05, "loss": 0.794, "step": 2712 }, { "epoch": 0.28, "grad_norm": 1.5650615558288783, "learning_rate": 1.6991421805182305e-05, "loss": 0.8639, "step": 2713 }, { "epoch": 0.28, "grad_norm": 1.5385647957238642, "learning_rate": 1.6989065947330038e-05, "loss": 0.7816, "step": 2714 }, { "epoch": 0.28, "grad_norm": 1.605033676576137, "learning_rate": 1.6986709330923964e-05, "loss": 0.7617, "step": 2715 }, { "epoch": 0.28, "grad_norm": 1.5254040652379102, "learning_rate": 1.6984351956219847e-05, "loss": 0.7744, "step": 2716 }, { "epoch": 0.28, "grad_norm": 1.5370041367141265, "learning_rate": 1.698199382347355e-05, "loss": 0.7817, "step": 2717 }, { "epoch": 0.28, "grad_norm": 1.5591871015222685, "learning_rate": 1.697963493294101e-05, "loss": 0.7608, "step": 2718 }, { "epoch": 0.28, "grad_norm": 1.5268100390967951, "learning_rate": 1.6977275284878243e-05, "loss": 0.7932, "step": 2719 }, { "epoch": 0.28, "grad_norm": 1.5310722382386455, "learning_rate": 1.697491487954136e-05, "loss": 0.7739, "step": 2720 }, { "epoch": 0.28, "grad_norm": 1.5020818896731902, "learning_rate": 1.6972553717186543e-05, "loss": 0.7654, "step": 2721 }, { "epoch": 0.28, "grad_norm": 1.6285379518225092, "learning_rate": 1.6970191798070056e-05, "loss": 0.8342, "step": 2722 }, { "epoch": 0.28, "grad_norm": 1.5214473900346936, "learning_rate": 1.6967829122448255e-05, "loss": 0.7637, "step": 2723 }, { "epoch": 0.28, "grad_norm": 1.6437973758314557, "learning_rate": 1.6965465690577562e-05, "loss": 0.6452, "step": 2724 }, { "epoch": 0.28, "grad_norm": 1.5907319901063022, "learning_rate": 1.69631015027145e-05, "loss": 0.7918, "step": 2725 }, { "epoch": 0.28, "grad_norm": 1.6153610345004459, "learning_rate": 1.696073655911566e-05, "loss": 0.6448, "step": 2726 }, { "epoch": 0.28, "grad_norm": 1.6265533051979708, "learning_rate": 1.695837086003772e-05, "loss": 0.7247, "step": 2727 }, { "epoch": 0.28, "grad_norm": 1.7536067425885253, "learning_rate": 1.695600440573744e-05, "loss": 0.8441, "step": 2728 }, { "epoch": 0.28, "grad_norm": 1.4307599485709788, "learning_rate": 1.6953637196471658e-05, "loss": 0.6191, "step": 2729 }, { "epoch": 0.28, "grad_norm": 1.369399874389617, "learning_rate": 1.6951269232497307e-05, "loss": 0.7959, "step": 2730 }, { "epoch": 0.28, "grad_norm": 1.5347249065403918, "learning_rate": 1.6948900514071383e-05, "loss": 0.7254, "step": 2731 }, { "epoch": 0.28, "grad_norm": 1.569237032596756, "learning_rate": 1.6946531041450976e-05, "loss": 0.843, "step": 2732 }, { "epoch": 0.28, "grad_norm": 1.4274347398074667, "learning_rate": 1.694416081489326e-05, "loss": 0.7344, "step": 2733 }, { "epoch": 0.28, "grad_norm": 1.5881966084737997, "learning_rate": 1.694178983465548e-05, "loss": 0.7619, "step": 2734 }, { "epoch": 0.28, "grad_norm": 1.4842518701692677, "learning_rate": 1.6939418100994972e-05, "loss": 0.6843, "step": 2735 }, { "epoch": 0.28, "grad_norm": 1.409863595614964, "learning_rate": 1.6937045614169147e-05, "loss": 0.6616, "step": 2736 }, { "epoch": 0.28, "grad_norm": 1.7392852185664112, "learning_rate": 1.6934672374435504e-05, "loss": 0.7943, "step": 2737 }, { "epoch": 0.28, "grad_norm": 1.6435124089413926, "learning_rate": 1.693229838205162e-05, "loss": 0.7713, "step": 2738 }, { "epoch": 0.28, "grad_norm": 1.4599316988101887, "learning_rate": 1.692992363727516e-05, "loss": 0.8101, "step": 2739 }, { "epoch": 0.28, "grad_norm": 1.489997692489692, "learning_rate": 1.6927548140363857e-05, "loss": 0.7271, "step": 2740 }, { "epoch": 0.28, "grad_norm": 1.62368929242668, "learning_rate": 1.692517189157554e-05, "loss": 0.8319, "step": 2741 }, { "epoch": 0.28, "grad_norm": 1.6038961839172665, "learning_rate": 1.6922794891168114e-05, "loss": 0.6931, "step": 2742 }, { "epoch": 0.28, "grad_norm": 1.6660621188481828, "learning_rate": 1.6920417139399558e-05, "loss": 0.7983, "step": 2743 }, { "epoch": 0.28, "grad_norm": 1.6005730767318995, "learning_rate": 1.6918038636527947e-05, "loss": 0.7519, "step": 2744 }, { "epoch": 0.28, "grad_norm": 1.5824965838797156, "learning_rate": 1.691565938281143e-05, "loss": 0.8057, "step": 2745 }, { "epoch": 0.28, "grad_norm": 1.6253194656473673, "learning_rate": 1.691327937850823e-05, "loss": 0.776, "step": 2746 }, { "epoch": 0.28, "grad_norm": 1.5283801314123524, "learning_rate": 1.691089862387667e-05, "loss": 0.6774, "step": 2747 }, { "epoch": 0.28, "grad_norm": 1.4364607171702077, "learning_rate": 1.690851711917514e-05, "loss": 0.7053, "step": 2748 }, { "epoch": 0.28, "grad_norm": 1.4031659843312712, "learning_rate": 1.690613486466211e-05, "loss": 0.7023, "step": 2749 }, { "epoch": 0.28, "grad_norm": 1.5841263671334511, "learning_rate": 1.6903751860596142e-05, "loss": 0.7127, "step": 2750 }, { "epoch": 0.28, "grad_norm": 1.6482604897965214, "learning_rate": 1.690136810723587e-05, "loss": 0.8546, "step": 2751 }, { "epoch": 0.28, "grad_norm": 1.4450313742005596, "learning_rate": 1.689898360484002e-05, "loss": 0.8229, "step": 2752 }, { "epoch": 0.28, "grad_norm": 1.6073793491502968, "learning_rate": 1.6896598353667384e-05, "loss": 0.7387, "step": 2753 }, { "epoch": 0.28, "grad_norm": 1.7321541961311, "learning_rate": 1.689421235397685e-05, "loss": 0.7184, "step": 2754 }, { "epoch": 0.28, "grad_norm": 1.595811929094905, "learning_rate": 1.6891825606027376e-05, "loss": 0.8199, "step": 2755 }, { "epoch": 0.28, "grad_norm": 1.653256316090929, "learning_rate": 1.688943811007801e-05, "loss": 0.7459, "step": 2756 }, { "epoch": 0.28, "grad_norm": 1.3943982750909278, "learning_rate": 1.6887049866387874e-05, "loss": 0.7861, "step": 2757 }, { "epoch": 0.28, "grad_norm": 1.833818403056107, "learning_rate": 1.6884660875216178e-05, "loss": 0.8094, "step": 2758 }, { "epoch": 0.28, "grad_norm": 1.62709212438958, "learning_rate": 1.6882271136822204e-05, "loss": 0.8247, "step": 2759 }, { "epoch": 0.28, "grad_norm": 1.4804818862878864, "learning_rate": 1.687988065146533e-05, "loss": 0.7697, "step": 2760 }, { "epoch": 0.28, "grad_norm": 1.535185488000713, "learning_rate": 1.6877489419405e-05, "loss": 0.7532, "step": 2761 }, { "epoch": 0.28, "grad_norm": 1.4658145091412107, "learning_rate": 1.6875097440900746e-05, "loss": 0.7117, "step": 2762 }, { "epoch": 0.28, "grad_norm": 1.476554530620898, "learning_rate": 1.6872704716212178e-05, "loss": 0.696, "step": 2763 }, { "epoch": 0.28, "grad_norm": 1.4988545882011124, "learning_rate": 1.6870311245598992e-05, "loss": 0.8044, "step": 2764 }, { "epoch": 0.28, "grad_norm": 1.4127927069516595, "learning_rate": 1.6867917029320958e-05, "loss": 0.8519, "step": 2765 }, { "epoch": 0.28, "grad_norm": 1.3757450216152427, "learning_rate": 1.6865522067637932e-05, "loss": 0.6608, "step": 2766 }, { "epoch": 0.28, "grad_norm": 1.4416391583154726, "learning_rate": 1.686312636080985e-05, "loss": 0.6738, "step": 2767 }, { "epoch": 0.28, "grad_norm": 1.4930160707420024, "learning_rate": 1.686072990909673e-05, "loss": 0.7284, "step": 2768 }, { "epoch": 0.28, "grad_norm": 1.6919012095067527, "learning_rate": 1.6858332712758667e-05, "loss": 0.7883, "step": 2769 }, { "epoch": 0.28, "grad_norm": 1.4702667160247174, "learning_rate": 1.6855934772055843e-05, "loss": 0.7797, "step": 2770 }, { "epoch": 0.28, "grad_norm": 1.4828486258168854, "learning_rate": 1.6853536087248512e-05, "loss": 0.8487, "step": 2771 }, { "epoch": 0.28, "grad_norm": 1.6255330339109715, "learning_rate": 1.6851136658597014e-05, "loss": 0.7718, "step": 2772 }, { "epoch": 0.28, "grad_norm": 1.5772887860309583, "learning_rate": 1.6848736486361777e-05, "loss": 0.6948, "step": 2773 }, { "epoch": 0.28, "grad_norm": 1.7697523320924025, "learning_rate": 1.6846335570803293e-05, "loss": 0.8107, "step": 2774 }, { "epoch": 0.28, "grad_norm": 1.575249049219454, "learning_rate": 1.6843933912182148e-05, "loss": 0.7582, "step": 2775 }, { "epoch": 0.28, "grad_norm": 1.6035336350257257, "learning_rate": 1.6841531510759007e-05, "loss": 0.7807, "step": 2776 }, { "epoch": 0.28, "grad_norm": 1.5277565031061031, "learning_rate": 1.6839128366794603e-05, "loss": 0.8003, "step": 2777 }, { "epoch": 0.28, "grad_norm": 1.665939020688003, "learning_rate": 1.6836724480549773e-05, "loss": 0.7279, "step": 2778 }, { "epoch": 0.28, "grad_norm": 1.649265018826581, "learning_rate": 1.6834319852285413e-05, "loss": 0.7026, "step": 2779 }, { "epoch": 0.28, "grad_norm": 1.5585497816292266, "learning_rate": 1.6831914482262514e-05, "loss": 0.7922, "step": 2780 }, { "epoch": 0.28, "grad_norm": 1.5025288970479587, "learning_rate": 1.682950837074213e-05, "loss": 0.6813, "step": 2781 }, { "epoch": 0.28, "grad_norm": 1.703071819086007, "learning_rate": 1.6827101517985417e-05, "loss": 0.8524, "step": 2782 }, { "epoch": 0.28, "grad_norm": 1.483095775477807, "learning_rate": 1.6824693924253596e-05, "loss": 0.6962, "step": 2783 }, { "epoch": 0.28, "grad_norm": 1.48203059260217, "learning_rate": 1.682228558980798e-05, "loss": 0.768, "step": 2784 }, { "epoch": 0.28, "grad_norm": 1.6182378066924379, "learning_rate": 1.681987651490995e-05, "loss": 0.7338, "step": 2785 }, { "epoch": 0.28, "grad_norm": 1.5946567346322944, "learning_rate": 1.6817466699820976e-05, "loss": 0.7571, "step": 2786 }, { "epoch": 0.28, "grad_norm": 1.4595787577320416, "learning_rate": 1.6815056144802604e-05, "loss": 0.7257, "step": 2787 }, { "epoch": 0.28, "grad_norm": 1.7789185563151684, "learning_rate": 1.681264485011646e-05, "loss": 0.6834, "step": 2788 }, { "epoch": 0.28, "grad_norm": 1.5248080417971017, "learning_rate": 1.681023281602426e-05, "loss": 0.7149, "step": 2789 }, { "epoch": 0.28, "grad_norm": 1.425715654156925, "learning_rate": 1.6807820042787788e-05, "loss": 0.7185, "step": 2790 }, { "epoch": 0.28, "grad_norm": 1.4767606646263185, "learning_rate": 1.680540653066891e-05, "loss": 0.8509, "step": 2791 }, { "epoch": 0.28, "grad_norm": 1.694444133436615, "learning_rate": 1.6802992279929583e-05, "loss": 0.8359, "step": 2792 }, { "epoch": 0.28, "grad_norm": 1.6278746176394097, "learning_rate": 1.6800577290831825e-05, "loss": 0.698, "step": 2793 }, { "epoch": 0.28, "grad_norm": 1.4507274176318135, "learning_rate": 1.6798161563637754e-05, "loss": 0.689, "step": 2794 }, { "epoch": 0.28, "grad_norm": 1.4968155686337008, "learning_rate": 1.6795745098609555e-05, "loss": 0.6953, "step": 2795 }, { "epoch": 0.28, "grad_norm": 1.4824801508895227, "learning_rate": 1.6793327896009498e-05, "loss": 0.6898, "step": 2796 }, { "epoch": 0.28, "grad_norm": 1.6046740497702996, "learning_rate": 1.6790909956099935e-05, "loss": 0.6818, "step": 2797 }, { "epoch": 0.28, "grad_norm": 1.5459818527277547, "learning_rate": 1.6788491279143298e-05, "loss": 0.682, "step": 2798 }, { "epoch": 0.28, "grad_norm": 1.4972000594763024, "learning_rate": 1.6786071865402086e-05, "loss": 0.7576, "step": 2799 }, { "epoch": 0.28, "grad_norm": 1.560378258512186, "learning_rate": 1.6783651715138902e-05, "loss": 0.7076, "step": 2800 }, { "epoch": 0.28, "grad_norm": 1.6391118154486723, "learning_rate": 1.6781230828616404e-05, "loss": 0.8201, "step": 2801 }, { "epoch": 0.29, "grad_norm": 1.6916916979306973, "learning_rate": 1.6778809206097347e-05, "loss": 0.8081, "step": 2802 }, { "epoch": 0.29, "grad_norm": 1.6874205828232405, "learning_rate": 1.6776386847844563e-05, "loss": 0.7184, "step": 2803 }, { "epoch": 0.29, "grad_norm": 1.458018963147143, "learning_rate": 1.6773963754120952e-05, "loss": 0.7259, "step": 2804 }, { "epoch": 0.29, "grad_norm": 1.6070999871208667, "learning_rate": 1.6771539925189514e-05, "loss": 0.7739, "step": 2805 }, { "epoch": 0.29, "grad_norm": 1.666769768470216, "learning_rate": 1.6769115361313308e-05, "loss": 0.7305, "step": 2806 }, { "epoch": 0.29, "grad_norm": 1.513886999979369, "learning_rate": 1.6766690062755488e-05, "loss": 0.6507, "step": 2807 }, { "epoch": 0.29, "grad_norm": 1.5865848169441699, "learning_rate": 1.676426402977928e-05, "loss": 0.6846, "step": 2808 }, { "epoch": 0.29, "grad_norm": 1.5473956402439422, "learning_rate": 1.6761837262647994e-05, "loss": 0.7615, "step": 2809 }, { "epoch": 0.29, "grad_norm": 1.7061180332352512, "learning_rate": 1.6759409761625015e-05, "loss": 0.7268, "step": 2810 }, { "epoch": 0.29, "grad_norm": 1.5375806434832613, "learning_rate": 1.6756981526973813e-05, "loss": 0.7344, "step": 2811 }, { "epoch": 0.29, "grad_norm": 1.626081660308829, "learning_rate": 1.6754552558957936e-05, "loss": 0.7418, "step": 2812 }, { "epoch": 0.29, "grad_norm": 1.4755972980689747, "learning_rate": 1.6752122857841005e-05, "loss": 0.7499, "step": 2813 }, { "epoch": 0.29, "grad_norm": 1.6415653421485688, "learning_rate": 1.6749692423886727e-05, "loss": 0.7489, "step": 2814 }, { "epoch": 0.29, "grad_norm": 1.5273140977187238, "learning_rate": 1.6747261257358894e-05, "loss": 0.8002, "step": 2815 }, { "epoch": 0.29, "grad_norm": 1.5374915960526312, "learning_rate": 1.6744829358521368e-05, "loss": 0.7432, "step": 2816 }, { "epoch": 0.29, "grad_norm": 1.488772374315502, "learning_rate": 1.674239672763809e-05, "loss": 0.6842, "step": 2817 }, { "epoch": 0.29, "grad_norm": 1.4171710004641913, "learning_rate": 1.6739963364973084e-05, "loss": 0.6569, "step": 2818 }, { "epoch": 0.29, "grad_norm": 1.6203595465668792, "learning_rate": 1.673752927079046e-05, "loss": 0.7785, "step": 2819 }, { "epoch": 0.29, "grad_norm": 1.493613756350887, "learning_rate": 1.6735094445354395e-05, "loss": 0.8533, "step": 2820 }, { "epoch": 0.29, "grad_norm": 1.5122966135044509, "learning_rate": 1.6732658888929153e-05, "loss": 0.8206, "step": 2821 }, { "epoch": 0.29, "grad_norm": 1.4885803944048208, "learning_rate": 1.6730222601779075e-05, "loss": 0.6961, "step": 2822 }, { "epoch": 0.29, "grad_norm": 1.6084817399112654, "learning_rate": 1.6727785584168583e-05, "loss": 0.7868, "step": 2823 }, { "epoch": 0.29, "grad_norm": 1.5461636352471053, "learning_rate": 1.6725347836362178e-05, "loss": 0.6473, "step": 2824 }, { "epoch": 0.29, "grad_norm": 1.5190104114024647, "learning_rate": 1.6722909358624436e-05, "loss": 0.7424, "step": 2825 }, { "epoch": 0.29, "grad_norm": 1.5882905731886237, "learning_rate": 1.6720470151220018e-05, "loss": 0.7879, "step": 2826 }, { "epoch": 0.29, "grad_norm": 1.6363752670735936, "learning_rate": 1.671803021441366e-05, "loss": 0.7221, "step": 2827 }, { "epoch": 0.29, "grad_norm": 1.7099595353026695, "learning_rate": 1.6715589548470187e-05, "loss": 0.8382, "step": 2828 }, { "epoch": 0.29, "grad_norm": 1.6410804954346245, "learning_rate": 1.6713148153654483e-05, "loss": 0.8053, "step": 2829 }, { "epoch": 0.29, "grad_norm": 1.440973928004605, "learning_rate": 1.6710706030231533e-05, "loss": 0.733, "step": 2830 }, { "epoch": 0.29, "grad_norm": 1.45683336566356, "learning_rate": 1.670826317846638e-05, "loss": 0.7442, "step": 2831 }, { "epoch": 0.29, "grad_norm": 1.5222653629474279, "learning_rate": 1.6705819598624168e-05, "loss": 0.7067, "step": 2832 }, { "epoch": 0.29, "grad_norm": 1.5092845480116759, "learning_rate": 1.6703375290970107e-05, "loss": 0.6241, "step": 2833 }, { "epoch": 0.29, "grad_norm": 1.673406426838146, "learning_rate": 1.6700930255769486e-05, "loss": 0.9131, "step": 2834 }, { "epoch": 0.29, "grad_norm": 1.4722136796614742, "learning_rate": 1.6698484493287678e-05, "loss": 0.774, "step": 2835 }, { "epoch": 0.29, "grad_norm": 1.7138172095048119, "learning_rate": 1.6696038003790128e-05, "loss": 0.8088, "step": 2836 }, { "epoch": 0.29, "grad_norm": 1.4999575241202465, "learning_rate": 1.6693590787542372e-05, "loss": 0.8001, "step": 2837 }, { "epoch": 0.29, "grad_norm": 1.596426220710093, "learning_rate": 1.6691142844810013e-05, "loss": 0.7445, "step": 2838 }, { "epoch": 0.29, "grad_norm": 1.5321179361818746, "learning_rate": 1.668869417585873e-05, "loss": 0.7331, "step": 2839 }, { "epoch": 0.29, "grad_norm": 1.4175973021799633, "learning_rate": 1.6686244780954294e-05, "loss": 0.6561, "step": 2840 }, { "epoch": 0.29, "grad_norm": 1.6761302463380996, "learning_rate": 1.668379466036255e-05, "loss": 0.7373, "step": 2841 }, { "epoch": 0.29, "grad_norm": 1.6008508892660749, "learning_rate": 1.668134381434942e-05, "loss": 0.7636, "step": 2842 }, { "epoch": 0.29, "grad_norm": 1.7527217040594256, "learning_rate": 1.66788922431809e-05, "loss": 0.8217, "step": 2843 }, { "epoch": 0.29, "grad_norm": 1.49467108389566, "learning_rate": 1.6676439947123075e-05, "loss": 0.7853, "step": 2844 }, { "epoch": 0.29, "grad_norm": 1.6393797745455427, "learning_rate": 1.66739869264421e-05, "loss": 0.8308, "step": 2845 }, { "epoch": 0.29, "grad_norm": 1.4602628736605359, "learning_rate": 1.667153318140422e-05, "loss": 0.7054, "step": 2846 }, { "epoch": 0.29, "grad_norm": 1.579963462481767, "learning_rate": 1.666907871227574e-05, "loss": 0.8356, "step": 2847 }, { "epoch": 0.29, "grad_norm": 1.6868043052083224, "learning_rate": 1.6666623519323056e-05, "loss": 0.7385, "step": 2848 }, { "epoch": 0.29, "grad_norm": 1.5060864355496348, "learning_rate": 1.6664167602812646e-05, "loss": 0.7126, "step": 2849 }, { "epoch": 0.29, "grad_norm": 1.369964772802513, "learning_rate": 1.6661710963011057e-05, "loss": 0.7056, "step": 2850 }, { "epoch": 0.29, "grad_norm": 1.5409934582076479, "learning_rate": 1.665925360018492e-05, "loss": 0.7766, "step": 2851 }, { "epoch": 0.29, "grad_norm": 1.5099368755717504, "learning_rate": 1.6656795514600948e-05, "loss": 0.8421, "step": 2852 }, { "epoch": 0.29, "grad_norm": 1.4299528658124159, "learning_rate": 1.665433670652592e-05, "loss": 0.7769, "step": 2853 }, { "epoch": 0.29, "grad_norm": 1.519976430392634, "learning_rate": 1.665187717622671e-05, "loss": 0.7765, "step": 2854 }, { "epoch": 0.29, "grad_norm": 1.4572693694812913, "learning_rate": 1.6649416923970248e-05, "loss": 0.7355, "step": 2855 }, { "epoch": 0.29, "grad_norm": 1.630645817094188, "learning_rate": 1.664695595002357e-05, "loss": 0.7639, "step": 2856 }, { "epoch": 0.29, "grad_norm": 1.7757479522459132, "learning_rate": 1.6644494254653767e-05, "loss": 0.8451, "step": 2857 }, { "epoch": 0.29, "grad_norm": 1.558065617309748, "learning_rate": 1.664203183812802e-05, "loss": 0.774, "step": 2858 }, { "epoch": 0.29, "grad_norm": 1.5455113506164675, "learning_rate": 1.6639568700713587e-05, "loss": 0.7113, "step": 2859 }, { "epoch": 0.29, "grad_norm": 1.699674973561388, "learning_rate": 1.6637104842677807e-05, "loss": 0.6718, "step": 2860 }, { "epoch": 0.29, "grad_norm": 1.5690022382016071, "learning_rate": 1.6634640264288087e-05, "loss": 0.8394, "step": 2861 }, { "epoch": 0.29, "grad_norm": 1.5342184544437765, "learning_rate": 1.663217496581192e-05, "loss": 0.795, "step": 2862 }, { "epoch": 0.29, "grad_norm": 1.5959621667132664, "learning_rate": 1.6629708947516875e-05, "loss": 0.8409, "step": 2863 }, { "epoch": 0.29, "grad_norm": 1.3836758638315834, "learning_rate": 1.6627242209670606e-05, "loss": 0.7884, "step": 2864 }, { "epoch": 0.29, "grad_norm": 1.7082254637709893, "learning_rate": 1.6624774752540828e-05, "loss": 0.848, "step": 2865 }, { "epoch": 0.29, "grad_norm": 1.4559456057698992, "learning_rate": 1.6622306576395355e-05, "loss": 0.7191, "step": 2866 }, { "epoch": 0.29, "grad_norm": 1.491862156638078, "learning_rate": 1.661983768150206e-05, "loss": 0.779, "step": 2867 }, { "epoch": 0.29, "grad_norm": 1.58902897489258, "learning_rate": 1.661736806812891e-05, "loss": 0.7634, "step": 2868 }, { "epoch": 0.29, "grad_norm": 1.452695928938561, "learning_rate": 1.661489773654394e-05, "loss": 0.6678, "step": 2869 }, { "epoch": 0.29, "grad_norm": 1.4349696118715132, "learning_rate": 1.6612426687015266e-05, "loss": 0.7108, "step": 2870 }, { "epoch": 0.29, "grad_norm": 1.578562608016945, "learning_rate": 1.6609954919811078e-05, "loss": 0.7473, "step": 2871 }, { "epoch": 0.29, "grad_norm": 1.5552003153913527, "learning_rate": 1.6607482435199654e-05, "loss": 0.7015, "step": 2872 }, { "epoch": 0.29, "grad_norm": 1.5501887712427833, "learning_rate": 1.6605009233449344e-05, "loss": 0.8025, "step": 2873 }, { "epoch": 0.29, "grad_norm": 1.5822623419828576, "learning_rate": 1.6602535314828572e-05, "loss": 0.7478, "step": 2874 }, { "epoch": 0.29, "grad_norm": 1.563036489716803, "learning_rate": 1.6600060679605844e-05, "loss": 0.7182, "step": 2875 }, { "epoch": 0.29, "grad_norm": 1.5476161809098843, "learning_rate": 1.6597585328049736e-05, "loss": 0.6821, "step": 2876 }, { "epoch": 0.29, "grad_norm": 1.496579865043439, "learning_rate": 1.6595109260428924e-05, "loss": 0.6769, "step": 2877 }, { "epoch": 0.29, "grad_norm": 1.595038129995531, "learning_rate": 1.6592632477012132e-05, "loss": 0.8206, "step": 2878 }, { "epoch": 0.29, "grad_norm": 1.6403593358151038, "learning_rate": 1.6590154978068185e-05, "loss": 0.7059, "step": 2879 }, { "epoch": 0.29, "grad_norm": 1.4723084470416206, "learning_rate": 1.6587676763865972e-05, "loss": 0.7159, "step": 2880 }, { "epoch": 0.29, "grad_norm": 1.4455832157117119, "learning_rate": 1.6585197834674466e-05, "loss": 0.6425, "step": 2881 }, { "epoch": 0.29, "grad_norm": 1.559593603945744, "learning_rate": 1.6582718190762718e-05, "loss": 0.7431, "step": 2882 }, { "epoch": 0.29, "grad_norm": 1.6447593020281035, "learning_rate": 1.658023783239985e-05, "loss": 0.7389, "step": 2883 }, { "epoch": 0.29, "grad_norm": 1.7274509720157214, "learning_rate": 1.657775675985507e-05, "loss": 0.7268, "step": 2884 }, { "epoch": 0.29, "grad_norm": 1.6186783596171195, "learning_rate": 1.657527497339766e-05, "loss": 0.8189, "step": 2885 }, { "epoch": 0.29, "grad_norm": 1.6366378590676298, "learning_rate": 1.6572792473296972e-05, "loss": 0.7788, "step": 2886 }, { "epoch": 0.29, "grad_norm": 1.7420947209611584, "learning_rate": 1.6570309259822455e-05, "loss": 0.7549, "step": 2887 }, { "epoch": 0.29, "grad_norm": 1.723075188665114, "learning_rate": 1.656782533324361e-05, "loss": 0.8223, "step": 2888 }, { "epoch": 0.29, "grad_norm": 1.6003846657928615, "learning_rate": 1.6565340693830035e-05, "loss": 0.685, "step": 2889 }, { "epoch": 0.29, "grad_norm": 1.509000318762168, "learning_rate": 1.6562855341851402e-05, "loss": 0.6158, "step": 2890 }, { "epoch": 0.29, "grad_norm": 1.531501881671066, "learning_rate": 1.6560369277577454e-05, "loss": 0.7816, "step": 2891 }, { "epoch": 0.29, "grad_norm": 1.574341951527415, "learning_rate": 1.6557882501278012e-05, "loss": 0.8377, "step": 2892 }, { "epoch": 0.29, "grad_norm": 1.4853515813642724, "learning_rate": 1.6555395013222978e-05, "loss": 0.7056, "step": 2893 }, { "epoch": 0.29, "grad_norm": 1.4195032069063818, "learning_rate": 1.6552906813682324e-05, "loss": 0.6399, "step": 2894 }, { "epoch": 0.29, "grad_norm": 1.4676046827308649, "learning_rate": 1.655041790292612e-05, "loss": 0.7103, "step": 2895 }, { "epoch": 0.29, "grad_norm": 1.5535140253853619, "learning_rate": 1.6547928281224484e-05, "loss": 0.6995, "step": 2896 }, { "epoch": 0.29, "grad_norm": 1.4573585724428806, "learning_rate": 1.654543794884764e-05, "loss": 0.793, "step": 2897 }, { "epoch": 0.29, "grad_norm": 1.695730920554382, "learning_rate": 1.6542946906065854e-05, "loss": 0.7337, "step": 2898 }, { "epoch": 0.29, "grad_norm": 1.5311418599951014, "learning_rate": 1.654045515314951e-05, "loss": 0.8071, "step": 2899 }, { "epoch": 0.29, "grad_norm": 1.6182734204706768, "learning_rate": 1.653796269036904e-05, "loss": 0.8456, "step": 2900 }, { "epoch": 0.3, "grad_norm": 1.6533053637642925, "learning_rate": 1.653546951799496e-05, "loss": 0.8067, "step": 2901 }, { "epoch": 0.3, "grad_norm": 1.6011248753907634, "learning_rate": 1.6532975636297863e-05, "loss": 0.7606, "step": 2902 }, { "epoch": 0.3, "grad_norm": 1.7192000277990367, "learning_rate": 1.6530481045548426e-05, "loss": 0.8073, "step": 2903 }, { "epoch": 0.3, "grad_norm": 1.543114296940595, "learning_rate": 1.6527985746017405e-05, "loss": 0.7372, "step": 2904 }, { "epoch": 0.3, "grad_norm": 1.431147193220211, "learning_rate": 1.652548973797561e-05, "loss": 0.6621, "step": 2905 }, { "epoch": 0.3, "grad_norm": 1.4846835227313837, "learning_rate": 1.6522993021693955e-05, "loss": 0.6698, "step": 2906 }, { "epoch": 0.3, "grad_norm": 1.6667734247359371, "learning_rate": 1.6520495597443415e-05, "loss": 0.7361, "step": 2907 }, { "epoch": 0.3, "grad_norm": 1.531655652290317, "learning_rate": 1.6517997465495046e-05, "loss": 0.7556, "step": 2908 }, { "epoch": 0.3, "grad_norm": 1.6498541138511267, "learning_rate": 1.651549862611998e-05, "loss": 0.8484, "step": 2909 }, { "epoch": 0.3, "grad_norm": 1.4380001397040072, "learning_rate": 1.6512999079589435e-05, "loss": 0.8083, "step": 2910 }, { "epoch": 0.3, "grad_norm": 1.5522328190326016, "learning_rate": 1.6510498826174685e-05, "loss": 0.6948, "step": 2911 }, { "epoch": 0.3, "grad_norm": 1.4467565956725026, "learning_rate": 1.6507997866147107e-05, "loss": 0.644, "step": 2912 }, { "epoch": 0.3, "grad_norm": 1.5220169878385472, "learning_rate": 1.650549619977813e-05, "loss": 0.747, "step": 2913 }, { "epoch": 0.3, "grad_norm": 1.5977989579510004, "learning_rate": 1.6502993827339274e-05, "loss": 0.7749, "step": 2914 }, { "epoch": 0.3, "grad_norm": 1.4482793528657738, "learning_rate": 1.6500490749102137e-05, "loss": 0.7466, "step": 2915 }, { "epoch": 0.3, "grad_norm": 1.653687165964818, "learning_rate": 1.6497986965338383e-05, "loss": 0.7703, "step": 2916 }, { "epoch": 0.3, "grad_norm": 1.681617825976571, "learning_rate": 1.649548247631976e-05, "loss": 0.7186, "step": 2917 }, { "epoch": 0.3, "grad_norm": 1.6218352088541688, "learning_rate": 1.64929772823181e-05, "loss": 0.7309, "step": 2918 }, { "epoch": 0.3, "grad_norm": 1.6085825395587305, "learning_rate": 1.649047138360529e-05, "loss": 0.7282, "step": 2919 }, { "epoch": 0.3, "grad_norm": 1.5622700051177003, "learning_rate": 1.648796478045331e-05, "loss": 0.6535, "step": 2920 }, { "epoch": 0.3, "grad_norm": 1.664828775585605, "learning_rate": 1.648545747313421e-05, "loss": 0.7428, "step": 2921 }, { "epoch": 0.3, "grad_norm": 1.5657215537870217, "learning_rate": 1.648294946192013e-05, "loss": 0.9065, "step": 2922 }, { "epoch": 0.3, "grad_norm": 1.4834078087685076, "learning_rate": 1.648044074708326e-05, "loss": 0.6748, "step": 2923 }, { "epoch": 0.3, "grad_norm": 1.6883500257594792, "learning_rate": 1.6477931328895895e-05, "loss": 0.8581, "step": 2924 }, { "epoch": 0.3, "grad_norm": 1.6501700073000836, "learning_rate": 1.647542120763039e-05, "loss": 0.783, "step": 2925 }, { "epoch": 0.3, "grad_norm": 1.629511650923731, "learning_rate": 1.647291038355917e-05, "loss": 0.8634, "step": 2926 }, { "epoch": 0.3, "grad_norm": 1.5041014480365311, "learning_rate": 1.6470398856954758e-05, "loss": 0.7276, "step": 2927 }, { "epoch": 0.3, "grad_norm": 1.6518055797582596, "learning_rate": 1.6467886628089734e-05, "loss": 0.7761, "step": 2928 }, { "epoch": 0.3, "grad_norm": 1.3929239385366425, "learning_rate": 1.6465373697236762e-05, "loss": 0.6874, "step": 2929 }, { "epoch": 0.3, "grad_norm": 1.5387621505367852, "learning_rate": 1.6462860064668582e-05, "loss": 0.8216, "step": 2930 }, { "epoch": 0.3, "grad_norm": 1.7292060414509942, "learning_rate": 1.6460345730658015e-05, "loss": 0.7825, "step": 2931 }, { "epoch": 0.3, "grad_norm": 1.5574284883149774, "learning_rate": 1.6457830695477942e-05, "loss": 0.7583, "step": 2932 }, { "epoch": 0.3, "grad_norm": 1.7204118790035463, "learning_rate": 1.6455314959401337e-05, "loss": 0.7306, "step": 2933 }, { "epoch": 0.3, "grad_norm": 1.5051589890511696, "learning_rate": 1.6452798522701244e-05, "loss": 0.6778, "step": 2934 }, { "epoch": 0.3, "grad_norm": 1.6934088049746245, "learning_rate": 1.6450281385650783e-05, "loss": 0.7215, "step": 2935 }, { "epoch": 0.3, "grad_norm": 1.5681140824743331, "learning_rate": 1.6447763548523145e-05, "loss": 0.8215, "step": 2936 }, { "epoch": 0.3, "grad_norm": 1.410247781162616, "learning_rate": 1.644524501159161e-05, "loss": 0.6756, "step": 2937 }, { "epoch": 0.3, "grad_norm": 1.6823167629308104, "learning_rate": 1.644272577512952e-05, "loss": 0.7749, "step": 2938 }, { "epoch": 0.3, "grad_norm": 1.630703164194887, "learning_rate": 1.64402058394103e-05, "loss": 0.6726, "step": 2939 }, { "epoch": 0.3, "grad_norm": 1.446999359948708, "learning_rate": 1.643768520470745e-05, "loss": 0.6298, "step": 2940 }, { "epoch": 0.3, "grad_norm": 1.6801001999822267, "learning_rate": 1.6435163871294543e-05, "loss": 0.7354, "step": 2941 }, { "epoch": 0.3, "grad_norm": 1.5506128809974706, "learning_rate": 1.6432641839445234e-05, "loss": 0.7504, "step": 2942 }, { "epoch": 0.3, "grad_norm": 1.5430153520759022, "learning_rate": 1.6430119109433247e-05, "loss": 0.7373, "step": 2943 }, { "epoch": 0.3, "grad_norm": 1.463452113269399, "learning_rate": 1.6427595681532387e-05, "loss": 0.7108, "step": 2944 }, { "epoch": 0.3, "grad_norm": 1.4432730436194878, "learning_rate": 1.6425071556016533e-05, "loss": 0.8079, "step": 2945 }, { "epoch": 0.3, "grad_norm": 1.5186131828255842, "learning_rate": 1.642254673315964e-05, "loss": 0.7656, "step": 2946 }, { "epoch": 0.3, "grad_norm": 1.4850811230291339, "learning_rate": 1.6420021213235736e-05, "loss": 0.7535, "step": 2947 }, { "epoch": 0.3, "grad_norm": 1.5396786472697646, "learning_rate": 1.6417494996518925e-05, "loss": 0.7964, "step": 2948 }, { "epoch": 0.3, "grad_norm": 1.572772703812896, "learning_rate": 1.641496808328339e-05, "loss": 0.7796, "step": 2949 }, { "epoch": 0.3, "grad_norm": 1.621635568935317, "learning_rate": 1.641244047380339e-05, "loss": 0.7015, "step": 2950 }, { "epoch": 0.3, "grad_norm": 1.6564223885148655, "learning_rate": 1.640991216835326e-05, "loss": 0.7155, "step": 2951 }, { "epoch": 0.3, "grad_norm": 1.5100958840388738, "learning_rate": 1.64073831672074e-05, "loss": 0.785, "step": 2952 }, { "epoch": 0.3, "grad_norm": 1.6905292713911557, "learning_rate": 1.6404853470640295e-05, "loss": 0.7858, "step": 2953 }, { "epoch": 0.3, "grad_norm": 1.4028782823432704, "learning_rate": 1.640232307892651e-05, "loss": 0.7429, "step": 2954 }, { "epoch": 0.3, "grad_norm": 1.5817976361332047, "learning_rate": 1.6399791992340674e-05, "loss": 0.7154, "step": 2955 }, { "epoch": 0.3, "grad_norm": 1.5391480351313862, "learning_rate": 1.63972602111575e-05, "loss": 0.7061, "step": 2956 }, { "epoch": 0.3, "grad_norm": 1.6468298584133225, "learning_rate": 1.6394727735651772e-05, "loss": 0.8179, "step": 2957 }, { "epoch": 0.3, "grad_norm": 1.6514204198522735, "learning_rate": 1.6392194566098352e-05, "loss": 0.7793, "step": 2958 }, { "epoch": 0.3, "grad_norm": 1.5621916853219573, "learning_rate": 1.638966070277217e-05, "loss": 0.7411, "step": 2959 }, { "epoch": 0.3, "grad_norm": 1.5437305047711751, "learning_rate": 1.6387126145948248e-05, "loss": 0.8206, "step": 2960 }, { "epoch": 0.3, "grad_norm": 1.5740494144417758, "learning_rate": 1.638459089590166e-05, "loss": 0.6844, "step": 2961 }, { "epoch": 0.3, "grad_norm": 1.4071516788183378, "learning_rate": 1.638205495290758e-05, "loss": 0.7201, "step": 2962 }, { "epoch": 0.3, "grad_norm": 1.566424857115309, "learning_rate": 1.6379518317241236e-05, "loss": 0.616, "step": 2963 }, { "epoch": 0.3, "grad_norm": 1.6035080522044143, "learning_rate": 1.6376980989177942e-05, "loss": 0.8079, "step": 2964 }, { "epoch": 0.3, "grad_norm": 1.6479440300773858, "learning_rate": 1.6374442968993086e-05, "loss": 0.7135, "step": 2965 }, { "epoch": 0.3, "grad_norm": 1.4576916199655903, "learning_rate": 1.6371904256962135e-05, "loss": 0.675, "step": 2966 }, { "epoch": 0.3, "grad_norm": 1.5816551028695465, "learning_rate": 1.636936485336062e-05, "loss": 0.7392, "step": 2967 }, { "epoch": 0.3, "grad_norm": 1.6615128946836017, "learning_rate": 1.6366824758464156e-05, "loss": 0.7514, "step": 2968 }, { "epoch": 0.3, "grad_norm": 1.6986189045921958, "learning_rate": 1.636428397254843e-05, "loss": 0.7831, "step": 2969 }, { "epoch": 0.3, "grad_norm": 1.6285478666931792, "learning_rate": 1.636174249588921e-05, "loss": 0.8359, "step": 2970 }, { "epoch": 0.3, "grad_norm": 1.6220292651856945, "learning_rate": 1.635920032876232e-05, "loss": 0.8139, "step": 2971 }, { "epoch": 0.3, "grad_norm": 1.5218810852144655, "learning_rate": 1.6356657471443686e-05, "loss": 0.7399, "step": 2972 }, { "epoch": 0.3, "grad_norm": 1.60756563674869, "learning_rate": 1.635411392420929e-05, "loss": 0.719, "step": 2973 }, { "epoch": 0.3, "grad_norm": 1.5665830216671, "learning_rate": 1.6351569687335195e-05, "loss": 0.7577, "step": 2974 }, { "epoch": 0.3, "grad_norm": 1.5933734297319644, "learning_rate": 1.6349024761097532e-05, "loss": 0.7912, "step": 2975 }, { "epoch": 0.3, "grad_norm": 1.5034799372445287, "learning_rate": 1.634647914577253e-05, "loss": 0.7415, "step": 2976 }, { "epoch": 0.3, "grad_norm": 1.4852458185738506, "learning_rate": 1.6343932841636455e-05, "loss": 0.6613, "step": 2977 }, { "epoch": 0.3, "grad_norm": 1.473874116603972, "learning_rate": 1.634138584896568e-05, "loss": 0.6285, "step": 2978 }, { "epoch": 0.3, "grad_norm": 1.438166471562597, "learning_rate": 1.6338838168036643e-05, "loss": 0.6949, "step": 2979 }, { "epoch": 0.3, "grad_norm": 1.3987278868685178, "learning_rate": 1.633628979912585e-05, "loss": 0.7365, "step": 2980 }, { "epoch": 0.3, "grad_norm": 1.6983099446864078, "learning_rate": 1.6333740742509886e-05, "loss": 0.6947, "step": 2981 }, { "epoch": 0.3, "grad_norm": 1.6141816785702128, "learning_rate": 1.6331190998465416e-05, "loss": 0.7373, "step": 2982 }, { "epoch": 0.3, "grad_norm": 1.5452457901687615, "learning_rate": 1.632864056726917e-05, "loss": 0.6975, "step": 2983 }, { "epoch": 0.3, "grad_norm": 1.6608140326198042, "learning_rate": 1.6326089449197956e-05, "loss": 0.7476, "step": 2984 }, { "epoch": 0.3, "grad_norm": 1.4572606282215208, "learning_rate": 1.6323537644528666e-05, "loss": 0.6997, "step": 2985 }, { "epoch": 0.3, "grad_norm": 1.5716370053847366, "learning_rate": 1.6320985153538255e-05, "loss": 0.7826, "step": 2986 }, { "epoch": 0.3, "grad_norm": 1.6592991363088199, "learning_rate": 1.6318431976503754e-05, "loss": 0.7428, "step": 2987 }, { "epoch": 0.3, "grad_norm": 1.8237988750533487, "learning_rate": 1.6315878113702264e-05, "loss": 0.7558, "step": 2988 }, { "epoch": 0.3, "grad_norm": 1.6716469257192015, "learning_rate": 1.631332356541098e-05, "loss": 0.7054, "step": 2989 }, { "epoch": 0.3, "grad_norm": 1.585305425503702, "learning_rate": 1.6310768331907152e-05, "loss": 0.8262, "step": 2990 }, { "epoch": 0.3, "grad_norm": 1.5738139196841392, "learning_rate": 1.6308212413468112e-05, "loss": 0.7449, "step": 2991 }, { "epoch": 0.3, "grad_norm": 1.5846646205419024, "learning_rate": 1.6305655810371263e-05, "loss": 0.7675, "step": 2992 }, { "epoch": 0.3, "grad_norm": 1.5475748322806462, "learning_rate": 1.630309852289408e-05, "loss": 0.7844, "step": 2993 }, { "epoch": 0.3, "grad_norm": 1.4521587234662767, "learning_rate": 1.630054055131413e-05, "loss": 0.9161, "step": 2994 }, { "epoch": 0.3, "grad_norm": 1.5509347372810725, "learning_rate": 1.629798189590903e-05, "loss": 0.6987, "step": 2995 }, { "epoch": 0.3, "grad_norm": 1.508791759708456, "learning_rate": 1.6295422556956482e-05, "loss": 0.7036, "step": 2996 }, { "epoch": 0.3, "grad_norm": 1.6009516480774482, "learning_rate": 1.6292862534734265e-05, "loss": 0.7279, "step": 2997 }, { "epoch": 0.3, "grad_norm": 1.4819643251473897, "learning_rate": 1.629030182952023e-05, "loss": 0.7282, "step": 2998 }, { "epoch": 0.31, "grad_norm": 1.457259734300266, "learning_rate": 1.6287740441592302e-05, "loss": 0.7124, "step": 2999 }, { "epoch": 0.31, "grad_norm": 1.503453385065571, "learning_rate": 1.6285178371228477e-05, "loss": 0.7833, "step": 3000 }, { "epoch": 0.31, "grad_norm": 1.4651584475008543, "learning_rate": 1.628261561870683e-05, "loss": 0.7883, "step": 3001 }, { "epoch": 0.31, "grad_norm": 1.5518497406157812, "learning_rate": 1.6280052184305505e-05, "loss": 0.7794, "step": 3002 }, { "epoch": 0.31, "grad_norm": 1.487130618612337, "learning_rate": 1.627748806830273e-05, "loss": 0.7968, "step": 3003 }, { "epoch": 0.31, "grad_norm": 1.8566924238421831, "learning_rate": 1.627492327097679e-05, "loss": 0.7767, "step": 3004 }, { "epoch": 0.31, "grad_norm": 1.5397235409358518, "learning_rate": 1.627235779260606e-05, "loss": 0.8144, "step": 3005 }, { "epoch": 0.31, "grad_norm": 1.688856929862697, "learning_rate": 1.6269791633468975e-05, "loss": 0.8169, "step": 3006 }, { "epoch": 0.31, "grad_norm": 1.4271495517722819, "learning_rate": 1.626722479384406e-05, "loss": 0.8141, "step": 3007 }, { "epoch": 0.31, "grad_norm": 1.6141490822849023, "learning_rate": 1.62646572740099e-05, "loss": 0.828, "step": 3008 }, { "epoch": 0.31, "grad_norm": 1.3862109919366226, "learning_rate": 1.6262089074245163e-05, "loss": 0.6934, "step": 3009 }, { "epoch": 0.31, "grad_norm": 1.7264246506749796, "learning_rate": 1.6259520194828586e-05, "loss": 0.8084, "step": 3010 }, { "epoch": 0.31, "grad_norm": 1.6831492410996842, "learning_rate": 1.625695063603898e-05, "loss": 0.7705, "step": 3011 }, { "epoch": 0.31, "grad_norm": 1.7346547899690183, "learning_rate": 1.6254380398155226e-05, "loss": 0.794, "step": 3012 }, { "epoch": 0.31, "grad_norm": 1.5155030467406767, "learning_rate": 1.6251809481456292e-05, "loss": 0.7796, "step": 3013 }, { "epoch": 0.31, "grad_norm": 1.5258784519917301, "learning_rate": 1.6249237886221206e-05, "loss": 0.6925, "step": 3014 }, { "epoch": 0.31, "grad_norm": 1.7016953394241054, "learning_rate": 1.6246665612729074e-05, "loss": 0.8212, "step": 3015 }, { "epoch": 0.31, "grad_norm": 1.6296541627190022, "learning_rate": 1.6244092661259073e-05, "loss": 0.7483, "step": 3016 }, { "epoch": 0.31, "grad_norm": 1.512531790713189, "learning_rate": 1.6241519032090466e-05, "loss": 0.7023, "step": 3017 }, { "epoch": 0.31, "grad_norm": 1.6388795320351859, "learning_rate": 1.623894472550257e-05, "loss": 0.8662, "step": 3018 }, { "epoch": 0.31, "grad_norm": 1.5834486433720267, "learning_rate": 1.623636974177479e-05, "loss": 0.9036, "step": 3019 }, { "epoch": 0.31, "grad_norm": 1.608536353251349, "learning_rate": 1.62337940811866e-05, "loss": 0.8445, "step": 3020 }, { "epoch": 0.31, "grad_norm": 1.6247451048664159, "learning_rate": 1.6231217744017557e-05, "loss": 0.7967, "step": 3021 }, { "epoch": 0.31, "grad_norm": 1.4633740641195936, "learning_rate": 1.6228640730547264e-05, "loss": 0.8128, "step": 3022 }, { "epoch": 0.31, "grad_norm": 1.7411844443965356, "learning_rate": 1.622606304105543e-05, "loss": 0.7927, "step": 3023 }, { "epoch": 0.31, "grad_norm": 1.4140442778396463, "learning_rate": 1.6223484675821813e-05, "loss": 0.6619, "step": 3024 }, { "epoch": 0.31, "grad_norm": 1.6603407739311253, "learning_rate": 1.622090563512626e-05, "loss": 0.8268, "step": 3025 }, { "epoch": 0.31, "grad_norm": 1.457021585971081, "learning_rate": 1.6218325919248688e-05, "loss": 0.7776, "step": 3026 }, { "epoch": 0.31, "grad_norm": 1.5941908538126683, "learning_rate": 1.6215745528469078e-05, "loss": 0.7932, "step": 3027 }, { "epoch": 0.31, "grad_norm": 1.6131401763391802, "learning_rate": 1.6213164463067495e-05, "loss": 0.7959, "step": 3028 }, { "epoch": 0.31, "grad_norm": 1.5040078612577223, "learning_rate": 1.621058272332407e-05, "loss": 0.7749, "step": 3029 }, { "epoch": 0.31, "grad_norm": 1.4877261332506513, "learning_rate": 1.6208000309519017e-05, "loss": 0.7778, "step": 3030 }, { "epoch": 0.31, "grad_norm": 1.7057119690601854, "learning_rate": 1.6205417221932612e-05, "loss": 0.8156, "step": 3031 }, { "epoch": 0.31, "grad_norm": 1.5713357933890586, "learning_rate": 1.620283346084521e-05, "loss": 0.7903, "step": 3032 }, { "epoch": 0.31, "grad_norm": 1.5128948987964903, "learning_rate": 1.6200249026537232e-05, "loss": 0.6547, "step": 3033 }, { "epoch": 0.31, "grad_norm": 1.5813368080784047, "learning_rate": 1.619766391928919e-05, "loss": 0.8104, "step": 3034 }, { "epoch": 0.31, "grad_norm": 1.5360058392310687, "learning_rate": 1.6195078139381647e-05, "loss": 0.7894, "step": 3035 }, { "epoch": 0.31, "grad_norm": 1.472545203396127, "learning_rate": 1.619249168709525e-05, "loss": 0.6958, "step": 3036 }, { "epoch": 0.31, "grad_norm": 1.4597438853991118, "learning_rate": 1.6189904562710722e-05, "loss": 0.6809, "step": 3037 }, { "epoch": 0.31, "grad_norm": 1.6564332088860414, "learning_rate": 1.6187316766508856e-05, "loss": 0.659, "step": 3038 }, { "epoch": 0.31, "grad_norm": 1.526292436196685, "learning_rate": 1.618472829877051e-05, "loss": 0.768, "step": 3039 }, { "epoch": 0.31, "grad_norm": 1.6314143560380923, "learning_rate": 1.6182139159776627e-05, "loss": 0.8162, "step": 3040 }, { "epoch": 0.31, "grad_norm": 1.702836373014158, "learning_rate": 1.6179549349808216e-05, "loss": 0.7858, "step": 3041 }, { "epoch": 0.31, "grad_norm": 1.328996213321001, "learning_rate": 1.6176958869146358e-05, "loss": 0.7156, "step": 3042 }, { "epoch": 0.31, "grad_norm": 1.567602922565799, "learning_rate": 1.6174367718072213e-05, "loss": 0.729, "step": 3043 }, { "epoch": 0.31, "grad_norm": 1.5843451063905833, "learning_rate": 1.617177589686701e-05, "loss": 0.8005, "step": 3044 }, { "epoch": 0.31, "grad_norm": 1.6711460527473359, "learning_rate": 1.6169183405812053e-05, "loss": 0.652, "step": 3045 }, { "epoch": 0.31, "grad_norm": 1.4961663881076657, "learning_rate": 1.6166590245188708e-05, "loss": 0.6315, "step": 3046 }, { "epoch": 0.31, "grad_norm": 1.565609963546912, "learning_rate": 1.6163996415278423e-05, "loss": 0.8349, "step": 3047 }, { "epoch": 0.31, "grad_norm": 1.3998288323033399, "learning_rate": 1.6161401916362723e-05, "loss": 0.6608, "step": 3048 }, { "epoch": 0.31, "grad_norm": 1.6135613014089765, "learning_rate": 1.6158806748723205e-05, "loss": 0.7993, "step": 3049 }, { "epoch": 0.31, "grad_norm": 1.5912260549170647, "learning_rate": 1.6156210912641524e-05, "loss": 0.825, "step": 3050 }, { "epoch": 0.31, "grad_norm": 1.4735378808073134, "learning_rate": 1.6153614408399418e-05, "loss": 0.6708, "step": 3051 }, { "epoch": 0.31, "grad_norm": 1.6086824751987387, "learning_rate": 1.61510172362787e-05, "loss": 0.7271, "step": 3052 }, { "epoch": 0.31, "grad_norm": 1.3443880247958508, "learning_rate": 1.6148419396561254e-05, "loss": 0.7419, "step": 3053 }, { "epoch": 0.31, "grad_norm": 1.5674892031057017, "learning_rate": 1.6145820889529033e-05, "loss": 0.7514, "step": 3054 }, { "epoch": 0.31, "grad_norm": 1.5110324653313316, "learning_rate": 1.6143221715464067e-05, "loss": 0.7184, "step": 3055 }, { "epoch": 0.31, "grad_norm": 1.7002384489332842, "learning_rate": 1.6140621874648447e-05, "loss": 0.7133, "step": 3056 }, { "epoch": 0.31, "grad_norm": 1.625221892701305, "learning_rate": 1.6138021367364353e-05, "loss": 0.7698, "step": 3057 }, { "epoch": 0.31, "grad_norm": 1.614223008622113, "learning_rate": 1.613542019389403e-05, "loss": 0.8406, "step": 3058 }, { "epoch": 0.31, "grad_norm": 1.6200348606542283, "learning_rate": 1.613281835451979e-05, "loss": 0.8781, "step": 3059 }, { "epoch": 0.31, "grad_norm": 1.5587315808852187, "learning_rate": 1.6130215849524025e-05, "loss": 0.7167, "step": 3060 }, { "epoch": 0.31, "grad_norm": 1.7338243906017512, "learning_rate": 1.6127612679189195e-05, "loss": 0.7895, "step": 3061 }, { "epoch": 0.31, "grad_norm": 1.7108718796736928, "learning_rate": 1.6125008843797835e-05, "loss": 0.7477, "step": 3062 }, { "epoch": 0.31, "grad_norm": 1.592167593368202, "learning_rate": 1.6122404343632547e-05, "loss": 0.7709, "step": 3063 }, { "epoch": 0.31, "grad_norm": 1.4668296830135767, "learning_rate": 1.6119799178976014e-05, "loss": 0.6438, "step": 3064 }, { "epoch": 0.31, "grad_norm": 1.6321493961782922, "learning_rate": 1.6117193350110982e-05, "loss": 0.7311, "step": 3065 }, { "epoch": 0.31, "grad_norm": 1.6487251263538025, "learning_rate": 1.6114586857320272e-05, "loss": 0.675, "step": 3066 }, { "epoch": 0.31, "grad_norm": 1.7908476169567467, "learning_rate": 1.611197970088678e-05, "loss": 0.8056, "step": 3067 }, { "epoch": 0.31, "grad_norm": 1.3986976644629658, "learning_rate": 1.6109371881093476e-05, "loss": 0.7328, "step": 3068 }, { "epoch": 0.31, "grad_norm": 1.6928601199095388, "learning_rate": 1.6106763398223394e-05, "loss": 0.7561, "step": 3069 }, { "epoch": 0.31, "grad_norm": 1.4871254686923177, "learning_rate": 1.610415425255964e-05, "loss": 0.6538, "step": 3070 }, { "epoch": 0.31, "grad_norm": 1.506830990770424, "learning_rate": 1.6101544444385406e-05, "loss": 0.7115, "step": 3071 }, { "epoch": 0.31, "grad_norm": 1.4004733856804321, "learning_rate": 1.6098933973983934e-05, "loss": 0.7236, "step": 3072 }, { "epoch": 0.31, "grad_norm": 1.5034428955072574, "learning_rate": 1.609632284163856e-05, "loss": 0.7715, "step": 3073 }, { "epoch": 0.31, "grad_norm": 1.7482891338939817, "learning_rate": 1.6093711047632676e-05, "loss": 0.7237, "step": 3074 }, { "epoch": 0.31, "grad_norm": 1.4696133304283667, "learning_rate": 1.6091098592249754e-05, "loss": 0.8095, "step": 3075 }, { "epoch": 0.31, "grad_norm": 1.6176646068560052, "learning_rate": 1.608848547577333e-05, "loss": 0.7347, "step": 3076 }, { "epoch": 0.31, "grad_norm": 1.568225549711669, "learning_rate": 1.6085871698487023e-05, "loss": 0.7641, "step": 3077 }, { "epoch": 0.31, "grad_norm": 1.508505589717331, "learning_rate": 1.608325726067452e-05, "loss": 0.7548, "step": 3078 }, { "epoch": 0.31, "grad_norm": 1.54588442431525, "learning_rate": 1.6080642162619567e-05, "loss": 0.7671, "step": 3079 }, { "epoch": 0.31, "grad_norm": 1.6584741334511337, "learning_rate": 1.6078026404605998e-05, "loss": 0.8191, "step": 3080 }, { "epoch": 0.31, "grad_norm": 1.4583145165427174, "learning_rate": 1.6075409986917714e-05, "loss": 0.7521, "step": 3081 }, { "epoch": 0.31, "grad_norm": 1.694055870736409, "learning_rate": 1.6072792909838686e-05, "loss": 0.7129, "step": 3082 }, { "epoch": 0.31, "grad_norm": 1.6988886090864983, "learning_rate": 1.6070175173652954e-05, "loss": 0.8416, "step": 3083 }, { "epoch": 0.31, "grad_norm": 1.4580745191676912, "learning_rate": 1.6067556778644633e-05, "loss": 0.6665, "step": 3084 }, { "epoch": 0.31, "grad_norm": 1.7544466094328668, "learning_rate": 1.606493772509791e-05, "loss": 0.7951, "step": 3085 }, { "epoch": 0.31, "grad_norm": 1.53367296871484, "learning_rate": 1.6062318013297045e-05, "loss": 0.8168, "step": 3086 }, { "epoch": 0.31, "grad_norm": 1.5875483622870659, "learning_rate": 1.6059697643526363e-05, "loss": 0.7529, "step": 3087 }, { "epoch": 0.31, "grad_norm": 1.4879290031335537, "learning_rate": 1.605707661607026e-05, "loss": 0.7073, "step": 3088 }, { "epoch": 0.31, "grad_norm": 1.5101875171658965, "learning_rate": 1.6054454931213217e-05, "loss": 0.7343, "step": 3089 }, { "epoch": 0.31, "grad_norm": 1.4990092041474783, "learning_rate": 1.605183258923977e-05, "loss": 0.7026, "step": 3090 }, { "epoch": 0.31, "grad_norm": 1.7472358291731696, "learning_rate": 1.6049209590434538e-05, "loss": 0.8645, "step": 3091 }, { "epoch": 0.31, "grad_norm": 1.5695152177694733, "learning_rate": 1.60465859350822e-05, "loss": 0.7457, "step": 3092 }, { "epoch": 0.31, "grad_norm": 1.4429394717863138, "learning_rate": 1.6043961623467523e-05, "loss": 0.7753, "step": 3093 }, { "epoch": 0.31, "grad_norm": 1.4815833238007763, "learning_rate": 1.6041336655875324e-05, "loss": 0.7611, "step": 3094 }, { "epoch": 0.31, "grad_norm": 1.653173088460423, "learning_rate": 1.6038711032590507e-05, "loss": 0.7596, "step": 3095 }, { "epoch": 0.31, "grad_norm": 1.557819086276001, "learning_rate": 1.6036084753898046e-05, "loss": 0.7968, "step": 3096 }, { "epoch": 0.32, "grad_norm": 1.4276364100852235, "learning_rate": 1.6033457820082975e-05, "loss": 0.6869, "step": 3097 }, { "epoch": 0.32, "grad_norm": 1.5354139858886717, "learning_rate": 1.6030830231430412e-05, "loss": 0.8235, "step": 3098 }, { "epoch": 0.32, "grad_norm": 1.6050709658162097, "learning_rate": 1.6028201988225536e-05, "loss": 0.8106, "step": 3099 }, { "epoch": 0.32, "grad_norm": 1.6729719675167836, "learning_rate": 1.6025573090753608e-05, "loss": 0.7784, "step": 3100 }, { "epoch": 0.32, "grad_norm": 1.599429966910005, "learning_rate": 1.6022943539299948e-05, "loss": 0.7485, "step": 3101 }, { "epoch": 0.32, "grad_norm": 1.4492624752991685, "learning_rate": 1.602031333414996e-05, "loss": 0.6903, "step": 3102 }, { "epoch": 0.32, "grad_norm": 1.5228689547530554, "learning_rate": 1.6017682475589103e-05, "loss": 0.7714, "step": 3103 }, { "epoch": 0.32, "grad_norm": 1.4370981140658128, "learning_rate": 1.601505096390292e-05, "loss": 0.6991, "step": 3104 }, { "epoch": 0.32, "grad_norm": 1.8961969950247142, "learning_rate": 1.601241879937702e-05, "loss": 0.6681, "step": 3105 }, { "epoch": 0.32, "grad_norm": 1.554051609425092, "learning_rate": 1.600978598229708e-05, "loss": 0.7787, "step": 3106 }, { "epoch": 0.32, "grad_norm": 1.427848239361892, "learning_rate": 1.6007152512948855e-05, "loss": 0.7268, "step": 3107 }, { "epoch": 0.32, "grad_norm": 1.5025791642754298, "learning_rate": 1.600451839161817e-05, "loss": 0.7459, "step": 3108 }, { "epoch": 0.32, "grad_norm": 1.6643850750285802, "learning_rate": 1.6001883618590913e-05, "loss": 0.8121, "step": 3109 }, { "epoch": 0.32, "grad_norm": 1.418192732050395, "learning_rate": 1.599924819415305e-05, "loss": 0.7477, "step": 3110 }, { "epoch": 0.32, "grad_norm": 1.4618962142506864, "learning_rate": 1.5996612118590604e-05, "loss": 0.7099, "step": 3111 }, { "epoch": 0.32, "grad_norm": 1.526946571378853, "learning_rate": 1.5993975392189697e-05, "loss": 0.7344, "step": 3112 }, { "epoch": 0.32, "grad_norm": 1.4919376879669402, "learning_rate": 1.5991338015236494e-05, "loss": 0.8031, "step": 3113 }, { "epoch": 0.32, "grad_norm": 1.6745537838724252, "learning_rate": 1.5988699988017243e-05, "loss": 0.808, "step": 3114 }, { "epoch": 0.32, "grad_norm": 1.5838066227108833, "learning_rate": 1.598606131081826e-05, "loss": 0.7917, "step": 3115 }, { "epoch": 0.32, "grad_norm": 1.5775363708969412, "learning_rate": 1.5983421983925937e-05, "loss": 0.6163, "step": 3116 }, { "epoch": 0.32, "grad_norm": 1.5748608622317408, "learning_rate": 1.598078200762673e-05, "loss": 0.7859, "step": 3117 }, { "epoch": 0.32, "grad_norm": 1.5423950467094711, "learning_rate": 1.597814138220716e-05, "loss": 0.7963, "step": 3118 }, { "epoch": 0.32, "grad_norm": 1.658696812208415, "learning_rate": 1.597550010795383e-05, "loss": 0.7569, "step": 3119 }, { "epoch": 0.32, "grad_norm": 1.3567834085980208, "learning_rate": 1.5972858185153412e-05, "loss": 0.6929, "step": 3120 }, { "epoch": 0.32, "grad_norm": 1.6085663273116662, "learning_rate": 1.5970215614092642e-05, "loss": 0.6935, "step": 3121 }, { "epoch": 0.32, "grad_norm": 1.3961019816420073, "learning_rate": 1.5967572395058334e-05, "loss": 0.7607, "step": 3122 }, { "epoch": 0.32, "grad_norm": 1.6165689879778342, "learning_rate": 1.5964928528337363e-05, "loss": 0.8189, "step": 3123 }, { "epoch": 0.32, "grad_norm": 1.570775241467543, "learning_rate": 1.596228401421668e-05, "loss": 0.7161, "step": 3124 }, { "epoch": 0.32, "grad_norm": 1.6292724209145448, "learning_rate": 1.5959638852983306e-05, "loss": 0.7867, "step": 3125 }, { "epoch": 0.32, "grad_norm": 1.5400550233485224, "learning_rate": 1.5956993044924334e-05, "loss": 0.7101, "step": 3126 }, { "epoch": 0.32, "grad_norm": 1.7324267822730282, "learning_rate": 1.5954346590326927e-05, "loss": 0.8033, "step": 3127 }, { "epoch": 0.32, "grad_norm": 1.5495496863750844, "learning_rate": 1.595169948947831e-05, "loss": 0.7431, "step": 3128 }, { "epoch": 0.32, "grad_norm": 1.6223883659624856, "learning_rate": 1.5949051742665788e-05, "loss": 0.795, "step": 3129 }, { "epoch": 0.32, "grad_norm": 1.4697103408900418, "learning_rate": 1.594640335017673e-05, "loss": 0.703, "step": 3130 }, { "epoch": 0.32, "grad_norm": 1.7101405485345647, "learning_rate": 1.5943754312298583e-05, "loss": 0.7555, "step": 3131 }, { "epoch": 0.32, "grad_norm": 1.5690228773213615, "learning_rate": 1.5941104629318856e-05, "loss": 0.7266, "step": 3132 }, { "epoch": 0.32, "grad_norm": 1.5873101127509042, "learning_rate": 1.5938454301525126e-05, "loss": 0.7022, "step": 3133 }, { "epoch": 0.32, "grad_norm": 1.439819522741593, "learning_rate": 1.593580332920505e-05, "loss": 0.6614, "step": 3134 }, { "epoch": 0.32, "grad_norm": 1.6108536230936394, "learning_rate": 1.593315171264635e-05, "loss": 0.8291, "step": 3135 }, { "epoch": 0.32, "grad_norm": 1.4550865485826752, "learning_rate": 1.5930499452136816e-05, "loss": 0.6884, "step": 3136 }, { "epoch": 0.32, "grad_norm": 1.6520085948866068, "learning_rate": 1.592784654796431e-05, "loss": 0.794, "step": 3137 }, { "epoch": 0.32, "grad_norm": 1.5170627473468863, "learning_rate": 1.5925193000416756e-05, "loss": 0.6584, "step": 3138 }, { "epoch": 0.32, "grad_norm": 1.7376109545630536, "learning_rate": 1.5922538809782166e-05, "loss": 0.7054, "step": 3139 }, { "epoch": 0.32, "grad_norm": 1.5238517632758721, "learning_rate": 1.591988397634861e-05, "loss": 0.7448, "step": 3140 }, { "epoch": 0.32, "grad_norm": 1.7180058889898717, "learning_rate": 1.591722850040422e-05, "loss": 0.6657, "step": 3141 }, { "epoch": 0.32, "grad_norm": 1.562494608534868, "learning_rate": 1.591457238223721e-05, "loss": 0.6182, "step": 3142 }, { "epoch": 0.32, "grad_norm": 1.5083445543382505, "learning_rate": 1.5911915622135864e-05, "loss": 0.7609, "step": 3143 }, { "epoch": 0.32, "grad_norm": 1.5367934569040897, "learning_rate": 1.590925822038853e-05, "loss": 0.7238, "step": 3144 }, { "epoch": 0.32, "grad_norm": 1.5691078613734306, "learning_rate": 1.5906600177283628e-05, "loss": 0.8175, "step": 3145 }, { "epoch": 0.32, "grad_norm": 1.690059752667436, "learning_rate": 1.590394149310964e-05, "loss": 0.7669, "step": 3146 }, { "epoch": 0.32, "grad_norm": 1.701910126920164, "learning_rate": 1.5901282168155136e-05, "loss": 0.6637, "step": 3147 }, { "epoch": 0.32, "grad_norm": 1.4938188942808153, "learning_rate": 1.5898622202708734e-05, "loss": 0.7786, "step": 3148 }, { "epoch": 0.32, "grad_norm": 1.4882713244830919, "learning_rate": 1.5895961597059137e-05, "loss": 0.6403, "step": 3149 }, { "epoch": 0.32, "grad_norm": 1.652969857758579, "learning_rate": 1.5893300351495115e-05, "loss": 0.8245, "step": 3150 }, { "epoch": 0.32, "grad_norm": 1.4934941684919685, "learning_rate": 1.5890638466305495e-05, "loss": 0.7107, "step": 3151 }, { "epoch": 0.32, "grad_norm": 1.5580866347387254, "learning_rate": 1.5887975941779196e-05, "loss": 0.7022, "step": 3152 }, { "epoch": 0.32, "grad_norm": 1.3971726739844001, "learning_rate": 1.588531277820518e-05, "loss": 0.6722, "step": 3153 }, { "epoch": 0.32, "grad_norm": 1.5983800866485998, "learning_rate": 1.58826489758725e-05, "loss": 0.7353, "step": 3154 }, { "epoch": 0.32, "grad_norm": 1.4430310300597617, "learning_rate": 1.587998453507027e-05, "loss": 0.6149, "step": 3155 }, { "epoch": 0.32, "grad_norm": 1.5549077993086349, "learning_rate": 1.587731945608767e-05, "loss": 0.7428, "step": 3156 }, { "epoch": 0.32, "grad_norm": 1.6302877656070816, "learning_rate": 1.5874653739213948e-05, "loss": 0.7145, "step": 3157 }, { "epoch": 0.32, "grad_norm": 1.4814762359067342, "learning_rate": 1.587198738473844e-05, "loss": 0.7209, "step": 3158 }, { "epoch": 0.32, "grad_norm": 1.5449844910463155, "learning_rate": 1.5869320392950526e-05, "loss": 0.7601, "step": 3159 }, { "epoch": 0.32, "grad_norm": 1.5479632840600481, "learning_rate": 1.5866652764139667e-05, "loss": 0.6991, "step": 3160 }, { "epoch": 0.32, "grad_norm": 1.5665482425525807, "learning_rate": 1.58639844985954e-05, "loss": 0.7769, "step": 3161 }, { "epoch": 0.32, "grad_norm": 1.4987697650410985, "learning_rate": 1.5861315596607315e-05, "loss": 0.788, "step": 3162 }, { "epoch": 0.32, "grad_norm": 1.4443483167803732, "learning_rate": 1.585864605846508e-05, "loss": 0.7179, "step": 3163 }, { "epoch": 0.32, "grad_norm": 1.5872236675585665, "learning_rate": 1.585597588445844e-05, "loss": 0.6736, "step": 3164 }, { "epoch": 0.32, "grad_norm": 1.5611647818715682, "learning_rate": 1.585330507487719e-05, "loss": 0.8164, "step": 3165 }, { "epoch": 0.32, "grad_norm": 1.673138476399332, "learning_rate": 1.585063363001121e-05, "loss": 0.7495, "step": 3166 }, { "epoch": 0.32, "grad_norm": 1.7099893089128935, "learning_rate": 1.584796155015045e-05, "loss": 0.8088, "step": 3167 }, { "epoch": 0.32, "grad_norm": 1.4623288158796532, "learning_rate": 1.584528883558491e-05, "loss": 0.7705, "step": 3168 }, { "epoch": 0.32, "grad_norm": 1.509934012381232, "learning_rate": 1.5842615486604674e-05, "loss": 0.7781, "step": 3169 }, { "epoch": 0.32, "grad_norm": 1.5273599096580284, "learning_rate": 1.58399415034999e-05, "loss": 0.823, "step": 3170 }, { "epoch": 0.32, "grad_norm": 1.5482870979077432, "learning_rate": 1.5837266886560802e-05, "loss": 0.774, "step": 3171 }, { "epoch": 0.32, "grad_norm": 1.4253493680036589, "learning_rate": 1.583459163607767e-05, "loss": 0.6898, "step": 3172 }, { "epoch": 0.32, "grad_norm": 1.4850118758223274, "learning_rate": 1.5831915752340855e-05, "loss": 0.7483, "step": 3173 }, { "epoch": 0.32, "grad_norm": 1.7053278015269768, "learning_rate": 1.5829239235640782e-05, "loss": 0.8189, "step": 3174 }, { "epoch": 0.32, "grad_norm": 1.4857371360723388, "learning_rate": 1.5826562086267956e-05, "loss": 0.7126, "step": 3175 }, { "epoch": 0.32, "grad_norm": 1.5745791094329897, "learning_rate": 1.5823884304512934e-05, "loss": 0.7374, "step": 3176 }, { "epoch": 0.32, "grad_norm": 1.792000476054425, "learning_rate": 1.582120589066634e-05, "loss": 0.7418, "step": 3177 }, { "epoch": 0.32, "grad_norm": 1.4355059010070181, "learning_rate": 1.581852684501888e-05, "loss": 0.7174, "step": 3178 }, { "epoch": 0.32, "grad_norm": 1.6095170706882729, "learning_rate": 1.5815847167861327e-05, "loss": 0.866, "step": 3179 }, { "epoch": 0.32, "grad_norm": 1.426623584531756, "learning_rate": 1.5813166859484515e-05, "loss": 0.6396, "step": 3180 }, { "epoch": 0.32, "grad_norm": 1.3991244579834483, "learning_rate": 1.5810485920179344e-05, "loss": 0.6319, "step": 3181 }, { "epoch": 0.32, "grad_norm": 1.4587604377004648, "learning_rate": 1.5807804350236793e-05, "loss": 0.6378, "step": 3182 }, { "epoch": 0.32, "grad_norm": 1.426280154165666, "learning_rate": 1.5805122149947904e-05, "loss": 0.7459, "step": 3183 }, { "epoch": 0.32, "grad_norm": 1.5217399746727973, "learning_rate": 1.5802439319603786e-05, "loss": 0.753, "step": 3184 }, { "epoch": 0.32, "grad_norm": 1.4164338393551477, "learning_rate": 1.5799755859495625e-05, "loss": 0.617, "step": 3185 }, { "epoch": 0.32, "grad_norm": 1.5097480099291565, "learning_rate": 1.579707176991466e-05, "loss": 0.8471, "step": 3186 }, { "epoch": 0.32, "grad_norm": 1.72903010776992, "learning_rate": 1.5794387051152208e-05, "loss": 0.768, "step": 3187 }, { "epoch": 0.32, "grad_norm": 1.6044825868083277, "learning_rate": 1.5791701703499656e-05, "loss": 0.7313, "step": 3188 }, { "epoch": 0.32, "grad_norm": 1.5420586309894015, "learning_rate": 1.578901572724846e-05, "loss": 0.6848, "step": 3189 }, { "epoch": 0.32, "grad_norm": 1.6985781115712704, "learning_rate": 1.5786329122690135e-05, "loss": 0.728, "step": 3190 }, { "epoch": 0.32, "grad_norm": 1.3593546053483965, "learning_rate": 1.5783641890116273e-05, "loss": 0.5939, "step": 3191 }, { "epoch": 0.32, "grad_norm": 1.444491418044976, "learning_rate": 1.578095402981853e-05, "loss": 0.6696, "step": 3192 }, { "epoch": 0.32, "grad_norm": 1.4372557697332757, "learning_rate": 1.577826554208863e-05, "loss": 0.7297, "step": 3193 }, { "epoch": 0.32, "grad_norm": 1.6857280272203305, "learning_rate": 1.577557642721837e-05, "loss": 0.7403, "step": 3194 }, { "epoch": 0.32, "grad_norm": 1.5915123912985911, "learning_rate": 1.5772886685499605e-05, "loss": 0.767, "step": 3195 }, { "epoch": 0.33, "grad_norm": 1.5693985857062513, "learning_rate": 1.5770196317224267e-05, "loss": 0.8271, "step": 3196 }, { "epoch": 0.33, "grad_norm": 1.7371800022626132, "learning_rate": 1.576750532268436e-05, "loss": 0.7539, "step": 3197 }, { "epoch": 0.33, "grad_norm": 1.4895456395950455, "learning_rate": 1.5764813702171937e-05, "loss": 0.7717, "step": 3198 }, { "epoch": 0.33, "grad_norm": 1.5575816426789546, "learning_rate": 1.5762121455979144e-05, "loss": 0.7825, "step": 3199 }, { "epoch": 0.33, "grad_norm": 1.5366001344613465, "learning_rate": 1.5759428584398175e-05, "loss": 0.7271, "step": 3200 }, { "epoch": 0.33, "grad_norm": 1.4564051361437806, "learning_rate": 1.5756735087721297e-05, "loss": 0.7732, "step": 3201 }, { "epoch": 0.33, "grad_norm": 1.4784322680239337, "learning_rate": 1.5754040966240856e-05, "loss": 0.6973, "step": 3202 }, { "epoch": 0.33, "grad_norm": 1.547436781038291, "learning_rate": 1.575134622024925e-05, "loss": 0.7008, "step": 3203 }, { "epoch": 0.33, "grad_norm": 1.6936529434866159, "learning_rate": 1.5748650850038952e-05, "loss": 0.8086, "step": 3204 }, { "epoch": 0.33, "grad_norm": 1.5107765210344077, "learning_rate": 1.57459548559025e-05, "loss": 0.6434, "step": 3205 }, { "epoch": 0.33, "grad_norm": 1.4124165547616998, "learning_rate": 1.5743258238132506e-05, "loss": 0.7412, "step": 3206 }, { "epoch": 0.33, "grad_norm": 1.6505896344500248, "learning_rate": 1.574056099702165e-05, "loss": 0.7154, "step": 3207 }, { "epoch": 0.33, "grad_norm": 1.3868823830634551, "learning_rate": 1.5737863132862667e-05, "loss": 0.6848, "step": 3208 }, { "epoch": 0.33, "grad_norm": 1.5852957586226968, "learning_rate": 1.5735164645948365e-05, "loss": 0.719, "step": 3209 }, { "epoch": 0.33, "grad_norm": 1.5387482663631675, "learning_rate": 1.5732465536571635e-05, "loss": 0.7347, "step": 3210 }, { "epoch": 0.33, "grad_norm": 1.5964984312736819, "learning_rate": 1.5729765805025416e-05, "loss": 0.7555, "step": 3211 }, { "epoch": 0.33, "grad_norm": 1.472043261569749, "learning_rate": 1.5727065451602722e-05, "loss": 0.737, "step": 3212 }, { "epoch": 0.33, "grad_norm": 1.7394590989294285, "learning_rate": 1.5724364476596637e-05, "loss": 0.8139, "step": 3213 }, { "epoch": 0.33, "grad_norm": 1.4609690623304736, "learning_rate": 1.5721662880300303e-05, "loss": 0.8106, "step": 3214 }, { "epoch": 0.33, "grad_norm": 1.4984659414493768, "learning_rate": 1.5718960663006942e-05, "loss": 0.8126, "step": 3215 }, { "epoch": 0.33, "grad_norm": 1.5078764678998093, "learning_rate": 1.5716257825009836e-05, "loss": 0.7072, "step": 3216 }, { "epoch": 0.33, "grad_norm": 1.6217296271864472, "learning_rate": 1.5713554366602333e-05, "loss": 0.808, "step": 3217 }, { "epoch": 0.33, "grad_norm": 1.6107267709731687, "learning_rate": 1.571085028807786e-05, "loss": 0.7409, "step": 3218 }, { "epoch": 0.33, "grad_norm": 1.496304836221755, "learning_rate": 1.5708145589729887e-05, "loss": 0.7075, "step": 3219 }, { "epoch": 0.33, "grad_norm": 1.6943857719430757, "learning_rate": 1.5705440271851982e-05, "loss": 0.82, "step": 3220 }, { "epoch": 0.33, "grad_norm": 1.5916792170173406, "learning_rate": 1.570273433473776e-05, "loss": 0.7467, "step": 3221 }, { "epoch": 0.33, "grad_norm": 1.5585203351560093, "learning_rate": 1.57000277786809e-05, "loss": 0.7451, "step": 3222 }, { "epoch": 0.33, "grad_norm": 1.5284408972175045, "learning_rate": 1.569732060397517e-05, "loss": 0.7689, "step": 3223 }, { "epoch": 0.33, "grad_norm": 1.593389917270574, "learning_rate": 1.5694612810914378e-05, "loss": 0.7555, "step": 3224 }, { "epoch": 0.33, "grad_norm": 1.5395338356559685, "learning_rate": 1.5691904399792425e-05, "loss": 0.7338, "step": 3225 }, { "epoch": 0.33, "grad_norm": 1.6093287327816626, "learning_rate": 1.5689195370903258e-05, "loss": 0.7608, "step": 3226 }, { "epoch": 0.33, "grad_norm": 1.5416980247313807, "learning_rate": 1.56864857245409e-05, "loss": 0.6709, "step": 3227 }, { "epoch": 0.33, "grad_norm": 1.6869407406650818, "learning_rate": 1.5683775460999446e-05, "loss": 0.7468, "step": 3228 }, { "epoch": 0.33, "grad_norm": 1.5516293625736395, "learning_rate": 1.568106458057305e-05, "loss": 0.7228, "step": 3229 }, { "epoch": 0.33, "grad_norm": 1.5785437584600943, "learning_rate": 1.5678353083555938e-05, "loss": 0.743, "step": 3230 }, { "epoch": 0.33, "grad_norm": 1.5095253750379387, "learning_rate": 1.5675640970242393e-05, "loss": 0.7638, "step": 3231 }, { "epoch": 0.33, "grad_norm": 1.6052697059376566, "learning_rate": 1.5672928240926782e-05, "loss": 0.7752, "step": 3232 }, { "epoch": 0.33, "grad_norm": 1.6343953052917846, "learning_rate": 1.5670214895903522e-05, "loss": 0.7411, "step": 3233 }, { "epoch": 0.33, "grad_norm": 1.5285162580731912, "learning_rate": 1.5667500935467112e-05, "loss": 0.866, "step": 3234 }, { "epoch": 0.33, "grad_norm": 1.698977154811488, "learning_rate": 1.56647863599121e-05, "loss": 0.7947, "step": 3235 }, { "epoch": 0.33, "grad_norm": 1.4866475904993897, "learning_rate": 1.566207116953312e-05, "loss": 0.718, "step": 3236 }, { "epoch": 0.33, "grad_norm": 1.7092189587646751, "learning_rate": 1.5659355364624856e-05, "loss": 0.653, "step": 3237 }, { "epoch": 0.33, "grad_norm": 1.503865433297925, "learning_rate": 1.565663894548207e-05, "loss": 0.7405, "step": 3238 }, { "epoch": 0.33, "grad_norm": 1.4975521811870942, "learning_rate": 1.565392191239959e-05, "loss": 0.7793, "step": 3239 }, { "epoch": 0.33, "grad_norm": 1.669709188715031, "learning_rate": 1.5651204265672305e-05, "loss": 0.8376, "step": 3240 }, { "epoch": 0.33, "grad_norm": 1.6396041162136814, "learning_rate": 1.5648486005595167e-05, "loss": 0.6777, "step": 3241 }, { "epoch": 0.33, "grad_norm": 1.6140187825941346, "learning_rate": 1.5645767132463205e-05, "loss": 0.7828, "step": 3242 }, { "epoch": 0.33, "grad_norm": 1.646485113867051, "learning_rate": 1.5643047646571515e-05, "loss": 0.7514, "step": 3243 }, { "epoch": 0.33, "grad_norm": 1.451040577376004, "learning_rate": 1.5640327548215245e-05, "loss": 0.7091, "step": 3244 }, { "epoch": 0.33, "grad_norm": 1.5039928013791208, "learning_rate": 1.5637606837689632e-05, "loss": 0.6726, "step": 3245 }, { "epoch": 0.33, "grad_norm": 1.6029000330987817, "learning_rate": 1.563488551528995e-05, "loss": 0.8024, "step": 3246 }, { "epoch": 0.33, "grad_norm": 1.5792729716232257, "learning_rate": 1.563216358131157e-05, "loss": 0.8164, "step": 3247 }, { "epoch": 0.33, "grad_norm": 1.570838961503552, "learning_rate": 1.5629441036049914e-05, "loss": 0.7266, "step": 3248 }, { "epoch": 0.33, "grad_norm": 1.4417182766874617, "learning_rate": 1.5626717879800465e-05, "loss": 0.7966, "step": 3249 }, { "epoch": 0.33, "grad_norm": 1.5969231950371279, "learning_rate": 1.562399411285878e-05, "loss": 0.753, "step": 3250 }, { "epoch": 0.33, "grad_norm": 1.5993321650315608, "learning_rate": 1.5621269735520485e-05, "loss": 0.7534, "step": 3251 }, { "epoch": 0.33, "grad_norm": 1.721250803274356, "learning_rate": 1.5618544748081264e-05, "loss": 0.8658, "step": 3252 }, { "epoch": 0.33, "grad_norm": 1.6157898329866196, "learning_rate": 1.561581915083688e-05, "loss": 0.7707, "step": 3253 }, { "epoch": 0.33, "grad_norm": 1.4024982634493652, "learning_rate": 1.561309294408315e-05, "loss": 0.6503, "step": 3254 }, { "epoch": 0.33, "grad_norm": 1.6343740262884703, "learning_rate": 1.5610366128115955e-05, "loss": 0.8926, "step": 3255 }, { "epoch": 0.33, "grad_norm": 1.6057197627756363, "learning_rate": 1.5607638703231252e-05, "loss": 0.7579, "step": 3256 }, { "epoch": 0.33, "grad_norm": 1.4338536433313815, "learning_rate": 1.5604910669725066e-05, "loss": 0.7252, "step": 3257 }, { "epoch": 0.33, "grad_norm": 1.590164273276705, "learning_rate": 1.5602182027893475e-05, "loss": 0.8045, "step": 3258 }, { "epoch": 0.33, "grad_norm": 1.7041979020864662, "learning_rate": 1.5599452778032634e-05, "loss": 0.689, "step": 3259 }, { "epoch": 0.33, "grad_norm": 1.4874237126040357, "learning_rate": 1.559672292043876e-05, "loss": 0.7374, "step": 3260 }, { "epoch": 0.33, "grad_norm": 1.4925645152044493, "learning_rate": 1.5593992455408136e-05, "loss": 0.6849, "step": 3261 }, { "epoch": 0.33, "grad_norm": 1.5962062522842229, "learning_rate": 1.559126138323711e-05, "loss": 0.8662, "step": 3262 }, { "epoch": 0.33, "grad_norm": 1.4143071400598322, "learning_rate": 1.55885297042221e-05, "loss": 0.6973, "step": 3263 }, { "epoch": 0.33, "grad_norm": 1.4838263696618768, "learning_rate": 1.5585797418659584e-05, "loss": 0.6286, "step": 3264 }, { "epoch": 0.33, "grad_norm": 1.5525624545464243, "learning_rate": 1.558306452684611e-05, "loss": 0.7891, "step": 3265 }, { "epoch": 0.33, "grad_norm": 1.4785889855698298, "learning_rate": 1.5580331029078294e-05, "loss": 0.7668, "step": 3266 }, { "epoch": 0.33, "grad_norm": 1.5508249032227592, "learning_rate": 1.5577596925652808e-05, "loss": 0.6957, "step": 3267 }, { "epoch": 0.33, "grad_norm": 1.3613392374759203, "learning_rate": 1.5574862216866403e-05, "loss": 0.7151, "step": 3268 }, { "epoch": 0.33, "grad_norm": 1.5786788804197516, "learning_rate": 1.557212690301588e-05, "loss": 0.6864, "step": 3269 }, { "epoch": 0.33, "grad_norm": 1.681522707673013, "learning_rate": 1.5569390984398127e-05, "loss": 0.794, "step": 3270 }, { "epoch": 0.33, "grad_norm": 1.682966125140333, "learning_rate": 1.5566654461310073e-05, "loss": 0.7273, "step": 3271 }, { "epoch": 0.33, "grad_norm": 1.5554113078407963, "learning_rate": 1.556391733404873e-05, "loss": 0.7791, "step": 3272 }, { "epoch": 0.33, "grad_norm": 1.6006383408949836, "learning_rate": 1.5561179602911173e-05, "loss": 0.7161, "step": 3273 }, { "epoch": 0.33, "grad_norm": 1.5815849883781743, "learning_rate": 1.5558441268194537e-05, "loss": 0.6785, "step": 3274 }, { "epoch": 0.33, "grad_norm": 1.4786161669319569, "learning_rate": 1.5555702330196024e-05, "loss": 0.6685, "step": 3275 }, { "epoch": 0.33, "grad_norm": 1.5620239060191075, "learning_rate": 1.5552962789212904e-05, "loss": 0.7152, "step": 3276 }, { "epoch": 0.33, "grad_norm": 1.4996886244762353, "learning_rate": 1.5550222645542517e-05, "loss": 0.7554, "step": 3277 }, { "epoch": 0.33, "grad_norm": 1.6078257952546522, "learning_rate": 1.5547481899482252e-05, "loss": 0.7861, "step": 3278 }, { "epoch": 0.33, "grad_norm": 1.5363310435057185, "learning_rate": 1.554474055132958e-05, "loss": 0.7803, "step": 3279 }, { "epoch": 0.33, "grad_norm": 1.5009334366953122, "learning_rate": 1.5541998601382035e-05, "loss": 0.7894, "step": 3280 }, { "epoch": 0.33, "grad_norm": 1.5874154896475607, "learning_rate": 1.5539256049937205e-05, "loss": 0.6396, "step": 3281 }, { "epoch": 0.33, "grad_norm": 1.5775813348209935, "learning_rate": 1.5536512897292757e-05, "loss": 0.7288, "step": 3282 }, { "epoch": 0.33, "grad_norm": 1.6513279054317311, "learning_rate": 1.5533769143746416e-05, "loss": 0.7524, "step": 3283 }, { "epoch": 0.33, "grad_norm": 1.3792290072180375, "learning_rate": 1.5531024789595968e-05, "loss": 0.6658, "step": 3284 }, { "epoch": 0.33, "grad_norm": 1.4989661405348915, "learning_rate": 1.552827983513928e-05, "loss": 0.7818, "step": 3285 }, { "epoch": 0.33, "grad_norm": 1.4834997754072567, "learning_rate": 1.552553428067427e-05, "loss": 0.8217, "step": 3286 }, { "epoch": 0.33, "grad_norm": 1.584344077792234, "learning_rate": 1.5522788126498916e-05, "loss": 0.8188, "step": 3287 }, { "epoch": 0.33, "grad_norm": 1.4302097903103494, "learning_rate": 1.5520041372911286e-05, "loss": 0.7642, "step": 3288 }, { "epoch": 0.33, "grad_norm": 1.6986211356952792, "learning_rate": 1.5517294020209483e-05, "loss": 0.796, "step": 3289 }, { "epoch": 0.33, "grad_norm": 1.670903396086338, "learning_rate": 1.5514546068691697e-05, "loss": 0.8377, "step": 3290 }, { "epoch": 0.33, "grad_norm": 1.6445195419654928, "learning_rate": 1.5511797518656174e-05, "loss": 0.868, "step": 3291 }, { "epoch": 0.33, "grad_norm": 1.6439578769044951, "learning_rate": 1.5509048370401224e-05, "loss": 0.7765, "step": 3292 }, { "epoch": 0.33, "grad_norm": 1.608729331314093, "learning_rate": 1.5506298624225225e-05, "loss": 0.7792, "step": 3293 }, { "epoch": 0.34, "grad_norm": 1.5707601205056936, "learning_rate": 1.550354828042662e-05, "loss": 0.7926, "step": 3294 }, { "epoch": 0.34, "grad_norm": 1.493563824960474, "learning_rate": 1.5500797339303913e-05, "loss": 0.7154, "step": 3295 }, { "epoch": 0.34, "grad_norm": 1.5552815430878002, "learning_rate": 1.549804580115568e-05, "loss": 0.647, "step": 3296 }, { "epoch": 0.34, "grad_norm": 1.3313103567796118, "learning_rate": 1.5495293666280556e-05, "loss": 0.7092, "step": 3297 }, { "epoch": 0.34, "grad_norm": 1.8987395905196398, "learning_rate": 1.549254093497724e-05, "loss": 0.8331, "step": 3298 }, { "epoch": 0.34, "grad_norm": 1.6075348752193146, "learning_rate": 1.5489787607544498e-05, "loss": 0.8554, "step": 3299 }, { "epoch": 0.34, "grad_norm": 1.6698223611640848, "learning_rate": 1.5487033684281163e-05, "loss": 0.7023, "step": 3300 }, { "epoch": 0.34, "grad_norm": 1.5546756934383337, "learning_rate": 1.548427916548613e-05, "loss": 0.882, "step": 3301 }, { "epoch": 0.34, "grad_norm": 1.5788479756963842, "learning_rate": 1.5481524051458356e-05, "loss": 0.6948, "step": 3302 }, { "epoch": 0.34, "grad_norm": 1.5474807971285485, "learning_rate": 1.5478768342496872e-05, "loss": 0.8409, "step": 3303 }, { "epoch": 0.34, "grad_norm": 1.6709675283696996, "learning_rate": 1.547601203890076e-05, "loss": 0.7155, "step": 3304 }, { "epoch": 0.34, "grad_norm": 1.5105573645817394, "learning_rate": 1.5473255140969176e-05, "loss": 0.6846, "step": 3305 }, { "epoch": 0.34, "grad_norm": 1.5219539065398735, "learning_rate": 1.5470497649001336e-05, "loss": 0.7485, "step": 3306 }, { "epoch": 0.34, "grad_norm": 1.5137795824059195, "learning_rate": 1.5467739563296528e-05, "loss": 0.8512, "step": 3307 }, { "epoch": 0.34, "grad_norm": 1.5675663536182731, "learning_rate": 1.54649808841541e-05, "loss": 0.8345, "step": 3308 }, { "epoch": 0.34, "grad_norm": 1.583690197417576, "learning_rate": 1.546222161187345e-05, "loss": 0.7969, "step": 3309 }, { "epoch": 0.34, "grad_norm": 1.4902741507582222, "learning_rate": 1.5459461746754075e-05, "loss": 0.6601, "step": 3310 }, { "epoch": 0.34, "grad_norm": 1.5923296984248694, "learning_rate": 1.5456701289095496e-05, "loss": 0.7259, "step": 3311 }, { "epoch": 0.34, "grad_norm": 1.569763023547656, "learning_rate": 1.5453940239197328e-05, "loss": 0.7625, "step": 3312 }, { "epoch": 0.34, "grad_norm": 1.588716506288604, "learning_rate": 1.5451178597359237e-05, "loss": 0.7954, "step": 3313 }, { "epoch": 0.34, "grad_norm": 1.458630633504389, "learning_rate": 1.5448416363880956e-05, "loss": 0.7124, "step": 3314 }, { "epoch": 0.34, "grad_norm": 1.4807992133698338, "learning_rate": 1.5445653539062283e-05, "loss": 0.8119, "step": 3315 }, { "epoch": 0.34, "grad_norm": 1.6550269578144803, "learning_rate": 1.5442890123203077e-05, "loss": 0.8211, "step": 3316 }, { "epoch": 0.34, "grad_norm": 1.6472584346267662, "learning_rate": 1.544012611660327e-05, "loss": 0.7719, "step": 3317 }, { "epoch": 0.34, "grad_norm": 1.6462001997415807, "learning_rate": 1.5437361519562843e-05, "loss": 0.9011, "step": 3318 }, { "epoch": 0.34, "grad_norm": 1.6298769830925262, "learning_rate": 1.5434596332381853e-05, "loss": 0.8106, "step": 3319 }, { "epoch": 0.34, "grad_norm": 1.6925339646508504, "learning_rate": 1.543183055536042e-05, "loss": 0.7336, "step": 3320 }, { "epoch": 0.34, "grad_norm": 1.5500185607465797, "learning_rate": 1.5429064188798727e-05, "loss": 0.6836, "step": 3321 }, { "epoch": 0.34, "grad_norm": 1.6311566638530315, "learning_rate": 1.5426297232997016e-05, "loss": 0.6382, "step": 3322 }, { "epoch": 0.34, "grad_norm": 1.482678790650085, "learning_rate": 1.5423529688255593e-05, "loss": 0.7339, "step": 3323 }, { "epoch": 0.34, "grad_norm": 1.4653855741666746, "learning_rate": 1.542076155487484e-05, "loss": 0.8448, "step": 3324 }, { "epoch": 0.34, "grad_norm": 1.428470162563532, "learning_rate": 1.54179928331552e-05, "loss": 0.7418, "step": 3325 }, { "epoch": 0.34, "grad_norm": 1.5905801823778656, "learning_rate": 1.5415223523397153e-05, "loss": 0.7248, "step": 3326 }, { "epoch": 0.34, "grad_norm": 1.7581095189115765, "learning_rate": 1.541245362590128e-05, "loss": 0.7834, "step": 3327 }, { "epoch": 0.34, "grad_norm": 1.5621095736065527, "learning_rate": 1.5409683140968213e-05, "loss": 0.7208, "step": 3328 }, { "epoch": 0.34, "grad_norm": 1.7278372548733352, "learning_rate": 1.540691206889864e-05, "loss": 0.6603, "step": 3329 }, { "epoch": 0.34, "grad_norm": 1.4516693914835002, "learning_rate": 1.540414040999331e-05, "loss": 0.76, "step": 3330 }, { "epoch": 0.34, "grad_norm": 1.4918138555527303, "learning_rate": 1.5401368164553054e-05, "loss": 0.7142, "step": 3331 }, { "epoch": 0.34, "grad_norm": 1.567648550614192, "learning_rate": 1.539859533287875e-05, "loss": 0.6528, "step": 3332 }, { "epoch": 0.34, "grad_norm": 1.5803270851499127, "learning_rate": 1.5395821915271344e-05, "loss": 0.7476, "step": 3333 }, { "epoch": 0.34, "grad_norm": 1.5112348972644198, "learning_rate": 1.539304791203186e-05, "loss": 0.7673, "step": 3334 }, { "epoch": 0.34, "grad_norm": 1.6631107341011613, "learning_rate": 1.5390273323461354e-05, "loss": 0.7598, "step": 3335 }, { "epoch": 0.34, "grad_norm": 1.656394575426843, "learning_rate": 1.5387498149860975e-05, "loss": 0.8525, "step": 3336 }, { "epoch": 0.34, "grad_norm": 1.4690253202750414, "learning_rate": 1.5384722391531922e-05, "loss": 0.6093, "step": 3337 }, { "epoch": 0.34, "grad_norm": 1.6346574420144158, "learning_rate": 1.5381946048775462e-05, "loss": 0.7501, "step": 3338 }, { "epoch": 0.34, "grad_norm": 1.446909212096057, "learning_rate": 1.5379169121892925e-05, "loss": 0.7256, "step": 3339 }, { "epoch": 0.34, "grad_norm": 1.4950797036537102, "learning_rate": 1.5376391611185703e-05, "loss": 0.6672, "step": 3340 }, { "epoch": 0.34, "grad_norm": 1.6664378639633073, "learning_rate": 1.5373613516955243e-05, "loss": 0.7275, "step": 3341 }, { "epoch": 0.34, "grad_norm": 1.5257888571405214, "learning_rate": 1.537083483950307e-05, "loss": 0.7199, "step": 3342 }, { "epoch": 0.34, "grad_norm": 1.484698622122737, "learning_rate": 1.5368055579130768e-05, "loss": 0.7321, "step": 3343 }, { "epoch": 0.34, "grad_norm": 1.6038707286398843, "learning_rate": 1.5365275736139978e-05, "loss": 0.7752, "step": 3344 }, { "epoch": 0.34, "grad_norm": 1.484225545623433, "learning_rate": 1.536249531083241e-05, "loss": 0.655, "step": 3345 }, { "epoch": 0.34, "grad_norm": 1.4082424654428607, "learning_rate": 1.5359714303509838e-05, "loss": 0.7996, "step": 3346 }, { "epoch": 0.34, "grad_norm": 1.6178201732198476, "learning_rate": 1.5356932714474096e-05, "loss": 0.7231, "step": 3347 }, { "epoch": 0.34, "grad_norm": 1.5148520011513111, "learning_rate": 1.535415054402708e-05, "loss": 0.7029, "step": 3348 }, { "epoch": 0.34, "grad_norm": 1.6893975573942526, "learning_rate": 1.535136779247075e-05, "loss": 0.7314, "step": 3349 }, { "epoch": 0.34, "grad_norm": 1.5313278686857816, "learning_rate": 1.5348584460107132e-05, "loss": 0.8194, "step": 3350 }, { "epoch": 0.34, "grad_norm": 1.6152196301716772, "learning_rate": 1.5345800547238315e-05, "loss": 0.7202, "step": 3351 }, { "epoch": 0.34, "grad_norm": 1.3996437024777064, "learning_rate": 1.534301605416645e-05, "loss": 0.7516, "step": 3352 }, { "epoch": 0.34, "grad_norm": 1.3860359296983389, "learning_rate": 1.5340230981193745e-05, "loss": 0.6924, "step": 3353 }, { "epoch": 0.34, "grad_norm": 1.5472239021599388, "learning_rate": 1.5337445328622478e-05, "loss": 0.6571, "step": 3354 }, { "epoch": 0.34, "grad_norm": 1.6599870981410105, "learning_rate": 1.533465909675499e-05, "loss": 0.7952, "step": 3355 }, { "epoch": 0.34, "grad_norm": 1.5075921361400568, "learning_rate": 1.5331872285893682e-05, "loss": 0.6703, "step": 3356 }, { "epoch": 0.34, "grad_norm": 1.4520303260348506, "learning_rate": 1.5329084896341017e-05, "loss": 0.6917, "step": 3357 }, { "epoch": 0.34, "grad_norm": 1.509243271112511, "learning_rate": 1.5326296928399527e-05, "loss": 0.7536, "step": 3358 }, { "epoch": 0.34, "grad_norm": 1.4780434353646639, "learning_rate": 1.5323508382371796e-05, "loss": 0.8208, "step": 3359 }, { "epoch": 0.34, "grad_norm": 1.7156394695200816, "learning_rate": 1.5320719258560482e-05, "loss": 0.6873, "step": 3360 }, { "epoch": 0.34, "grad_norm": 1.569804942520179, "learning_rate": 1.53179295572683e-05, "loss": 0.6311, "step": 3361 }, { "epoch": 0.34, "grad_norm": 1.43936954024699, "learning_rate": 1.5315139278798027e-05, "loss": 0.7471, "step": 3362 }, { "epoch": 0.34, "grad_norm": 1.531157207106405, "learning_rate": 1.5312348423452506e-05, "loss": 0.7707, "step": 3363 }, { "epoch": 0.34, "grad_norm": 1.6599684182504466, "learning_rate": 1.5309556991534636e-05, "loss": 0.8001, "step": 3364 }, { "epoch": 0.34, "grad_norm": 1.7921342018549509, "learning_rate": 1.530676498334739e-05, "loss": 0.7317, "step": 3365 }, { "epoch": 0.34, "grad_norm": 1.6102687290223747, "learning_rate": 1.5303972399193795e-05, "loss": 0.8126, "step": 3366 }, { "epoch": 0.34, "grad_norm": 1.7942876571579665, "learning_rate": 1.5301179239376936e-05, "loss": 0.8059, "step": 3367 }, { "epoch": 0.34, "grad_norm": 1.5295088337332308, "learning_rate": 1.5298385504199975e-05, "loss": 0.6891, "step": 3368 }, { "epoch": 0.34, "grad_norm": 1.5894517073762104, "learning_rate": 1.5295591193966125e-05, "loss": 0.8017, "step": 3369 }, { "epoch": 0.34, "grad_norm": 1.5526743477678926, "learning_rate": 1.5292796308978662e-05, "loss": 0.6994, "step": 3370 }, { "epoch": 0.34, "grad_norm": 1.5052162467039785, "learning_rate": 1.529000084954093e-05, "loss": 0.7152, "step": 3371 }, { "epoch": 0.34, "grad_norm": 1.6780524182232355, "learning_rate": 1.5287204815956334e-05, "loss": 0.7928, "step": 3372 }, { "epoch": 0.34, "grad_norm": 1.683032929660594, "learning_rate": 1.5284408208528342e-05, "loss": 0.755, "step": 3373 }, { "epoch": 0.34, "grad_norm": 1.6660135300486254, "learning_rate": 1.5281611027560473e-05, "loss": 0.697, "step": 3374 }, { "epoch": 0.34, "grad_norm": 1.5766118490644017, "learning_rate": 1.527881327335632e-05, "loss": 0.6787, "step": 3375 }, { "epoch": 0.34, "grad_norm": 1.6779578367810388, "learning_rate": 1.5276014946219546e-05, "loss": 0.919, "step": 3376 }, { "epoch": 0.34, "grad_norm": 1.6525866035261925, "learning_rate": 1.5273216046453847e-05, "loss": 0.8289, "step": 3377 }, { "epoch": 0.34, "grad_norm": 1.6010039332657084, "learning_rate": 1.5270416574363015e-05, "loss": 0.7289, "step": 3378 }, { "epoch": 0.34, "grad_norm": 1.7230029616329539, "learning_rate": 1.5267616530250885e-05, "loss": 0.7253, "step": 3379 }, { "epoch": 0.34, "grad_norm": 1.4597544106926883, "learning_rate": 1.5264815914421355e-05, "loss": 0.6624, "step": 3380 }, { "epoch": 0.34, "grad_norm": 1.6520878222696886, "learning_rate": 1.526201472717839e-05, "loss": 0.7278, "step": 3381 }, { "epoch": 0.34, "grad_norm": 1.455043108180653, "learning_rate": 1.5259212968826013e-05, "loss": 0.7888, "step": 3382 }, { "epoch": 0.34, "grad_norm": 1.4135719140088696, "learning_rate": 1.5256410639668317e-05, "loss": 0.6792, "step": 3383 }, { "epoch": 0.34, "grad_norm": 1.5148945144636952, "learning_rate": 1.5253607740009447e-05, "loss": 0.7648, "step": 3384 }, { "epoch": 0.34, "grad_norm": 1.3845562724566336, "learning_rate": 1.5250804270153614e-05, "loss": 0.7052, "step": 3385 }, { "epoch": 0.34, "grad_norm": 1.4782533714227502, "learning_rate": 1.5248000230405086e-05, "loss": 0.8368, "step": 3386 }, { "epoch": 0.34, "grad_norm": 1.6029221711674988, "learning_rate": 1.5245195621068207e-05, "loss": 0.8218, "step": 3387 }, { "epoch": 0.34, "grad_norm": 1.7594558175947395, "learning_rate": 1.524239044244737e-05, "loss": 0.8015, "step": 3388 }, { "epoch": 0.34, "grad_norm": 1.5221214843094437, "learning_rate": 1.5239584694847032e-05, "loss": 0.6736, "step": 3389 }, { "epoch": 0.34, "grad_norm": 1.6120502289852638, "learning_rate": 1.523677837857171e-05, "loss": 0.7201, "step": 3390 }, { "epoch": 0.34, "grad_norm": 1.6352747168282848, "learning_rate": 1.523397149392599e-05, "loss": 0.7127, "step": 3391 }, { "epoch": 0.35, "grad_norm": 1.5988918051963217, "learning_rate": 1.5231164041214515e-05, "loss": 0.868, "step": 3392 }, { "epoch": 0.35, "grad_norm": 1.5813185864787247, "learning_rate": 1.5228356020741991e-05, "loss": 0.867, "step": 3393 }, { "epoch": 0.35, "grad_norm": 1.5204761871068275, "learning_rate": 1.5225547432813184e-05, "loss": 0.7359, "step": 3394 }, { "epoch": 0.35, "grad_norm": 1.4075043465821404, "learning_rate": 1.5222738277732918e-05, "loss": 0.7087, "step": 3395 }, { "epoch": 0.35, "grad_norm": 1.5446258138687168, "learning_rate": 1.5219928555806087e-05, "loss": 0.7042, "step": 3396 }, { "epoch": 0.35, "grad_norm": 1.4503770486789294, "learning_rate": 1.5217118267337645e-05, "loss": 0.6952, "step": 3397 }, { "epoch": 0.35, "grad_norm": 1.5964787717962494, "learning_rate": 1.52143074126326e-05, "loss": 0.709, "step": 3398 }, { "epoch": 0.35, "grad_norm": 1.6755609053141203, "learning_rate": 1.5211495991996029e-05, "loss": 0.798, "step": 3399 }, { "epoch": 0.35, "grad_norm": 1.7113459909179223, "learning_rate": 1.5208684005733063e-05, "loss": 0.7412, "step": 3400 }, { "epoch": 0.35, "grad_norm": 1.4563857410337533, "learning_rate": 1.5205871454148909e-05, "loss": 0.716, "step": 3401 }, { "epoch": 0.35, "grad_norm": 1.6121262428835532, "learning_rate": 1.5203058337548813e-05, "loss": 0.7882, "step": 3402 }, { "epoch": 0.35, "grad_norm": 1.6361418926139473, "learning_rate": 1.5200244656238106e-05, "loss": 0.7199, "step": 3403 }, { "epoch": 0.35, "grad_norm": 1.6940134297858211, "learning_rate": 1.519743041052216e-05, "loss": 0.7211, "step": 3404 }, { "epoch": 0.35, "grad_norm": 1.51968529644561, "learning_rate": 1.5194615600706426e-05, "loss": 0.7086, "step": 3405 }, { "epoch": 0.35, "grad_norm": 1.4702030040840666, "learning_rate": 1.51918002270964e-05, "loss": 0.6855, "step": 3406 }, { "epoch": 0.35, "grad_norm": 1.454815808229764, "learning_rate": 1.5188984289997653e-05, "loss": 0.6909, "step": 3407 }, { "epoch": 0.35, "grad_norm": 1.5142798550349588, "learning_rate": 1.5186167789715805e-05, "loss": 0.7542, "step": 3408 }, { "epoch": 0.35, "grad_norm": 1.3422009964047386, "learning_rate": 1.5183350726556545e-05, "loss": 0.747, "step": 3409 }, { "epoch": 0.35, "grad_norm": 1.3183729374682407, "learning_rate": 1.5180533100825625e-05, "loss": 0.7534, "step": 3410 }, { "epoch": 0.35, "grad_norm": 1.6271445029553226, "learning_rate": 1.5177714912828849e-05, "loss": 0.8038, "step": 3411 }, { "epoch": 0.35, "grad_norm": 1.4796260691101912, "learning_rate": 1.5174896162872093e-05, "loss": 0.748, "step": 3412 }, { "epoch": 0.35, "grad_norm": 1.4917898706741968, "learning_rate": 1.5172076851261284e-05, "loss": 0.7499, "step": 3413 }, { "epoch": 0.35, "grad_norm": 1.4965042302274694, "learning_rate": 1.5169256978302414e-05, "loss": 0.7906, "step": 3414 }, { "epoch": 0.35, "grad_norm": 1.5864985396719788, "learning_rate": 1.5166436544301537e-05, "loss": 0.7249, "step": 3415 }, { "epoch": 0.35, "grad_norm": 1.60300863633716, "learning_rate": 1.5163615549564767e-05, "loss": 0.848, "step": 3416 }, { "epoch": 0.35, "grad_norm": 1.522315570698071, "learning_rate": 1.5160793994398279e-05, "loss": 0.7615, "step": 3417 }, { "epoch": 0.35, "grad_norm": 1.638295121816351, "learning_rate": 1.5157971879108312e-05, "loss": 0.7377, "step": 3418 }, { "epoch": 0.35, "grad_norm": 1.6293378893887394, "learning_rate": 1.5155149204001154e-05, "loss": 0.8073, "step": 3419 }, { "epoch": 0.35, "grad_norm": 1.6763894967757471, "learning_rate": 1.5152325969383173e-05, "loss": 0.6717, "step": 3420 }, { "epoch": 0.35, "grad_norm": 1.6242355549321144, "learning_rate": 1.5149502175560777e-05, "loss": 0.7134, "step": 3421 }, { "epoch": 0.35, "grad_norm": 1.4804791823585846, "learning_rate": 1.514667782284045e-05, "loss": 0.6685, "step": 3422 }, { "epoch": 0.35, "grad_norm": 1.3690241377962513, "learning_rate": 1.5143852911528729e-05, "loss": 0.6693, "step": 3423 }, { "epoch": 0.35, "grad_norm": 1.44855052107555, "learning_rate": 1.5141027441932217e-05, "loss": 0.7307, "step": 3424 }, { "epoch": 0.35, "grad_norm": 1.7367703094119369, "learning_rate": 1.5138201414357574e-05, "loss": 0.6859, "step": 3425 }, { "epoch": 0.35, "grad_norm": 1.680051274739284, "learning_rate": 1.513537482911152e-05, "loss": 0.8612, "step": 3426 }, { "epoch": 0.35, "grad_norm": 1.678594781233372, "learning_rate": 1.5132547686500834e-05, "loss": 0.7895, "step": 3427 }, { "epoch": 0.35, "grad_norm": 1.5156833085619492, "learning_rate": 1.5129719986832363e-05, "loss": 0.7615, "step": 3428 }, { "epoch": 0.35, "grad_norm": 1.5973240474554833, "learning_rate": 1.5126891730413005e-05, "loss": 0.7608, "step": 3429 }, { "epoch": 0.35, "grad_norm": 1.5179787385027927, "learning_rate": 1.5124062917549728e-05, "loss": 0.7311, "step": 3430 }, { "epoch": 0.35, "grad_norm": 1.4531619208786193, "learning_rate": 1.512123354854955e-05, "loss": 0.7504, "step": 3431 }, { "epoch": 0.35, "grad_norm": 1.5130580888869969, "learning_rate": 1.511840362371956e-05, "loss": 0.6863, "step": 3432 }, { "epoch": 0.35, "grad_norm": 1.4289391942985183, "learning_rate": 1.51155731433669e-05, "loss": 0.7857, "step": 3433 }, { "epoch": 0.35, "grad_norm": 1.5482410360919439, "learning_rate": 1.511274210779877e-05, "loss": 0.7745, "step": 3434 }, { "epoch": 0.35, "grad_norm": 1.6338364710983555, "learning_rate": 1.5109910517322442e-05, "loss": 0.7405, "step": 3435 }, { "epoch": 0.35, "grad_norm": 1.5065075264891135, "learning_rate": 1.5107078372245237e-05, "loss": 0.6536, "step": 3436 }, { "epoch": 0.35, "grad_norm": 1.561399673765195, "learning_rate": 1.5104245672874538e-05, "loss": 0.7642, "step": 3437 }, { "epoch": 0.35, "grad_norm": 1.516983977202264, "learning_rate": 1.5101412419517796e-05, "loss": 0.7723, "step": 3438 }, { "epoch": 0.35, "grad_norm": 1.5629150934760518, "learning_rate": 1.509857861248251e-05, "loss": 0.7311, "step": 3439 }, { "epoch": 0.35, "grad_norm": 1.642574367936479, "learning_rate": 1.509574425207625e-05, "loss": 0.7182, "step": 3440 }, { "epoch": 0.35, "grad_norm": 1.57635743355489, "learning_rate": 1.509290933860664e-05, "loss": 0.7569, "step": 3441 }, { "epoch": 0.35, "grad_norm": 1.541793303046407, "learning_rate": 1.5090073872381363e-05, "loss": 0.7027, "step": 3442 }, { "epoch": 0.35, "grad_norm": 1.5103972648127917, "learning_rate": 1.5087237853708171e-05, "loss": 0.7425, "step": 3443 }, { "epoch": 0.35, "grad_norm": 1.4817589947354464, "learning_rate": 1.5084401282894864e-05, "loss": 0.7552, "step": 3444 }, { "epoch": 0.35, "grad_norm": 1.542431128929272, "learning_rate": 1.5081564160249307e-05, "loss": 0.8015, "step": 3445 }, { "epoch": 0.35, "grad_norm": 1.5698521794455202, "learning_rate": 1.507872648607943e-05, "loss": 0.6561, "step": 3446 }, { "epoch": 0.35, "grad_norm": 1.4648673126928915, "learning_rate": 1.5075888260693214e-05, "loss": 0.7357, "step": 3447 }, { "epoch": 0.35, "grad_norm": 1.4953230949375165, "learning_rate": 1.5073049484398707e-05, "loss": 0.7219, "step": 3448 }, { "epoch": 0.35, "grad_norm": 1.497409323484993, "learning_rate": 1.5070210157504009e-05, "loss": 0.7111, "step": 3449 }, { "epoch": 0.35, "grad_norm": 1.4666121459888177, "learning_rate": 1.5067370280317289e-05, "loss": 0.7245, "step": 3450 }, { "epoch": 0.35, "grad_norm": 1.7367927555042613, "learning_rate": 1.506452985314677e-05, "loss": 0.7632, "step": 3451 }, { "epoch": 0.35, "grad_norm": 1.4721834836468148, "learning_rate": 1.5061688876300738e-05, "loss": 0.7348, "step": 3452 }, { "epoch": 0.35, "grad_norm": 1.584192423035963, "learning_rate": 1.505884735008753e-05, "loss": 0.739, "step": 3453 }, { "epoch": 0.35, "grad_norm": 1.5366616006137925, "learning_rate": 1.5056005274815557e-05, "loss": 0.7103, "step": 3454 }, { "epoch": 0.35, "grad_norm": 1.5065621493020562, "learning_rate": 1.5053162650793277e-05, "loss": 0.8539, "step": 3455 }, { "epoch": 0.35, "grad_norm": 1.5153782193173004, "learning_rate": 1.5050319478329213e-05, "loss": 0.7514, "step": 3456 }, { "epoch": 0.35, "grad_norm": 1.37023829246634, "learning_rate": 1.504747575773195e-05, "loss": 0.7087, "step": 3457 }, { "epoch": 0.35, "grad_norm": 1.5598410742579687, "learning_rate": 1.5044631489310127e-05, "loss": 0.6599, "step": 3458 }, { "epoch": 0.35, "grad_norm": 1.4924135931050373, "learning_rate": 1.5041786673372445e-05, "loss": 0.7987, "step": 3459 }, { "epoch": 0.35, "grad_norm": 1.4892305193365434, "learning_rate": 1.5038941310227663e-05, "loss": 0.7199, "step": 3460 }, { "epoch": 0.35, "grad_norm": 1.4600408002651917, "learning_rate": 1.5036095400184603e-05, "loss": 0.7402, "step": 3461 }, { "epoch": 0.35, "grad_norm": 1.489791988488241, "learning_rate": 1.5033248943552141e-05, "loss": 0.6495, "step": 3462 }, { "epoch": 0.35, "grad_norm": 1.5042633182167775, "learning_rate": 1.5030401940639221e-05, "loss": 0.7515, "step": 3463 }, { "epoch": 0.35, "grad_norm": 1.608923764011562, "learning_rate": 1.5027554391754838e-05, "loss": 0.7703, "step": 3464 }, { "epoch": 0.35, "grad_norm": 1.4888772998343753, "learning_rate": 1.5024706297208042e-05, "loss": 0.6357, "step": 3465 }, { "epoch": 0.35, "grad_norm": 1.3819111163671756, "learning_rate": 1.5021857657307957e-05, "loss": 0.6705, "step": 3466 }, { "epoch": 0.35, "grad_norm": 1.5237889483123492, "learning_rate": 1.5019008472363759e-05, "loss": 0.7462, "step": 3467 }, { "epoch": 0.35, "grad_norm": 1.5342490853257287, "learning_rate": 1.5016158742684677e-05, "loss": 0.6554, "step": 3468 }, { "epoch": 0.35, "grad_norm": 1.7384068499426892, "learning_rate": 1.501330846858001e-05, "loss": 0.8208, "step": 3469 }, { "epoch": 0.35, "grad_norm": 1.5652607294497758, "learning_rate": 1.5010457650359107e-05, "loss": 0.7814, "step": 3470 }, { "epoch": 0.35, "grad_norm": 1.6478352433592374, "learning_rate": 1.5007606288331382e-05, "loss": 0.7972, "step": 3471 }, { "epoch": 0.35, "grad_norm": 1.634555272718592, "learning_rate": 1.50047543828063e-05, "loss": 0.6908, "step": 3472 }, { "epoch": 0.35, "grad_norm": 1.5720221466031237, "learning_rate": 1.5001901934093401e-05, "loss": 0.7597, "step": 3473 }, { "epoch": 0.35, "grad_norm": 2.5344693193411723, "learning_rate": 1.4999048942502266e-05, "loss": 0.6344, "step": 3474 }, { "epoch": 0.35, "grad_norm": 1.4777960019305205, "learning_rate": 1.4996195408342544e-05, "loss": 0.693, "step": 3475 }, { "epoch": 0.35, "grad_norm": 1.7370996677258854, "learning_rate": 1.4993341331923941e-05, "loss": 0.7798, "step": 3476 }, { "epoch": 0.35, "grad_norm": 1.5785841948824484, "learning_rate": 1.4990486713556225e-05, "loss": 0.7868, "step": 3477 }, { "epoch": 0.35, "grad_norm": 1.6036574659873122, "learning_rate": 1.498763155354922e-05, "loss": 0.7637, "step": 3478 }, { "epoch": 0.35, "grad_norm": 1.7302510182696837, "learning_rate": 1.4984775852212807e-05, "loss": 0.7649, "step": 3479 }, { "epoch": 0.35, "grad_norm": 1.664703370876807, "learning_rate": 1.4981919609856927e-05, "loss": 0.7293, "step": 3480 }, { "epoch": 0.35, "grad_norm": 1.649517852767688, "learning_rate": 1.4979062826791584e-05, "loss": 0.7203, "step": 3481 }, { "epoch": 0.35, "grad_norm": 1.4861988697765522, "learning_rate": 1.4976205503326833e-05, "loss": 0.7522, "step": 3482 }, { "epoch": 0.35, "grad_norm": 1.536728007961462, "learning_rate": 1.4973347639772796e-05, "loss": 0.6858, "step": 3483 }, { "epoch": 0.35, "grad_norm": 1.3665924982565034, "learning_rate": 1.4970489236439645e-05, "loss": 0.7136, "step": 3484 }, { "epoch": 0.35, "grad_norm": 1.7127713453643445, "learning_rate": 1.4967630293637618e-05, "loss": 0.7941, "step": 3485 }, { "epoch": 0.35, "grad_norm": 1.523515736800205, "learning_rate": 1.4964770811677007e-05, "loss": 0.7964, "step": 3486 }, { "epoch": 0.35, "grad_norm": 1.906325251267563, "learning_rate": 1.4961910790868167e-05, "loss": 0.7163, "step": 3487 }, { "epoch": 0.35, "grad_norm": 1.4949145245196809, "learning_rate": 1.4959050231521503e-05, "loss": 0.7391, "step": 3488 }, { "epoch": 0.35, "grad_norm": 1.4994096945124373, "learning_rate": 1.4956189133947492e-05, "loss": 0.664, "step": 3489 }, { "epoch": 0.35, "grad_norm": 1.5604623367643393, "learning_rate": 1.4953327498456652e-05, "loss": 0.7454, "step": 3490 }, { "epoch": 0.36, "grad_norm": 1.4918625748558811, "learning_rate": 1.4950465325359573e-05, "loss": 0.8073, "step": 3491 }, { "epoch": 0.36, "grad_norm": 1.4786728621419427, "learning_rate": 1.4947602614966904e-05, "loss": 0.6621, "step": 3492 }, { "epoch": 0.36, "grad_norm": 1.5629535108757933, "learning_rate": 1.494473936758934e-05, "loss": 0.8043, "step": 3493 }, { "epoch": 0.36, "grad_norm": 1.591950029518263, "learning_rate": 1.4941875583537643e-05, "loss": 0.705, "step": 3494 }, { "epoch": 0.36, "grad_norm": 1.5452538099171806, "learning_rate": 1.4939011263122635e-05, "loss": 0.6806, "step": 3495 }, { "epoch": 0.36, "grad_norm": 1.5899349971218648, "learning_rate": 1.4936146406655194e-05, "loss": 0.8044, "step": 3496 }, { "epoch": 0.36, "grad_norm": 1.5583774506657113, "learning_rate": 1.4933281014446253e-05, "loss": 0.7003, "step": 3497 }, { "epoch": 0.36, "grad_norm": 1.6291525500407988, "learning_rate": 1.4930415086806806e-05, "loss": 0.7825, "step": 3498 }, { "epoch": 0.36, "grad_norm": 1.845175889444008, "learning_rate": 1.49275486240479e-05, "loss": 0.8185, "step": 3499 }, { "epoch": 0.36, "grad_norm": 1.5155979978094534, "learning_rate": 1.4924681626480654e-05, "loss": 0.7676, "step": 3500 }, { "epoch": 0.36, "grad_norm": 1.5870897903048031, "learning_rate": 1.492181409441623e-05, "loss": 0.7441, "step": 3501 }, { "epoch": 0.36, "grad_norm": 1.629353091866416, "learning_rate": 1.4918946028165854e-05, "loss": 0.7711, "step": 3502 }, { "epoch": 0.36, "grad_norm": 1.418929425121082, "learning_rate": 1.4916077428040812e-05, "loss": 0.6599, "step": 3503 }, { "epoch": 0.36, "grad_norm": 1.78318020605609, "learning_rate": 1.491320829435244e-05, "loss": 0.7912, "step": 3504 }, { "epoch": 0.36, "grad_norm": 1.582679788025773, "learning_rate": 1.4910338627412145e-05, "loss": 0.7817, "step": 3505 }, { "epoch": 0.36, "grad_norm": 1.6395184005956496, "learning_rate": 1.4907468427531378e-05, "loss": 0.7486, "step": 3506 }, { "epoch": 0.36, "grad_norm": 1.595727446077858, "learning_rate": 1.4904597695021661e-05, "loss": 0.7169, "step": 3507 }, { "epoch": 0.36, "grad_norm": 1.7410899348133368, "learning_rate": 1.4901726430194559e-05, "loss": 0.697, "step": 3508 }, { "epoch": 0.36, "grad_norm": 1.6369873661667074, "learning_rate": 1.489885463336171e-05, "loss": 0.7974, "step": 3509 }, { "epoch": 0.36, "grad_norm": 1.6216919985187985, "learning_rate": 1.4895982304834801e-05, "loss": 0.8268, "step": 3510 }, { "epoch": 0.36, "grad_norm": 1.5314031214487973, "learning_rate": 1.4893109444925578e-05, "loss": 0.6685, "step": 3511 }, { "epoch": 0.36, "grad_norm": 1.6248727782910444, "learning_rate": 1.4890236053945843e-05, "loss": 0.7778, "step": 3512 }, { "epoch": 0.36, "grad_norm": 1.4707089643167897, "learning_rate": 1.4887362132207459e-05, "loss": 0.7438, "step": 3513 }, { "epoch": 0.36, "grad_norm": 1.7110333572039853, "learning_rate": 1.4884487680022345e-05, "loss": 0.7783, "step": 3514 }, { "epoch": 0.36, "grad_norm": 1.5233667532722868, "learning_rate": 1.4881612697702478e-05, "loss": 0.8079, "step": 3515 }, { "epoch": 0.36, "grad_norm": 1.6387517929079978, "learning_rate": 1.4878737185559892e-05, "loss": 0.7378, "step": 3516 }, { "epoch": 0.36, "grad_norm": 1.7368485789115762, "learning_rate": 1.4875861143906682e-05, "loss": 0.7348, "step": 3517 }, { "epoch": 0.36, "grad_norm": 1.6443544631394493, "learning_rate": 1.4872984573054993e-05, "loss": 0.8044, "step": 3518 }, { "epoch": 0.36, "grad_norm": 1.6264571228684506, "learning_rate": 1.4870107473317035e-05, "loss": 0.6913, "step": 3519 }, { "epoch": 0.36, "grad_norm": 1.4394243588179805, "learning_rate": 1.4867229845005069e-05, "loss": 0.7788, "step": 3520 }, { "epoch": 0.36, "grad_norm": 1.5243776042754629, "learning_rate": 1.486435168843142e-05, "loss": 0.8644, "step": 3521 }, { "epoch": 0.36, "grad_norm": 1.39511558840125, "learning_rate": 1.4861473003908464e-05, "loss": 0.7615, "step": 3522 }, { "epoch": 0.36, "grad_norm": 1.5237359383852438, "learning_rate": 1.4858593791748636e-05, "loss": 0.7109, "step": 3523 }, { "epoch": 0.36, "grad_norm": 1.4141768796323457, "learning_rate": 1.4855714052264436e-05, "loss": 0.6682, "step": 3524 }, { "epoch": 0.36, "grad_norm": 1.6013691548953137, "learning_rate": 1.4852833785768411e-05, "loss": 0.7491, "step": 3525 }, { "epoch": 0.36, "grad_norm": 1.6514226529816114, "learning_rate": 1.4849952992573164e-05, "loss": 0.7559, "step": 3526 }, { "epoch": 0.36, "grad_norm": 1.6338844754434587, "learning_rate": 1.4847071672991366e-05, "loss": 0.8517, "step": 3527 }, { "epoch": 0.36, "grad_norm": 1.5079211705287743, "learning_rate": 1.4844189827335739e-05, "loss": 0.6482, "step": 3528 }, { "epoch": 0.36, "grad_norm": 1.3343098488815823, "learning_rate": 1.484130745591906e-05, "loss": 0.6869, "step": 3529 }, { "epoch": 0.36, "grad_norm": 1.6681546973582473, "learning_rate": 1.4838424559054167e-05, "loss": 0.7828, "step": 3530 }, { "epoch": 0.36, "grad_norm": 1.7164511219007077, "learning_rate": 1.4835541137053952e-05, "loss": 0.7786, "step": 3531 }, { "epoch": 0.36, "grad_norm": 1.5107616051889503, "learning_rate": 1.4832657190231366e-05, "loss": 0.7285, "step": 3532 }, { "epoch": 0.36, "grad_norm": 1.397482883918753, "learning_rate": 1.4829772718899417e-05, "loss": 0.6134, "step": 3533 }, { "epoch": 0.36, "grad_norm": 1.71295749159559, "learning_rate": 1.4826887723371168e-05, "loss": 0.7616, "step": 3534 }, { "epoch": 0.36, "grad_norm": 1.6666440172844008, "learning_rate": 1.482400220395974e-05, "loss": 0.7533, "step": 3535 }, { "epoch": 0.36, "grad_norm": 1.4688735439647813, "learning_rate": 1.4821116160978313e-05, "loss": 0.7034, "step": 3536 }, { "epoch": 0.36, "grad_norm": 1.6611288071734818, "learning_rate": 1.481822959474012e-05, "loss": 0.7442, "step": 3537 }, { "epoch": 0.36, "grad_norm": 1.434539771228801, "learning_rate": 1.4815342505558454e-05, "loss": 0.7714, "step": 3538 }, { "epoch": 0.36, "grad_norm": 1.838877994694268, "learning_rate": 1.4812454893746662e-05, "loss": 0.8659, "step": 3539 }, { "epoch": 0.36, "grad_norm": 1.6113005589235647, "learning_rate": 1.4809566759618148e-05, "loss": 0.8147, "step": 3540 }, { "epoch": 0.36, "grad_norm": 1.5726735797445943, "learning_rate": 1.4806678103486376e-05, "loss": 0.7031, "step": 3541 }, { "epoch": 0.36, "grad_norm": 1.4865889652910735, "learning_rate": 1.4803788925664867e-05, "loss": 0.7913, "step": 3542 }, { "epoch": 0.36, "grad_norm": 1.603946966940541, "learning_rate": 1.480089922646719e-05, "loss": 0.8, "step": 3543 }, { "epoch": 0.36, "grad_norm": 1.5696449987816368, "learning_rate": 1.4798009006206979e-05, "loss": 0.892, "step": 3544 }, { "epoch": 0.36, "grad_norm": 1.565736236104443, "learning_rate": 1.4795118265197926e-05, "loss": 0.7458, "step": 3545 }, { "epoch": 0.36, "grad_norm": 1.5380747057397648, "learning_rate": 1.479222700375377e-05, "loss": 0.8117, "step": 3546 }, { "epoch": 0.36, "grad_norm": 1.5974693929548376, "learning_rate": 1.4789335222188314e-05, "loss": 0.7076, "step": 3547 }, { "epoch": 0.36, "grad_norm": 1.6337144633545488, "learning_rate": 1.478644292081542e-05, "loss": 0.6971, "step": 3548 }, { "epoch": 0.36, "grad_norm": 1.4854328456568768, "learning_rate": 1.4783550099948995e-05, "loss": 0.6628, "step": 3549 }, { "epoch": 0.36, "grad_norm": 1.3421312597167916, "learning_rate": 1.4780656759903015e-05, "loss": 0.6936, "step": 3550 }, { "epoch": 0.36, "grad_norm": 1.5433572587003273, "learning_rate": 1.4777762900991506e-05, "loss": 0.6593, "step": 3551 }, { "epoch": 0.36, "grad_norm": 1.524569316576266, "learning_rate": 1.4774868523528548e-05, "loss": 0.7604, "step": 3552 }, { "epoch": 0.36, "grad_norm": 1.4645164651882978, "learning_rate": 1.4771973627828283e-05, "loss": 0.7331, "step": 3553 }, { "epoch": 0.36, "grad_norm": 1.6152049411395204, "learning_rate": 1.4769078214204904e-05, "loss": 0.6323, "step": 3554 }, { "epoch": 0.36, "grad_norm": 1.6252699943112658, "learning_rate": 1.4766182282972667e-05, "loss": 0.8333, "step": 3555 }, { "epoch": 0.36, "grad_norm": 1.593900253019558, "learning_rate": 1.4763285834445877e-05, "loss": 0.7247, "step": 3556 }, { "epoch": 0.36, "grad_norm": 1.4427449082602886, "learning_rate": 1.4760388868938902e-05, "loss": 0.7594, "step": 3557 }, { "epoch": 0.36, "grad_norm": 1.556240541058109, "learning_rate": 1.4757491386766154e-05, "loss": 0.8283, "step": 3558 }, { "epoch": 0.36, "grad_norm": 1.5031696053700674, "learning_rate": 1.4754593388242117e-05, "loss": 0.7985, "step": 3559 }, { "epoch": 0.36, "grad_norm": 1.4533310803694022, "learning_rate": 1.4751694873681324e-05, "loss": 0.7481, "step": 3560 }, { "epoch": 0.36, "grad_norm": 1.5333323453412875, "learning_rate": 1.4748795843398361e-05, "loss": 0.6943, "step": 3561 }, { "epoch": 0.36, "grad_norm": 1.389091874942304, "learning_rate": 1.4745896297707867e-05, "loss": 0.6686, "step": 3562 }, { "epoch": 0.36, "grad_norm": 1.5586342886882658, "learning_rate": 1.4742996236924551e-05, "loss": 0.7302, "step": 3563 }, { "epoch": 0.36, "grad_norm": 1.478618598982181, "learning_rate": 1.4740095661363165e-05, "loss": 0.7922, "step": 3564 }, { "epoch": 0.36, "grad_norm": 1.626860733600897, "learning_rate": 1.4737194571338523e-05, "loss": 0.7162, "step": 3565 }, { "epoch": 0.36, "grad_norm": 1.4377768151388717, "learning_rate": 1.4734292967165491e-05, "loss": 0.7658, "step": 3566 }, { "epoch": 0.36, "grad_norm": 1.4427609709577702, "learning_rate": 1.473139084915899e-05, "loss": 0.7086, "step": 3567 }, { "epoch": 0.36, "grad_norm": 1.607204509238383, "learning_rate": 1.4728488217634008e-05, "loss": 0.7084, "step": 3568 }, { "epoch": 0.36, "grad_norm": 1.3988616333901625, "learning_rate": 1.4725585072905573e-05, "loss": 0.7179, "step": 3569 }, { "epoch": 0.36, "grad_norm": 1.5123617215419016, "learning_rate": 1.4722681415288775e-05, "loss": 0.7475, "step": 3570 }, { "epoch": 0.36, "grad_norm": 1.497428536624791, "learning_rate": 1.471977724509877e-05, "loss": 0.7024, "step": 3571 }, { "epoch": 0.36, "grad_norm": 1.6175948288740258, "learning_rate": 1.471687256265075e-05, "loss": 0.7575, "step": 3572 }, { "epoch": 0.36, "grad_norm": 1.5769398718160916, "learning_rate": 1.4713967368259981e-05, "loss": 0.7064, "step": 3573 }, { "epoch": 0.36, "grad_norm": 1.5223116988772043, "learning_rate": 1.4711061662241765e-05, "loss": 0.8851, "step": 3574 }, { "epoch": 0.36, "grad_norm": 1.6202519356094804, "learning_rate": 1.4708155444911485e-05, "loss": 0.6783, "step": 3575 }, { "epoch": 0.36, "grad_norm": 1.5682800582853023, "learning_rate": 1.4705248716584556e-05, "loss": 0.8026, "step": 3576 }, { "epoch": 0.36, "grad_norm": 1.4970671852419102, "learning_rate": 1.4702341477576461e-05, "loss": 0.7771, "step": 3577 }, { "epoch": 0.36, "grad_norm": 1.5979231371091105, "learning_rate": 1.4699433728202736e-05, "loss": 0.7805, "step": 3578 }, { "epoch": 0.36, "grad_norm": 1.58291483435768, "learning_rate": 1.469652546877897e-05, "loss": 0.8282, "step": 3579 }, { "epoch": 0.36, "grad_norm": 1.6557268300441905, "learning_rate": 1.4693616699620808e-05, "loss": 0.7751, "step": 3580 }, { "epoch": 0.36, "grad_norm": 1.5562287512074653, "learning_rate": 1.4690707421043956e-05, "loss": 0.6575, "step": 3581 }, { "epoch": 0.36, "grad_norm": 1.4175640438046877, "learning_rate": 1.4687797633364167e-05, "loss": 0.6543, "step": 3582 }, { "epoch": 0.36, "grad_norm": 1.6040932810491193, "learning_rate": 1.4684887336897255e-05, "loss": 0.763, "step": 3583 }, { "epoch": 0.36, "grad_norm": 1.6058731823369146, "learning_rate": 1.4681976531959085e-05, "loss": 0.8224, "step": 3584 }, { "epoch": 0.36, "grad_norm": 1.8304388759577574, "learning_rate": 1.4679065218865577e-05, "loss": 0.8874, "step": 3585 }, { "epoch": 0.36, "grad_norm": 1.4640845041467676, "learning_rate": 1.4676153397932718e-05, "loss": 0.6027, "step": 3586 }, { "epoch": 0.36, "grad_norm": 1.6107944122500681, "learning_rate": 1.4673241069476532e-05, "loss": 0.7631, "step": 3587 }, { "epoch": 0.36, "grad_norm": 1.476859364163601, "learning_rate": 1.4670328233813109e-05, "loss": 0.7166, "step": 3588 }, { "epoch": 0.37, "grad_norm": 1.62445852773536, "learning_rate": 1.4667414891258592e-05, "loss": 0.726, "step": 3589 }, { "epoch": 0.37, "grad_norm": 1.451038371551322, "learning_rate": 1.4664501042129179e-05, "loss": 0.7385, "step": 3590 }, { "epoch": 0.37, "grad_norm": 1.567794040163018, "learning_rate": 1.466158668674112e-05, "loss": 0.6489, "step": 3591 }, { "epoch": 0.37, "grad_norm": 1.3653304786147358, "learning_rate": 1.4658671825410729e-05, "loss": 0.7955, "step": 3592 }, { "epoch": 0.37, "grad_norm": 1.524032132670125, "learning_rate": 1.4655756458454364e-05, "loss": 0.7514, "step": 3593 }, { "epoch": 0.37, "grad_norm": 1.4538196894282756, "learning_rate": 1.4652840586188442e-05, "loss": 0.7715, "step": 3594 }, { "epoch": 0.37, "grad_norm": 1.687136784830419, "learning_rate": 1.4649924208929436e-05, "loss": 0.7911, "step": 3595 }, { "epoch": 0.37, "grad_norm": 1.4995055304016083, "learning_rate": 1.4647007326993876e-05, "loss": 0.8462, "step": 3596 }, { "epoch": 0.37, "grad_norm": 1.403670122516953, "learning_rate": 1.4644089940698341e-05, "loss": 0.6554, "step": 3597 }, { "epoch": 0.37, "grad_norm": 1.4977699227582575, "learning_rate": 1.4641172050359466e-05, "loss": 0.7081, "step": 3598 }, { "epoch": 0.37, "grad_norm": 1.6177170296718928, "learning_rate": 1.4638253656293948e-05, "loss": 0.7322, "step": 3599 }, { "epoch": 0.37, "grad_norm": 1.562436002230096, "learning_rate": 1.463533475881853e-05, "loss": 0.7545, "step": 3600 }, { "epoch": 0.37, "grad_norm": 1.5961450775953814, "learning_rate": 1.463241535825001e-05, "loss": 0.7949, "step": 3601 }, { "epoch": 0.37, "grad_norm": 1.575564927707248, "learning_rate": 1.4629495454905249e-05, "loss": 0.7422, "step": 3602 }, { "epoch": 0.37, "grad_norm": 1.604384087743517, "learning_rate": 1.4626575049101148e-05, "loss": 0.6647, "step": 3603 }, { "epoch": 0.37, "grad_norm": 1.6632558311955634, "learning_rate": 1.4623654141154682e-05, "loss": 0.8267, "step": 3604 }, { "epoch": 0.37, "grad_norm": 1.5379881536216085, "learning_rate": 1.4620732731382863e-05, "loss": 0.701, "step": 3605 }, { "epoch": 0.37, "grad_norm": 1.562272895182808, "learning_rate": 1.4617810820102766e-05, "loss": 0.7102, "step": 3606 }, { "epoch": 0.37, "grad_norm": 1.6436077048898663, "learning_rate": 1.461488840763152e-05, "loss": 0.8055, "step": 3607 }, { "epoch": 0.37, "grad_norm": 1.4710283252621488, "learning_rate": 1.4611965494286304e-05, "loss": 0.7057, "step": 3608 }, { "epoch": 0.37, "grad_norm": 1.6459419220368325, "learning_rate": 1.4609042080384359e-05, "loss": 0.764, "step": 3609 }, { "epoch": 0.37, "grad_norm": 1.618306370841053, "learning_rate": 1.4606118166242974e-05, "loss": 0.8026, "step": 3610 }, { "epoch": 0.37, "grad_norm": 1.7448311208782217, "learning_rate": 1.4603193752179494e-05, "loss": 0.7705, "step": 3611 }, { "epoch": 0.37, "grad_norm": 1.538104497557957, "learning_rate": 1.4600268838511314e-05, "loss": 0.8236, "step": 3612 }, { "epoch": 0.37, "grad_norm": 1.5925145637942848, "learning_rate": 1.4597343425555894e-05, "loss": 0.737, "step": 3613 }, { "epoch": 0.37, "grad_norm": 1.5889299388290077, "learning_rate": 1.459441751363074e-05, "loss": 0.7804, "step": 3614 }, { "epoch": 0.37, "grad_norm": 1.6711569124333012, "learning_rate": 1.4591491103053414e-05, "loss": 0.8212, "step": 3615 }, { "epoch": 0.37, "grad_norm": 1.669951186642647, "learning_rate": 1.458856419414153e-05, "loss": 0.8123, "step": 3616 }, { "epoch": 0.37, "grad_norm": 1.5160115842064925, "learning_rate": 1.458563678721276e-05, "loss": 0.6931, "step": 3617 }, { "epoch": 0.37, "grad_norm": 1.577245360536689, "learning_rate": 1.4582708882584831e-05, "loss": 0.8215, "step": 3618 }, { "epoch": 0.37, "grad_norm": 1.546826739829446, "learning_rate": 1.4579780480575516e-05, "loss": 0.6896, "step": 3619 }, { "epoch": 0.37, "grad_norm": 1.6012972025578633, "learning_rate": 1.4576851581502652e-05, "loss": 0.7108, "step": 3620 }, { "epoch": 0.37, "grad_norm": 1.4358221012271974, "learning_rate": 1.4573922185684118e-05, "loss": 0.7926, "step": 3621 }, { "epoch": 0.37, "grad_norm": 1.5724954684563723, "learning_rate": 1.4570992293437862e-05, "loss": 0.6527, "step": 3622 }, { "epoch": 0.37, "grad_norm": 1.4796895310716867, "learning_rate": 1.4568061905081874e-05, "loss": 0.7353, "step": 3623 }, { "epoch": 0.37, "grad_norm": 1.4753205823693467, "learning_rate": 1.4565131020934203e-05, "loss": 0.7481, "step": 3624 }, { "epoch": 0.37, "grad_norm": 1.4559061357050045, "learning_rate": 1.456219964131295e-05, "loss": 0.8166, "step": 3625 }, { "epoch": 0.37, "grad_norm": 1.5246544530422408, "learning_rate": 1.4559267766536272e-05, "loss": 0.8174, "step": 3626 }, { "epoch": 0.37, "grad_norm": 1.3934216510143873, "learning_rate": 1.4556335396922376e-05, "loss": 0.8245, "step": 3627 }, { "epoch": 0.37, "grad_norm": 1.622348369929527, "learning_rate": 1.4553402532789525e-05, "loss": 0.7072, "step": 3628 }, { "epoch": 0.37, "grad_norm": 1.6293277403360602, "learning_rate": 1.4550469174456038e-05, "loss": 0.8638, "step": 3629 }, { "epoch": 0.37, "grad_norm": 1.487813163535852, "learning_rate": 1.4547535322240283e-05, "loss": 0.7255, "step": 3630 }, { "epoch": 0.37, "grad_norm": 1.5659994707847569, "learning_rate": 1.4544600976460684e-05, "loss": 0.8259, "step": 3631 }, { "epoch": 0.37, "grad_norm": 1.6386233973682773, "learning_rate": 1.4541666137435717e-05, "loss": 0.896, "step": 3632 }, { "epoch": 0.37, "grad_norm": 1.4945777648735155, "learning_rate": 1.4538730805483918e-05, "loss": 0.6776, "step": 3633 }, { "epoch": 0.37, "grad_norm": 1.3869665607423542, "learning_rate": 1.4535794980923863e-05, "loss": 0.6188, "step": 3634 }, { "epoch": 0.37, "grad_norm": 1.5267935588868653, "learning_rate": 1.45328586640742e-05, "loss": 0.7501, "step": 3635 }, { "epoch": 0.37, "grad_norm": 1.6141746140610456, "learning_rate": 1.452992185525361e-05, "loss": 0.7758, "step": 3636 }, { "epoch": 0.37, "grad_norm": 1.7863954147640309, "learning_rate": 1.4526984554780847e-05, "loss": 0.8292, "step": 3637 }, { "epoch": 0.37, "grad_norm": 1.437857154083615, "learning_rate": 1.4524046762974705e-05, "loss": 0.7079, "step": 3638 }, { "epoch": 0.37, "grad_norm": 1.4963813486501298, "learning_rate": 1.4521108480154032e-05, "loss": 0.8067, "step": 3639 }, { "epoch": 0.37, "grad_norm": 1.6212269153066883, "learning_rate": 1.4518169706637736e-05, "loss": 0.8064, "step": 3640 }, { "epoch": 0.37, "grad_norm": 1.4496354534684153, "learning_rate": 1.4515230442744774e-05, "loss": 0.6204, "step": 3641 }, { "epoch": 0.37, "grad_norm": 1.6003373215463383, "learning_rate": 1.4512290688794161e-05, "loss": 0.8154, "step": 3642 }, { "epoch": 0.37, "grad_norm": 1.4260596812680963, "learning_rate": 1.4509350445104955e-05, "loss": 0.6393, "step": 3643 }, { "epoch": 0.37, "grad_norm": 1.7932749293783161, "learning_rate": 1.4506409711996278e-05, "loss": 0.7551, "step": 3644 }, { "epoch": 0.37, "grad_norm": 1.5188216802858843, "learning_rate": 1.45034684897873e-05, "loss": 0.7528, "step": 3645 }, { "epoch": 0.37, "grad_norm": 1.5663984399125837, "learning_rate": 1.4500526778797244e-05, "loss": 0.7476, "step": 3646 }, { "epoch": 0.37, "grad_norm": 1.6327098199457324, "learning_rate": 1.4497584579345385e-05, "loss": 0.7634, "step": 3647 }, { "epoch": 0.37, "grad_norm": 1.5901216906836784, "learning_rate": 1.4494641891751053e-05, "loss": 0.873, "step": 3648 }, { "epoch": 0.37, "grad_norm": 1.3081865288850023, "learning_rate": 1.4491698716333632e-05, "loss": 0.7266, "step": 3649 }, { "epoch": 0.37, "grad_norm": 1.4269513171508648, "learning_rate": 1.4488755053412562e-05, "loss": 0.6676, "step": 3650 }, { "epoch": 0.37, "grad_norm": 1.844540740181958, "learning_rate": 1.4485810903307324e-05, "loss": 0.8679, "step": 3651 }, { "epoch": 0.37, "grad_norm": 1.6034428084923098, "learning_rate": 1.4482866266337462e-05, "loss": 0.7452, "step": 3652 }, { "epoch": 0.37, "grad_norm": 1.6166846651844484, "learning_rate": 1.4479921142822573e-05, "loss": 0.6864, "step": 3653 }, { "epoch": 0.37, "grad_norm": 1.464754271434382, "learning_rate": 1.44769755330823e-05, "loss": 0.6319, "step": 3654 }, { "epoch": 0.37, "grad_norm": 1.5621261510649411, "learning_rate": 1.4474029437436347e-05, "loss": 0.7819, "step": 3655 }, { "epoch": 0.37, "grad_norm": 1.5159227509879454, "learning_rate": 1.4471082856204464e-05, "loss": 0.7048, "step": 3656 }, { "epoch": 0.37, "grad_norm": 1.528622911675774, "learning_rate": 1.4468135789706454e-05, "loss": 0.7776, "step": 3657 }, { "epoch": 0.37, "grad_norm": 1.5907679003343815, "learning_rate": 1.4465188238262183e-05, "loss": 0.7024, "step": 3658 }, { "epoch": 0.37, "grad_norm": 1.4917979339422152, "learning_rate": 1.4462240202191553e-05, "loss": 0.8161, "step": 3659 }, { "epoch": 0.37, "grad_norm": 1.418213101349719, "learning_rate": 1.4459291681814534e-05, "loss": 0.7786, "step": 3660 }, { "epoch": 0.37, "grad_norm": 1.4640771784646323, "learning_rate": 1.4456342677451135e-05, "loss": 0.5936, "step": 3661 }, { "epoch": 0.37, "grad_norm": 1.4022523187744633, "learning_rate": 1.4453393189421428e-05, "loss": 0.7231, "step": 3662 }, { "epoch": 0.37, "grad_norm": 2.7894244277851525, "learning_rate": 1.4450443218045533e-05, "loss": 0.6724, "step": 3663 }, { "epoch": 0.37, "grad_norm": 1.5693836473898657, "learning_rate": 1.4447492763643628e-05, "loss": 0.6606, "step": 3664 }, { "epoch": 0.37, "grad_norm": 1.4248358569829045, "learning_rate": 1.4444541826535932e-05, "loss": 0.7743, "step": 3665 }, { "epoch": 0.37, "grad_norm": 1.6878677574918814, "learning_rate": 1.4441590407042723e-05, "loss": 0.8351, "step": 3666 }, { "epoch": 0.37, "grad_norm": 1.5392881799461169, "learning_rate": 1.4438638505484335e-05, "loss": 0.6838, "step": 3667 }, { "epoch": 0.37, "grad_norm": 1.5517144493417998, "learning_rate": 1.4435686122181152e-05, "loss": 0.7951, "step": 3668 }, { "epoch": 0.37, "grad_norm": 1.4617789852879355, "learning_rate": 1.4432733257453606e-05, "loss": 0.7206, "step": 3669 }, { "epoch": 0.37, "grad_norm": 1.5475756651496593, "learning_rate": 1.4429779911622185e-05, "loss": 0.6955, "step": 3670 }, { "epoch": 0.37, "grad_norm": 1.586135028957735, "learning_rate": 1.4426826085007429e-05, "loss": 0.7057, "step": 3671 }, { "epoch": 0.37, "grad_norm": 1.4336651729302952, "learning_rate": 1.442387177792993e-05, "loss": 0.683, "step": 3672 }, { "epoch": 0.37, "grad_norm": 1.541184065036639, "learning_rate": 1.442091699071033e-05, "loss": 0.7663, "step": 3673 }, { "epoch": 0.37, "grad_norm": 1.623271185521219, "learning_rate": 1.4417961723669325e-05, "loss": 0.7979, "step": 3674 }, { "epoch": 0.37, "grad_norm": 1.678579444230614, "learning_rate": 1.4415005977127666e-05, "loss": 0.8188, "step": 3675 }, { "epoch": 0.37, "grad_norm": 1.5397455104574125, "learning_rate": 1.4412049751406149e-05, "loss": 0.7839, "step": 3676 }, { "epoch": 0.37, "grad_norm": 1.858473486282819, "learning_rate": 1.4409093046825628e-05, "loss": 0.9055, "step": 3677 }, { "epoch": 0.37, "grad_norm": 1.45726580316041, "learning_rate": 1.4406135863707011e-05, "loss": 0.7296, "step": 3678 }, { "epoch": 0.37, "grad_norm": 1.4610251095613365, "learning_rate": 1.4403178202371246e-05, "loss": 0.6918, "step": 3679 }, { "epoch": 0.37, "grad_norm": 1.4127601977062236, "learning_rate": 1.4400220063139348e-05, "loss": 0.7537, "step": 3680 }, { "epoch": 0.37, "grad_norm": 1.60772054879461, "learning_rate": 1.4397261446332374e-05, "loss": 0.713, "step": 3681 }, { "epoch": 0.37, "grad_norm": 1.6570686371270857, "learning_rate": 1.4394302352271432e-05, "loss": 0.7406, "step": 3682 }, { "epoch": 0.37, "grad_norm": 1.6827887598500695, "learning_rate": 1.4391342781277694e-05, "loss": 0.7145, "step": 3683 }, { "epoch": 0.37, "grad_norm": 1.4378929980810253, "learning_rate": 1.4388382733672366e-05, "loss": 0.7492, "step": 3684 }, { "epoch": 0.37, "grad_norm": 1.5246600418398752, "learning_rate": 1.4385422209776718e-05, "loss": 0.7944, "step": 3685 }, { "epoch": 0.37, "grad_norm": 1.4014568904240385, "learning_rate": 1.4382461209912073e-05, "loss": 0.6436, "step": 3686 }, { "epoch": 0.38, "grad_norm": 1.4428423629394982, "learning_rate": 1.4379499734399797e-05, "loss": 0.6961, "step": 3687 }, { "epoch": 0.38, "grad_norm": 1.623447139815587, "learning_rate": 1.4376537783561312e-05, "loss": 0.775, "step": 3688 }, { "epoch": 0.38, "grad_norm": 1.6461117377577357, "learning_rate": 1.4373575357718091e-05, "loss": 0.8546, "step": 3689 }, { "epoch": 0.38, "grad_norm": 1.570365184009539, "learning_rate": 1.4370612457191661e-05, "loss": 0.7562, "step": 3690 }, { "epoch": 0.38, "grad_norm": 1.525833349657673, "learning_rate": 1.4367649082303598e-05, "loss": 0.6829, "step": 3691 }, { "epoch": 0.38, "grad_norm": 1.651551129448428, "learning_rate": 1.4364685233375531e-05, "loss": 0.7618, "step": 3692 }, { "epoch": 0.38, "grad_norm": 1.8057838334810443, "learning_rate": 1.4361720910729133e-05, "loss": 0.7877, "step": 3693 }, { "epoch": 0.38, "grad_norm": 1.6881366741148849, "learning_rate": 1.4358756114686144e-05, "loss": 0.7429, "step": 3694 }, { "epoch": 0.38, "grad_norm": 1.5659478374429834, "learning_rate": 1.4355790845568341e-05, "loss": 0.7668, "step": 3695 }, { "epoch": 0.38, "grad_norm": 1.8136241690524513, "learning_rate": 1.4352825103697559e-05, "loss": 0.8258, "step": 3696 }, { "epoch": 0.38, "grad_norm": 1.547061746400421, "learning_rate": 1.4349858889395682e-05, "loss": 0.8155, "step": 3697 }, { "epoch": 0.38, "grad_norm": 1.6797520121295821, "learning_rate": 1.4346892202984645e-05, "loss": 0.6994, "step": 3698 }, { "epoch": 0.38, "grad_norm": 1.4798017009668816, "learning_rate": 1.434392504478644e-05, "loss": 0.7518, "step": 3699 }, { "epoch": 0.38, "grad_norm": 1.8504089558554946, "learning_rate": 1.4340957415123102e-05, "loss": 0.7717, "step": 3700 }, { "epoch": 0.38, "grad_norm": 1.4628113248243737, "learning_rate": 1.4337989314316722e-05, "loss": 0.7195, "step": 3701 }, { "epoch": 0.38, "grad_norm": 1.718890806307345, "learning_rate": 1.4335020742689439e-05, "loss": 0.777, "step": 3702 }, { "epoch": 0.38, "grad_norm": 1.6301303689478914, "learning_rate": 1.4332051700563448e-05, "loss": 0.6558, "step": 3703 }, { "epoch": 0.38, "grad_norm": 1.7492602602785114, "learning_rate": 1.432908218826099e-05, "loss": 0.7529, "step": 3704 }, { "epoch": 0.38, "grad_norm": 1.5101433076297917, "learning_rate": 1.4326112206104359e-05, "loss": 0.833, "step": 3705 }, { "epoch": 0.38, "grad_norm": 1.741038527985199, "learning_rate": 1.4323141754415904e-05, "loss": 0.797, "step": 3706 }, { "epoch": 0.38, "grad_norm": 1.3241571181008938, "learning_rate": 1.4320170833518012e-05, "loss": 0.632, "step": 3707 }, { "epoch": 0.38, "grad_norm": 1.5259498416902493, "learning_rate": 1.431719944373314e-05, "loss": 0.7218, "step": 3708 }, { "epoch": 0.38, "grad_norm": 1.6374207928378792, "learning_rate": 1.4314227585383782e-05, "loss": 0.7591, "step": 3709 }, { "epoch": 0.38, "grad_norm": 1.4740806885171056, "learning_rate": 1.4311255258792487e-05, "loss": 0.8105, "step": 3710 }, { "epoch": 0.38, "grad_norm": 1.3374152615652346, "learning_rate": 1.430828246428185e-05, "loss": 0.6014, "step": 3711 }, { "epoch": 0.38, "grad_norm": 1.535286610220723, "learning_rate": 1.4305309202174531e-05, "loss": 0.6523, "step": 3712 }, { "epoch": 0.38, "grad_norm": 1.62827588808955, "learning_rate": 1.4302335472793223e-05, "loss": 0.8442, "step": 3713 }, { "epoch": 0.38, "grad_norm": 1.6613625625280515, "learning_rate": 1.4299361276460682e-05, "loss": 0.7732, "step": 3714 }, { "epoch": 0.38, "grad_norm": 1.37174038097834, "learning_rate": 1.4296386613499707e-05, "loss": 0.6459, "step": 3715 }, { "epoch": 0.38, "grad_norm": 1.5986286169846708, "learning_rate": 1.429341148423315e-05, "loss": 0.7608, "step": 3716 }, { "epoch": 0.38, "grad_norm": 1.488244744833797, "learning_rate": 1.4290435888983925e-05, "loss": 0.7629, "step": 3717 }, { "epoch": 0.38, "grad_norm": 1.6315928375643658, "learning_rate": 1.4287459828074974e-05, "loss": 0.7264, "step": 3718 }, { "epoch": 0.38, "grad_norm": 1.5400859331905221, "learning_rate": 1.4284483301829308e-05, "loss": 0.7096, "step": 3719 }, { "epoch": 0.38, "grad_norm": 1.3762063201954269, "learning_rate": 1.4281506310569982e-05, "loss": 0.6705, "step": 3720 }, { "epoch": 0.38, "grad_norm": 1.4196649835515989, "learning_rate": 1.4278528854620101e-05, "loss": 0.6865, "step": 3721 }, { "epoch": 0.38, "grad_norm": 1.6112878945831668, "learning_rate": 1.4275550934302822e-05, "loss": 0.8048, "step": 3722 }, { "epoch": 0.38, "grad_norm": 1.42243294129432, "learning_rate": 1.4272572549941353e-05, "loss": 0.7125, "step": 3723 }, { "epoch": 0.38, "grad_norm": 1.5105908639260996, "learning_rate": 1.4269593701858946e-05, "loss": 0.7069, "step": 3724 }, { "epoch": 0.38, "grad_norm": 1.5253097055310054, "learning_rate": 1.4266614390378912e-05, "loss": 0.7579, "step": 3725 }, { "epoch": 0.38, "grad_norm": 1.6273141589271638, "learning_rate": 1.4263634615824611e-05, "loss": 0.688, "step": 3726 }, { "epoch": 0.38, "grad_norm": 1.536711430900752, "learning_rate": 1.4260654378519445e-05, "loss": 0.7457, "step": 3727 }, { "epoch": 0.38, "grad_norm": 1.6207216793297654, "learning_rate": 1.4257673678786878e-05, "loss": 0.7035, "step": 3728 }, { "epoch": 0.38, "grad_norm": 1.5322103079932716, "learning_rate": 1.4254692516950415e-05, "loss": 0.6968, "step": 3729 }, { "epoch": 0.38, "grad_norm": 1.606998978040395, "learning_rate": 1.4251710893333615e-05, "loss": 0.7688, "step": 3730 }, { "epoch": 0.38, "grad_norm": 1.6404842672847377, "learning_rate": 1.4248728808260086e-05, "loss": 0.7266, "step": 3731 }, { "epoch": 0.38, "grad_norm": 1.5499251500162907, "learning_rate": 1.4245746262053489e-05, "loss": 0.815, "step": 3732 }, { "epoch": 0.38, "grad_norm": 1.4590827206827173, "learning_rate": 1.4242763255037528e-05, "loss": 0.7333, "step": 3733 }, { "epoch": 0.38, "grad_norm": 1.520366668123699, "learning_rate": 1.4239779787535969e-05, "loss": 0.7919, "step": 3734 }, { "epoch": 0.38, "grad_norm": 1.599862226499546, "learning_rate": 1.4236795859872613e-05, "loss": 0.7795, "step": 3735 }, { "epoch": 0.38, "grad_norm": 1.5905915358036564, "learning_rate": 1.4233811472371326e-05, "loss": 0.6292, "step": 3736 }, { "epoch": 0.38, "grad_norm": 1.6291003406413689, "learning_rate": 1.4230826625356011e-05, "loss": 0.7277, "step": 3737 }, { "epoch": 0.38, "grad_norm": 1.4272235179354023, "learning_rate": 1.422784131915063e-05, "loss": 0.6668, "step": 3738 }, { "epoch": 0.38, "grad_norm": 1.832901248039077, "learning_rate": 1.422485555407919e-05, "loss": 0.8227, "step": 3739 }, { "epoch": 0.38, "grad_norm": 1.7708106990296313, "learning_rate": 1.4221869330465747e-05, "loss": 0.7496, "step": 3740 }, { "epoch": 0.38, "grad_norm": 1.6247255897769424, "learning_rate": 1.4218882648634413e-05, "loss": 0.7707, "step": 3741 }, { "epoch": 0.38, "grad_norm": 1.5910916475377026, "learning_rate": 1.4215895508909343e-05, "loss": 0.7974, "step": 3742 }, { "epoch": 0.38, "grad_norm": 1.4224597783707875, "learning_rate": 1.4212907911614744e-05, "loss": 0.6212, "step": 3743 }, { "epoch": 0.38, "grad_norm": 2.7028252775857555, "learning_rate": 1.4209919857074875e-05, "loss": 0.7943, "step": 3744 }, { "epoch": 0.38, "grad_norm": 1.6161012343232768, "learning_rate": 1.420693134561404e-05, "loss": 0.7725, "step": 3745 }, { "epoch": 0.38, "grad_norm": 1.678927989956609, "learning_rate": 1.4203942377556596e-05, "loss": 0.846, "step": 3746 }, { "epoch": 0.38, "grad_norm": 1.3767408598905295, "learning_rate": 1.4200952953226949e-05, "loss": 0.7024, "step": 3747 }, { "epoch": 0.38, "grad_norm": 1.545315952518722, "learning_rate": 1.4197963072949556e-05, "loss": 0.7767, "step": 3748 }, { "epoch": 0.38, "grad_norm": 1.6255543421501542, "learning_rate": 1.419497273704892e-05, "loss": 0.8155, "step": 3749 }, { "epoch": 0.38, "grad_norm": 1.2918856773106884, "learning_rate": 1.4191981945849595e-05, "loss": 0.6786, "step": 3750 }, { "epoch": 0.38, "grad_norm": 1.5229528852163654, "learning_rate": 1.4188990699676186e-05, "loss": 0.6854, "step": 3751 }, { "epoch": 0.38, "grad_norm": 1.62475224522315, "learning_rate": 1.4185998998853343e-05, "loss": 0.7444, "step": 3752 }, { "epoch": 0.38, "grad_norm": 1.458221762720607, "learning_rate": 1.4183006843705774e-05, "loss": 0.702, "step": 3753 }, { "epoch": 0.38, "grad_norm": 1.4584481686773072, "learning_rate": 1.4180014234558224e-05, "loss": 0.7233, "step": 3754 }, { "epoch": 0.38, "grad_norm": 1.6129813492541507, "learning_rate": 1.4177021171735499e-05, "loss": 0.6746, "step": 3755 }, { "epoch": 0.38, "grad_norm": 1.576899186873065, "learning_rate": 1.4174027655562443e-05, "loss": 0.7028, "step": 3756 }, { "epoch": 0.38, "grad_norm": 1.4883587909747937, "learning_rate": 1.4171033686363962e-05, "loss": 0.795, "step": 3757 }, { "epoch": 0.38, "grad_norm": 1.5501029021198707, "learning_rate": 1.4168039264465003e-05, "loss": 0.7463, "step": 3758 }, { "epoch": 0.38, "grad_norm": 1.589529021865067, "learning_rate": 1.4165044390190563e-05, "loss": 0.7619, "step": 3759 }, { "epoch": 0.38, "grad_norm": 1.6718136233085048, "learning_rate": 1.4162049063865686e-05, "loss": 0.7743, "step": 3760 }, { "epoch": 0.38, "grad_norm": 1.6352469921507213, "learning_rate": 1.4159053285815472e-05, "loss": 0.7488, "step": 3761 }, { "epoch": 0.38, "grad_norm": 1.728331817314099, "learning_rate": 1.4156057056365064e-05, "loss": 0.6152, "step": 3762 }, { "epoch": 0.38, "grad_norm": 1.5786144672305662, "learning_rate": 1.4153060375839656e-05, "loss": 0.8472, "step": 3763 }, { "epoch": 0.38, "grad_norm": 1.5559091282283846, "learning_rate": 1.4150063244564491e-05, "loss": 0.7672, "step": 3764 }, { "epoch": 0.38, "grad_norm": 1.6212637710342384, "learning_rate": 1.4147065662864859e-05, "loss": 0.765, "step": 3765 }, { "epoch": 0.38, "grad_norm": 1.5200602689627594, "learning_rate": 1.4144067631066102e-05, "loss": 0.7064, "step": 3766 }, { "epoch": 0.38, "grad_norm": 1.534721277780881, "learning_rate": 1.4141069149493612e-05, "loss": 0.691, "step": 3767 }, { "epoch": 0.38, "grad_norm": 1.592590452367276, "learning_rate": 1.4138070218472825e-05, "loss": 0.8375, "step": 3768 }, { "epoch": 0.38, "grad_norm": 1.4903446030462624, "learning_rate": 1.4135070838329227e-05, "loss": 0.649, "step": 3769 }, { "epoch": 0.38, "grad_norm": 1.6113849496475525, "learning_rate": 1.4132071009388353e-05, "loss": 0.5889, "step": 3770 }, { "epoch": 0.38, "grad_norm": 1.5290589043034744, "learning_rate": 1.4129070731975791e-05, "loss": 0.7634, "step": 3771 }, { "epoch": 0.38, "grad_norm": 1.5352165322416986, "learning_rate": 1.4126070006417174e-05, "loss": 0.7416, "step": 3772 }, { "epoch": 0.38, "grad_norm": 1.5023754903404172, "learning_rate": 1.412306883303818e-05, "loss": 0.689, "step": 3773 }, { "epoch": 0.38, "grad_norm": 1.528870029355672, "learning_rate": 1.4120067212164542e-05, "loss": 0.6352, "step": 3774 }, { "epoch": 0.38, "grad_norm": 1.4173567433584682, "learning_rate": 1.4117065144122038e-05, "loss": 0.649, "step": 3775 }, { "epoch": 0.38, "grad_norm": 1.5576072638287743, "learning_rate": 1.4114062629236497e-05, "loss": 0.7539, "step": 3776 }, { "epoch": 0.38, "grad_norm": 1.5399350550920947, "learning_rate": 1.4111059667833797e-05, "loss": 0.7383, "step": 3777 }, { "epoch": 0.38, "grad_norm": 1.574872734967755, "learning_rate": 1.4108056260239858e-05, "loss": 0.7656, "step": 3778 }, { "epoch": 0.38, "grad_norm": 1.5001125430872018, "learning_rate": 1.4105052406780653e-05, "loss": 0.6585, "step": 3779 }, { "epoch": 0.38, "grad_norm": 1.7144549791102108, "learning_rate": 1.4102048107782206e-05, "loss": 0.7931, "step": 3780 }, { "epoch": 0.38, "grad_norm": 1.6329338489197691, "learning_rate": 1.4099043363570588e-05, "loss": 0.8194, "step": 3781 }, { "epoch": 0.38, "grad_norm": 1.524154261575747, "learning_rate": 1.4096038174471913e-05, "loss": 0.7184, "step": 3782 }, { "epoch": 0.38, "grad_norm": 1.5219843735215859, "learning_rate": 1.4093032540812347e-05, "loss": 0.8048, "step": 3783 }, { "epoch": 0.38, "grad_norm": 1.4340792225579542, "learning_rate": 1.4090026462918109e-05, "loss": 0.7044, "step": 3784 }, { "epoch": 0.39, "grad_norm": 1.5796502903346024, "learning_rate": 1.408701994111546e-05, "loss": 0.7784, "step": 3785 }, { "epoch": 0.39, "grad_norm": 1.665890566670337, "learning_rate": 1.408401297573071e-05, "loss": 0.7158, "step": 3786 }, { "epoch": 0.39, "grad_norm": 1.4960904548272018, "learning_rate": 1.4081005567090217e-05, "loss": 0.6829, "step": 3787 }, { "epoch": 0.39, "grad_norm": 1.4380137590155226, "learning_rate": 1.4077997715520389e-05, "loss": 0.6871, "step": 3788 }, { "epoch": 0.39, "grad_norm": 1.6775201967511228, "learning_rate": 1.4074989421347683e-05, "loss": 0.7099, "step": 3789 }, { "epoch": 0.39, "grad_norm": 1.4047990814731268, "learning_rate": 1.4071980684898599e-05, "loss": 0.6937, "step": 3790 }, { "epoch": 0.39, "grad_norm": 1.5564919382081719, "learning_rate": 1.4068971506499693e-05, "loss": 0.6568, "step": 3791 }, { "epoch": 0.39, "grad_norm": 1.3846321477736974, "learning_rate": 1.4065961886477561e-05, "loss": 0.6756, "step": 3792 }, { "epoch": 0.39, "grad_norm": 1.795205437629554, "learning_rate": 1.4062951825158848e-05, "loss": 0.8043, "step": 3793 }, { "epoch": 0.39, "grad_norm": 1.496177531214818, "learning_rate": 1.4059941322870255e-05, "loss": 0.665, "step": 3794 }, { "epoch": 0.39, "grad_norm": 1.6592131448302958, "learning_rate": 1.405693037993852e-05, "loss": 0.7308, "step": 3795 }, { "epoch": 0.39, "grad_norm": 1.587738465551338, "learning_rate": 1.4053918996690437e-05, "loss": 0.8211, "step": 3796 }, { "epoch": 0.39, "grad_norm": 1.3009250251491375, "learning_rate": 1.4050907173452841e-05, "loss": 0.6029, "step": 3797 }, { "epoch": 0.39, "grad_norm": 1.596963960134962, "learning_rate": 1.4047894910552624e-05, "loss": 0.6934, "step": 3798 }, { "epoch": 0.39, "grad_norm": 1.3562784172634255, "learning_rate": 1.4044882208316714e-05, "loss": 0.7474, "step": 3799 }, { "epoch": 0.39, "grad_norm": 1.6531533546302477, "learning_rate": 1.4041869067072096e-05, "loss": 0.7492, "step": 3800 }, { "epoch": 0.39, "grad_norm": 1.6285548654829045, "learning_rate": 1.40388554871458e-05, "loss": 0.8076, "step": 3801 }, { "epoch": 0.39, "grad_norm": 1.583551870150882, "learning_rate": 1.4035841468864897e-05, "loss": 0.7962, "step": 3802 }, { "epoch": 0.39, "grad_norm": 1.47241936234266, "learning_rate": 1.403282701255652e-05, "loss": 0.7323, "step": 3803 }, { "epoch": 0.39, "grad_norm": 1.516670769299008, "learning_rate": 1.4029812118547836e-05, "loss": 0.7883, "step": 3804 }, { "epoch": 0.39, "grad_norm": 1.5406833413888608, "learning_rate": 1.4026796787166066e-05, "loss": 0.6896, "step": 3805 }, { "epoch": 0.39, "grad_norm": 1.521719402197105, "learning_rate": 1.4023781018738474e-05, "loss": 0.6659, "step": 3806 }, { "epoch": 0.39, "grad_norm": 1.4315441440696073, "learning_rate": 1.402076481359238e-05, "loss": 0.6849, "step": 3807 }, { "epoch": 0.39, "grad_norm": 1.3956933687096298, "learning_rate": 1.4017748172055146e-05, "loss": 0.7567, "step": 3808 }, { "epoch": 0.39, "grad_norm": 1.589580251407946, "learning_rate": 1.4014731094454175e-05, "loss": 0.7499, "step": 3809 }, { "epoch": 0.39, "grad_norm": 1.4449082249852578, "learning_rate": 1.4011713581116929e-05, "loss": 0.7198, "step": 3810 }, { "epoch": 0.39, "grad_norm": 1.7331598584411358, "learning_rate": 1.4008695632370905e-05, "loss": 0.771, "step": 3811 }, { "epoch": 0.39, "grad_norm": 1.5165282302359995, "learning_rate": 1.4005677248543664e-05, "loss": 0.6751, "step": 3812 }, { "epoch": 0.39, "grad_norm": 1.4090930328374909, "learning_rate": 1.4002658429962797e-05, "loss": 0.7594, "step": 3813 }, { "epoch": 0.39, "grad_norm": 1.4536552389420134, "learning_rate": 1.3999639176955954e-05, "loss": 0.6469, "step": 3814 }, { "epoch": 0.39, "grad_norm": 1.6771064874091852, "learning_rate": 1.3996619489850822e-05, "loss": 0.7147, "step": 3815 }, { "epoch": 0.39, "grad_norm": 1.6093967262983275, "learning_rate": 1.3993599368975148e-05, "loss": 0.7282, "step": 3816 }, { "epoch": 0.39, "grad_norm": 1.6028915140892204, "learning_rate": 1.3990578814656716e-05, "loss": 0.7347, "step": 3817 }, { "epoch": 0.39, "grad_norm": 1.5816198883110975, "learning_rate": 1.398755782722336e-05, "loss": 0.6194, "step": 3818 }, { "epoch": 0.39, "grad_norm": 1.5576591775681445, "learning_rate": 1.3984536407002958e-05, "loss": 0.7161, "step": 3819 }, { "epoch": 0.39, "grad_norm": 1.528736190571755, "learning_rate": 1.398151455432344e-05, "loss": 0.7431, "step": 3820 }, { "epoch": 0.39, "grad_norm": 1.5997533389755794, "learning_rate": 1.397849226951278e-05, "loss": 0.6505, "step": 3821 }, { "epoch": 0.39, "grad_norm": 1.5242466138313542, "learning_rate": 1.3975469552899003e-05, "loss": 0.7331, "step": 3822 }, { "epoch": 0.39, "grad_norm": 1.42476330834485, "learning_rate": 1.3972446404810176e-05, "loss": 0.7084, "step": 3823 }, { "epoch": 0.39, "grad_norm": 1.553639315764046, "learning_rate": 1.396942282557441e-05, "loss": 0.828, "step": 3824 }, { "epoch": 0.39, "grad_norm": 1.709618287320766, "learning_rate": 1.3966398815519874e-05, "loss": 0.8233, "step": 3825 }, { "epoch": 0.39, "grad_norm": 1.5133739711007284, "learning_rate": 1.3963374374974774e-05, "loss": 0.688, "step": 3826 }, { "epoch": 0.39, "grad_norm": 1.6350687600513938, "learning_rate": 1.3960349504267367e-05, "loss": 0.7473, "step": 3827 }, { "epoch": 0.39, "grad_norm": 1.6231874561776136, "learning_rate": 1.3957324203725952e-05, "loss": 0.7765, "step": 3828 }, { "epoch": 0.39, "grad_norm": 1.4225649096261395, "learning_rate": 1.395429847367888e-05, "loss": 0.7304, "step": 3829 }, { "epoch": 0.39, "grad_norm": 1.524648245289213, "learning_rate": 1.3951272314454549e-05, "loss": 0.7341, "step": 3830 }, { "epoch": 0.39, "grad_norm": 1.580200458978781, "learning_rate": 1.39482457263814e-05, "loss": 0.8039, "step": 3831 }, { "epoch": 0.39, "grad_norm": 1.6674960493863458, "learning_rate": 1.394521870978792e-05, "loss": 0.7352, "step": 3832 }, { "epoch": 0.39, "grad_norm": 1.5538310665859416, "learning_rate": 1.3942191265002642e-05, "loss": 0.6707, "step": 3833 }, { "epoch": 0.39, "grad_norm": 1.4450896961970803, "learning_rate": 1.3939163392354156e-05, "loss": 0.623, "step": 3834 }, { "epoch": 0.39, "grad_norm": 1.7013094745437254, "learning_rate": 1.3936135092171084e-05, "loss": 0.7985, "step": 3835 }, { "epoch": 0.39, "grad_norm": 1.5946591799629912, "learning_rate": 1.3933106364782103e-05, "loss": 0.7847, "step": 3836 }, { "epoch": 0.39, "grad_norm": 1.6543035325745643, "learning_rate": 1.393007721051593e-05, "loss": 0.839, "step": 3837 }, { "epoch": 0.39, "grad_norm": 1.3017152342489924, "learning_rate": 1.3927047629701336e-05, "loss": 0.7176, "step": 3838 }, { "epoch": 0.39, "grad_norm": 1.4315199274203088, "learning_rate": 1.3924017622667134e-05, "loss": 0.7293, "step": 3839 }, { "epoch": 0.39, "grad_norm": 1.6881075867618345, "learning_rate": 1.3920987189742186e-05, "loss": 0.7541, "step": 3840 }, { "epoch": 0.39, "grad_norm": 1.5138340574527112, "learning_rate": 1.3917956331255393e-05, "loss": 0.7273, "step": 3841 }, { "epoch": 0.39, "grad_norm": 1.6414630893059443, "learning_rate": 1.3914925047535712e-05, "loss": 0.7024, "step": 3842 }, { "epoch": 0.39, "grad_norm": 1.5294447073641448, "learning_rate": 1.3911893338912142e-05, "loss": 0.7604, "step": 3843 }, { "epoch": 0.39, "grad_norm": 1.510644801294499, "learning_rate": 1.390886120571372e-05, "loss": 0.7265, "step": 3844 }, { "epoch": 0.39, "grad_norm": 1.4877613287490439, "learning_rate": 1.3905828648269544e-05, "loss": 0.6287, "step": 3845 }, { "epoch": 0.39, "grad_norm": 1.5412889660007723, "learning_rate": 1.390279566690875e-05, "loss": 0.7313, "step": 3846 }, { "epoch": 0.39, "grad_norm": 1.6011530524353756, "learning_rate": 1.3899762261960519e-05, "loss": 0.7371, "step": 3847 }, { "epoch": 0.39, "grad_norm": 1.530472811667168, "learning_rate": 1.3896728433754078e-05, "loss": 0.6527, "step": 3848 }, { "epoch": 0.39, "grad_norm": 1.4986886699284894, "learning_rate": 1.3893694182618705e-05, "loss": 0.8456, "step": 3849 }, { "epoch": 0.39, "grad_norm": 1.5593222557486242, "learning_rate": 1.3890659508883719e-05, "loss": 0.7395, "step": 3850 }, { "epoch": 0.39, "grad_norm": 1.5585342555525534, "learning_rate": 1.3887624412878485e-05, "loss": 0.7369, "step": 3851 }, { "epoch": 0.39, "grad_norm": 1.6438767824159255, "learning_rate": 1.3884588894932418e-05, "loss": 0.6615, "step": 3852 }, { "epoch": 0.39, "grad_norm": 1.5879575751740531, "learning_rate": 1.3881552955374975e-05, "loss": 0.7852, "step": 3853 }, { "epoch": 0.39, "grad_norm": 1.5550148776799544, "learning_rate": 1.3878516594535661e-05, "loss": 0.739, "step": 3854 }, { "epoch": 0.39, "grad_norm": 1.5185103880024398, "learning_rate": 1.3875479812744022e-05, "loss": 0.803, "step": 3855 }, { "epoch": 0.39, "grad_norm": 1.532457978963268, "learning_rate": 1.3872442610329652e-05, "loss": 0.7316, "step": 3856 }, { "epoch": 0.39, "grad_norm": 1.6130164917900631, "learning_rate": 1.38694049876222e-05, "loss": 0.693, "step": 3857 }, { "epoch": 0.39, "grad_norm": 1.5063801970280357, "learning_rate": 1.3866366944951344e-05, "loss": 0.7135, "step": 3858 }, { "epoch": 0.39, "grad_norm": 1.5886342956960429, "learning_rate": 1.386332848264682e-05, "loss": 0.8027, "step": 3859 }, { "epoch": 0.39, "grad_norm": 1.7263521408368807, "learning_rate": 1.3860289601038406e-05, "loss": 0.7198, "step": 3860 }, { "epoch": 0.39, "grad_norm": 1.5607732274716484, "learning_rate": 1.3857250300455922e-05, "loss": 0.776, "step": 3861 }, { "epoch": 0.39, "grad_norm": 1.6904536538120865, "learning_rate": 1.3854210581229242e-05, "loss": 0.7018, "step": 3862 }, { "epoch": 0.39, "grad_norm": 1.6716662376200253, "learning_rate": 1.3851170443688274e-05, "loss": 0.7247, "step": 3863 }, { "epoch": 0.39, "grad_norm": 1.4706608496989764, "learning_rate": 1.384812988816298e-05, "loss": 0.6567, "step": 3864 }, { "epoch": 0.39, "grad_norm": 1.6684571671032502, "learning_rate": 1.3845088914983365e-05, "loss": 0.7876, "step": 3865 }, { "epoch": 0.39, "grad_norm": 1.5257979153878254, "learning_rate": 1.3842047524479478e-05, "loss": 0.833, "step": 3866 }, { "epoch": 0.39, "grad_norm": 1.428668645500322, "learning_rate": 1.3839005716981416e-05, "loss": 0.7895, "step": 3867 }, { "epoch": 0.39, "grad_norm": 1.437318096501984, "learning_rate": 1.383596349281932e-05, "loss": 0.7416, "step": 3868 }, { "epoch": 0.39, "grad_norm": 1.5649894008799563, "learning_rate": 1.383292085232337e-05, "loss": 0.7262, "step": 3869 }, { "epoch": 0.39, "grad_norm": 1.6585877060908665, "learning_rate": 1.3829877795823805e-05, "loss": 0.679, "step": 3870 }, { "epoch": 0.39, "grad_norm": 1.6236608561530104, "learning_rate": 1.3826834323650899e-05, "loss": 0.6862, "step": 3871 }, { "epoch": 0.39, "grad_norm": 1.4800539140742295, "learning_rate": 1.3823790436134971e-05, "loss": 0.7337, "step": 3872 }, { "epoch": 0.39, "grad_norm": 1.4511731411899458, "learning_rate": 1.3820746133606388e-05, "loss": 0.7061, "step": 3873 }, { "epoch": 0.39, "grad_norm": 1.5955067901139126, "learning_rate": 1.3817701416395562e-05, "loss": 0.7108, "step": 3874 }, { "epoch": 0.39, "grad_norm": 1.5956323928795686, "learning_rate": 1.381465628483295e-05, "loss": 0.836, "step": 3875 }, { "epoch": 0.39, "grad_norm": 1.5328985880733839, "learning_rate": 1.3811610739249052e-05, "loss": 0.7159, "step": 3876 }, { "epoch": 0.39, "grad_norm": 1.449031123788358, "learning_rate": 1.3808564779974418e-05, "loss": 0.7684, "step": 3877 }, { "epoch": 0.39, "grad_norm": 1.413321842781258, "learning_rate": 1.3805518407339633e-05, "loss": 0.7434, "step": 3878 }, { "epoch": 0.39, "grad_norm": 1.6163216631466955, "learning_rate": 1.3802471621675337e-05, "loss": 0.7836, "step": 3879 }, { "epoch": 0.39, "grad_norm": 1.5284123049299065, "learning_rate": 1.3799424423312213e-05, "loss": 0.8221, "step": 3880 }, { "epoch": 0.39, "grad_norm": 1.459195116401371, "learning_rate": 1.3796376812580983e-05, "loss": 0.6967, "step": 3881 }, { "epoch": 0.39, "grad_norm": 1.6636941098201041, "learning_rate": 1.379332878981242e-05, "loss": 0.8194, "step": 3882 }, { "epoch": 0.39, "grad_norm": 1.7712865910914184, "learning_rate": 1.3790280355337332e-05, "loss": 0.7863, "step": 3883 }, { "epoch": 0.4, "grad_norm": 1.340039087969083, "learning_rate": 1.378723150948659e-05, "loss": 0.6541, "step": 3884 }, { "epoch": 0.4, "grad_norm": 1.6035814459079745, "learning_rate": 1.3784182252591091e-05, "loss": 0.8015, "step": 3885 }, { "epoch": 0.4, "grad_norm": 1.4591240737646074, "learning_rate": 1.3781132584981789e-05, "loss": 0.5822, "step": 3886 }, { "epoch": 0.4, "grad_norm": 1.4467307457492842, "learning_rate": 1.3778082506989673e-05, "loss": 0.7766, "step": 3887 }, { "epoch": 0.4, "grad_norm": 1.495377031381774, "learning_rate": 1.3775032018945784e-05, "loss": 0.6177, "step": 3888 }, { "epoch": 0.4, "grad_norm": 1.6590640615190357, "learning_rate": 1.3771981121181207e-05, "loss": 0.814, "step": 3889 }, { "epoch": 0.4, "grad_norm": 1.5529252860548812, "learning_rate": 1.3768929814027064e-05, "loss": 0.7258, "step": 3890 }, { "epoch": 0.4, "grad_norm": 1.4806719936737924, "learning_rate": 1.376587809781453e-05, "loss": 0.7873, "step": 3891 }, { "epoch": 0.4, "grad_norm": 1.6248051659767457, "learning_rate": 1.3762825972874816e-05, "loss": 0.6624, "step": 3892 }, { "epoch": 0.4, "grad_norm": 1.715735022035157, "learning_rate": 1.375977343953919e-05, "loss": 0.7183, "step": 3893 }, { "epoch": 0.4, "grad_norm": 1.5299224025682734, "learning_rate": 1.3756720498138956e-05, "loss": 0.7039, "step": 3894 }, { "epoch": 0.4, "grad_norm": 1.4701266266479829, "learning_rate": 1.3753667149005457e-05, "loss": 0.794, "step": 3895 }, { "epoch": 0.4, "grad_norm": 1.567391393980187, "learning_rate": 1.375061339247009e-05, "loss": 0.8436, "step": 3896 }, { "epoch": 0.4, "grad_norm": 1.6305836041831285, "learning_rate": 1.3747559228864294e-05, "loss": 0.7146, "step": 3897 }, { "epoch": 0.4, "grad_norm": 1.5033273499462547, "learning_rate": 1.3744504658519545e-05, "loss": 0.707, "step": 3898 }, { "epoch": 0.4, "grad_norm": 1.610271975696023, "learning_rate": 1.3741449681767376e-05, "loss": 0.809, "step": 3899 }, { "epoch": 0.4, "grad_norm": 1.6805097925785062, "learning_rate": 1.3738394298939354e-05, "loss": 0.7389, "step": 3900 }, { "epoch": 0.4, "grad_norm": 1.5444366150685034, "learning_rate": 1.3735338510367093e-05, "loss": 0.7933, "step": 3901 }, { "epoch": 0.4, "grad_norm": 1.488665112098971, "learning_rate": 1.3732282316382249e-05, "loss": 0.7044, "step": 3902 }, { "epoch": 0.4, "grad_norm": 1.5498518567347663, "learning_rate": 1.3729225717316528e-05, "loss": 0.767, "step": 3903 }, { "epoch": 0.4, "grad_norm": 1.465550090058791, "learning_rate": 1.3726168713501673e-05, "loss": 0.7504, "step": 3904 }, { "epoch": 0.4, "grad_norm": 2.0252785952349286, "learning_rate": 1.3723111305269474e-05, "loss": 0.7166, "step": 3905 }, { "epoch": 0.4, "grad_norm": 1.7429264453882414, "learning_rate": 1.3720053492951766e-05, "loss": 0.7712, "step": 3906 }, { "epoch": 0.4, "grad_norm": 1.5456938227408854, "learning_rate": 1.3716995276880428e-05, "loss": 0.6899, "step": 3907 }, { "epoch": 0.4, "grad_norm": 1.504687922160811, "learning_rate": 1.3713936657387379e-05, "loss": 0.7122, "step": 3908 }, { "epoch": 0.4, "grad_norm": 1.5962339911396888, "learning_rate": 1.3710877634804587e-05, "loss": 0.709, "step": 3909 }, { "epoch": 0.4, "grad_norm": 1.4784206664096584, "learning_rate": 1.3707818209464057e-05, "loss": 0.7649, "step": 3910 }, { "epoch": 0.4, "grad_norm": 1.498108568389412, "learning_rate": 1.3704758381697845e-05, "loss": 0.7926, "step": 3911 }, { "epoch": 0.4, "grad_norm": 1.5681712841089852, "learning_rate": 1.3701698151838048e-05, "loss": 0.6128, "step": 3912 }, { "epoch": 0.4, "grad_norm": 1.5598006504989548, "learning_rate": 1.3698637520216802e-05, "loss": 0.7507, "step": 3913 }, { "epoch": 0.4, "grad_norm": 1.4329336123610517, "learning_rate": 1.3695576487166293e-05, "loss": 0.7848, "step": 3914 }, { "epoch": 0.4, "grad_norm": 1.6529935580307296, "learning_rate": 1.369251505301875e-05, "loss": 0.7724, "step": 3915 }, { "epoch": 0.4, "grad_norm": 1.4943906668863864, "learning_rate": 1.3689453218106445e-05, "loss": 0.7686, "step": 3916 }, { "epoch": 0.4, "grad_norm": 1.6263525308946214, "learning_rate": 1.3686390982761689e-05, "loss": 0.7711, "step": 3917 }, { "epoch": 0.4, "grad_norm": 1.590883672321078, "learning_rate": 1.368332834731684e-05, "loss": 0.7328, "step": 3918 }, { "epoch": 0.4, "grad_norm": 1.5011123483847482, "learning_rate": 1.3680265312104297e-05, "loss": 0.6958, "step": 3919 }, { "epoch": 0.4, "grad_norm": 1.4704528756404118, "learning_rate": 1.367720187745651e-05, "loss": 0.7051, "step": 3920 }, { "epoch": 0.4, "grad_norm": 1.6128133730605272, "learning_rate": 1.3674138043705962e-05, "loss": 0.9151, "step": 3921 }, { "epoch": 0.4, "grad_norm": 1.672605327710779, "learning_rate": 1.367107381118519e-05, "loss": 0.7019, "step": 3922 }, { "epoch": 0.4, "grad_norm": 1.4984658369655424, "learning_rate": 1.366800918022676e-05, "loss": 0.6323, "step": 3923 }, { "epoch": 0.4, "grad_norm": 2.185466425690861, "learning_rate": 1.3664944151163298e-05, "loss": 0.7865, "step": 3924 }, { "epoch": 0.4, "grad_norm": 1.5925705912270587, "learning_rate": 1.3661878724327462e-05, "loss": 0.7435, "step": 3925 }, { "epoch": 0.4, "grad_norm": 1.7039222421624558, "learning_rate": 1.3658812900051956e-05, "loss": 0.6535, "step": 3926 }, { "epoch": 0.4, "grad_norm": 1.5512031904450436, "learning_rate": 1.3655746678669526e-05, "loss": 0.7673, "step": 3927 }, { "epoch": 0.4, "grad_norm": 1.7229401174639263, "learning_rate": 1.3652680060512965e-05, "loss": 0.7888, "step": 3928 }, { "epoch": 0.4, "grad_norm": 1.5228373082528688, "learning_rate": 1.3649613045915103e-05, "loss": 0.7251, "step": 3929 }, { "epoch": 0.4, "grad_norm": 1.6710105183424213, "learning_rate": 1.3646545635208818e-05, "loss": 0.6803, "step": 3930 }, { "epoch": 0.4, "grad_norm": 1.8352157434527323, "learning_rate": 1.3643477828727032e-05, "loss": 0.7465, "step": 3931 }, { "epoch": 0.4, "grad_norm": 1.566225925123017, "learning_rate": 1.3640409626802704e-05, "loss": 0.8061, "step": 3932 }, { "epoch": 0.4, "grad_norm": 1.732142431695408, "learning_rate": 1.363734102976884e-05, "loss": 0.8112, "step": 3933 }, { "epoch": 0.4, "grad_norm": 1.7342905642809994, "learning_rate": 1.3634272037958492e-05, "loss": 0.8466, "step": 3934 }, { "epoch": 0.4, "grad_norm": 1.705404804091433, "learning_rate": 1.3631202651704745e-05, "loss": 0.7414, "step": 3935 }, { "epoch": 0.4, "grad_norm": 1.481497381352257, "learning_rate": 1.3628132871340738e-05, "loss": 0.7467, "step": 3936 }, { "epoch": 0.4, "grad_norm": 1.3577307892769122, "learning_rate": 1.3625062697199643e-05, "loss": 0.7035, "step": 3937 }, { "epoch": 0.4, "grad_norm": 1.536636098337762, "learning_rate": 1.3621992129614683e-05, "loss": 0.6956, "step": 3938 }, { "epoch": 0.4, "grad_norm": 1.5967946850115433, "learning_rate": 1.3618921168919119e-05, "loss": 0.7888, "step": 3939 }, { "epoch": 0.4, "grad_norm": 1.6370373459453844, "learning_rate": 1.3615849815446255e-05, "loss": 0.6748, "step": 3940 }, { "epoch": 0.4, "grad_norm": 1.4152368310294077, "learning_rate": 1.3612778069529439e-05, "loss": 0.7403, "step": 3941 }, { "epoch": 0.4, "grad_norm": 1.4061541912594002, "learning_rate": 1.3609705931502063e-05, "loss": 0.6308, "step": 3942 }, { "epoch": 0.4, "grad_norm": 1.5958246079468728, "learning_rate": 1.3606633401697557e-05, "loss": 0.7026, "step": 3943 }, { "epoch": 0.4, "grad_norm": 1.5512193775463972, "learning_rate": 1.3603560480449395e-05, "loss": 0.7159, "step": 3944 }, { "epoch": 0.4, "grad_norm": 1.8355451876396274, "learning_rate": 1.3600487168091099e-05, "loss": 0.8106, "step": 3945 }, { "epoch": 0.4, "grad_norm": 1.6634340855586849, "learning_rate": 1.3597413464956222e-05, "loss": 0.8114, "step": 3946 }, { "epoch": 0.4, "grad_norm": 1.5560974007212274, "learning_rate": 1.3594339371378373e-05, "loss": 0.5911, "step": 3947 }, { "epoch": 0.4, "grad_norm": 1.7429364246328933, "learning_rate": 1.3591264887691198e-05, "loss": 0.7712, "step": 3948 }, { "epoch": 0.4, "grad_norm": 1.3727463391852266, "learning_rate": 1.3588190014228376e-05, "loss": 0.5769, "step": 3949 }, { "epoch": 0.4, "grad_norm": 1.5220570359255545, "learning_rate": 1.358511475132364e-05, "loss": 0.7909, "step": 3950 }, { "epoch": 0.4, "grad_norm": 1.6723979110942109, "learning_rate": 1.3582039099310767e-05, "loss": 0.6782, "step": 3951 }, { "epoch": 0.4, "grad_norm": 1.430894102045827, "learning_rate": 1.3578963058523565e-05, "loss": 0.7322, "step": 3952 }, { "epoch": 0.4, "grad_norm": 1.5004514717420763, "learning_rate": 1.357588662929589e-05, "loss": 0.6998, "step": 3953 }, { "epoch": 0.4, "grad_norm": 1.4512332645002124, "learning_rate": 1.3572809811961642e-05, "loss": 0.7361, "step": 3954 }, { "epoch": 0.4, "grad_norm": 1.5840456135063126, "learning_rate": 1.3569732606854764e-05, "loss": 0.7795, "step": 3955 }, { "epoch": 0.4, "grad_norm": 1.5977379919396377, "learning_rate": 1.3566655014309231e-05, "loss": 0.7411, "step": 3956 }, { "epoch": 0.4, "grad_norm": 1.6264944495338678, "learning_rate": 1.3563577034659078e-05, "loss": 0.6683, "step": 3957 }, { "epoch": 0.4, "grad_norm": 1.5164155158232762, "learning_rate": 1.3560498668238364e-05, "loss": 0.7513, "step": 3958 }, { "epoch": 0.4, "grad_norm": 1.640008203893221, "learning_rate": 1.35574199153812e-05, "loss": 0.7755, "step": 3959 }, { "epoch": 0.4, "grad_norm": 1.6039825251834356, "learning_rate": 1.3554340776421736e-05, "loss": 0.6642, "step": 3960 }, { "epoch": 0.4, "grad_norm": 1.7130422927765, "learning_rate": 1.3551261251694162e-05, "loss": 0.8338, "step": 3961 }, { "epoch": 0.4, "grad_norm": 1.4141896215299843, "learning_rate": 1.3548181341532715e-05, "loss": 0.7731, "step": 3962 }, { "epoch": 0.4, "grad_norm": 1.3793364177003844, "learning_rate": 1.3545101046271673e-05, "loss": 0.6984, "step": 3963 }, { "epoch": 0.4, "grad_norm": 1.457833073055294, "learning_rate": 1.354202036624535e-05, "loss": 0.7104, "step": 3964 }, { "epoch": 0.4, "grad_norm": 1.4252227929541943, "learning_rate": 1.353893930178811e-05, "loss": 0.7564, "step": 3965 }, { "epoch": 0.4, "grad_norm": 1.476133945396026, "learning_rate": 1.353585785323435e-05, "loss": 0.7568, "step": 3966 }, { "epoch": 0.4, "grad_norm": 1.5951424524109457, "learning_rate": 1.3532776020918515e-05, "loss": 0.7453, "step": 3967 }, { "epoch": 0.4, "grad_norm": 1.7376181756282787, "learning_rate": 1.3529693805175087e-05, "loss": 0.7995, "step": 3968 }, { "epoch": 0.4, "grad_norm": 1.6823680223599915, "learning_rate": 1.3526611206338595e-05, "loss": 0.8328, "step": 3969 }, { "epoch": 0.4, "grad_norm": 1.5942370022385746, "learning_rate": 1.352352822474361e-05, "loss": 0.6895, "step": 3970 }, { "epoch": 0.4, "grad_norm": 1.734311346453895, "learning_rate": 1.3520444860724737e-05, "loss": 0.7768, "step": 3971 }, { "epoch": 0.4, "grad_norm": 1.4420902721836801, "learning_rate": 1.3517361114616628e-05, "loss": 0.7815, "step": 3972 }, { "epoch": 0.4, "grad_norm": 1.6730404476991343, "learning_rate": 1.351427698675397e-05, "loss": 0.8037, "step": 3973 }, { "epoch": 0.4, "grad_norm": 1.7654437159713978, "learning_rate": 1.3511192477471509e-05, "loss": 0.7146, "step": 3974 }, { "epoch": 0.4, "grad_norm": 1.645189569094433, "learning_rate": 1.350810758710401e-05, "loss": 0.7678, "step": 3975 }, { "epoch": 0.4, "grad_norm": 1.5397826998041777, "learning_rate": 1.3505022315986295e-05, "loss": 0.6837, "step": 3976 }, { "epoch": 0.4, "grad_norm": 1.6013430729945768, "learning_rate": 1.350193666445322e-05, "loss": 0.7314, "step": 3977 }, { "epoch": 0.4, "grad_norm": 1.53250292796185, "learning_rate": 1.3498850632839683e-05, "loss": 0.6441, "step": 3978 }, { "epoch": 0.4, "grad_norm": 1.4275075859196327, "learning_rate": 1.3495764221480625e-05, "loss": 0.6272, "step": 3979 }, { "epoch": 0.4, "grad_norm": 1.4213206179979412, "learning_rate": 1.349267743071103e-05, "loss": 0.759, "step": 3980 }, { "epoch": 0.4, "grad_norm": 1.5861061657113282, "learning_rate": 1.3489590260865919e-05, "loss": 0.7647, "step": 3981 }, { "epoch": 0.41, "grad_norm": 1.4740057360364553, "learning_rate": 1.3486502712280354e-05, "loss": 0.8151, "step": 3982 }, { "epoch": 0.41, "grad_norm": 1.7059409786907052, "learning_rate": 1.3483414785289447e-05, "loss": 0.7177, "step": 3983 }, { "epoch": 0.41, "grad_norm": 1.5460171824416302, "learning_rate": 1.3480326480228336e-05, "loss": 0.7102, "step": 3984 }, { "epoch": 0.41, "grad_norm": 1.6288133991658376, "learning_rate": 1.3477237797432214e-05, "loss": 0.7615, "step": 3985 }, { "epoch": 0.41, "grad_norm": 1.5816596029004617, "learning_rate": 1.3474148737236305e-05, "loss": 0.7862, "step": 3986 }, { "epoch": 0.41, "grad_norm": 1.472868552210121, "learning_rate": 1.347105929997588e-05, "loss": 0.7079, "step": 3987 }, { "epoch": 0.41, "grad_norm": 1.5274707854704879, "learning_rate": 1.346796948598625e-05, "loss": 0.749, "step": 3988 }, { "epoch": 0.41, "grad_norm": 1.6334326446322402, "learning_rate": 1.3464879295602767e-05, "loss": 0.8399, "step": 3989 }, { "epoch": 0.41, "grad_norm": 1.5472172851604107, "learning_rate": 1.346178872916082e-05, "loss": 0.7571, "step": 3990 }, { "epoch": 0.41, "grad_norm": 1.4613981141692318, "learning_rate": 1.345869778699584e-05, "loss": 0.6745, "step": 3991 }, { "epoch": 0.41, "grad_norm": 1.7461178368925054, "learning_rate": 1.3455606469443308e-05, "loss": 0.724, "step": 3992 }, { "epoch": 0.41, "grad_norm": 1.5461884023147348, "learning_rate": 1.345251477683873e-05, "loss": 0.7484, "step": 3993 }, { "epoch": 0.41, "grad_norm": 1.5143082004134223, "learning_rate": 1.3449422709517665e-05, "loss": 0.6859, "step": 3994 }, { "epoch": 0.41, "grad_norm": 1.472662926327967, "learning_rate": 1.344633026781571e-05, "loss": 0.7049, "step": 3995 }, { "epoch": 0.41, "grad_norm": 1.565687265737648, "learning_rate": 1.3443237452068496e-05, "loss": 0.7657, "step": 3996 }, { "epoch": 0.41, "grad_norm": 1.4049062205494047, "learning_rate": 1.3440144262611704e-05, "loss": 0.677, "step": 3997 }, { "epoch": 0.41, "grad_norm": 1.4465927947849393, "learning_rate": 1.3437050699781052e-05, "loss": 0.6983, "step": 3998 }, { "epoch": 0.41, "grad_norm": 1.4138060959994558, "learning_rate": 1.3433956763912293e-05, "loss": 0.6678, "step": 3999 }, { "epoch": 0.41, "grad_norm": 1.5221747354708852, "learning_rate": 1.3430862455341228e-05, "loss": 0.7398, "step": 4000 }, { "epoch": 0.41, "grad_norm": 1.6519117347548133, "learning_rate": 1.3427767774403697e-05, "loss": 0.8253, "step": 4001 }, { "epoch": 0.41, "grad_norm": 1.421518040175894, "learning_rate": 1.3424672721435582e-05, "loss": 0.7104, "step": 4002 }, { "epoch": 0.41, "grad_norm": 1.6079084155329708, "learning_rate": 1.3421577296772795e-05, "loss": 0.7406, "step": 4003 }, { "epoch": 0.41, "grad_norm": 1.5137819432029258, "learning_rate": 1.34184815007513e-05, "loss": 0.7776, "step": 4004 }, { "epoch": 0.41, "grad_norm": 1.7366776196818696, "learning_rate": 1.3415385333707096e-05, "loss": 0.8128, "step": 4005 }, { "epoch": 0.41, "grad_norm": 1.5077195369673975, "learning_rate": 1.3412288795976228e-05, "loss": 0.705, "step": 4006 }, { "epoch": 0.41, "grad_norm": 1.6593668158452635, "learning_rate": 1.340919188789477e-05, "loss": 0.7579, "step": 4007 }, { "epoch": 0.41, "grad_norm": 1.5030958680366602, "learning_rate": 1.340609460979885e-05, "loss": 0.7195, "step": 4008 }, { "epoch": 0.41, "grad_norm": 1.5556440737593225, "learning_rate": 1.3402996962024623e-05, "loss": 0.7334, "step": 4009 }, { "epoch": 0.41, "grad_norm": 1.5110906348055217, "learning_rate": 1.3399898944908292e-05, "loss": 0.7625, "step": 4010 }, { "epoch": 0.41, "grad_norm": 1.5609678400026048, "learning_rate": 1.3396800558786102e-05, "loss": 0.6784, "step": 4011 }, { "epoch": 0.41, "grad_norm": 1.687003664548803, "learning_rate": 1.339370180399433e-05, "loss": 0.812, "step": 4012 }, { "epoch": 0.41, "grad_norm": 1.5049571514814664, "learning_rate": 1.33906026808693e-05, "loss": 0.7418, "step": 4013 }, { "epoch": 0.41, "grad_norm": 1.639106490446256, "learning_rate": 1.3387503189747369e-05, "loss": 0.6906, "step": 4014 }, { "epoch": 0.41, "grad_norm": 1.4993464719464538, "learning_rate": 1.3384403330964944e-05, "loss": 0.7478, "step": 4015 }, { "epoch": 0.41, "grad_norm": 1.471817374722621, "learning_rate": 1.3381303104858462e-05, "loss": 0.686, "step": 4016 }, { "epoch": 0.41, "grad_norm": 1.588559439772156, "learning_rate": 1.3378202511764409e-05, "loss": 0.7877, "step": 4017 }, { "epoch": 0.41, "grad_norm": 1.623103714420601, "learning_rate": 1.3375101552019296e-05, "loss": 0.7324, "step": 4018 }, { "epoch": 0.41, "grad_norm": 1.5576566124316074, "learning_rate": 1.3372000225959696e-05, "loss": 0.7979, "step": 4019 }, { "epoch": 0.41, "grad_norm": 1.6544913152819087, "learning_rate": 1.3368898533922202e-05, "loss": 0.7104, "step": 4020 }, { "epoch": 0.41, "grad_norm": 1.7688287844335833, "learning_rate": 1.3365796476243455e-05, "loss": 0.731, "step": 4021 }, { "epoch": 0.41, "grad_norm": 1.535589463303318, "learning_rate": 1.3362694053260136e-05, "loss": 0.781, "step": 4022 }, { "epoch": 0.41, "grad_norm": 1.4515536576064387, "learning_rate": 1.3359591265308965e-05, "loss": 0.6553, "step": 4023 }, { "epoch": 0.41, "grad_norm": 1.6016329573953065, "learning_rate": 1.33564881127267e-05, "loss": 0.7897, "step": 4024 }, { "epoch": 0.41, "grad_norm": 1.3707604948899, "learning_rate": 1.3353384595850139e-05, "loss": 0.7084, "step": 4025 }, { "epoch": 0.41, "grad_norm": 1.5991205748896737, "learning_rate": 1.3350280715016122e-05, "loss": 0.7636, "step": 4026 }, { "epoch": 0.41, "grad_norm": 1.49161456735389, "learning_rate": 1.3347176470561525e-05, "loss": 0.7854, "step": 4027 }, { "epoch": 0.41, "grad_norm": 1.426486035192875, "learning_rate": 1.3344071862823267e-05, "loss": 0.7637, "step": 4028 }, { "epoch": 0.41, "grad_norm": 1.5661573435509604, "learning_rate": 1.3340966892138306e-05, "loss": 0.6711, "step": 4029 }, { "epoch": 0.41, "grad_norm": 1.5588267225607866, "learning_rate": 1.3337861558843632e-05, "loss": 0.775, "step": 4030 }, { "epoch": 0.41, "grad_norm": 1.6100450258007952, "learning_rate": 1.3334755863276288e-05, "loss": 0.7643, "step": 4031 }, { "epoch": 0.41, "grad_norm": 1.506099579082188, "learning_rate": 1.3331649805773339e-05, "loss": 0.7668, "step": 4032 }, { "epoch": 0.41, "grad_norm": 1.6918366583759896, "learning_rate": 1.3328543386671908e-05, "loss": 0.6847, "step": 4033 }, { "epoch": 0.41, "grad_norm": 1.4958834953303939, "learning_rate": 1.3325436606309146e-05, "loss": 0.6859, "step": 4034 }, { "epoch": 0.41, "grad_norm": 1.5302344329469577, "learning_rate": 1.3322329465022244e-05, "loss": 0.7167, "step": 4035 }, { "epoch": 0.41, "grad_norm": 1.4107656193790257, "learning_rate": 1.3319221963148432e-05, "loss": 0.7237, "step": 4036 }, { "epoch": 0.41, "grad_norm": 1.6684130305180658, "learning_rate": 1.3316114101024984e-05, "loss": 0.7189, "step": 4037 }, { "epoch": 0.41, "grad_norm": 1.6413025034363493, "learning_rate": 1.331300587898921e-05, "loss": 0.7789, "step": 4038 }, { "epoch": 0.41, "grad_norm": 1.7096488103304526, "learning_rate": 1.3309897297378456e-05, "loss": 0.8057, "step": 4039 }, { "epoch": 0.41, "grad_norm": 1.527070743940658, "learning_rate": 1.3306788356530111e-05, "loss": 0.7914, "step": 4040 }, { "epoch": 0.41, "grad_norm": 1.704771614310355, "learning_rate": 1.3303679056781603e-05, "loss": 0.8515, "step": 4041 }, { "epoch": 0.41, "grad_norm": 1.6883261564971928, "learning_rate": 1.3300569398470398e-05, "loss": 0.8103, "step": 4042 }, { "epoch": 0.41, "grad_norm": 1.6306291070874364, "learning_rate": 1.3297459381933999e-05, "loss": 0.8424, "step": 4043 }, { "epoch": 0.41, "grad_norm": 1.6226850730174835, "learning_rate": 1.329434900750995e-05, "loss": 0.6827, "step": 4044 }, { "epoch": 0.41, "grad_norm": 1.592943110845101, "learning_rate": 1.3291238275535831e-05, "loss": 0.7318, "step": 4045 }, { "epoch": 0.41, "grad_norm": 1.6276586615517499, "learning_rate": 1.3288127186349274e-05, "loss": 0.8086, "step": 4046 }, { "epoch": 0.41, "grad_norm": 1.5809120260098595, "learning_rate": 1.3285015740287926e-05, "loss": 0.6867, "step": 4047 }, { "epoch": 0.41, "grad_norm": 1.60571809164613, "learning_rate": 1.3281903937689493e-05, "loss": 0.6884, "step": 4048 }, { "epoch": 0.41, "grad_norm": 1.4376966388706274, "learning_rate": 1.327879177889171e-05, "loss": 0.6801, "step": 4049 }, { "epoch": 0.41, "grad_norm": 1.7896708274759923, "learning_rate": 1.3275679264232353e-05, "loss": 0.822, "step": 4050 }, { "epoch": 0.41, "grad_norm": 1.5150771194204846, "learning_rate": 1.327256639404924e-05, "loss": 0.7674, "step": 4051 }, { "epoch": 0.41, "grad_norm": 1.5066318786705863, "learning_rate": 1.3269453168680223e-05, "loss": 0.7316, "step": 4052 }, { "epoch": 0.41, "grad_norm": 1.5224427225579769, "learning_rate": 1.3266339588463191e-05, "loss": 0.7884, "step": 4053 }, { "epoch": 0.41, "grad_norm": 1.4163623206171054, "learning_rate": 1.3263225653736075e-05, "loss": 0.8494, "step": 4054 }, { "epoch": 0.41, "grad_norm": 1.5620742435587964, "learning_rate": 1.326011136483685e-05, "loss": 0.7807, "step": 4055 }, { "epoch": 0.41, "grad_norm": 1.6402310828409679, "learning_rate": 1.3256996722103517e-05, "loss": 0.8428, "step": 4056 }, { "epoch": 0.41, "grad_norm": 1.4431914952890472, "learning_rate": 1.3253881725874122e-05, "loss": 0.6633, "step": 4057 }, { "epoch": 0.41, "grad_norm": 1.4191342409961611, "learning_rate": 1.325076637648675e-05, "loss": 0.7194, "step": 4058 }, { "epoch": 0.41, "grad_norm": 1.6401724364523487, "learning_rate": 1.3247650674279526e-05, "loss": 0.7146, "step": 4059 }, { "epoch": 0.41, "grad_norm": 2.955684699842525, "learning_rate": 1.3244534619590608e-05, "loss": 0.7624, "step": 4060 }, { "epoch": 0.41, "grad_norm": 1.7062570829972066, "learning_rate": 1.3241418212758194e-05, "loss": 0.7219, "step": 4061 }, { "epoch": 0.41, "grad_norm": 1.3868359029573687, "learning_rate": 1.3238301454120527e-05, "loss": 0.674, "step": 4062 }, { "epoch": 0.41, "grad_norm": 1.835307203260098, "learning_rate": 1.3235184344015877e-05, "loss": 0.8416, "step": 4063 }, { "epoch": 0.41, "grad_norm": 1.9181633969275953, "learning_rate": 1.3232066882782557e-05, "loss": 0.6275, "step": 4064 }, { "epoch": 0.41, "grad_norm": 1.4805190237294978, "learning_rate": 1.3228949070758925e-05, "loss": 0.6706, "step": 4065 }, { "epoch": 0.41, "grad_norm": 1.6628355377811277, "learning_rate": 1.3225830908283363e-05, "loss": 0.7272, "step": 4066 }, { "epoch": 0.41, "grad_norm": 1.525776708814196, "learning_rate": 1.3222712395694303e-05, "loss": 0.7371, "step": 4067 }, { "epoch": 0.41, "grad_norm": 1.5025346264582065, "learning_rate": 1.3219593533330211e-05, "loss": 0.8229, "step": 4068 }, { "epoch": 0.41, "grad_norm": 1.7879124391361025, "learning_rate": 1.3216474321529589e-05, "loss": 0.7747, "step": 4069 }, { "epoch": 0.41, "grad_norm": 1.6357297829478563, "learning_rate": 1.3213354760630981e-05, "loss": 0.6958, "step": 4070 }, { "epoch": 0.41, "grad_norm": 1.5215139989575535, "learning_rate": 1.3210234850972966e-05, "loss": 0.7636, "step": 4071 }, { "epoch": 0.41, "grad_norm": 1.3885791150732956, "learning_rate": 1.3207114592894155e-05, "loss": 0.6559, "step": 4072 }, { "epoch": 0.41, "grad_norm": 1.5713248008518697, "learning_rate": 1.3203993986733212e-05, "loss": 0.719, "step": 4073 }, { "epoch": 0.41, "grad_norm": 1.5446063043547251, "learning_rate": 1.3200873032828827e-05, "loss": 0.7433, "step": 4074 }, { "epoch": 0.41, "grad_norm": 1.564066324834634, "learning_rate": 1.319775173151973e-05, "loss": 0.6929, "step": 4075 }, { "epoch": 0.41, "grad_norm": 1.6681039151887862, "learning_rate": 1.319463008314469e-05, "loss": 0.811, "step": 4076 }, { "epoch": 0.41, "grad_norm": 1.7337500036471722, "learning_rate": 1.3191508088042511e-05, "loss": 0.7997, "step": 4077 }, { "epoch": 0.41, "grad_norm": 1.5590561891101518, "learning_rate": 1.3188385746552039e-05, "loss": 0.8167, "step": 4078 }, { "epoch": 0.41, "grad_norm": 1.6649219606880605, "learning_rate": 1.3185263059012158e-05, "loss": 0.7256, "step": 4079 }, { "epoch": 0.42, "grad_norm": 1.4916284993073423, "learning_rate": 1.3182140025761782e-05, "loss": 0.7404, "step": 4080 }, { "epoch": 0.42, "grad_norm": 1.5809091122227412, "learning_rate": 1.3179016647139866e-05, "loss": 0.6229, "step": 4081 }, { "epoch": 0.42, "grad_norm": 1.577754747739406, "learning_rate": 1.3175892923485411e-05, "loss": 0.7157, "step": 4082 }, { "epoch": 0.42, "grad_norm": 1.494271837332332, "learning_rate": 1.3172768855137446e-05, "loss": 0.866, "step": 4083 }, { "epoch": 0.42, "grad_norm": 1.591720798787326, "learning_rate": 1.3169644442435036e-05, "loss": 0.7013, "step": 4084 }, { "epoch": 0.42, "grad_norm": 1.5553585860498986, "learning_rate": 1.316651968571729e-05, "loss": 0.8126, "step": 4085 }, { "epoch": 0.42, "grad_norm": 1.4891715960594787, "learning_rate": 1.316339458532335e-05, "loss": 0.7398, "step": 4086 }, { "epoch": 0.42, "grad_norm": 1.542762990549468, "learning_rate": 1.31602691415924e-05, "loss": 0.7326, "step": 4087 }, { "epoch": 0.42, "grad_norm": 1.490050148493316, "learning_rate": 1.3157143354863653e-05, "loss": 0.7312, "step": 4088 }, { "epoch": 0.42, "grad_norm": 1.7648769404474558, "learning_rate": 1.315401722547637e-05, "loss": 0.7986, "step": 4089 }, { "epoch": 0.42, "grad_norm": 1.719709018178479, "learning_rate": 1.3150890753769836e-05, "loss": 0.7715, "step": 4090 }, { "epoch": 0.42, "grad_norm": 1.5627320403532918, "learning_rate": 1.314776394008339e-05, "loss": 0.7837, "step": 4091 }, { "epoch": 0.42, "grad_norm": 1.453463710193572, "learning_rate": 1.3144636784756391e-05, "loss": 0.6246, "step": 4092 }, { "epoch": 0.42, "grad_norm": 1.650974176196651, "learning_rate": 1.3141509288128248e-05, "loss": 0.786, "step": 4093 }, { "epoch": 0.42, "grad_norm": 1.5484033252730638, "learning_rate": 1.3138381450538398e-05, "loss": 0.7412, "step": 4094 }, { "epoch": 0.42, "grad_norm": 1.6037113882082326, "learning_rate": 1.3135253272326316e-05, "loss": 0.6865, "step": 4095 }, { "epoch": 0.42, "grad_norm": 1.5918578191023562, "learning_rate": 1.3132124753831525e-05, "loss": 0.8283, "step": 4096 }, { "epoch": 0.42, "grad_norm": 1.4520708558181288, "learning_rate": 1.3128995895393576e-05, "loss": 0.7158, "step": 4097 }, { "epoch": 0.42, "grad_norm": 1.4518469912628422, "learning_rate": 1.3125866697352052e-05, "loss": 0.7374, "step": 4098 }, { "epoch": 0.42, "grad_norm": 1.5362404511239431, "learning_rate": 1.312273716004658e-05, "loss": 0.735, "step": 4099 }, { "epoch": 0.42, "grad_norm": 1.5893081455973872, "learning_rate": 1.3119607283816823e-05, "loss": 0.7949, "step": 4100 }, { "epoch": 0.42, "grad_norm": 1.5451259765624377, "learning_rate": 1.3116477069002483e-05, "loss": 0.7979, "step": 4101 }, { "epoch": 0.42, "grad_norm": 1.6486244621849588, "learning_rate": 1.3113346515943292e-05, "loss": 0.7615, "step": 4102 }, { "epoch": 0.42, "grad_norm": 1.8091766023002607, "learning_rate": 1.3110215624979026e-05, "loss": 0.7761, "step": 4103 }, { "epoch": 0.42, "grad_norm": 1.6659017005621068, "learning_rate": 1.3107084396449488e-05, "loss": 0.768, "step": 4104 }, { "epoch": 0.42, "grad_norm": 1.6204191281973657, "learning_rate": 1.3103952830694531e-05, "loss": 0.8416, "step": 4105 }, { "epoch": 0.42, "grad_norm": 1.5668821827815804, "learning_rate": 1.3100820928054034e-05, "loss": 0.7551, "step": 4106 }, { "epoch": 0.42, "grad_norm": 1.643785128436163, "learning_rate": 1.3097688688867917e-05, "loss": 0.899, "step": 4107 }, { "epoch": 0.42, "grad_norm": 1.4816697433407158, "learning_rate": 1.3094556113476138e-05, "loss": 0.7447, "step": 4108 }, { "epoch": 0.42, "grad_norm": 1.3628423656563957, "learning_rate": 1.309142320221868e-05, "loss": 0.7281, "step": 4109 }, { "epoch": 0.42, "grad_norm": 1.493882064536198, "learning_rate": 1.3088289955435583e-05, "loss": 0.7529, "step": 4110 }, { "epoch": 0.42, "grad_norm": 1.5185950409449684, "learning_rate": 1.3085156373466905e-05, "loss": 0.6877, "step": 4111 }, { "epoch": 0.42, "grad_norm": 1.552483946978183, "learning_rate": 1.3082022456652748e-05, "loss": 0.619, "step": 4112 }, { "epoch": 0.42, "grad_norm": 1.6203458777816349, "learning_rate": 1.3078888205333251e-05, "loss": 0.7527, "step": 4113 }, { "epoch": 0.42, "grad_norm": 1.6555482537000537, "learning_rate": 1.307575361984859e-05, "loss": 0.8066, "step": 4114 }, { "epoch": 0.42, "grad_norm": 1.561415647287691, "learning_rate": 1.307261870053897e-05, "loss": 0.7591, "step": 4115 }, { "epoch": 0.42, "grad_norm": 1.5575464787991349, "learning_rate": 1.3069483447744644e-05, "loss": 0.7045, "step": 4116 }, { "epoch": 0.42, "grad_norm": 1.601909026948332, "learning_rate": 1.306634786180589e-05, "loss": 0.7526, "step": 4117 }, { "epoch": 0.42, "grad_norm": 1.4804890751107598, "learning_rate": 1.3063211943063028e-05, "loss": 0.7148, "step": 4118 }, { "epoch": 0.42, "grad_norm": 1.4231408741569105, "learning_rate": 1.3060075691856408e-05, "loss": 0.7549, "step": 4119 }, { "epoch": 0.42, "grad_norm": 1.6377303109941803, "learning_rate": 1.3056939108526432e-05, "loss": 0.8093, "step": 4120 }, { "epoch": 0.42, "grad_norm": 1.4810282169201698, "learning_rate": 1.3053802193413518e-05, "loss": 0.6746, "step": 4121 }, { "epoch": 0.42, "grad_norm": 1.586189039013135, "learning_rate": 1.3050664946858132e-05, "loss": 0.7905, "step": 4122 }, { "epoch": 0.42, "grad_norm": 1.5164378866528998, "learning_rate": 1.3047527369200774e-05, "loss": 0.703, "step": 4123 }, { "epoch": 0.42, "grad_norm": 1.652431828924487, "learning_rate": 1.304438946078198e-05, "loss": 0.7871, "step": 4124 }, { "epoch": 0.42, "grad_norm": 1.5698746425610295, "learning_rate": 1.3041251221942316e-05, "loss": 0.7083, "step": 4125 }, { "epoch": 0.42, "grad_norm": 1.550651917500886, "learning_rate": 1.3038112653022394e-05, "loss": 0.8533, "step": 4126 }, { "epoch": 0.42, "grad_norm": 1.6512048825368992, "learning_rate": 1.3034973754362851e-05, "loss": 0.9085, "step": 4127 }, { "epoch": 0.42, "grad_norm": 1.4969760442828155, "learning_rate": 1.303183452630437e-05, "loss": 0.7446, "step": 4128 }, { "epoch": 0.42, "grad_norm": 1.5726190460076614, "learning_rate": 1.3028694969187665e-05, "loss": 0.743, "step": 4129 }, { "epoch": 0.42, "grad_norm": 1.4195619694904704, "learning_rate": 1.3025555083353485e-05, "loss": 0.7286, "step": 4130 }, { "epoch": 0.42, "grad_norm": 1.4969874031232395, "learning_rate": 1.3022414869142612e-05, "loss": 0.6673, "step": 4131 }, { "epoch": 0.42, "grad_norm": 1.5093059970994227, "learning_rate": 1.3019274326895874e-05, "loss": 0.7385, "step": 4132 }, { "epoch": 0.42, "grad_norm": 1.5271500356756018, "learning_rate": 1.3016133456954123e-05, "loss": 0.7508, "step": 4133 }, { "epoch": 0.42, "grad_norm": 1.520571634239225, "learning_rate": 1.301299225965825e-05, "loss": 0.7333, "step": 4134 }, { "epoch": 0.42, "grad_norm": 1.5627731896035457, "learning_rate": 1.300985073534919e-05, "loss": 0.7891, "step": 4135 }, { "epoch": 0.42, "grad_norm": 1.5326098612589172, "learning_rate": 1.3006708884367895e-05, "loss": 0.7618, "step": 4136 }, { "epoch": 0.42, "grad_norm": 1.8468622614861878, "learning_rate": 1.3003566707055375e-05, "loss": 0.7747, "step": 4137 }, { "epoch": 0.42, "grad_norm": 1.4174643847168273, "learning_rate": 1.3000424203752658e-05, "loss": 0.694, "step": 4138 }, { "epoch": 0.42, "grad_norm": 1.4514857062771434, "learning_rate": 1.2997281374800817e-05, "loss": 0.6628, "step": 4139 }, { "epoch": 0.42, "grad_norm": 1.5595809922253188, "learning_rate": 1.299413822054095e-05, "loss": 0.85, "step": 4140 }, { "epoch": 0.42, "grad_norm": 1.4476139519602225, "learning_rate": 1.2990994741314204e-05, "loss": 0.7068, "step": 4141 }, { "epoch": 0.42, "grad_norm": 1.5251837298148312, "learning_rate": 1.2987850937461757e-05, "loss": 0.7197, "step": 4142 }, { "epoch": 0.42, "grad_norm": 1.4801726487169606, "learning_rate": 1.2984706809324812e-05, "loss": 0.7069, "step": 4143 }, { "epoch": 0.42, "grad_norm": 1.5355823466076746, "learning_rate": 1.298156235724462e-05, "loss": 0.684, "step": 4144 }, { "epoch": 0.42, "grad_norm": 1.7080840468328307, "learning_rate": 1.2978417581562456e-05, "loss": 0.8024, "step": 4145 }, { "epoch": 0.42, "grad_norm": 1.5326888897079554, "learning_rate": 1.2975272482619645e-05, "loss": 0.7033, "step": 4146 }, { "epoch": 0.42, "grad_norm": 1.52255665500569, "learning_rate": 1.2972127060757536e-05, "loss": 0.7515, "step": 4147 }, { "epoch": 0.42, "grad_norm": 1.6721411657315013, "learning_rate": 1.296898131631751e-05, "loss": 0.8513, "step": 4148 }, { "epoch": 0.42, "grad_norm": 1.4392424729686444, "learning_rate": 1.296583524964099e-05, "loss": 0.7486, "step": 4149 }, { "epoch": 0.42, "grad_norm": 1.579784131290916, "learning_rate": 1.296268886106944e-05, "loss": 0.7839, "step": 4150 }, { "epoch": 0.42, "grad_norm": 1.442856162503546, "learning_rate": 1.295954215094434e-05, "loss": 0.7541, "step": 4151 }, { "epoch": 0.42, "grad_norm": 1.5161520779023678, "learning_rate": 1.2956395119607225e-05, "loss": 0.6876, "step": 4152 }, { "epoch": 0.42, "grad_norm": 1.6270966963220517, "learning_rate": 1.2953247767399653e-05, "loss": 0.7555, "step": 4153 }, { "epoch": 0.42, "grad_norm": 1.4074602171467065, "learning_rate": 1.2950100094663216e-05, "loss": 0.6801, "step": 4154 }, { "epoch": 0.42, "grad_norm": 1.485430352222668, "learning_rate": 1.294695210173955e-05, "loss": 0.6834, "step": 4155 }, { "epoch": 0.42, "grad_norm": 1.5971365760873109, "learning_rate": 1.2943803788970319e-05, "loss": 0.6976, "step": 4156 }, { "epoch": 0.42, "grad_norm": 1.5877675607054489, "learning_rate": 1.294065515669722e-05, "loss": 0.7516, "step": 4157 }, { "epoch": 0.42, "grad_norm": 1.5611545308976658, "learning_rate": 1.2937506205261991e-05, "loss": 0.7916, "step": 4158 }, { "epoch": 0.42, "grad_norm": 1.5146360432539503, "learning_rate": 1.2934356935006405e-05, "loss": 0.7435, "step": 4159 }, { "epoch": 0.42, "grad_norm": 1.5328419599085918, "learning_rate": 1.293120734627226e-05, "loss": 0.6888, "step": 4160 }, { "epoch": 0.42, "grad_norm": 1.6270060946823581, "learning_rate": 1.2928057439401396e-05, "loss": 0.6567, "step": 4161 }, { "epoch": 0.42, "grad_norm": 1.5686762641960714, "learning_rate": 1.2924907214735686e-05, "loss": 0.7013, "step": 4162 }, { "epoch": 0.42, "grad_norm": 1.679353724413454, "learning_rate": 1.2921756672617039e-05, "loss": 0.8016, "step": 4163 }, { "epoch": 0.42, "grad_norm": 1.7116083356462308, "learning_rate": 1.2918605813387395e-05, "loss": 0.8348, "step": 4164 }, { "epoch": 0.42, "grad_norm": 1.3332388029642228, "learning_rate": 1.2915454637388733e-05, "loss": 0.6438, "step": 4165 }, { "epoch": 0.42, "grad_norm": 1.5179907748585992, "learning_rate": 1.2912303144963062e-05, "loss": 0.7783, "step": 4166 }, { "epoch": 0.42, "grad_norm": 1.5280775014974233, "learning_rate": 1.290915133645243e-05, "loss": 0.7175, "step": 4167 }, { "epoch": 0.42, "grad_norm": 1.6248733792262533, "learning_rate": 1.2905999212198911e-05, "loss": 0.8058, "step": 4168 }, { "epoch": 0.42, "grad_norm": 1.5849368736277467, "learning_rate": 1.2902846772544625e-05, "loss": 0.7131, "step": 4169 }, { "epoch": 0.42, "grad_norm": 1.4466438034909208, "learning_rate": 1.2899694017831717e-05, "loss": 0.6041, "step": 4170 }, { "epoch": 0.42, "grad_norm": 1.4385438213485517, "learning_rate": 1.289654094840237e-05, "loss": 0.6044, "step": 4171 }, { "epoch": 0.42, "grad_norm": 1.5468408761996495, "learning_rate": 1.2893387564598798e-05, "loss": 0.7942, "step": 4172 }, { "epoch": 0.42, "grad_norm": 1.6063767743257966, "learning_rate": 1.2890233866763253e-05, "loss": 0.7492, "step": 4173 }, { "epoch": 0.42, "grad_norm": 1.5397754358691504, "learning_rate": 1.2887079855238023e-05, "loss": 0.7214, "step": 4174 }, { "epoch": 0.42, "grad_norm": 1.5545908044398078, "learning_rate": 1.2883925530365422e-05, "loss": 0.7593, "step": 4175 }, { "epoch": 0.42, "grad_norm": 1.669074462325384, "learning_rate": 1.2880770892487801e-05, "loss": 0.7603, "step": 4176 }, { "epoch": 0.42, "grad_norm": 1.564607040217321, "learning_rate": 1.2877615941947554e-05, "loss": 0.723, "step": 4177 }, { "epoch": 0.42, "grad_norm": 1.4498352227742795, "learning_rate": 1.2874460679087097e-05, "loss": 0.7274, "step": 4178 }, { "epoch": 0.43, "grad_norm": 1.8793408935895344, "learning_rate": 1.2871305104248885e-05, "loss": 0.7631, "step": 4179 }, { "epoch": 0.43, "grad_norm": 1.5812346101843897, "learning_rate": 1.2868149217775405e-05, "loss": 0.8137, "step": 4180 }, { "epoch": 0.43, "grad_norm": 1.521026332929309, "learning_rate": 1.286499302000918e-05, "loss": 0.705, "step": 4181 }, { "epoch": 0.43, "grad_norm": 1.5306191672334164, "learning_rate": 1.2861836511292766e-05, "loss": 0.6851, "step": 4182 }, { "epoch": 0.43, "grad_norm": 1.5073339226386875, "learning_rate": 1.2858679691968754e-05, "loss": 0.7127, "step": 4183 }, { "epoch": 0.43, "grad_norm": 1.564368783420573, "learning_rate": 1.2855522562379768e-05, "loss": 0.7618, "step": 4184 }, { "epoch": 0.43, "grad_norm": 1.4856689898896016, "learning_rate": 1.2852365122868458e-05, "loss": 0.5892, "step": 4185 }, { "epoch": 0.43, "grad_norm": 1.5290378168110395, "learning_rate": 1.2849207373777523e-05, "loss": 0.8697, "step": 4186 }, { "epoch": 0.43, "grad_norm": 1.6287002115251614, "learning_rate": 1.2846049315449687e-05, "loss": 0.7245, "step": 4187 }, { "epoch": 0.43, "grad_norm": 1.5121503034940078, "learning_rate": 1.2842890948227702e-05, "loss": 0.7317, "step": 4188 }, { "epoch": 0.43, "grad_norm": 1.716114438295996, "learning_rate": 1.2839732272454364e-05, "loss": 0.7015, "step": 4189 }, { "epoch": 0.43, "grad_norm": 1.4890403805664274, "learning_rate": 1.283657328847249e-05, "loss": 0.7445, "step": 4190 }, { "epoch": 0.43, "grad_norm": 1.604534877043379, "learning_rate": 1.2833413996624953e-05, "loss": 0.73, "step": 4191 }, { "epoch": 0.43, "grad_norm": 1.661337652846229, "learning_rate": 1.2830254397254631e-05, "loss": 0.7738, "step": 4192 }, { "epoch": 0.43, "grad_norm": 1.522246190980118, "learning_rate": 1.2827094490704457e-05, "loss": 0.6997, "step": 4193 }, { "epoch": 0.43, "grad_norm": 1.607181524710579, "learning_rate": 1.2823934277317385e-05, "loss": 0.8442, "step": 4194 }, { "epoch": 0.43, "grad_norm": 1.5441419295022962, "learning_rate": 1.2820773757436408e-05, "loss": 0.7235, "step": 4195 }, { "epoch": 0.43, "grad_norm": 1.4943906211090183, "learning_rate": 1.2817612931404554e-05, "loss": 0.7262, "step": 4196 }, { "epoch": 0.43, "grad_norm": 1.5578734075719667, "learning_rate": 1.281445179956488e-05, "loss": 0.7286, "step": 4197 }, { "epoch": 0.43, "grad_norm": 1.444465296199738, "learning_rate": 1.2811290362260475e-05, "loss": 0.7434, "step": 4198 }, { "epoch": 0.43, "grad_norm": 1.3774053157762296, "learning_rate": 1.280812861983446e-05, "loss": 0.6669, "step": 4199 }, { "epoch": 0.43, "grad_norm": 1.6867537085739222, "learning_rate": 1.2804966572630004e-05, "loss": 0.718, "step": 4200 }, { "epoch": 0.43, "grad_norm": 1.7171504576021996, "learning_rate": 1.280180422099029e-05, "loss": 0.7925, "step": 4201 }, { "epoch": 0.43, "grad_norm": 1.5436301822788967, "learning_rate": 1.2798641565258542e-05, "loss": 0.7213, "step": 4202 }, { "epoch": 0.43, "grad_norm": 1.5605055613090992, "learning_rate": 1.2795478605778016e-05, "loss": 0.6631, "step": 4203 }, { "epoch": 0.43, "grad_norm": 1.5283818360565202, "learning_rate": 1.2792315342892007e-05, "loss": 0.7638, "step": 4204 }, { "epoch": 0.43, "grad_norm": 1.7239811766109319, "learning_rate": 1.2789151776943833e-05, "loss": 0.8045, "step": 4205 }, { "epoch": 0.43, "grad_norm": 1.6300973652859465, "learning_rate": 1.2785987908276853e-05, "loss": 0.6882, "step": 4206 }, { "epoch": 0.43, "grad_norm": 1.4571204296952587, "learning_rate": 1.2782823737234452e-05, "loss": 0.7022, "step": 4207 }, { "epoch": 0.43, "grad_norm": 1.5581818868739223, "learning_rate": 1.2779659264160052e-05, "loss": 0.7965, "step": 4208 }, { "epoch": 0.43, "grad_norm": 1.5640531713720136, "learning_rate": 1.2776494489397106e-05, "loss": 0.7584, "step": 4209 }, { "epoch": 0.43, "grad_norm": 1.5477868655715468, "learning_rate": 1.2773329413289108e-05, "loss": 0.6989, "step": 4210 }, { "epoch": 0.43, "grad_norm": 1.5960710113205359, "learning_rate": 1.2770164036179569e-05, "loss": 0.7414, "step": 4211 }, { "epoch": 0.43, "grad_norm": 1.4883667796547893, "learning_rate": 1.2766998358412044e-05, "loss": 0.7878, "step": 4212 }, { "epoch": 0.43, "grad_norm": 1.5053209268600225, "learning_rate": 1.2763832380330118e-05, "loss": 0.7385, "step": 4213 }, { "epoch": 0.43, "grad_norm": 1.365287466500688, "learning_rate": 1.276066610227741e-05, "loss": 0.7644, "step": 4214 }, { "epoch": 0.43, "grad_norm": 1.617074598757576, "learning_rate": 1.275749952459757e-05, "loss": 0.8595, "step": 4215 }, { "epoch": 0.43, "grad_norm": 1.5454203983494088, "learning_rate": 1.2754332647634278e-05, "loss": 0.7681, "step": 4216 }, { "epoch": 0.43, "grad_norm": 1.5272545256382615, "learning_rate": 1.2751165471731248e-05, "loss": 0.6502, "step": 4217 }, { "epoch": 0.43, "grad_norm": 1.4746003923918978, "learning_rate": 1.274799799723223e-05, "loss": 0.7119, "step": 4218 }, { "epoch": 0.43, "grad_norm": 1.4663198618339883, "learning_rate": 1.2744830224481004e-05, "loss": 0.6739, "step": 4219 }, { "epoch": 0.43, "grad_norm": 1.4428619255434645, "learning_rate": 1.2741662153821381e-05, "loss": 0.7681, "step": 4220 }, { "epoch": 0.43, "grad_norm": 1.4664268102529439, "learning_rate": 1.2738493785597208e-05, "loss": 0.6864, "step": 4221 }, { "epoch": 0.43, "grad_norm": 1.5853029515488561, "learning_rate": 1.2735325120152357e-05, "loss": 0.7223, "step": 4222 }, { "epoch": 0.43, "grad_norm": 1.5644251327546417, "learning_rate": 1.2732156157830744e-05, "loss": 0.6766, "step": 4223 }, { "epoch": 0.43, "grad_norm": 1.4276214621906658, "learning_rate": 1.27289868989763e-05, "loss": 0.6797, "step": 4224 }, { "epoch": 0.43, "grad_norm": 1.4727346588567345, "learning_rate": 1.272581734393301e-05, "loss": 0.7309, "step": 4225 }, { "epoch": 0.43, "grad_norm": 1.5771596386172593, "learning_rate": 1.2722647493044873e-05, "loss": 0.779, "step": 4226 }, { "epoch": 0.43, "grad_norm": 1.540483221373571, "learning_rate": 1.2719477346655926e-05, "loss": 0.7378, "step": 4227 }, { "epoch": 0.43, "grad_norm": 1.6694464546919359, "learning_rate": 1.2716306905110243e-05, "loss": 0.7269, "step": 4228 }, { "epoch": 0.43, "grad_norm": 1.6459659204994768, "learning_rate": 1.2713136168751923e-05, "loss": 0.682, "step": 4229 }, { "epoch": 0.43, "grad_norm": 1.4297368947921283, "learning_rate": 1.27099651379251e-05, "loss": 0.6766, "step": 4230 }, { "epoch": 0.43, "grad_norm": 1.5360672121688423, "learning_rate": 1.2706793812973941e-05, "loss": 0.7731, "step": 4231 }, { "epoch": 0.43, "grad_norm": 1.6790490187862654, "learning_rate": 1.2703622194242644e-05, "loss": 0.7898, "step": 4232 }, { "epoch": 0.43, "grad_norm": 1.6940426223260903, "learning_rate": 1.2700450282075439e-05, "loss": 0.7994, "step": 4233 }, { "epoch": 0.43, "grad_norm": 1.6035809726333352, "learning_rate": 1.2697278076816584e-05, "loss": 0.8182, "step": 4234 }, { "epoch": 0.43, "grad_norm": 1.911690966187699, "learning_rate": 1.2694105578810372e-05, "loss": 0.7525, "step": 4235 }, { "epoch": 0.43, "grad_norm": 1.5921485670353903, "learning_rate": 1.2690932788401135e-05, "loss": 0.7356, "step": 4236 }, { "epoch": 0.43, "grad_norm": 1.68167081984641, "learning_rate": 1.2687759705933225e-05, "loss": 0.7508, "step": 4237 }, { "epoch": 0.43, "grad_norm": 1.6075358036783265, "learning_rate": 1.2684586331751027e-05, "loss": 0.734, "step": 4238 }, { "epoch": 0.43, "grad_norm": 1.4133034871638013, "learning_rate": 1.2681412666198967e-05, "loss": 0.7107, "step": 4239 }, { "epoch": 0.43, "grad_norm": 1.4271432982156649, "learning_rate": 1.2678238709621495e-05, "loss": 0.6239, "step": 4240 }, { "epoch": 0.43, "grad_norm": 1.4874506931674698, "learning_rate": 1.2675064462363095e-05, "loss": 0.7471, "step": 4241 }, { "epoch": 0.43, "grad_norm": 1.6615279803875191, "learning_rate": 1.267188992476828e-05, "loss": 0.7187, "step": 4242 }, { "epoch": 0.43, "grad_norm": 1.7055012784103516, "learning_rate": 1.2668715097181598e-05, "loss": 0.7781, "step": 4243 }, { "epoch": 0.43, "grad_norm": 1.5392643779564632, "learning_rate": 1.2665539979947625e-05, "loss": 0.8275, "step": 4244 }, { "epoch": 0.43, "grad_norm": 1.6392295533469456, "learning_rate": 1.2662364573410972e-05, "loss": 0.7079, "step": 4245 }, { "epoch": 0.43, "grad_norm": 1.467439358859539, "learning_rate": 1.265918887791628e-05, "loss": 0.8107, "step": 4246 }, { "epoch": 0.43, "grad_norm": 1.5340914320267773, "learning_rate": 1.265601289380822e-05, "loss": 0.6401, "step": 4247 }, { "epoch": 0.43, "grad_norm": 1.6326046668917402, "learning_rate": 1.2652836621431494e-05, "loss": 0.6579, "step": 4248 }, { "epoch": 0.43, "grad_norm": 1.6060006722081543, "learning_rate": 1.264966006113084e-05, "loss": 0.7607, "step": 4249 }, { "epoch": 0.43, "grad_norm": 1.6301556197140081, "learning_rate": 1.2646483213251024e-05, "loss": 0.7725, "step": 4250 }, { "epoch": 0.43, "grad_norm": 1.4477180043687317, "learning_rate": 1.2643306078136839e-05, "loss": 0.6654, "step": 4251 }, { "epoch": 0.43, "grad_norm": 1.7183853097079613, "learning_rate": 1.264012865613312e-05, "loss": 0.757, "step": 4252 }, { "epoch": 0.43, "grad_norm": 1.4991277481962177, "learning_rate": 1.2636950947584716e-05, "loss": 0.7827, "step": 4253 }, { "epoch": 0.43, "grad_norm": 1.3682709122047572, "learning_rate": 1.2633772952836527e-05, "loss": 0.6954, "step": 4254 }, { "epoch": 0.43, "grad_norm": 1.5264634061223141, "learning_rate": 1.2630594672233474e-05, "loss": 0.6219, "step": 4255 }, { "epoch": 0.43, "grad_norm": 1.6081603775260733, "learning_rate": 1.2627416106120505e-05, "loss": 0.7464, "step": 4256 }, { "epoch": 0.43, "grad_norm": 1.6664012905975172, "learning_rate": 1.2624237254842606e-05, "loss": 0.8268, "step": 4257 }, { "epoch": 0.43, "grad_norm": 1.7236809343198751, "learning_rate": 1.2621058118744789e-05, "loss": 0.7237, "step": 4258 }, { "epoch": 0.43, "grad_norm": 1.7199094659192433, "learning_rate": 1.2617878698172106e-05, "loss": 0.8653, "step": 4259 }, { "epoch": 0.43, "grad_norm": 1.5702803700581136, "learning_rate": 1.2614698993469627e-05, "loss": 0.7597, "step": 4260 }, { "epoch": 0.43, "grad_norm": 1.6599947985801582, "learning_rate": 1.2611519004982463e-05, "loss": 0.72, "step": 4261 }, { "epoch": 0.43, "grad_norm": 1.5007726034218898, "learning_rate": 1.2608338733055746e-05, "loss": 0.7291, "step": 4262 }, { "epoch": 0.43, "grad_norm": 1.6180141696571795, "learning_rate": 1.2605158178034656e-05, "loss": 0.8082, "step": 4263 }, { "epoch": 0.43, "grad_norm": 1.5370907818119977, "learning_rate": 1.2601977340264385e-05, "loss": 0.7279, "step": 4264 }, { "epoch": 0.43, "grad_norm": 1.6017277705458968, "learning_rate": 1.2598796220090162e-05, "loss": 0.7468, "step": 4265 }, { "epoch": 0.43, "grad_norm": 1.470357468363117, "learning_rate": 1.259561481785725e-05, "loss": 0.7744, "step": 4266 }, { "epoch": 0.43, "grad_norm": 1.5011085967488755, "learning_rate": 1.2592433133910937e-05, "loss": 0.7894, "step": 4267 }, { "epoch": 0.43, "grad_norm": 1.8058621580428456, "learning_rate": 1.2589251168596554e-05, "loss": 0.6833, "step": 4268 }, { "epoch": 0.43, "grad_norm": 1.5790531507207646, "learning_rate": 1.2586068922259446e-05, "loss": 0.807, "step": 4269 }, { "epoch": 0.43, "grad_norm": 1.6129616607668176, "learning_rate": 1.2582886395245e-05, "loss": 0.7582, "step": 4270 }, { "epoch": 0.43, "grad_norm": 1.5636248037220053, "learning_rate": 1.2579703587898622e-05, "loss": 0.7492, "step": 4271 }, { "epoch": 0.43, "grad_norm": 1.6477268242323033, "learning_rate": 1.2576520500565767e-05, "loss": 0.8129, "step": 4272 }, { "epoch": 0.43, "grad_norm": 1.53427061103123, "learning_rate": 1.2573337133591904e-05, "loss": 0.7485, "step": 4273 }, { "epoch": 0.43, "grad_norm": 1.2760812692182046, "learning_rate": 1.2570153487322534e-05, "loss": 0.6305, "step": 4274 }, { "epoch": 0.43, "grad_norm": 1.4831290425463768, "learning_rate": 1.2566969562103197e-05, "loss": 0.6692, "step": 4275 }, { "epoch": 0.43, "grad_norm": 1.4653955773573912, "learning_rate": 1.2563785358279459e-05, "loss": 0.6619, "step": 4276 }, { "epoch": 0.44, "grad_norm": 1.3376784691184405, "learning_rate": 1.2560600876196911e-05, "loss": 0.6905, "step": 4277 }, { "epoch": 0.44, "grad_norm": 1.5494933539636409, "learning_rate": 1.255741611620118e-05, "loss": 0.7156, "step": 4278 }, { "epoch": 0.44, "grad_norm": 1.8980840079305699, "learning_rate": 1.2554231078637926e-05, "loss": 0.7758, "step": 4279 }, { "epoch": 0.44, "grad_norm": 1.5326597254174636, "learning_rate": 1.2551045763852832e-05, "loss": 0.7281, "step": 4280 }, { "epoch": 0.44, "grad_norm": 1.6493596040522875, "learning_rate": 1.2547860172191613e-05, "loss": 0.8091, "step": 4281 }, { "epoch": 0.44, "grad_norm": 1.6204455055952354, "learning_rate": 1.2544674304000017e-05, "loss": 0.7701, "step": 4282 }, { "epoch": 0.44, "grad_norm": 1.5362361176412207, "learning_rate": 1.254148815962382e-05, "loss": 0.6546, "step": 4283 }, { "epoch": 0.44, "grad_norm": 1.5859203814985514, "learning_rate": 1.2538301739408827e-05, "loss": 0.7461, "step": 4284 }, { "epoch": 0.44, "grad_norm": 1.6904326299259982, "learning_rate": 1.2535115043700873e-05, "loss": 0.7867, "step": 4285 }, { "epoch": 0.44, "grad_norm": 1.49829461015147, "learning_rate": 1.2531928072845829e-05, "loss": 0.7796, "step": 4286 }, { "epoch": 0.44, "grad_norm": 1.5082088395185627, "learning_rate": 1.2528740827189588e-05, "loss": 0.7096, "step": 4287 }, { "epoch": 0.44, "grad_norm": 1.6725549597277543, "learning_rate": 1.2525553307078074e-05, "loss": 0.7854, "step": 4288 }, { "epoch": 0.44, "grad_norm": 1.4817730777320866, "learning_rate": 1.2522365512857241e-05, "loss": 0.6682, "step": 4289 }, { "epoch": 0.44, "grad_norm": 1.535915561788933, "learning_rate": 1.251917744487308e-05, "loss": 0.7609, "step": 4290 }, { "epoch": 0.44, "grad_norm": 1.5623924070870114, "learning_rate": 1.2515989103471602e-05, "loss": 0.6874, "step": 4291 }, { "epoch": 0.44, "grad_norm": 1.525575165734888, "learning_rate": 1.2512800488998856e-05, "loss": 0.8236, "step": 4292 }, { "epoch": 0.44, "grad_norm": 1.7352730208482219, "learning_rate": 1.2509611601800908e-05, "loss": 0.6995, "step": 4293 }, { "epoch": 0.44, "grad_norm": 1.6429301062021924, "learning_rate": 1.2506422442223867e-05, "loss": 0.8077, "step": 4294 }, { "epoch": 0.44, "grad_norm": 1.4617991681259823, "learning_rate": 1.2503233010613866e-05, "loss": 0.6972, "step": 4295 }, { "epoch": 0.44, "grad_norm": 1.5839803085209656, "learning_rate": 1.2500043307317072e-05, "loss": 0.7635, "step": 4296 }, { "epoch": 0.44, "grad_norm": 1.4985757263506088, "learning_rate": 1.2496853332679668e-05, "loss": 0.68, "step": 4297 }, { "epoch": 0.44, "grad_norm": 1.876952140170346, "learning_rate": 1.2493663087047883e-05, "loss": 0.8143, "step": 4298 }, { "epoch": 0.44, "grad_norm": 1.4912297726382522, "learning_rate": 1.2490472570767966e-05, "loss": 0.7566, "step": 4299 }, { "epoch": 0.44, "grad_norm": 1.4135576446856426, "learning_rate": 1.24872817841862e-05, "loss": 0.7676, "step": 4300 }, { "epoch": 0.44, "grad_norm": 1.6894482298761555, "learning_rate": 1.2484090727648892e-05, "loss": 0.7346, "step": 4301 }, { "epoch": 0.44, "grad_norm": 1.410949297535782, "learning_rate": 1.2480899401502384e-05, "loss": 0.698, "step": 4302 }, { "epoch": 0.44, "grad_norm": 1.4645565318403775, "learning_rate": 1.247770780609304e-05, "loss": 0.6811, "step": 4303 }, { "epoch": 0.44, "grad_norm": 1.6854348277084623, "learning_rate": 1.2474515941767262e-05, "loss": 0.7574, "step": 4304 }, { "epoch": 0.44, "grad_norm": 1.7077740397861376, "learning_rate": 1.2471323808871475e-05, "loss": 0.7648, "step": 4305 }, { "epoch": 0.44, "grad_norm": 1.670997861295573, "learning_rate": 1.2468131407752138e-05, "loss": 0.7125, "step": 4306 }, { "epoch": 0.44, "grad_norm": 1.6537332834926746, "learning_rate": 1.246493873875573e-05, "loss": 0.9453, "step": 4307 }, { "epoch": 0.44, "grad_norm": 1.6298292725373442, "learning_rate": 1.2461745802228771e-05, "loss": 0.6995, "step": 4308 }, { "epoch": 0.44, "grad_norm": 1.6336178608807248, "learning_rate": 1.2458552598517804e-05, "loss": 0.9437, "step": 4309 }, { "epoch": 0.44, "grad_norm": 1.6605536767879754, "learning_rate": 1.24553591279694e-05, "loss": 0.7589, "step": 4310 }, { "epoch": 0.44, "grad_norm": 1.6214145562153597, "learning_rate": 1.2452165390930159e-05, "loss": 0.756, "step": 4311 }, { "epoch": 0.44, "grad_norm": 1.7693944283318281, "learning_rate": 1.244897138774671e-05, "loss": 0.776, "step": 4312 }, { "epoch": 0.44, "grad_norm": 1.9429667641152428, "learning_rate": 1.244577711876572e-05, "loss": 0.7921, "step": 4313 }, { "epoch": 0.44, "grad_norm": 1.4719654131064106, "learning_rate": 1.2442582584333867e-05, "loss": 0.7164, "step": 4314 }, { "epoch": 0.44, "grad_norm": 1.6998681620739857, "learning_rate": 1.2439387784797873e-05, "loss": 0.7358, "step": 4315 }, { "epoch": 0.44, "grad_norm": 1.5367814354024754, "learning_rate": 1.2436192720504483e-05, "loss": 0.7949, "step": 4316 }, { "epoch": 0.44, "grad_norm": 1.5814752360852835, "learning_rate": 1.2432997391800471e-05, "loss": 0.7588, "step": 4317 }, { "epoch": 0.44, "grad_norm": 1.5482592457995743, "learning_rate": 1.242980179903264e-05, "loss": 0.7803, "step": 4318 }, { "epoch": 0.44, "grad_norm": 1.543902286716255, "learning_rate": 1.2426605942547822e-05, "loss": 0.7623, "step": 4319 }, { "epoch": 0.44, "grad_norm": 1.5364496774447027, "learning_rate": 1.2423409822692878e-05, "loss": 0.7359, "step": 4320 }, { "epoch": 0.44, "grad_norm": 1.7470060336826811, "learning_rate": 1.2420213439814693e-05, "loss": 0.8224, "step": 4321 }, { "epoch": 0.44, "grad_norm": 1.7947145930976374, "learning_rate": 1.2417016794260188e-05, "loss": 0.9604, "step": 4322 }, { "epoch": 0.44, "grad_norm": 1.4318612043412737, "learning_rate": 1.2413819886376308e-05, "loss": 0.7188, "step": 4323 }, { "epoch": 0.44, "grad_norm": 1.578719074009928, "learning_rate": 1.241062271651003e-05, "loss": 0.7632, "step": 4324 }, { "epoch": 0.44, "grad_norm": 1.4222966215231232, "learning_rate": 1.2407425285008348e-05, "loss": 0.727, "step": 4325 }, { "epoch": 0.44, "grad_norm": 1.7037074418102436, "learning_rate": 1.2404227592218302e-05, "loss": 0.7614, "step": 4326 }, { "epoch": 0.44, "grad_norm": 1.5560684096552946, "learning_rate": 1.2401029638486952e-05, "loss": 0.7566, "step": 4327 }, { "epoch": 0.44, "grad_norm": 1.5496149562355548, "learning_rate": 1.2397831424161379e-05, "loss": 0.6464, "step": 4328 }, { "epoch": 0.44, "grad_norm": 1.5661260611233834, "learning_rate": 1.2394632949588702e-05, "loss": 0.7027, "step": 4329 }, { "epoch": 0.44, "grad_norm": 1.647435871699377, "learning_rate": 1.239143421511607e-05, "loss": 0.7214, "step": 4330 }, { "epoch": 0.44, "grad_norm": 1.6376179783423268, "learning_rate": 1.238823522109065e-05, "loss": 0.7569, "step": 4331 }, { "epoch": 0.44, "grad_norm": 1.7213715524105555, "learning_rate": 1.2385035967859644e-05, "loss": 0.8125, "step": 4332 }, { "epoch": 0.44, "grad_norm": 1.5625709746455267, "learning_rate": 1.2381836455770281e-05, "loss": 0.696, "step": 4333 }, { "epoch": 0.44, "grad_norm": 1.5850692435817693, "learning_rate": 1.237863668516982e-05, "loss": 0.8419, "step": 4334 }, { "epoch": 0.44, "grad_norm": 1.6029512722585655, "learning_rate": 1.2375436656405543e-05, "loss": 0.8223, "step": 4335 }, { "epoch": 0.44, "grad_norm": 1.4897526621851649, "learning_rate": 1.2372236369824766e-05, "loss": 0.7425, "step": 4336 }, { "epoch": 0.44, "grad_norm": 1.4048163565710574, "learning_rate": 1.236903582577483e-05, "loss": 0.6398, "step": 4337 }, { "epoch": 0.44, "grad_norm": 1.5031420071231552, "learning_rate": 1.2365835024603099e-05, "loss": 0.8474, "step": 4338 }, { "epoch": 0.44, "grad_norm": 1.3533265566661303, "learning_rate": 1.2362633966656974e-05, "loss": 0.5881, "step": 4339 }, { "epoch": 0.44, "grad_norm": 1.6080843502117637, "learning_rate": 1.2359432652283882e-05, "loss": 0.7397, "step": 4340 }, { "epoch": 0.44, "grad_norm": 1.6916691645649253, "learning_rate": 1.2356231081831274e-05, "loss": 0.835, "step": 4341 }, { "epoch": 0.44, "grad_norm": 1.541941556966986, "learning_rate": 1.2353029255646627e-05, "loss": 0.7793, "step": 4342 }, { "epoch": 0.44, "grad_norm": 1.3431183490118175, "learning_rate": 1.234982717407745e-05, "loss": 0.6793, "step": 4343 }, { "epoch": 0.44, "grad_norm": 1.5996485332722739, "learning_rate": 1.2346624837471282e-05, "loss": 0.8009, "step": 4344 }, { "epoch": 0.44, "grad_norm": 1.5334897538387262, "learning_rate": 1.2343422246175687e-05, "loss": 0.6806, "step": 4345 }, { "epoch": 0.44, "grad_norm": 1.5390653761364153, "learning_rate": 1.2340219400538251e-05, "loss": 0.7227, "step": 4346 }, { "epoch": 0.44, "grad_norm": 1.5577072860816348, "learning_rate": 1.2337016300906599e-05, "loss": 0.7152, "step": 4347 }, { "epoch": 0.44, "grad_norm": 1.4338529859628366, "learning_rate": 1.2333812947628372e-05, "loss": 0.6529, "step": 4348 }, { "epoch": 0.44, "grad_norm": 1.5103611554283487, "learning_rate": 1.2330609341051248e-05, "loss": 0.5906, "step": 4349 }, { "epoch": 0.44, "grad_norm": 1.5452087394046694, "learning_rate": 1.2327405481522925e-05, "loss": 0.7806, "step": 4350 }, { "epoch": 0.44, "grad_norm": 1.506080493851666, "learning_rate": 1.2324201369391135e-05, "loss": 0.7061, "step": 4351 }, { "epoch": 0.44, "grad_norm": 1.611522681979579, "learning_rate": 1.232099700500363e-05, "loss": 0.7171, "step": 4352 }, { "epoch": 0.44, "grad_norm": 1.4512234024915196, "learning_rate": 1.23177923887082e-05, "loss": 0.7174, "step": 4353 }, { "epoch": 0.44, "grad_norm": 1.4573865532669146, "learning_rate": 1.2314587520852652e-05, "loss": 0.6301, "step": 4354 }, { "epoch": 0.44, "grad_norm": 1.639497091761245, "learning_rate": 1.2311382401784823e-05, "loss": 0.7631, "step": 4355 }, { "epoch": 0.44, "grad_norm": 1.502122098326732, "learning_rate": 1.2308177031852581e-05, "loss": 0.7225, "step": 4356 }, { "epoch": 0.44, "grad_norm": 1.6607915454021849, "learning_rate": 1.2304971411403818e-05, "loss": 0.7495, "step": 4357 }, { "epoch": 0.44, "grad_norm": 1.4264459129384013, "learning_rate": 1.2301765540786454e-05, "loss": 0.6694, "step": 4358 }, { "epoch": 0.44, "grad_norm": 1.4441931508298085, "learning_rate": 1.2298559420348437e-05, "loss": 0.6286, "step": 4359 }, { "epoch": 0.44, "grad_norm": 1.6350542372791959, "learning_rate": 1.229535305043774e-05, "loss": 0.7763, "step": 4360 }, { "epoch": 0.44, "grad_norm": 1.5076741786633405, "learning_rate": 1.2292146431402364e-05, "loss": 0.7056, "step": 4361 }, { "epoch": 0.44, "grad_norm": 1.4946756140117825, "learning_rate": 1.2288939563590336e-05, "loss": 0.7498, "step": 4362 }, { "epoch": 0.44, "grad_norm": 1.4954116089047855, "learning_rate": 1.2285732447349718e-05, "loss": 0.7052, "step": 4363 }, { "epoch": 0.44, "grad_norm": 1.539129278892199, "learning_rate": 1.2282525083028585e-05, "loss": 0.7045, "step": 4364 }, { "epoch": 0.44, "grad_norm": 1.5616495206143877, "learning_rate": 1.227931747097505e-05, "loss": 0.7171, "step": 4365 }, { "epoch": 0.44, "grad_norm": 1.541181730746174, "learning_rate": 1.2276109611537246e-05, "loss": 0.6921, "step": 4366 }, { "epoch": 0.44, "grad_norm": 1.574039031315957, "learning_rate": 1.2272901505063339e-05, "loss": 0.7405, "step": 4367 }, { "epoch": 0.44, "grad_norm": 1.6611638774575088, "learning_rate": 1.226969315190152e-05, "loss": 0.7466, "step": 4368 }, { "epoch": 0.44, "grad_norm": 1.795872991044649, "learning_rate": 1.2266484552400002e-05, "loss": 0.7829, "step": 4369 }, { "epoch": 0.44, "grad_norm": 1.5984338918264582, "learning_rate": 1.2263275706907028e-05, "loss": 0.6836, "step": 4370 }, { "epoch": 0.44, "grad_norm": 1.6082604218617218, "learning_rate": 1.2260066615770872e-05, "loss": 0.7268, "step": 4371 }, { "epoch": 0.44, "grad_norm": 1.5863700764160853, "learning_rate": 1.2256857279339826e-05, "loss": 0.7271, "step": 4372 }, { "epoch": 0.44, "grad_norm": 1.502654497794165, "learning_rate": 1.2253647697962219e-05, "loss": 0.7056, "step": 4373 }, { "epoch": 0.44, "grad_norm": 1.455642030024103, "learning_rate": 1.2250437871986396e-05, "loss": 0.6944, "step": 4374 }, { "epoch": 0.45, "grad_norm": 1.732673603633385, "learning_rate": 1.2247227801760733e-05, "loss": 0.8043, "step": 4375 }, { "epoch": 0.45, "grad_norm": 1.5572231314499265, "learning_rate": 1.2244017487633637e-05, "loss": 0.6938, "step": 4376 }, { "epoch": 0.45, "grad_norm": 1.687209376387509, "learning_rate": 1.2240806929953535e-05, "loss": 0.703, "step": 4377 }, { "epoch": 0.45, "grad_norm": 1.5609158173524036, "learning_rate": 1.2237596129068883e-05, "loss": 0.812, "step": 4378 }, { "epoch": 0.45, "grad_norm": 1.530954813512489, "learning_rate": 1.223438508532816e-05, "loss": 0.77, "step": 4379 }, { "epoch": 0.45, "grad_norm": 1.5362119262865612, "learning_rate": 1.2231173799079881e-05, "loss": 0.718, "step": 4380 }, { "epoch": 0.45, "grad_norm": 1.4563236326328328, "learning_rate": 1.2227962270672578e-05, "loss": 0.7434, "step": 4381 }, { "epoch": 0.45, "grad_norm": 1.6876187955506705, "learning_rate": 1.2224750500454812e-05, "loss": 0.7205, "step": 4382 }, { "epoch": 0.45, "grad_norm": 1.5422731338231965, "learning_rate": 1.2221538488775169e-05, "loss": 0.7501, "step": 4383 }, { "epoch": 0.45, "grad_norm": 1.6188008465025745, "learning_rate": 1.2218326235982265e-05, "loss": 0.7096, "step": 4384 }, { "epoch": 0.45, "grad_norm": 1.5020069685350754, "learning_rate": 1.2215113742424737e-05, "loss": 0.7346, "step": 4385 }, { "epoch": 0.45, "grad_norm": 1.5813327112219508, "learning_rate": 1.2211901008451254e-05, "loss": 0.6556, "step": 4386 }, { "epoch": 0.45, "grad_norm": 1.666893881592953, "learning_rate": 1.2208688034410508e-05, "loss": 0.7044, "step": 4387 }, { "epoch": 0.45, "grad_norm": 1.5003835672667813, "learning_rate": 1.2205474820651216e-05, "loss": 0.7246, "step": 4388 }, { "epoch": 0.45, "grad_norm": 1.549861347647712, "learning_rate": 1.2202261367522119e-05, "loss": 0.6512, "step": 4389 }, { "epoch": 0.45, "grad_norm": 1.5602212910735438, "learning_rate": 1.2199047675371993e-05, "loss": 0.7465, "step": 4390 }, { "epoch": 0.45, "grad_norm": 1.753807997087651, "learning_rate": 1.219583374454963e-05, "loss": 0.7398, "step": 4391 }, { "epoch": 0.45, "grad_norm": 1.6286670549259223, "learning_rate": 1.2192619575403855e-05, "loss": 0.7511, "step": 4392 }, { "epoch": 0.45, "grad_norm": 1.3647542817459297, "learning_rate": 1.218940516828351e-05, "loss": 0.6561, "step": 4393 }, { "epoch": 0.45, "grad_norm": 1.5529890621235534, "learning_rate": 1.2186190523537476e-05, "loss": 0.695, "step": 4394 }, { "epoch": 0.45, "grad_norm": 1.5510040334936726, "learning_rate": 1.2182975641514646e-05, "loss": 0.6774, "step": 4395 }, { "epoch": 0.45, "grad_norm": 1.5926843629490448, "learning_rate": 1.217976052256395e-05, "loss": 0.7498, "step": 4396 }, { "epoch": 0.45, "grad_norm": 1.5187507805011788, "learning_rate": 1.2176545167034337e-05, "loss": 0.7326, "step": 4397 }, { "epoch": 0.45, "grad_norm": 1.6532601994941343, "learning_rate": 1.2173329575274781e-05, "loss": 0.6889, "step": 4398 }, { "epoch": 0.45, "grad_norm": 1.5301841899089985, "learning_rate": 1.217011374763429e-05, "loss": 0.8719, "step": 4399 }, { "epoch": 0.45, "grad_norm": 1.5979785297825504, "learning_rate": 1.2166897684461887e-05, "loss": 0.6897, "step": 4400 }, { "epoch": 0.45, "grad_norm": 1.4936498800599838, "learning_rate": 1.2163681386106628e-05, "loss": 0.7131, "step": 4401 }, { "epoch": 0.45, "grad_norm": 1.561516544759848, "learning_rate": 1.2160464852917586e-05, "loss": 0.7337, "step": 4402 }, { "epoch": 0.45, "grad_norm": 1.6101791995119874, "learning_rate": 1.2157248085243875e-05, "loss": 0.7181, "step": 4403 }, { "epoch": 0.45, "grad_norm": 1.4056365477488921, "learning_rate": 1.2154031083434618e-05, "loss": 0.7246, "step": 4404 }, { "epoch": 0.45, "grad_norm": 1.4791446978711529, "learning_rate": 1.2150813847838973e-05, "loss": 0.7785, "step": 4405 }, { "epoch": 0.45, "grad_norm": 1.6116695930351572, "learning_rate": 1.2147596378806122e-05, "loss": 0.8783, "step": 4406 }, { "epoch": 0.45, "grad_norm": 1.525736879948389, "learning_rate": 1.2144378676685265e-05, "loss": 0.6726, "step": 4407 }, { "epoch": 0.45, "grad_norm": 1.704644176726741, "learning_rate": 1.2141160741825637e-05, "loss": 0.8096, "step": 4408 }, { "epoch": 0.45, "grad_norm": 1.6201455401565257, "learning_rate": 1.2137942574576498e-05, "loss": 0.7243, "step": 4409 }, { "epoch": 0.45, "grad_norm": 1.5974565990510263, "learning_rate": 1.2134724175287124e-05, "loss": 0.7345, "step": 4410 }, { "epoch": 0.45, "grad_norm": 1.5472920340894925, "learning_rate": 1.213150554430682e-05, "loss": 0.6502, "step": 4411 }, { "epoch": 0.45, "grad_norm": 1.6689059927391285, "learning_rate": 1.2128286681984929e-05, "loss": 0.8014, "step": 4412 }, { "epoch": 0.45, "grad_norm": 1.7726442768700028, "learning_rate": 1.2125067588670798e-05, "loss": 0.6601, "step": 4413 }, { "epoch": 0.45, "grad_norm": 1.6477284044940803, "learning_rate": 1.2121848264713813e-05, "loss": 0.7439, "step": 4414 }, { "epoch": 0.45, "grad_norm": 1.4965439315138036, "learning_rate": 1.2118628710463383e-05, "loss": 0.6484, "step": 4415 }, { "epoch": 0.45, "grad_norm": 1.5165985892615441, "learning_rate": 1.2115408926268935e-05, "loss": 0.7808, "step": 4416 }, { "epoch": 0.45, "grad_norm": 1.5558271212569084, "learning_rate": 1.211218891247993e-05, "loss": 0.6624, "step": 4417 }, { "epoch": 0.45, "grad_norm": 1.5968364577633354, "learning_rate": 1.210896866944585e-05, "loss": 0.7942, "step": 4418 }, { "epoch": 0.45, "grad_norm": 1.5649442392983401, "learning_rate": 1.2105748197516202e-05, "loss": 0.7289, "step": 4419 }, { "epoch": 0.45, "grad_norm": 1.4583243392481153, "learning_rate": 1.2102527497040518e-05, "loss": 0.7746, "step": 4420 }, { "epoch": 0.45, "grad_norm": 1.4272608738704586, "learning_rate": 1.2099306568368351e-05, "loss": 0.645, "step": 4421 }, { "epoch": 0.45, "grad_norm": 1.4106183228923768, "learning_rate": 1.2096085411849288e-05, "loss": 0.7285, "step": 4422 }, { "epoch": 0.45, "grad_norm": 1.5010831315856243, "learning_rate": 1.2092864027832934e-05, "loss": 0.6688, "step": 4423 }, { "epoch": 0.45, "grad_norm": 1.6271030020091606, "learning_rate": 1.2089642416668917e-05, "loss": 0.7768, "step": 4424 }, { "epoch": 0.45, "grad_norm": 1.5235151617778604, "learning_rate": 1.2086420578706892e-05, "loss": 0.6403, "step": 4425 }, { "epoch": 0.45, "grad_norm": 1.5076423948411202, "learning_rate": 1.2083198514296543e-05, "loss": 0.7494, "step": 4426 }, { "epoch": 0.45, "grad_norm": 1.5464772791066064, "learning_rate": 1.2079976223787572e-05, "loss": 0.889, "step": 4427 }, { "epoch": 0.45, "grad_norm": 1.5354979787731868, "learning_rate": 1.2076753707529712e-05, "loss": 0.6382, "step": 4428 }, { "epoch": 0.45, "grad_norm": 1.423218504781596, "learning_rate": 1.207353096587271e-05, "loss": 0.7312, "step": 4429 }, { "epoch": 0.45, "grad_norm": 1.3970447157524866, "learning_rate": 1.2070307999166349e-05, "loss": 0.6877, "step": 4430 }, { "epoch": 0.45, "grad_norm": 1.6599140309374598, "learning_rate": 1.2067084807760432e-05, "loss": 0.7027, "step": 4431 }, { "epoch": 0.45, "grad_norm": 1.6651396370667126, "learning_rate": 1.2063861392004786e-05, "loss": 0.7474, "step": 4432 }, { "epoch": 0.45, "grad_norm": 1.430089832277469, "learning_rate": 1.2060637752249258e-05, "loss": 0.7243, "step": 4433 }, { "epoch": 0.45, "grad_norm": 1.614481881170845, "learning_rate": 1.2057413888843725e-05, "loss": 0.8231, "step": 4434 }, { "epoch": 0.45, "grad_norm": 1.4348838527996164, "learning_rate": 1.2054189802138092e-05, "loss": 0.7713, "step": 4435 }, { "epoch": 0.45, "grad_norm": 1.7212586577126978, "learning_rate": 1.205096549248228e-05, "loss": 0.658, "step": 4436 }, { "epoch": 0.45, "grad_norm": 1.658407315132644, "learning_rate": 1.2047740960226237e-05, "loss": 0.7476, "step": 4437 }, { "epoch": 0.45, "grad_norm": 1.5328008396841144, "learning_rate": 1.2044516205719936e-05, "loss": 0.8208, "step": 4438 }, { "epoch": 0.45, "grad_norm": 1.4121317489710041, "learning_rate": 1.2041291229313372e-05, "loss": 0.7737, "step": 4439 }, { "epoch": 0.45, "grad_norm": 1.6338576991003722, "learning_rate": 1.2038066031356568e-05, "loss": 0.777, "step": 4440 }, { "epoch": 0.45, "grad_norm": 1.6169747349261165, "learning_rate": 1.203484061219957e-05, "loss": 0.7694, "step": 4441 }, { "epoch": 0.45, "grad_norm": 1.534574166088398, "learning_rate": 1.2031614972192443e-05, "loss": 0.7543, "step": 4442 }, { "epoch": 0.45, "grad_norm": 1.5235420808364872, "learning_rate": 1.2028389111685283e-05, "loss": 0.766, "step": 4443 }, { "epoch": 0.45, "grad_norm": 1.505387947763993, "learning_rate": 1.2025163031028203e-05, "loss": 0.8059, "step": 4444 }, { "epoch": 0.45, "grad_norm": 1.5351796430514244, "learning_rate": 1.202193673057135e-05, "loss": 0.6711, "step": 4445 }, { "epoch": 0.45, "grad_norm": 1.5112091190626977, "learning_rate": 1.201871021066488e-05, "loss": 0.8201, "step": 4446 }, { "epoch": 0.45, "grad_norm": 1.627302058235011, "learning_rate": 1.2015483471658986e-05, "loss": 0.7077, "step": 4447 }, { "epoch": 0.45, "grad_norm": 2.0278561697736004, "learning_rate": 1.2012256513903881e-05, "loss": 0.7857, "step": 4448 }, { "epoch": 0.45, "grad_norm": 1.611940844306635, "learning_rate": 1.2009029337749803e-05, "loss": 0.7612, "step": 4449 }, { "epoch": 0.45, "grad_norm": 1.8121778869497533, "learning_rate": 1.2005801943547004e-05, "loss": 0.78, "step": 4450 }, { "epoch": 0.45, "grad_norm": 1.5552713757449876, "learning_rate": 1.2002574331645774e-05, "loss": 0.7561, "step": 4451 }, { "epoch": 0.45, "grad_norm": 1.6294178143747788, "learning_rate": 1.1999346502396411e-05, "loss": 0.6489, "step": 4452 }, { "epoch": 0.45, "grad_norm": 1.5805010400813575, "learning_rate": 1.1996118456149258e-05, "loss": 0.6256, "step": 4453 }, { "epoch": 0.45, "grad_norm": 1.6173180486478613, "learning_rate": 1.199289019325466e-05, "loss": 0.7834, "step": 4454 }, { "epoch": 0.45, "grad_norm": 1.4599722225184395, "learning_rate": 1.1989661714063e-05, "loss": 0.7161, "step": 4455 }, { "epoch": 0.45, "grad_norm": 1.5629592431617074, "learning_rate": 1.198643301892467e-05, "loss": 0.6841, "step": 4456 }, { "epoch": 0.45, "grad_norm": 1.6195847291825776, "learning_rate": 1.1983204108190102e-05, "loss": 0.7359, "step": 4457 }, { "epoch": 0.45, "grad_norm": 1.6672599164503639, "learning_rate": 1.1979974982209745e-05, "loss": 0.8227, "step": 4458 }, { "epoch": 0.45, "grad_norm": 1.6956380914634728, "learning_rate": 1.1976745641334064e-05, "loss": 0.7724, "step": 4459 }, { "epoch": 0.45, "grad_norm": 1.3448791245225744, "learning_rate": 1.197351608591356e-05, "loss": 0.6883, "step": 4460 }, { "epoch": 0.45, "grad_norm": 1.4919968482707506, "learning_rate": 1.1970286316298743e-05, "loss": 0.8661, "step": 4461 }, { "epoch": 0.45, "grad_norm": 1.5888723787256789, "learning_rate": 1.1967056332840159e-05, "loss": 0.6612, "step": 4462 }, { "epoch": 0.45, "grad_norm": 1.500626958840491, "learning_rate": 1.1963826135888374e-05, "loss": 0.6686, "step": 4463 }, { "epoch": 0.45, "grad_norm": 1.3928861170099849, "learning_rate": 1.196059572579397e-05, "loss": 0.7959, "step": 4464 }, { "epoch": 0.45, "grad_norm": 1.6296766789886576, "learning_rate": 1.1957365102907562e-05, "loss": 0.8137, "step": 4465 }, { "epoch": 0.45, "grad_norm": 2.0328265489817867, "learning_rate": 1.1954134267579781e-05, "loss": 0.8245, "step": 4466 }, { "epoch": 0.45, "grad_norm": 1.654234354125284, "learning_rate": 1.1950903220161286e-05, "loss": 0.6642, "step": 4467 }, { "epoch": 0.45, "grad_norm": 1.5800488068146095, "learning_rate": 1.1947671961002753e-05, "loss": 0.7177, "step": 4468 }, { "epoch": 0.45, "grad_norm": 1.4788972818110588, "learning_rate": 1.1944440490454887e-05, "loss": 0.764, "step": 4469 }, { "epoch": 0.45, "grad_norm": 1.7483788792193522, "learning_rate": 1.1941208808868411e-05, "loss": 0.7508, "step": 4470 }, { "epoch": 0.45, "grad_norm": 1.518687096393544, "learning_rate": 1.193797691659408e-05, "loss": 0.8022, "step": 4471 }, { "epoch": 0.45, "grad_norm": 1.5851103825987014, "learning_rate": 1.1934744813982662e-05, "loss": 0.7285, "step": 4472 }, { "epoch": 0.45, "grad_norm": 1.5147747934349225, "learning_rate": 1.193151250138495e-05, "loss": 0.6713, "step": 4473 }, { "epoch": 0.46, "grad_norm": 1.7252009780168112, "learning_rate": 1.1928279979151757e-05, "loss": 0.8059, "step": 4474 }, { "epoch": 0.46, "grad_norm": 1.6550933483637722, "learning_rate": 1.1925047247633931e-05, "loss": 0.7824, "step": 4475 }, { "epoch": 0.46, "grad_norm": 1.537864083464159, "learning_rate": 1.1921814307182331e-05, "loss": 0.7198, "step": 4476 }, { "epoch": 0.46, "grad_norm": 1.6247694014405787, "learning_rate": 1.1918581158147842e-05, "loss": 0.7996, "step": 4477 }, { "epoch": 0.46, "grad_norm": 1.5040273170796579, "learning_rate": 1.1915347800881369e-05, "loss": 0.7318, "step": 4478 }, { "epoch": 0.46, "grad_norm": 1.624008872920982, "learning_rate": 1.1912114235733846e-05, "loss": 0.8635, "step": 4479 }, { "epoch": 0.46, "grad_norm": 1.6783046655630303, "learning_rate": 1.1908880463056225e-05, "loss": 0.7072, "step": 4480 }, { "epoch": 0.46, "grad_norm": 1.5987382924783546, "learning_rate": 1.1905646483199481e-05, "loss": 0.737, "step": 4481 }, { "epoch": 0.46, "grad_norm": 1.4190839686862713, "learning_rate": 1.1902412296514614e-05, "loss": 0.6069, "step": 4482 }, { "epoch": 0.46, "grad_norm": 1.4717341097697567, "learning_rate": 1.1899177903352639e-05, "loss": 0.6212, "step": 4483 }, { "epoch": 0.46, "grad_norm": 1.5764180668532193, "learning_rate": 1.1895943304064606e-05, "loss": 0.6834, "step": 4484 }, { "epoch": 0.46, "grad_norm": 1.545690402929286, "learning_rate": 1.1892708499001576e-05, "loss": 0.613, "step": 4485 }, { "epoch": 0.46, "grad_norm": 1.5153286028527162, "learning_rate": 1.1889473488514636e-05, "loss": 0.7848, "step": 4486 }, { "epoch": 0.46, "grad_norm": 1.5809882519403804, "learning_rate": 1.1886238272954897e-05, "loss": 0.7388, "step": 4487 }, { "epoch": 0.46, "grad_norm": 1.5637085518869727, "learning_rate": 1.1883002852673489e-05, "loss": 0.7386, "step": 4488 }, { "epoch": 0.46, "grad_norm": 1.522366166224289, "learning_rate": 1.187976722802157e-05, "loss": 0.6202, "step": 4489 }, { "epoch": 0.46, "grad_norm": 1.6818519644180367, "learning_rate": 1.1876531399350316e-05, "loss": 0.7759, "step": 4490 }, { "epoch": 0.46, "grad_norm": 1.552904746172006, "learning_rate": 1.1873295367010923e-05, "loss": 0.7709, "step": 4491 }, { "epoch": 0.46, "grad_norm": 1.4481819506527602, "learning_rate": 1.1870059131354611e-05, "loss": 0.7444, "step": 4492 }, { "epoch": 0.46, "grad_norm": 1.5800787460222114, "learning_rate": 1.186682269273263e-05, "loss": 0.8605, "step": 4493 }, { "epoch": 0.46, "grad_norm": 1.6270808444334217, "learning_rate": 1.1863586051496235e-05, "loss": 0.6664, "step": 4494 }, { "epoch": 0.46, "grad_norm": 1.3324751236800856, "learning_rate": 1.1860349207996717e-05, "loss": 0.7568, "step": 4495 }, { "epoch": 0.46, "grad_norm": 1.6101269155158606, "learning_rate": 1.1857112162585387e-05, "loss": 0.811, "step": 4496 }, { "epoch": 0.46, "grad_norm": 1.5018243352019984, "learning_rate": 1.1853874915613575e-05, "loss": 0.816, "step": 4497 }, { "epoch": 0.46, "grad_norm": 1.6115192399287763, "learning_rate": 1.1850637467432629e-05, "loss": 0.7215, "step": 4498 }, { "epoch": 0.46, "grad_norm": 1.5961199154311783, "learning_rate": 1.184739981839393e-05, "loss": 0.6597, "step": 4499 }, { "epoch": 0.46, "grad_norm": 1.577479387062586, "learning_rate": 1.1844161968848868e-05, "loss": 0.7565, "step": 4500 }, { "epoch": 0.46, "grad_norm": 1.5304523479785352, "learning_rate": 1.1840923919148863e-05, "loss": 0.6509, "step": 4501 }, { "epoch": 0.46, "grad_norm": 1.318352712290105, "learning_rate": 1.1837685669645358e-05, "loss": 0.6931, "step": 4502 }, { "epoch": 0.46, "grad_norm": 1.5895914554412998, "learning_rate": 1.1834447220689813e-05, "loss": 0.737, "step": 4503 }, { "epoch": 0.46, "grad_norm": 1.5971370797130637, "learning_rate": 1.1831208572633708e-05, "loss": 0.7396, "step": 4504 }, { "epoch": 0.46, "grad_norm": 1.544909155827977, "learning_rate": 1.182796972582855e-05, "loss": 0.6391, "step": 4505 }, { "epoch": 0.46, "grad_norm": 1.5076713528789134, "learning_rate": 1.1824730680625862e-05, "loss": 0.7217, "step": 4506 }, { "epoch": 0.46, "grad_norm": 1.6015860072889743, "learning_rate": 1.1821491437377198e-05, "loss": 0.7712, "step": 4507 }, { "epoch": 0.46, "grad_norm": 1.559399696293891, "learning_rate": 1.181825199643412e-05, "loss": 0.682, "step": 4508 }, { "epoch": 0.46, "grad_norm": 1.3669347461286494, "learning_rate": 1.1815012358148223e-05, "loss": 0.5674, "step": 4509 }, { "epoch": 0.46, "grad_norm": 1.7240826706457286, "learning_rate": 1.1811772522871119e-05, "loss": 0.7303, "step": 4510 }, { "epoch": 0.46, "grad_norm": 1.6573139580065175, "learning_rate": 1.1808532490954438e-05, "loss": 0.7783, "step": 4511 }, { "epoch": 0.46, "grad_norm": 1.6018607990043154, "learning_rate": 1.180529226274984e-05, "loss": 0.8296, "step": 4512 }, { "epoch": 0.46, "grad_norm": 1.6779980575994735, "learning_rate": 1.1802051838609e-05, "loss": 0.7266, "step": 4513 }, { "epoch": 0.46, "grad_norm": 1.4748993331800264, "learning_rate": 1.1798811218883613e-05, "loss": 0.7311, "step": 4514 }, { "epoch": 0.46, "grad_norm": 1.6332739658424689, "learning_rate": 1.1795570403925397e-05, "loss": 0.6996, "step": 4515 }, { "epoch": 0.46, "grad_norm": 1.520362224377642, "learning_rate": 1.1792329394086094e-05, "loss": 0.6282, "step": 4516 }, { "epoch": 0.46, "grad_norm": 1.3075697117316751, "learning_rate": 1.1789088189717463e-05, "loss": 0.7129, "step": 4517 }, { "epoch": 0.46, "grad_norm": 1.6161533444931127, "learning_rate": 1.178584679117129e-05, "loss": 0.6684, "step": 4518 }, { "epoch": 0.46, "grad_norm": 1.5433187962870167, "learning_rate": 1.1782605198799371e-05, "loss": 0.721, "step": 4519 }, { "epoch": 0.46, "grad_norm": 1.4472065593881311, "learning_rate": 1.1779363412953537e-05, "loss": 0.806, "step": 4520 }, { "epoch": 0.46, "grad_norm": 1.4993530836356177, "learning_rate": 1.177612143398563e-05, "loss": 0.7325, "step": 4521 }, { "epoch": 0.46, "grad_norm": 1.7396800214504256, "learning_rate": 1.1772879262247519e-05, "loss": 0.7748, "step": 4522 }, { "epoch": 0.46, "grad_norm": 1.7334103979363331, "learning_rate": 1.1769636898091089e-05, "loss": 0.6951, "step": 4523 }, { "epoch": 0.46, "grad_norm": 1.5340003104618167, "learning_rate": 1.1766394341868242e-05, "loss": 0.6365, "step": 4524 }, { "epoch": 0.46, "grad_norm": 1.5822547649750762, "learning_rate": 1.1763151593930918e-05, "loss": 0.7985, "step": 4525 }, { "epoch": 0.46, "grad_norm": 1.4388485063230003, "learning_rate": 1.1759908654631057e-05, "loss": 0.7047, "step": 4526 }, { "epoch": 0.46, "grad_norm": 1.6219002551817887, "learning_rate": 1.1756665524320638e-05, "loss": 0.6781, "step": 4527 }, { "epoch": 0.46, "grad_norm": 1.5640124456440354, "learning_rate": 1.1753422203351643e-05, "loss": 0.7798, "step": 4528 }, { "epoch": 0.46, "grad_norm": 1.3788120020476304, "learning_rate": 1.1750178692076092e-05, "loss": 0.7017, "step": 4529 }, { "epoch": 0.46, "grad_norm": 1.7685053143291274, "learning_rate": 1.1746934990846013e-05, "loss": 0.7724, "step": 4530 }, { "epoch": 0.46, "grad_norm": 1.590333764801969, "learning_rate": 1.1743691100013459e-05, "loss": 0.7066, "step": 4531 }, { "epoch": 0.46, "grad_norm": 1.556934087076597, "learning_rate": 1.1740447019930506e-05, "loss": 0.686, "step": 4532 }, { "epoch": 0.46, "grad_norm": 1.4977064506551216, "learning_rate": 1.1737202750949244e-05, "loss": 0.7134, "step": 4533 }, { "epoch": 0.46, "grad_norm": 1.5293106757310524, "learning_rate": 1.173395829342179e-05, "loss": 0.6573, "step": 4534 }, { "epoch": 0.46, "grad_norm": 1.4865247698778525, "learning_rate": 1.1730713647700282e-05, "loss": 0.6099, "step": 4535 }, { "epoch": 0.46, "grad_norm": 1.5621144654474877, "learning_rate": 1.1727468814136871e-05, "loss": 0.7107, "step": 4536 }, { "epoch": 0.46, "grad_norm": 1.9113889775095725, "learning_rate": 1.1724223793083733e-05, "loss": 0.81, "step": 4537 }, { "epoch": 0.46, "grad_norm": 1.3475550233522957, "learning_rate": 1.172097858489307e-05, "loss": 0.6461, "step": 4538 }, { "epoch": 0.46, "grad_norm": 1.6564171669585754, "learning_rate": 1.1717733189917092e-05, "loss": 0.722, "step": 4539 }, { "epoch": 0.46, "grad_norm": 1.6240145152483296, "learning_rate": 1.1714487608508043e-05, "loss": 0.699, "step": 4540 }, { "epoch": 0.46, "grad_norm": 1.7102134687250472, "learning_rate": 1.1711241841018173e-05, "loss": 0.6937, "step": 4541 }, { "epoch": 0.46, "grad_norm": 1.6969571762175641, "learning_rate": 1.1707995887799759e-05, "loss": 0.7627, "step": 4542 }, { "epoch": 0.46, "grad_norm": 1.7822040158464596, "learning_rate": 1.1704749749205105e-05, "loss": 0.8299, "step": 4543 }, { "epoch": 0.46, "grad_norm": 1.5846248801060048, "learning_rate": 1.1701503425586522e-05, "loss": 0.8152, "step": 4544 }, { "epoch": 0.46, "grad_norm": 1.458804753241937, "learning_rate": 1.1698256917296354e-05, "loss": 0.7306, "step": 4545 }, { "epoch": 0.46, "grad_norm": 1.509308899737421, "learning_rate": 1.1695010224686951e-05, "loss": 0.7077, "step": 4546 }, { "epoch": 0.46, "grad_norm": 1.5690076410012654, "learning_rate": 1.1691763348110698e-05, "loss": 0.7536, "step": 4547 }, { "epoch": 0.46, "grad_norm": 1.6779952948032286, "learning_rate": 1.168851628791999e-05, "loss": 0.7712, "step": 4548 }, { "epoch": 0.46, "grad_norm": 1.7875031142218436, "learning_rate": 1.1685269044467244e-05, "loss": 0.8445, "step": 4549 }, { "epoch": 0.46, "grad_norm": 1.4464393868159005, "learning_rate": 1.16820216181049e-05, "loss": 0.6798, "step": 4550 }, { "epoch": 0.46, "grad_norm": 1.5728041249582039, "learning_rate": 1.167877400918541e-05, "loss": 0.6674, "step": 4551 }, { "epoch": 0.46, "grad_norm": 1.9005455827057687, "learning_rate": 1.1675526218061254e-05, "loss": 0.7243, "step": 4552 }, { "epoch": 0.46, "grad_norm": 1.5418841385192381, "learning_rate": 1.1672278245084931e-05, "loss": 0.6592, "step": 4553 }, { "epoch": 0.46, "grad_norm": 1.495491500895745, "learning_rate": 1.1669030090608957e-05, "loss": 0.6856, "step": 4554 }, { "epoch": 0.46, "grad_norm": 1.6104076342307279, "learning_rate": 1.1665781754985867e-05, "loss": 0.6973, "step": 4555 }, { "epoch": 0.46, "grad_norm": 1.5121867638957396, "learning_rate": 1.1662533238568214e-05, "loss": 0.7295, "step": 4556 }, { "epoch": 0.46, "grad_norm": 1.5661736979533423, "learning_rate": 1.1659284541708581e-05, "loss": 0.7195, "step": 4557 }, { "epoch": 0.46, "grad_norm": 1.454697725959841, "learning_rate": 1.165603566475956e-05, "loss": 0.6999, "step": 4558 }, { "epoch": 0.46, "grad_norm": 1.4835668066546128, "learning_rate": 1.1652786608073763e-05, "loss": 0.7271, "step": 4559 }, { "epoch": 0.46, "grad_norm": 1.3864305015981893, "learning_rate": 1.1649537372003825e-05, "loss": 0.6977, "step": 4560 }, { "epoch": 0.46, "grad_norm": 1.5254952489151536, "learning_rate": 1.1646287956902402e-05, "loss": 0.6152, "step": 4561 }, { "epoch": 0.46, "grad_norm": 1.6481920827138161, "learning_rate": 1.1643038363122168e-05, "loss": 0.7651, "step": 4562 }, { "epoch": 0.46, "grad_norm": 1.4195624179129456, "learning_rate": 1.1639788591015815e-05, "loss": 0.6878, "step": 4563 }, { "epoch": 0.46, "grad_norm": 1.6036746756352342, "learning_rate": 1.1636538640936052e-05, "loss": 0.7982, "step": 4564 }, { "epoch": 0.46, "grad_norm": 1.5207564745319613, "learning_rate": 1.163328851323561e-05, "loss": 0.7535, "step": 4565 }, { "epoch": 0.46, "grad_norm": 1.5653334921119917, "learning_rate": 1.1630038208267244e-05, "loss": 0.7109, "step": 4566 }, { "epoch": 0.46, "grad_norm": 1.5478786441455192, "learning_rate": 1.1626787726383722e-05, "loss": 0.6924, "step": 4567 }, { "epoch": 0.46, "grad_norm": 1.5811167412057043, "learning_rate": 1.1623537067937832e-05, "loss": 0.766, "step": 4568 }, { "epoch": 0.46, "grad_norm": 1.5202293492541648, "learning_rate": 1.162028623328238e-05, "loss": 0.709, "step": 4569 }, { "epoch": 0.46, "grad_norm": 1.538960478366557, "learning_rate": 1.1617035222770199e-05, "loss": 0.7687, "step": 4570 }, { "epoch": 0.46, "grad_norm": 1.4244896266657237, "learning_rate": 1.1613784036754132e-05, "loss": 0.6506, "step": 4571 }, { "epoch": 0.47, "grad_norm": 1.5037212119134407, "learning_rate": 1.1610532675587042e-05, "loss": 0.6561, "step": 4572 }, { "epoch": 0.47, "grad_norm": 1.5084780381210663, "learning_rate": 1.160728113962182e-05, "loss": 0.6705, "step": 4573 }, { "epoch": 0.47, "grad_norm": 1.7686645441932545, "learning_rate": 1.1604029429211361e-05, "loss": 0.7333, "step": 4574 }, { "epoch": 0.47, "grad_norm": 1.4847152737520022, "learning_rate": 1.1600777544708595e-05, "loss": 0.7147, "step": 4575 }, { "epoch": 0.47, "grad_norm": 1.550495765299229, "learning_rate": 1.1597525486466458e-05, "loss": 0.68, "step": 4576 }, { "epoch": 0.47, "grad_norm": 1.5757475793774263, "learning_rate": 1.1594273254837914e-05, "loss": 0.7538, "step": 4577 }, { "epoch": 0.47, "grad_norm": 1.5793316860033737, "learning_rate": 1.1591020850175935e-05, "loss": 0.7194, "step": 4578 }, { "epoch": 0.47, "grad_norm": 1.5312094094225086, "learning_rate": 1.1587768272833529e-05, "loss": 0.795, "step": 4579 }, { "epoch": 0.47, "grad_norm": 1.6037209281036378, "learning_rate": 1.1584515523163705e-05, "loss": 0.6588, "step": 4580 }, { "epoch": 0.47, "grad_norm": 1.4918056952223486, "learning_rate": 1.15812626015195e-05, "loss": 0.6309, "step": 4581 }, { "epoch": 0.47, "grad_norm": 1.605269547265067, "learning_rate": 1.1578009508253966e-05, "loss": 0.7243, "step": 4582 }, { "epoch": 0.47, "grad_norm": 1.4666143394088873, "learning_rate": 1.157475624372018e-05, "loss": 0.6769, "step": 4583 }, { "epoch": 0.47, "grad_norm": 1.6405211541922342, "learning_rate": 1.1571502808271229e-05, "loss": 0.7228, "step": 4584 }, { "epoch": 0.47, "grad_norm": 1.6232228814453615, "learning_rate": 1.1568249202260224e-05, "loss": 0.8883, "step": 4585 }, { "epoch": 0.47, "grad_norm": 1.5683314189351627, "learning_rate": 1.1564995426040293e-05, "loss": 0.672, "step": 4586 }, { "epoch": 0.47, "grad_norm": 1.548145977674433, "learning_rate": 1.1561741479964579e-05, "loss": 0.6571, "step": 4587 }, { "epoch": 0.47, "grad_norm": 1.6133717388005364, "learning_rate": 1.1558487364386255e-05, "loss": 0.7443, "step": 4588 }, { "epoch": 0.47, "grad_norm": 1.5584829486616945, "learning_rate": 1.1555233079658496e-05, "loss": 0.7495, "step": 4589 }, { "epoch": 0.47, "grad_norm": 1.5753794669064949, "learning_rate": 1.155197862613451e-05, "loss": 0.7855, "step": 4590 }, { "epoch": 0.47, "grad_norm": 1.4472244902213225, "learning_rate": 1.1548724004167513e-05, "loss": 0.7718, "step": 4591 }, { "epoch": 0.47, "grad_norm": 1.5367327064558058, "learning_rate": 1.1545469214110745e-05, "loss": 0.8578, "step": 4592 }, { "epoch": 0.47, "grad_norm": 1.5334893194368162, "learning_rate": 1.1542214256317461e-05, "loss": 0.83, "step": 4593 }, { "epoch": 0.47, "grad_norm": 1.5910044241350005, "learning_rate": 1.153895913114094e-05, "loss": 0.6814, "step": 4594 }, { "epoch": 0.47, "grad_norm": 1.6807368860123768, "learning_rate": 1.153570383893447e-05, "loss": 0.733, "step": 4595 }, { "epoch": 0.47, "grad_norm": 1.5584840199996832, "learning_rate": 1.1532448380051362e-05, "loss": 0.6921, "step": 4596 }, { "epoch": 0.47, "grad_norm": 1.5458643754720789, "learning_rate": 1.152919275484495e-05, "loss": 0.7061, "step": 4597 }, { "epoch": 0.47, "grad_norm": 1.7702393765678885, "learning_rate": 1.1525936963668579e-05, "loss": 0.7618, "step": 4598 }, { "epoch": 0.47, "grad_norm": 1.4375965231055472, "learning_rate": 1.1522681006875614e-05, "loss": 0.6703, "step": 4599 }, { "epoch": 0.47, "grad_norm": 1.697375977900443, "learning_rate": 1.1519424884819432e-05, "loss": 0.7234, "step": 4600 }, { "epoch": 0.47, "grad_norm": 1.4384659116884588, "learning_rate": 1.1516168597853446e-05, "loss": 0.7087, "step": 4601 }, { "epoch": 0.47, "grad_norm": 1.495867458271716, "learning_rate": 1.1512912146331066e-05, "loss": 0.6975, "step": 4602 }, { "epoch": 0.47, "grad_norm": 1.5537088310355118, "learning_rate": 1.1509655530605732e-05, "loss": 0.8397, "step": 4603 }, { "epoch": 0.47, "grad_norm": 1.374743892023546, "learning_rate": 1.1506398751030899e-05, "loss": 0.6131, "step": 4604 }, { "epoch": 0.47, "grad_norm": 1.677575822578111, "learning_rate": 1.150314180796004e-05, "loss": 0.6829, "step": 4605 }, { "epoch": 0.47, "grad_norm": 1.4445203506416073, "learning_rate": 1.1499884701746642e-05, "loss": 0.798, "step": 4606 }, { "epoch": 0.47, "grad_norm": 1.712464858100849, "learning_rate": 1.1496627432744216e-05, "loss": 0.7282, "step": 4607 }, { "epoch": 0.47, "grad_norm": 1.5985773032288149, "learning_rate": 1.1493370001306288e-05, "loss": 0.7807, "step": 4608 }, { "epoch": 0.47, "grad_norm": 1.486347003648851, "learning_rate": 1.14901124077864e-05, "loss": 0.7325, "step": 4609 }, { "epoch": 0.47, "grad_norm": 1.5134227711225614, "learning_rate": 1.148685465253811e-05, "loss": 0.6873, "step": 4610 }, { "epoch": 0.47, "grad_norm": 1.4965204314771694, "learning_rate": 1.1483596735915003e-05, "loss": 0.6983, "step": 4611 }, { "epoch": 0.47, "grad_norm": 1.6605822099109793, "learning_rate": 1.148033865827067e-05, "loss": 0.6704, "step": 4612 }, { "epoch": 0.47, "grad_norm": 1.2937035044904677, "learning_rate": 1.1477080419958726e-05, "loss": 0.6363, "step": 4613 }, { "epoch": 0.47, "grad_norm": 1.8160972538634474, "learning_rate": 1.14738220213328e-05, "loss": 0.7374, "step": 4614 }, { "epoch": 0.47, "grad_norm": 1.4711744385294088, "learning_rate": 1.1470563462746542e-05, "loss": 0.7394, "step": 4615 }, { "epoch": 0.47, "grad_norm": 1.8100258979621198, "learning_rate": 1.1467304744553618e-05, "loss": 0.7842, "step": 4616 }, { "epoch": 0.47, "grad_norm": 1.4886306908892508, "learning_rate": 1.1464045867107712e-05, "loss": 0.7585, "step": 4617 }, { "epoch": 0.47, "grad_norm": 1.4741574652866123, "learning_rate": 1.1460786830762519e-05, "loss": 0.6208, "step": 4618 }, { "epoch": 0.47, "grad_norm": 1.5586042387688461, "learning_rate": 1.1457527635871759e-05, "loss": 0.6356, "step": 4619 }, { "epoch": 0.47, "grad_norm": 1.6055025072357336, "learning_rate": 1.145426828278917e-05, "loss": 0.8323, "step": 4620 }, { "epoch": 0.47, "grad_norm": 1.6099059419948964, "learning_rate": 1.14510087718685e-05, "loss": 0.7571, "step": 4621 }, { "epoch": 0.47, "grad_norm": 1.6203016711208587, "learning_rate": 1.1447749103463519e-05, "loss": 0.7162, "step": 4622 }, { "epoch": 0.47, "grad_norm": 1.7047316755504125, "learning_rate": 1.144448927792801e-05, "loss": 0.8016, "step": 4623 }, { "epoch": 0.47, "grad_norm": 1.5676650614066094, "learning_rate": 1.1441229295615784e-05, "loss": 0.7296, "step": 4624 }, { "epoch": 0.47, "grad_norm": 1.622912975230112, "learning_rate": 1.1437969156880656e-05, "loss": 0.764, "step": 4625 }, { "epoch": 0.47, "grad_norm": 1.4111662466560349, "learning_rate": 1.143470886207646e-05, "loss": 0.6159, "step": 4626 }, { "epoch": 0.47, "grad_norm": 1.5863421762962944, "learning_rate": 1.1431448411557056e-05, "loss": 0.7665, "step": 4627 }, { "epoch": 0.47, "grad_norm": 1.6377625649613345, "learning_rate": 1.1428187805676309e-05, "loss": 0.7062, "step": 4628 }, { "epoch": 0.47, "grad_norm": 1.4509682195293687, "learning_rate": 1.1424927044788114e-05, "loss": 0.6903, "step": 4629 }, { "epoch": 0.47, "grad_norm": 1.5453389143146479, "learning_rate": 1.1421666129246372e-05, "loss": 0.7763, "step": 4630 }, { "epoch": 0.47, "grad_norm": 1.483135906834292, "learning_rate": 1.1418405059405002e-05, "loss": 0.797, "step": 4631 }, { "epoch": 0.47, "grad_norm": 1.7483319893470177, "learning_rate": 1.1415143835617943e-05, "loss": 0.8703, "step": 4632 }, { "epoch": 0.47, "grad_norm": 1.5270226220355356, "learning_rate": 1.1411882458239152e-05, "loss": 0.7648, "step": 4633 }, { "epoch": 0.47, "grad_norm": 1.6431099516940701, "learning_rate": 1.14086209276226e-05, "loss": 0.7689, "step": 4634 }, { "epoch": 0.47, "grad_norm": 1.4282775689777298, "learning_rate": 1.1405359244122275e-05, "loss": 0.6884, "step": 4635 }, { "epoch": 0.47, "grad_norm": 1.6107562592794664, "learning_rate": 1.1402097408092184e-05, "loss": 0.7794, "step": 4636 }, { "epoch": 0.47, "grad_norm": 1.7817765520103868, "learning_rate": 1.1398835419886339e-05, "loss": 0.7781, "step": 4637 }, { "epoch": 0.47, "grad_norm": 1.7683257244565183, "learning_rate": 1.1395573279858789e-05, "loss": 0.7045, "step": 4638 }, { "epoch": 0.47, "grad_norm": 1.6521730801992396, "learning_rate": 1.1392310988363584e-05, "loss": 0.75, "step": 4639 }, { "epoch": 0.47, "grad_norm": 1.521210187770528, "learning_rate": 1.1389048545754794e-05, "loss": 0.7056, "step": 4640 }, { "epoch": 0.47, "grad_norm": 1.5150310415725645, "learning_rate": 1.1385785952386503e-05, "loss": 0.7595, "step": 4641 }, { "epoch": 0.47, "grad_norm": 1.3661564856473303, "learning_rate": 1.1382523208612823e-05, "loss": 0.7069, "step": 4642 }, { "epoch": 0.47, "grad_norm": 1.6708812769448553, "learning_rate": 1.1379260314787867e-05, "loss": 0.7898, "step": 4643 }, { "epoch": 0.47, "grad_norm": 1.6704991799723237, "learning_rate": 1.1375997271265775e-05, "loss": 0.77, "step": 4644 }, { "epoch": 0.47, "grad_norm": 1.5237140033724268, "learning_rate": 1.1372734078400695e-05, "loss": 0.7653, "step": 4645 }, { "epoch": 0.47, "grad_norm": 1.5858634760779247, "learning_rate": 1.1369470736546798e-05, "loss": 0.8662, "step": 4646 }, { "epoch": 0.47, "grad_norm": 1.4565672294570617, "learning_rate": 1.1366207246058269e-05, "loss": 0.7507, "step": 4647 }, { "epoch": 0.47, "grad_norm": 1.4991843578593989, "learning_rate": 1.136294360728931e-05, "loss": 0.7551, "step": 4648 }, { "epoch": 0.47, "grad_norm": 1.5417235142162893, "learning_rate": 1.1359679820594137e-05, "loss": 0.6552, "step": 4649 }, { "epoch": 0.47, "grad_norm": 1.548589989786535, "learning_rate": 1.135641588632698e-05, "loss": 0.611, "step": 4650 }, { "epoch": 0.47, "grad_norm": 1.4696288433018039, "learning_rate": 1.1353151804842088e-05, "loss": 0.7258, "step": 4651 }, { "epoch": 0.47, "grad_norm": 1.4592926096276786, "learning_rate": 1.1349887576493735e-05, "loss": 0.6607, "step": 4652 }, { "epoch": 0.47, "grad_norm": 1.6058655036855474, "learning_rate": 1.1346623201636192e-05, "loss": 0.7338, "step": 4653 }, { "epoch": 0.47, "grad_norm": 1.580983795741087, "learning_rate": 1.1343358680623757e-05, "loss": 0.8141, "step": 4654 }, { "epoch": 0.47, "grad_norm": 1.3749699175665049, "learning_rate": 1.1340094013810749e-05, "loss": 0.6731, "step": 4655 }, { "epoch": 0.47, "grad_norm": 1.6096032470138852, "learning_rate": 1.1336829201551492e-05, "loss": 0.7535, "step": 4656 }, { "epoch": 0.47, "grad_norm": 1.6262893926298707, "learning_rate": 1.1333564244200328e-05, "loss": 0.7377, "step": 4657 }, { "epoch": 0.47, "grad_norm": 1.5410648418608643, "learning_rate": 1.1330299142111623e-05, "loss": 0.739, "step": 4658 }, { "epoch": 0.47, "grad_norm": 1.5485513987128794, "learning_rate": 1.1327033895639747e-05, "loss": 0.8029, "step": 4659 }, { "epoch": 0.47, "grad_norm": 1.410059273776502, "learning_rate": 1.1323768505139095e-05, "loss": 0.7232, "step": 4660 }, { "epoch": 0.47, "grad_norm": 1.5624646887742166, "learning_rate": 1.1320502970964076e-05, "loss": 0.7231, "step": 4661 }, { "epoch": 0.47, "grad_norm": 1.5372926149461579, "learning_rate": 1.1317237293469108e-05, "loss": 0.7501, "step": 4662 }, { "epoch": 0.47, "grad_norm": 1.5681818631952515, "learning_rate": 1.1313971473008628e-05, "loss": 0.7218, "step": 4663 }, { "epoch": 0.47, "grad_norm": 1.482830029646937, "learning_rate": 1.1310705509937096e-05, "loss": 0.7101, "step": 4664 }, { "epoch": 0.47, "grad_norm": 1.562764113175667, "learning_rate": 1.1307439404608978e-05, "loss": 0.7555, "step": 4665 }, { "epoch": 0.47, "grad_norm": 1.4911542811063758, "learning_rate": 1.130417315737876e-05, "loss": 0.6715, "step": 4666 }, { "epoch": 0.47, "grad_norm": 1.5254406078226133, "learning_rate": 1.1300906768600939e-05, "loss": 0.6447, "step": 4667 }, { "epoch": 0.47, "grad_norm": 1.4471085397289045, "learning_rate": 1.129764023863003e-05, "loss": 0.7193, "step": 4668 }, { "epoch": 0.47, "grad_norm": 1.5608343873438544, "learning_rate": 1.1294373567820568e-05, "loss": 0.661, "step": 4669 }, { "epoch": 0.48, "grad_norm": 1.4437985820957768, "learning_rate": 1.12911067565271e-05, "loss": 0.624, "step": 4670 }, { "epoch": 0.48, "grad_norm": 1.4246038270133796, "learning_rate": 1.1287839805104182e-05, "loss": 0.691, "step": 4671 }, { "epoch": 0.48, "grad_norm": 1.4721353611911032, "learning_rate": 1.1284572713906392e-05, "loss": 0.7062, "step": 4672 }, { "epoch": 0.48, "grad_norm": 1.7658949422577648, "learning_rate": 1.1281305483288321e-05, "loss": 0.7626, "step": 4673 }, { "epoch": 0.48, "grad_norm": 1.504533207614064, "learning_rate": 1.1278038113604579e-05, "loss": 0.7198, "step": 4674 }, { "epoch": 0.48, "grad_norm": 1.5931060163297304, "learning_rate": 1.1274770605209786e-05, "loss": 0.8055, "step": 4675 }, { "epoch": 0.48, "grad_norm": 1.5542388836057903, "learning_rate": 1.1271502958458577e-05, "loss": 0.7602, "step": 4676 }, { "epoch": 0.48, "grad_norm": 1.575193549763372, "learning_rate": 1.1268235173705608e-05, "loss": 0.8088, "step": 4677 }, { "epoch": 0.48, "grad_norm": 1.4411760933441318, "learning_rate": 1.1264967251305542e-05, "loss": 0.6863, "step": 4678 }, { "epoch": 0.48, "grad_norm": 1.6648858012128047, "learning_rate": 1.1261699191613067e-05, "loss": 0.7772, "step": 4679 }, { "epoch": 0.48, "grad_norm": 1.7199672360791733, "learning_rate": 1.1258430994982872e-05, "loss": 0.8195, "step": 4680 }, { "epoch": 0.48, "grad_norm": 1.4568557243100269, "learning_rate": 1.1255162661769674e-05, "loss": 0.6591, "step": 4681 }, { "epoch": 0.48, "grad_norm": 1.520795016619549, "learning_rate": 1.1251894192328194e-05, "loss": 0.7553, "step": 4682 }, { "epoch": 0.48, "grad_norm": 1.6015181193109884, "learning_rate": 1.124862558701318e-05, "loss": 0.7741, "step": 4683 }, { "epoch": 0.48, "grad_norm": 1.4447539094155426, "learning_rate": 1.1245356846179384e-05, "loss": 0.7089, "step": 4684 }, { "epoch": 0.48, "grad_norm": 1.4584951955485221, "learning_rate": 1.1242087970181578e-05, "loss": 0.7395, "step": 4685 }, { "epoch": 0.48, "grad_norm": 1.4461603196919004, "learning_rate": 1.1238818959374547e-05, "loss": 0.6715, "step": 4686 }, { "epoch": 0.48, "grad_norm": 1.7311798026421528, "learning_rate": 1.1235549814113092e-05, "loss": 0.8515, "step": 4687 }, { "epoch": 0.48, "grad_norm": 1.4048144176782515, "learning_rate": 1.1232280534752025e-05, "loss": 0.5922, "step": 4688 }, { "epoch": 0.48, "grad_norm": 1.5481973447074155, "learning_rate": 1.1229011121646176e-05, "loss": 0.7715, "step": 4689 }, { "epoch": 0.48, "grad_norm": 1.6958381976979935, "learning_rate": 1.1225741575150391e-05, "loss": 0.7143, "step": 4690 }, { "epoch": 0.48, "grad_norm": 1.4660906832812384, "learning_rate": 1.1222471895619525e-05, "loss": 0.7651, "step": 4691 }, { "epoch": 0.48, "grad_norm": 1.514977911309017, "learning_rate": 1.1219202083408454e-05, "loss": 0.7018, "step": 4692 }, { "epoch": 0.48, "grad_norm": 1.7532823984411445, "learning_rate": 1.1215932138872061e-05, "loss": 0.7323, "step": 4693 }, { "epoch": 0.48, "grad_norm": 1.4676790184219108, "learning_rate": 1.1212662062365252e-05, "loss": 0.7031, "step": 4694 }, { "epoch": 0.48, "grad_norm": 1.6054557795695394, "learning_rate": 1.1209391854242938e-05, "loss": 0.8304, "step": 4695 }, { "epoch": 0.48, "grad_norm": 1.5991701867348644, "learning_rate": 1.120612151486005e-05, "loss": 0.6987, "step": 4696 }, { "epoch": 0.48, "grad_norm": 1.5170179933755257, "learning_rate": 1.1202851044571533e-05, "loss": 0.6792, "step": 4697 }, { "epoch": 0.48, "grad_norm": 1.7157827956741485, "learning_rate": 1.1199580443732347e-05, "loss": 0.7554, "step": 4698 }, { "epoch": 0.48, "grad_norm": 1.4753493754003022, "learning_rate": 1.1196309712697463e-05, "loss": 0.6668, "step": 4699 }, { "epoch": 0.48, "grad_norm": 1.4924475104111545, "learning_rate": 1.1193038851821867e-05, "loss": 0.7118, "step": 4700 }, { "epoch": 0.48, "grad_norm": 1.5430843931734841, "learning_rate": 1.1189767861460563e-05, "loss": 0.7068, "step": 4701 }, { "epoch": 0.48, "grad_norm": 1.5076245973794509, "learning_rate": 1.1186496741968562e-05, "loss": 0.6831, "step": 4702 }, { "epoch": 0.48, "grad_norm": 1.7370968200999768, "learning_rate": 1.1183225493700895e-05, "loss": 0.7394, "step": 4703 }, { "epoch": 0.48, "grad_norm": 1.800654570183203, "learning_rate": 1.1179954117012603e-05, "loss": 0.7211, "step": 4704 }, { "epoch": 0.48, "grad_norm": 1.5017002098978107, "learning_rate": 1.1176682612258745e-05, "loss": 0.7532, "step": 4705 }, { "epoch": 0.48, "grad_norm": 1.6617996193529947, "learning_rate": 1.117341097979439e-05, "loss": 0.7171, "step": 4706 }, { "epoch": 0.48, "grad_norm": 1.7063874397456549, "learning_rate": 1.1170139219974626e-05, "loss": 0.7891, "step": 4707 }, { "epoch": 0.48, "grad_norm": 1.6636920434588713, "learning_rate": 1.1166867333154543e-05, "loss": 0.7299, "step": 4708 }, { "epoch": 0.48, "grad_norm": 1.5654818487601652, "learning_rate": 1.1163595319689264e-05, "loss": 0.6421, "step": 4709 }, { "epoch": 0.48, "grad_norm": 1.4218850872715825, "learning_rate": 1.1160323179933908e-05, "loss": 0.6249, "step": 4710 }, { "epoch": 0.48, "grad_norm": 1.5576364310269057, "learning_rate": 1.1157050914243614e-05, "loss": 0.7484, "step": 4711 }, { "epoch": 0.48, "grad_norm": 1.6310208232754702, "learning_rate": 1.115377852297354e-05, "loss": 0.7097, "step": 4712 }, { "epoch": 0.48, "grad_norm": 1.6373612010247085, "learning_rate": 1.1150506006478849e-05, "loss": 0.7567, "step": 4713 }, { "epoch": 0.48, "grad_norm": 1.764893872771686, "learning_rate": 1.1147233365114725e-05, "loss": 0.7486, "step": 4714 }, { "epoch": 0.48, "grad_norm": 1.4628182364711966, "learning_rate": 1.1143960599236358e-05, "loss": 0.6277, "step": 4715 }, { "epoch": 0.48, "grad_norm": 1.7369395077416692, "learning_rate": 1.1140687709198958e-05, "loss": 0.7288, "step": 4716 }, { "epoch": 0.48, "grad_norm": 1.635015653974785, "learning_rate": 1.1137414695357747e-05, "loss": 0.7613, "step": 4717 }, { "epoch": 0.48, "grad_norm": 1.5503660871383618, "learning_rate": 1.1134141558067955e-05, "loss": 0.7288, "step": 4718 }, { "epoch": 0.48, "grad_norm": 1.5538599617254092, "learning_rate": 1.1130868297684833e-05, "loss": 0.7645, "step": 4719 }, { "epoch": 0.48, "grad_norm": 1.511799580256174, "learning_rate": 1.1127594914563647e-05, "loss": 0.8259, "step": 4720 }, { "epoch": 0.48, "grad_norm": 1.557814325422162, "learning_rate": 1.1124321409059661e-05, "loss": 0.7098, "step": 4721 }, { "epoch": 0.48, "grad_norm": 1.5253434446450445, "learning_rate": 1.1121047781528174e-05, "loss": 0.7664, "step": 4722 }, { "epoch": 0.48, "grad_norm": 1.7265330902888985, "learning_rate": 1.1117774032324476e-05, "loss": 0.7295, "step": 4723 }, { "epoch": 0.48, "grad_norm": 1.5518678435153568, "learning_rate": 1.111450016180389e-05, "loss": 0.7724, "step": 4724 }, { "epoch": 0.48, "grad_norm": 1.5664907417938099, "learning_rate": 1.1111226170321738e-05, "loss": 0.7906, "step": 4725 }, { "epoch": 0.48, "grad_norm": 1.5130135727104113, "learning_rate": 1.1107952058233367e-05, "loss": 0.766, "step": 4726 }, { "epoch": 0.48, "grad_norm": 2.2548589391419385, "learning_rate": 1.1104677825894121e-05, "loss": 0.7856, "step": 4727 }, { "epoch": 0.48, "grad_norm": 1.4289814666356178, "learning_rate": 1.1101403473659376e-05, "loss": 0.7476, "step": 4728 }, { "epoch": 0.48, "grad_norm": 1.6747946340533135, "learning_rate": 1.1098129001884508e-05, "loss": 0.8052, "step": 4729 }, { "epoch": 0.48, "grad_norm": 1.5331014347391687, "learning_rate": 1.1094854410924909e-05, "loss": 0.8508, "step": 4730 }, { "epoch": 0.48, "grad_norm": 1.8579945737436387, "learning_rate": 1.1091579701135986e-05, "loss": 0.7624, "step": 4731 }, { "epoch": 0.48, "grad_norm": 1.566768370211104, "learning_rate": 1.1088304872873152e-05, "loss": 0.7611, "step": 4732 }, { "epoch": 0.48, "grad_norm": 1.9112271223739234, "learning_rate": 1.1085029926491848e-05, "loss": 0.8016, "step": 4733 }, { "epoch": 0.48, "grad_norm": 1.507987571004608, "learning_rate": 1.1081754862347513e-05, "loss": 0.7083, "step": 4734 }, { "epoch": 0.48, "grad_norm": 1.4228665977224153, "learning_rate": 1.1078479680795604e-05, "loss": 0.7893, "step": 4735 }, { "epoch": 0.48, "grad_norm": 1.6040397098394448, "learning_rate": 1.107520438219159e-05, "loss": 0.7328, "step": 4736 }, { "epoch": 0.48, "grad_norm": 1.5392882633332718, "learning_rate": 1.1071928966890957e-05, "loss": 0.781, "step": 4737 }, { "epoch": 0.48, "grad_norm": 1.666983043219811, "learning_rate": 1.1068653435249197e-05, "loss": 0.7516, "step": 4738 }, { "epoch": 0.48, "grad_norm": 1.6269538017114316, "learning_rate": 1.1065377787621819e-05, "loss": 0.7179, "step": 4739 }, { "epoch": 0.48, "grad_norm": 1.714058517254383, "learning_rate": 1.1062102024364341e-05, "loss": 0.786, "step": 4740 }, { "epoch": 0.48, "grad_norm": 1.6659851540381478, "learning_rate": 1.1058826145832298e-05, "loss": 0.8467, "step": 4741 }, { "epoch": 0.48, "grad_norm": 1.6685789483122337, "learning_rate": 1.1055550152381235e-05, "loss": 0.7775, "step": 4742 }, { "epoch": 0.48, "grad_norm": 1.6167305758786248, "learning_rate": 1.1052274044366711e-05, "loss": 0.7274, "step": 4743 }, { "epoch": 0.48, "grad_norm": 1.4746836673435093, "learning_rate": 1.1048997822144296e-05, "loss": 0.8207, "step": 4744 }, { "epoch": 0.48, "grad_norm": 1.633586522725332, "learning_rate": 1.1045721486069568e-05, "loss": 0.7326, "step": 4745 }, { "epoch": 0.48, "grad_norm": 1.4809266073703506, "learning_rate": 1.1042445036498129e-05, "loss": 0.7137, "step": 4746 }, { "epoch": 0.48, "grad_norm": 1.5195680421319833, "learning_rate": 1.1039168473785584e-05, "loss": 0.7911, "step": 4747 }, { "epoch": 0.48, "grad_norm": 1.6137137131166324, "learning_rate": 1.103589179828755e-05, "loss": 0.7207, "step": 4748 }, { "epoch": 0.48, "grad_norm": 1.4873281881742182, "learning_rate": 1.1032615010359661e-05, "loss": 0.672, "step": 4749 }, { "epoch": 0.48, "grad_norm": 1.5131039557796713, "learning_rate": 1.1029338110357561e-05, "loss": 0.5865, "step": 4750 }, { "epoch": 0.48, "grad_norm": 1.4201459792639004, "learning_rate": 1.1026061098636906e-05, "loss": 0.6513, "step": 4751 }, { "epoch": 0.48, "grad_norm": 1.486349912355769, "learning_rate": 1.1022783975553366e-05, "loss": 0.6578, "step": 4752 }, { "epoch": 0.48, "grad_norm": 1.5176489180640584, "learning_rate": 1.101950674146262e-05, "loss": 0.6687, "step": 4753 }, { "epoch": 0.48, "grad_norm": 1.6306745452948275, "learning_rate": 1.101622939672036e-05, "loss": 0.6677, "step": 4754 }, { "epoch": 0.48, "grad_norm": 1.78403339360376, "learning_rate": 1.1012951941682291e-05, "loss": 0.747, "step": 4755 }, { "epoch": 0.48, "grad_norm": 1.5074978240557007, "learning_rate": 1.1009674376704128e-05, "loss": 0.751, "step": 4756 }, { "epoch": 0.48, "grad_norm": 1.539491194858523, "learning_rate": 1.1006396702141605e-05, "loss": 0.7455, "step": 4757 }, { "epoch": 0.48, "grad_norm": 1.7450620289002257, "learning_rate": 1.1003118918350456e-05, "loss": 0.7322, "step": 4758 }, { "epoch": 0.48, "grad_norm": 1.5733533171495808, "learning_rate": 1.0999841025686431e-05, "loss": 0.5972, "step": 4759 }, { "epoch": 0.48, "grad_norm": 1.5719877764421808, "learning_rate": 1.0996563024505303e-05, "loss": 0.7433, "step": 4760 }, { "epoch": 0.48, "grad_norm": 1.3363325039311442, "learning_rate": 1.0993284915162843e-05, "loss": 0.6251, "step": 4761 }, { "epoch": 0.48, "grad_norm": 1.4101295343750289, "learning_rate": 1.0990006698014837e-05, "loss": 0.6571, "step": 4762 }, { "epoch": 0.48, "grad_norm": 1.3324377019912343, "learning_rate": 1.0986728373417088e-05, "loss": 0.5947, "step": 4763 }, { "epoch": 0.48, "grad_norm": 1.6333620766873393, "learning_rate": 1.0983449941725404e-05, "loss": 0.7579, "step": 4764 }, { "epoch": 0.48, "grad_norm": 1.6989820977183576, "learning_rate": 1.098017140329561e-05, "loss": 0.6614, "step": 4765 }, { "epoch": 0.48, "grad_norm": 1.5800683448962918, "learning_rate": 1.0976892758483533e-05, "loss": 0.6856, "step": 4766 }, { "epoch": 0.48, "grad_norm": 1.5916909083774833, "learning_rate": 1.0973614007645027e-05, "loss": 0.7061, "step": 4767 }, { "epoch": 0.48, "grad_norm": 1.4914111133780676, "learning_rate": 1.097033515113595e-05, "loss": 0.6408, "step": 4768 }, { "epoch": 0.49, "grad_norm": 1.657441182352292, "learning_rate": 1.096705618931216e-05, "loss": 0.6737, "step": 4769 }, { "epoch": 0.49, "grad_norm": 1.6302751638129835, "learning_rate": 1.0963777122529547e-05, "loss": 0.6715, "step": 4770 }, { "epoch": 0.49, "grad_norm": 1.5698054269632449, "learning_rate": 1.0960497951144001e-05, "loss": 0.8629, "step": 4771 }, { "epoch": 0.49, "grad_norm": 1.5812627655597422, "learning_rate": 1.0957218675511418e-05, "loss": 0.7924, "step": 4772 }, { "epoch": 0.49, "grad_norm": 1.6161084006625803, "learning_rate": 1.0953939295987722e-05, "loss": 0.7627, "step": 4773 }, { "epoch": 0.49, "grad_norm": 1.6299841409594045, "learning_rate": 1.095065981292883e-05, "loss": 0.6798, "step": 4774 }, { "epoch": 0.49, "grad_norm": 1.588289422502915, "learning_rate": 1.0947380226690686e-05, "loss": 0.6632, "step": 4775 }, { "epoch": 0.49, "grad_norm": 1.7083918017260773, "learning_rate": 1.0944100537629229e-05, "loss": 0.7163, "step": 4776 }, { "epoch": 0.49, "grad_norm": 1.4988568694539077, "learning_rate": 1.0940820746100425e-05, "loss": 0.758, "step": 4777 }, { "epoch": 0.49, "grad_norm": 1.4764122582404338, "learning_rate": 1.0937540852460242e-05, "loss": 0.7082, "step": 4778 }, { "epoch": 0.49, "grad_norm": 1.4720694527200804, "learning_rate": 1.093426085706466e-05, "loss": 0.7218, "step": 4779 }, { "epoch": 0.49, "grad_norm": 1.5462059060882711, "learning_rate": 1.0930980760269673e-05, "loss": 0.7314, "step": 4780 }, { "epoch": 0.49, "grad_norm": 1.4346137380421373, "learning_rate": 1.0927700562431283e-05, "loss": 0.695, "step": 4781 }, { "epoch": 0.49, "grad_norm": 1.5832976035826196, "learning_rate": 1.0924420263905505e-05, "loss": 0.7294, "step": 4782 }, { "epoch": 0.49, "grad_norm": 1.557316426835322, "learning_rate": 1.0921139865048362e-05, "loss": 0.7435, "step": 4783 }, { "epoch": 0.49, "grad_norm": 1.4448128642118385, "learning_rate": 1.0917859366215895e-05, "loss": 0.7079, "step": 4784 }, { "epoch": 0.49, "grad_norm": 1.5853405092750676, "learning_rate": 1.0914578767764145e-05, "loss": 0.7592, "step": 4785 }, { "epoch": 0.49, "grad_norm": 1.4668078137037874, "learning_rate": 1.0911298070049172e-05, "loss": 0.7091, "step": 4786 }, { "epoch": 0.49, "grad_norm": 1.4979091711758297, "learning_rate": 1.0908017273427044e-05, "loss": 0.7898, "step": 4787 }, { "epoch": 0.49, "grad_norm": 1.4492199846891123, "learning_rate": 1.0904736378253844e-05, "loss": 0.6762, "step": 4788 }, { "epoch": 0.49, "grad_norm": 1.4721752479841992, "learning_rate": 1.0901455384885658e-05, "loss": 0.6579, "step": 4789 }, { "epoch": 0.49, "grad_norm": 1.428659963881077, "learning_rate": 1.0898174293678583e-05, "loss": 0.6705, "step": 4790 }, { "epoch": 0.49, "grad_norm": 1.6378135027870262, "learning_rate": 1.0894893104988738e-05, "loss": 0.7132, "step": 4791 }, { "epoch": 0.49, "grad_norm": 1.556497741824382, "learning_rate": 1.089161181917224e-05, "loss": 0.7206, "step": 4792 }, { "epoch": 0.49, "grad_norm": 1.7230568439952965, "learning_rate": 1.0888330436585224e-05, "loss": 0.6613, "step": 4793 }, { "epoch": 0.49, "grad_norm": 1.612851513748318, "learning_rate": 1.088504895758383e-05, "loss": 0.7709, "step": 4794 }, { "epoch": 0.49, "grad_norm": 1.4659086390595735, "learning_rate": 1.088176738252421e-05, "loss": 0.6839, "step": 4795 }, { "epoch": 0.49, "grad_norm": 1.9422400062295213, "learning_rate": 1.0878485711762533e-05, "loss": 0.8199, "step": 4796 }, { "epoch": 0.49, "grad_norm": 1.5666072973905, "learning_rate": 1.0875203945654969e-05, "loss": 0.758, "step": 4797 }, { "epoch": 0.49, "grad_norm": 1.4006583596790678, "learning_rate": 1.0871922084557702e-05, "loss": 0.6683, "step": 4798 }, { "epoch": 0.49, "grad_norm": 1.5592372404936563, "learning_rate": 1.0868640128826929e-05, "loss": 0.7184, "step": 4799 }, { "epoch": 0.49, "grad_norm": 1.392316596997383, "learning_rate": 1.0865358078818855e-05, "loss": 0.6242, "step": 4800 }, { "epoch": 0.49, "grad_norm": 1.4557220760954837, "learning_rate": 1.0862075934889694e-05, "loss": 0.6217, "step": 4801 }, { "epoch": 0.49, "grad_norm": 1.6556912744299246, "learning_rate": 1.085879369739567e-05, "loss": 0.6765, "step": 4802 }, { "epoch": 0.49, "grad_norm": 1.5322411610345907, "learning_rate": 1.085551136669302e-05, "loss": 0.6711, "step": 4803 }, { "epoch": 0.49, "grad_norm": 1.5823221318504805, "learning_rate": 1.0852228943137992e-05, "loss": 0.703, "step": 4804 }, { "epoch": 0.49, "grad_norm": 1.5657096226939102, "learning_rate": 1.0848946427086839e-05, "loss": 0.7833, "step": 4805 }, { "epoch": 0.49, "grad_norm": 1.452125827239233, "learning_rate": 1.0845663818895826e-05, "loss": 0.5953, "step": 4806 }, { "epoch": 0.49, "grad_norm": 1.4984899358281312, "learning_rate": 1.0842381118921233e-05, "loss": 0.6292, "step": 4807 }, { "epoch": 0.49, "grad_norm": 1.705995215029523, "learning_rate": 1.083909832751934e-05, "loss": 0.7315, "step": 4808 }, { "epoch": 0.49, "grad_norm": 1.6352719630144974, "learning_rate": 1.0835815445046447e-05, "loss": 0.7375, "step": 4809 }, { "epoch": 0.49, "grad_norm": 1.6271663906870726, "learning_rate": 1.0832532471858857e-05, "loss": 0.8057, "step": 4810 }, { "epoch": 0.49, "grad_norm": 1.4259725059873003, "learning_rate": 1.0829249408312888e-05, "loss": 0.6011, "step": 4811 }, { "epoch": 0.49, "grad_norm": 1.5507813456773818, "learning_rate": 1.0825966254764864e-05, "loss": 0.7963, "step": 4812 }, { "epoch": 0.49, "grad_norm": 2.4161423181330814, "learning_rate": 1.0822683011571118e-05, "loss": 0.7603, "step": 4813 }, { "epoch": 0.49, "grad_norm": 1.7237254932378825, "learning_rate": 1.0819399679087997e-05, "loss": 0.7259, "step": 4814 }, { "epoch": 0.49, "grad_norm": 1.672925153363037, "learning_rate": 1.0816116257671856e-05, "loss": 0.7901, "step": 4815 }, { "epoch": 0.49, "grad_norm": 1.6042468592684607, "learning_rate": 1.0812832747679054e-05, "loss": 0.6788, "step": 4816 }, { "epoch": 0.49, "grad_norm": 1.496471070682988, "learning_rate": 1.0809549149465971e-05, "loss": 0.7245, "step": 4817 }, { "epoch": 0.49, "grad_norm": 1.6801939660509613, "learning_rate": 1.0806265463388989e-05, "loss": 0.6754, "step": 4818 }, { "epoch": 0.49, "grad_norm": 1.7168952904835155, "learning_rate": 1.0802981689804499e-05, "loss": 0.7401, "step": 4819 }, { "epoch": 0.49, "grad_norm": 1.5862059151970802, "learning_rate": 1.0799697829068903e-05, "loss": 0.8646, "step": 4820 }, { "epoch": 0.49, "grad_norm": 1.504679180024265, "learning_rate": 1.0796413881538614e-05, "loss": 0.7113, "step": 4821 }, { "epoch": 0.49, "grad_norm": 1.5559146837534314, "learning_rate": 1.0793129847570052e-05, "loss": 0.6912, "step": 4822 }, { "epoch": 0.49, "grad_norm": 1.4792854093478405, "learning_rate": 1.0789845727519647e-05, "loss": 0.6655, "step": 4823 }, { "epoch": 0.49, "grad_norm": 1.3621064320761462, "learning_rate": 1.0786561521743843e-05, "loss": 0.7083, "step": 4824 }, { "epoch": 0.49, "grad_norm": 1.5724233675494086, "learning_rate": 1.0783277230599088e-05, "loss": 0.6914, "step": 4825 }, { "epoch": 0.49, "grad_norm": 1.6449096777501475, "learning_rate": 1.0779992854441833e-05, "loss": 0.7233, "step": 4826 }, { "epoch": 0.49, "grad_norm": 1.5395166521358816, "learning_rate": 1.0776708393628555e-05, "loss": 0.7435, "step": 4827 }, { "epoch": 0.49, "grad_norm": 1.5538379810180913, "learning_rate": 1.0773423848515728e-05, "loss": 0.6461, "step": 4828 }, { "epoch": 0.49, "grad_norm": 1.7966878586338726, "learning_rate": 1.0770139219459834e-05, "loss": 0.7655, "step": 4829 }, { "epoch": 0.49, "grad_norm": 1.6554512917828683, "learning_rate": 1.0766854506817374e-05, "loss": 0.7496, "step": 4830 }, { "epoch": 0.49, "grad_norm": 1.5582376751493443, "learning_rate": 1.0763569710944848e-05, "loss": 0.7463, "step": 4831 }, { "epoch": 0.49, "grad_norm": 1.9045453022232466, "learning_rate": 1.076028483219877e-05, "loss": 0.7948, "step": 4832 }, { "epoch": 0.49, "grad_norm": 1.6515591767481628, "learning_rate": 1.0756999870935668e-05, "loss": 0.8395, "step": 4833 }, { "epoch": 0.49, "grad_norm": 1.4945025057050394, "learning_rate": 1.0753714827512063e-05, "loss": 0.7287, "step": 4834 }, { "epoch": 0.49, "grad_norm": 1.586248043237468, "learning_rate": 1.0750429702284499e-05, "loss": 0.7354, "step": 4835 }, { "epoch": 0.49, "grad_norm": 1.567076394776895, "learning_rate": 1.0747144495609531e-05, "loss": 0.7669, "step": 4836 }, { "epoch": 0.49, "grad_norm": 1.6765578455669068, "learning_rate": 1.074385920784371e-05, "loss": 0.7558, "step": 4837 }, { "epoch": 0.49, "grad_norm": 1.512168260010115, "learning_rate": 1.0740573839343607e-05, "loss": 0.829, "step": 4838 }, { "epoch": 0.49, "grad_norm": 1.4896477079673143, "learning_rate": 1.0737288390465792e-05, "loss": 0.6725, "step": 4839 }, { "epoch": 0.49, "grad_norm": 1.4702480151410497, "learning_rate": 1.073400286156685e-05, "loss": 0.7041, "step": 4840 }, { "epoch": 0.49, "grad_norm": 1.6569811631390055, "learning_rate": 1.073071725300338e-05, "loss": 0.7568, "step": 4841 }, { "epoch": 0.49, "grad_norm": 1.6490024318489755, "learning_rate": 1.0727431565131978e-05, "loss": 0.7976, "step": 4842 }, { "epoch": 0.49, "grad_norm": 1.5752260202004427, "learning_rate": 1.0724145798309258e-05, "loss": 0.714, "step": 4843 }, { "epoch": 0.49, "grad_norm": 1.5449459528099991, "learning_rate": 1.0720859952891834e-05, "loss": 0.7015, "step": 4844 }, { "epoch": 0.49, "grad_norm": 1.6503976761089003, "learning_rate": 1.0717574029236333e-05, "loss": 0.8448, "step": 4845 }, { "epoch": 0.49, "grad_norm": 1.612022678078976, "learning_rate": 1.0714288027699392e-05, "loss": 0.7325, "step": 4846 }, { "epoch": 0.49, "grad_norm": 1.6225715275901382, "learning_rate": 1.071100194863766e-05, "loss": 0.7276, "step": 4847 }, { "epoch": 0.49, "grad_norm": 1.476340849753411, "learning_rate": 1.0707715792407784e-05, "loss": 0.726, "step": 4848 }, { "epoch": 0.49, "grad_norm": 1.4837181460190332, "learning_rate": 1.0704429559366424e-05, "loss": 0.6483, "step": 4849 }, { "epoch": 0.49, "grad_norm": 1.5986402635920125, "learning_rate": 1.0701143249870253e-05, "loss": 0.6338, "step": 4850 }, { "epoch": 0.49, "grad_norm": 1.7729055343245756, "learning_rate": 1.0697856864275949e-05, "loss": 0.8581, "step": 4851 }, { "epoch": 0.49, "grad_norm": 1.4999791982867494, "learning_rate": 1.0694570402940192e-05, "loss": 0.756, "step": 4852 }, { "epoch": 0.49, "grad_norm": 1.4845152414841658, "learning_rate": 1.0691283866219682e-05, "loss": 0.7603, "step": 4853 }, { "epoch": 0.49, "grad_norm": 1.5932376783084876, "learning_rate": 1.0687997254471117e-05, "loss": 0.6744, "step": 4854 }, { "epoch": 0.49, "grad_norm": 1.685232234665782, "learning_rate": 1.0684710568051211e-05, "loss": 0.734, "step": 4855 }, { "epoch": 0.49, "grad_norm": 1.7060391227861584, "learning_rate": 1.068142380731668e-05, "loss": 0.764, "step": 4856 }, { "epoch": 0.49, "grad_norm": 1.53478915495137, "learning_rate": 1.0678136972624249e-05, "loss": 0.7804, "step": 4857 }, { "epoch": 0.49, "grad_norm": 1.5942102451563043, "learning_rate": 1.0674850064330655e-05, "loss": 0.7312, "step": 4858 }, { "epoch": 0.49, "grad_norm": 1.5305582484928222, "learning_rate": 1.0671563082792643e-05, "loss": 0.624, "step": 4859 }, { "epoch": 0.49, "grad_norm": 1.7287157430425841, "learning_rate": 1.0668276028366957e-05, "loss": 0.7495, "step": 4860 }, { "epoch": 0.49, "grad_norm": 1.5015860213975631, "learning_rate": 1.0664988901410361e-05, "loss": 0.6169, "step": 4861 }, { "epoch": 0.49, "grad_norm": 1.5295651950629567, "learning_rate": 1.0661701702279619e-05, "loss": 0.7288, "step": 4862 }, { "epoch": 0.49, "grad_norm": 1.3833428272326505, "learning_rate": 1.0658414431331502e-05, "loss": 0.7572, "step": 4863 }, { "epoch": 0.49, "grad_norm": 1.8452863333294058, "learning_rate": 1.0655127088922799e-05, "loss": 0.7759, "step": 4864 }, { "epoch": 0.49, "grad_norm": 1.5262039228128124, "learning_rate": 1.0651839675410295e-05, "loss": 0.7951, "step": 4865 }, { "epoch": 0.49, "grad_norm": 1.5133758309522023, "learning_rate": 1.0648552191150784e-05, "loss": 0.5965, "step": 4866 }, { "epoch": 0.5, "grad_norm": 1.473671675400503, "learning_rate": 1.0645264636501078e-05, "loss": 0.7426, "step": 4867 }, { "epoch": 0.5, "grad_norm": 1.5734654354890787, "learning_rate": 1.0641977011817986e-05, "loss": 0.7293, "step": 4868 }, { "epoch": 0.5, "grad_norm": 1.5238583307710616, "learning_rate": 1.0638689317458328e-05, "loss": 0.667, "step": 4869 }, { "epoch": 0.5, "grad_norm": 1.5674873789425725, "learning_rate": 1.0635401553778934e-05, "loss": 0.7048, "step": 4870 }, { "epoch": 0.5, "grad_norm": 1.476556899148976, "learning_rate": 1.0632113721136636e-05, "loss": 0.693, "step": 4871 }, { "epoch": 0.5, "grad_norm": 1.5318947628197657, "learning_rate": 1.0628825819888281e-05, "loss": 0.7025, "step": 4872 }, { "epoch": 0.5, "grad_norm": 1.317050099437974, "learning_rate": 1.0625537850390718e-05, "loss": 0.7165, "step": 4873 }, { "epoch": 0.5, "grad_norm": 1.591593406876283, "learning_rate": 1.0622249813000799e-05, "loss": 0.6269, "step": 4874 }, { "epoch": 0.5, "grad_norm": 1.521353505316856, "learning_rate": 1.0618961708075398e-05, "loss": 0.6831, "step": 4875 }, { "epoch": 0.5, "grad_norm": 1.6774101897798614, "learning_rate": 1.061567353597138e-05, "loss": 0.8003, "step": 4876 }, { "epoch": 0.5, "grad_norm": 1.5712174893359296, "learning_rate": 1.061238529704563e-05, "loss": 0.7023, "step": 4877 }, { "epoch": 0.5, "grad_norm": 1.5083574288941524, "learning_rate": 1.060909699165503e-05, "loss": 0.6789, "step": 4878 }, { "epoch": 0.5, "grad_norm": 1.7912885880405833, "learning_rate": 1.060580862015648e-05, "loss": 0.7922, "step": 4879 }, { "epoch": 0.5, "grad_norm": 1.4282792013974326, "learning_rate": 1.0602520182906877e-05, "loss": 0.6505, "step": 4880 }, { "epoch": 0.5, "grad_norm": 1.579379700276147, "learning_rate": 1.0599231680263127e-05, "loss": 0.688, "step": 4881 }, { "epoch": 0.5, "grad_norm": 1.4453470872146301, "learning_rate": 1.0595943112582152e-05, "loss": 0.6834, "step": 4882 }, { "epoch": 0.5, "grad_norm": 1.5607007989926776, "learning_rate": 1.0592654480220874e-05, "loss": 0.5767, "step": 4883 }, { "epoch": 0.5, "grad_norm": 1.5797955334370553, "learning_rate": 1.0589365783536218e-05, "loss": 0.6384, "step": 4884 }, { "epoch": 0.5, "grad_norm": 1.5695344367475006, "learning_rate": 1.0586077022885122e-05, "loss": 0.7588, "step": 4885 }, { "epoch": 0.5, "grad_norm": 1.6856965266457415, "learning_rate": 1.0582788198624532e-05, "loss": 0.7736, "step": 4886 }, { "epoch": 0.5, "grad_norm": 1.5489583302375258, "learning_rate": 1.0579499311111397e-05, "loss": 0.7299, "step": 4887 }, { "epoch": 0.5, "grad_norm": 1.5112208970054677, "learning_rate": 1.0576210360702673e-05, "loss": 0.7585, "step": 4888 }, { "epoch": 0.5, "grad_norm": 1.630106670056698, "learning_rate": 1.0572921347755325e-05, "loss": 0.6544, "step": 4889 }, { "epoch": 0.5, "grad_norm": 1.4713329795106127, "learning_rate": 1.0569632272626323e-05, "loss": 0.677, "step": 4890 }, { "epoch": 0.5, "grad_norm": 1.5021811281953732, "learning_rate": 1.0566343135672647e-05, "loss": 0.7436, "step": 4891 }, { "epoch": 0.5, "grad_norm": 1.6665198712378295, "learning_rate": 1.0563053937251282e-05, "loss": 0.7593, "step": 4892 }, { "epoch": 0.5, "grad_norm": 1.5413742120634135, "learning_rate": 1.0559764677719218e-05, "loss": 0.731, "step": 4893 }, { "epoch": 0.5, "grad_norm": 1.694579223239616, "learning_rate": 1.0556475357433446e-05, "loss": 0.6751, "step": 4894 }, { "epoch": 0.5, "grad_norm": 1.66881771693106, "learning_rate": 1.0553185976750981e-05, "loss": 0.7052, "step": 4895 }, { "epoch": 0.5, "grad_norm": 1.570220760256492, "learning_rate": 1.054989653602883e-05, "loss": 0.633, "step": 4896 }, { "epoch": 0.5, "grad_norm": 1.6084551011215464, "learning_rate": 1.0546607035624008e-05, "loss": 0.842, "step": 4897 }, { "epoch": 0.5, "grad_norm": 1.5322621024852836, "learning_rate": 1.0543317475893541e-05, "loss": 0.7698, "step": 4898 }, { "epoch": 0.5, "grad_norm": 1.6600600782491488, "learning_rate": 1.0540027857194457e-05, "loss": 0.7044, "step": 4899 }, { "epoch": 0.5, "grad_norm": 1.381638149166171, "learning_rate": 1.0536738179883795e-05, "loss": 0.6612, "step": 4900 }, { "epoch": 0.5, "grad_norm": 1.419555536907255, "learning_rate": 1.0533448444318598e-05, "loss": 0.6967, "step": 4901 }, { "epoch": 0.5, "grad_norm": 1.6555120053495183, "learning_rate": 1.0530158650855913e-05, "loss": 0.7314, "step": 4902 }, { "epoch": 0.5, "grad_norm": 1.6046414041018124, "learning_rate": 1.0526868799852797e-05, "loss": 0.744, "step": 4903 }, { "epoch": 0.5, "grad_norm": 1.7339299401857255, "learning_rate": 1.0523578891666313e-05, "loss": 0.7227, "step": 4904 }, { "epoch": 0.5, "grad_norm": 1.5164595375565013, "learning_rate": 1.0520288926653528e-05, "loss": 0.7927, "step": 4905 }, { "epoch": 0.5, "grad_norm": 1.5925848482022713, "learning_rate": 1.0516998905171516e-05, "loss": 0.6613, "step": 4906 }, { "epoch": 0.5, "grad_norm": 1.614041586169326, "learning_rate": 1.051370882757736e-05, "loss": 0.7167, "step": 4907 }, { "epoch": 0.5, "grad_norm": 1.690770155753182, "learning_rate": 1.0510418694228139e-05, "loss": 0.7274, "step": 4908 }, { "epoch": 0.5, "grad_norm": 1.558572494279657, "learning_rate": 1.0507128505480956e-05, "loss": 0.6946, "step": 4909 }, { "epoch": 0.5, "grad_norm": 1.64031812904918, "learning_rate": 1.0503838261692902e-05, "loss": 0.7701, "step": 4910 }, { "epoch": 0.5, "grad_norm": 1.461769504781515, "learning_rate": 1.0500547963221086e-05, "loss": 0.6864, "step": 4911 }, { "epoch": 0.5, "grad_norm": 1.5102708091137018, "learning_rate": 1.0497257610422614e-05, "loss": 0.6578, "step": 4912 }, { "epoch": 0.5, "grad_norm": 1.863032025828687, "learning_rate": 1.0493967203654604e-05, "loss": 0.682, "step": 4913 }, { "epoch": 0.5, "grad_norm": 1.383540654360992, "learning_rate": 1.0490676743274181e-05, "loss": 0.7154, "step": 4914 }, { "epoch": 0.5, "grad_norm": 1.5303593280040118, "learning_rate": 1.0487386229638473e-05, "loss": 0.7482, "step": 4915 }, { "epoch": 0.5, "grad_norm": 1.4621120752135677, "learning_rate": 1.0484095663104608e-05, "loss": 0.6405, "step": 4916 }, { "epoch": 0.5, "grad_norm": 1.604206772487237, "learning_rate": 1.048080504402973e-05, "loss": 0.6942, "step": 4917 }, { "epoch": 0.5, "grad_norm": 1.6209202519114927, "learning_rate": 1.0477514372770983e-05, "loss": 0.7303, "step": 4918 }, { "epoch": 0.5, "grad_norm": 1.5004633451450609, "learning_rate": 1.0474223649685517e-05, "loss": 0.67, "step": 4919 }, { "epoch": 0.5, "grad_norm": 1.6091999763739429, "learning_rate": 1.0470932875130493e-05, "loss": 0.6157, "step": 4920 }, { "epoch": 0.5, "grad_norm": 1.6686612577304816, "learning_rate": 1.0467642049463068e-05, "loss": 0.7565, "step": 4921 }, { "epoch": 0.5, "grad_norm": 1.6312257099474892, "learning_rate": 1.0464351173040412e-05, "loss": 0.7045, "step": 4922 }, { "epoch": 0.5, "grad_norm": 1.7194543341860458, "learning_rate": 1.0461060246219699e-05, "loss": 0.7014, "step": 4923 }, { "epoch": 0.5, "grad_norm": 1.477713446193529, "learning_rate": 1.0457769269358104e-05, "loss": 0.661, "step": 4924 }, { "epoch": 0.5, "grad_norm": 1.5176551779973013, "learning_rate": 1.0454478242812814e-05, "loss": 0.7093, "step": 4925 }, { "epoch": 0.5, "grad_norm": 1.6337582845395682, "learning_rate": 1.0451187166941019e-05, "loss": 0.7892, "step": 4926 }, { "epoch": 0.5, "grad_norm": 1.5818579907899044, "learning_rate": 1.0447896042099914e-05, "loss": 0.7644, "step": 4927 }, { "epoch": 0.5, "grad_norm": 1.5478429707052999, "learning_rate": 1.0444604868646695e-05, "loss": 0.8387, "step": 4928 }, { "epoch": 0.5, "grad_norm": 1.5116126932431422, "learning_rate": 1.0441313646938571e-05, "loss": 0.7394, "step": 4929 }, { "epoch": 0.5, "grad_norm": 1.3437106808936985, "learning_rate": 1.0438022377332755e-05, "loss": 0.5898, "step": 4930 }, { "epoch": 0.5, "grad_norm": 1.5724323804816993, "learning_rate": 1.0434731060186456e-05, "loss": 0.7116, "step": 4931 }, { "epoch": 0.5, "grad_norm": 1.4391075832989195, "learning_rate": 1.0431439695856902e-05, "loss": 0.7365, "step": 4932 }, { "epoch": 0.5, "grad_norm": 1.7101197878193377, "learning_rate": 1.0428148284701313e-05, "loss": 0.7036, "step": 4933 }, { "epoch": 0.5, "grad_norm": 1.5331147672575676, "learning_rate": 1.0424856827076926e-05, "loss": 0.7613, "step": 4934 }, { "epoch": 0.5, "grad_norm": 1.5142965480147195, "learning_rate": 1.0421565323340971e-05, "loss": 0.7013, "step": 4935 }, { "epoch": 0.5, "grad_norm": 1.5435928907819016, "learning_rate": 1.0418273773850696e-05, "loss": 0.7334, "step": 4936 }, { "epoch": 0.5, "grad_norm": 1.603917057655441, "learning_rate": 1.0414982178963344e-05, "loss": 0.6197, "step": 4937 }, { "epoch": 0.5, "grad_norm": 1.6710661824730904, "learning_rate": 1.0411690539036165e-05, "loss": 0.7766, "step": 4938 }, { "epoch": 0.5, "grad_norm": 1.796042940499753, "learning_rate": 1.0408398854426413e-05, "loss": 0.779, "step": 4939 }, { "epoch": 0.5, "grad_norm": 1.5307692075330168, "learning_rate": 1.0405107125491359e-05, "loss": 0.7377, "step": 4940 }, { "epoch": 0.5, "grad_norm": 1.7343403838596432, "learning_rate": 1.0401815352588256e-05, "loss": 0.6619, "step": 4941 }, { "epoch": 0.5, "grad_norm": 1.6659604279911036, "learning_rate": 1.0398523536074383e-05, "loss": 0.7201, "step": 4942 }, { "epoch": 0.5, "grad_norm": 1.4924325225794244, "learning_rate": 1.0395231676307012e-05, "loss": 0.7449, "step": 4943 }, { "epoch": 0.5, "grad_norm": 1.6215899504993245, "learning_rate": 1.039193977364342e-05, "loss": 0.6166, "step": 4944 }, { "epoch": 0.5, "grad_norm": 1.5413755103496067, "learning_rate": 1.0388647828440899e-05, "loss": 0.6889, "step": 4945 }, { "epoch": 0.5, "grad_norm": 1.8571969034985918, "learning_rate": 1.0385355841056732e-05, "loss": 0.7965, "step": 4946 }, { "epoch": 0.5, "grad_norm": 1.505675486970467, "learning_rate": 1.0382063811848216e-05, "loss": 0.7368, "step": 4947 }, { "epoch": 0.5, "grad_norm": 1.6573830089678232, "learning_rate": 1.0378771741172647e-05, "loss": 0.728, "step": 4948 }, { "epoch": 0.5, "grad_norm": 1.5151009935557034, "learning_rate": 1.0375479629387328e-05, "loss": 0.6786, "step": 4949 }, { "epoch": 0.5, "grad_norm": 1.6888252027377988, "learning_rate": 1.037218747684957e-05, "loss": 0.785, "step": 4950 }, { "epoch": 0.5, "grad_norm": 1.6060069581627165, "learning_rate": 1.0368895283916678e-05, "loss": 0.7782, "step": 4951 }, { "epoch": 0.5, "grad_norm": 1.5784964492645637, "learning_rate": 1.0365603050945977e-05, "loss": 0.6826, "step": 4952 }, { "epoch": 0.5, "grad_norm": 1.5425911773394898, "learning_rate": 1.0362310778294775e-05, "loss": 0.7056, "step": 4953 }, { "epoch": 0.5, "grad_norm": 1.599694320939845, "learning_rate": 1.0359018466320407e-05, "loss": 0.7094, "step": 4954 }, { "epoch": 0.5, "grad_norm": 1.6737011916314462, "learning_rate": 1.0355726115380203e-05, "loss": 0.7664, "step": 4955 }, { "epoch": 0.5, "grad_norm": 1.530616756386857, "learning_rate": 1.0352433725831487e-05, "loss": 0.7243, "step": 4956 }, { "epoch": 0.5, "grad_norm": 1.5627773628239507, "learning_rate": 1.0349141298031601e-05, "loss": 0.6623, "step": 4957 }, { "epoch": 0.5, "grad_norm": 1.5693580207455513, "learning_rate": 1.0345848832337891e-05, "loss": 0.7476, "step": 4958 }, { "epoch": 0.5, "grad_norm": 1.4476294616961733, "learning_rate": 1.0342556329107699e-05, "loss": 0.7097, "step": 4959 }, { "epoch": 0.5, "grad_norm": 1.4493711917271666, "learning_rate": 1.0339263788698373e-05, "loss": 0.607, "step": 4960 }, { "epoch": 0.5, "grad_norm": 1.7351171237436391, "learning_rate": 1.0335971211467265e-05, "loss": 0.8264, "step": 4961 }, { "epoch": 0.5, "grad_norm": 1.4803412656979784, "learning_rate": 1.0332678597771739e-05, "loss": 0.7487, "step": 4962 }, { "epoch": 0.5, "grad_norm": 1.4887948440376384, "learning_rate": 1.0329385947969154e-05, "loss": 0.6569, "step": 4963 }, { "epoch": 0.5, "grad_norm": 1.8106392631604589, "learning_rate": 1.0326093262416874e-05, "loss": 0.8789, "step": 4964 }, { "epoch": 0.51, "grad_norm": 1.6668317622828839, "learning_rate": 1.0322800541472273e-05, "loss": 0.8512, "step": 4965 }, { "epoch": 0.51, "grad_norm": 1.4499170830275672, "learning_rate": 1.0319507785492718e-05, "loss": 0.7089, "step": 4966 }, { "epoch": 0.51, "grad_norm": 1.4472378715645056, "learning_rate": 1.031621499483559e-05, "loss": 0.6131, "step": 4967 }, { "epoch": 0.51, "grad_norm": 1.6582927017623401, "learning_rate": 1.031292216985827e-05, "loss": 0.8072, "step": 4968 }, { "epoch": 0.51, "grad_norm": 1.4679495456025033, "learning_rate": 1.030962931091814e-05, "loss": 0.7026, "step": 4969 }, { "epoch": 0.51, "grad_norm": 1.6686120921027308, "learning_rate": 1.0306336418372595e-05, "loss": 0.6743, "step": 4970 }, { "epoch": 0.51, "grad_norm": 1.6297080468438931, "learning_rate": 1.0303043492579017e-05, "loss": 0.7175, "step": 4971 }, { "epoch": 0.51, "grad_norm": 1.4740594819956203, "learning_rate": 1.029975053389481e-05, "loss": 0.6553, "step": 4972 }, { "epoch": 0.51, "grad_norm": 1.6604044930548936, "learning_rate": 1.0296457542677371e-05, "loss": 0.7285, "step": 4973 }, { "epoch": 0.51, "grad_norm": 1.5331123784276655, "learning_rate": 1.02931645192841e-05, "loss": 0.6788, "step": 4974 }, { "epoch": 0.51, "grad_norm": 1.5496946285391504, "learning_rate": 1.0289871464072405e-05, "loss": 0.7427, "step": 4975 }, { "epoch": 0.51, "grad_norm": 1.5626346544494052, "learning_rate": 1.0286578377399696e-05, "loss": 0.5882, "step": 4976 }, { "epoch": 0.51, "grad_norm": 1.7064394550189277, "learning_rate": 1.0283285259623385e-05, "loss": 0.7806, "step": 4977 }, { "epoch": 0.51, "grad_norm": 1.74754784601924, "learning_rate": 1.0279992111100891e-05, "loss": 0.8419, "step": 4978 }, { "epoch": 0.51, "grad_norm": 1.6513705967731058, "learning_rate": 1.027669893218963e-05, "loss": 0.7918, "step": 4979 }, { "epoch": 0.51, "grad_norm": 1.5863692267049108, "learning_rate": 1.0273405723247028e-05, "loss": 0.7655, "step": 4980 }, { "epoch": 0.51, "grad_norm": 1.5455744916739862, "learning_rate": 1.027011248463051e-05, "loss": 0.662, "step": 4981 }, { "epoch": 0.51, "grad_norm": 1.6189715089164287, "learning_rate": 1.0266819216697506e-05, "loss": 0.6878, "step": 4982 }, { "epoch": 0.51, "grad_norm": 1.5728852805180555, "learning_rate": 1.0263525919805448e-05, "loss": 0.7392, "step": 4983 }, { "epoch": 0.51, "grad_norm": 1.6485182074086644, "learning_rate": 1.0260232594311775e-05, "loss": 0.6494, "step": 4984 }, { "epoch": 0.51, "grad_norm": 1.6860934646556733, "learning_rate": 1.025693924057392e-05, "loss": 0.8304, "step": 4985 }, { "epoch": 0.51, "grad_norm": 1.5444623763479748, "learning_rate": 1.025364585894933e-05, "loss": 0.721, "step": 4986 }, { "epoch": 0.51, "grad_norm": 1.6114327837659634, "learning_rate": 1.0250352449795449e-05, "loss": 0.6927, "step": 4987 }, { "epoch": 0.51, "grad_norm": 1.6844313667842148, "learning_rate": 1.0247059013469725e-05, "loss": 0.8153, "step": 4988 }, { "epoch": 0.51, "grad_norm": 1.3912046210481805, "learning_rate": 1.0243765550329606e-05, "loss": 0.7573, "step": 4989 }, { "epoch": 0.51, "grad_norm": 1.7719163050138464, "learning_rate": 1.0240472060732552e-05, "loss": 0.7606, "step": 4990 }, { "epoch": 0.51, "grad_norm": 1.5907944102676055, "learning_rate": 1.0237178545036015e-05, "loss": 0.6463, "step": 4991 }, { "epoch": 0.51, "grad_norm": 1.4653612312225397, "learning_rate": 1.0233885003597457e-05, "loss": 0.6643, "step": 4992 }, { "epoch": 0.51, "grad_norm": 1.6555274215570521, "learning_rate": 1.0230591436774337e-05, "loss": 0.7665, "step": 4993 }, { "epoch": 0.51, "grad_norm": 1.480369019040231, "learning_rate": 1.0227297844924123e-05, "loss": 0.7743, "step": 4994 }, { "epoch": 0.51, "grad_norm": 1.4834908923443815, "learning_rate": 1.0224004228404286e-05, "loss": 0.6652, "step": 4995 }, { "epoch": 0.51, "grad_norm": 1.7027958245467154, "learning_rate": 1.0220710587572289e-05, "loss": 0.6794, "step": 4996 }, { "epoch": 0.51, "grad_norm": 1.5892728457192657, "learning_rate": 1.0217416922785613e-05, "loss": 0.8259, "step": 4997 }, { "epoch": 0.51, "grad_norm": 1.417842304139637, "learning_rate": 1.0214123234401725e-05, "loss": 0.6653, "step": 4998 }, { "epoch": 0.51, "grad_norm": 1.4897514214464702, "learning_rate": 1.0210829522778111e-05, "loss": 0.6722, "step": 4999 }, { "epoch": 0.51, "grad_norm": 1.4753853811493134, "learning_rate": 1.020753578827225e-05, "loss": 0.6329, "step": 5000 }, { "epoch": 0.51, "grad_norm": 1.698061227933607, "learning_rate": 1.0204242031241624e-05, "loss": 0.6545, "step": 5001 }, { "epoch": 0.51, "grad_norm": 1.5685023415147041, "learning_rate": 1.020094825204372e-05, "loss": 0.7103, "step": 5002 }, { "epoch": 0.51, "grad_norm": 1.757520070304479, "learning_rate": 1.0197654451036025e-05, "loss": 0.8271, "step": 5003 }, { "epoch": 0.51, "grad_norm": 1.496912420552621, "learning_rate": 1.0194360628576032e-05, "loss": 0.646, "step": 5004 }, { "epoch": 0.51, "grad_norm": 1.6368808812363533, "learning_rate": 1.0191066785021232e-05, "loss": 0.6652, "step": 5005 }, { "epoch": 0.51, "grad_norm": 1.7515757375629755, "learning_rate": 1.0187772920729118e-05, "loss": 0.7075, "step": 5006 }, { "epoch": 0.51, "grad_norm": 1.532702151487452, "learning_rate": 1.0184479036057191e-05, "loss": 0.7008, "step": 5007 }, { "epoch": 0.51, "grad_norm": 1.8067628392350867, "learning_rate": 1.0181185131362949e-05, "loss": 0.8778, "step": 5008 }, { "epoch": 0.51, "grad_norm": 1.5930483783286502, "learning_rate": 1.0177891207003897e-05, "loss": 0.6907, "step": 5009 }, { "epoch": 0.51, "grad_norm": 1.5893821059072393, "learning_rate": 1.0174597263337533e-05, "loss": 0.6614, "step": 5010 }, { "epoch": 0.51, "grad_norm": 1.6078386505119349, "learning_rate": 1.0171303300721369e-05, "loss": 0.8173, "step": 5011 }, { "epoch": 0.51, "grad_norm": 1.4627833807251032, "learning_rate": 1.0168009319512908e-05, "loss": 0.735, "step": 5012 }, { "epoch": 0.51, "grad_norm": 1.5924423835822419, "learning_rate": 1.0164715320069667e-05, "loss": 0.6657, "step": 5013 }, { "epoch": 0.51, "grad_norm": 1.3847413718790185, "learning_rate": 1.016142130274915e-05, "loss": 0.7492, "step": 5014 }, { "epoch": 0.51, "grad_norm": 1.4717912087381975, "learning_rate": 1.0158127267908877e-05, "loss": 0.6354, "step": 5015 }, { "epoch": 0.51, "grad_norm": 1.5804650771140725, "learning_rate": 1.0154833215906359e-05, "loss": 0.6814, "step": 5016 }, { "epoch": 0.51, "grad_norm": 1.802943339801398, "learning_rate": 1.015153914709912e-05, "loss": 0.9026, "step": 5017 }, { "epoch": 0.51, "grad_norm": 1.5091332096941117, "learning_rate": 1.0148245061844678e-05, "loss": 0.8205, "step": 5018 }, { "epoch": 0.51, "grad_norm": 1.6077826529932098, "learning_rate": 1.0144950960500548e-05, "loss": 0.6848, "step": 5019 }, { "epoch": 0.51, "grad_norm": 1.498282901072857, "learning_rate": 1.0141656843424261e-05, "loss": 0.7553, "step": 5020 }, { "epoch": 0.51, "grad_norm": 1.5345091773948873, "learning_rate": 1.0138362710973337e-05, "loss": 0.7252, "step": 5021 }, { "epoch": 0.51, "grad_norm": 1.5877742089090119, "learning_rate": 1.0135068563505305e-05, "loss": 0.6948, "step": 5022 }, { "epoch": 0.51, "grad_norm": 1.9732284318068651, "learning_rate": 1.0131774401377694e-05, "loss": 0.6836, "step": 5023 }, { "epoch": 0.51, "grad_norm": 1.4967048574718913, "learning_rate": 1.0128480224948032e-05, "loss": 0.653, "step": 5024 }, { "epoch": 0.51, "grad_norm": 1.472468435443647, "learning_rate": 1.0125186034573848e-05, "loss": 0.7266, "step": 5025 }, { "epoch": 0.51, "grad_norm": 1.8378646799585223, "learning_rate": 1.0121891830612682e-05, "loss": 0.7543, "step": 5026 }, { "epoch": 0.51, "grad_norm": 1.7292167157582072, "learning_rate": 1.0118597613422064e-05, "loss": 0.777, "step": 5027 }, { "epoch": 0.51, "grad_norm": 1.747577986929217, "learning_rate": 1.0115303383359527e-05, "loss": 0.6887, "step": 5028 }, { "epoch": 0.51, "grad_norm": 1.5050282607663057, "learning_rate": 1.011200914078261e-05, "loss": 0.7344, "step": 5029 }, { "epoch": 0.51, "grad_norm": 1.5261534192520534, "learning_rate": 1.0108714886048854e-05, "loss": 0.7532, "step": 5030 }, { "epoch": 0.51, "grad_norm": 1.4756222381550796, "learning_rate": 1.0105420619515798e-05, "loss": 0.7665, "step": 5031 }, { "epoch": 0.51, "grad_norm": 1.4293226846812288, "learning_rate": 1.0102126341540981e-05, "loss": 0.7232, "step": 5032 }, { "epoch": 0.51, "grad_norm": 1.8230140564560462, "learning_rate": 1.0098832052481946e-05, "loss": 0.7146, "step": 5033 }, { "epoch": 0.51, "grad_norm": 1.6393459817543277, "learning_rate": 1.0095537752696238e-05, "loss": 0.7186, "step": 5034 }, { "epoch": 0.51, "grad_norm": 1.540375836450423, "learning_rate": 1.0092243442541404e-05, "loss": 0.6179, "step": 5035 }, { "epoch": 0.51, "grad_norm": 1.6002854156707615, "learning_rate": 1.0088949122374982e-05, "loss": 0.7583, "step": 5036 }, { "epoch": 0.51, "grad_norm": 1.791900632212417, "learning_rate": 1.0085654792554527e-05, "loss": 0.6443, "step": 5037 }, { "epoch": 0.51, "grad_norm": 1.8762373825564007, "learning_rate": 1.0082360453437583e-05, "loss": 0.7761, "step": 5038 }, { "epoch": 0.51, "grad_norm": 1.540714618809593, "learning_rate": 1.00790661053817e-05, "loss": 0.6828, "step": 5039 }, { "epoch": 0.51, "grad_norm": 1.584376129187976, "learning_rate": 1.0075771748744425e-05, "loss": 0.739, "step": 5040 }, { "epoch": 0.51, "grad_norm": 1.8348523122891849, "learning_rate": 1.0072477383883315e-05, "loss": 0.6564, "step": 5041 }, { "epoch": 0.51, "grad_norm": 1.7931328635932917, "learning_rate": 1.0069183011155919e-05, "loss": 0.7407, "step": 5042 }, { "epoch": 0.51, "grad_norm": 1.6265720031446385, "learning_rate": 1.0065888630919784e-05, "loss": 0.8066, "step": 5043 }, { "epoch": 0.51, "grad_norm": 1.6535312579766697, "learning_rate": 1.0062594243532473e-05, "loss": 0.7392, "step": 5044 }, { "epoch": 0.51, "grad_norm": 1.6066458750468946, "learning_rate": 1.0059299849351537e-05, "loss": 0.772, "step": 5045 }, { "epoch": 0.51, "grad_norm": 1.490332422068347, "learning_rate": 1.0056005448734527e-05, "loss": 0.7085, "step": 5046 }, { "epoch": 0.51, "grad_norm": 1.5434677098664784, "learning_rate": 1.0052711042039e-05, "loss": 0.6175, "step": 5047 }, { "epoch": 0.51, "grad_norm": 1.7218251257628168, "learning_rate": 1.0049416629622515e-05, "loss": 0.7077, "step": 5048 }, { "epoch": 0.51, "grad_norm": 1.5429660052228062, "learning_rate": 1.0046122211842629e-05, "loss": 0.6454, "step": 5049 }, { "epoch": 0.51, "grad_norm": 1.7276557227894023, "learning_rate": 1.0042827789056897e-05, "loss": 0.8497, "step": 5050 }, { "epoch": 0.51, "grad_norm": 1.5107847951141293, "learning_rate": 1.003953336162288e-05, "loss": 0.675, "step": 5051 }, { "epoch": 0.51, "grad_norm": 1.5534066484669686, "learning_rate": 1.0036238929898133e-05, "loss": 0.6476, "step": 5052 }, { "epoch": 0.51, "grad_norm": 1.7502090023426649, "learning_rate": 1.0032944494240215e-05, "loss": 0.7414, "step": 5053 }, { "epoch": 0.51, "grad_norm": 1.61180441500941, "learning_rate": 1.002965005500669e-05, "loss": 0.7667, "step": 5054 }, { "epoch": 0.51, "grad_norm": 1.5907909366442183, "learning_rate": 1.0026355612555116e-05, "loss": 0.8143, "step": 5055 }, { "epoch": 0.51, "grad_norm": 1.6927249074587167, "learning_rate": 1.0023061167243051e-05, "loss": 0.8298, "step": 5056 }, { "epoch": 0.51, "grad_norm": 1.558245964430676, "learning_rate": 1.0019766719428056e-05, "loss": 0.6458, "step": 5057 }, { "epoch": 0.51, "grad_norm": 1.5989346457818998, "learning_rate": 1.0016472269467694e-05, "loss": 0.7566, "step": 5058 }, { "epoch": 0.51, "grad_norm": 1.5501716886854584, "learning_rate": 1.0013177817719525e-05, "loss": 0.6956, "step": 5059 }, { "epoch": 0.51, "grad_norm": 1.5298661053568097, "learning_rate": 1.000988336454111e-05, "loss": 0.7485, "step": 5060 }, { "epoch": 0.51, "grad_norm": 1.534800077294969, "learning_rate": 1.0006588910290009e-05, "loss": 0.72, "step": 5061 }, { "epoch": 0.51, "grad_norm": 1.4039336604523878, "learning_rate": 1.0003294455323787e-05, "loss": 0.6622, "step": 5062 }, { "epoch": 0.52, "grad_norm": 1.4625189066192077, "learning_rate": 1e-05, "loss": 0.7393, "step": 5063 }, { "epoch": 0.52, "grad_norm": 1.6009023241100437, "learning_rate": 9.996705544676214e-06, "loss": 0.7206, "step": 5064 }, { "epoch": 0.52, "grad_norm": 1.6802185162627696, "learning_rate": 9.993411089709995e-06, "loss": 0.703, "step": 5065 }, { "epoch": 0.52, "grad_norm": 1.4998292988494204, "learning_rate": 9.990116635458893e-06, "loss": 0.6667, "step": 5066 }, { "epoch": 0.52, "grad_norm": 1.5951409889363042, "learning_rate": 9.98682218228048e-06, "loss": 0.7688, "step": 5067 }, { "epoch": 0.52, "grad_norm": 1.4420550450229865, "learning_rate": 9.98352773053231e-06, "loss": 0.833, "step": 5068 }, { "epoch": 0.52, "grad_norm": 1.3540576285928896, "learning_rate": 9.980233280571946e-06, "loss": 0.6754, "step": 5069 }, { "epoch": 0.52, "grad_norm": 1.58857712960787, "learning_rate": 9.976938832756952e-06, "loss": 0.7208, "step": 5070 }, { "epoch": 0.52, "grad_norm": 1.6888765345374839, "learning_rate": 9.973644387444887e-06, "loss": 0.8324, "step": 5071 }, { "epoch": 0.52, "grad_norm": 1.6628467015311343, "learning_rate": 9.970349944993315e-06, "loss": 0.7039, "step": 5072 }, { "epoch": 0.52, "grad_norm": 1.4986284311832823, "learning_rate": 9.967055505759787e-06, "loss": 0.6795, "step": 5073 }, { "epoch": 0.52, "grad_norm": 1.8380637247061966, "learning_rate": 9.963761070101869e-06, "loss": 0.8186, "step": 5074 }, { "epoch": 0.52, "grad_norm": 1.6236843675335488, "learning_rate": 9.960466638377125e-06, "loss": 0.7437, "step": 5075 }, { "epoch": 0.52, "grad_norm": 1.7043518992354776, "learning_rate": 9.957172210943105e-06, "loss": 0.6886, "step": 5076 }, { "epoch": 0.52, "grad_norm": 1.5516039920891835, "learning_rate": 9.953877788157373e-06, "loss": 0.7047, "step": 5077 }, { "epoch": 0.52, "grad_norm": 1.4788193377628533, "learning_rate": 9.950583370377488e-06, "loss": 0.7271, "step": 5078 }, { "epoch": 0.52, "grad_norm": 1.560881796187408, "learning_rate": 9.947288957961001e-06, "loss": 0.6769, "step": 5079 }, { "epoch": 0.52, "grad_norm": 1.5307649077697028, "learning_rate": 9.943994551265478e-06, "loss": 0.6648, "step": 5080 }, { "epoch": 0.52, "grad_norm": 1.565759677818178, "learning_rate": 9.940700150648467e-06, "loss": 0.7498, "step": 5081 }, { "epoch": 0.52, "grad_norm": 1.5830488858116634, "learning_rate": 9.937405756467527e-06, "loss": 0.7328, "step": 5082 }, { "epoch": 0.52, "grad_norm": 1.4985569654884294, "learning_rate": 9.934111369080218e-06, "loss": 0.6863, "step": 5083 }, { "epoch": 0.52, "grad_norm": 1.6931785992505062, "learning_rate": 9.930816988844084e-06, "loss": 0.6846, "step": 5084 }, { "epoch": 0.52, "grad_norm": 1.5202715907906006, "learning_rate": 9.927522616116687e-06, "loss": 0.6753, "step": 5085 }, { "epoch": 0.52, "grad_norm": 1.6362017411817606, "learning_rate": 9.924228251255577e-06, "loss": 0.8604, "step": 5086 }, { "epoch": 0.52, "grad_norm": 1.5759133874543885, "learning_rate": 9.920933894618303e-06, "loss": 0.696, "step": 5087 }, { "epoch": 0.52, "grad_norm": 1.7292168383139979, "learning_rate": 9.91763954656242e-06, "loss": 0.8085, "step": 5088 }, { "epoch": 0.52, "grad_norm": 1.5071322373168559, "learning_rate": 9.914345207445476e-06, "loss": 0.7611, "step": 5089 }, { "epoch": 0.52, "grad_norm": 1.6228730452289775, "learning_rate": 9.911050877625022e-06, "loss": 0.6774, "step": 5090 }, { "epoch": 0.52, "grad_norm": 1.608877474252974, "learning_rate": 9.9077565574586e-06, "loss": 0.7135, "step": 5091 }, { "epoch": 0.52, "grad_norm": 1.5956742547959841, "learning_rate": 9.904462247303764e-06, "loss": 0.8134, "step": 5092 }, { "epoch": 0.52, "grad_norm": 1.4093253731337068, "learning_rate": 9.901167947518057e-06, "loss": 0.6145, "step": 5093 }, { "epoch": 0.52, "grad_norm": 1.6676622348956422, "learning_rate": 9.89787365845902e-06, "loss": 0.7878, "step": 5094 }, { "epoch": 0.52, "grad_norm": 1.485830024047951, "learning_rate": 9.894579380484206e-06, "loss": 0.6392, "step": 5095 }, { "epoch": 0.52, "grad_norm": 1.8186613390608457, "learning_rate": 9.89128511395115e-06, "loss": 0.6955, "step": 5096 }, { "epoch": 0.52, "grad_norm": 1.5786168135399719, "learning_rate": 9.887990859217391e-06, "loss": 0.7995, "step": 5097 }, { "epoch": 0.52, "grad_norm": 1.7458285958219886, "learning_rate": 9.88469661664048e-06, "loss": 0.6816, "step": 5098 }, { "epoch": 0.52, "grad_norm": 1.511674397329433, "learning_rate": 9.881402386577942e-06, "loss": 0.6325, "step": 5099 }, { "epoch": 0.52, "grad_norm": 1.5292384328734583, "learning_rate": 9.87810816938732e-06, "loss": 0.6494, "step": 5100 }, { "epoch": 0.52, "grad_norm": 1.4321640908065174, "learning_rate": 9.874813965426154e-06, "loss": 0.6423, "step": 5101 }, { "epoch": 0.52, "grad_norm": 1.504524200960757, "learning_rate": 9.871519775051971e-06, "loss": 0.6557, "step": 5102 }, { "epoch": 0.52, "grad_norm": 1.4892683522137387, "learning_rate": 9.86822559862231e-06, "loss": 0.7091, "step": 5103 }, { "epoch": 0.52, "grad_norm": 1.6672876374415153, "learning_rate": 9.864931436494696e-06, "loss": 0.7504, "step": 5104 }, { "epoch": 0.52, "grad_norm": 1.5283640218290506, "learning_rate": 9.861637289026664e-06, "loss": 0.7191, "step": 5105 }, { "epoch": 0.52, "grad_norm": 1.476362253308102, "learning_rate": 9.858343156575744e-06, "loss": 0.6484, "step": 5106 }, { "epoch": 0.52, "grad_norm": 1.6467227908333926, "learning_rate": 9.855049039499454e-06, "loss": 0.7415, "step": 5107 }, { "epoch": 0.52, "grad_norm": 1.5866704705759278, "learning_rate": 9.851754938155329e-06, "loss": 0.722, "step": 5108 }, { "epoch": 0.52, "grad_norm": 1.51916605439026, "learning_rate": 9.848460852900883e-06, "loss": 0.6658, "step": 5109 }, { "epoch": 0.52, "grad_norm": 1.4627539145682227, "learning_rate": 9.845166784093641e-06, "loss": 0.675, "step": 5110 }, { "epoch": 0.52, "grad_norm": 1.681030853676064, "learning_rate": 9.841872732091128e-06, "loss": 0.7634, "step": 5111 }, { "epoch": 0.52, "grad_norm": 1.7011084668318175, "learning_rate": 9.838578697250852e-06, "loss": 0.7477, "step": 5112 }, { "epoch": 0.52, "grad_norm": 1.6294344584210423, "learning_rate": 9.835284679930335e-06, "loss": 0.7684, "step": 5113 }, { "epoch": 0.52, "grad_norm": 1.6873612611256998, "learning_rate": 9.831990680487094e-06, "loss": 0.7013, "step": 5114 }, { "epoch": 0.52, "grad_norm": 1.5622192387852167, "learning_rate": 9.828696699278633e-06, "loss": 0.749, "step": 5115 }, { "epoch": 0.52, "grad_norm": 1.4396246048138015, "learning_rate": 9.82540273666247e-06, "loss": 0.6634, "step": 5116 }, { "epoch": 0.52, "grad_norm": 1.59187032207698, "learning_rate": 9.822108792996107e-06, "loss": 0.7317, "step": 5117 }, { "epoch": 0.52, "grad_norm": 1.573390113679101, "learning_rate": 9.818814868637051e-06, "loss": 0.7894, "step": 5118 }, { "epoch": 0.52, "grad_norm": 1.6061202138652004, "learning_rate": 9.815520963942812e-06, "loss": 0.6493, "step": 5119 }, { "epoch": 0.52, "grad_norm": 1.5454123702799032, "learning_rate": 9.812227079270884e-06, "loss": 0.7948, "step": 5120 }, { "epoch": 0.52, "grad_norm": 1.597879788112768, "learning_rate": 9.808933214978773e-06, "loss": 0.7693, "step": 5121 }, { "epoch": 0.52, "grad_norm": 1.690313982093511, "learning_rate": 9.805639371423971e-06, "loss": 0.6432, "step": 5122 }, { "epoch": 0.52, "grad_norm": 1.677017710340723, "learning_rate": 9.802345548963977e-06, "loss": 0.7294, "step": 5123 }, { "epoch": 0.52, "grad_norm": 1.6484158834398428, "learning_rate": 9.799051747956284e-06, "loss": 0.718, "step": 5124 }, { "epoch": 0.52, "grad_norm": 1.7048099351617905, "learning_rate": 9.795757968758378e-06, "loss": 0.7263, "step": 5125 }, { "epoch": 0.52, "grad_norm": 1.6467261635402397, "learning_rate": 9.792464211727755e-06, "loss": 0.6859, "step": 5126 }, { "epoch": 0.52, "grad_norm": 1.3529923691945207, "learning_rate": 9.789170477221892e-06, "loss": 0.6789, "step": 5127 }, { "epoch": 0.52, "grad_norm": 1.608164579433592, "learning_rate": 9.785876765598276e-06, "loss": 0.754, "step": 5128 }, { "epoch": 0.52, "grad_norm": 1.6396310055821297, "learning_rate": 9.782583077214394e-06, "loss": 0.7712, "step": 5129 }, { "epoch": 0.52, "grad_norm": 1.6564971675287619, "learning_rate": 9.779289412427714e-06, "loss": 0.7607, "step": 5130 }, { "epoch": 0.52, "grad_norm": 1.6173430588878457, "learning_rate": 9.775995771595716e-06, "loss": 0.7981, "step": 5131 }, { "epoch": 0.52, "grad_norm": 1.6017258418261529, "learning_rate": 9.772702155075879e-06, "loss": 0.7253, "step": 5132 }, { "epoch": 0.52, "grad_norm": 1.7709026391696094, "learning_rate": 9.769408563225665e-06, "loss": 0.7332, "step": 5133 }, { "epoch": 0.52, "grad_norm": 1.519283381436825, "learning_rate": 9.76611499640255e-06, "loss": 0.6772, "step": 5134 }, { "epoch": 0.52, "grad_norm": 1.609935659876278, "learning_rate": 9.76282145496399e-06, "loss": 0.7068, "step": 5135 }, { "epoch": 0.52, "grad_norm": 1.531593684480193, "learning_rate": 9.75952793926745e-06, "loss": 0.646, "step": 5136 }, { "epoch": 0.52, "grad_norm": 1.5790104519401773, "learning_rate": 9.756234449670396e-06, "loss": 0.6888, "step": 5137 }, { "epoch": 0.52, "grad_norm": 1.446800963186642, "learning_rate": 9.752940986530279e-06, "loss": 0.6697, "step": 5138 }, { "epoch": 0.52, "grad_norm": 1.6523518022610042, "learning_rate": 9.749647550204555e-06, "loss": 0.7867, "step": 5139 }, { "epoch": 0.52, "grad_norm": 1.4971052938564537, "learning_rate": 9.746354141050674e-06, "loss": 0.7876, "step": 5140 }, { "epoch": 0.52, "grad_norm": 1.570377547663415, "learning_rate": 9.743060759426082e-06, "loss": 0.7654, "step": 5141 }, { "epoch": 0.52, "grad_norm": 1.6064579733110664, "learning_rate": 9.739767405688228e-06, "loss": 0.737, "step": 5142 }, { "epoch": 0.52, "grad_norm": 1.5455426190649941, "learning_rate": 9.736474080194555e-06, "loss": 0.6627, "step": 5143 }, { "epoch": 0.52, "grad_norm": 1.7875043911265842, "learning_rate": 9.733180783302499e-06, "loss": 0.8372, "step": 5144 }, { "epoch": 0.52, "grad_norm": 1.4778229053041372, "learning_rate": 9.729887515369491e-06, "loss": 0.6284, "step": 5145 }, { "epoch": 0.52, "grad_norm": 1.6191155801176957, "learning_rate": 9.726594276752975e-06, "loss": 0.663, "step": 5146 }, { "epoch": 0.52, "grad_norm": 1.8209617158366576, "learning_rate": 9.723301067810373e-06, "loss": 0.8653, "step": 5147 }, { "epoch": 0.52, "grad_norm": 1.6978790968854747, "learning_rate": 9.720007888899112e-06, "loss": 0.7375, "step": 5148 }, { "epoch": 0.52, "grad_norm": 1.5651878518597222, "learning_rate": 9.716714740376616e-06, "loss": 0.7489, "step": 5149 }, { "epoch": 0.52, "grad_norm": 1.2843081750148266, "learning_rate": 9.713421622600307e-06, "loss": 0.5879, "step": 5150 }, { "epoch": 0.52, "grad_norm": 1.553289187108251, "learning_rate": 9.710128535927597e-06, "loss": 0.7157, "step": 5151 }, { "epoch": 0.52, "grad_norm": 1.6699906188531186, "learning_rate": 9.706835480715905e-06, "loss": 0.8308, "step": 5152 }, { "epoch": 0.52, "grad_norm": 1.512597982088698, "learning_rate": 9.703542457322632e-06, "loss": 0.7099, "step": 5153 }, { "epoch": 0.52, "grad_norm": 1.416985900975218, "learning_rate": 9.700249466105193e-06, "loss": 0.6356, "step": 5154 }, { "epoch": 0.52, "grad_norm": 1.6035167743253373, "learning_rate": 9.696956507420984e-06, "loss": 0.6669, "step": 5155 }, { "epoch": 0.52, "grad_norm": 1.534607483177831, "learning_rate": 9.693663581627408e-06, "loss": 0.6586, "step": 5156 }, { "epoch": 0.52, "grad_norm": 1.5302169037608306, "learning_rate": 9.690370689081863e-06, "loss": 0.674, "step": 5157 }, { "epoch": 0.52, "grad_norm": 1.6115188616385638, "learning_rate": 9.687077830141734e-06, "loss": 0.6778, "step": 5158 }, { "epoch": 0.52, "grad_norm": 1.618571618506667, "learning_rate": 9.683785005164412e-06, "loss": 0.7339, "step": 5159 }, { "epoch": 0.52, "grad_norm": 1.4804908116816398, "learning_rate": 9.680492214507287e-06, "loss": 0.6456, "step": 5160 }, { "epoch": 0.52, "grad_norm": 1.494140203548696, "learning_rate": 9.67719945852773e-06, "loss": 0.717, "step": 5161 }, { "epoch": 0.53, "grad_norm": 1.6607958643733662, "learning_rate": 9.67390673758313e-06, "loss": 0.7738, "step": 5162 }, { "epoch": 0.53, "grad_norm": 1.4974459021451776, "learning_rate": 9.670614052030849e-06, "loss": 0.8225, "step": 5163 }, { "epoch": 0.53, "grad_norm": 1.7055403350977216, "learning_rate": 9.667321402228261e-06, "loss": 0.7338, "step": 5164 }, { "epoch": 0.53, "grad_norm": 1.5909879761244057, "learning_rate": 9.664028788532738e-06, "loss": 0.7313, "step": 5165 }, { "epoch": 0.53, "grad_norm": 1.5359287895843046, "learning_rate": 9.66073621130163e-06, "loss": 0.8177, "step": 5166 }, { "epoch": 0.53, "grad_norm": 1.503526554781987, "learning_rate": 9.657443670892303e-06, "loss": 0.6308, "step": 5167 }, { "epoch": 0.53, "grad_norm": 1.6412671598656263, "learning_rate": 9.65415116766211e-06, "loss": 0.6373, "step": 5168 }, { "epoch": 0.53, "grad_norm": 1.5027991930993563, "learning_rate": 9.650858701968399e-06, "loss": 0.666, "step": 5169 }, { "epoch": 0.53, "grad_norm": 1.5524957943245377, "learning_rate": 9.647566274168516e-06, "loss": 0.681, "step": 5170 }, { "epoch": 0.53, "grad_norm": 1.4380181512787078, "learning_rate": 9.644273884619802e-06, "loss": 0.7568, "step": 5171 }, { "epoch": 0.53, "grad_norm": 1.6762040745449318, "learning_rate": 9.640981533679593e-06, "loss": 0.6954, "step": 5172 }, { "epoch": 0.53, "grad_norm": 1.592284969495283, "learning_rate": 9.637689221705227e-06, "loss": 0.6853, "step": 5173 }, { "epoch": 0.53, "grad_norm": 1.4948404765759724, "learning_rate": 9.634396949054028e-06, "loss": 0.6828, "step": 5174 }, { "epoch": 0.53, "grad_norm": 1.5866449703821197, "learning_rate": 9.631104716083327e-06, "loss": 0.6721, "step": 5175 }, { "epoch": 0.53, "grad_norm": 1.4904129492677478, "learning_rate": 9.627812523150434e-06, "loss": 0.6681, "step": 5176 }, { "epoch": 0.53, "grad_norm": 1.6687105576410526, "learning_rate": 9.624520370612672e-06, "loss": 0.7614, "step": 5177 }, { "epoch": 0.53, "grad_norm": 1.662810354791064, "learning_rate": 9.621228258827358e-06, "loss": 0.6917, "step": 5178 }, { "epoch": 0.53, "grad_norm": 1.597033737761668, "learning_rate": 9.617936188151786e-06, "loss": 0.6599, "step": 5179 }, { "epoch": 0.53, "grad_norm": 1.59732571462882, "learning_rate": 9.614644158943273e-06, "loss": 0.7304, "step": 5180 }, { "epoch": 0.53, "grad_norm": 1.4910868084236737, "learning_rate": 9.611352171559105e-06, "loss": 0.6728, "step": 5181 }, { "epoch": 0.53, "grad_norm": 1.608341256608166, "learning_rate": 9.60806022635658e-06, "loss": 0.7524, "step": 5182 }, { "epoch": 0.53, "grad_norm": 1.550887296530815, "learning_rate": 9.604768323692993e-06, "loss": 0.6347, "step": 5183 }, { "epoch": 0.53, "grad_norm": 1.7808332484032412, "learning_rate": 9.601476463925619e-06, "loss": 0.8607, "step": 5184 }, { "epoch": 0.53, "grad_norm": 1.556241721735327, "learning_rate": 9.598184647411744e-06, "loss": 0.6297, "step": 5185 }, { "epoch": 0.53, "grad_norm": 1.585860373624063, "learning_rate": 9.594892874508646e-06, "loss": 0.6625, "step": 5186 }, { "epoch": 0.53, "grad_norm": 1.6386386267965907, "learning_rate": 9.591601145573585e-06, "loss": 0.7331, "step": 5187 }, { "epoch": 0.53, "grad_norm": 1.4186691498174373, "learning_rate": 9.58830946096384e-06, "loss": 0.6774, "step": 5188 }, { "epoch": 0.53, "grad_norm": 1.5007796466653354, "learning_rate": 9.58501782103666e-06, "loss": 0.789, "step": 5189 }, { "epoch": 0.53, "grad_norm": 1.6658588544515636, "learning_rate": 9.581726226149304e-06, "loss": 0.6567, "step": 5190 }, { "epoch": 0.53, "grad_norm": 1.4548246999305345, "learning_rate": 9.57843467665903e-06, "loss": 0.7615, "step": 5191 }, { "epoch": 0.53, "grad_norm": 1.487561384764103, "learning_rate": 9.575143172923076e-06, "loss": 0.6926, "step": 5192 }, { "epoch": 0.53, "grad_norm": 1.583406569115701, "learning_rate": 9.571851715298688e-06, "loss": 0.6854, "step": 5193 }, { "epoch": 0.53, "grad_norm": 1.7058952396836893, "learning_rate": 9.568560304143101e-06, "loss": 0.7685, "step": 5194 }, { "epoch": 0.53, "grad_norm": 1.4557818774250706, "learning_rate": 9.565268939813545e-06, "loss": 0.6032, "step": 5195 }, { "epoch": 0.53, "grad_norm": 1.5812853823646884, "learning_rate": 9.561977622667248e-06, "loss": 0.6898, "step": 5196 }, { "epoch": 0.53, "grad_norm": 1.5803499566349357, "learning_rate": 9.55868635306143e-06, "loss": 0.7041, "step": 5197 }, { "epoch": 0.53, "grad_norm": 1.5179266574469583, "learning_rate": 9.555395131353307e-06, "loss": 0.6686, "step": 5198 }, { "epoch": 0.53, "grad_norm": 1.583336365752639, "learning_rate": 9.55210395790009e-06, "loss": 0.7714, "step": 5199 }, { "epoch": 0.53, "grad_norm": 1.7268133639594228, "learning_rate": 9.548812833058984e-06, "loss": 0.7094, "step": 5200 }, { "epoch": 0.53, "grad_norm": 1.5994523133476826, "learning_rate": 9.545521757187188e-06, "loss": 0.6845, "step": 5201 }, { "epoch": 0.53, "grad_norm": 1.5297856828233163, "learning_rate": 9.542230730641898e-06, "loss": 0.689, "step": 5202 }, { "epoch": 0.53, "grad_norm": 1.6519608888054835, "learning_rate": 9.538939753780304e-06, "loss": 0.7541, "step": 5203 }, { "epoch": 0.53, "grad_norm": 1.5538051540501847, "learning_rate": 9.535648826959591e-06, "loss": 0.7378, "step": 5204 }, { "epoch": 0.53, "grad_norm": 1.5560502994532663, "learning_rate": 9.532357950536934e-06, "loss": 0.7586, "step": 5205 }, { "epoch": 0.53, "grad_norm": 1.5562973234346362, "learning_rate": 9.529067124869512e-06, "loss": 0.7769, "step": 5206 }, { "epoch": 0.53, "grad_norm": 1.4718150268643992, "learning_rate": 9.525776350314486e-06, "loss": 0.6668, "step": 5207 }, { "epoch": 0.53, "grad_norm": 1.5758318544958667, "learning_rate": 9.522485627229022e-06, "loss": 0.7326, "step": 5208 }, { "epoch": 0.53, "grad_norm": 1.6178121744079512, "learning_rate": 9.519194955970275e-06, "loss": 0.7513, "step": 5209 }, { "epoch": 0.53, "grad_norm": 1.5169507449367376, "learning_rate": 9.515904336895395e-06, "loss": 0.7525, "step": 5210 }, { "epoch": 0.53, "grad_norm": 1.4632947803615415, "learning_rate": 9.512613770361532e-06, "loss": 0.6774, "step": 5211 }, { "epoch": 0.53, "grad_norm": 1.5564286603593986, "learning_rate": 9.50932325672582e-06, "loss": 0.7545, "step": 5212 }, { "epoch": 0.53, "grad_norm": 1.4108790343983597, "learning_rate": 9.506032796345394e-06, "loss": 0.6761, "step": 5213 }, { "epoch": 0.53, "grad_norm": 1.4547297896795017, "learning_rate": 9.50274238957739e-06, "loss": 0.7189, "step": 5214 }, { "epoch": 0.53, "grad_norm": 1.6395134721925821, "learning_rate": 9.499452036778917e-06, "loss": 0.6663, "step": 5215 }, { "epoch": 0.53, "grad_norm": 1.4346935198396273, "learning_rate": 9.4961617383071e-06, "loss": 0.5815, "step": 5216 }, { "epoch": 0.53, "grad_norm": 1.5006255850588064, "learning_rate": 9.492871494519047e-06, "loss": 0.7279, "step": 5217 }, { "epoch": 0.53, "grad_norm": 1.4022924331040245, "learning_rate": 9.48958130577186e-06, "loss": 0.6135, "step": 5218 }, { "epoch": 0.53, "grad_norm": 1.7138990478244631, "learning_rate": 9.486291172422646e-06, "loss": 0.8144, "step": 5219 }, { "epoch": 0.53, "grad_norm": 1.6216852067769545, "learning_rate": 9.483001094828486e-06, "loss": 0.7125, "step": 5220 }, { "epoch": 0.53, "grad_norm": 1.5811436084819726, "learning_rate": 9.479711073346477e-06, "loss": 0.605, "step": 5221 }, { "epoch": 0.53, "grad_norm": 1.5977073093399896, "learning_rate": 9.476421108333692e-06, "loss": 0.6632, "step": 5222 }, { "epoch": 0.53, "grad_norm": 1.6516495394418902, "learning_rate": 9.473131200147205e-06, "loss": 0.6504, "step": 5223 }, { "epoch": 0.53, "grad_norm": 1.4696334684470354, "learning_rate": 9.469841349144092e-06, "loss": 0.6536, "step": 5224 }, { "epoch": 0.53, "grad_norm": 1.398855086381696, "learning_rate": 9.466551555681406e-06, "loss": 0.7457, "step": 5225 }, { "epoch": 0.53, "grad_norm": 1.7310787645704016, "learning_rate": 9.463261820116207e-06, "loss": 0.7888, "step": 5226 }, { "epoch": 0.53, "grad_norm": 1.4237314465626654, "learning_rate": 9.459972142805546e-06, "loss": 0.8367, "step": 5227 }, { "epoch": 0.53, "grad_norm": 1.7246104531629485, "learning_rate": 9.456682524106462e-06, "loss": 0.7903, "step": 5228 }, { "epoch": 0.53, "grad_norm": 1.5646826279558628, "learning_rate": 9.453392964375997e-06, "loss": 0.7519, "step": 5229 }, { "epoch": 0.53, "grad_norm": 1.5724704042349023, "learning_rate": 9.450103463971173e-06, "loss": 0.6894, "step": 5230 }, { "epoch": 0.53, "grad_norm": 1.563277249429908, "learning_rate": 9.446814023249017e-06, "loss": 0.7203, "step": 5231 }, { "epoch": 0.53, "grad_norm": 1.6000393194900262, "learning_rate": 9.443524642566555e-06, "loss": 0.704, "step": 5232 }, { "epoch": 0.53, "grad_norm": 1.5404602459582803, "learning_rate": 9.440235322280787e-06, "loss": 0.6584, "step": 5233 }, { "epoch": 0.53, "grad_norm": 1.731041243050856, "learning_rate": 9.436946062748723e-06, "loss": 0.6915, "step": 5234 }, { "epoch": 0.53, "grad_norm": 1.4755425913249085, "learning_rate": 9.433656864327355e-06, "loss": 0.7044, "step": 5235 }, { "epoch": 0.53, "grad_norm": 1.630038704091692, "learning_rate": 9.430367727373677e-06, "loss": 0.6958, "step": 5236 }, { "epoch": 0.53, "grad_norm": 1.5067240174520171, "learning_rate": 9.42707865224468e-06, "loss": 0.6853, "step": 5237 }, { "epoch": 0.53, "grad_norm": 1.4613411878535234, "learning_rate": 9.42378963929733e-06, "loss": 0.7166, "step": 5238 }, { "epoch": 0.53, "grad_norm": 1.4578198906412034, "learning_rate": 9.42050068888861e-06, "loss": 0.6512, "step": 5239 }, { "epoch": 0.53, "grad_norm": 1.7000464098217933, "learning_rate": 9.417211801375473e-06, "loss": 0.8523, "step": 5240 }, { "epoch": 0.53, "grad_norm": 1.4453794950143881, "learning_rate": 9.41392297711488e-06, "loss": 0.7007, "step": 5241 }, { "epoch": 0.53, "grad_norm": 1.7221610086703045, "learning_rate": 9.410634216463788e-06, "loss": 0.7637, "step": 5242 }, { "epoch": 0.53, "grad_norm": 1.523370062055753, "learning_rate": 9.407345519779129e-06, "loss": 0.6814, "step": 5243 }, { "epoch": 0.53, "grad_norm": 1.608673973036898, "learning_rate": 9.404056887417847e-06, "loss": 0.65, "step": 5244 }, { "epoch": 0.53, "grad_norm": 1.6451242028947322, "learning_rate": 9.400768319736874e-06, "loss": 0.694, "step": 5245 }, { "epoch": 0.53, "grad_norm": 1.6038945321260987, "learning_rate": 9.397479817093126e-06, "loss": 0.6588, "step": 5246 }, { "epoch": 0.53, "grad_norm": 1.6994527008054192, "learning_rate": 9.394191379843524e-06, "loss": 0.8255, "step": 5247 }, { "epoch": 0.53, "grad_norm": 1.681999020364834, "learning_rate": 9.390903008344972e-06, "loss": 0.6967, "step": 5248 }, { "epoch": 0.53, "grad_norm": 1.5577270594188073, "learning_rate": 9.387614702954372e-06, "loss": 0.6311, "step": 5249 }, { "epoch": 0.53, "grad_norm": 1.6413677367230959, "learning_rate": 9.384326464028622e-06, "loss": 0.7477, "step": 5250 }, { "epoch": 0.53, "grad_norm": 1.5642001057958752, "learning_rate": 9.381038291924607e-06, "loss": 0.733, "step": 5251 }, { "epoch": 0.53, "grad_norm": 1.505431451576894, "learning_rate": 9.377750186999203e-06, "loss": 0.6696, "step": 5252 }, { "epoch": 0.53, "grad_norm": 1.5609368395988965, "learning_rate": 9.374462149609286e-06, "loss": 0.7326, "step": 5253 }, { "epoch": 0.53, "grad_norm": 1.4406046445318315, "learning_rate": 9.371174180111722e-06, "loss": 0.6684, "step": 5254 }, { "epoch": 0.53, "grad_norm": 1.6830633570580327, "learning_rate": 9.367886278863366e-06, "loss": 0.7631, "step": 5255 }, { "epoch": 0.53, "grad_norm": 1.4375200749846495, "learning_rate": 9.364598446221068e-06, "loss": 0.7355, "step": 5256 }, { "epoch": 0.53, "grad_norm": 1.5440091833277783, "learning_rate": 9.361310682541675e-06, "loss": 0.7658, "step": 5257 }, { "epoch": 0.53, "grad_norm": 1.352238801113515, "learning_rate": 9.358022988182017e-06, "loss": 0.6368, "step": 5258 }, { "epoch": 0.53, "grad_norm": 1.5949685287350601, "learning_rate": 9.354735363498925e-06, "loss": 0.709, "step": 5259 }, { "epoch": 0.54, "grad_norm": 1.4948494174337732, "learning_rate": 9.351447808849219e-06, "loss": 0.6661, "step": 5260 }, { "epoch": 0.54, "grad_norm": 1.624657505673658, "learning_rate": 9.348160324589709e-06, "loss": 0.7397, "step": 5261 }, { "epoch": 0.54, "grad_norm": 1.5507950151661274, "learning_rate": 9.344872911077206e-06, "loss": 0.7714, "step": 5262 }, { "epoch": 0.54, "grad_norm": 1.5574701634407244, "learning_rate": 9.341585568668501e-06, "loss": 0.7168, "step": 5263 }, { "epoch": 0.54, "grad_norm": 1.4886619135598378, "learning_rate": 9.338298297720385e-06, "loss": 0.7643, "step": 5264 }, { "epoch": 0.54, "grad_norm": 1.48937824246935, "learning_rate": 9.335011098589644e-06, "loss": 0.7108, "step": 5265 }, { "epoch": 0.54, "grad_norm": 1.5537164166555901, "learning_rate": 9.331723971633046e-06, "loss": 0.6894, "step": 5266 }, { "epoch": 0.54, "grad_norm": 1.4409876623283242, "learning_rate": 9.328436917207359e-06, "loss": 0.6557, "step": 5267 }, { "epoch": 0.54, "grad_norm": 1.3906556675122819, "learning_rate": 9.325149935669347e-06, "loss": 0.6577, "step": 5268 }, { "epoch": 0.54, "grad_norm": 1.4896382181245778, "learning_rate": 9.321863027375753e-06, "loss": 0.6772, "step": 5269 }, { "epoch": 0.54, "grad_norm": 1.5065184254475665, "learning_rate": 9.318576192683326e-06, "loss": 0.6624, "step": 5270 }, { "epoch": 0.54, "grad_norm": 1.6122106573181099, "learning_rate": 9.315289431948794e-06, "loss": 0.7075, "step": 5271 }, { "epoch": 0.54, "grad_norm": 1.6082453926127243, "learning_rate": 9.312002745528885e-06, "loss": 0.7192, "step": 5272 }, { "epoch": 0.54, "grad_norm": 1.5519173002255817, "learning_rate": 9.308716133780323e-06, "loss": 0.7058, "step": 5273 }, { "epoch": 0.54, "grad_norm": 1.5511865568301473, "learning_rate": 9.30542959705981e-06, "loss": 0.6688, "step": 5274 }, { "epoch": 0.54, "grad_norm": 1.6629909624506398, "learning_rate": 9.302143135724058e-06, "loss": 0.722, "step": 5275 }, { "epoch": 0.54, "grad_norm": 1.554510834086878, "learning_rate": 9.29885675012975e-06, "loss": 0.7647, "step": 5276 }, { "epoch": 0.54, "grad_norm": 1.4276555624226124, "learning_rate": 9.295570440633577e-06, "loss": 0.6158, "step": 5277 }, { "epoch": 0.54, "grad_norm": 1.741157099895649, "learning_rate": 9.29228420759222e-06, "loss": 0.6786, "step": 5278 }, { "epoch": 0.54, "grad_norm": 1.619832980822339, "learning_rate": 9.288998051362343e-06, "loss": 0.7473, "step": 5279 }, { "epoch": 0.54, "grad_norm": 1.3846762393519525, "learning_rate": 9.285711972300607e-06, "loss": 0.5589, "step": 5280 }, { "epoch": 0.54, "grad_norm": 1.494543803188233, "learning_rate": 9.28242597076367e-06, "loss": 0.6714, "step": 5281 }, { "epoch": 0.54, "grad_norm": 1.54481372971172, "learning_rate": 9.27914004710817e-06, "loss": 0.716, "step": 5282 }, { "epoch": 0.54, "grad_norm": 1.5701962361404953, "learning_rate": 9.275854201690748e-06, "loss": 0.7484, "step": 5283 }, { "epoch": 0.54, "grad_norm": 1.564642596041506, "learning_rate": 9.272568434868024e-06, "loss": 0.6589, "step": 5284 }, { "epoch": 0.54, "grad_norm": 1.5127112390691917, "learning_rate": 9.26928274699662e-06, "loss": 0.692, "step": 5285 }, { "epoch": 0.54, "grad_norm": 1.6438483898007703, "learning_rate": 9.265997138433152e-06, "loss": 0.7135, "step": 5286 }, { "epoch": 0.54, "grad_norm": 1.418691263372242, "learning_rate": 9.262711609534211e-06, "loss": 0.6108, "step": 5287 }, { "epoch": 0.54, "grad_norm": 1.4353502362260733, "learning_rate": 9.2594261606564e-06, "loss": 0.6181, "step": 5288 }, { "epoch": 0.54, "grad_norm": 1.5773405361749253, "learning_rate": 9.256140792156292e-06, "loss": 0.6366, "step": 5289 }, { "epoch": 0.54, "grad_norm": 1.740700222032807, "learning_rate": 9.25285550439047e-06, "loss": 0.713, "step": 5290 }, { "epoch": 0.54, "grad_norm": 1.5815623965705798, "learning_rate": 9.249570297715503e-06, "loss": 0.6745, "step": 5291 }, { "epoch": 0.54, "grad_norm": 1.7360353904593255, "learning_rate": 9.24628517248794e-06, "loss": 0.7386, "step": 5292 }, { "epoch": 0.54, "grad_norm": 1.600154153236615, "learning_rate": 9.243000129064339e-06, "loss": 0.7301, "step": 5293 }, { "epoch": 0.54, "grad_norm": 1.6322114591649122, "learning_rate": 9.239715167801232e-06, "loss": 0.6683, "step": 5294 }, { "epoch": 0.54, "grad_norm": 1.5597922373861357, "learning_rate": 9.236430289055154e-06, "loss": 0.7467, "step": 5295 }, { "epoch": 0.54, "grad_norm": 1.5373445772916388, "learning_rate": 9.23314549318263e-06, "loss": 0.7699, "step": 5296 }, { "epoch": 0.54, "grad_norm": 1.480216056117163, "learning_rate": 9.229860780540168e-06, "loss": 0.7339, "step": 5297 }, { "epoch": 0.54, "grad_norm": 1.6150409063359652, "learning_rate": 9.226576151484274e-06, "loss": 0.7056, "step": 5298 }, { "epoch": 0.54, "grad_norm": 1.5014317259723393, "learning_rate": 9.223291606371448e-06, "loss": 0.7125, "step": 5299 }, { "epoch": 0.54, "grad_norm": 1.4923615606329292, "learning_rate": 9.220007145558169e-06, "loss": 0.6393, "step": 5300 }, { "epoch": 0.54, "grad_norm": 1.5687143048914496, "learning_rate": 9.216722769400917e-06, "loss": 0.6065, "step": 5301 }, { "epoch": 0.54, "grad_norm": 1.5118993852348217, "learning_rate": 9.213438478256159e-06, "loss": 0.6034, "step": 5302 }, { "epoch": 0.54, "grad_norm": 1.4038497103453222, "learning_rate": 9.210154272480353e-06, "loss": 0.7255, "step": 5303 }, { "epoch": 0.54, "grad_norm": 1.7505356088247321, "learning_rate": 9.20687015242995e-06, "loss": 0.6883, "step": 5304 }, { "epoch": 0.54, "grad_norm": 1.552647846292909, "learning_rate": 9.203586118461389e-06, "loss": 0.6346, "step": 5305 }, { "epoch": 0.54, "grad_norm": 1.7016995825452594, "learning_rate": 9.2003021709311e-06, "loss": 0.6711, "step": 5306 }, { "epoch": 0.54, "grad_norm": 1.7047703184824687, "learning_rate": 9.197018310195503e-06, "loss": 0.7431, "step": 5307 }, { "epoch": 0.54, "grad_norm": 1.6009774451727634, "learning_rate": 9.193734536611013e-06, "loss": 0.753, "step": 5308 }, { "epoch": 0.54, "grad_norm": 1.5727984936944126, "learning_rate": 9.19045085053403e-06, "loss": 0.6955, "step": 5309 }, { "epoch": 0.54, "grad_norm": 1.5171645830514315, "learning_rate": 9.18716725232095e-06, "loss": 0.6256, "step": 5310 }, { "epoch": 0.54, "grad_norm": 1.5013686068549332, "learning_rate": 9.18388374232815e-06, "loss": 0.7419, "step": 5311 }, { "epoch": 0.54, "grad_norm": 1.5452514446664825, "learning_rate": 9.180600320912006e-06, "loss": 0.8011, "step": 5312 }, { "epoch": 0.54, "grad_norm": 1.6779607379171213, "learning_rate": 9.177316988428885e-06, "loss": 0.7006, "step": 5313 }, { "epoch": 0.54, "grad_norm": 1.5460751326931226, "learning_rate": 9.174033745235141e-06, "loss": 0.726, "step": 5314 }, { "epoch": 0.54, "grad_norm": 1.4406243469055928, "learning_rate": 9.170750591687114e-06, "loss": 0.7465, "step": 5315 }, { "epoch": 0.54, "grad_norm": 1.6337337025110672, "learning_rate": 9.167467528141144e-06, "loss": 0.7255, "step": 5316 }, { "epoch": 0.54, "grad_norm": 1.5467283757292964, "learning_rate": 9.164184554953557e-06, "loss": 0.7321, "step": 5317 }, { "epoch": 0.54, "grad_norm": 1.682046371729968, "learning_rate": 9.160901672480662e-06, "loss": 0.6444, "step": 5318 }, { "epoch": 0.54, "grad_norm": 1.488118046274293, "learning_rate": 9.157618881078772e-06, "loss": 0.713, "step": 5319 }, { "epoch": 0.54, "grad_norm": 1.7079121163396311, "learning_rate": 9.154336181104175e-06, "loss": 0.6949, "step": 5320 }, { "epoch": 0.54, "grad_norm": 1.5068370655989956, "learning_rate": 9.151053572913163e-06, "loss": 0.7095, "step": 5321 }, { "epoch": 0.54, "grad_norm": 1.7707562315922247, "learning_rate": 9.147771056862011e-06, "loss": 0.7966, "step": 5322 }, { "epoch": 0.54, "grad_norm": 1.6133590922465229, "learning_rate": 9.144488633306981e-06, "loss": 0.6659, "step": 5323 }, { "epoch": 0.54, "grad_norm": 1.4545510638629, "learning_rate": 9.141206302604336e-06, "loss": 0.7483, "step": 5324 }, { "epoch": 0.54, "grad_norm": 1.655522474567456, "learning_rate": 9.13792406511031e-06, "loss": 0.76, "step": 5325 }, { "epoch": 0.54, "grad_norm": 1.3576656692724869, "learning_rate": 9.134641921181147e-06, "loss": 0.6448, "step": 5326 }, { "epoch": 0.54, "grad_norm": 1.7787458227500046, "learning_rate": 9.131359871173074e-06, "loss": 0.7244, "step": 5327 }, { "epoch": 0.54, "grad_norm": 1.6178083657835598, "learning_rate": 9.1280779154423e-06, "loss": 0.6684, "step": 5328 }, { "epoch": 0.54, "grad_norm": 1.7010524693762994, "learning_rate": 9.124796054345036e-06, "loss": 0.7417, "step": 5329 }, { "epoch": 0.54, "grad_norm": 1.5360912007131207, "learning_rate": 9.12151428823747e-06, "loss": 0.7682, "step": 5330 }, { "epoch": 0.54, "grad_norm": 1.7481181530838306, "learning_rate": 9.118232617475791e-06, "loss": 0.7718, "step": 5331 }, { "epoch": 0.54, "grad_norm": 1.5760176359497586, "learning_rate": 9.114951042416175e-06, "loss": 0.6897, "step": 5332 }, { "epoch": 0.54, "grad_norm": 1.4532079356845804, "learning_rate": 9.11166956341478e-06, "loss": 0.7062, "step": 5333 }, { "epoch": 0.54, "grad_norm": 1.5295195035357474, "learning_rate": 9.10838818082776e-06, "loss": 0.6727, "step": 5334 }, { "epoch": 0.54, "grad_norm": 1.502411837379029, "learning_rate": 9.105106895011263e-06, "loss": 0.6246, "step": 5335 }, { "epoch": 0.54, "grad_norm": 1.6815569575078204, "learning_rate": 9.101825706321417e-06, "loss": 0.6468, "step": 5336 }, { "epoch": 0.54, "grad_norm": 1.6571536250194325, "learning_rate": 9.098544615114347e-06, "loss": 0.7936, "step": 5337 }, { "epoch": 0.54, "grad_norm": 1.5541916532325835, "learning_rate": 9.095263621746159e-06, "loss": 0.7494, "step": 5338 }, { "epoch": 0.54, "grad_norm": 1.5773470344567357, "learning_rate": 9.091982726572956e-06, "loss": 0.7626, "step": 5339 }, { "epoch": 0.54, "grad_norm": 1.7006352160254827, "learning_rate": 9.088701929950831e-06, "loss": 0.6884, "step": 5340 }, { "epoch": 0.54, "grad_norm": 1.5057538412071996, "learning_rate": 9.085421232235857e-06, "loss": 0.5913, "step": 5341 }, { "epoch": 0.54, "grad_norm": 1.681665813385068, "learning_rate": 9.08214063378411e-06, "loss": 0.8246, "step": 5342 }, { "epoch": 0.54, "grad_norm": 1.616750287402683, "learning_rate": 9.07886013495164e-06, "loss": 0.6381, "step": 5343 }, { "epoch": 0.54, "grad_norm": 1.5931263201795647, "learning_rate": 9.075579736094497e-06, "loss": 0.6813, "step": 5344 }, { "epoch": 0.54, "grad_norm": 1.5711422910918862, "learning_rate": 9.072299437568722e-06, "loss": 0.7396, "step": 5345 }, { "epoch": 0.54, "grad_norm": 1.4722021442670385, "learning_rate": 9.069019239730329e-06, "loss": 0.7187, "step": 5346 }, { "epoch": 0.54, "grad_norm": 1.684898911516207, "learning_rate": 9.065739142935345e-06, "loss": 0.784, "step": 5347 }, { "epoch": 0.54, "grad_norm": 1.5649833582235908, "learning_rate": 9.062459147539761e-06, "loss": 0.7162, "step": 5348 }, { "epoch": 0.54, "grad_norm": 1.70568458464764, "learning_rate": 9.059179253899577e-06, "loss": 0.6717, "step": 5349 }, { "epoch": 0.54, "grad_norm": 1.516662532354538, "learning_rate": 9.055899462370776e-06, "loss": 0.6827, "step": 5350 }, { "epoch": 0.54, "grad_norm": 1.527811050777723, "learning_rate": 9.052619773309318e-06, "loss": 0.715, "step": 5351 }, { "epoch": 0.54, "grad_norm": 1.394315245520379, "learning_rate": 9.04934018707117e-06, "loss": 0.6611, "step": 5352 }, { "epoch": 0.54, "grad_norm": 1.669341562664938, "learning_rate": 9.046060704012281e-06, "loss": 0.7238, "step": 5353 }, { "epoch": 0.54, "grad_norm": 1.4150418929513942, "learning_rate": 9.042781324488582e-06, "loss": 0.6455, "step": 5354 }, { "epoch": 0.54, "grad_norm": 1.7234502937842522, "learning_rate": 9.039502048856002e-06, "loss": 0.7208, "step": 5355 }, { "epoch": 0.54, "grad_norm": 1.4593406939103721, "learning_rate": 9.036222877470455e-06, "loss": 0.693, "step": 5356 }, { "epoch": 0.54, "grad_norm": 1.7215012614015754, "learning_rate": 9.032943810687841e-06, "loss": 0.792, "step": 5357 }, { "epoch": 0.55, "grad_norm": 1.6688124560243105, "learning_rate": 9.029664848864055e-06, "loss": 0.6758, "step": 5358 }, { "epoch": 0.55, "grad_norm": 1.8236526409515186, "learning_rate": 9.026385992354974e-06, "loss": 0.7684, "step": 5359 }, { "epoch": 0.55, "grad_norm": 1.633138913316259, "learning_rate": 9.023107241516469e-06, "loss": 0.7151, "step": 5360 }, { "epoch": 0.55, "grad_norm": 1.5803702477774828, "learning_rate": 9.019828596704394e-06, "loss": 0.8013, "step": 5361 }, { "epoch": 0.55, "grad_norm": 1.669277721960774, "learning_rate": 9.016550058274599e-06, "loss": 0.7613, "step": 5362 }, { "epoch": 0.55, "grad_norm": 1.5690020797998012, "learning_rate": 9.013271626582915e-06, "loss": 0.7337, "step": 5363 }, { "epoch": 0.55, "grad_norm": 1.41272139408782, "learning_rate": 9.009993301985167e-06, "loss": 0.6609, "step": 5364 }, { "epoch": 0.55, "grad_norm": 1.468996080511503, "learning_rate": 9.006715084837162e-06, "loss": 0.5811, "step": 5365 }, { "epoch": 0.55, "grad_norm": 1.5838321250978402, "learning_rate": 9.003436975494699e-06, "loss": 0.6922, "step": 5366 }, { "epoch": 0.55, "grad_norm": 1.4850631249125565, "learning_rate": 9.00015897431357e-06, "loss": 0.6375, "step": 5367 }, { "epoch": 0.55, "grad_norm": 1.529836227289035, "learning_rate": 8.996881081649551e-06, "loss": 0.629, "step": 5368 }, { "epoch": 0.55, "grad_norm": 1.5638270705319934, "learning_rate": 8.9936032978584e-06, "loss": 0.636, "step": 5369 }, { "epoch": 0.55, "grad_norm": 1.4665776471013017, "learning_rate": 8.990325623295877e-06, "loss": 0.7205, "step": 5370 }, { "epoch": 0.55, "grad_norm": 1.651673457610251, "learning_rate": 8.987048058317714e-06, "loss": 0.7849, "step": 5371 }, { "epoch": 0.55, "grad_norm": 1.7829197596581683, "learning_rate": 8.983770603279642e-06, "loss": 0.751, "step": 5372 }, { "epoch": 0.55, "grad_norm": 1.8456608183651604, "learning_rate": 8.980493258537384e-06, "loss": 0.7839, "step": 5373 }, { "epoch": 0.55, "grad_norm": 1.703926959579352, "learning_rate": 8.977216024446636e-06, "loss": 0.8251, "step": 5374 }, { "epoch": 0.55, "grad_norm": 1.530470230657618, "learning_rate": 8.973938901363094e-06, "loss": 0.6577, "step": 5375 }, { "epoch": 0.55, "grad_norm": 1.5809578736817425, "learning_rate": 8.970661889642442e-06, "loss": 0.7201, "step": 5376 }, { "epoch": 0.55, "grad_norm": 1.4271102652547045, "learning_rate": 8.96738498964034e-06, "loss": 0.6792, "step": 5377 }, { "epoch": 0.55, "grad_norm": 1.5426396797757143, "learning_rate": 8.964108201712455e-06, "loss": 0.6958, "step": 5378 }, { "epoch": 0.55, "grad_norm": 1.8103751233562824, "learning_rate": 8.96083152621442e-06, "loss": 0.8033, "step": 5379 }, { "epoch": 0.55, "grad_norm": 1.5761100242991692, "learning_rate": 8.957554963501873e-06, "loss": 0.7647, "step": 5380 }, { "epoch": 0.55, "grad_norm": 1.5905942042313983, "learning_rate": 8.954278513930434e-06, "loss": 0.7884, "step": 5381 }, { "epoch": 0.55, "grad_norm": 1.5539806558481204, "learning_rate": 8.951002177855708e-06, "loss": 0.7533, "step": 5382 }, { "epoch": 0.55, "grad_norm": 1.6703501138543935, "learning_rate": 8.947725955633294e-06, "loss": 0.749, "step": 5383 }, { "epoch": 0.55, "grad_norm": 1.5119616474184723, "learning_rate": 8.944449847618768e-06, "loss": 0.6985, "step": 5384 }, { "epoch": 0.55, "grad_norm": 1.6205786635233608, "learning_rate": 8.941173854167703e-06, "loss": 0.7124, "step": 5385 }, { "epoch": 0.55, "grad_norm": 1.5975066009074679, "learning_rate": 8.937897975635664e-06, "loss": 0.6692, "step": 5386 }, { "epoch": 0.55, "grad_norm": 1.636521080970513, "learning_rate": 8.934622212378185e-06, "loss": 0.6629, "step": 5387 }, { "epoch": 0.55, "grad_norm": 1.5190343789945384, "learning_rate": 8.931346564750808e-06, "loss": 0.651, "step": 5388 }, { "epoch": 0.55, "grad_norm": 1.5957467566016765, "learning_rate": 8.928071033109047e-06, "loss": 0.7199, "step": 5389 }, { "epoch": 0.55, "grad_norm": 1.4624387519553008, "learning_rate": 8.924795617808411e-06, "loss": 0.6886, "step": 5390 }, { "epoch": 0.55, "grad_norm": 1.4511607366742325, "learning_rate": 8.921520319204399e-06, "loss": 0.733, "step": 5391 }, { "epoch": 0.55, "grad_norm": 1.4985675491539114, "learning_rate": 8.91824513765249e-06, "loss": 0.7086, "step": 5392 }, { "epoch": 0.55, "grad_norm": 1.5089126612633614, "learning_rate": 8.914970073508152e-06, "loss": 0.7444, "step": 5393 }, { "epoch": 0.55, "grad_norm": 1.628177603423058, "learning_rate": 8.91169512712685e-06, "loss": 0.7051, "step": 5394 }, { "epoch": 0.55, "grad_norm": 1.5772919443785387, "learning_rate": 8.908420298864018e-06, "loss": 0.7317, "step": 5395 }, { "epoch": 0.55, "grad_norm": 1.4723112256257591, "learning_rate": 8.905145589075098e-06, "loss": 0.7227, "step": 5396 }, { "epoch": 0.55, "grad_norm": 1.5801448338705688, "learning_rate": 8.901870998115495e-06, "loss": 0.6773, "step": 5397 }, { "epoch": 0.55, "grad_norm": 1.5860986082201733, "learning_rate": 8.898596526340624e-06, "loss": 0.7343, "step": 5398 }, { "epoch": 0.55, "grad_norm": 1.6039832133572458, "learning_rate": 8.895322174105882e-06, "loss": 0.805, "step": 5399 }, { "epoch": 0.55, "grad_norm": 1.6155249124855937, "learning_rate": 8.892047941766636e-06, "loss": 0.7111, "step": 5400 }, { "epoch": 0.55, "grad_norm": 1.6067471312551125, "learning_rate": 8.888773829678265e-06, "loss": 0.7244, "step": 5401 }, { "epoch": 0.55, "grad_norm": 1.619332799189476, "learning_rate": 8.885499838196114e-06, "loss": 0.765, "step": 5402 }, { "epoch": 0.55, "grad_norm": 1.6699781838918835, "learning_rate": 8.882225967675526e-06, "loss": 0.7806, "step": 5403 }, { "epoch": 0.55, "grad_norm": 1.6180503220313573, "learning_rate": 8.878952218471833e-06, "loss": 0.7772, "step": 5404 }, { "epoch": 0.55, "grad_norm": 1.7606649996624608, "learning_rate": 8.87567859094034e-06, "loss": 0.7342, "step": 5405 }, { "epoch": 0.55, "grad_norm": 1.6189646431863196, "learning_rate": 8.872405085436356e-06, "loss": 0.6012, "step": 5406 }, { "epoch": 0.55, "grad_norm": 1.6365844250183907, "learning_rate": 8.869131702315169e-06, "loss": 0.7184, "step": 5407 }, { "epoch": 0.55, "grad_norm": 1.5492476290084674, "learning_rate": 8.865858441932047e-06, "loss": 0.7093, "step": 5408 }, { "epoch": 0.55, "grad_norm": 1.5731924035028098, "learning_rate": 8.862585304642257e-06, "loss": 0.7088, "step": 5409 }, { "epoch": 0.55, "grad_norm": 1.5326160778305695, "learning_rate": 8.859312290801044e-06, "loss": 0.6615, "step": 5410 }, { "epoch": 0.55, "grad_norm": 1.685641814822192, "learning_rate": 8.856039400763644e-06, "loss": 0.6843, "step": 5411 }, { "epoch": 0.55, "grad_norm": 1.6687141835760049, "learning_rate": 8.852766634885277e-06, "loss": 0.7037, "step": 5412 }, { "epoch": 0.55, "grad_norm": 1.5741605632580782, "learning_rate": 8.849493993521153e-06, "loss": 0.6725, "step": 5413 }, { "epoch": 0.55, "grad_norm": 1.5848257648940565, "learning_rate": 8.846221477026463e-06, "loss": 0.7393, "step": 5414 }, { "epoch": 0.55, "grad_norm": 1.4946988393052152, "learning_rate": 8.842949085756389e-06, "loss": 0.7276, "step": 5415 }, { "epoch": 0.55, "grad_norm": 1.633398789443463, "learning_rate": 8.839676820066095e-06, "loss": 0.7075, "step": 5416 }, { "epoch": 0.55, "grad_norm": 1.5064528057849553, "learning_rate": 8.836404680310739e-06, "loss": 0.7695, "step": 5417 }, { "epoch": 0.55, "grad_norm": 1.6169451224176257, "learning_rate": 8.833132666845459e-06, "loss": 0.7448, "step": 5418 }, { "epoch": 0.55, "grad_norm": 1.5249218319990228, "learning_rate": 8.82986078002538e-06, "loss": 0.6317, "step": 5419 }, { "epoch": 0.55, "grad_norm": 1.6626818643466355, "learning_rate": 8.826589020205611e-06, "loss": 0.6781, "step": 5420 }, { "epoch": 0.55, "grad_norm": 1.5977691718018296, "learning_rate": 8.823317387741258e-06, "loss": 0.7576, "step": 5421 }, { "epoch": 0.55, "grad_norm": 1.5462585411188319, "learning_rate": 8.8200458829874e-06, "loss": 0.8659, "step": 5422 }, { "epoch": 0.55, "grad_norm": 1.5190555713203076, "learning_rate": 8.816774506299106e-06, "loss": 0.6913, "step": 5423 }, { "epoch": 0.55, "grad_norm": 1.6269163363674695, "learning_rate": 8.813503258031443e-06, "loss": 0.7462, "step": 5424 }, { "epoch": 0.55, "grad_norm": 1.882660965495172, "learning_rate": 8.81023213853944e-06, "loss": 0.8512, "step": 5425 }, { "epoch": 0.55, "grad_norm": 1.5943514236393186, "learning_rate": 8.806961148178133e-06, "loss": 0.678, "step": 5426 }, { "epoch": 0.55, "grad_norm": 1.5669447477330756, "learning_rate": 8.803690287302542e-06, "loss": 0.668, "step": 5427 }, { "epoch": 0.55, "grad_norm": 1.6535602382835375, "learning_rate": 8.800419556267655e-06, "loss": 0.7596, "step": 5428 }, { "epoch": 0.55, "grad_norm": 1.6308465952597497, "learning_rate": 8.797148955428467e-06, "loss": 0.7934, "step": 5429 }, { "epoch": 0.55, "grad_norm": 1.408309632362028, "learning_rate": 8.793878485139954e-06, "loss": 0.6747, "step": 5430 }, { "epoch": 0.55, "grad_norm": 1.6085099512031618, "learning_rate": 8.790608145757066e-06, "loss": 0.5726, "step": 5431 }, { "epoch": 0.55, "grad_norm": 1.5522606137544397, "learning_rate": 8.787337937634755e-06, "loss": 0.7439, "step": 5432 }, { "epoch": 0.55, "grad_norm": 1.5890313434061458, "learning_rate": 8.784067861127942e-06, "loss": 0.7395, "step": 5433 }, { "epoch": 0.55, "grad_norm": 1.540243937101572, "learning_rate": 8.780797916591548e-06, "loss": 0.708, "step": 5434 }, { "epoch": 0.55, "grad_norm": 1.6690409088302076, "learning_rate": 8.777528104380478e-06, "loss": 0.7222, "step": 5435 }, { "epoch": 0.55, "grad_norm": 1.5706857558719922, "learning_rate": 8.77425842484961e-06, "loss": 0.786, "step": 5436 }, { "epoch": 0.55, "grad_norm": 1.4738961397397348, "learning_rate": 8.770988878353827e-06, "loss": 0.6863, "step": 5437 }, { "epoch": 0.55, "grad_norm": 1.49909854563047, "learning_rate": 8.767719465247979e-06, "loss": 0.6873, "step": 5438 }, { "epoch": 0.55, "grad_norm": 1.7050766612344421, "learning_rate": 8.76445018588691e-06, "loss": 0.7244, "step": 5439 }, { "epoch": 0.55, "grad_norm": 1.5630758956722233, "learning_rate": 8.761181040625457e-06, "loss": 0.6779, "step": 5440 }, { "epoch": 0.55, "grad_norm": 1.5859976116499224, "learning_rate": 8.757912029818424e-06, "loss": 0.7304, "step": 5441 }, { "epoch": 0.55, "grad_norm": 1.6690118530119127, "learning_rate": 8.75464315382062e-06, "loss": 0.6904, "step": 5442 }, { "epoch": 0.55, "grad_norm": 1.576408966950522, "learning_rate": 8.751374412986822e-06, "loss": 0.8189, "step": 5443 }, { "epoch": 0.55, "grad_norm": 1.6019837359423332, "learning_rate": 8.748105807671806e-06, "loss": 0.7219, "step": 5444 }, { "epoch": 0.55, "grad_norm": 1.8202596783529816, "learning_rate": 8.744837338230331e-06, "loss": 0.8334, "step": 5445 }, { "epoch": 0.55, "grad_norm": 1.5460218928791967, "learning_rate": 8.74156900501713e-06, "loss": 0.8658, "step": 5446 }, { "epoch": 0.55, "grad_norm": 1.6092606350579812, "learning_rate": 8.738300808386934e-06, "loss": 0.7317, "step": 5447 }, { "epoch": 0.55, "grad_norm": 1.6053711156944777, "learning_rate": 8.73503274869446e-06, "loss": 0.6881, "step": 5448 }, { "epoch": 0.55, "grad_norm": 1.6755703755220746, "learning_rate": 8.731764826294394e-06, "loss": 0.7608, "step": 5449 }, { "epoch": 0.55, "grad_norm": 1.61294871243576, "learning_rate": 8.728497041541426e-06, "loss": 0.6088, "step": 5450 }, { "epoch": 0.55, "grad_norm": 1.4898392329846786, "learning_rate": 8.725229394790217e-06, "loss": 0.6994, "step": 5451 }, { "epoch": 0.55, "grad_norm": 1.5406794279721598, "learning_rate": 8.721961886395423e-06, "loss": 0.6649, "step": 5452 }, { "epoch": 0.55, "grad_norm": 1.5276423066659235, "learning_rate": 8.718694516711684e-06, "loss": 0.7596, "step": 5453 }, { "epoch": 0.55, "grad_norm": 1.555067050858641, "learning_rate": 8.715427286093611e-06, "loss": 0.6365, "step": 5454 }, { "epoch": 0.55, "grad_norm": 1.4073917884142084, "learning_rate": 8.712160194895825e-06, "loss": 0.5868, "step": 5455 }, { "epoch": 0.55, "grad_norm": 1.5977461991934643, "learning_rate": 8.708893243472905e-06, "loss": 0.6963, "step": 5456 }, { "epoch": 0.56, "grad_norm": 1.6408899699791568, "learning_rate": 8.705626432179432e-06, "loss": 0.6912, "step": 5457 }, { "epoch": 0.56, "grad_norm": 1.5462815026322818, "learning_rate": 8.702359761369972e-06, "loss": 0.7436, "step": 5458 }, { "epoch": 0.56, "grad_norm": 1.5104521935408541, "learning_rate": 8.699093231399064e-06, "loss": 0.726, "step": 5459 }, { "epoch": 0.56, "grad_norm": 1.5145792617441518, "learning_rate": 8.695826842621244e-06, "loss": 0.6668, "step": 5460 }, { "epoch": 0.56, "grad_norm": 1.5567167872130345, "learning_rate": 8.692560595391023e-06, "loss": 0.6974, "step": 5461 }, { "epoch": 0.56, "grad_norm": 1.535631369442896, "learning_rate": 8.689294490062906e-06, "loss": 0.7519, "step": 5462 }, { "epoch": 0.56, "grad_norm": 1.525000280601043, "learning_rate": 8.686028526991373e-06, "loss": 0.6805, "step": 5463 }, { "epoch": 0.56, "grad_norm": 1.3676512965755294, "learning_rate": 8.682762706530897e-06, "loss": 0.6684, "step": 5464 }, { "epoch": 0.56, "grad_norm": 1.4824849114578038, "learning_rate": 8.679497029035926e-06, "loss": 0.7436, "step": 5465 }, { "epoch": 0.56, "grad_norm": 1.6201369875081661, "learning_rate": 8.676231494860907e-06, "loss": 0.8019, "step": 5466 }, { "epoch": 0.56, "grad_norm": 1.5794700859428037, "learning_rate": 8.672966104360255e-06, "loss": 0.6173, "step": 5467 }, { "epoch": 0.56, "grad_norm": 1.588515557807471, "learning_rate": 8.669700857888382e-06, "loss": 0.7549, "step": 5468 }, { "epoch": 0.56, "grad_norm": 1.4362244278634058, "learning_rate": 8.666435755799675e-06, "loss": 0.6177, "step": 5469 }, { "epoch": 0.56, "grad_norm": 1.4553152841062662, "learning_rate": 8.663170798448511e-06, "loss": 0.886, "step": 5470 }, { "epoch": 0.56, "grad_norm": 1.5276721104428754, "learning_rate": 8.659905986189254e-06, "loss": 0.6391, "step": 5471 }, { "epoch": 0.56, "grad_norm": 1.3626636058873272, "learning_rate": 8.656641319376244e-06, "loss": 0.6458, "step": 5472 }, { "epoch": 0.56, "grad_norm": 1.7623374037769408, "learning_rate": 8.653376798363813e-06, "loss": 0.7789, "step": 5473 }, { "epoch": 0.56, "grad_norm": 1.358068460640926, "learning_rate": 8.650112423506268e-06, "loss": 0.6103, "step": 5474 }, { "epoch": 0.56, "grad_norm": 1.6080385386359963, "learning_rate": 8.646848195157914e-06, "loss": 0.7079, "step": 5475 }, { "epoch": 0.56, "grad_norm": 1.4797588538693942, "learning_rate": 8.643584113673025e-06, "loss": 0.5676, "step": 5476 }, { "epoch": 0.56, "grad_norm": 1.5675290842063057, "learning_rate": 8.640320179405867e-06, "loss": 0.6131, "step": 5477 }, { "epoch": 0.56, "grad_norm": 1.5496226897761918, "learning_rate": 8.637056392710695e-06, "loss": 0.5674, "step": 5478 }, { "epoch": 0.56, "grad_norm": 1.5906722506085222, "learning_rate": 8.633792753941733e-06, "loss": 0.7608, "step": 5479 }, { "epoch": 0.56, "grad_norm": 1.5845995315266523, "learning_rate": 8.630529263453202e-06, "loss": 0.8818, "step": 5480 }, { "epoch": 0.56, "grad_norm": 1.5647801024812311, "learning_rate": 8.62726592159931e-06, "loss": 0.7297, "step": 5481 }, { "epoch": 0.56, "grad_norm": 1.5349042911362298, "learning_rate": 8.624002728734228e-06, "loss": 0.6783, "step": 5482 }, { "epoch": 0.56, "grad_norm": 1.5358799651119135, "learning_rate": 8.620739685212133e-06, "loss": 0.6789, "step": 5483 }, { "epoch": 0.56, "grad_norm": 1.6279261350508096, "learning_rate": 8.61747679138718e-06, "loss": 0.8028, "step": 5484 }, { "epoch": 0.56, "grad_norm": 1.3911315916545854, "learning_rate": 8.614214047613497e-06, "loss": 0.6441, "step": 5485 }, { "epoch": 0.56, "grad_norm": 1.638391124770247, "learning_rate": 8.610951454245211e-06, "loss": 0.715, "step": 5486 }, { "epoch": 0.56, "grad_norm": 1.4050757193555106, "learning_rate": 8.60768901163642e-06, "loss": 0.6294, "step": 5487 }, { "epoch": 0.56, "grad_norm": 1.4292014571964684, "learning_rate": 8.604426720141211e-06, "loss": 0.6052, "step": 5488 }, { "epoch": 0.56, "grad_norm": 1.5718555849430869, "learning_rate": 8.601164580113663e-06, "loss": 0.6714, "step": 5489 }, { "epoch": 0.56, "grad_norm": 1.6533540016966977, "learning_rate": 8.597902591907821e-06, "loss": 0.7301, "step": 5490 }, { "epoch": 0.56, "grad_norm": 1.7720517033439591, "learning_rate": 8.594640755877728e-06, "loss": 0.7721, "step": 5491 }, { "epoch": 0.56, "grad_norm": 1.5961720840389233, "learning_rate": 8.591379072377401e-06, "loss": 0.6951, "step": 5492 }, { "epoch": 0.56, "grad_norm": 1.8415406374214263, "learning_rate": 8.588117541760848e-06, "loss": 0.8032, "step": 5493 }, { "epoch": 0.56, "grad_norm": 1.3633959194012253, "learning_rate": 8.58485616438206e-06, "loss": 0.6743, "step": 5494 }, { "epoch": 0.56, "grad_norm": 1.512968423653738, "learning_rate": 8.581594940595002e-06, "loss": 0.7512, "step": 5495 }, { "epoch": 0.56, "grad_norm": 1.4542667920517232, "learning_rate": 8.578333870753635e-06, "loss": 0.7837, "step": 5496 }, { "epoch": 0.56, "grad_norm": 1.6243586978939764, "learning_rate": 8.575072955211888e-06, "loss": 0.6827, "step": 5497 }, { "epoch": 0.56, "grad_norm": 1.5343117176073966, "learning_rate": 8.57181219432369e-06, "loss": 0.6556, "step": 5498 }, { "epoch": 0.56, "grad_norm": 1.5582736131421835, "learning_rate": 8.568551588442949e-06, "loss": 0.7319, "step": 5499 }, { "epoch": 0.56, "grad_norm": 1.5461179064783763, "learning_rate": 8.565291137923542e-06, "loss": 0.7341, "step": 5500 }, { "epoch": 0.56, "grad_norm": 1.7157349604771854, "learning_rate": 8.562030843119346e-06, "loss": 0.7225, "step": 5501 }, { "epoch": 0.56, "grad_norm": 1.7871742129365662, "learning_rate": 8.55877070438422e-06, "loss": 0.7229, "step": 5502 }, { "epoch": 0.56, "grad_norm": 1.9183352542015495, "learning_rate": 8.55551072207199e-06, "loss": 0.7484, "step": 5503 }, { "epoch": 0.56, "grad_norm": 1.464447201539831, "learning_rate": 8.552250896536488e-06, "loss": 0.7543, "step": 5504 }, { "epoch": 0.56, "grad_norm": 1.8243480915817782, "learning_rate": 8.548991228131503e-06, "loss": 0.8253, "step": 5505 }, { "epoch": 0.56, "grad_norm": 1.601641158363978, "learning_rate": 8.545731717210832e-06, "loss": 0.6864, "step": 5506 }, { "epoch": 0.56, "grad_norm": 1.6520512376146232, "learning_rate": 8.542472364128244e-06, "loss": 0.6393, "step": 5507 }, { "epoch": 0.56, "grad_norm": 1.5117404534779368, "learning_rate": 8.539213169237483e-06, "loss": 0.7381, "step": 5508 }, { "epoch": 0.56, "grad_norm": 1.5655502787624929, "learning_rate": 8.535954132892295e-06, "loss": 0.7102, "step": 5509 }, { "epoch": 0.56, "grad_norm": 1.7211773204730685, "learning_rate": 8.532695255446384e-06, "loss": 0.7406, "step": 5510 }, { "epoch": 0.56, "grad_norm": 1.6313306762180304, "learning_rate": 8.529436537253458e-06, "loss": 0.7661, "step": 5511 }, { "epoch": 0.56, "grad_norm": 1.6058096645199098, "learning_rate": 8.526177978667202e-06, "loss": 0.7611, "step": 5512 }, { "epoch": 0.56, "grad_norm": 1.4762936540269178, "learning_rate": 8.522919580041276e-06, "loss": 0.8195, "step": 5513 }, { "epoch": 0.56, "grad_norm": 1.751430534818543, "learning_rate": 8.519661341729333e-06, "loss": 0.6863, "step": 5514 }, { "epoch": 0.56, "grad_norm": 1.462329357830292, "learning_rate": 8.516403264084998e-06, "loss": 0.681, "step": 5515 }, { "epoch": 0.56, "grad_norm": 1.4524260115250442, "learning_rate": 8.51314534746189e-06, "loss": 0.6933, "step": 5516 }, { "epoch": 0.56, "grad_norm": 1.5756065194086915, "learning_rate": 8.509887592213604e-06, "loss": 0.6209, "step": 5517 }, { "epoch": 0.56, "grad_norm": 1.7025368438615078, "learning_rate": 8.506629998693714e-06, "loss": 0.7609, "step": 5518 }, { "epoch": 0.56, "grad_norm": 1.7032354266244643, "learning_rate": 8.503372567255787e-06, "loss": 0.7251, "step": 5519 }, { "epoch": 0.56, "grad_norm": 1.6639787231587528, "learning_rate": 8.500115298253361e-06, "loss": 0.6176, "step": 5520 }, { "epoch": 0.56, "grad_norm": 1.4583955236227215, "learning_rate": 8.496858192039962e-06, "loss": 0.615, "step": 5521 }, { "epoch": 0.56, "grad_norm": 1.6337365754120245, "learning_rate": 8.493601248969103e-06, "loss": 0.8058, "step": 5522 }, { "epoch": 0.56, "grad_norm": 1.7051305347035186, "learning_rate": 8.490344469394271e-06, "loss": 0.7225, "step": 5523 }, { "epoch": 0.56, "grad_norm": 1.5575367248237868, "learning_rate": 8.487087853668935e-06, "loss": 0.63, "step": 5524 }, { "epoch": 0.56, "grad_norm": 1.6287292527008408, "learning_rate": 8.483831402146559e-06, "loss": 0.6938, "step": 5525 }, { "epoch": 0.56, "grad_norm": 1.50483887604819, "learning_rate": 8.48057511518057e-06, "loss": 0.6772, "step": 5526 }, { "epoch": 0.56, "grad_norm": 1.7309490692074987, "learning_rate": 8.477318993124393e-06, "loss": 0.8651, "step": 5527 }, { "epoch": 0.56, "grad_norm": 1.4125409131381073, "learning_rate": 8.474063036331425e-06, "loss": 0.712, "step": 5528 }, { "epoch": 0.56, "grad_norm": 1.5606438706805952, "learning_rate": 8.470807245155053e-06, "loss": 0.6616, "step": 5529 }, { "epoch": 0.56, "grad_norm": 1.4651607308021533, "learning_rate": 8.46755161994864e-06, "loss": 0.722, "step": 5530 }, { "epoch": 0.56, "grad_norm": 1.5445416441462756, "learning_rate": 8.464296161065533e-06, "loss": 0.7322, "step": 5531 }, { "epoch": 0.56, "grad_norm": 1.5176082048874708, "learning_rate": 8.461040868859065e-06, "loss": 0.628, "step": 5532 }, { "epoch": 0.56, "grad_norm": 1.5574105642679101, "learning_rate": 8.45778574368254e-06, "loss": 0.7133, "step": 5533 }, { "epoch": 0.56, "grad_norm": 1.5858107938600134, "learning_rate": 8.454530785889256e-06, "loss": 0.762, "step": 5534 }, { "epoch": 0.56, "grad_norm": 1.6696786532923031, "learning_rate": 8.45127599583249e-06, "loss": 0.7083, "step": 5535 }, { "epoch": 0.56, "grad_norm": 1.8353554641884566, "learning_rate": 8.448021373865493e-06, "loss": 0.7876, "step": 5536 }, { "epoch": 0.56, "grad_norm": 1.5643365391993624, "learning_rate": 8.444766920341509e-06, "loss": 0.7114, "step": 5537 }, { "epoch": 0.56, "grad_norm": 1.6023590781916328, "learning_rate": 8.441512635613749e-06, "loss": 0.6952, "step": 5538 }, { "epoch": 0.56, "grad_norm": 1.731932561372792, "learning_rate": 8.438258520035421e-06, "loss": 0.7214, "step": 5539 }, { "epoch": 0.56, "grad_norm": 1.5965376353785534, "learning_rate": 8.435004573959714e-06, "loss": 0.6458, "step": 5540 }, { "epoch": 0.56, "grad_norm": 1.8887697894262163, "learning_rate": 8.43175079773978e-06, "loss": 0.7957, "step": 5541 }, { "epoch": 0.56, "grad_norm": 1.5778138226547658, "learning_rate": 8.428497191728773e-06, "loss": 0.6641, "step": 5542 }, { "epoch": 0.56, "grad_norm": 1.5779590248921962, "learning_rate": 8.425243756279824e-06, "loss": 0.7809, "step": 5543 }, { "epoch": 0.56, "grad_norm": 1.5315990931760999, "learning_rate": 8.421990491746036e-06, "loss": 0.6848, "step": 5544 }, { "epoch": 0.56, "grad_norm": 1.5889701751512135, "learning_rate": 8.418737398480505e-06, "loss": 0.6625, "step": 5545 }, { "epoch": 0.56, "grad_norm": 1.6874965203363166, "learning_rate": 8.415484476836299e-06, "loss": 0.7285, "step": 5546 }, { "epoch": 0.56, "grad_norm": 1.540099537700872, "learning_rate": 8.412231727166473e-06, "loss": 0.7185, "step": 5547 }, { "epoch": 0.56, "grad_norm": 1.5670593760585358, "learning_rate": 8.408979149824066e-06, "loss": 0.749, "step": 5548 }, { "epoch": 0.56, "grad_norm": 1.5576932693594387, "learning_rate": 8.40572674516209e-06, "loss": 0.5929, "step": 5549 }, { "epoch": 0.56, "grad_norm": 1.4934320775822312, "learning_rate": 8.402474513533547e-06, "loss": 0.7322, "step": 5550 }, { "epoch": 0.56, "grad_norm": 1.6562092106422215, "learning_rate": 8.39922245529141e-06, "loss": 0.6973, "step": 5551 }, { "epoch": 0.56, "grad_norm": 1.6473263063260772, "learning_rate": 8.395970570788639e-06, "loss": 0.6905, "step": 5552 }, { "epoch": 0.56, "grad_norm": 1.585602680375613, "learning_rate": 8.392718860378184e-06, "loss": 0.7053, "step": 5553 }, { "epoch": 0.56, "grad_norm": 1.6454460458345783, "learning_rate": 8.38946732441296e-06, "loss": 0.7143, "step": 5554 }, { "epoch": 0.57, "grad_norm": 1.4922249663072742, "learning_rate": 8.386215963245873e-06, "loss": 0.6574, "step": 5555 }, { "epoch": 0.57, "grad_norm": 1.5801772788488986, "learning_rate": 8.382964777229805e-06, "loss": 0.7476, "step": 5556 }, { "epoch": 0.57, "grad_norm": 1.5681557434886617, "learning_rate": 8.379713766717621e-06, "loss": 0.6844, "step": 5557 }, { "epoch": 0.57, "grad_norm": 1.6810740575671264, "learning_rate": 8.376462932062173e-06, "loss": 0.6406, "step": 5558 }, { "epoch": 0.57, "grad_norm": 1.5934027970623843, "learning_rate": 8.373212273616281e-06, "loss": 0.7524, "step": 5559 }, { "epoch": 0.57, "grad_norm": 1.6566698688257904, "learning_rate": 8.369961791732758e-06, "loss": 0.6743, "step": 5560 }, { "epoch": 0.57, "grad_norm": 1.6857676438146336, "learning_rate": 8.366711486764392e-06, "loss": 0.6712, "step": 5561 }, { "epoch": 0.57, "grad_norm": 1.705561999111611, "learning_rate": 8.363461359063952e-06, "loss": 0.6882, "step": 5562 }, { "epoch": 0.57, "grad_norm": 1.374891944612468, "learning_rate": 8.360211408984192e-06, "loss": 0.5937, "step": 5563 }, { "epoch": 0.57, "grad_norm": 1.574393404851258, "learning_rate": 8.356961636877834e-06, "loss": 0.7417, "step": 5564 }, { "epoch": 0.57, "grad_norm": 1.7885837197042456, "learning_rate": 8.353712043097598e-06, "loss": 0.7578, "step": 5565 }, { "epoch": 0.57, "grad_norm": 1.6466916135457996, "learning_rate": 8.350462627996177e-06, "loss": 0.6979, "step": 5566 }, { "epoch": 0.57, "grad_norm": 1.655448490479182, "learning_rate": 8.34721339192624e-06, "loss": 0.7016, "step": 5567 }, { "epoch": 0.57, "grad_norm": 1.6885302646033078, "learning_rate": 8.343964335240443e-06, "loss": 0.6853, "step": 5568 }, { "epoch": 0.57, "grad_norm": 1.6656985984382529, "learning_rate": 8.340715458291422e-06, "loss": 0.6549, "step": 5569 }, { "epoch": 0.57, "grad_norm": 1.5822177752381685, "learning_rate": 8.337466761431785e-06, "loss": 0.7849, "step": 5570 }, { "epoch": 0.57, "grad_norm": 1.584122032281881, "learning_rate": 8.334218245014138e-06, "loss": 0.772, "step": 5571 }, { "epoch": 0.57, "grad_norm": 1.655308408893074, "learning_rate": 8.330969909391046e-06, "loss": 0.7676, "step": 5572 }, { "epoch": 0.57, "grad_norm": 1.5107759446204707, "learning_rate": 8.32772175491507e-06, "loss": 0.7032, "step": 5573 }, { "epoch": 0.57, "grad_norm": 1.5337381993316719, "learning_rate": 8.324473781938749e-06, "loss": 0.743, "step": 5574 }, { "epoch": 0.57, "grad_norm": 1.6892896481283168, "learning_rate": 8.321225990814592e-06, "loss": 0.6367, "step": 5575 }, { "epoch": 0.57, "grad_norm": 1.5491307737483462, "learning_rate": 8.317978381895105e-06, "loss": 0.7983, "step": 5576 }, { "epoch": 0.57, "grad_norm": 1.6842601260701693, "learning_rate": 8.314730955532757e-06, "loss": 0.7657, "step": 5577 }, { "epoch": 0.57, "grad_norm": 1.4904895744526254, "learning_rate": 8.311483712080011e-06, "loss": 0.6465, "step": 5578 }, { "epoch": 0.57, "grad_norm": 1.5253575724269708, "learning_rate": 8.308236651889304e-06, "loss": 0.5721, "step": 5579 }, { "epoch": 0.57, "grad_norm": 1.6042099863240251, "learning_rate": 8.30498977531305e-06, "loss": 0.7276, "step": 5580 }, { "epoch": 0.57, "grad_norm": 1.497215582210722, "learning_rate": 8.301743082703651e-06, "loss": 0.6767, "step": 5581 }, { "epoch": 0.57, "grad_norm": 1.5552740736267856, "learning_rate": 8.29849657441348e-06, "loss": 0.7603, "step": 5582 }, { "epoch": 0.57, "grad_norm": 1.5397565487267744, "learning_rate": 8.2952502507949e-06, "loss": 0.6586, "step": 5583 }, { "epoch": 0.57, "grad_norm": 1.6627728598845204, "learning_rate": 8.292004112200245e-06, "loss": 0.6835, "step": 5584 }, { "epoch": 0.57, "grad_norm": 1.6729356422110861, "learning_rate": 8.28875815898183e-06, "loss": 0.7202, "step": 5585 }, { "epoch": 0.57, "grad_norm": 1.5953255361916052, "learning_rate": 8.285512391491964e-06, "loss": 0.7248, "step": 5586 }, { "epoch": 0.57, "grad_norm": 1.5983069282455369, "learning_rate": 8.28226681008291e-06, "loss": 0.6544, "step": 5587 }, { "epoch": 0.57, "grad_norm": 1.3969342118400652, "learning_rate": 8.27902141510693e-06, "loss": 0.5857, "step": 5588 }, { "epoch": 0.57, "grad_norm": 1.6125249307841638, "learning_rate": 8.275776206916268e-06, "loss": 0.6612, "step": 5589 }, { "epoch": 0.57, "grad_norm": 1.6681447234561344, "learning_rate": 8.27253118586313e-06, "loss": 0.7215, "step": 5590 }, { "epoch": 0.57, "grad_norm": 1.5914349153850604, "learning_rate": 8.269286352299723e-06, "loss": 0.6635, "step": 5591 }, { "epoch": 0.57, "grad_norm": 1.7033247822836768, "learning_rate": 8.266041706578212e-06, "loss": 0.6536, "step": 5592 }, { "epoch": 0.57, "grad_norm": 1.6737519134798131, "learning_rate": 8.262797249050758e-06, "loss": 0.7259, "step": 5593 }, { "epoch": 0.57, "grad_norm": 1.6019239391867346, "learning_rate": 8.259552980069499e-06, "loss": 0.7038, "step": 5594 }, { "epoch": 0.57, "grad_norm": 1.7608015334026599, "learning_rate": 8.256308899986543e-06, "loss": 0.5839, "step": 5595 }, { "epoch": 0.57, "grad_norm": 1.6677107873015877, "learning_rate": 8.253065009153988e-06, "loss": 0.6422, "step": 5596 }, { "epoch": 0.57, "grad_norm": 1.5685334631977799, "learning_rate": 8.249821307923911e-06, "loss": 0.7272, "step": 5597 }, { "epoch": 0.57, "grad_norm": 1.4941559661351542, "learning_rate": 8.246577796648357e-06, "loss": 0.7019, "step": 5598 }, { "epoch": 0.57, "grad_norm": 1.6341561834848046, "learning_rate": 8.243334475679367e-06, "loss": 0.7615, "step": 5599 }, { "epoch": 0.57, "grad_norm": 1.7031917858396979, "learning_rate": 8.240091345368944e-06, "loss": 0.8472, "step": 5600 }, { "epoch": 0.57, "grad_norm": 1.4470246442660457, "learning_rate": 8.236848406069084e-06, "loss": 0.6536, "step": 5601 }, { "epoch": 0.57, "grad_norm": 1.7029488235864563, "learning_rate": 8.23360565813176e-06, "loss": 0.7819, "step": 5602 }, { "epoch": 0.57, "grad_norm": 1.7033324038098736, "learning_rate": 8.230363101908916e-06, "loss": 0.7275, "step": 5603 }, { "epoch": 0.57, "grad_norm": 1.485534242872215, "learning_rate": 8.227120737752486e-06, "loss": 0.6535, "step": 5604 }, { "epoch": 0.57, "grad_norm": 1.6491114795083066, "learning_rate": 8.223878566014372e-06, "loss": 0.7522, "step": 5605 }, { "epoch": 0.57, "grad_norm": 1.688704750751506, "learning_rate": 8.220636587046464e-06, "loss": 0.763, "step": 5606 }, { "epoch": 0.57, "grad_norm": 1.3452689780904759, "learning_rate": 8.217394801200632e-06, "loss": 0.6435, "step": 5607 }, { "epoch": 0.57, "grad_norm": 1.6312682950472936, "learning_rate": 8.214153208828714e-06, "loss": 0.7451, "step": 5608 }, { "epoch": 0.57, "grad_norm": 1.5168308517665703, "learning_rate": 8.210911810282542e-06, "loss": 0.6365, "step": 5609 }, { "epoch": 0.57, "grad_norm": 1.6131248031193193, "learning_rate": 8.20767060591391e-06, "loss": 0.6881, "step": 5610 }, { "epoch": 0.57, "grad_norm": 1.5979577897434138, "learning_rate": 8.204429596074605e-06, "loss": 0.7088, "step": 5611 }, { "epoch": 0.57, "grad_norm": 1.4937526190944859, "learning_rate": 8.201188781116392e-06, "loss": 0.6796, "step": 5612 }, { "epoch": 0.57, "grad_norm": 1.45679874555144, "learning_rate": 8.197948161391003e-06, "loss": 0.7388, "step": 5613 }, { "epoch": 0.57, "grad_norm": 1.5562185392172332, "learning_rate": 8.194707737250158e-06, "loss": 0.7071, "step": 5614 }, { "epoch": 0.57, "grad_norm": 1.7053520652444238, "learning_rate": 8.191467509045564e-06, "loss": 0.7217, "step": 5615 }, { "epoch": 0.57, "grad_norm": 1.5433041988831695, "learning_rate": 8.188227477128883e-06, "loss": 0.7122, "step": 5616 }, { "epoch": 0.57, "grad_norm": 1.5797508292402456, "learning_rate": 8.18498764185178e-06, "loss": 0.6357, "step": 5617 }, { "epoch": 0.57, "grad_norm": 1.47683890933061, "learning_rate": 8.181748003565883e-06, "loss": 0.6484, "step": 5618 }, { "epoch": 0.57, "grad_norm": 1.5348046552396966, "learning_rate": 8.178508562622804e-06, "loss": 0.7089, "step": 5619 }, { "epoch": 0.57, "grad_norm": 1.414162890397765, "learning_rate": 8.175269319374142e-06, "loss": 0.7159, "step": 5620 }, { "epoch": 0.57, "grad_norm": 1.7791038023349213, "learning_rate": 8.172030274171452e-06, "loss": 0.7431, "step": 5621 }, { "epoch": 0.57, "grad_norm": 1.4784290773633204, "learning_rate": 8.168791427366297e-06, "loss": 0.7103, "step": 5622 }, { "epoch": 0.57, "grad_norm": 1.635880216665212, "learning_rate": 8.16555277931019e-06, "loss": 0.7444, "step": 5623 }, { "epoch": 0.57, "grad_norm": 1.7363639324473743, "learning_rate": 8.162314330354642e-06, "loss": 0.733, "step": 5624 }, { "epoch": 0.57, "grad_norm": 1.7253204538491358, "learning_rate": 8.159076080851139e-06, "loss": 0.7552, "step": 5625 }, { "epoch": 0.57, "grad_norm": 1.5681074189686877, "learning_rate": 8.155838031151134e-06, "loss": 0.7665, "step": 5626 }, { "epoch": 0.57, "grad_norm": 1.5958043823114054, "learning_rate": 8.152600181606073e-06, "loss": 0.6868, "step": 5627 }, { "epoch": 0.57, "grad_norm": 1.5497556316778451, "learning_rate": 8.149362532567374e-06, "loss": 0.6495, "step": 5628 }, { "epoch": 0.57, "grad_norm": 1.5626805277983316, "learning_rate": 8.146125084386428e-06, "loss": 0.6569, "step": 5629 }, { "epoch": 0.57, "grad_norm": 1.6780030139091988, "learning_rate": 8.142887837414614e-06, "loss": 0.8537, "step": 5630 }, { "epoch": 0.57, "grad_norm": 1.5189408676389409, "learning_rate": 8.139650792003286e-06, "loss": 0.8312, "step": 5631 }, { "epoch": 0.57, "grad_norm": 1.4776069199360429, "learning_rate": 8.136413948503767e-06, "loss": 0.6339, "step": 5632 }, { "epoch": 0.57, "grad_norm": 1.5138991319169117, "learning_rate": 8.133177307267376e-06, "loss": 0.7134, "step": 5633 }, { "epoch": 0.57, "grad_norm": 1.5731729926117684, "learning_rate": 8.12994086864539e-06, "loss": 0.7459, "step": 5634 }, { "epoch": 0.57, "grad_norm": 1.5803257301451343, "learning_rate": 8.126704632989082e-06, "loss": 0.7218, "step": 5635 }, { "epoch": 0.57, "grad_norm": 1.6209290479538236, "learning_rate": 8.123468600649686e-06, "loss": 0.7538, "step": 5636 }, { "epoch": 0.57, "grad_norm": 1.4808690714988408, "learning_rate": 8.120232771978432e-06, "loss": 0.689, "step": 5637 }, { "epoch": 0.57, "grad_norm": 1.676758702387847, "learning_rate": 8.116997147326515e-06, "loss": 0.7705, "step": 5638 }, { "epoch": 0.57, "grad_norm": 1.5484496099134843, "learning_rate": 8.113761727045106e-06, "loss": 0.737, "step": 5639 }, { "epoch": 0.57, "grad_norm": 1.7164143999551997, "learning_rate": 8.110526511485371e-06, "loss": 0.7558, "step": 5640 }, { "epoch": 0.57, "grad_norm": 1.5754148116572044, "learning_rate": 8.10729150099843e-06, "loss": 0.6998, "step": 5641 }, { "epoch": 0.57, "grad_norm": 1.7140088046757154, "learning_rate": 8.104056695935396e-06, "loss": 0.7623, "step": 5642 }, { "epoch": 0.57, "grad_norm": 1.7869442676574774, "learning_rate": 8.100822096647365e-06, "loss": 0.7454, "step": 5643 }, { "epoch": 0.57, "grad_norm": 1.4764597743231278, "learning_rate": 8.09758770348539e-06, "loss": 0.6513, "step": 5644 }, { "epoch": 0.57, "grad_norm": 1.6279896509723633, "learning_rate": 8.094353516800524e-06, "loss": 0.6936, "step": 5645 }, { "epoch": 0.57, "grad_norm": 1.5207184246056769, "learning_rate": 8.091119536943779e-06, "loss": 0.7996, "step": 5646 }, { "epoch": 0.57, "grad_norm": 2.0178330689034416, "learning_rate": 8.087885764266156e-06, "loss": 0.7302, "step": 5647 }, { "epoch": 0.57, "grad_norm": 1.5985413437981397, "learning_rate": 8.084652199118636e-06, "loss": 0.7385, "step": 5648 }, { "epoch": 0.57, "grad_norm": 1.6947296632777942, "learning_rate": 8.081418841852163e-06, "loss": 0.7599, "step": 5649 }, { "epoch": 0.57, "grad_norm": 1.485467514936313, "learning_rate": 8.07818569281767e-06, "loss": 0.7569, "step": 5650 }, { "epoch": 0.57, "grad_norm": 1.6350537395684603, "learning_rate": 8.074952752366072e-06, "loss": 0.6527, "step": 5651 }, { "epoch": 0.57, "grad_norm": 1.4772057181632665, "learning_rate": 8.071720020848245e-06, "loss": 0.677, "step": 5652 }, { "epoch": 0.58, "grad_norm": 1.5288422750275847, "learning_rate": 8.068487498615057e-06, "loss": 0.6706, "step": 5653 }, { "epoch": 0.58, "grad_norm": 1.5773263458771583, "learning_rate": 8.065255186017342e-06, "loss": 0.7145, "step": 5654 }, { "epoch": 0.58, "grad_norm": 1.4698615244133397, "learning_rate": 8.06202308340592e-06, "loss": 0.6992, "step": 5655 }, { "epoch": 0.58, "grad_norm": 2.0458570117068953, "learning_rate": 8.05879119113159e-06, "loss": 0.7137, "step": 5656 }, { "epoch": 0.58, "grad_norm": 1.5528151714159797, "learning_rate": 8.055559509545115e-06, "loss": 0.5657, "step": 5657 }, { "epoch": 0.58, "grad_norm": 1.5618631597788561, "learning_rate": 8.052328038997252e-06, "loss": 0.7858, "step": 5658 }, { "epoch": 0.58, "grad_norm": 1.6302595225217231, "learning_rate": 8.04909677983872e-06, "loss": 0.6427, "step": 5659 }, { "epoch": 0.58, "grad_norm": 1.8190481089776078, "learning_rate": 8.04586573242022e-06, "loss": 0.803, "step": 5660 }, { "epoch": 0.58, "grad_norm": 1.6958660962032366, "learning_rate": 8.042634897092443e-06, "loss": 0.7755, "step": 5661 }, { "epoch": 0.58, "grad_norm": 1.742053888634955, "learning_rate": 8.039404274206031e-06, "loss": 0.6488, "step": 5662 }, { "epoch": 0.58, "grad_norm": 1.5210478365225468, "learning_rate": 8.03617386411163e-06, "loss": 0.6826, "step": 5663 }, { "epoch": 0.58, "grad_norm": 1.4114991514657393, "learning_rate": 8.032943667159843e-06, "loss": 0.6926, "step": 5664 }, { "epoch": 0.58, "grad_norm": 1.4134598644524605, "learning_rate": 8.029713683701259e-06, "loss": 0.6353, "step": 5665 }, { "epoch": 0.58, "grad_norm": 1.5362933596085961, "learning_rate": 8.026483914086446e-06, "loss": 0.7266, "step": 5666 }, { "epoch": 0.58, "grad_norm": 1.4561356510584063, "learning_rate": 8.023254358665938e-06, "loss": 0.7336, "step": 5667 }, { "epoch": 0.58, "grad_norm": 1.5192674346006763, "learning_rate": 8.020025017790261e-06, "loss": 0.7731, "step": 5668 }, { "epoch": 0.58, "grad_norm": 1.5640800467790366, "learning_rate": 8.0167958918099e-06, "loss": 0.716, "step": 5669 }, { "epoch": 0.58, "grad_norm": 1.456511074126383, "learning_rate": 8.013566981075331e-06, "loss": 0.6562, "step": 5670 }, { "epoch": 0.58, "grad_norm": 1.6136566932546004, "learning_rate": 8.010338285937006e-06, "loss": 0.7981, "step": 5671 }, { "epoch": 0.58, "grad_norm": 1.669060168683187, "learning_rate": 8.007109806745341e-06, "loss": 0.7027, "step": 5672 }, { "epoch": 0.58, "grad_norm": 1.4881521535751443, "learning_rate": 8.003881543850742e-06, "loss": 0.7885, "step": 5673 }, { "epoch": 0.58, "grad_norm": 1.5634498701023354, "learning_rate": 8.00065349760359e-06, "loss": 0.6158, "step": 5674 }, { "epoch": 0.58, "grad_norm": 1.6285482943204677, "learning_rate": 7.99742566835423e-06, "loss": 0.7625, "step": 5675 }, { "epoch": 0.58, "grad_norm": 1.5147804177704658, "learning_rate": 7.994198056453001e-06, "loss": 0.7938, "step": 5676 }, { "epoch": 0.58, "grad_norm": 1.7054944824732357, "learning_rate": 7.9909706622502e-06, "loss": 0.8145, "step": 5677 }, { "epoch": 0.58, "grad_norm": 1.4443496050342668, "learning_rate": 7.987743486096119e-06, "loss": 0.7224, "step": 5678 }, { "epoch": 0.58, "grad_norm": 1.7038128500780676, "learning_rate": 7.984516528341016e-06, "loss": 0.6913, "step": 5679 }, { "epoch": 0.58, "grad_norm": 1.582191849396049, "learning_rate": 7.981289789335122e-06, "loss": 0.7606, "step": 5680 }, { "epoch": 0.58, "grad_norm": 1.7291647907979886, "learning_rate": 7.978063269428656e-06, "loss": 0.7354, "step": 5681 }, { "epoch": 0.58, "grad_norm": 1.5886724144698394, "learning_rate": 7.974836968971799e-06, "loss": 0.7044, "step": 5682 }, { "epoch": 0.58, "grad_norm": 1.5544304168529097, "learning_rate": 7.97161088831472e-06, "loss": 0.7389, "step": 5683 }, { "epoch": 0.58, "grad_norm": 1.6719397258805486, "learning_rate": 7.968385027807558e-06, "loss": 0.7663, "step": 5684 }, { "epoch": 0.58, "grad_norm": 1.4880037468847649, "learning_rate": 7.965159387800434e-06, "loss": 0.6278, "step": 5685 }, { "epoch": 0.58, "grad_norm": 1.602445909793838, "learning_rate": 7.961933968643435e-06, "loss": 0.8443, "step": 5686 }, { "epoch": 0.58, "grad_norm": 1.7395603370748483, "learning_rate": 7.95870877068663e-06, "loss": 0.7678, "step": 5687 }, { "epoch": 0.58, "grad_norm": 1.8056856860517443, "learning_rate": 7.955483794280068e-06, "loss": 0.6395, "step": 5688 }, { "epoch": 0.58, "grad_norm": 1.55847808067052, "learning_rate": 7.952259039773767e-06, "loss": 0.6431, "step": 5689 }, { "epoch": 0.58, "grad_norm": 1.687208438007986, "learning_rate": 7.949034507517721e-06, "loss": 0.8264, "step": 5690 }, { "epoch": 0.58, "grad_norm": 1.835421354444477, "learning_rate": 7.94581019786191e-06, "loss": 0.7163, "step": 5691 }, { "epoch": 0.58, "grad_norm": 1.7973409383727332, "learning_rate": 7.942586111156277e-06, "loss": 0.7155, "step": 5692 }, { "epoch": 0.58, "grad_norm": 1.7173157692995349, "learning_rate": 7.939362247750745e-06, "loss": 0.7497, "step": 5693 }, { "epoch": 0.58, "grad_norm": 1.675781627264294, "learning_rate": 7.93613860799522e-06, "loss": 0.7794, "step": 5694 }, { "epoch": 0.58, "grad_norm": 1.5541808483981965, "learning_rate": 7.932915192239571e-06, "loss": 0.6026, "step": 5695 }, { "epoch": 0.58, "grad_norm": 1.5952182515805586, "learning_rate": 7.929692000833653e-06, "loss": 0.6893, "step": 5696 }, { "epoch": 0.58, "grad_norm": 1.6961654663081456, "learning_rate": 7.926469034127292e-06, "loss": 0.7462, "step": 5697 }, { "epoch": 0.58, "grad_norm": 1.732500632150684, "learning_rate": 7.923246292470292e-06, "loss": 0.7884, "step": 5698 }, { "epoch": 0.58, "grad_norm": 1.6452456021606285, "learning_rate": 7.920023776212433e-06, "loss": 0.7367, "step": 5699 }, { "epoch": 0.58, "grad_norm": 1.52846216501172, "learning_rate": 7.91680148570346e-06, "loss": 0.6121, "step": 5700 }, { "epoch": 0.58, "grad_norm": 1.6674960297503236, "learning_rate": 7.91357942129311e-06, "loss": 0.7909, "step": 5701 }, { "epoch": 0.58, "grad_norm": 1.5036535214427527, "learning_rate": 7.910357583331088e-06, "loss": 0.7316, "step": 5702 }, { "epoch": 0.58, "grad_norm": 1.652839330197833, "learning_rate": 7.90713597216707e-06, "loss": 0.7686, "step": 5703 }, { "epoch": 0.58, "grad_norm": 1.4637652515969, "learning_rate": 7.903914588150716e-06, "loss": 0.739, "step": 5704 }, { "epoch": 0.58, "grad_norm": 1.6957518798495284, "learning_rate": 7.90069343163165e-06, "loss": 0.7086, "step": 5705 }, { "epoch": 0.58, "grad_norm": 1.6581052593969878, "learning_rate": 7.897472502959484e-06, "loss": 0.6988, "step": 5706 }, { "epoch": 0.58, "grad_norm": 1.716293328997258, "learning_rate": 7.894251802483803e-06, "loss": 0.7448, "step": 5707 }, { "epoch": 0.58, "grad_norm": 1.608428356318047, "learning_rate": 7.891031330554151e-06, "loss": 0.7065, "step": 5708 }, { "epoch": 0.58, "grad_norm": 1.4342559198109504, "learning_rate": 7.88781108752007e-06, "loss": 0.7052, "step": 5709 }, { "epoch": 0.58, "grad_norm": 1.8107927017895886, "learning_rate": 7.884591073731068e-06, "loss": 0.7711, "step": 5710 }, { "epoch": 0.58, "grad_norm": 1.5861194643001888, "learning_rate": 7.88137128953662e-06, "loss": 0.7277, "step": 5711 }, { "epoch": 0.58, "grad_norm": 1.609787288473546, "learning_rate": 7.87815173528619e-06, "loss": 0.6833, "step": 5712 }, { "epoch": 0.58, "grad_norm": 1.648058051240888, "learning_rate": 7.874932411329203e-06, "loss": 0.6429, "step": 5713 }, { "epoch": 0.58, "grad_norm": 1.7289025911897014, "learning_rate": 7.871713318015071e-06, "loss": 0.6381, "step": 5714 }, { "epoch": 0.58, "grad_norm": 1.7201473743909554, "learning_rate": 7.868494455693181e-06, "loss": 0.7717, "step": 5715 }, { "epoch": 0.58, "grad_norm": 1.5865041465364405, "learning_rate": 7.86527582471288e-06, "loss": 0.653, "step": 5716 }, { "epoch": 0.58, "grad_norm": 1.577293936850371, "learning_rate": 7.862057425423509e-06, "loss": 0.753, "step": 5717 }, { "epoch": 0.58, "grad_norm": 1.414651766000381, "learning_rate": 7.858839258174365e-06, "loss": 0.6993, "step": 5718 }, { "epoch": 0.58, "grad_norm": 1.6155072297570676, "learning_rate": 7.855621323314736e-06, "loss": 0.7568, "step": 5719 }, { "epoch": 0.58, "grad_norm": 1.575719982898642, "learning_rate": 7.852403621193883e-06, "loss": 0.7807, "step": 5720 }, { "epoch": 0.58, "grad_norm": 1.6118080727936386, "learning_rate": 7.849186152161028e-06, "loss": 0.7623, "step": 5721 }, { "epoch": 0.58, "grad_norm": 1.6776561690628125, "learning_rate": 7.845968916565385e-06, "loss": 0.6105, "step": 5722 }, { "epoch": 0.58, "grad_norm": 1.6816273038027498, "learning_rate": 7.842751914756128e-06, "loss": 0.7343, "step": 5723 }, { "epoch": 0.58, "grad_norm": 1.521521598263868, "learning_rate": 7.839535147082414e-06, "loss": 0.7389, "step": 5724 }, { "epoch": 0.58, "grad_norm": 1.6662923031868875, "learning_rate": 7.836318613893377e-06, "loss": 0.7188, "step": 5725 }, { "epoch": 0.58, "grad_norm": 1.5968447870884663, "learning_rate": 7.833102315538116e-06, "loss": 0.6968, "step": 5726 }, { "epoch": 0.58, "grad_norm": 1.620655229813537, "learning_rate": 7.829886252365712e-06, "loss": 0.7694, "step": 5727 }, { "epoch": 0.58, "grad_norm": 1.5244934459907622, "learning_rate": 7.82667042472522e-06, "loss": 0.658, "step": 5728 }, { "epoch": 0.58, "grad_norm": 1.6190112998576944, "learning_rate": 7.823454832965666e-06, "loss": 0.706, "step": 5729 }, { "epoch": 0.58, "grad_norm": 1.7098127138691492, "learning_rate": 7.820239477436055e-06, "loss": 0.7253, "step": 5730 }, { "epoch": 0.58, "grad_norm": 1.6048506977406953, "learning_rate": 7.817024358485357e-06, "loss": 0.7531, "step": 5731 }, { "epoch": 0.58, "grad_norm": 1.5235882405052927, "learning_rate": 7.813809476462525e-06, "loss": 0.6348, "step": 5732 }, { "epoch": 0.58, "grad_norm": 1.4798292434357883, "learning_rate": 7.810594831716494e-06, "loss": 0.5956, "step": 5733 }, { "epoch": 0.58, "grad_norm": 1.5262332112480077, "learning_rate": 7.807380424596149e-06, "loss": 0.7542, "step": 5734 }, { "epoch": 0.58, "grad_norm": 1.442225125841356, "learning_rate": 7.804166255450372e-06, "loss": 0.7161, "step": 5735 }, { "epoch": 0.58, "grad_norm": 1.641892390278661, "learning_rate": 7.80095232462801e-06, "loss": 0.851, "step": 5736 }, { "epoch": 0.58, "grad_norm": 1.620114405413643, "learning_rate": 7.797738632477881e-06, "loss": 0.7402, "step": 5737 }, { "epoch": 0.58, "grad_norm": 1.4693871991300977, "learning_rate": 7.794525179348786e-06, "loss": 0.6569, "step": 5738 }, { "epoch": 0.58, "grad_norm": 1.5528409369558045, "learning_rate": 7.791311965589494e-06, "loss": 0.6966, "step": 5739 }, { "epoch": 0.58, "grad_norm": 1.5940979057868363, "learning_rate": 7.788098991548749e-06, "loss": 0.7308, "step": 5740 }, { "epoch": 0.58, "grad_norm": 1.5244751204342553, "learning_rate": 7.784886257575264e-06, "loss": 0.6187, "step": 5741 }, { "epoch": 0.58, "grad_norm": 1.6675693980486168, "learning_rate": 7.781673764017738e-06, "loss": 0.6508, "step": 5742 }, { "epoch": 0.58, "grad_norm": 1.624044453668377, "learning_rate": 7.778461511224835e-06, "loss": 0.6836, "step": 5743 }, { "epoch": 0.58, "grad_norm": 1.8273438127319237, "learning_rate": 7.775249499545189e-06, "loss": 0.6414, "step": 5744 }, { "epoch": 0.58, "grad_norm": 1.8859707954802163, "learning_rate": 7.772037729327423e-06, "loss": 0.7952, "step": 5745 }, { "epoch": 0.58, "grad_norm": 1.6982793251008128, "learning_rate": 7.76882620092012e-06, "loss": 0.8613, "step": 5746 }, { "epoch": 0.58, "grad_norm": 1.410673546463811, "learning_rate": 7.76561491467184e-06, "loss": 0.7405, "step": 5747 }, { "epoch": 0.58, "grad_norm": 1.4415440808988367, "learning_rate": 7.762403870931123e-06, "loss": 0.6729, "step": 5748 }, { "epoch": 0.58, "grad_norm": 1.5811451596234793, "learning_rate": 7.75919307004647e-06, "loss": 0.7217, "step": 5749 }, { "epoch": 0.58, "grad_norm": 1.6540154907977642, "learning_rate": 7.755982512366366e-06, "loss": 0.6948, "step": 5750 }, { "epoch": 0.58, "grad_norm": 1.6137901831730095, "learning_rate": 7.75277219823927e-06, "loss": 0.6641, "step": 5751 }, { "epoch": 0.59, "grad_norm": 1.6472949529054923, "learning_rate": 7.749562128013606e-06, "loss": 0.6697, "step": 5752 }, { "epoch": 0.59, "grad_norm": 1.6150286746139912, "learning_rate": 7.746352302037786e-06, "loss": 0.6422, "step": 5753 }, { "epoch": 0.59, "grad_norm": 1.801711018578529, "learning_rate": 7.743142720660175e-06, "loss": 0.6853, "step": 5754 }, { "epoch": 0.59, "grad_norm": 1.4361518027545168, "learning_rate": 7.73993338422913e-06, "loss": 0.6891, "step": 5755 }, { "epoch": 0.59, "grad_norm": 1.7345840777511297, "learning_rate": 7.736724293092975e-06, "loss": 0.7067, "step": 5756 }, { "epoch": 0.59, "grad_norm": 1.481089883684841, "learning_rate": 7.733515447600001e-06, "loss": 0.6426, "step": 5757 }, { "epoch": 0.59, "grad_norm": 1.4978401850787875, "learning_rate": 7.730306848098485e-06, "loss": 0.7194, "step": 5758 }, { "epoch": 0.59, "grad_norm": 1.5186636663429582, "learning_rate": 7.727098494936663e-06, "loss": 0.7443, "step": 5759 }, { "epoch": 0.59, "grad_norm": 1.60008866516873, "learning_rate": 7.723890388462755e-06, "loss": 0.7472, "step": 5760 }, { "epoch": 0.59, "grad_norm": 1.4684241101960502, "learning_rate": 7.720682529024954e-06, "loss": 0.6613, "step": 5761 }, { "epoch": 0.59, "grad_norm": 1.3717717677756704, "learning_rate": 7.717474916971417e-06, "loss": 0.5526, "step": 5762 }, { "epoch": 0.59, "grad_norm": 1.4948051573294627, "learning_rate": 7.714267552650283e-06, "loss": 0.6751, "step": 5763 }, { "epoch": 0.59, "grad_norm": 1.6615283937090968, "learning_rate": 7.711060436409666e-06, "loss": 0.8354, "step": 5764 }, { "epoch": 0.59, "grad_norm": 1.504730171070354, "learning_rate": 7.707853568597638e-06, "loss": 0.7561, "step": 5765 }, { "epoch": 0.59, "grad_norm": 1.7639588172269918, "learning_rate": 7.704646949562264e-06, "loss": 0.7053, "step": 5766 }, { "epoch": 0.59, "grad_norm": 1.6477621135946416, "learning_rate": 7.701440579651566e-06, "loss": 0.7342, "step": 5767 }, { "epoch": 0.59, "grad_norm": 1.4672356032530092, "learning_rate": 7.698234459213545e-06, "loss": 0.6801, "step": 5768 }, { "epoch": 0.59, "grad_norm": 1.6686897139751138, "learning_rate": 7.695028588596185e-06, "loss": 0.6892, "step": 5769 }, { "epoch": 0.59, "grad_norm": 1.5214058904728014, "learning_rate": 7.69182296814742e-06, "loss": 0.6733, "step": 5770 }, { "epoch": 0.59, "grad_norm": 1.7885394268906438, "learning_rate": 7.688617598215182e-06, "loss": 0.7831, "step": 5771 }, { "epoch": 0.59, "grad_norm": 1.6048047448421283, "learning_rate": 7.685412479147352e-06, "loss": 0.6657, "step": 5772 }, { "epoch": 0.59, "grad_norm": 1.5551971775290863, "learning_rate": 7.6822076112918e-06, "loss": 0.6063, "step": 5773 }, { "epoch": 0.59, "grad_norm": 1.6143685528810567, "learning_rate": 7.679002994996372e-06, "loss": 0.7048, "step": 5774 }, { "epoch": 0.59, "grad_norm": 1.657918462223705, "learning_rate": 7.675798630608867e-06, "loss": 0.771, "step": 5775 }, { "epoch": 0.59, "grad_norm": 1.528457546476097, "learning_rate": 7.672594518477078e-06, "loss": 0.7309, "step": 5776 }, { "epoch": 0.59, "grad_norm": 1.5279410934335003, "learning_rate": 7.669390658948755e-06, "loss": 0.7658, "step": 5777 }, { "epoch": 0.59, "grad_norm": 1.6382782909336935, "learning_rate": 7.66618705237163e-06, "loss": 0.7689, "step": 5778 }, { "epoch": 0.59, "grad_norm": 1.5531108260546986, "learning_rate": 7.662983699093406e-06, "loss": 0.7474, "step": 5779 }, { "epoch": 0.59, "grad_norm": 1.5019858046942969, "learning_rate": 7.65978059946175e-06, "loss": 0.581, "step": 5780 }, { "epoch": 0.59, "grad_norm": 1.6333470851583256, "learning_rate": 7.656577753824314e-06, "loss": 0.7437, "step": 5781 }, { "epoch": 0.59, "grad_norm": 1.6317538425852873, "learning_rate": 7.65337516252872e-06, "loss": 0.6981, "step": 5782 }, { "epoch": 0.59, "grad_norm": 1.65717576290963, "learning_rate": 7.65017282592255e-06, "loss": 0.6735, "step": 5783 }, { "epoch": 0.59, "grad_norm": 1.6307719724360934, "learning_rate": 7.64697074435338e-06, "loss": 0.6793, "step": 5784 }, { "epoch": 0.59, "grad_norm": 1.5764880566930657, "learning_rate": 7.64376891816873e-06, "loss": 0.707, "step": 5785 }, { "epoch": 0.59, "grad_norm": 1.5477253630397978, "learning_rate": 7.64056734771612e-06, "loss": 0.6924, "step": 5786 }, { "epoch": 0.59, "grad_norm": 1.5882482658079655, "learning_rate": 7.637366033343028e-06, "loss": 0.6662, "step": 5787 }, { "epoch": 0.59, "grad_norm": 1.4886743464528789, "learning_rate": 7.634164975396903e-06, "loss": 0.6774, "step": 5788 }, { "epoch": 0.59, "grad_norm": 1.5966823968369421, "learning_rate": 7.630964174225175e-06, "loss": 0.732, "step": 5789 }, { "epoch": 0.59, "grad_norm": 1.47748735264666, "learning_rate": 7.627763630175236e-06, "loss": 0.7289, "step": 5790 }, { "epoch": 0.59, "grad_norm": 1.5900714815590495, "learning_rate": 7.624563343594457e-06, "loss": 0.6183, "step": 5791 }, { "epoch": 0.59, "grad_norm": 1.4729519261324666, "learning_rate": 7.621363314830182e-06, "loss": 0.6789, "step": 5792 }, { "epoch": 0.59, "grad_norm": 1.5556688222110906, "learning_rate": 7.618163544229722e-06, "loss": 0.6508, "step": 5793 }, { "epoch": 0.59, "grad_norm": 1.642197194926469, "learning_rate": 7.614964032140359e-06, "loss": 0.6123, "step": 5794 }, { "epoch": 0.59, "grad_norm": 1.5312811606471664, "learning_rate": 7.611764778909352e-06, "loss": 0.7497, "step": 5795 }, { "epoch": 0.59, "grad_norm": 1.612451680261775, "learning_rate": 7.608565784883932e-06, "loss": 0.7227, "step": 5796 }, { "epoch": 0.59, "grad_norm": 1.619756588082997, "learning_rate": 7.6053670504112995e-06, "loss": 0.7505, "step": 5797 }, { "epoch": 0.59, "grad_norm": 1.5693701631566446, "learning_rate": 7.602168575838622e-06, "loss": 0.7009, "step": 5798 }, { "epoch": 0.59, "grad_norm": 1.856737086352245, "learning_rate": 7.598970361513052e-06, "loss": 0.7856, "step": 5799 }, { "epoch": 0.59, "grad_norm": 1.5410044281344544, "learning_rate": 7.5957724077816985e-06, "loss": 0.7093, "step": 5800 }, { "epoch": 0.59, "grad_norm": 1.7184048220446808, "learning_rate": 7.5925747149916515e-06, "loss": 0.6581, "step": 5801 }, { "epoch": 0.59, "grad_norm": 1.6669867702953898, "learning_rate": 7.589377283489976e-06, "loss": 0.7804, "step": 5802 }, { "epoch": 0.59, "grad_norm": 1.6790229508054892, "learning_rate": 7.586180113623694e-06, "loss": 0.769, "step": 5803 }, { "epoch": 0.59, "grad_norm": 1.4762824768291218, "learning_rate": 7.582983205739815e-06, "loss": 0.6036, "step": 5804 }, { "epoch": 0.59, "grad_norm": 1.3934053469588565, "learning_rate": 7.579786560185311e-06, "loss": 0.6886, "step": 5805 }, { "epoch": 0.59, "grad_norm": 1.6255197560643635, "learning_rate": 7.576590177307125e-06, "loss": 0.6945, "step": 5806 }, { "epoch": 0.59, "grad_norm": 1.6613780786906946, "learning_rate": 7.573394057452181e-06, "loss": 0.7167, "step": 5807 }, { "epoch": 0.59, "grad_norm": 1.570481880582874, "learning_rate": 7.570198200967363e-06, "loss": 0.7472, "step": 5808 }, { "epoch": 0.59, "grad_norm": 1.617755903243908, "learning_rate": 7.5670026081995295e-06, "loss": 0.7497, "step": 5809 }, { "epoch": 0.59, "grad_norm": 1.7559979574300808, "learning_rate": 7.563807279495521e-06, "loss": 0.7635, "step": 5810 }, { "epoch": 0.59, "grad_norm": 1.7041260538063296, "learning_rate": 7.560612215202129e-06, "loss": 0.7424, "step": 5811 }, { "epoch": 0.59, "grad_norm": 1.5673151751986434, "learning_rate": 7.557417415666138e-06, "loss": 0.7054, "step": 5812 }, { "epoch": 0.59, "grad_norm": 1.642617769709063, "learning_rate": 7.554222881234284e-06, "loss": 0.6868, "step": 5813 }, { "epoch": 0.59, "grad_norm": 1.4400619885328076, "learning_rate": 7.55102861225329e-06, "loss": 0.7142, "step": 5814 }, { "epoch": 0.59, "grad_norm": 1.52967714882465, "learning_rate": 7.547834609069846e-06, "loss": 0.7085, "step": 5815 }, { "epoch": 0.59, "grad_norm": 1.4760264857506311, "learning_rate": 7.544640872030604e-06, "loss": 0.6837, "step": 5816 }, { "epoch": 0.59, "grad_norm": 1.6786991582733812, "learning_rate": 7.5414474014822e-06, "loss": 0.8335, "step": 5817 }, { "epoch": 0.59, "grad_norm": 1.8209395656572913, "learning_rate": 7.538254197771231e-06, "loss": 0.7985, "step": 5818 }, { "epoch": 0.59, "grad_norm": 1.6453101136824253, "learning_rate": 7.535061261244271e-06, "loss": 0.7504, "step": 5819 }, { "epoch": 0.59, "grad_norm": 1.5088877840540806, "learning_rate": 7.5318685922478675e-06, "loss": 0.6652, "step": 5820 }, { "epoch": 0.59, "grad_norm": 1.3748999206010954, "learning_rate": 7.528676191128528e-06, "loss": 0.7178, "step": 5821 }, { "epoch": 0.59, "grad_norm": 1.60091876485254, "learning_rate": 7.525484058232739e-06, "loss": 0.8099, "step": 5822 }, { "epoch": 0.59, "grad_norm": 1.4644531630938904, "learning_rate": 7.522292193906964e-06, "loss": 0.6834, "step": 5823 }, { "epoch": 0.59, "grad_norm": 1.487917947088439, "learning_rate": 7.5191005984976196e-06, "loss": 0.6411, "step": 5824 }, { "epoch": 0.59, "grad_norm": 1.6010186255910743, "learning_rate": 7.515909272351112e-06, "loss": 0.7479, "step": 5825 }, { "epoch": 0.59, "grad_norm": 1.6386631361679176, "learning_rate": 7.512718215813802e-06, "loss": 0.7278, "step": 5826 }, { "epoch": 0.59, "grad_norm": 1.7203597276680547, "learning_rate": 7.509527429232033e-06, "loss": 0.7027, "step": 5827 }, { "epoch": 0.59, "grad_norm": 1.7403021291942407, "learning_rate": 7.506336912952121e-06, "loss": 0.6961, "step": 5828 }, { "epoch": 0.59, "grad_norm": 1.649231460060806, "learning_rate": 7.503146667320334e-06, "loss": 0.6828, "step": 5829 }, { "epoch": 0.59, "grad_norm": 1.548994166079608, "learning_rate": 7.499956692682935e-06, "loss": 0.684, "step": 5830 }, { "epoch": 0.59, "grad_norm": 1.5992775479851562, "learning_rate": 7.4967669893861364e-06, "loss": 0.6688, "step": 5831 }, { "epoch": 0.59, "grad_norm": 1.5744237446744, "learning_rate": 7.493577557776135e-06, "loss": 0.7714, "step": 5832 }, { "epoch": 0.59, "grad_norm": 1.5185226876303048, "learning_rate": 7.490388398199098e-06, "loss": 0.7452, "step": 5833 }, { "epoch": 0.59, "grad_norm": 1.8450325476321954, "learning_rate": 7.487199511001148e-06, "loss": 0.7273, "step": 5834 }, { "epoch": 0.59, "grad_norm": 1.6248177114263815, "learning_rate": 7.4840108965284016e-06, "loss": 0.7375, "step": 5835 }, { "epoch": 0.59, "grad_norm": 1.7510103642438084, "learning_rate": 7.480822555126923e-06, "loss": 0.8054, "step": 5836 }, { "epoch": 0.59, "grad_norm": 1.4450252781350619, "learning_rate": 7.477634487142759e-06, "loss": 0.7438, "step": 5837 }, { "epoch": 0.59, "grad_norm": 1.5620357709252137, "learning_rate": 7.474446692921931e-06, "loss": 0.8239, "step": 5838 }, { "epoch": 0.59, "grad_norm": 1.7434351076894188, "learning_rate": 7.471259172810417e-06, "loss": 0.7382, "step": 5839 }, { "epoch": 0.59, "grad_norm": 1.7191352077450146, "learning_rate": 7.468071927154173e-06, "loss": 0.7162, "step": 5840 }, { "epoch": 0.59, "grad_norm": 1.6492411606609065, "learning_rate": 7.46488495629913e-06, "loss": 0.7453, "step": 5841 }, { "epoch": 0.59, "grad_norm": 1.4196509139536047, "learning_rate": 7.461698260591175e-06, "loss": 0.668, "step": 5842 }, { "epoch": 0.59, "grad_norm": 1.5833384209273522, "learning_rate": 7.458511840376184e-06, "loss": 0.767, "step": 5843 }, { "epoch": 0.59, "grad_norm": 1.6025918147510856, "learning_rate": 7.455325695999986e-06, "loss": 0.7642, "step": 5844 }, { "epoch": 0.59, "grad_norm": 1.6468470813377398, "learning_rate": 7.452139827808389e-06, "loss": 0.6987, "step": 5845 }, { "epoch": 0.59, "grad_norm": 1.8830976071957954, "learning_rate": 7.44895423614717e-06, "loss": 0.8303, "step": 5846 }, { "epoch": 0.59, "grad_norm": 1.7994734743793335, "learning_rate": 7.445768921362076e-06, "loss": 0.7118, "step": 5847 }, { "epoch": 0.59, "grad_norm": 1.7340069598124714, "learning_rate": 7.442583883798822e-06, "loss": 0.801, "step": 5848 }, { "epoch": 0.59, "grad_norm": 1.7089345395030544, "learning_rate": 7.439399123803091e-06, "loss": 0.8203, "step": 5849 }, { "epoch": 0.6, "grad_norm": 1.4880693573774777, "learning_rate": 7.436214641720545e-06, "loss": 0.6633, "step": 5850 }, { "epoch": 0.6, "grad_norm": 1.559121371519373, "learning_rate": 7.433030437896806e-06, "loss": 0.6573, "step": 5851 }, { "epoch": 0.6, "grad_norm": 1.5023829504747404, "learning_rate": 7.429846512677468e-06, "loss": 0.7174, "step": 5852 }, { "epoch": 0.6, "grad_norm": 1.5674878085450052, "learning_rate": 7.426662866408103e-06, "loss": 0.6898, "step": 5853 }, { "epoch": 0.6, "grad_norm": 1.619686042807621, "learning_rate": 7.423479499434236e-06, "loss": 0.656, "step": 5854 }, { "epoch": 0.6, "grad_norm": 1.634263367846682, "learning_rate": 7.42029641210138e-06, "loss": 0.7641, "step": 5855 }, { "epoch": 0.6, "grad_norm": 1.6343792631487037, "learning_rate": 7.4171136047550065e-06, "loss": 0.6877, "step": 5856 }, { "epoch": 0.6, "grad_norm": 1.42843883095044, "learning_rate": 7.413931077740557e-06, "loss": 0.604, "step": 5857 }, { "epoch": 0.6, "grad_norm": 1.4921527424437566, "learning_rate": 7.410748831403449e-06, "loss": 0.6833, "step": 5858 }, { "epoch": 0.6, "grad_norm": 1.6232802400330084, "learning_rate": 7.4075668660890646e-06, "loss": 0.744, "step": 5859 }, { "epoch": 0.6, "grad_norm": 1.7416011582072546, "learning_rate": 7.404385182142753e-06, "loss": 0.7026, "step": 5860 }, { "epoch": 0.6, "grad_norm": 1.6063603146265764, "learning_rate": 7.4012037799098445e-06, "loss": 0.6517, "step": 5861 }, { "epoch": 0.6, "grad_norm": 1.8099053783680814, "learning_rate": 7.39802265973562e-06, "loss": 0.7787, "step": 5862 }, { "epoch": 0.6, "grad_norm": 1.6263527004572946, "learning_rate": 7.394841821965345e-06, "loss": 0.7453, "step": 5863 }, { "epoch": 0.6, "grad_norm": 1.4791392099068637, "learning_rate": 7.391661266944254e-06, "loss": 0.7141, "step": 5864 }, { "epoch": 0.6, "grad_norm": 1.5802351106753338, "learning_rate": 7.38848099501754e-06, "loss": 0.7015, "step": 5865 }, { "epoch": 0.6, "grad_norm": 1.4871552338594307, "learning_rate": 7.385301006530378e-06, "loss": 0.7324, "step": 5866 }, { "epoch": 0.6, "grad_norm": 1.6085992046388986, "learning_rate": 7.382121301827898e-06, "loss": 0.743, "step": 5867 }, { "epoch": 0.6, "grad_norm": 1.7153826304474897, "learning_rate": 7.378941881255211e-06, "loss": 0.7159, "step": 5868 }, { "epoch": 0.6, "grad_norm": 1.8728107832178462, "learning_rate": 7.3757627451573995e-06, "loss": 0.5738, "step": 5869 }, { "epoch": 0.6, "grad_norm": 1.5159368666910624, "learning_rate": 7.372583893879499e-06, "loss": 0.6209, "step": 5870 }, { "epoch": 0.6, "grad_norm": 1.449899387393714, "learning_rate": 7.369405327766532e-06, "loss": 0.6736, "step": 5871 }, { "epoch": 0.6, "grad_norm": 1.6303463734462023, "learning_rate": 7.366227047163476e-06, "loss": 0.6618, "step": 5872 }, { "epoch": 0.6, "grad_norm": 1.6863818917790458, "learning_rate": 7.363049052415285e-06, "loss": 0.836, "step": 5873 }, { "epoch": 0.6, "grad_norm": 1.820253340943577, "learning_rate": 7.359871343866887e-06, "loss": 0.741, "step": 5874 }, { "epoch": 0.6, "grad_norm": 1.5931785045286797, "learning_rate": 7.356693921863163e-06, "loss": 0.6547, "step": 5875 }, { "epoch": 0.6, "grad_norm": 1.6440388959898296, "learning_rate": 7.3535167867489775e-06, "loss": 0.7592, "step": 5876 }, { "epoch": 0.6, "grad_norm": 1.5939645437591048, "learning_rate": 7.350339938869162e-06, "loss": 0.6622, "step": 5877 }, { "epoch": 0.6, "grad_norm": 1.5597381779921151, "learning_rate": 7.347163378568507e-06, "loss": 0.8154, "step": 5878 }, { "epoch": 0.6, "grad_norm": 1.4589897039416655, "learning_rate": 7.343987106191786e-06, "loss": 0.5976, "step": 5879 }, { "epoch": 0.6, "grad_norm": 1.5984733750682367, "learning_rate": 7.340811122083723e-06, "loss": 0.6821, "step": 5880 }, { "epoch": 0.6, "grad_norm": 1.4764646748866384, "learning_rate": 7.3376354265890295e-06, "loss": 0.7543, "step": 5881 }, { "epoch": 0.6, "grad_norm": 1.5223731010955242, "learning_rate": 7.334460020052379e-06, "loss": 0.6846, "step": 5882 }, { "epoch": 0.6, "grad_norm": 1.4660607880193885, "learning_rate": 7.331284902818405e-06, "loss": 0.6711, "step": 5883 }, { "epoch": 0.6, "grad_norm": 1.446061417819951, "learning_rate": 7.328110075231725e-06, "loss": 0.7085, "step": 5884 }, { "epoch": 0.6, "grad_norm": 1.5524107193494978, "learning_rate": 7.324935537636908e-06, "loss": 0.697, "step": 5885 }, { "epoch": 0.6, "grad_norm": 1.6555445285652144, "learning_rate": 7.321761290378505e-06, "loss": 0.7275, "step": 5886 }, { "epoch": 0.6, "grad_norm": 1.4430592773775197, "learning_rate": 7.318587333801036e-06, "loss": 0.5604, "step": 5887 }, { "epoch": 0.6, "grad_norm": 1.4908733198747264, "learning_rate": 7.315413668248974e-06, "loss": 0.7183, "step": 5888 }, { "epoch": 0.6, "grad_norm": 1.5245766996522263, "learning_rate": 7.312240294066782e-06, "loss": 0.6907, "step": 5889 }, { "epoch": 0.6, "grad_norm": 1.6246441521241848, "learning_rate": 7.309067211598868e-06, "loss": 0.7805, "step": 5890 }, { "epoch": 0.6, "grad_norm": 1.529185478581184, "learning_rate": 7.305894421189628e-06, "loss": 0.7304, "step": 5891 }, { "epoch": 0.6, "grad_norm": 1.4448886607686946, "learning_rate": 7.302721923183421e-06, "loss": 0.6054, "step": 5892 }, { "epoch": 0.6, "grad_norm": 1.5751107153435748, "learning_rate": 7.299549717924565e-06, "loss": 0.6843, "step": 5893 }, { "epoch": 0.6, "grad_norm": 1.718706567927009, "learning_rate": 7.296377805757357e-06, "loss": 0.6116, "step": 5894 }, { "epoch": 0.6, "grad_norm": 1.5177722397037496, "learning_rate": 7.29320618702606e-06, "loss": 0.6166, "step": 5895 }, { "epoch": 0.6, "grad_norm": 1.5444918903217328, "learning_rate": 7.2900348620749016e-06, "loss": 0.722, "step": 5896 }, { "epoch": 0.6, "grad_norm": 1.537921306975767, "learning_rate": 7.286863831248078e-06, "loss": 0.7269, "step": 5897 }, { "epoch": 0.6, "grad_norm": 1.7285406302564792, "learning_rate": 7.28369309488976e-06, "loss": 0.7455, "step": 5898 }, { "epoch": 0.6, "grad_norm": 1.5245798267270128, "learning_rate": 7.280522653344076e-06, "loss": 0.7289, "step": 5899 }, { "epoch": 0.6, "grad_norm": 1.4914728486125917, "learning_rate": 7.27735250695513e-06, "loss": 0.6049, "step": 5900 }, { "epoch": 0.6, "grad_norm": 1.466868236257867, "learning_rate": 7.274182656066992e-06, "loss": 0.6538, "step": 5901 }, { "epoch": 0.6, "grad_norm": 1.7183062480640914, "learning_rate": 7.271013101023702e-06, "loss": 0.7655, "step": 5902 }, { "epoch": 0.6, "grad_norm": 1.8887092239674945, "learning_rate": 7.26784384216926e-06, "loss": 0.8537, "step": 5903 }, { "epoch": 0.6, "grad_norm": 1.7400780102568585, "learning_rate": 7.264674879847644e-06, "loss": 0.7871, "step": 5904 }, { "epoch": 0.6, "grad_norm": 1.7134581127166453, "learning_rate": 7.261506214402796e-06, "loss": 0.7119, "step": 5905 }, { "epoch": 0.6, "grad_norm": 1.5867941278621458, "learning_rate": 7.258337846178621e-06, "loss": 0.6933, "step": 5906 }, { "epoch": 0.6, "grad_norm": 1.5424375073206111, "learning_rate": 7.255169775519e-06, "loss": 0.8146, "step": 5907 }, { "epoch": 0.6, "grad_norm": 1.3814753377772704, "learning_rate": 7.2520020027677715e-06, "loss": 0.6826, "step": 5908 }, { "epoch": 0.6, "grad_norm": 1.6083435545153657, "learning_rate": 7.248834528268756e-06, "loss": 0.7188, "step": 5909 }, { "epoch": 0.6, "grad_norm": 1.6591768526517174, "learning_rate": 7.245667352365727e-06, "loss": 0.7414, "step": 5910 }, { "epoch": 0.6, "grad_norm": 1.4171103717364297, "learning_rate": 7.242500475402433e-06, "loss": 0.6912, "step": 5911 }, { "epoch": 0.6, "grad_norm": 1.5330338232654563, "learning_rate": 7.239333897722591e-06, "loss": 0.7653, "step": 5912 }, { "epoch": 0.6, "grad_norm": 1.5562892061873899, "learning_rate": 7.2361676196698834e-06, "loss": 0.6745, "step": 5913 }, { "epoch": 0.6, "grad_norm": 1.6240138314562722, "learning_rate": 7.233001641587958e-06, "loss": 0.5879, "step": 5914 }, { "epoch": 0.6, "grad_norm": 1.617474529996535, "learning_rate": 7.229835963820435e-06, "loss": 0.8026, "step": 5915 }, { "epoch": 0.6, "grad_norm": 1.6839525778259303, "learning_rate": 7.226670586710896e-06, "loss": 0.7293, "step": 5916 }, { "epoch": 0.6, "grad_norm": 1.5312147309286948, "learning_rate": 7.223505510602893e-06, "loss": 0.6628, "step": 5917 }, { "epoch": 0.6, "grad_norm": 1.7960645410763412, "learning_rate": 7.220340735839953e-06, "loss": 0.6784, "step": 5918 }, { "epoch": 0.6, "grad_norm": 1.700530965726789, "learning_rate": 7.217176262765551e-06, "loss": 0.746, "step": 5919 }, { "epoch": 0.6, "grad_norm": 1.77136543867775, "learning_rate": 7.2140120917231525e-06, "loss": 0.728, "step": 5920 }, { "epoch": 0.6, "grad_norm": 1.6255467952292564, "learning_rate": 7.210848223056169e-06, "loss": 0.6546, "step": 5921 }, { "epoch": 0.6, "grad_norm": 1.5471405337399904, "learning_rate": 7.207684657107994e-06, "loss": 0.7178, "step": 5922 }, { "epoch": 0.6, "grad_norm": 1.4928474424652634, "learning_rate": 7.204521394221986e-06, "loss": 0.7027, "step": 5923 }, { "epoch": 0.6, "grad_norm": 1.5710364609704472, "learning_rate": 7.201358434741461e-06, "loss": 0.7739, "step": 5924 }, { "epoch": 0.6, "grad_norm": 1.5515633009029757, "learning_rate": 7.1981957790097155e-06, "loss": 0.6327, "step": 5925 }, { "epoch": 0.6, "grad_norm": 1.589661024787032, "learning_rate": 7.195033427369998e-06, "loss": 0.8063, "step": 5926 }, { "epoch": 0.6, "grad_norm": 1.7548994446950992, "learning_rate": 7.191871380165538e-06, "loss": 0.7202, "step": 5927 }, { "epoch": 0.6, "grad_norm": 1.5781662873618456, "learning_rate": 7.1887096377395305e-06, "loss": 0.6457, "step": 5928 }, { "epoch": 0.6, "grad_norm": 1.6275642291493222, "learning_rate": 7.185548200435123e-06, "loss": 0.7623, "step": 5929 }, { "epoch": 0.6, "grad_norm": 1.8226216363463947, "learning_rate": 7.182387068595445e-06, "loss": 0.6989, "step": 5930 }, { "epoch": 0.6, "grad_norm": 1.6337489744342286, "learning_rate": 7.179226242563593e-06, "loss": 0.8565, "step": 5931 }, { "epoch": 0.6, "grad_norm": 1.7040405735331745, "learning_rate": 7.176065722682616e-06, "loss": 0.6508, "step": 5932 }, { "epoch": 0.6, "grad_norm": 1.7053161007144007, "learning_rate": 7.172905509295547e-06, "loss": 0.6041, "step": 5933 }, { "epoch": 0.6, "grad_norm": 1.5160666240079106, "learning_rate": 7.1697456027453705e-06, "loss": 0.7378, "step": 5934 }, { "epoch": 0.6, "grad_norm": 1.5134763013492174, "learning_rate": 7.166586003375049e-06, "loss": 0.6613, "step": 5935 }, { "epoch": 0.6, "grad_norm": 1.5604089079300418, "learning_rate": 7.16342671152751e-06, "loss": 0.7043, "step": 5936 }, { "epoch": 0.6, "grad_norm": 1.705865897955065, "learning_rate": 7.16026772754564e-06, "loss": 0.7221, "step": 5937 }, { "epoch": 0.6, "grad_norm": 1.5183849133745817, "learning_rate": 7.157109051772304e-06, "loss": 0.6544, "step": 5938 }, { "epoch": 0.6, "grad_norm": 1.6388424865238325, "learning_rate": 7.153950684550317e-06, "loss": 0.6588, "step": 5939 }, { "epoch": 0.6, "grad_norm": 1.735175336027495, "learning_rate": 7.150792626222476e-06, "loss": 0.8374, "step": 5940 }, { "epoch": 0.6, "grad_norm": 1.4780545701966636, "learning_rate": 7.147634877131544e-06, "loss": 0.7295, "step": 5941 }, { "epoch": 0.6, "grad_norm": 1.635128129316677, "learning_rate": 7.144477437620235e-06, "loss": 0.7532, "step": 5942 }, { "epoch": 0.6, "grad_norm": 1.531452447180397, "learning_rate": 7.14132030803125e-06, "loss": 0.7193, "step": 5943 }, { "epoch": 0.6, "grad_norm": 1.6133299287122889, "learning_rate": 7.138163488707235e-06, "loss": 0.7288, "step": 5944 }, { "epoch": 0.6, "grad_norm": 1.615628410179108, "learning_rate": 7.1350069799908205e-06, "loss": 0.6419, "step": 5945 }, { "epoch": 0.6, "grad_norm": 1.4970890926910432, "learning_rate": 7.131850782224598e-06, "loss": 0.7031, "step": 5946 }, { "epoch": 0.6, "grad_norm": 1.479217347173918, "learning_rate": 7.128694895751118e-06, "loss": 0.709, "step": 5947 }, { "epoch": 0.61, "grad_norm": 1.4703328917956562, "learning_rate": 7.1255393209129034e-06, "loss": 0.6481, "step": 5948 }, { "epoch": 0.61, "grad_norm": 1.5584138647975212, "learning_rate": 7.1223840580524485e-06, "loss": 0.6695, "step": 5949 }, { "epoch": 0.61, "grad_norm": 1.5948021840095914, "learning_rate": 7.1192291075121985e-06, "loss": 0.7755, "step": 5950 }, { "epoch": 0.61, "grad_norm": 1.589248305768824, "learning_rate": 7.116074469634582e-06, "loss": 0.8458, "step": 5951 }, { "epoch": 0.61, "grad_norm": 1.685088414455333, "learning_rate": 7.112920144761981e-06, "loss": 0.7741, "step": 5952 }, { "epoch": 0.61, "grad_norm": 1.6519851958314915, "learning_rate": 7.109766133236747e-06, "loss": 0.7756, "step": 5953 }, { "epoch": 0.61, "grad_norm": 1.603902906380497, "learning_rate": 7.106612435401204e-06, "loss": 0.7936, "step": 5954 }, { "epoch": 0.61, "grad_norm": 1.5540901955171227, "learning_rate": 7.103459051597634e-06, "loss": 0.6245, "step": 5955 }, { "epoch": 0.61, "grad_norm": 1.4938465866104673, "learning_rate": 7.100305982168287e-06, "loss": 0.7651, "step": 5956 }, { "epoch": 0.61, "grad_norm": 1.4482093578275226, "learning_rate": 7.097153227455379e-06, "loss": 0.7077, "step": 5957 }, { "epoch": 0.61, "grad_norm": 1.458455597539955, "learning_rate": 7.09400078780109e-06, "loss": 0.6886, "step": 5958 }, { "epoch": 0.61, "grad_norm": 1.6426093964650683, "learning_rate": 7.090848663547574e-06, "loss": 0.7427, "step": 5959 }, { "epoch": 0.61, "grad_norm": 1.5489288651190687, "learning_rate": 7.087696855036941e-06, "loss": 0.7297, "step": 5960 }, { "epoch": 0.61, "grad_norm": 1.4600247920573843, "learning_rate": 7.084545362611271e-06, "loss": 0.6347, "step": 5961 }, { "epoch": 0.61, "grad_norm": 1.45701917668946, "learning_rate": 7.081394186612607e-06, "loss": 0.7495, "step": 5962 }, { "epoch": 0.61, "grad_norm": 1.7218090265097128, "learning_rate": 7.078243327382965e-06, "loss": 0.7223, "step": 5963 }, { "epoch": 0.61, "grad_norm": 1.667199490568043, "learning_rate": 7.075092785264318e-06, "loss": 0.8159, "step": 5964 }, { "epoch": 0.61, "grad_norm": 1.6435345957610008, "learning_rate": 7.071942560598607e-06, "loss": 0.6628, "step": 5965 }, { "epoch": 0.61, "grad_norm": 1.512973675003832, "learning_rate": 7.068792653727745e-06, "loss": 0.6767, "step": 5966 }, { "epoch": 0.61, "grad_norm": 1.715175169843844, "learning_rate": 7.065643064993598e-06, "loss": 0.7698, "step": 5967 }, { "epoch": 0.61, "grad_norm": 1.695624356046342, "learning_rate": 7.062493794738008e-06, "loss": 0.7545, "step": 5968 }, { "epoch": 0.61, "grad_norm": 1.6865151048997264, "learning_rate": 7.059344843302783e-06, "loss": 0.7827, "step": 5969 }, { "epoch": 0.61, "grad_norm": 1.7368175081011257, "learning_rate": 7.0561962110296845e-06, "loss": 0.7576, "step": 5970 }, { "epoch": 0.61, "grad_norm": 1.615677836368483, "learning_rate": 7.0530478982604524e-06, "loss": 0.657, "step": 5971 }, { "epoch": 0.61, "grad_norm": 1.693141974058053, "learning_rate": 7.049899905336788e-06, "loss": 0.7445, "step": 5972 }, { "epoch": 0.61, "grad_norm": 1.5262126051228404, "learning_rate": 7.046752232600351e-06, "loss": 0.6724, "step": 5973 }, { "epoch": 0.61, "grad_norm": 1.7065540618543513, "learning_rate": 7.043604880392781e-06, "loss": 0.7284, "step": 5974 }, { "epoch": 0.61, "grad_norm": 1.5338232721417555, "learning_rate": 7.0404578490556616e-06, "loss": 0.6668, "step": 5975 }, { "epoch": 0.61, "grad_norm": 1.5935989717722543, "learning_rate": 7.037311138930563e-06, "loss": 0.6522, "step": 5976 }, { "epoch": 0.61, "grad_norm": 1.6575930100117833, "learning_rate": 7.034164750359011e-06, "loss": 0.627, "step": 5977 }, { "epoch": 0.61, "grad_norm": 1.7356854343165955, "learning_rate": 7.031018683682494e-06, "loss": 0.7598, "step": 5978 }, { "epoch": 0.61, "grad_norm": 1.6779200410679829, "learning_rate": 7.02787293924247e-06, "loss": 0.7907, "step": 5979 }, { "epoch": 0.61, "grad_norm": 1.501651087009643, "learning_rate": 7.024727517380356e-06, "loss": 0.6912, "step": 5980 }, { "epoch": 0.61, "grad_norm": 1.5649323303119, "learning_rate": 7.021582418437543e-06, "loss": 0.6899, "step": 5981 }, { "epoch": 0.61, "grad_norm": 1.6378941911076537, "learning_rate": 7.018437642755384e-06, "loss": 0.6915, "step": 5982 }, { "epoch": 0.61, "grad_norm": 1.5636475281530569, "learning_rate": 7.015293190675191e-06, "loss": 0.6637, "step": 5983 }, { "epoch": 0.61, "grad_norm": 1.561748989419727, "learning_rate": 7.012149062538249e-06, "loss": 0.7508, "step": 5984 }, { "epoch": 0.61, "grad_norm": 1.75746553351738, "learning_rate": 7.009005258685797e-06, "loss": 0.6662, "step": 5985 }, { "epoch": 0.61, "grad_norm": 1.7962183540031025, "learning_rate": 7.005861779459051e-06, "loss": 0.674, "step": 5986 }, { "epoch": 0.61, "grad_norm": 1.5312431132527884, "learning_rate": 7.00271862519919e-06, "loss": 0.6533, "step": 5987 }, { "epoch": 0.61, "grad_norm": 1.8068991522290494, "learning_rate": 6.999575796247344e-06, "loss": 0.7689, "step": 5988 }, { "epoch": 0.61, "grad_norm": 1.4364228321091383, "learning_rate": 6.996433292944627e-06, "loss": 0.759, "step": 5989 }, { "epoch": 0.61, "grad_norm": 1.5059840142036114, "learning_rate": 6.993291115632108e-06, "loss": 0.7712, "step": 5990 }, { "epoch": 0.61, "grad_norm": 1.879677897781722, "learning_rate": 6.990149264650814e-06, "loss": 0.735, "step": 5991 }, { "epoch": 0.61, "grad_norm": 1.5384761249987013, "learning_rate": 6.987007740341754e-06, "loss": 0.6578, "step": 5992 }, { "epoch": 0.61, "grad_norm": 1.442254383864038, "learning_rate": 6.983866543045881e-06, "loss": 0.6452, "step": 5993 }, { "epoch": 0.61, "grad_norm": 1.5261177107975168, "learning_rate": 6.980725673104128e-06, "loss": 0.708, "step": 5994 }, { "epoch": 0.61, "grad_norm": 1.5038665795578166, "learning_rate": 6.97758513085739e-06, "loss": 0.7081, "step": 5995 }, { "epoch": 0.61, "grad_norm": 1.846541150792894, "learning_rate": 6.974444916646517e-06, "loss": 0.7867, "step": 5996 }, { "epoch": 0.61, "grad_norm": 1.690342301741687, "learning_rate": 6.971305030812339e-06, "loss": 0.7515, "step": 5997 }, { "epoch": 0.61, "grad_norm": 1.5635055628794958, "learning_rate": 6.968165473695632e-06, "loss": 0.6293, "step": 5998 }, { "epoch": 0.61, "grad_norm": 1.4455948190504697, "learning_rate": 6.96502624563715e-06, "loss": 0.6306, "step": 5999 }, { "epoch": 0.61, "grad_norm": 1.585497684641971, "learning_rate": 6.961887346977611e-06, "loss": 0.6823, "step": 6000 }, { "epoch": 0.61, "grad_norm": 1.4864011580113403, "learning_rate": 6.958748778057687e-06, "loss": 0.7411, "step": 6001 }, { "epoch": 0.61, "grad_norm": 1.7449596125569689, "learning_rate": 6.955610539218023e-06, "loss": 0.6064, "step": 6002 }, { "epoch": 0.61, "grad_norm": 1.6330422724486302, "learning_rate": 6.952472630799227e-06, "loss": 0.7924, "step": 6003 }, { "epoch": 0.61, "grad_norm": 1.561676435086924, "learning_rate": 6.949335053141868e-06, "loss": 0.6949, "step": 6004 }, { "epoch": 0.61, "grad_norm": 1.5681058178598866, "learning_rate": 6.9461978065864835e-06, "loss": 0.8036, "step": 6005 }, { "epoch": 0.61, "grad_norm": 1.5518107210158107, "learning_rate": 6.943060891473572e-06, "loss": 0.7636, "step": 6006 }, { "epoch": 0.61, "grad_norm": 1.7040411881612014, "learning_rate": 6.939924308143591e-06, "loss": 0.7985, "step": 6007 }, { "epoch": 0.61, "grad_norm": 1.673280397750217, "learning_rate": 6.936788056936976e-06, "loss": 0.7416, "step": 6008 }, { "epoch": 0.61, "grad_norm": 1.674118049602933, "learning_rate": 6.933652138194114e-06, "loss": 0.6395, "step": 6009 }, { "epoch": 0.61, "grad_norm": 1.5361770186226622, "learning_rate": 6.93051655225536e-06, "loss": 0.6385, "step": 6010 }, { "epoch": 0.61, "grad_norm": 1.5032438867630116, "learning_rate": 6.9273812994610315e-06, "loss": 0.6813, "step": 6011 }, { "epoch": 0.61, "grad_norm": 1.7101399111295492, "learning_rate": 6.924246380151411e-06, "loss": 0.8143, "step": 6012 }, { "epoch": 0.61, "grad_norm": 1.5441825955179973, "learning_rate": 6.92111179466675e-06, "loss": 0.6683, "step": 6013 }, { "epoch": 0.61, "grad_norm": 1.7242217401584956, "learning_rate": 6.917977543347254e-06, "loss": 0.7674, "step": 6014 }, { "epoch": 0.61, "grad_norm": 1.7117024400753804, "learning_rate": 6.914843626533099e-06, "loss": 0.7421, "step": 6015 }, { "epoch": 0.61, "grad_norm": 1.644187079438976, "learning_rate": 6.911710044564419e-06, "loss": 0.769, "step": 6016 }, { "epoch": 0.61, "grad_norm": 1.4185821366042153, "learning_rate": 6.908576797781321e-06, "loss": 0.7162, "step": 6017 }, { "epoch": 0.61, "grad_norm": 1.5664785236770578, "learning_rate": 6.905443886523868e-06, "loss": 0.6154, "step": 6018 }, { "epoch": 0.61, "grad_norm": 1.472890070195125, "learning_rate": 6.902311311132084e-06, "loss": 0.6228, "step": 6019 }, { "epoch": 0.61, "grad_norm": 1.6096312989951134, "learning_rate": 6.89917907194597e-06, "loss": 0.6971, "step": 6020 }, { "epoch": 0.61, "grad_norm": 1.7037258038186427, "learning_rate": 6.896047169305471e-06, "loss": 0.661, "step": 6021 }, { "epoch": 0.61, "grad_norm": 1.5649396853725739, "learning_rate": 6.892915603550512e-06, "loss": 0.634, "step": 6022 }, { "epoch": 0.61, "grad_norm": 1.6722121537895382, "learning_rate": 6.8897843750209796e-06, "loss": 0.7228, "step": 6023 }, { "epoch": 0.61, "grad_norm": 1.5956272085005472, "learning_rate": 6.8866534840567104e-06, "loss": 0.7118, "step": 6024 }, { "epoch": 0.61, "grad_norm": 1.599425073147109, "learning_rate": 6.883522930997517e-06, "loss": 0.7629, "step": 6025 }, { "epoch": 0.61, "grad_norm": 1.6284369870991078, "learning_rate": 6.88039271618318e-06, "loss": 0.7095, "step": 6026 }, { "epoch": 0.61, "grad_norm": 1.4076269843619167, "learning_rate": 6.877262839953422e-06, "loss": 0.5916, "step": 6027 }, { "epoch": 0.61, "grad_norm": 1.5691541798606565, "learning_rate": 6.874133302647953e-06, "loss": 0.6988, "step": 6028 }, { "epoch": 0.61, "grad_norm": 1.5347110368511394, "learning_rate": 6.871004104606427e-06, "loss": 0.6454, "step": 6029 }, { "epoch": 0.61, "grad_norm": 1.4321167031271145, "learning_rate": 6.867875246168474e-06, "loss": 0.6535, "step": 6030 }, { "epoch": 0.61, "grad_norm": 1.6411822049037168, "learning_rate": 6.864746727673685e-06, "loss": 0.6945, "step": 6031 }, { "epoch": 0.61, "grad_norm": 1.633086470213813, "learning_rate": 6.861618549461606e-06, "loss": 0.624, "step": 6032 }, { "epoch": 0.61, "grad_norm": 1.6858246877293717, "learning_rate": 6.858490711871759e-06, "loss": 0.686, "step": 6033 }, { "epoch": 0.61, "grad_norm": 1.5239869152303707, "learning_rate": 6.855363215243612e-06, "loss": 0.8371, "step": 6034 }, { "epoch": 0.61, "grad_norm": 1.445563473951057, "learning_rate": 6.852236059916612e-06, "loss": 0.6643, "step": 6035 }, { "epoch": 0.61, "grad_norm": 1.6714240174258932, "learning_rate": 6.849109246230167e-06, "loss": 0.7424, "step": 6036 }, { "epoch": 0.61, "grad_norm": 1.683585445655326, "learning_rate": 6.845982774523634e-06, "loss": 0.6813, "step": 6037 }, { "epoch": 0.61, "grad_norm": 1.6850963323083905, "learning_rate": 6.842856645136351e-06, "loss": 0.744, "step": 6038 }, { "epoch": 0.61, "grad_norm": 1.6178748682318473, "learning_rate": 6.839730858407604e-06, "loss": 0.6569, "step": 6039 }, { "epoch": 0.61, "grad_norm": 1.5032196567724914, "learning_rate": 6.836605414676652e-06, "loss": 0.6416, "step": 6040 }, { "epoch": 0.61, "grad_norm": 1.5419909852187603, "learning_rate": 6.8334803142827144e-06, "loss": 0.7778, "step": 6041 }, { "epoch": 0.61, "grad_norm": 1.6887785419890449, "learning_rate": 6.830355557564966e-06, "loss": 0.8648, "step": 6042 }, { "epoch": 0.61, "grad_norm": 1.4451232947180794, "learning_rate": 6.827231144862555e-06, "loss": 0.7358, "step": 6043 }, { "epoch": 0.61, "grad_norm": 1.660669903928941, "learning_rate": 6.82410707651459e-06, "loss": 0.7228, "step": 6044 }, { "epoch": 0.61, "grad_norm": 1.6440758805537505, "learning_rate": 6.820983352860133e-06, "loss": 0.7577, "step": 6045 }, { "epoch": 0.61, "grad_norm": 1.6257757580594896, "learning_rate": 6.817859974238223e-06, "loss": 0.6243, "step": 6046 }, { "epoch": 0.62, "grad_norm": 1.6584220789207265, "learning_rate": 6.814736940987845e-06, "loss": 0.6615, "step": 6047 }, { "epoch": 0.62, "grad_norm": 1.5600593702346273, "learning_rate": 6.81161425344796e-06, "loss": 0.6646, "step": 6048 }, { "epoch": 0.62, "grad_norm": 1.7382755089642217, "learning_rate": 6.808491911957492e-06, "loss": 0.8191, "step": 6049 }, { "epoch": 0.62, "grad_norm": 1.6370998175010671, "learning_rate": 6.805369916855313e-06, "loss": 0.8585, "step": 6050 }, { "epoch": 0.62, "grad_norm": 1.5579772194685, "learning_rate": 6.8022482684802745e-06, "loss": 0.6841, "step": 6051 }, { "epoch": 0.62, "grad_norm": 1.5781599509351145, "learning_rate": 6.799126967171177e-06, "loss": 0.7058, "step": 6052 }, { "epoch": 0.62, "grad_norm": 1.6306166871228853, "learning_rate": 6.7960060132667895e-06, "loss": 0.7381, "step": 6053 }, { "epoch": 0.62, "grad_norm": 1.8356173142033885, "learning_rate": 6.792885407105848e-06, "loss": 0.7518, "step": 6054 }, { "epoch": 0.62, "grad_norm": 1.6522651945471996, "learning_rate": 6.789765149027039e-06, "loss": 0.6215, "step": 6055 }, { "epoch": 0.62, "grad_norm": 1.53831203818576, "learning_rate": 6.786645239369022e-06, "loss": 0.7443, "step": 6056 }, { "epoch": 0.62, "grad_norm": 1.9292567739728215, "learning_rate": 6.783525678470413e-06, "loss": 0.7387, "step": 6057 }, { "epoch": 0.62, "grad_norm": 1.3837803608137365, "learning_rate": 6.78040646666979e-06, "loss": 0.7023, "step": 6058 }, { "epoch": 0.62, "grad_norm": 1.6306535740518562, "learning_rate": 6.777287604305698e-06, "loss": 0.7212, "step": 6059 }, { "epoch": 0.62, "grad_norm": 1.6551843866151499, "learning_rate": 6.774169091716638e-06, "loss": 0.796, "step": 6060 }, { "epoch": 0.62, "grad_norm": 1.7677581368413209, "learning_rate": 6.771050929241076e-06, "loss": 0.8101, "step": 6061 }, { "epoch": 0.62, "grad_norm": 1.5152515724530302, "learning_rate": 6.767933117217444e-06, "loss": 0.7826, "step": 6062 }, { "epoch": 0.62, "grad_norm": 1.7830840121583593, "learning_rate": 6.764815655984125e-06, "loss": 0.7526, "step": 6063 }, { "epoch": 0.62, "grad_norm": 1.7562929056380636, "learning_rate": 6.761698545879476e-06, "loss": 0.7104, "step": 6064 }, { "epoch": 0.62, "grad_norm": 1.5567240302854402, "learning_rate": 6.758581787241807e-06, "loss": 0.5608, "step": 6065 }, { "epoch": 0.62, "grad_norm": 1.723317681957902, "learning_rate": 6.755465380409394e-06, "loss": 0.688, "step": 6066 }, { "epoch": 0.62, "grad_norm": 1.5952815129509346, "learning_rate": 6.7523493257204776e-06, "loss": 0.7464, "step": 6067 }, { "epoch": 0.62, "grad_norm": 1.4090609686958033, "learning_rate": 6.749233623513254e-06, "loss": 0.7506, "step": 6068 }, { "epoch": 0.62, "grad_norm": 1.5473101411992871, "learning_rate": 6.746118274125883e-06, "loss": 0.6807, "step": 6069 }, { "epoch": 0.62, "grad_norm": 1.6216043388355958, "learning_rate": 6.743003277896487e-06, "loss": 0.6917, "step": 6070 }, { "epoch": 0.62, "grad_norm": 1.916644081908343, "learning_rate": 6.739888635163155e-06, "loss": 0.7651, "step": 6071 }, { "epoch": 0.62, "grad_norm": 1.5161974751902205, "learning_rate": 6.7367743462639265e-06, "loss": 0.6257, "step": 6072 }, { "epoch": 0.62, "grad_norm": 1.300149205632678, "learning_rate": 6.733660411536811e-06, "loss": 0.6334, "step": 6073 }, { "epoch": 0.62, "grad_norm": 1.553990093872253, "learning_rate": 6.7305468313197815e-06, "loss": 0.7501, "step": 6074 }, { "epoch": 0.62, "grad_norm": 1.5132042880834147, "learning_rate": 6.7274336059507614e-06, "loss": 0.7175, "step": 6075 }, { "epoch": 0.62, "grad_norm": 1.5129442394667727, "learning_rate": 6.724320735767646e-06, "loss": 0.708, "step": 6076 }, { "epoch": 0.62, "grad_norm": 1.51569068130344, "learning_rate": 6.721208221108293e-06, "loss": 0.7555, "step": 6077 }, { "epoch": 0.62, "grad_norm": 1.6666093513562708, "learning_rate": 6.71809606231051e-06, "loss": 0.7786, "step": 6078 }, { "epoch": 0.62, "grad_norm": 1.3740089472688364, "learning_rate": 6.714984259712074e-06, "loss": 0.6608, "step": 6079 }, { "epoch": 0.62, "grad_norm": 1.6533318917815236, "learning_rate": 6.71187281365073e-06, "loss": 0.7125, "step": 6080 }, { "epoch": 0.62, "grad_norm": 1.5660729332736263, "learning_rate": 6.708761724464168e-06, "loss": 0.6867, "step": 6081 }, { "epoch": 0.62, "grad_norm": 1.5812173362275896, "learning_rate": 6.705650992490054e-06, "loss": 0.6205, "step": 6082 }, { "epoch": 0.62, "grad_norm": 1.4441682341203965, "learning_rate": 6.7025406180660046e-06, "loss": 0.6858, "step": 6083 }, { "epoch": 0.62, "grad_norm": 1.4743755511325085, "learning_rate": 6.699430601529604e-06, "loss": 0.6628, "step": 6084 }, { "epoch": 0.62, "grad_norm": 1.6450244119277522, "learning_rate": 6.696320943218401e-06, "loss": 0.6891, "step": 6085 }, { "epoch": 0.62, "grad_norm": 1.5033342990472907, "learning_rate": 6.69321164346989e-06, "loss": 0.7987, "step": 6086 }, { "epoch": 0.62, "grad_norm": 1.6742692228987246, "learning_rate": 6.690102702621548e-06, "loss": 0.7311, "step": 6087 }, { "epoch": 0.62, "grad_norm": 1.6586419641837007, "learning_rate": 6.686994121010794e-06, "loss": 0.6917, "step": 6088 }, { "epoch": 0.62, "grad_norm": 1.5909589091117324, "learning_rate": 6.683885898975016e-06, "loss": 0.6332, "step": 6089 }, { "epoch": 0.62, "grad_norm": 1.450643132802545, "learning_rate": 6.680778036851572e-06, "loss": 0.6433, "step": 6090 }, { "epoch": 0.62, "grad_norm": 1.551984481934261, "learning_rate": 6.677670534977759e-06, "loss": 0.7499, "step": 6091 }, { "epoch": 0.62, "grad_norm": 1.5859172831013613, "learning_rate": 6.674563393690858e-06, "loss": 0.7056, "step": 6092 }, { "epoch": 0.62, "grad_norm": 1.6366553804795465, "learning_rate": 6.6714566133280944e-06, "loss": 0.6751, "step": 6093 }, { "epoch": 0.62, "grad_norm": 1.6655711827187523, "learning_rate": 6.668350194226662e-06, "loss": 0.7645, "step": 6094 }, { "epoch": 0.62, "grad_norm": 1.5485819950508424, "learning_rate": 6.665244136723719e-06, "loss": 0.727, "step": 6095 }, { "epoch": 0.62, "grad_norm": 1.4127253810983589, "learning_rate": 6.662138441156371e-06, "loss": 0.6536, "step": 6096 }, { "epoch": 0.62, "grad_norm": 1.6329594626580382, "learning_rate": 6.659033107861697e-06, "loss": 0.7044, "step": 6097 }, { "epoch": 0.62, "grad_norm": 1.7760481582902228, "learning_rate": 6.655928137176735e-06, "loss": 0.7399, "step": 6098 }, { "epoch": 0.62, "grad_norm": 1.6032009667240732, "learning_rate": 6.652823529438476e-06, "loss": 0.6502, "step": 6099 }, { "epoch": 0.62, "grad_norm": 1.4651867018769336, "learning_rate": 6.649719284983882e-06, "loss": 0.7245, "step": 6100 }, { "epoch": 0.62, "grad_norm": 1.5920615165314944, "learning_rate": 6.646615404149863e-06, "loss": 0.7663, "step": 6101 }, { "epoch": 0.62, "grad_norm": 1.6675682691281073, "learning_rate": 6.6435118872733016e-06, "loss": 0.7442, "step": 6102 }, { "epoch": 0.62, "grad_norm": 1.6036547915026613, "learning_rate": 6.640408734691039e-06, "loss": 0.7039, "step": 6103 }, { "epoch": 0.62, "grad_norm": 1.702638743603976, "learning_rate": 6.637305946739865e-06, "loss": 0.7123, "step": 6104 }, { "epoch": 0.62, "grad_norm": 1.6000778248442582, "learning_rate": 6.63420352375655e-06, "loss": 0.6588, "step": 6105 }, { "epoch": 0.62, "grad_norm": 1.5714725149545097, "learning_rate": 6.631101466077801e-06, "loss": 0.677, "step": 6106 }, { "epoch": 0.62, "grad_norm": 1.6820828931724316, "learning_rate": 6.627999774040305e-06, "loss": 0.7083, "step": 6107 }, { "epoch": 0.62, "grad_norm": 1.4645956316659832, "learning_rate": 6.624898447980706e-06, "loss": 0.6704, "step": 6108 }, { "epoch": 0.62, "grad_norm": 1.6712984172262535, "learning_rate": 6.6217974882355955e-06, "loss": 0.6464, "step": 6109 }, { "epoch": 0.62, "grad_norm": 1.570087609165966, "learning_rate": 6.618696895141541e-06, "loss": 0.7499, "step": 6110 }, { "epoch": 0.62, "grad_norm": 1.7519649038605543, "learning_rate": 6.615596669035059e-06, "loss": 0.6598, "step": 6111 }, { "epoch": 0.62, "grad_norm": 1.5637128365097759, "learning_rate": 6.6124968102526325e-06, "loss": 0.8167, "step": 6112 }, { "epoch": 0.62, "grad_norm": 1.8123296526525825, "learning_rate": 6.6093973191307055e-06, "loss": 0.6705, "step": 6113 }, { "epoch": 0.62, "grad_norm": 1.5613541223195324, "learning_rate": 6.606298196005673e-06, "loss": 0.7487, "step": 6114 }, { "epoch": 0.62, "grad_norm": 1.7326002131841742, "learning_rate": 6.603199441213901e-06, "loss": 0.7714, "step": 6115 }, { "epoch": 0.62, "grad_norm": 1.7424750174350074, "learning_rate": 6.60010105509171e-06, "loss": 0.7891, "step": 6116 }, { "epoch": 0.62, "grad_norm": 1.5365332328220989, "learning_rate": 6.597003037975379e-06, "loss": 0.7173, "step": 6117 }, { "epoch": 0.62, "grad_norm": 1.5147365728214297, "learning_rate": 6.593905390201154e-06, "loss": 0.6687, "step": 6118 }, { "epoch": 0.62, "grad_norm": 1.6877996988645476, "learning_rate": 6.590808112105232e-06, "loss": 0.8397, "step": 6119 }, { "epoch": 0.62, "grad_norm": 1.7092597152986675, "learning_rate": 6.5877112040237746e-06, "loss": 0.7818, "step": 6120 }, { "epoch": 0.62, "grad_norm": 1.613334053324412, "learning_rate": 6.584614666292906e-06, "loss": 0.717, "step": 6121 }, { "epoch": 0.62, "grad_norm": 1.5180147856705175, "learning_rate": 6.581518499248705e-06, "loss": 0.7353, "step": 6122 }, { "epoch": 0.62, "grad_norm": 1.5256391495854664, "learning_rate": 6.578422703227211e-06, "loss": 0.7384, "step": 6123 }, { "epoch": 0.62, "grad_norm": 1.6215104435427263, "learning_rate": 6.5753272785644225e-06, "loss": 0.5765, "step": 6124 }, { "epoch": 0.62, "grad_norm": 1.4798270566748801, "learning_rate": 6.572232225596306e-06, "loss": 0.636, "step": 6125 }, { "epoch": 0.62, "grad_norm": 1.6553762148182485, "learning_rate": 6.569137544658775e-06, "loss": 0.8014, "step": 6126 }, { "epoch": 0.62, "grad_norm": 1.621778896682407, "learning_rate": 6.56604323608771e-06, "loss": 0.6951, "step": 6127 }, { "epoch": 0.62, "grad_norm": 1.5478542582256125, "learning_rate": 6.562949300218955e-06, "loss": 0.6891, "step": 6128 }, { "epoch": 0.62, "grad_norm": 1.751566216790655, "learning_rate": 6.559855737388299e-06, "loss": 0.7081, "step": 6129 }, { "epoch": 0.62, "grad_norm": 1.6892966354547168, "learning_rate": 6.556762547931505e-06, "loss": 0.7547, "step": 6130 }, { "epoch": 0.62, "grad_norm": 1.4676227029041156, "learning_rate": 6.553669732184296e-06, "loss": 0.6531, "step": 6131 }, { "epoch": 0.62, "grad_norm": 1.5368957217679278, "learning_rate": 6.550577290482336e-06, "loss": 0.6174, "step": 6132 }, { "epoch": 0.62, "grad_norm": 1.8966492736331908, "learning_rate": 6.547485223161274e-06, "loss": 0.8064, "step": 6133 }, { "epoch": 0.62, "grad_norm": 1.6919571246987175, "learning_rate": 6.544393530556697e-06, "loss": 0.7939, "step": 6134 }, { "epoch": 0.62, "grad_norm": 1.5433280990541385, "learning_rate": 6.54130221300416e-06, "loss": 0.6668, "step": 6135 }, { "epoch": 0.62, "grad_norm": 1.4384780138474969, "learning_rate": 6.538211270839185e-06, "loss": 0.6493, "step": 6136 }, { "epoch": 0.62, "grad_norm": 1.483118398389075, "learning_rate": 6.535120704397236e-06, "loss": 0.6313, "step": 6137 }, { "epoch": 0.62, "grad_norm": 1.5126790622807804, "learning_rate": 6.53203051401375e-06, "loss": 0.6973, "step": 6138 }, { "epoch": 0.62, "grad_norm": 1.7201848995408757, "learning_rate": 6.528940700024122e-06, "loss": 0.7339, "step": 6139 }, { "epoch": 0.62, "grad_norm": 1.7302480571234962, "learning_rate": 6.525851262763696e-06, "loss": 0.666, "step": 6140 }, { "epoch": 0.62, "grad_norm": 1.7036931968958287, "learning_rate": 6.522762202567792e-06, "loss": 0.7611, "step": 6141 }, { "epoch": 0.62, "grad_norm": 1.609127038666171, "learning_rate": 6.519673519771666e-06, "loss": 0.6689, "step": 6142 }, { "epoch": 0.62, "grad_norm": 1.4781757504167778, "learning_rate": 6.516585214710554e-06, "loss": 0.6643, "step": 6143 }, { "epoch": 0.62, "grad_norm": 1.5196067810040579, "learning_rate": 6.513497287719648e-06, "loss": 0.5777, "step": 6144 }, { "epoch": 0.63, "grad_norm": 1.5785347540072658, "learning_rate": 6.510409739134082e-06, "loss": 0.7492, "step": 6145 }, { "epoch": 0.63, "grad_norm": 1.666016850977178, "learning_rate": 6.507322569288974e-06, "loss": 0.7188, "step": 6146 }, { "epoch": 0.63, "grad_norm": 1.6450134972059112, "learning_rate": 6.504235778519376e-06, "loss": 0.6529, "step": 6147 }, { "epoch": 0.63, "grad_norm": 1.502261606984428, "learning_rate": 6.501149367160319e-06, "loss": 0.6806, "step": 6148 }, { "epoch": 0.63, "grad_norm": 1.5076422897426254, "learning_rate": 6.4980633355467845e-06, "loss": 0.7812, "step": 6149 }, { "epoch": 0.63, "grad_norm": 1.4866092473823802, "learning_rate": 6.494977684013708e-06, "loss": 0.5794, "step": 6150 }, { "epoch": 0.63, "grad_norm": 1.5008093258540893, "learning_rate": 6.4918924128959945e-06, "loss": 0.6753, "step": 6151 }, { "epoch": 0.63, "grad_norm": 1.7265260554302626, "learning_rate": 6.488807522528496e-06, "loss": 0.7154, "step": 6152 }, { "epoch": 0.63, "grad_norm": 1.5887570645077855, "learning_rate": 6.48572301324603e-06, "loss": 0.7191, "step": 6153 }, { "epoch": 0.63, "grad_norm": 1.5810627427814312, "learning_rate": 6.482638885383379e-06, "loss": 0.6966, "step": 6154 }, { "epoch": 0.63, "grad_norm": 1.6789536547200257, "learning_rate": 6.479555139275267e-06, "loss": 0.7727, "step": 6155 }, { "epoch": 0.63, "grad_norm": 1.6242882408342163, "learning_rate": 6.476471775256391e-06, "loss": 0.6108, "step": 6156 }, { "epoch": 0.63, "grad_norm": 1.6112020057308443, "learning_rate": 6.473388793661406e-06, "loss": 0.6523, "step": 6157 }, { "epoch": 0.63, "grad_norm": 2.0051813216712024, "learning_rate": 6.470306194824914e-06, "loss": 0.7441, "step": 6158 }, { "epoch": 0.63, "grad_norm": 1.6806981253531708, "learning_rate": 6.467223979081491e-06, "loss": 0.7096, "step": 6159 }, { "epoch": 0.63, "grad_norm": 1.702782121668881, "learning_rate": 6.464142146765653e-06, "loss": 0.7192, "step": 6160 }, { "epoch": 0.63, "grad_norm": 1.6169778774789403, "learning_rate": 6.4610606982118914e-06, "loss": 0.7052, "step": 6161 }, { "epoch": 0.63, "grad_norm": 1.6807203061396876, "learning_rate": 6.457979633754652e-06, "loss": 0.7438, "step": 6162 }, { "epoch": 0.63, "grad_norm": 1.6349504107469093, "learning_rate": 6.454898953728328e-06, "loss": 0.7091, "step": 6163 }, { "epoch": 0.63, "grad_norm": 1.562430195567715, "learning_rate": 6.4518186584672856e-06, "loss": 0.7275, "step": 6164 }, { "epoch": 0.63, "grad_norm": 1.6464753506977141, "learning_rate": 6.448738748305841e-06, "loss": 0.6985, "step": 6165 }, { "epoch": 0.63, "grad_norm": 1.6695608842141376, "learning_rate": 6.445659223578267e-06, "loss": 0.6286, "step": 6166 }, { "epoch": 0.63, "grad_norm": 1.5903742512985368, "learning_rate": 6.442580084618806e-06, "loss": 0.7277, "step": 6167 }, { "epoch": 0.63, "grad_norm": 1.6137454812957188, "learning_rate": 6.439501331761639e-06, "loss": 0.6366, "step": 6168 }, { "epoch": 0.63, "grad_norm": 1.4440989137214413, "learning_rate": 6.436422965340925e-06, "loss": 0.6553, "step": 6169 }, { "epoch": 0.63, "grad_norm": 1.6574414923263707, "learning_rate": 6.4333449856907705e-06, "loss": 0.7672, "step": 6170 }, { "epoch": 0.63, "grad_norm": 1.458801356848339, "learning_rate": 6.43026739314524e-06, "loss": 0.6427, "step": 6171 }, { "epoch": 0.63, "grad_norm": 1.5653358507579374, "learning_rate": 6.427190188038362e-06, "loss": 0.7527, "step": 6172 }, { "epoch": 0.63, "grad_norm": 1.650511862021473, "learning_rate": 6.424113370704115e-06, "loss": 0.7734, "step": 6173 }, { "epoch": 0.63, "grad_norm": 1.5755765541554974, "learning_rate": 6.421036941476439e-06, "loss": 0.6524, "step": 6174 }, { "epoch": 0.63, "grad_norm": 1.5420151974503422, "learning_rate": 6.417960900689238e-06, "loss": 0.6917, "step": 6175 }, { "epoch": 0.63, "grad_norm": 1.7208568860923064, "learning_rate": 6.414885248676361e-06, "loss": 0.7065, "step": 6176 }, { "epoch": 0.63, "grad_norm": 1.6013565217810959, "learning_rate": 6.411809985771629e-06, "loss": 0.73, "step": 6177 }, { "epoch": 0.63, "grad_norm": 1.5697716167814815, "learning_rate": 6.408735112308806e-06, "loss": 0.6028, "step": 6178 }, { "epoch": 0.63, "grad_norm": 1.571569770741846, "learning_rate": 6.405660628621628e-06, "loss": 0.7557, "step": 6179 }, { "epoch": 0.63, "grad_norm": 1.40999018231601, "learning_rate": 6.40258653504378e-06, "loss": 0.6783, "step": 6180 }, { "epoch": 0.63, "grad_norm": 1.3555682140849614, "learning_rate": 6.3995128319089036e-06, "loss": 0.6635, "step": 6181 }, { "epoch": 0.63, "grad_norm": 1.7179220579329704, "learning_rate": 6.396439519550609e-06, "loss": 0.821, "step": 6182 }, { "epoch": 0.63, "grad_norm": 1.556213439686506, "learning_rate": 6.3933665983024465e-06, "loss": 0.7582, "step": 6183 }, { "epoch": 0.63, "grad_norm": 1.76363791750226, "learning_rate": 6.390294068497937e-06, "loss": 0.7535, "step": 6184 }, { "epoch": 0.63, "grad_norm": 1.7217112711922193, "learning_rate": 6.387221930470564e-06, "loss": 0.6494, "step": 6185 }, { "epoch": 0.63, "grad_norm": 1.639820890694774, "learning_rate": 6.3841501845537464e-06, "loss": 0.7747, "step": 6186 }, { "epoch": 0.63, "grad_norm": 1.8380696332977846, "learning_rate": 6.3810788310808855e-06, "loss": 0.7267, "step": 6187 }, { "epoch": 0.63, "grad_norm": 1.6150044913304795, "learning_rate": 6.378007870385321e-06, "loss": 0.7976, "step": 6188 }, { "epoch": 0.63, "grad_norm": 1.6080626517186347, "learning_rate": 6.3749373028003595e-06, "loss": 0.645, "step": 6189 }, { "epoch": 0.63, "grad_norm": 1.590231884761789, "learning_rate": 6.371867128659267e-06, "loss": 0.6826, "step": 6190 }, { "epoch": 0.63, "grad_norm": 1.6438217494955731, "learning_rate": 6.368797348295257e-06, "loss": 0.7977, "step": 6191 }, { "epoch": 0.63, "grad_norm": 1.7206786971877635, "learning_rate": 6.36572796204151e-06, "loss": 0.7241, "step": 6192 }, { "epoch": 0.63, "grad_norm": 1.6000650003086683, "learning_rate": 6.362658970231161e-06, "loss": 0.7469, "step": 6193 }, { "epoch": 0.63, "grad_norm": 1.543427534178332, "learning_rate": 6.3595903731972975e-06, "loss": 0.7506, "step": 6194 }, { "epoch": 0.63, "grad_norm": 1.6441500222599714, "learning_rate": 6.356522171272972e-06, "loss": 0.7373, "step": 6195 }, { "epoch": 0.63, "grad_norm": 1.5579041192143457, "learning_rate": 6.353454364791184e-06, "loss": 0.7233, "step": 6196 }, { "epoch": 0.63, "grad_norm": 1.6165157378519235, "learning_rate": 6.350386954084898e-06, "loss": 0.7153, "step": 6197 }, { "epoch": 0.63, "grad_norm": 1.6553996838473866, "learning_rate": 6.34731993948704e-06, "loss": 0.6837, "step": 6198 }, { "epoch": 0.63, "grad_norm": 1.6509896130097286, "learning_rate": 6.344253321330476e-06, "loss": 0.7204, "step": 6199 }, { "epoch": 0.63, "grad_norm": 1.5788859467893934, "learning_rate": 6.341187099948049e-06, "loss": 0.7383, "step": 6200 }, { "epoch": 0.63, "grad_norm": 1.740660603581767, "learning_rate": 6.33812127567254e-06, "loss": 0.7318, "step": 6201 }, { "epoch": 0.63, "grad_norm": 1.6619288434644135, "learning_rate": 6.335055848836702e-06, "loss": 0.6514, "step": 6202 }, { "epoch": 0.63, "grad_norm": 1.740794983691795, "learning_rate": 6.3319908197732415e-06, "loss": 0.6587, "step": 6203 }, { "epoch": 0.63, "grad_norm": 1.6781697662136037, "learning_rate": 6.328926188814814e-06, "loss": 0.8202, "step": 6204 }, { "epoch": 0.63, "grad_norm": 1.5546369098985757, "learning_rate": 6.325861956294042e-06, "loss": 0.6052, "step": 6205 }, { "epoch": 0.63, "grad_norm": 1.6583434658522769, "learning_rate": 6.322798122543494e-06, "loss": 0.6109, "step": 6206 }, { "epoch": 0.63, "grad_norm": 1.7530808501779143, "learning_rate": 6.319734687895704e-06, "loss": 0.6413, "step": 6207 }, { "epoch": 0.63, "grad_norm": 1.611177983099102, "learning_rate": 6.316671652683166e-06, "loss": 0.7458, "step": 6208 }, { "epoch": 0.63, "grad_norm": 1.4791293149751368, "learning_rate": 6.3136090172383136e-06, "loss": 0.7304, "step": 6209 }, { "epoch": 0.63, "grad_norm": 1.7167625865519724, "learning_rate": 6.310546781893556e-06, "loss": 0.7791, "step": 6210 }, { "epoch": 0.63, "grad_norm": 1.6833743431663846, "learning_rate": 6.307484946981251e-06, "loss": 0.7106, "step": 6211 }, { "epoch": 0.63, "grad_norm": 1.7007098813718895, "learning_rate": 6.3044235128337065e-06, "loss": 0.7332, "step": 6212 }, { "epoch": 0.63, "grad_norm": 1.4874192642800728, "learning_rate": 6.301362479783202e-06, "loss": 0.648, "step": 6213 }, { "epoch": 0.63, "grad_norm": 1.6123542295788824, "learning_rate": 6.298301848161956e-06, "loss": 0.6245, "step": 6214 }, { "epoch": 0.63, "grad_norm": 1.4299700958218342, "learning_rate": 6.295241618302156e-06, "loss": 0.7062, "step": 6215 }, { "epoch": 0.63, "grad_norm": 1.6539641925588098, "learning_rate": 6.292181790535947e-06, "loss": 0.7223, "step": 6216 }, { "epoch": 0.63, "grad_norm": 1.778675414817209, "learning_rate": 6.289122365195416e-06, "loss": 0.8371, "step": 6217 }, { "epoch": 0.63, "grad_norm": 1.576626746519206, "learning_rate": 6.286063342612625e-06, "loss": 0.6435, "step": 6218 }, { "epoch": 0.63, "grad_norm": 1.514287004334272, "learning_rate": 6.283004723119575e-06, "loss": 0.6376, "step": 6219 }, { "epoch": 0.63, "grad_norm": 1.5667375882028205, "learning_rate": 6.2799465070482335e-06, "loss": 0.7439, "step": 6220 }, { "epoch": 0.63, "grad_norm": 1.5966702971852884, "learning_rate": 6.276888694730529e-06, "loss": 0.674, "step": 6221 }, { "epoch": 0.63, "grad_norm": 1.7417020646634092, "learning_rate": 6.27383128649833e-06, "loss": 0.7694, "step": 6222 }, { "epoch": 0.63, "grad_norm": 1.582188756240723, "learning_rate": 6.270774282683476e-06, "loss": 0.7764, "step": 6223 }, { "epoch": 0.63, "grad_norm": 1.6387750113625243, "learning_rate": 6.267717683617753e-06, "loss": 0.7126, "step": 6224 }, { "epoch": 0.63, "grad_norm": 1.7036047789699225, "learning_rate": 6.26466148963291e-06, "loss": 0.6611, "step": 6225 }, { "epoch": 0.63, "grad_norm": 1.4623154809803915, "learning_rate": 6.261605701060649e-06, "loss": 0.6599, "step": 6226 }, { "epoch": 0.63, "grad_norm": 1.6301884041651713, "learning_rate": 6.258550318232626e-06, "loss": 0.6825, "step": 6227 }, { "epoch": 0.63, "grad_norm": 1.500453691932618, "learning_rate": 6.255495341480455e-06, "loss": 0.8056, "step": 6228 }, { "epoch": 0.63, "grad_norm": 1.861004993798108, "learning_rate": 6.25244077113571e-06, "loss": 0.6886, "step": 6229 }, { "epoch": 0.63, "grad_norm": 1.4682486308246931, "learning_rate": 6.249386607529914e-06, "loss": 0.6634, "step": 6230 }, { "epoch": 0.63, "grad_norm": 1.5279737405454734, "learning_rate": 6.246332850994547e-06, "loss": 0.7362, "step": 6231 }, { "epoch": 0.63, "grad_norm": 1.5951840003051774, "learning_rate": 6.243279501861048e-06, "loss": 0.6437, "step": 6232 }, { "epoch": 0.63, "grad_norm": 1.5273387782005643, "learning_rate": 6.240226560460811e-06, "loss": 0.7581, "step": 6233 }, { "epoch": 0.63, "grad_norm": 1.5694446493717522, "learning_rate": 6.237174027125186e-06, "loss": 0.7142, "step": 6234 }, { "epoch": 0.63, "grad_norm": 1.7587644931185076, "learning_rate": 6.234121902185475e-06, "loss": 0.7101, "step": 6235 }, { "epoch": 0.63, "grad_norm": 1.538172724798086, "learning_rate": 6.231070185972943e-06, "loss": 0.588, "step": 6236 }, { "epoch": 0.63, "grad_norm": 1.732688285618746, "learning_rate": 6.2280188788187975e-06, "loss": 0.5983, "step": 6237 }, { "epoch": 0.63, "grad_norm": 1.681520609984493, "learning_rate": 6.224967981054216e-06, "loss": 0.7516, "step": 6238 }, { "epoch": 0.63, "grad_norm": 1.6385101464147946, "learning_rate": 6.22191749301033e-06, "loss": 0.7589, "step": 6239 }, { "epoch": 0.63, "grad_norm": 1.7230564965309907, "learning_rate": 6.218867415018213e-06, "loss": 0.7187, "step": 6240 }, { "epoch": 0.63, "grad_norm": 1.6237010455135803, "learning_rate": 6.215817747408912e-06, "loss": 0.7512, "step": 6241 }, { "epoch": 0.63, "grad_norm": 1.7284927765460951, "learning_rate": 6.212768490513412e-06, "loss": 0.7624, "step": 6242 }, { "epoch": 0.64, "grad_norm": 1.4408795649716721, "learning_rate": 6.209719644662668e-06, "loss": 0.6562, "step": 6243 }, { "epoch": 0.64, "grad_norm": 1.5648988173321112, "learning_rate": 6.206671210187587e-06, "loss": 0.6206, "step": 6244 }, { "epoch": 0.64, "grad_norm": 1.5329244582770034, "learning_rate": 6.203623187419021e-06, "loss": 0.7085, "step": 6245 }, { "epoch": 0.64, "grad_norm": 1.689201951258469, "learning_rate": 6.200575576687788e-06, "loss": 0.6889, "step": 6246 }, { "epoch": 0.64, "grad_norm": 1.5068712420456922, "learning_rate": 6.197528378324664e-06, "loss": 0.6117, "step": 6247 }, { "epoch": 0.64, "grad_norm": 1.7289125382594037, "learning_rate": 6.194481592660369e-06, "loss": 0.7321, "step": 6248 }, { "epoch": 0.64, "grad_norm": 1.6075378730320198, "learning_rate": 6.191435220025586e-06, "loss": 0.7114, "step": 6249 }, { "epoch": 0.64, "grad_norm": 1.6979383319765728, "learning_rate": 6.188389260750948e-06, "loss": 0.6504, "step": 6250 }, { "epoch": 0.64, "grad_norm": 1.741041947752599, "learning_rate": 6.18534371516705e-06, "loss": 0.7787, "step": 6251 }, { "epoch": 0.64, "grad_norm": 1.6692017023266297, "learning_rate": 6.18229858360444e-06, "loss": 0.7007, "step": 6252 }, { "epoch": 0.64, "grad_norm": 1.5863510304471515, "learning_rate": 6.179253866393613e-06, "loss": 0.7042, "step": 6253 }, { "epoch": 0.64, "grad_norm": 1.5770757934096424, "learning_rate": 6.1762095638650336e-06, "loss": 0.7247, "step": 6254 }, { "epoch": 0.64, "grad_norm": 1.5956129238205312, "learning_rate": 6.173165676349103e-06, "loss": 0.7633, "step": 6255 }, { "epoch": 0.64, "grad_norm": 1.6212786954144751, "learning_rate": 6.170122204176194e-06, "loss": 0.7317, "step": 6256 }, { "epoch": 0.64, "grad_norm": 1.6222304674140229, "learning_rate": 6.167079147676632e-06, "loss": 0.6724, "step": 6257 }, { "epoch": 0.64, "grad_norm": 1.5572506586899673, "learning_rate": 6.164036507180684e-06, "loss": 0.682, "step": 6258 }, { "epoch": 0.64, "grad_norm": 1.6638911977836073, "learning_rate": 6.160994283018589e-06, "loss": 0.7388, "step": 6259 }, { "epoch": 0.64, "grad_norm": 1.6200290901079435, "learning_rate": 6.157952475520525e-06, "loss": 0.7179, "step": 6260 }, { "epoch": 0.64, "grad_norm": 1.775490359235819, "learning_rate": 6.154911085016637e-06, "loss": 0.6368, "step": 6261 }, { "epoch": 0.64, "grad_norm": 1.5992580931554452, "learning_rate": 6.151870111837024e-06, "loss": 0.8094, "step": 6262 }, { "epoch": 0.64, "grad_norm": 1.7821024117668556, "learning_rate": 6.148829556311728e-06, "loss": 0.8342, "step": 6263 }, { "epoch": 0.64, "grad_norm": 1.5027920068721097, "learning_rate": 6.1457894187707644e-06, "loss": 0.6714, "step": 6264 }, { "epoch": 0.64, "grad_norm": 1.7657574807299323, "learning_rate": 6.142749699544079e-06, "loss": 0.7252, "step": 6265 }, { "epoch": 0.64, "grad_norm": 1.5835231242874228, "learning_rate": 6.1397103989615955e-06, "loss": 0.6411, "step": 6266 }, { "epoch": 0.64, "grad_norm": 1.5892404817482912, "learning_rate": 6.136671517353183e-06, "loss": 0.7354, "step": 6267 }, { "epoch": 0.64, "grad_norm": 1.3784828776341291, "learning_rate": 6.133633055048658e-06, "loss": 0.5934, "step": 6268 }, { "epoch": 0.64, "grad_norm": 1.7207808030825036, "learning_rate": 6.1305950123778024e-06, "loss": 0.7213, "step": 6269 }, { "epoch": 0.64, "grad_norm": 1.5378430526726719, "learning_rate": 6.127557389670351e-06, "loss": 0.6262, "step": 6270 }, { "epoch": 0.64, "grad_norm": 1.5920669806506544, "learning_rate": 6.124520187255983e-06, "loss": 0.7416, "step": 6271 }, { "epoch": 0.64, "grad_norm": 1.406596466187717, "learning_rate": 6.121483405464346e-06, "loss": 0.6638, "step": 6272 }, { "epoch": 0.64, "grad_norm": 1.7939125165954555, "learning_rate": 6.118447044625027e-06, "loss": 0.7147, "step": 6273 }, { "epoch": 0.64, "grad_norm": 1.54819035552784, "learning_rate": 6.115411105067582e-06, "loss": 0.5916, "step": 6274 }, { "epoch": 0.64, "grad_norm": 1.7152824570643843, "learning_rate": 6.112375587121518e-06, "loss": 0.7127, "step": 6275 }, { "epoch": 0.64, "grad_norm": 1.7193281644819463, "learning_rate": 6.109340491116284e-06, "loss": 0.762, "step": 6276 }, { "epoch": 0.64, "grad_norm": 1.6424410423978122, "learning_rate": 6.1063058173812975e-06, "loss": 0.7889, "step": 6277 }, { "epoch": 0.64, "grad_norm": 1.6949947318613092, "learning_rate": 6.103271566245925e-06, "loss": 0.7154, "step": 6278 }, { "epoch": 0.64, "grad_norm": 1.514080879501498, "learning_rate": 6.1002377380394835e-06, "loss": 0.7785, "step": 6279 }, { "epoch": 0.64, "grad_norm": 1.506629183284391, "learning_rate": 6.097204333091254e-06, "loss": 0.7608, "step": 6280 }, { "epoch": 0.64, "grad_norm": 1.5244336613717824, "learning_rate": 6.094171351730458e-06, "loss": 0.7205, "step": 6281 }, { "epoch": 0.64, "grad_norm": 1.66852571528665, "learning_rate": 6.0911387942862835e-06, "loss": 0.7075, "step": 6282 }, { "epoch": 0.64, "grad_norm": 1.6264908140490997, "learning_rate": 6.0881066610878624e-06, "loss": 0.7442, "step": 6283 }, { "epoch": 0.64, "grad_norm": 1.4881575242339578, "learning_rate": 6.08507495246429e-06, "loss": 0.8112, "step": 6284 }, { "epoch": 0.64, "grad_norm": 1.810367992210238, "learning_rate": 6.08204366874461e-06, "loss": 0.8227, "step": 6285 }, { "epoch": 0.64, "grad_norm": 1.6755010174301157, "learning_rate": 6.079012810257816e-06, "loss": 0.7535, "step": 6286 }, { "epoch": 0.64, "grad_norm": 1.5551086318305345, "learning_rate": 6.075982377332868e-06, "loss": 0.6764, "step": 6287 }, { "epoch": 0.64, "grad_norm": 1.5306851594326032, "learning_rate": 6.072952370298667e-06, "loss": 0.6971, "step": 6288 }, { "epoch": 0.64, "grad_norm": 1.7111329561521385, "learning_rate": 6.069922789484073e-06, "loss": 0.6651, "step": 6289 }, { "epoch": 0.64, "grad_norm": 1.6290435054738361, "learning_rate": 6.066893635217903e-06, "loss": 0.6927, "step": 6290 }, { "epoch": 0.64, "grad_norm": 1.792078224753121, "learning_rate": 6.0638649078289195e-06, "loss": 0.7057, "step": 6291 }, { "epoch": 0.64, "grad_norm": 1.5726534950494397, "learning_rate": 6.060836607645845e-06, "loss": 0.7077, "step": 6292 }, { "epoch": 0.64, "grad_norm": 1.414599565633372, "learning_rate": 6.057808734997359e-06, "loss": 0.6678, "step": 6293 }, { "epoch": 0.64, "grad_norm": 1.5506814668719011, "learning_rate": 6.054781290212083e-06, "loss": 0.617, "step": 6294 }, { "epoch": 0.64, "grad_norm": 1.5262725814759375, "learning_rate": 6.051754273618605e-06, "loss": 0.7637, "step": 6295 }, { "epoch": 0.64, "grad_norm": 1.5878570034248665, "learning_rate": 6.0487276855454525e-06, "loss": 0.7669, "step": 6296 }, { "epoch": 0.64, "grad_norm": 1.5719283837210194, "learning_rate": 6.04570152632112e-06, "loss": 0.7243, "step": 6297 }, { "epoch": 0.64, "grad_norm": 1.5875716436302951, "learning_rate": 6.042675796274051e-06, "loss": 0.6854, "step": 6298 }, { "epoch": 0.64, "grad_norm": 1.5012491845451224, "learning_rate": 6.0396504957326365e-06, "loss": 0.7472, "step": 6299 }, { "epoch": 0.64, "grad_norm": 1.5980477111010662, "learning_rate": 6.03662562502523e-06, "loss": 0.7216, "step": 6300 }, { "epoch": 0.64, "grad_norm": 1.5034426911917258, "learning_rate": 6.033601184480129e-06, "loss": 0.619, "step": 6301 }, { "epoch": 0.64, "grad_norm": 1.4686870305170632, "learning_rate": 6.03057717442559e-06, "loss": 0.638, "step": 6302 }, { "epoch": 0.64, "grad_norm": 1.5049175281810077, "learning_rate": 6.02755359518983e-06, "loss": 0.667, "step": 6303 }, { "epoch": 0.64, "grad_norm": 1.6873684104287792, "learning_rate": 6.024530447101e-06, "loss": 0.7237, "step": 6304 }, { "epoch": 0.64, "grad_norm": 1.7609432099220983, "learning_rate": 6.021507730487221e-06, "loss": 0.7489, "step": 6305 }, { "epoch": 0.64, "grad_norm": 1.4623537810787113, "learning_rate": 6.018485445676565e-06, "loss": 0.6506, "step": 6306 }, { "epoch": 0.64, "grad_norm": 1.888703526424575, "learning_rate": 6.015463592997046e-06, "loss": 0.7954, "step": 6307 }, { "epoch": 0.64, "grad_norm": 1.6726747559744533, "learning_rate": 6.0124421727766465e-06, "loss": 0.7867, "step": 6308 }, { "epoch": 0.64, "grad_norm": 1.6863782626305706, "learning_rate": 6.009421185343287e-06, "loss": 0.7393, "step": 6309 }, { "epoch": 0.64, "grad_norm": 1.6499492821668567, "learning_rate": 6.006400631024851e-06, "loss": 0.8357, "step": 6310 }, { "epoch": 0.64, "grad_norm": 1.7095743812571649, "learning_rate": 6.003380510149179e-06, "loss": 0.6429, "step": 6311 }, { "epoch": 0.64, "grad_norm": 1.6484443288049906, "learning_rate": 6.000360823044049e-06, "loss": 0.7172, "step": 6312 }, { "epoch": 0.64, "grad_norm": 1.4579244853163298, "learning_rate": 5.997341570037208e-06, "loss": 0.729, "step": 6313 }, { "epoch": 0.64, "grad_norm": 1.4871202444789973, "learning_rate": 5.994322751456339e-06, "loss": 0.6165, "step": 6314 }, { "epoch": 0.64, "grad_norm": 1.5291037181344551, "learning_rate": 5.9913043676290964e-06, "loss": 0.718, "step": 6315 }, { "epoch": 0.64, "grad_norm": 1.5223742645046954, "learning_rate": 5.988286418883078e-06, "loss": 0.7463, "step": 6316 }, { "epoch": 0.64, "grad_norm": 1.3940083711431022, "learning_rate": 5.985268905545829e-06, "loss": 0.5375, "step": 6317 }, { "epoch": 0.64, "grad_norm": 1.6237783872688747, "learning_rate": 5.98225182794486e-06, "loss": 0.812, "step": 6318 }, { "epoch": 0.64, "grad_norm": 1.6176773287997215, "learning_rate": 5.979235186407622e-06, "loss": 0.8133, "step": 6319 }, { "epoch": 0.64, "grad_norm": 1.9042342799536476, "learning_rate": 5.9762189812615254e-06, "loss": 0.7468, "step": 6320 }, { "epoch": 0.64, "grad_norm": 1.7788211346001306, "learning_rate": 5.973203212833939e-06, "loss": 0.7365, "step": 6321 }, { "epoch": 0.64, "grad_norm": 1.8176366507210457, "learning_rate": 5.970187881452168e-06, "loss": 0.6977, "step": 6322 }, { "epoch": 0.64, "grad_norm": 1.4221286727959068, "learning_rate": 5.967172987443482e-06, "loss": 0.5771, "step": 6323 }, { "epoch": 0.64, "grad_norm": 1.4065211568246696, "learning_rate": 5.964158531135106e-06, "loss": 0.6418, "step": 6324 }, { "epoch": 0.64, "grad_norm": 1.650739756578554, "learning_rate": 5.961144512854205e-06, "loss": 0.6653, "step": 6325 }, { "epoch": 0.64, "grad_norm": 1.6301746789006268, "learning_rate": 5.958130932927908e-06, "loss": 0.7417, "step": 6326 }, { "epoch": 0.64, "grad_norm": 1.4479738696607576, "learning_rate": 5.955117791683289e-06, "loss": 0.6565, "step": 6327 }, { "epoch": 0.64, "grad_norm": 1.5210492576094117, "learning_rate": 5.952105089447378e-06, "loss": 0.6512, "step": 6328 }, { "epoch": 0.64, "grad_norm": 1.656886978316453, "learning_rate": 5.94909282654716e-06, "loss": 0.6448, "step": 6329 }, { "epoch": 0.64, "grad_norm": 1.6081903057337392, "learning_rate": 5.946081003309565e-06, "loss": 0.5581, "step": 6330 }, { "epoch": 0.64, "grad_norm": 1.753778483406041, "learning_rate": 5.943069620061481e-06, "loss": 0.7395, "step": 6331 }, { "epoch": 0.64, "grad_norm": 1.6802658251468272, "learning_rate": 5.940058677129748e-06, "loss": 0.6784, "step": 6332 }, { "epoch": 0.64, "grad_norm": 1.6891268164739461, "learning_rate": 5.937048174841153e-06, "loss": 0.7862, "step": 6333 }, { "epoch": 0.64, "grad_norm": 1.918030920602363, "learning_rate": 5.934038113522442e-06, "loss": 0.7078, "step": 6334 }, { "epoch": 0.64, "grad_norm": 1.7695806628166773, "learning_rate": 5.9310284935003106e-06, "loss": 0.7207, "step": 6335 }, { "epoch": 0.64, "grad_norm": 1.5286937983901505, "learning_rate": 5.928019315101403e-06, "loss": 0.639, "step": 6336 }, { "epoch": 0.64, "grad_norm": 1.5447517041351149, "learning_rate": 5.92501057865232e-06, "loss": 0.6319, "step": 6337 }, { "epoch": 0.64, "grad_norm": 1.5609798103338413, "learning_rate": 5.922002284479614e-06, "loss": 0.642, "step": 6338 }, { "epoch": 0.64, "grad_norm": 1.5687889938638144, "learning_rate": 5.9189944329097885e-06, "loss": 0.7272, "step": 6339 }, { "epoch": 0.64, "grad_norm": 1.6224286950572846, "learning_rate": 5.915987024269294e-06, "loss": 0.7058, "step": 6340 }, { "epoch": 0.65, "grad_norm": 1.5027567146457037, "learning_rate": 5.912980058884543e-06, "loss": 0.7581, "step": 6341 }, { "epoch": 0.65, "grad_norm": 1.7082554644942154, "learning_rate": 5.909973537081893e-06, "loss": 0.7154, "step": 6342 }, { "epoch": 0.65, "grad_norm": 1.691328540293944, "learning_rate": 5.9069674591876535e-06, "loss": 0.7977, "step": 6343 }, { "epoch": 0.65, "grad_norm": 1.6256752787523274, "learning_rate": 5.903961825528092e-06, "loss": 0.6784, "step": 6344 }, { "epoch": 0.65, "grad_norm": 1.556542188553163, "learning_rate": 5.900956636429416e-06, "loss": 0.6217, "step": 6345 }, { "epoch": 0.65, "grad_norm": 1.7067246350845486, "learning_rate": 5.8979518922177956e-06, "loss": 0.7909, "step": 6346 }, { "epoch": 0.65, "grad_norm": 1.5820138550985727, "learning_rate": 5.8949475932193505e-06, "loss": 0.6841, "step": 6347 }, { "epoch": 0.65, "grad_norm": 1.540678213252206, "learning_rate": 5.891943739760144e-06, "loss": 0.6931, "step": 6348 }, { "epoch": 0.65, "grad_norm": 1.6771935590518536, "learning_rate": 5.888940332166209e-06, "loss": 0.6764, "step": 6349 }, { "epoch": 0.65, "grad_norm": 1.7257977664771067, "learning_rate": 5.885937370763503e-06, "loss": 0.6693, "step": 6350 }, { "epoch": 0.65, "grad_norm": 1.7398417630894818, "learning_rate": 5.882934855877962e-06, "loss": 0.7415, "step": 6351 }, { "epoch": 0.65, "grad_norm": 1.8211802165336226, "learning_rate": 5.8799327878354615e-06, "loss": 0.7329, "step": 6352 }, { "epoch": 0.65, "grad_norm": 1.6342677687069151, "learning_rate": 5.876931166961823e-06, "loss": 0.758, "step": 6353 }, { "epoch": 0.65, "grad_norm": 1.5043393902621687, "learning_rate": 5.873929993582832e-06, "loss": 0.6999, "step": 6354 }, { "epoch": 0.65, "grad_norm": 1.6347981025495912, "learning_rate": 5.8709292680242114e-06, "loss": 0.6204, "step": 6355 }, { "epoch": 0.65, "grad_norm": 1.5008672609314513, "learning_rate": 5.867928990611647e-06, "loss": 0.5959, "step": 6356 }, { "epoch": 0.65, "grad_norm": 1.6378971953945671, "learning_rate": 5.864929161670778e-06, "loss": 0.7179, "step": 6357 }, { "epoch": 0.65, "grad_norm": 1.5754498848685459, "learning_rate": 5.861929781527178e-06, "loss": 0.698, "step": 6358 }, { "epoch": 0.65, "grad_norm": 1.553793198749774, "learning_rate": 5.858930850506388e-06, "loss": 0.6156, "step": 6359 }, { "epoch": 0.65, "grad_norm": 1.458805236794188, "learning_rate": 5.8559323689338985e-06, "loss": 0.7464, "step": 6360 }, { "epoch": 0.65, "grad_norm": 1.5183497123812193, "learning_rate": 5.852934337135142e-06, "loss": 0.6604, "step": 6361 }, { "epoch": 0.65, "grad_norm": 1.7357794415167893, "learning_rate": 5.849936755435513e-06, "loss": 0.75, "step": 6362 }, { "epoch": 0.65, "grad_norm": 1.4167584344055437, "learning_rate": 5.846939624160346e-06, "loss": 0.6739, "step": 6363 }, { "epoch": 0.65, "grad_norm": 1.5571878965670656, "learning_rate": 5.843942943634937e-06, "loss": 0.7643, "step": 6364 }, { "epoch": 0.65, "grad_norm": 1.674640396827517, "learning_rate": 5.840946714184531e-06, "loss": 0.8063, "step": 6365 }, { "epoch": 0.65, "grad_norm": 1.6494268241629362, "learning_rate": 5.8379509361343154e-06, "loss": 0.6854, "step": 6366 }, { "epoch": 0.65, "grad_norm": 1.5840646906041485, "learning_rate": 5.834955609809443e-06, "loss": 0.6196, "step": 6367 }, { "epoch": 0.65, "grad_norm": 1.6550653534082749, "learning_rate": 5.831960735534999e-06, "loss": 0.7491, "step": 6368 }, { "epoch": 0.65, "grad_norm": 1.7931312062298828, "learning_rate": 5.8289663136360376e-06, "loss": 0.7552, "step": 6369 }, { "epoch": 0.65, "grad_norm": 1.6495599365387563, "learning_rate": 5.8259723444375605e-06, "loss": 0.805, "step": 6370 }, { "epoch": 0.65, "grad_norm": 1.6737151146352796, "learning_rate": 5.822978828264505e-06, "loss": 0.7276, "step": 6371 }, { "epoch": 0.65, "grad_norm": 1.6837966851701676, "learning_rate": 5.819985765441781e-06, "loss": 0.7014, "step": 6372 }, { "epoch": 0.65, "grad_norm": 1.5072843482812206, "learning_rate": 5.81699315629423e-06, "loss": 0.6911, "step": 6373 }, { "epoch": 0.65, "grad_norm": 1.7206761150182308, "learning_rate": 5.814001001146657e-06, "loss": 0.6909, "step": 6374 }, { "epoch": 0.65, "grad_norm": 1.5046183305177292, "learning_rate": 5.8110093003238175e-06, "loss": 0.627, "step": 6375 }, { "epoch": 0.65, "grad_norm": 1.4910705578526564, "learning_rate": 5.808018054150406e-06, "loss": 0.6718, "step": 6376 }, { "epoch": 0.65, "grad_norm": 1.5485078588939911, "learning_rate": 5.805027262951079e-06, "loss": 0.7327, "step": 6377 }, { "epoch": 0.65, "grad_norm": 1.6506032689070136, "learning_rate": 5.802036927050447e-06, "loss": 0.6427, "step": 6378 }, { "epoch": 0.65, "grad_norm": 1.6698136293154233, "learning_rate": 5.799047046773052e-06, "loss": 0.8641, "step": 6379 }, { "epoch": 0.65, "grad_norm": 1.5663409493127316, "learning_rate": 5.7960576224434074e-06, "loss": 0.6532, "step": 6380 }, { "epoch": 0.65, "grad_norm": 1.485427935130718, "learning_rate": 5.793068654385963e-06, "loss": 0.6535, "step": 6381 }, { "epoch": 0.65, "grad_norm": 1.6252387841757945, "learning_rate": 5.790080142925128e-06, "loss": 0.7337, "step": 6382 }, { "epoch": 0.65, "grad_norm": 1.6319477125343451, "learning_rate": 5.7870920883852595e-06, "loss": 0.6668, "step": 6383 }, { "epoch": 0.65, "grad_norm": 1.8594213711478256, "learning_rate": 5.7841044910906585e-06, "loss": 0.7431, "step": 6384 }, { "epoch": 0.65, "grad_norm": 1.6592215105240347, "learning_rate": 5.781117351365592e-06, "loss": 0.7135, "step": 6385 }, { "epoch": 0.65, "grad_norm": 1.517474499115124, "learning_rate": 5.778130669534254e-06, "loss": 0.6853, "step": 6386 }, { "epoch": 0.65, "grad_norm": 1.4892996147591693, "learning_rate": 5.775144445920811e-06, "loss": 0.7471, "step": 6387 }, { "epoch": 0.65, "grad_norm": 1.5256135918516867, "learning_rate": 5.772158680849374e-06, "loss": 0.7412, "step": 6388 }, { "epoch": 0.65, "grad_norm": 1.5529282088524847, "learning_rate": 5.769173374643991e-06, "loss": 0.6539, "step": 6389 }, { "epoch": 0.65, "grad_norm": 1.6485522629940237, "learning_rate": 5.766188527628679e-06, "loss": 0.7233, "step": 6390 }, { "epoch": 0.65, "grad_norm": 1.6621024165012506, "learning_rate": 5.76320414012739e-06, "loss": 0.6734, "step": 6391 }, { "epoch": 0.65, "grad_norm": 1.7848678387530656, "learning_rate": 5.760220212464034e-06, "loss": 0.7261, "step": 6392 }, { "epoch": 0.65, "grad_norm": 1.4887429010798594, "learning_rate": 5.757236744962476e-06, "loss": 0.6584, "step": 6393 }, { "epoch": 0.65, "grad_norm": 1.67450143515445, "learning_rate": 5.754253737946516e-06, "loss": 0.6852, "step": 6394 }, { "epoch": 0.65, "grad_norm": 1.5312323954574496, "learning_rate": 5.751271191739917e-06, "loss": 0.7058, "step": 6395 }, { "epoch": 0.65, "grad_norm": 1.6236234571249322, "learning_rate": 5.748289106666392e-06, "loss": 0.7407, "step": 6396 }, { "epoch": 0.65, "grad_norm": 1.6408794958238797, "learning_rate": 5.7453074830495896e-06, "loss": 0.7075, "step": 6397 }, { "epoch": 0.65, "grad_norm": 1.9122164979695884, "learning_rate": 5.742326321213127e-06, "loss": 0.6851, "step": 6398 }, { "epoch": 0.65, "grad_norm": 1.5172361668955892, "learning_rate": 5.739345621480559e-06, "loss": 0.6739, "step": 6399 }, { "epoch": 0.65, "grad_norm": 1.5217073707337485, "learning_rate": 5.736365384175393e-06, "loss": 0.5661, "step": 6400 }, { "epoch": 0.65, "grad_norm": 1.681497755371146, "learning_rate": 5.733385609621092e-06, "loss": 0.7032, "step": 6401 }, { "epoch": 0.65, "grad_norm": 1.4642727363740708, "learning_rate": 5.730406298141058e-06, "loss": 0.6612, "step": 6402 }, { "epoch": 0.65, "grad_norm": 1.6701836113853448, "learning_rate": 5.7274274500586535e-06, "loss": 0.6383, "step": 6403 }, { "epoch": 0.65, "grad_norm": 1.5146756310866385, "learning_rate": 5.724449065697182e-06, "loss": 0.6371, "step": 6404 }, { "epoch": 0.65, "grad_norm": 1.5455245536838689, "learning_rate": 5.721471145379901e-06, "loss": 0.6639, "step": 6405 }, { "epoch": 0.65, "grad_norm": 1.705302002513825, "learning_rate": 5.718493689430022e-06, "loss": 0.807, "step": 6406 }, { "epoch": 0.65, "grad_norm": 1.7491318011537786, "learning_rate": 5.715516698170696e-06, "loss": 0.8477, "step": 6407 }, { "epoch": 0.65, "grad_norm": 1.666722548545367, "learning_rate": 5.712540171925029e-06, "loss": 0.7674, "step": 6408 }, { "epoch": 0.65, "grad_norm": 1.7357789038610587, "learning_rate": 5.709564111016081e-06, "loss": 0.6924, "step": 6409 }, { "epoch": 0.65, "grad_norm": 1.6110366786415917, "learning_rate": 5.706588515766851e-06, "loss": 0.6787, "step": 6410 }, { "epoch": 0.65, "grad_norm": 1.6350931099155135, "learning_rate": 5.7036133865003e-06, "loss": 0.698, "step": 6411 }, { "epoch": 0.65, "grad_norm": 1.7228823458815403, "learning_rate": 5.700638723539325e-06, "loss": 0.7233, "step": 6412 }, { "epoch": 0.65, "grad_norm": 1.6021431264605037, "learning_rate": 5.69766452720678e-06, "loss": 0.6322, "step": 6413 }, { "epoch": 0.65, "grad_norm": 1.6334750931777735, "learning_rate": 5.694690797825475e-06, "loss": 0.6427, "step": 6414 }, { "epoch": 0.65, "grad_norm": 1.5833645717447067, "learning_rate": 5.691717535718151e-06, "loss": 0.7319, "step": 6415 }, { "epoch": 0.65, "grad_norm": 1.7498137753102825, "learning_rate": 5.688744741207516e-06, "loss": 0.6878, "step": 6416 }, { "epoch": 0.65, "grad_norm": 1.5676021784469838, "learning_rate": 5.6857724146162215e-06, "loss": 0.6379, "step": 6417 }, { "epoch": 0.65, "grad_norm": 1.5217944573670767, "learning_rate": 5.682800556266862e-06, "loss": 0.6389, "step": 6418 }, { "epoch": 0.65, "grad_norm": 1.3689128575405203, "learning_rate": 5.6798291664819875e-06, "loss": 0.5964, "step": 6419 }, { "epoch": 0.65, "grad_norm": 1.5680466041697287, "learning_rate": 5.676858245584103e-06, "loss": 0.6202, "step": 6420 }, { "epoch": 0.65, "grad_norm": 1.755950732711214, "learning_rate": 5.6738877938956426e-06, "loss": 0.7522, "step": 6421 }, { "epoch": 0.65, "grad_norm": 1.7249538817905252, "learning_rate": 5.6709178117390105e-06, "loss": 0.7686, "step": 6422 }, { "epoch": 0.65, "grad_norm": 1.69730402680779, "learning_rate": 5.667948299436555e-06, "loss": 0.682, "step": 6423 }, { "epoch": 0.65, "grad_norm": 1.6589657016064634, "learning_rate": 5.6649792573105625e-06, "loss": 0.6969, "step": 6424 }, { "epoch": 0.65, "grad_norm": 1.695822779042538, "learning_rate": 5.662010685683279e-06, "loss": 0.6876, "step": 6425 }, { "epoch": 0.65, "grad_norm": 1.7421322612105266, "learning_rate": 5.6590425848769e-06, "loss": 0.6974, "step": 6426 }, { "epoch": 0.65, "grad_norm": 1.8334794155879066, "learning_rate": 5.6560749552135605e-06, "loss": 0.8303, "step": 6427 }, { "epoch": 0.65, "grad_norm": 1.6470435016775014, "learning_rate": 5.653107797015354e-06, "loss": 0.6621, "step": 6428 }, { "epoch": 0.65, "grad_norm": 1.627521191505139, "learning_rate": 5.6501411106043205e-06, "loss": 0.6695, "step": 6429 }, { "epoch": 0.65, "grad_norm": 1.5213801668421556, "learning_rate": 5.647174896302442e-06, "loss": 0.6911, "step": 6430 }, { "epoch": 0.65, "grad_norm": 1.5363389440683215, "learning_rate": 5.644209154431662e-06, "loss": 0.6973, "step": 6431 }, { "epoch": 0.65, "grad_norm": 1.4381990884890545, "learning_rate": 5.641243885313856e-06, "loss": 0.697, "step": 6432 }, { "epoch": 0.65, "grad_norm": 1.7122163825991126, "learning_rate": 5.6382790892708665e-06, "loss": 0.6942, "step": 6433 }, { "epoch": 0.65, "grad_norm": 1.5466831181701948, "learning_rate": 5.635314766624474e-06, "loss": 0.6505, "step": 6434 }, { "epoch": 0.65, "grad_norm": 1.543566320891912, "learning_rate": 5.632350917696402e-06, "loss": 0.6747, "step": 6435 }, { "epoch": 0.65, "grad_norm": 1.6487087786876302, "learning_rate": 5.629387542808338e-06, "loss": 0.6439, "step": 6436 }, { "epoch": 0.65, "grad_norm": 1.5662196712883276, "learning_rate": 5.626424642281909e-06, "loss": 0.6167, "step": 6437 }, { "epoch": 0.65, "grad_norm": 1.6125241640825956, "learning_rate": 5.623462216438689e-06, "loss": 0.6974, "step": 6438 }, { "epoch": 0.65, "grad_norm": 1.4991815498617118, "learning_rate": 5.620500265600206e-06, "loss": 0.7292, "step": 6439 }, { "epoch": 0.66, "grad_norm": 1.6384111399872088, "learning_rate": 5.617538790087927e-06, "loss": 0.653, "step": 6440 }, { "epoch": 0.66, "grad_norm": 1.3135639297560717, "learning_rate": 5.614577790223279e-06, "loss": 0.61, "step": 6441 }, { "epoch": 0.66, "grad_norm": 1.5333413114465992, "learning_rate": 5.611617266327636e-06, "loss": 0.6731, "step": 6442 }, { "epoch": 0.66, "grad_norm": 1.6331806993463807, "learning_rate": 5.608657218722309e-06, "loss": 0.7274, "step": 6443 }, { "epoch": 0.66, "grad_norm": 1.6925118285778062, "learning_rate": 5.6056976477285695e-06, "loss": 0.884, "step": 6444 }, { "epoch": 0.66, "grad_norm": 1.4276036664358116, "learning_rate": 5.602738553667629e-06, "loss": 0.6985, "step": 6445 }, { "epoch": 0.66, "grad_norm": 1.5209224336151763, "learning_rate": 5.599779936860652e-06, "loss": 0.6954, "step": 6446 }, { "epoch": 0.66, "grad_norm": 1.611923903199948, "learning_rate": 5.5968217976287565e-06, "loss": 0.6656, "step": 6447 }, { "epoch": 0.66, "grad_norm": 1.8579107068263057, "learning_rate": 5.593864136292992e-06, "loss": 0.707, "step": 6448 }, { "epoch": 0.66, "grad_norm": 1.5452524877049152, "learning_rate": 5.5909069531743755e-06, "loss": 0.6808, "step": 6449 }, { "epoch": 0.66, "grad_norm": 1.698242540548482, "learning_rate": 5.5879502485938544e-06, "loss": 0.7089, "step": 6450 }, { "epoch": 0.66, "grad_norm": 1.8162676945644611, "learning_rate": 5.584994022872337e-06, "loss": 0.7118, "step": 6451 }, { "epoch": 0.66, "grad_norm": 1.538992476391323, "learning_rate": 5.582038276330679e-06, "loss": 0.7687, "step": 6452 }, { "epoch": 0.66, "grad_norm": 1.7325920782664779, "learning_rate": 5.5790830092896744e-06, "loss": 0.6782, "step": 6453 }, { "epoch": 0.66, "grad_norm": 1.7047326937943428, "learning_rate": 5.576128222070072e-06, "loss": 0.7279, "step": 6454 }, { "epoch": 0.66, "grad_norm": 1.5326744108994126, "learning_rate": 5.573173914992575e-06, "loss": 0.7136, "step": 6455 }, { "epoch": 0.66, "grad_norm": 1.5658639914644978, "learning_rate": 5.570220088377817e-06, "loss": 0.6877, "step": 6456 }, { "epoch": 0.66, "grad_norm": 1.5407873449624567, "learning_rate": 5.567266742546398e-06, "loss": 0.7392, "step": 6457 }, { "epoch": 0.66, "grad_norm": 1.7165329737166082, "learning_rate": 5.56431387781885e-06, "loss": 0.6999, "step": 6458 }, { "epoch": 0.66, "grad_norm": 1.5792158050361675, "learning_rate": 5.5613614945156644e-06, "loss": 0.8257, "step": 6459 }, { "epoch": 0.66, "grad_norm": 1.6861064784060478, "learning_rate": 5.558409592957281e-06, "loss": 0.6937, "step": 6460 }, { "epoch": 0.66, "grad_norm": 1.6622203408620764, "learning_rate": 5.555458173464073e-06, "loss": 0.7079, "step": 6461 }, { "epoch": 0.66, "grad_norm": 1.6179041108888554, "learning_rate": 5.552507236356378e-06, "loss": 0.6671, "step": 6462 }, { "epoch": 0.66, "grad_norm": 1.5561973558428366, "learning_rate": 5.549556781954468e-06, "loss": 0.688, "step": 6463 }, { "epoch": 0.66, "grad_norm": 1.8770461530985756, "learning_rate": 5.546606810578574e-06, "loss": 0.7056, "step": 6464 }, { "epoch": 0.66, "grad_norm": 1.6639773368960784, "learning_rate": 5.54365732254887e-06, "loss": 0.8691, "step": 6465 }, { "epoch": 0.66, "grad_norm": 1.766616000861856, "learning_rate": 5.540708318185471e-06, "loss": 0.6345, "step": 6466 }, { "epoch": 0.66, "grad_norm": 1.7447860975157186, "learning_rate": 5.537759797808451e-06, "loss": 0.8052, "step": 6467 }, { "epoch": 0.66, "grad_norm": 1.6314521492158323, "learning_rate": 5.5348117617378205e-06, "loss": 0.7425, "step": 6468 }, { "epoch": 0.66, "grad_norm": 1.6169731780187688, "learning_rate": 5.531864210293545e-06, "loss": 0.6888, "step": 6469 }, { "epoch": 0.66, "grad_norm": 1.8868048761897305, "learning_rate": 5.52891714379554e-06, "loss": 0.6861, "step": 6470 }, { "epoch": 0.66, "grad_norm": 1.563665870685886, "learning_rate": 5.525970562563656e-06, "loss": 0.7059, "step": 6471 }, { "epoch": 0.66, "grad_norm": 1.66926400990454, "learning_rate": 5.5230244669177e-06, "loss": 0.7607, "step": 6472 }, { "epoch": 0.66, "grad_norm": 1.6671494780823763, "learning_rate": 5.520078857177431e-06, "loss": 0.7621, "step": 6473 }, { "epoch": 0.66, "grad_norm": 1.574392066987864, "learning_rate": 5.5171337336625385e-06, "loss": 0.718, "step": 6474 }, { "epoch": 0.66, "grad_norm": 1.77239094210856, "learning_rate": 5.5141890966926814e-06, "loss": 0.7875, "step": 6475 }, { "epoch": 0.66, "grad_norm": 1.423741447571245, "learning_rate": 5.511244946587442e-06, "loss": 0.7009, "step": 6476 }, { "epoch": 0.66, "grad_norm": 1.555072089538958, "learning_rate": 5.5083012836663675e-06, "loss": 0.6933, "step": 6477 }, { "epoch": 0.66, "grad_norm": 1.685968478521124, "learning_rate": 5.5053581082489505e-06, "loss": 0.6945, "step": 6478 }, { "epoch": 0.66, "grad_norm": 1.4061025474606934, "learning_rate": 5.502415420654619e-06, "loss": 0.5818, "step": 6479 }, { "epoch": 0.66, "grad_norm": 1.6721597452098804, "learning_rate": 5.4994732212027625e-06, "loss": 0.7269, "step": 6480 }, { "epoch": 0.66, "grad_norm": 1.6885123796718169, "learning_rate": 5.4965315102127035e-06, "loss": 0.7111, "step": 6481 }, { "epoch": 0.66, "grad_norm": 1.587768016336439, "learning_rate": 5.4935902880037224e-06, "loss": 0.7552, "step": 6482 }, { "epoch": 0.66, "grad_norm": 1.6544396399034218, "learning_rate": 5.490649554895047e-06, "loss": 0.7939, "step": 6483 }, { "epoch": 0.66, "grad_norm": 1.903730177094068, "learning_rate": 5.487709311205842e-06, "loss": 0.703, "step": 6484 }, { "epoch": 0.66, "grad_norm": 1.5464431344845322, "learning_rate": 5.48476955725523e-06, "loss": 0.6926, "step": 6485 }, { "epoch": 0.66, "grad_norm": 1.6445360027395115, "learning_rate": 5.481830293362267e-06, "loss": 0.72, "step": 6486 }, { "epoch": 0.66, "grad_norm": 1.4844153552483355, "learning_rate": 5.47889151984597e-06, "loss": 0.6338, "step": 6487 }, { "epoch": 0.66, "grad_norm": 1.7527504474138509, "learning_rate": 5.475953237025301e-06, "loss": 0.7561, "step": 6488 }, { "epoch": 0.66, "grad_norm": 1.5552778353388261, "learning_rate": 5.473015445219155e-06, "loss": 0.821, "step": 6489 }, { "epoch": 0.66, "grad_norm": 1.6307048291861013, "learning_rate": 5.470078144746389e-06, "loss": 0.7274, "step": 6490 }, { "epoch": 0.66, "grad_norm": 1.7001117892238, "learning_rate": 5.467141335925804e-06, "loss": 0.7728, "step": 6491 }, { "epoch": 0.66, "grad_norm": 1.5869091692069035, "learning_rate": 5.464205019076138e-06, "loss": 0.6786, "step": 6492 }, { "epoch": 0.66, "grad_norm": 1.6329469565910237, "learning_rate": 5.461269194516088e-06, "loss": 0.7075, "step": 6493 }, { "epoch": 0.66, "grad_norm": 1.8988642438158536, "learning_rate": 5.458333862564286e-06, "loss": 0.7586, "step": 6494 }, { "epoch": 0.66, "grad_norm": 1.6633595330570712, "learning_rate": 5.4553990235393184e-06, "loss": 0.6541, "step": 6495 }, { "epoch": 0.66, "grad_norm": 1.5781666882673357, "learning_rate": 5.452464677759721e-06, "loss": 0.6325, "step": 6496 }, { "epoch": 0.66, "grad_norm": 1.5661497583477781, "learning_rate": 5.449530825543965e-06, "loss": 0.7228, "step": 6497 }, { "epoch": 0.66, "grad_norm": 1.5866430847198567, "learning_rate": 5.446597467210479e-06, "loss": 0.5912, "step": 6498 }, { "epoch": 0.66, "grad_norm": 1.6481833752296238, "learning_rate": 5.443664603077628e-06, "loss": 0.8418, "step": 6499 }, { "epoch": 0.66, "grad_norm": 1.695607932716036, "learning_rate": 5.44073223346373e-06, "loss": 0.7262, "step": 6500 }, { "epoch": 0.66, "grad_norm": 1.5218003829304017, "learning_rate": 5.437800358687054e-06, "loss": 0.7119, "step": 6501 }, { "epoch": 0.66, "grad_norm": 1.5239834220771615, "learning_rate": 5.4348689790658006e-06, "loss": 0.6818, "step": 6502 }, { "epoch": 0.66, "grad_norm": 1.5960102475372329, "learning_rate": 5.431938094918132e-06, "loss": 0.7411, "step": 6503 }, { "epoch": 0.66, "grad_norm": 1.657548516507031, "learning_rate": 5.4290077065621426e-06, "loss": 0.7023, "step": 6504 }, { "epoch": 0.66, "grad_norm": 1.6262940709200162, "learning_rate": 5.426077814315885e-06, "loss": 0.7244, "step": 6505 }, { "epoch": 0.66, "grad_norm": 1.6390654376483609, "learning_rate": 5.423148418497356e-06, "loss": 0.5825, "step": 6506 }, { "epoch": 0.66, "grad_norm": 1.6031873520521178, "learning_rate": 5.4202195194244876e-06, "loss": 0.7294, "step": 6507 }, { "epoch": 0.66, "grad_norm": 1.7798635128022935, "learning_rate": 5.417291117415172e-06, "loss": 0.6766, "step": 6508 }, { "epoch": 0.66, "grad_norm": 1.6385895753692539, "learning_rate": 5.414363212787244e-06, "loss": 0.6456, "step": 6509 }, { "epoch": 0.66, "grad_norm": 1.586186792105555, "learning_rate": 5.411435805858473e-06, "loss": 0.7585, "step": 6510 }, { "epoch": 0.66, "grad_norm": 1.6013166183073786, "learning_rate": 5.4085088969465916e-06, "loss": 0.703, "step": 6511 }, { "epoch": 0.66, "grad_norm": 1.5405890246283256, "learning_rate": 5.405582486369264e-06, "loss": 0.6945, "step": 6512 }, { "epoch": 0.66, "grad_norm": 1.4703347207965698, "learning_rate": 5.402656574444108e-06, "loss": 0.709, "step": 6513 }, { "epoch": 0.66, "grad_norm": 1.7235942090586773, "learning_rate": 5.399731161488691e-06, "loss": 0.7877, "step": 6514 }, { "epoch": 0.66, "grad_norm": 1.576014597382119, "learning_rate": 5.3968062478205115e-06, "loss": 0.705, "step": 6515 }, { "epoch": 0.66, "grad_norm": 1.4943026192192757, "learning_rate": 5.393881833757033e-06, "loss": 0.6441, "step": 6516 }, { "epoch": 0.66, "grad_norm": 1.4372588715266486, "learning_rate": 5.390957919615645e-06, "loss": 0.6064, "step": 6517 }, { "epoch": 0.66, "grad_norm": 1.5746568492901751, "learning_rate": 5.388034505713697e-06, "loss": 0.7424, "step": 6518 }, { "epoch": 0.66, "grad_norm": 1.6185110839386305, "learning_rate": 5.385111592368486e-06, "loss": 0.6514, "step": 6519 }, { "epoch": 0.66, "grad_norm": 1.795299732229972, "learning_rate": 5.382189179897237e-06, "loss": 0.7573, "step": 6520 }, { "epoch": 0.66, "grad_norm": 1.6681414848347083, "learning_rate": 5.379267268617139e-06, "loss": 0.731, "step": 6521 }, { "epoch": 0.66, "grad_norm": 1.8258298354662874, "learning_rate": 5.376345858845322e-06, "loss": 0.7484, "step": 6522 }, { "epoch": 0.66, "grad_norm": 1.4220644902625972, "learning_rate": 5.373424950898854e-06, "loss": 0.5895, "step": 6523 }, { "epoch": 0.66, "grad_norm": 1.5616930856307956, "learning_rate": 5.3705045450947546e-06, "loss": 0.7315, "step": 6524 }, { "epoch": 0.66, "grad_norm": 1.650050715075898, "learning_rate": 5.367584641749994e-06, "loss": 0.7211, "step": 6525 }, { "epoch": 0.66, "grad_norm": 1.5675250874675495, "learning_rate": 5.3646652411814735e-06, "loss": 0.767, "step": 6526 }, { "epoch": 0.66, "grad_norm": 1.7880041175999453, "learning_rate": 5.361746343706053e-06, "loss": 0.7265, "step": 6527 }, { "epoch": 0.66, "grad_norm": 1.4506290455380577, "learning_rate": 5.358827949640536e-06, "loss": 0.7461, "step": 6528 }, { "epoch": 0.66, "grad_norm": 1.9009951006619712, "learning_rate": 5.3559100593016615e-06, "loss": 0.6162, "step": 6529 }, { "epoch": 0.66, "grad_norm": 1.6192604915096651, "learning_rate": 5.352992673006125e-06, "loss": 0.6748, "step": 6530 }, { "epoch": 0.66, "grad_norm": 1.6344829669114818, "learning_rate": 5.350075791070566e-06, "loss": 0.7157, "step": 6531 }, { "epoch": 0.66, "grad_norm": 1.7282190505058468, "learning_rate": 5.34715941381156e-06, "loss": 0.7053, "step": 6532 }, { "epoch": 0.66, "grad_norm": 1.5616856047012901, "learning_rate": 5.344243541545637e-06, "loss": 0.7304, "step": 6533 }, { "epoch": 0.66, "grad_norm": 1.488895832298304, "learning_rate": 5.341328174589273e-06, "loss": 0.7114, "step": 6534 }, { "epoch": 0.66, "grad_norm": 1.7697051118099179, "learning_rate": 5.338413313258879e-06, "loss": 0.7133, "step": 6535 }, { "epoch": 0.66, "grad_norm": 1.6997375561691037, "learning_rate": 5.335498957870821e-06, "loss": 0.7103, "step": 6536 }, { "epoch": 0.66, "grad_norm": 1.737073289454585, "learning_rate": 5.332585108741411e-06, "loss": 0.7116, "step": 6537 }, { "epoch": 0.67, "grad_norm": 1.69707998251365, "learning_rate": 5.329671766186892e-06, "loss": 0.6982, "step": 6538 }, { "epoch": 0.67, "grad_norm": 1.5938705443800656, "learning_rate": 5.32675893052347e-06, "loss": 0.7617, "step": 6539 }, { "epoch": 0.67, "grad_norm": 1.6785160562909558, "learning_rate": 5.323846602067283e-06, "loss": 0.7133, "step": 6540 }, { "epoch": 0.67, "grad_norm": 1.7254541858211765, "learning_rate": 5.320934781134419e-06, "loss": 0.7006, "step": 6541 }, { "epoch": 0.67, "grad_norm": 1.4717849939657723, "learning_rate": 5.318023468040918e-06, "loss": 0.6648, "step": 6542 }, { "epoch": 0.67, "grad_norm": 1.542910712759783, "learning_rate": 5.315112663102746e-06, "loss": 0.7322, "step": 6543 }, { "epoch": 0.67, "grad_norm": 1.737205889574892, "learning_rate": 5.312202366635831e-06, "loss": 0.7091, "step": 6544 }, { "epoch": 0.67, "grad_norm": 1.5309583554809463, "learning_rate": 5.3092925789560445e-06, "loss": 0.6099, "step": 6545 }, { "epoch": 0.67, "grad_norm": 1.480662378918754, "learning_rate": 5.30638330037919e-06, "loss": 0.749, "step": 6546 }, { "epoch": 0.67, "grad_norm": 1.461291497402869, "learning_rate": 5.303474531221033e-06, "loss": 0.7021, "step": 6547 }, { "epoch": 0.67, "grad_norm": 1.65880915293672, "learning_rate": 5.300566271797265e-06, "loss": 0.6811, "step": 6548 }, { "epoch": 0.67, "grad_norm": 1.3833613813366754, "learning_rate": 5.297658522423539e-06, "loss": 0.65, "step": 6549 }, { "epoch": 0.67, "grad_norm": 1.587303853860589, "learning_rate": 5.294751283415446e-06, "loss": 0.76, "step": 6550 }, { "epoch": 0.67, "grad_norm": 1.563782664562314, "learning_rate": 5.2918445550885165e-06, "loss": 0.6299, "step": 6551 }, { "epoch": 0.67, "grad_norm": 1.4450028756994537, "learning_rate": 5.288938337758236e-06, "loss": 0.5837, "step": 6552 }, { "epoch": 0.67, "grad_norm": 1.663183818495271, "learning_rate": 5.286032631740023e-06, "loss": 0.7403, "step": 6553 }, { "epoch": 0.67, "grad_norm": 1.491077128099564, "learning_rate": 5.28312743734925e-06, "loss": 0.6107, "step": 6554 }, { "epoch": 0.67, "grad_norm": 1.7013609398124023, "learning_rate": 5.280222754901234e-06, "loss": 0.7547, "step": 6555 }, { "epoch": 0.67, "grad_norm": 1.488129397639651, "learning_rate": 5.277318584711224e-06, "loss": 0.5939, "step": 6556 }, { "epoch": 0.67, "grad_norm": 1.6660977197771891, "learning_rate": 5.274414927094433e-06, "loss": 0.718, "step": 6557 }, { "epoch": 0.67, "grad_norm": 1.596142506255152, "learning_rate": 5.271511782365997e-06, "loss": 0.5755, "step": 6558 }, { "epoch": 0.67, "grad_norm": 1.570684630382438, "learning_rate": 5.268609150841011e-06, "loss": 0.7117, "step": 6559 }, { "epoch": 0.67, "grad_norm": 1.3862051356848353, "learning_rate": 5.265707032834515e-06, "loss": 0.8393, "step": 6560 }, { "epoch": 0.67, "grad_norm": 1.676536115535365, "learning_rate": 5.262805428661481e-06, "loss": 0.8902, "step": 6561 }, { "epoch": 0.67, "grad_norm": 1.6601852418073246, "learning_rate": 5.259904338636839e-06, "loss": 0.6665, "step": 6562 }, { "epoch": 0.67, "grad_norm": 1.7040187006990393, "learning_rate": 5.257003763075452e-06, "loss": 0.7277, "step": 6563 }, { "epoch": 0.67, "grad_norm": 1.683236229240097, "learning_rate": 5.2541037022921335e-06, "loss": 0.7488, "step": 6564 }, { "epoch": 0.67, "grad_norm": 1.4747881576506794, "learning_rate": 5.251204156601645e-06, "loss": 0.6613, "step": 6565 }, { "epoch": 0.67, "grad_norm": 1.6054185824390816, "learning_rate": 5.248305126318678e-06, "loss": 0.7666, "step": 6566 }, { "epoch": 0.67, "grad_norm": 1.4386913476814391, "learning_rate": 5.245406611757882e-06, "loss": 0.7446, "step": 6567 }, { "epoch": 0.67, "grad_norm": 1.5303464616012228, "learning_rate": 5.242508613233848e-06, "loss": 0.6642, "step": 6568 }, { "epoch": 0.67, "grad_norm": 1.6563424630765726, "learning_rate": 5.239611131061102e-06, "loss": 0.6124, "step": 6569 }, { "epoch": 0.67, "grad_norm": 1.4331120110368254, "learning_rate": 5.236714165554126e-06, "loss": 0.6598, "step": 6570 }, { "epoch": 0.67, "grad_norm": 1.298885302331489, "learning_rate": 5.233817717027337e-06, "loss": 0.6008, "step": 6571 }, { "epoch": 0.67, "grad_norm": 1.603739287059268, "learning_rate": 5.230921785795097e-06, "loss": 0.6126, "step": 6572 }, { "epoch": 0.67, "grad_norm": 1.5185383591183974, "learning_rate": 5.228026372171722e-06, "loss": 0.6423, "step": 6573 }, { "epoch": 0.67, "grad_norm": 1.408739492333563, "learning_rate": 5.225131476471455e-06, "loss": 0.6603, "step": 6574 }, { "epoch": 0.67, "grad_norm": 1.5087200124354232, "learning_rate": 5.2222370990085006e-06, "loss": 0.5578, "step": 6575 }, { "epoch": 0.67, "grad_norm": 1.6882350770849142, "learning_rate": 5.219343240096988e-06, "loss": 0.7846, "step": 6576 }, { "epoch": 0.67, "grad_norm": 1.4165427103087591, "learning_rate": 5.216449900051006e-06, "loss": 0.6297, "step": 6577 }, { "epoch": 0.67, "grad_norm": 1.6031125511277677, "learning_rate": 5.213557079184584e-06, "loss": 0.6257, "step": 6578 }, { "epoch": 0.67, "grad_norm": 1.6330463089764724, "learning_rate": 5.210664777811687e-06, "loss": 0.7085, "step": 6579 }, { "epoch": 0.67, "grad_norm": 1.55459047365538, "learning_rate": 5.207772996246234e-06, "loss": 0.718, "step": 6580 }, { "epoch": 0.67, "grad_norm": 1.8562593153317937, "learning_rate": 5.204881734802078e-06, "loss": 0.7194, "step": 6581 }, { "epoch": 0.67, "grad_norm": 1.6494287308768552, "learning_rate": 5.201990993793022e-06, "loss": 0.7813, "step": 6582 }, { "epoch": 0.67, "grad_norm": 1.6253578733828447, "learning_rate": 5.199100773532815e-06, "loss": 0.7396, "step": 6583 }, { "epoch": 0.67, "grad_norm": 1.7042981931473753, "learning_rate": 5.196211074335136e-06, "loss": 0.7305, "step": 6584 }, { "epoch": 0.67, "grad_norm": 1.4128951522222983, "learning_rate": 5.1933218965136235e-06, "loss": 0.7753, "step": 6585 }, { "epoch": 0.67, "grad_norm": 1.6599676572854145, "learning_rate": 5.190433240381855e-06, "loss": 0.7574, "step": 6586 }, { "epoch": 0.67, "grad_norm": 1.6950322153257795, "learning_rate": 5.18754510625334e-06, "loss": 0.6192, "step": 6587 }, { "epoch": 0.67, "grad_norm": 1.5876474049189262, "learning_rate": 5.1846574944415505e-06, "loss": 0.6778, "step": 6588 }, { "epoch": 0.67, "grad_norm": 1.7067641494328831, "learning_rate": 5.181770405259883e-06, "loss": 0.6738, "step": 6589 }, { "epoch": 0.67, "grad_norm": 1.560063028944311, "learning_rate": 5.1788838390216874e-06, "loss": 0.604, "step": 6590 }, { "epoch": 0.67, "grad_norm": 1.6402935150003717, "learning_rate": 5.175997796040262e-06, "loss": 0.7236, "step": 6591 }, { "epoch": 0.67, "grad_norm": 1.4941234108482795, "learning_rate": 5.173112276628834e-06, "loss": 0.8053, "step": 6592 }, { "epoch": 0.67, "grad_norm": 1.8240564355995594, "learning_rate": 5.170227281100587e-06, "loss": 0.6713, "step": 6593 }, { "epoch": 0.67, "grad_norm": 1.5962204639297288, "learning_rate": 5.167342809768636e-06, "loss": 0.7, "step": 6594 }, { "epoch": 0.67, "grad_norm": 1.6994359482698425, "learning_rate": 5.164458862946049e-06, "loss": 0.6477, "step": 6595 }, { "epoch": 0.67, "grad_norm": 1.5502079375142361, "learning_rate": 5.1615754409458365e-06, "loss": 0.628, "step": 6596 }, { "epoch": 0.67, "grad_norm": 1.7346230320590885, "learning_rate": 5.158692544080941e-06, "loss": 0.7447, "step": 6597 }, { "epoch": 0.67, "grad_norm": 1.4492600740682662, "learning_rate": 5.155810172664264e-06, "loss": 0.6451, "step": 6598 }, { "epoch": 0.67, "grad_norm": 1.6226120765657546, "learning_rate": 5.152928327008635e-06, "loss": 0.5691, "step": 6599 }, { "epoch": 0.67, "grad_norm": 1.6380828656883044, "learning_rate": 5.1500470074268375e-06, "loss": 0.6748, "step": 6600 }, { "epoch": 0.67, "grad_norm": 1.5436601441971176, "learning_rate": 5.147166214231595e-06, "loss": 0.6615, "step": 6601 }, { "epoch": 0.67, "grad_norm": 1.4598286166313794, "learning_rate": 5.144285947735566e-06, "loss": 0.6504, "step": 6602 }, { "epoch": 0.67, "grad_norm": 1.691712192890952, "learning_rate": 5.141406208251363e-06, "loss": 0.6155, "step": 6603 }, { "epoch": 0.67, "grad_norm": 1.4712765944949058, "learning_rate": 5.13852699609154e-06, "loss": 0.6557, "step": 6604 }, { "epoch": 0.67, "grad_norm": 1.6182224948885766, "learning_rate": 5.135648311568583e-06, "loss": 0.7785, "step": 6605 }, { "epoch": 0.67, "grad_norm": 1.7863311346040405, "learning_rate": 5.132770154994935e-06, "loss": 0.7662, "step": 6606 }, { "epoch": 0.67, "grad_norm": 1.6875108524698368, "learning_rate": 5.1298925266829694e-06, "loss": 0.681, "step": 6607 }, { "epoch": 0.67, "grad_norm": 1.6087828341371864, "learning_rate": 5.127015426945008e-06, "loss": 0.6428, "step": 6608 }, { "epoch": 0.67, "grad_norm": 1.4344497230237643, "learning_rate": 5.124138856093321e-06, "loss": 0.6741, "step": 6609 }, { "epoch": 0.67, "grad_norm": 1.630878458485287, "learning_rate": 5.1212628144401086e-06, "loss": 0.7074, "step": 6610 }, { "epoch": 0.67, "grad_norm": 1.5157406026220226, "learning_rate": 5.118387302297527e-06, "loss": 0.674, "step": 6611 }, { "epoch": 0.67, "grad_norm": 1.7480585578015724, "learning_rate": 5.115512319977659e-06, "loss": 0.6263, "step": 6612 }, { "epoch": 0.67, "grad_norm": 1.8044357935825248, "learning_rate": 5.112637867792544e-06, "loss": 0.7768, "step": 6613 }, { "epoch": 0.67, "grad_norm": 1.5207172511477907, "learning_rate": 5.109763946054163e-06, "loss": 0.7245, "step": 6614 }, { "epoch": 0.67, "grad_norm": 1.6838757695281426, "learning_rate": 5.106890555074426e-06, "loss": 0.6495, "step": 6615 }, { "epoch": 0.67, "grad_norm": 1.5251784247030207, "learning_rate": 5.1040176951652046e-06, "loss": 0.668, "step": 6616 }, { "epoch": 0.67, "grad_norm": 1.965638309649969, "learning_rate": 5.101145366638294e-06, "loss": 0.6732, "step": 6617 }, { "epoch": 0.67, "grad_norm": 1.6207157830214798, "learning_rate": 5.098273569805443e-06, "loss": 0.6844, "step": 6618 }, { "epoch": 0.67, "grad_norm": 1.4730282022868064, "learning_rate": 5.095402304978346e-06, "loss": 0.6946, "step": 6619 }, { "epoch": 0.67, "grad_norm": 1.5416613162094852, "learning_rate": 5.092531572468627e-06, "loss": 0.6273, "step": 6620 }, { "epoch": 0.67, "grad_norm": 1.591078859736904, "learning_rate": 5.089661372587859e-06, "loss": 0.7806, "step": 6621 }, { "epoch": 0.67, "grad_norm": 1.6373143476009928, "learning_rate": 5.086791705647566e-06, "loss": 0.7163, "step": 6622 }, { "epoch": 0.67, "grad_norm": 1.5145191628770813, "learning_rate": 5.083922571959194e-06, "loss": 0.5919, "step": 6623 }, { "epoch": 0.67, "grad_norm": 1.4928294951142096, "learning_rate": 5.081053971834153e-06, "loss": 0.6832, "step": 6624 }, { "epoch": 0.67, "grad_norm": 1.551921705364535, "learning_rate": 5.078185905583774e-06, "loss": 0.6539, "step": 6625 }, { "epoch": 0.67, "grad_norm": 1.6046975733459894, "learning_rate": 5.075318373519349e-06, "loss": 0.7742, "step": 6626 }, { "epoch": 0.67, "grad_norm": 1.6771471734669585, "learning_rate": 5.072451375952104e-06, "loss": 0.7703, "step": 6627 }, { "epoch": 0.67, "grad_norm": 1.7927702142712978, "learning_rate": 5.069584913193199e-06, "loss": 0.812, "step": 6628 }, { "epoch": 0.67, "grad_norm": 1.6327378040139973, "learning_rate": 5.066718985553749e-06, "loss": 0.6578, "step": 6629 }, { "epoch": 0.67, "grad_norm": 1.5301086870655425, "learning_rate": 5.06385359334481e-06, "loss": 0.6644, "step": 6630 }, { "epoch": 0.67, "grad_norm": 1.553619736705293, "learning_rate": 5.060988736877366e-06, "loss": 0.727, "step": 6631 }, { "epoch": 0.67, "grad_norm": 1.6295412264746585, "learning_rate": 5.0581244164623576e-06, "loss": 0.687, "step": 6632 }, { "epoch": 0.67, "grad_norm": 1.4718060176301735, "learning_rate": 5.055260632410665e-06, "loss": 0.6943, "step": 6633 }, { "epoch": 0.67, "grad_norm": 1.5675917131961066, "learning_rate": 5.0523973850331e-06, "loss": 0.7724, "step": 6634 }, { "epoch": 0.67, "grad_norm": 1.678519567246987, "learning_rate": 5.049534674640426e-06, "loss": 0.6734, "step": 6635 }, { "epoch": 0.68, "grad_norm": 1.6314176517551688, "learning_rate": 5.0466725015433526e-06, "loss": 0.6701, "step": 6636 }, { "epoch": 0.68, "grad_norm": 1.6684931385240929, "learning_rate": 5.043810866052512e-06, "loss": 0.7685, "step": 6637 }, { "epoch": 0.68, "grad_norm": 1.559866500388052, "learning_rate": 5.040949768478496e-06, "loss": 0.7064, "step": 6638 }, { "epoch": 0.68, "grad_norm": 1.6826584219645397, "learning_rate": 5.038089209131837e-06, "loss": 0.7978, "step": 6639 }, { "epoch": 0.68, "grad_norm": 1.8855229766745862, "learning_rate": 5.035229188322994e-06, "loss": 0.7099, "step": 6640 }, { "epoch": 0.68, "grad_norm": 1.6390837167916354, "learning_rate": 5.032369706362381e-06, "loss": 0.7395, "step": 6641 }, { "epoch": 0.68, "grad_norm": 1.501803420191579, "learning_rate": 5.029510763560357e-06, "loss": 0.7012, "step": 6642 }, { "epoch": 0.68, "grad_norm": 1.4287509327965948, "learning_rate": 5.026652360227205e-06, "loss": 0.6711, "step": 6643 }, { "epoch": 0.68, "grad_norm": 1.6337553722301748, "learning_rate": 5.023794496673169e-06, "loss": 0.597, "step": 6644 }, { "epoch": 0.68, "grad_norm": 1.6900452701037583, "learning_rate": 5.020937173208418e-06, "loss": 0.7713, "step": 6645 }, { "epoch": 0.68, "grad_norm": 1.630566533375451, "learning_rate": 5.018080390143072e-06, "loss": 0.7175, "step": 6646 }, { "epoch": 0.68, "grad_norm": 1.6482122448531207, "learning_rate": 5.0152241477871955e-06, "loss": 0.665, "step": 6647 }, { "epoch": 0.68, "grad_norm": 1.5223059886944974, "learning_rate": 5.0123684464507805e-06, "loss": 0.6861, "step": 6648 }, { "epoch": 0.68, "grad_norm": 1.5625226235889387, "learning_rate": 5.009513286443773e-06, "loss": 0.7124, "step": 6649 }, { "epoch": 0.68, "grad_norm": 1.6641033534342602, "learning_rate": 5.00665866807606e-06, "loss": 0.6798, "step": 6650 }, { "epoch": 0.68, "grad_norm": 1.814997332535629, "learning_rate": 5.003804591657457e-06, "loss": 0.6784, "step": 6651 }, { "epoch": 0.68, "grad_norm": 1.7418241486270254, "learning_rate": 5.0009510574977385e-06, "loss": 0.684, "step": 6652 }, { "epoch": 0.68, "grad_norm": 1.8990670470019306, "learning_rate": 4.998098065906601e-06, "loss": 0.7724, "step": 6653 }, { "epoch": 0.68, "grad_norm": 1.5522110614566593, "learning_rate": 4.9952456171936985e-06, "loss": 0.6414, "step": 6654 }, { "epoch": 0.68, "grad_norm": 1.6383848530820382, "learning_rate": 4.992393711668622e-06, "loss": 0.8211, "step": 6655 }, { "epoch": 0.68, "grad_norm": 1.7466103343291657, "learning_rate": 4.989542349640894e-06, "loss": 0.7061, "step": 6656 }, { "epoch": 0.68, "grad_norm": 1.569027050350729, "learning_rate": 4.986691531419989e-06, "loss": 0.5821, "step": 6657 }, { "epoch": 0.68, "grad_norm": 1.6108341079235893, "learning_rate": 4.983841257315324e-06, "loss": 0.7087, "step": 6658 }, { "epoch": 0.68, "grad_norm": 1.786948163093711, "learning_rate": 4.980991527636242e-06, "loss": 0.6429, "step": 6659 }, { "epoch": 0.68, "grad_norm": 1.698101564669535, "learning_rate": 4.978142342692045e-06, "loss": 0.7126, "step": 6660 }, { "epoch": 0.68, "grad_norm": 1.6742980171896125, "learning_rate": 4.975293702791959e-06, "loss": 0.7271, "step": 6661 }, { "epoch": 0.68, "grad_norm": 1.8063655791321431, "learning_rate": 4.9724456082451655e-06, "loss": 0.6859, "step": 6662 }, { "epoch": 0.68, "grad_norm": 1.692416378690283, "learning_rate": 4.9695980593607815e-06, "loss": 0.7298, "step": 6663 }, { "epoch": 0.68, "grad_norm": 1.610922610672971, "learning_rate": 4.9667510564478584e-06, "loss": 0.7034, "step": 6664 }, { "epoch": 0.68, "grad_norm": 1.6859325226267035, "learning_rate": 4.9639045998154e-06, "loss": 0.7184, "step": 6665 }, { "epoch": 0.68, "grad_norm": 1.5747449742645898, "learning_rate": 4.961058689772339e-06, "loss": 0.6004, "step": 6666 }, { "epoch": 0.68, "grad_norm": 1.6240207523487893, "learning_rate": 4.958213326627555e-06, "loss": 0.6998, "step": 6667 }, { "epoch": 0.68, "grad_norm": 1.5305125840708707, "learning_rate": 4.9553685106898765e-06, "loss": 0.6738, "step": 6668 }, { "epoch": 0.68, "grad_norm": 1.4862581273327402, "learning_rate": 4.9525242422680504e-06, "loss": 0.6822, "step": 6669 }, { "epoch": 0.68, "grad_norm": 1.6232042430434463, "learning_rate": 4.9496805216707885e-06, "loss": 0.682, "step": 6670 }, { "epoch": 0.68, "grad_norm": 1.579863545696646, "learning_rate": 4.946837349206726e-06, "loss": 0.6512, "step": 6671 }, { "epoch": 0.68, "grad_norm": 1.6551829456643596, "learning_rate": 4.943994725184445e-06, "loss": 0.759, "step": 6672 }, { "epoch": 0.68, "grad_norm": 1.8637227571573647, "learning_rate": 4.941152649912473e-06, "loss": 0.7387, "step": 6673 }, { "epoch": 0.68, "grad_norm": 1.6064385826752752, "learning_rate": 4.938311123699265e-06, "loss": 0.6942, "step": 6674 }, { "epoch": 0.68, "grad_norm": 1.514599431310791, "learning_rate": 4.935470146853229e-06, "loss": 0.6767, "step": 6675 }, { "epoch": 0.68, "grad_norm": 1.6687265203998711, "learning_rate": 4.9326297196827136e-06, "loss": 0.6838, "step": 6676 }, { "epoch": 0.68, "grad_norm": 1.3655127070699202, "learning_rate": 4.929789842495992e-06, "loss": 0.6231, "step": 6677 }, { "epoch": 0.68, "grad_norm": 1.674511965233358, "learning_rate": 4.9269505156012974e-06, "loss": 0.7355, "step": 6678 }, { "epoch": 0.68, "grad_norm": 1.6532554229523138, "learning_rate": 4.924111739306788e-06, "loss": 0.732, "step": 6679 }, { "epoch": 0.68, "grad_norm": 1.6648534043225724, "learning_rate": 4.92127351392057e-06, "loss": 0.7031, "step": 6680 }, { "epoch": 0.68, "grad_norm": 1.6532155219495979, "learning_rate": 4.918435839750695e-06, "loss": 0.6672, "step": 6681 }, { "epoch": 0.68, "grad_norm": 1.5060944261708848, "learning_rate": 4.9155987171051374e-06, "loss": 0.5954, "step": 6682 }, { "epoch": 0.68, "grad_norm": 1.4914021316838773, "learning_rate": 4.912762146291833e-06, "loss": 0.7485, "step": 6683 }, { "epoch": 0.68, "grad_norm": 1.708142153364821, "learning_rate": 4.909926127618638e-06, "loss": 0.8124, "step": 6684 }, { "epoch": 0.68, "grad_norm": 1.6513017550261346, "learning_rate": 4.907090661393362e-06, "loss": 0.6309, "step": 6685 }, { "epoch": 0.68, "grad_norm": 1.7913658290602579, "learning_rate": 4.904255747923754e-06, "loss": 0.7755, "step": 6686 }, { "epoch": 0.68, "grad_norm": 1.6348298452794083, "learning_rate": 4.901421387517492e-06, "loss": 0.6811, "step": 6687 }, { "epoch": 0.68, "grad_norm": 1.582267175662689, "learning_rate": 4.89858758048221e-06, "loss": 0.6593, "step": 6688 }, { "epoch": 0.68, "grad_norm": 1.5243067426073116, "learning_rate": 4.895754327125465e-06, "loss": 0.7288, "step": 6689 }, { "epoch": 0.68, "grad_norm": 1.7220747486755543, "learning_rate": 4.8929216277547665e-06, "loss": 0.6891, "step": 6690 }, { "epoch": 0.68, "grad_norm": 1.6523749858452668, "learning_rate": 4.8900894826775626e-06, "loss": 0.6876, "step": 6691 }, { "epoch": 0.68, "grad_norm": 1.6800362457047207, "learning_rate": 4.887257892201231e-06, "loss": 0.7344, "step": 6692 }, { "epoch": 0.68, "grad_norm": 1.6366830126364655, "learning_rate": 4.884426856633102e-06, "loss": 0.7273, "step": 6693 }, { "epoch": 0.68, "grad_norm": 1.625082643217557, "learning_rate": 4.881596376280443e-06, "loss": 0.7569, "step": 6694 }, { "epoch": 0.68, "grad_norm": 1.601983835016552, "learning_rate": 4.878766451450451e-06, "loss": 0.6197, "step": 6695 }, { "epoch": 0.68, "grad_norm": 1.631133494077483, "learning_rate": 4.875937082450276e-06, "loss": 0.7626, "step": 6696 }, { "epoch": 0.68, "grad_norm": 1.8279187393898182, "learning_rate": 4.873108269586997e-06, "loss": 0.7777, "step": 6697 }, { "epoch": 0.68, "grad_norm": 1.499195022793931, "learning_rate": 4.870280013167639e-06, "loss": 0.6354, "step": 6698 }, { "epoch": 0.68, "grad_norm": 1.5436191227668006, "learning_rate": 4.867452313499169e-06, "loss": 0.7183, "step": 6699 }, { "epoch": 0.68, "grad_norm": 1.9507237044500054, "learning_rate": 4.864625170888483e-06, "loss": 0.7534, "step": 6700 }, { "epoch": 0.68, "grad_norm": 1.5253344116402054, "learning_rate": 4.86179858564243e-06, "loss": 0.6205, "step": 6701 }, { "epoch": 0.68, "grad_norm": 1.5286591265294258, "learning_rate": 4.858972558067784e-06, "loss": 0.5347, "step": 6702 }, { "epoch": 0.68, "grad_norm": 1.673034454696896, "learning_rate": 4.856147088471271e-06, "loss": 0.6869, "step": 6703 }, { "epoch": 0.68, "grad_norm": 1.7823550632594058, "learning_rate": 4.853322177159554e-06, "loss": 0.7187, "step": 6704 }, { "epoch": 0.68, "grad_norm": 1.7547562334998144, "learning_rate": 4.850497824439226e-06, "loss": 0.7486, "step": 6705 }, { "epoch": 0.68, "grad_norm": 1.5239934070506733, "learning_rate": 4.847674030616833e-06, "loss": 0.678, "step": 6706 }, { "epoch": 0.68, "grad_norm": 1.6717769248144259, "learning_rate": 4.844850795998848e-06, "loss": 0.8623, "step": 6707 }, { "epoch": 0.68, "grad_norm": 1.5968692392461008, "learning_rate": 4.842028120891691e-06, "loss": 0.6768, "step": 6708 }, { "epoch": 0.68, "grad_norm": 1.619490677510862, "learning_rate": 4.839206005601724e-06, "loss": 0.6379, "step": 6709 }, { "epoch": 0.68, "grad_norm": 1.419424593862716, "learning_rate": 4.8363844504352344e-06, "loss": 0.6016, "step": 6710 }, { "epoch": 0.68, "grad_norm": 1.558674176987791, "learning_rate": 4.8335634556984675e-06, "loss": 0.672, "step": 6711 }, { "epoch": 0.68, "grad_norm": 1.719086987642181, "learning_rate": 4.8307430216975895e-06, "loss": 0.7243, "step": 6712 }, { "epoch": 0.68, "grad_norm": 1.6798971927393056, "learning_rate": 4.827923148738718e-06, "loss": 0.7477, "step": 6713 }, { "epoch": 0.68, "grad_norm": 1.715498035259579, "learning_rate": 4.825103837127911e-06, "loss": 0.7564, "step": 6714 }, { "epoch": 0.68, "grad_norm": 1.7866680476785195, "learning_rate": 4.8222850871711525e-06, "loss": 0.7131, "step": 6715 }, { "epoch": 0.68, "grad_norm": 1.6589252405439832, "learning_rate": 4.819466899174377e-06, "loss": 0.6056, "step": 6716 }, { "epoch": 0.68, "grad_norm": 1.7596553684310678, "learning_rate": 4.816649273443457e-06, "loss": 0.6624, "step": 6717 }, { "epoch": 0.68, "grad_norm": 1.533186019876807, "learning_rate": 4.813832210284198e-06, "loss": 0.6477, "step": 6718 }, { "epoch": 0.68, "grad_norm": 1.782257739414974, "learning_rate": 4.8110157100023545e-06, "loss": 0.6781, "step": 6719 }, { "epoch": 0.68, "grad_norm": 1.5627791612424267, "learning_rate": 4.808199772903604e-06, "loss": 0.6295, "step": 6720 }, { "epoch": 0.68, "grad_norm": 1.538510864281181, "learning_rate": 4.805384399293578e-06, "loss": 0.6813, "step": 6721 }, { "epoch": 0.68, "grad_norm": 1.6580403085905862, "learning_rate": 4.802569589477844e-06, "loss": 0.6827, "step": 6722 }, { "epoch": 0.68, "grad_norm": 1.7106126858256103, "learning_rate": 4.799755343761899e-06, "loss": 0.7553, "step": 6723 }, { "epoch": 0.68, "grad_norm": 1.9278062978061696, "learning_rate": 4.796941662451192e-06, "loss": 0.7088, "step": 6724 }, { "epoch": 0.68, "grad_norm": 1.6350428421712366, "learning_rate": 4.794128545851098e-06, "loss": 0.63, "step": 6725 }, { "epoch": 0.68, "grad_norm": 1.6086575592589927, "learning_rate": 4.791315994266939e-06, "loss": 0.6599, "step": 6726 }, { "epoch": 0.68, "grad_norm": 1.4378733825249688, "learning_rate": 4.788504008003978e-06, "loss": 0.6634, "step": 6727 }, { "epoch": 0.68, "grad_norm": 1.6964357179844956, "learning_rate": 4.785692587367404e-06, "loss": 0.7541, "step": 6728 }, { "epoch": 0.68, "grad_norm": 1.5927379582068117, "learning_rate": 4.782881732662361e-06, "loss": 0.6434, "step": 6729 }, { "epoch": 0.68, "grad_norm": 1.5532184256900274, "learning_rate": 4.780071444193917e-06, "loss": 0.5576, "step": 6730 }, { "epoch": 0.68, "grad_norm": 1.4686464144373634, "learning_rate": 4.7772617222670846e-06, "loss": 0.8035, "step": 6731 }, { "epoch": 0.68, "grad_norm": 1.7957799314682596, "learning_rate": 4.7744525671868235e-06, "loss": 0.6975, "step": 6732 }, { "epoch": 0.68, "grad_norm": 1.8435799373752362, "learning_rate": 4.771643979258013e-06, "loss": 0.7369, "step": 6733 }, { "epoch": 0.68, "grad_norm": 1.501883749908581, "learning_rate": 4.768835958785487e-06, "loss": 0.6216, "step": 6734 }, { "epoch": 0.69, "grad_norm": 1.3488585383477605, "learning_rate": 4.766028506074014e-06, "loss": 0.6155, "step": 6735 }, { "epoch": 0.69, "grad_norm": 2.0300629350024613, "learning_rate": 4.763221621428294e-06, "loss": 0.7919, "step": 6736 }, { "epoch": 0.69, "grad_norm": 1.6568561202507486, "learning_rate": 4.760415305152973e-06, "loss": 0.7961, "step": 6737 }, { "epoch": 0.69, "grad_norm": 1.5372026401250878, "learning_rate": 4.757609557552635e-06, "loss": 0.6851, "step": 6738 }, { "epoch": 0.69, "grad_norm": 1.675958938449828, "learning_rate": 4.754804378931794e-06, "loss": 0.7546, "step": 6739 }, { "epoch": 0.69, "grad_norm": 1.5616244437905444, "learning_rate": 4.7519997695949135e-06, "loss": 0.6774, "step": 6740 }, { "epoch": 0.69, "grad_norm": 1.6035797884092577, "learning_rate": 4.749195729846392e-06, "loss": 0.7169, "step": 6741 }, { "epoch": 0.69, "grad_norm": 1.589931570461331, "learning_rate": 4.746392259990557e-06, "loss": 0.6673, "step": 6742 }, { "epoch": 0.69, "grad_norm": 1.4731801910893658, "learning_rate": 4.743589360331683e-06, "loss": 0.5595, "step": 6743 }, { "epoch": 0.69, "grad_norm": 1.599173395270854, "learning_rate": 4.740787031173989e-06, "loss": 0.7061, "step": 6744 }, { "epoch": 0.69, "grad_norm": 1.634886642584961, "learning_rate": 4.737985272821613e-06, "loss": 0.6353, "step": 6745 }, { "epoch": 0.69, "grad_norm": 1.457103767397003, "learning_rate": 4.735184085578647e-06, "loss": 0.6493, "step": 6746 }, { "epoch": 0.69, "grad_norm": 1.6446556562271464, "learning_rate": 4.732383469749119e-06, "loss": 0.7001, "step": 6747 }, { "epoch": 0.69, "grad_norm": 1.505602973831173, "learning_rate": 4.7295834256369865e-06, "loss": 0.7058, "step": 6748 }, { "epoch": 0.69, "grad_norm": 1.7229690110012572, "learning_rate": 4.726783953546156e-06, "loss": 0.7497, "step": 6749 }, { "epoch": 0.69, "grad_norm": 1.7983347955264688, "learning_rate": 4.72398505378046e-06, "loss": 0.7001, "step": 6750 }, { "epoch": 0.69, "grad_norm": 1.5291712364707872, "learning_rate": 4.721186726643679e-06, "loss": 0.7008, "step": 6751 }, { "epoch": 0.69, "grad_norm": 1.5772590436598817, "learning_rate": 4.718388972439531e-06, "loss": 0.6702, "step": 6752 }, { "epoch": 0.69, "grad_norm": 1.6705254133139626, "learning_rate": 4.715591791471661e-06, "loss": 0.7293, "step": 6753 }, { "epoch": 0.69, "grad_norm": 1.4528902800979537, "learning_rate": 4.712795184043664e-06, "loss": 0.7014, "step": 6754 }, { "epoch": 0.69, "grad_norm": 1.8495839131221563, "learning_rate": 4.709999150459069e-06, "loss": 0.7272, "step": 6755 }, { "epoch": 0.69, "grad_norm": 1.8308693369169993, "learning_rate": 4.707203691021338e-06, "loss": 0.8219, "step": 6756 }, { "epoch": 0.69, "grad_norm": 1.8251269818080302, "learning_rate": 4.704408806033876e-06, "loss": 0.7729, "step": 6757 }, { "epoch": 0.69, "grad_norm": 1.5691962502332488, "learning_rate": 4.7016144958000275e-06, "loss": 0.6621, "step": 6758 }, { "epoch": 0.69, "grad_norm": 1.5036672090092813, "learning_rate": 4.698820760623064e-06, "loss": 0.759, "step": 6759 }, { "epoch": 0.69, "grad_norm": 1.6126843045237915, "learning_rate": 4.69602760080621e-06, "loss": 0.7757, "step": 6760 }, { "epoch": 0.69, "grad_norm": 1.5749756295000583, "learning_rate": 4.693235016652611e-06, "loss": 0.7203, "step": 6761 }, { "epoch": 0.69, "grad_norm": 1.5723201831816331, "learning_rate": 4.690443008465363e-06, "loss": 0.7017, "step": 6762 }, { "epoch": 0.69, "grad_norm": 1.6678943205347223, "learning_rate": 4.687651576547497e-06, "loss": 0.7812, "step": 6763 }, { "epoch": 0.69, "grad_norm": 1.6563925187150188, "learning_rate": 4.684860721201973e-06, "loss": 0.737, "step": 6764 }, { "epoch": 0.69, "grad_norm": 1.6299027094882985, "learning_rate": 4.682070442731703e-06, "loss": 0.6962, "step": 6765 }, { "epoch": 0.69, "grad_norm": 1.6136354271562663, "learning_rate": 4.679280741439519e-06, "loss": 0.7279, "step": 6766 }, { "epoch": 0.69, "grad_norm": 1.5619144216443062, "learning_rate": 4.676491617628204e-06, "loss": 0.7066, "step": 6767 }, { "epoch": 0.69, "grad_norm": 1.691344837835973, "learning_rate": 4.673703071600477e-06, "loss": 0.7679, "step": 6768 }, { "epoch": 0.69, "grad_norm": 1.7127039489976044, "learning_rate": 4.670915103658984e-06, "loss": 0.7076, "step": 6769 }, { "epoch": 0.69, "grad_norm": 1.5145736321312864, "learning_rate": 4.6681277141063185e-06, "loss": 0.7129, "step": 6770 }, { "epoch": 0.69, "grad_norm": 1.6572377542918708, "learning_rate": 4.665340903245013e-06, "loss": 0.7299, "step": 6771 }, { "epoch": 0.69, "grad_norm": 1.7087069822360468, "learning_rate": 4.662554671377523e-06, "loss": 0.6861, "step": 6772 }, { "epoch": 0.69, "grad_norm": 1.5534019045426446, "learning_rate": 4.659769018806259e-06, "loss": 0.6448, "step": 6773 }, { "epoch": 0.69, "grad_norm": 1.697895456294768, "learning_rate": 4.656983945833553e-06, "loss": 0.7074, "step": 6774 }, { "epoch": 0.69, "grad_norm": 1.5580354877752964, "learning_rate": 4.654199452761684e-06, "loss": 0.6663, "step": 6775 }, { "epoch": 0.69, "grad_norm": 1.7294341968255658, "learning_rate": 4.65141553989287e-06, "loss": 0.6761, "step": 6776 }, { "epoch": 0.69, "grad_norm": 1.4565405990269753, "learning_rate": 4.648632207529251e-06, "loss": 0.6189, "step": 6777 }, { "epoch": 0.69, "grad_norm": 1.567351511841771, "learning_rate": 4.645849455972925e-06, "loss": 0.7025, "step": 6778 }, { "epoch": 0.69, "grad_norm": 1.6730705756743636, "learning_rate": 4.643067285525907e-06, "loss": 0.7288, "step": 6779 }, { "epoch": 0.69, "grad_norm": 2.4232719505119826, "learning_rate": 4.640285696490162e-06, "loss": 0.7396, "step": 6780 }, { "epoch": 0.69, "grad_norm": 1.7484179890091003, "learning_rate": 4.637504689167593e-06, "loss": 0.772, "step": 6781 }, { "epoch": 0.69, "grad_norm": 1.4096556326775183, "learning_rate": 4.634724263860024e-06, "loss": 0.6992, "step": 6782 }, { "epoch": 0.69, "grad_norm": 1.431220240140526, "learning_rate": 4.631944420869237e-06, "loss": 0.6121, "step": 6783 }, { "epoch": 0.69, "grad_norm": 1.5569615212600543, "learning_rate": 4.629165160496933e-06, "loss": 0.7122, "step": 6784 }, { "epoch": 0.69, "grad_norm": 1.61376524661055, "learning_rate": 4.62638648304476e-06, "loss": 0.7074, "step": 6785 }, { "epoch": 0.69, "grad_norm": 1.758923600513649, "learning_rate": 4.623608388814305e-06, "loss": 0.7616, "step": 6786 }, { "epoch": 0.69, "grad_norm": 1.8161912002507001, "learning_rate": 4.620830878107076e-06, "loss": 0.6772, "step": 6787 }, { "epoch": 0.69, "grad_norm": 1.5930783730088638, "learning_rate": 4.618053951224537e-06, "loss": 0.5818, "step": 6788 }, { "epoch": 0.69, "grad_norm": 1.6296840520642266, "learning_rate": 4.6152776084680795e-06, "loss": 0.6261, "step": 6789 }, { "epoch": 0.69, "grad_norm": 1.637986537306099, "learning_rate": 4.612501850139027e-06, "loss": 0.6713, "step": 6790 }, { "epoch": 0.69, "grad_norm": 1.7392169691741854, "learning_rate": 4.609726676538652e-06, "loss": 0.669, "step": 6791 }, { "epoch": 0.69, "grad_norm": 1.6813433964446511, "learning_rate": 4.6069520879681464e-06, "loss": 0.6574, "step": 6792 }, { "epoch": 0.69, "grad_norm": 1.566228995523543, "learning_rate": 4.604178084728655e-06, "loss": 0.7067, "step": 6793 }, { "epoch": 0.69, "grad_norm": 1.6410680316688766, "learning_rate": 4.601404667121254e-06, "loss": 0.7132, "step": 6794 }, { "epoch": 0.69, "grad_norm": 1.6200882326123738, "learning_rate": 4.5986318354469495e-06, "loss": 0.6418, "step": 6795 }, { "epoch": 0.69, "grad_norm": 1.846150123331626, "learning_rate": 4.595859590006694e-06, "loss": 0.8386, "step": 6796 }, { "epoch": 0.69, "grad_norm": 1.4435592575044285, "learning_rate": 4.593087931101366e-06, "loss": 0.6287, "step": 6797 }, { "epoch": 0.69, "grad_norm": 1.5756239357254955, "learning_rate": 4.590316859031787e-06, "loss": 0.606, "step": 6798 }, { "epoch": 0.69, "grad_norm": 1.5562145930337141, "learning_rate": 4.5875463740987196e-06, "loss": 0.6404, "step": 6799 }, { "epoch": 0.69, "grad_norm": 1.7451565402362117, "learning_rate": 4.584776476602848e-06, "loss": 0.7641, "step": 6800 }, { "epoch": 0.69, "grad_norm": 1.7561251878769575, "learning_rate": 4.582007166844808e-06, "loss": 0.7515, "step": 6801 }, { "epoch": 0.69, "grad_norm": 1.6443111262582564, "learning_rate": 4.579238445125159e-06, "loss": 0.7593, "step": 6802 }, { "epoch": 0.69, "grad_norm": 1.8102861573409768, "learning_rate": 4.576470311744406e-06, "loss": 0.7399, "step": 6803 }, { "epoch": 0.69, "grad_norm": 1.6888475048886598, "learning_rate": 4.573702767002991e-06, "loss": 0.6528, "step": 6804 }, { "epoch": 0.69, "grad_norm": 1.6537094560055399, "learning_rate": 4.570935811201277e-06, "loss": 0.743, "step": 6805 }, { "epoch": 0.69, "grad_norm": 1.609274630536314, "learning_rate": 4.5681694446395805e-06, "loss": 0.7941, "step": 6806 }, { "epoch": 0.69, "grad_norm": 1.6042699551754307, "learning_rate": 4.5654036676181505e-06, "loss": 0.7543, "step": 6807 }, { "epoch": 0.69, "grad_norm": 1.6824929983387507, "learning_rate": 4.5626384804371605e-06, "loss": 0.8842, "step": 6808 }, { "epoch": 0.69, "grad_norm": 1.48386149237531, "learning_rate": 4.559873883396737e-06, "loss": 0.5557, "step": 6809 }, { "epoch": 0.69, "grad_norm": 1.541725899034693, "learning_rate": 4.557109876796926e-06, "loss": 0.6702, "step": 6810 }, { "epoch": 0.69, "grad_norm": 1.71511462621031, "learning_rate": 4.554346460937719e-06, "loss": 0.6853, "step": 6811 }, { "epoch": 0.69, "grad_norm": 1.5697263264236596, "learning_rate": 4.551583636119048e-06, "loss": 0.6495, "step": 6812 }, { "epoch": 0.69, "grad_norm": 1.593495035401068, "learning_rate": 4.548821402640765e-06, "loss": 0.8006, "step": 6813 }, { "epoch": 0.69, "grad_norm": 1.5161862699596542, "learning_rate": 4.5460597608026765e-06, "loss": 0.7516, "step": 6814 }, { "epoch": 0.69, "grad_norm": 1.7836780242640413, "learning_rate": 4.543298710904507e-06, "loss": 0.6723, "step": 6815 }, { "epoch": 0.69, "grad_norm": 1.6477562008663253, "learning_rate": 4.54053825324593e-06, "loss": 0.6872, "step": 6816 }, { "epoch": 0.69, "grad_norm": 1.6047023712842892, "learning_rate": 4.537778388126552e-06, "loss": 0.6548, "step": 6817 }, { "epoch": 0.69, "grad_norm": 1.59282147631226, "learning_rate": 4.535019115845905e-06, "loss": 0.6552, "step": 6818 }, { "epoch": 0.69, "grad_norm": 1.5241484978282966, "learning_rate": 4.532260436703476e-06, "loss": 0.6947, "step": 6819 }, { "epoch": 0.69, "grad_norm": 1.629690488840868, "learning_rate": 4.529502350998667e-06, "loss": 0.6956, "step": 6820 }, { "epoch": 0.69, "grad_norm": 1.7973943291335965, "learning_rate": 4.526744859030828e-06, "loss": 0.7769, "step": 6821 }, { "epoch": 0.69, "grad_norm": 1.8716176708220122, "learning_rate": 4.523987961099247e-06, "loss": 0.8152, "step": 6822 }, { "epoch": 0.69, "grad_norm": 1.7628031001157096, "learning_rate": 4.5212316575031325e-06, "loss": 0.7304, "step": 6823 }, { "epoch": 0.69, "grad_norm": 1.6365695307885848, "learning_rate": 4.518475948541644e-06, "loss": 0.626, "step": 6824 }, { "epoch": 0.69, "grad_norm": 1.8518163270282286, "learning_rate": 4.5157208345138735e-06, "loss": 0.7279, "step": 6825 }, { "epoch": 0.69, "grad_norm": 1.503081942774786, "learning_rate": 4.5129663157188385e-06, "loss": 0.6428, "step": 6826 }, { "epoch": 0.69, "grad_norm": 1.6458945015573683, "learning_rate": 4.510212392455506e-06, "loss": 0.6701, "step": 6827 }, { "epoch": 0.69, "grad_norm": 1.528661732099273, "learning_rate": 4.507459065022764e-06, "loss": 0.6397, "step": 6828 }, { "epoch": 0.69, "grad_norm": 1.7525925171036087, "learning_rate": 4.504706333719446e-06, "loss": 0.7344, "step": 6829 }, { "epoch": 0.69, "grad_norm": 1.6132996914626434, "learning_rate": 4.501954198844323e-06, "loss": 0.6716, "step": 6830 }, { "epoch": 0.69, "grad_norm": 1.728012838361854, "learning_rate": 4.499202660696088e-06, "loss": 0.6334, "step": 6831 }, { "epoch": 0.69, "grad_norm": 1.6940877753151904, "learning_rate": 4.4964517195733845e-06, "loss": 0.7678, "step": 6832 }, { "epoch": 0.7, "grad_norm": 1.4309951646007812, "learning_rate": 4.493701375774778e-06, "loss": 0.6187, "step": 6833 }, { "epoch": 0.7, "grad_norm": 1.6994002275015478, "learning_rate": 4.490951629598779e-06, "loss": 0.6717, "step": 6834 }, { "epoch": 0.7, "grad_norm": 1.5225830335907207, "learning_rate": 4.488202481343831e-06, "loss": 0.6211, "step": 6835 }, { "epoch": 0.7, "grad_norm": 1.7419741594304876, "learning_rate": 4.485453931308306e-06, "loss": 0.7343, "step": 6836 }, { "epoch": 0.7, "grad_norm": 1.7353467457564924, "learning_rate": 4.482705979790522e-06, "loss": 0.623, "step": 6837 }, { "epoch": 0.7, "grad_norm": 1.6379948590288491, "learning_rate": 4.479958627088719e-06, "loss": 0.626, "step": 6838 }, { "epoch": 0.7, "grad_norm": 1.5886343067209587, "learning_rate": 4.477211873501085e-06, "loss": 0.6164, "step": 6839 }, { "epoch": 0.7, "grad_norm": 1.6088601476355608, "learning_rate": 4.474465719325738e-06, "loss": 0.5527, "step": 6840 }, { "epoch": 0.7, "grad_norm": 1.4754554021635713, "learning_rate": 4.471720164860723e-06, "loss": 0.6268, "step": 6841 }, { "epoch": 0.7, "grad_norm": 1.6109931340884776, "learning_rate": 4.468975210404032e-06, "loss": 0.7196, "step": 6842 }, { "epoch": 0.7, "grad_norm": 1.4207320523598963, "learning_rate": 4.4662308562535905e-06, "loss": 0.6549, "step": 6843 }, { "epoch": 0.7, "grad_norm": 1.5971967413292498, "learning_rate": 4.463487102707247e-06, "loss": 0.7004, "step": 6844 }, { "epoch": 0.7, "grad_norm": 1.5846312944544336, "learning_rate": 4.460743950062797e-06, "loss": 0.6683, "step": 6845 }, { "epoch": 0.7, "grad_norm": 1.5170102406603916, "learning_rate": 4.458001398617971e-06, "loss": 0.6649, "step": 6846 }, { "epoch": 0.7, "grad_norm": 1.6523674664305048, "learning_rate": 4.455259448670423e-06, "loss": 0.7747, "step": 6847 }, { "epoch": 0.7, "grad_norm": 1.5808458259236084, "learning_rate": 4.452518100517751e-06, "loss": 0.7314, "step": 6848 }, { "epoch": 0.7, "grad_norm": 1.5507415500961412, "learning_rate": 4.449777354457489e-06, "loss": 0.6792, "step": 6849 }, { "epoch": 0.7, "grad_norm": 1.6687667637042354, "learning_rate": 4.447037210787098e-06, "loss": 0.6677, "step": 6850 }, { "epoch": 0.7, "grad_norm": 1.6465995920302823, "learning_rate": 4.444297669803981e-06, "loss": 0.688, "step": 6851 }, { "epoch": 0.7, "grad_norm": 1.3837375906710079, "learning_rate": 4.441558731805467e-06, "loss": 0.6833, "step": 6852 }, { "epoch": 0.7, "grad_norm": 1.6111410414667975, "learning_rate": 4.438820397088829e-06, "loss": 0.7622, "step": 6853 }, { "epoch": 0.7, "grad_norm": 1.7558194753522183, "learning_rate": 4.436082665951272e-06, "loss": 0.7165, "step": 6854 }, { "epoch": 0.7, "grad_norm": 1.7187189582051745, "learning_rate": 4.43334553868993e-06, "loss": 0.7752, "step": 6855 }, { "epoch": 0.7, "grad_norm": 1.5985573828842572, "learning_rate": 4.430609015601875e-06, "loss": 0.7959, "step": 6856 }, { "epoch": 0.7, "grad_norm": 1.6240805305240187, "learning_rate": 4.427873096984122e-06, "loss": 0.7092, "step": 6857 }, { "epoch": 0.7, "grad_norm": 1.6149474725862267, "learning_rate": 4.4251377831336005e-06, "loss": 0.711, "step": 6858 }, { "epoch": 0.7, "grad_norm": 1.5581486292014552, "learning_rate": 4.422403074347192e-06, "loss": 0.7262, "step": 6859 }, { "epoch": 0.7, "grad_norm": 1.6611056685471053, "learning_rate": 4.41966897092171e-06, "loss": 0.7394, "step": 6860 }, { "epoch": 0.7, "grad_norm": 1.6201934588493567, "learning_rate": 4.416935473153891e-06, "loss": 0.7084, "step": 6861 }, { "epoch": 0.7, "grad_norm": 1.698794288828691, "learning_rate": 4.414202581340416e-06, "loss": 0.7496, "step": 6862 }, { "epoch": 0.7, "grad_norm": 1.5843943183950784, "learning_rate": 4.411470295777904e-06, "loss": 0.7264, "step": 6863 }, { "epoch": 0.7, "grad_norm": 1.7047690072131172, "learning_rate": 4.4087386167628905e-06, "loss": 0.6368, "step": 6864 }, { "epoch": 0.7, "grad_norm": 1.5051173591130502, "learning_rate": 4.4060075445918635e-06, "loss": 0.672, "step": 6865 }, { "epoch": 0.7, "grad_norm": 1.6290516020944361, "learning_rate": 4.4032770795612426e-06, "loss": 0.6965, "step": 6866 }, { "epoch": 0.7, "grad_norm": 1.4918612948316938, "learning_rate": 4.400547221967366e-06, "loss": 0.7075, "step": 6867 }, { "epoch": 0.7, "grad_norm": 1.5594186028274144, "learning_rate": 4.397817972106527e-06, "loss": 0.6735, "step": 6868 }, { "epoch": 0.7, "grad_norm": 1.4450516338889967, "learning_rate": 4.395089330274936e-06, "loss": 0.5918, "step": 6869 }, { "epoch": 0.7, "grad_norm": 1.5175043132909714, "learning_rate": 4.392361296768747e-06, "loss": 0.7221, "step": 6870 }, { "epoch": 0.7, "grad_norm": 1.6293215499348013, "learning_rate": 4.389633871884048e-06, "loss": 0.7563, "step": 6871 }, { "epoch": 0.7, "grad_norm": 1.4893946119439887, "learning_rate": 4.386907055916854e-06, "loss": 0.5862, "step": 6872 }, { "epoch": 0.7, "grad_norm": 1.5866286954728583, "learning_rate": 4.384180849163122e-06, "loss": 0.6307, "step": 6873 }, { "epoch": 0.7, "grad_norm": 1.533145190738263, "learning_rate": 4.381455251918735e-06, "loss": 0.6756, "step": 6874 }, { "epoch": 0.7, "grad_norm": 1.6868162377836897, "learning_rate": 4.378730264479516e-06, "loss": 0.6691, "step": 6875 }, { "epoch": 0.7, "grad_norm": 1.6819700258658672, "learning_rate": 4.376005887141223e-06, "loss": 0.6826, "step": 6876 }, { "epoch": 0.7, "grad_norm": 1.7399392860899976, "learning_rate": 4.373282120199538e-06, "loss": 0.6818, "step": 6877 }, { "epoch": 0.7, "grad_norm": 1.7575382587651571, "learning_rate": 4.370558963950091e-06, "loss": 0.7339, "step": 6878 }, { "epoch": 0.7, "grad_norm": 1.790098215420934, "learning_rate": 4.36783641868843e-06, "loss": 0.7084, "step": 6879 }, { "epoch": 0.7, "grad_norm": 1.7916971074702635, "learning_rate": 4.365114484710048e-06, "loss": 0.7472, "step": 6880 }, { "epoch": 0.7, "grad_norm": 1.634292686325334, "learning_rate": 4.3623931623103735e-06, "loss": 0.611, "step": 6881 }, { "epoch": 0.7, "grad_norm": 1.5793655961743505, "learning_rate": 4.359672451784754e-06, "loss": 0.624, "step": 6882 }, { "epoch": 0.7, "grad_norm": 1.750260127143254, "learning_rate": 4.356952353428486e-06, "loss": 0.7962, "step": 6883 }, { "epoch": 0.7, "grad_norm": 1.7649362401639273, "learning_rate": 4.354232867536797e-06, "loss": 0.7676, "step": 6884 }, { "epoch": 0.7, "grad_norm": 1.752447193640601, "learning_rate": 4.351513994404835e-06, "loss": 0.7463, "step": 6885 }, { "epoch": 0.7, "grad_norm": 1.5594556890072002, "learning_rate": 4.348795734327701e-06, "loss": 0.6703, "step": 6886 }, { "epoch": 0.7, "grad_norm": 1.648662065675375, "learning_rate": 4.346078087600411e-06, "loss": 0.7742, "step": 6887 }, { "epoch": 0.7, "grad_norm": 1.699704656113283, "learning_rate": 4.343361054517927e-06, "loss": 0.7956, "step": 6888 }, { "epoch": 0.7, "grad_norm": 1.6594430543250784, "learning_rate": 4.340644635375146e-06, "loss": 0.7169, "step": 6889 }, { "epoch": 0.7, "grad_norm": 1.7104654398985026, "learning_rate": 4.337928830466882e-06, "loss": 0.7365, "step": 6890 }, { "epoch": 0.7, "grad_norm": 1.755419660001335, "learning_rate": 4.335213640087902e-06, "loss": 0.7746, "step": 6891 }, { "epoch": 0.7, "grad_norm": 1.6038165272888565, "learning_rate": 4.332499064532891e-06, "loss": 0.6323, "step": 6892 }, { "epoch": 0.7, "grad_norm": 1.7456593937639815, "learning_rate": 4.329785104096477e-06, "loss": 0.7454, "step": 6893 }, { "epoch": 0.7, "grad_norm": 1.6068745247970297, "learning_rate": 4.327071759073221e-06, "loss": 0.6262, "step": 6894 }, { "epoch": 0.7, "grad_norm": 1.6818545930186335, "learning_rate": 4.324359029757607e-06, "loss": 0.6403, "step": 6895 }, { "epoch": 0.7, "grad_norm": 1.4850855188004741, "learning_rate": 4.321646916444068e-06, "loss": 0.6217, "step": 6896 }, { "epoch": 0.7, "grad_norm": 1.7718084166975085, "learning_rate": 4.318935419426951e-06, "loss": 0.7067, "step": 6897 }, { "epoch": 0.7, "grad_norm": 1.5457835040849344, "learning_rate": 4.316224539000554e-06, "loss": 0.6807, "step": 6898 }, { "epoch": 0.7, "grad_norm": 1.7754546506945497, "learning_rate": 4.313514275459103e-06, "loss": 0.8154, "step": 6899 }, { "epoch": 0.7, "grad_norm": 1.6481037379539556, "learning_rate": 4.310804629096746e-06, "loss": 0.7368, "step": 6900 }, { "epoch": 0.7, "grad_norm": 1.6254104503335625, "learning_rate": 4.308095600207578e-06, "loss": 0.6914, "step": 6901 }, { "epoch": 0.7, "grad_norm": 1.6361449332363762, "learning_rate": 4.3053871890856235e-06, "loss": 0.6178, "step": 6902 }, { "epoch": 0.7, "grad_norm": 1.4364947519767834, "learning_rate": 4.302679396024834e-06, "loss": 0.6167, "step": 6903 }, { "epoch": 0.7, "grad_norm": 1.6631765011083663, "learning_rate": 4.299972221319103e-06, "loss": 0.6483, "step": 6904 }, { "epoch": 0.7, "grad_norm": 1.6516281319651802, "learning_rate": 4.2972656652622446e-06, "loss": 0.769, "step": 6905 }, { "epoch": 0.7, "grad_norm": 1.5056024308236098, "learning_rate": 4.294559728148018e-06, "loss": 0.6928, "step": 6906 }, { "epoch": 0.7, "grad_norm": 1.6055309450592068, "learning_rate": 4.291854410270113e-06, "loss": 0.7404, "step": 6907 }, { "epoch": 0.7, "grad_norm": 1.671936816083641, "learning_rate": 4.289149711922143e-06, "loss": 0.746, "step": 6908 }, { "epoch": 0.7, "grad_norm": 1.4401133849964285, "learning_rate": 4.2864456333976676e-06, "loss": 0.6935, "step": 6909 }, { "epoch": 0.7, "grad_norm": 1.663931022188339, "learning_rate": 4.2837421749901655e-06, "loss": 0.7025, "step": 6910 }, { "epoch": 0.7, "grad_norm": 1.6514456944306992, "learning_rate": 4.281039336993058e-06, "loss": 0.7564, "step": 6911 }, { "epoch": 0.7, "grad_norm": 1.590988972850426, "learning_rate": 4.278337119699699e-06, "loss": 0.6574, "step": 6912 }, { "epoch": 0.7, "grad_norm": 1.7075607813218283, "learning_rate": 4.275635523403367e-06, "loss": 0.6821, "step": 6913 }, { "epoch": 0.7, "grad_norm": 1.6835764812133978, "learning_rate": 4.27293454839728e-06, "loss": 0.7864, "step": 6914 }, { "epoch": 0.7, "grad_norm": 1.6808712795164975, "learning_rate": 4.270234194974586e-06, "loss": 0.7632, "step": 6915 }, { "epoch": 0.7, "grad_norm": 1.6984519613192404, "learning_rate": 4.267534463428365e-06, "loss": 0.7588, "step": 6916 }, { "epoch": 0.7, "grad_norm": 1.7880445241677712, "learning_rate": 4.264835354051636e-06, "loss": 0.7459, "step": 6917 }, { "epoch": 0.7, "grad_norm": 1.524997395722238, "learning_rate": 4.262136867137339e-06, "loss": 0.7045, "step": 6918 }, { "epoch": 0.7, "grad_norm": 1.8045010726243293, "learning_rate": 4.259439002978354e-06, "loss": 0.7398, "step": 6919 }, { "epoch": 0.7, "grad_norm": 1.5151094218387287, "learning_rate": 4.256741761867497e-06, "loss": 0.6549, "step": 6920 }, { "epoch": 0.7, "grad_norm": 1.6018412694118114, "learning_rate": 4.2540451440975025e-06, "loss": 0.7484, "step": 6921 }, { "epoch": 0.7, "grad_norm": 1.5854447630426047, "learning_rate": 4.251349149961056e-06, "loss": 0.7279, "step": 6922 }, { "epoch": 0.7, "grad_norm": 1.4938982874778577, "learning_rate": 4.248653779750756e-06, "loss": 0.6409, "step": 6923 }, { "epoch": 0.7, "grad_norm": 1.5468644761910109, "learning_rate": 4.245959033759147e-06, "loss": 0.6326, "step": 6924 }, { "epoch": 0.7, "grad_norm": 1.666205435921289, "learning_rate": 4.243264912278706e-06, "loss": 0.804, "step": 6925 }, { "epoch": 0.7, "grad_norm": 1.6496617351717349, "learning_rate": 4.2405714156018295e-06, "loss": 0.7252, "step": 6926 }, { "epoch": 0.7, "grad_norm": 1.5762382787063267, "learning_rate": 4.237878544020861e-06, "loss": 0.6196, "step": 6927 }, { "epoch": 0.7, "grad_norm": 1.7349560074224841, "learning_rate": 4.235186297828066e-06, "loss": 0.682, "step": 6928 }, { "epoch": 0.7, "grad_norm": 1.6572899000817862, "learning_rate": 4.232494677315645e-06, "loss": 0.6835, "step": 6929 }, { "epoch": 0.7, "grad_norm": 1.70827240724343, "learning_rate": 4.2298036827757375e-06, "loss": 0.7192, "step": 6930 }, { "epoch": 0.71, "grad_norm": 1.9011840639795596, "learning_rate": 4.2271133145003995e-06, "loss": 0.7168, "step": 6931 }, { "epoch": 0.71, "grad_norm": 1.5761513167033099, "learning_rate": 4.224423572781637e-06, "loss": 0.768, "step": 6932 }, { "epoch": 0.71, "grad_norm": 1.5980729940148364, "learning_rate": 4.221734457911374e-06, "loss": 0.6542, "step": 6933 }, { "epoch": 0.71, "grad_norm": 1.577183650117114, "learning_rate": 4.219045970181472e-06, "loss": 0.6186, "step": 6934 }, { "epoch": 0.71, "grad_norm": 1.5433345907486062, "learning_rate": 4.21635810988373e-06, "loss": 0.6485, "step": 6935 }, { "epoch": 0.71, "grad_norm": 1.6069760167637266, "learning_rate": 4.213670877309867e-06, "loss": 0.6646, "step": 6936 }, { "epoch": 0.71, "grad_norm": 1.7019206675257987, "learning_rate": 4.210984272751541e-06, "loss": 0.6954, "step": 6937 }, { "epoch": 0.71, "grad_norm": 1.7657777698386452, "learning_rate": 4.208298296500345e-06, "loss": 0.7059, "step": 6938 }, { "epoch": 0.71, "grad_norm": 1.5774180458446942, "learning_rate": 4.2056129488477936e-06, "loss": 0.593, "step": 6939 }, { "epoch": 0.71, "grad_norm": 1.6366090495196506, "learning_rate": 4.2029282300853455e-06, "loss": 0.6479, "step": 6940 }, { "epoch": 0.71, "grad_norm": 1.6983953653821355, "learning_rate": 4.200244140504379e-06, "loss": 0.7052, "step": 6941 }, { "epoch": 0.71, "grad_norm": 1.6921720513226657, "learning_rate": 4.197560680396214e-06, "loss": 0.7774, "step": 6942 }, { "epoch": 0.71, "grad_norm": 1.5070654563971895, "learning_rate": 4.1948778500521e-06, "loss": 0.6766, "step": 6943 }, { "epoch": 0.71, "grad_norm": 1.518188850551967, "learning_rate": 4.19219564976321e-06, "loss": 0.7744, "step": 6944 }, { "epoch": 0.71, "grad_norm": 1.479936391862102, "learning_rate": 4.189514079820662e-06, "loss": 0.6599, "step": 6945 }, { "epoch": 0.71, "grad_norm": 1.6671871455146776, "learning_rate": 4.1868331405154905e-06, "loss": 0.6885, "step": 6946 }, { "epoch": 0.71, "grad_norm": 1.7031726324212713, "learning_rate": 4.184152832138673e-06, "loss": 0.7379, "step": 6947 }, { "epoch": 0.71, "grad_norm": 1.5421456573559247, "learning_rate": 4.181473154981122e-06, "loss": 0.5938, "step": 6948 }, { "epoch": 0.71, "grad_norm": 1.7023553958191253, "learning_rate": 4.1787941093336635e-06, "loss": 0.7094, "step": 6949 }, { "epoch": 0.71, "grad_norm": 1.4727549625930916, "learning_rate": 4.176115695487071e-06, "loss": 0.6625, "step": 6950 }, { "epoch": 0.71, "grad_norm": 1.7742791080305533, "learning_rate": 4.173437913732048e-06, "loss": 0.7806, "step": 6951 }, { "epoch": 0.71, "grad_norm": 1.6522047579037553, "learning_rate": 4.1707607643592185e-06, "loss": 0.6883, "step": 6952 }, { "epoch": 0.71, "grad_norm": 1.5643275729858779, "learning_rate": 4.1680842476591484e-06, "loss": 0.7131, "step": 6953 }, { "epoch": 0.71, "grad_norm": 1.6108363196318192, "learning_rate": 4.165408363922337e-06, "loss": 0.7833, "step": 6954 }, { "epoch": 0.71, "grad_norm": 1.7994867885909294, "learning_rate": 4.1627331134392e-06, "loss": 0.6775, "step": 6955 }, { "epoch": 0.71, "grad_norm": 1.7217488003033425, "learning_rate": 4.160058496500103e-06, "loss": 0.6403, "step": 6956 }, { "epoch": 0.71, "grad_norm": 1.6425976302194905, "learning_rate": 4.1573845133953275e-06, "loss": 0.7085, "step": 6957 }, { "epoch": 0.71, "grad_norm": 1.497892765596392, "learning_rate": 4.154711164415094e-06, "loss": 0.6671, "step": 6958 }, { "epoch": 0.71, "grad_norm": 1.8054907860900105, "learning_rate": 4.152038449849556e-06, "loss": 0.8228, "step": 6959 }, { "epoch": 0.71, "grad_norm": 1.562254776905117, "learning_rate": 4.14936636998879e-06, "loss": 0.6295, "step": 6960 }, { "epoch": 0.71, "grad_norm": 1.7181351357516357, "learning_rate": 4.14669492512281e-06, "loss": 0.7395, "step": 6961 }, { "epoch": 0.71, "grad_norm": 1.6343855996101109, "learning_rate": 4.144024115541565e-06, "loss": 0.7496, "step": 6962 }, { "epoch": 0.71, "grad_norm": 1.6993064244870406, "learning_rate": 4.14135394153492e-06, "loss": 0.6616, "step": 6963 }, { "epoch": 0.71, "grad_norm": 1.543490876092734, "learning_rate": 4.138684403392688e-06, "loss": 0.7636, "step": 6964 }, { "epoch": 0.71, "grad_norm": 1.5772250311731004, "learning_rate": 4.136015501404604e-06, "loss": 0.6949, "step": 6965 }, { "epoch": 0.71, "grad_norm": 2.0741991444368217, "learning_rate": 4.133347235860333e-06, "loss": 0.863, "step": 6966 }, { "epoch": 0.71, "grad_norm": 1.647681598984127, "learning_rate": 4.1306796070494755e-06, "loss": 0.7291, "step": 6967 }, { "epoch": 0.71, "grad_norm": 1.5520196505035508, "learning_rate": 4.1280126152615644e-06, "loss": 0.6729, "step": 6968 }, { "epoch": 0.71, "grad_norm": 1.5718487268044565, "learning_rate": 4.1253462607860515e-06, "loss": 0.6882, "step": 6969 }, { "epoch": 0.71, "grad_norm": 1.6006646987325581, "learning_rate": 4.122680543912334e-06, "loss": 0.6832, "step": 6970 }, { "epoch": 0.71, "grad_norm": 1.5881086125681252, "learning_rate": 4.120015464929735e-06, "loss": 0.6927, "step": 6971 }, { "epoch": 0.71, "grad_norm": 1.6140338625443162, "learning_rate": 4.117351024127501e-06, "loss": 0.7615, "step": 6972 }, { "epoch": 0.71, "grad_norm": 1.7039718584416361, "learning_rate": 4.11468722179482e-06, "loss": 0.8646, "step": 6973 }, { "epoch": 0.71, "grad_norm": 1.5917611835614012, "learning_rate": 4.112024058220808e-06, "loss": 0.6972, "step": 6974 }, { "epoch": 0.71, "grad_norm": 1.740603652542512, "learning_rate": 4.109361533694504e-06, "loss": 0.7257, "step": 6975 }, { "epoch": 0.71, "grad_norm": 1.7885656752367534, "learning_rate": 4.10669964850489e-06, "loss": 0.7949, "step": 6976 }, { "epoch": 0.71, "grad_norm": 1.7294983851635601, "learning_rate": 4.104038402940863e-06, "loss": 0.7238, "step": 6977 }, { "epoch": 0.71, "grad_norm": 1.6325619934130196, "learning_rate": 4.101377797291265e-06, "loss": 0.6057, "step": 6978 }, { "epoch": 0.71, "grad_norm": 1.5344764459127072, "learning_rate": 4.098717831844867e-06, "loss": 0.6152, "step": 6979 }, { "epoch": 0.71, "grad_norm": 1.6435855427908748, "learning_rate": 4.09605850689036e-06, "loss": 0.6682, "step": 6980 }, { "epoch": 0.71, "grad_norm": 1.9189032336719265, "learning_rate": 4.093399822716377e-06, "loss": 0.6975, "step": 6981 }, { "epoch": 0.71, "grad_norm": 1.6852666215395304, "learning_rate": 4.090741779611472e-06, "loss": 0.7259, "step": 6982 }, { "epoch": 0.71, "grad_norm": 1.710324596376418, "learning_rate": 4.088084377864136e-06, "loss": 0.767, "step": 6983 }, { "epoch": 0.71, "grad_norm": 1.6380525152065986, "learning_rate": 4.085427617762792e-06, "loss": 0.6968, "step": 6984 }, { "epoch": 0.71, "grad_norm": 1.5433413494513588, "learning_rate": 4.082771499595782e-06, "loss": 0.6676, "step": 6985 }, { "epoch": 0.71, "grad_norm": 1.779847131311914, "learning_rate": 4.080116023651396e-06, "loss": 0.6974, "step": 6986 }, { "epoch": 0.71, "grad_norm": 1.5818748331761248, "learning_rate": 4.077461190217835e-06, "loss": 0.7561, "step": 6987 }, { "epoch": 0.71, "grad_norm": 1.7205646792957754, "learning_rate": 4.074806999583243e-06, "loss": 0.7579, "step": 6988 }, { "epoch": 0.71, "grad_norm": 1.503529710542259, "learning_rate": 4.0721534520356955e-06, "loss": 0.6626, "step": 6989 }, { "epoch": 0.71, "grad_norm": 1.7433595830714752, "learning_rate": 4.069500547863185e-06, "loss": 0.7432, "step": 6990 }, { "epoch": 0.71, "grad_norm": 1.5407275190849465, "learning_rate": 4.06684828735365e-06, "loss": 0.6634, "step": 6991 }, { "epoch": 0.71, "grad_norm": 1.879585303039927, "learning_rate": 4.0641966707949506e-06, "loss": 0.7283, "step": 6992 }, { "epoch": 0.71, "grad_norm": 1.6665885895090278, "learning_rate": 4.061545698474875e-06, "loss": 0.7186, "step": 6993 }, { "epoch": 0.71, "grad_norm": 1.6482473801952147, "learning_rate": 4.0588953706811485e-06, "loss": 0.6727, "step": 6994 }, { "epoch": 0.71, "grad_norm": 1.6151987819235434, "learning_rate": 4.056245687701419e-06, "loss": 0.6786, "step": 6995 }, { "epoch": 0.71, "grad_norm": 1.723556396094163, "learning_rate": 4.053596649823269e-06, "loss": 0.6509, "step": 6996 }, { "epoch": 0.71, "grad_norm": 1.7011356810748508, "learning_rate": 4.050948257334215e-06, "loss": 0.8039, "step": 6997 }, { "epoch": 0.71, "grad_norm": 1.6895154305021753, "learning_rate": 4.048300510521692e-06, "loss": 0.6526, "step": 6998 }, { "epoch": 0.71, "grad_norm": 1.9452574818201525, "learning_rate": 4.045653409673079e-06, "loss": 0.8085, "step": 6999 }, { "epoch": 0.71, "grad_norm": 1.7609192721254838, "learning_rate": 4.043006955075667e-06, "loss": 0.7762, "step": 7000 }, { "epoch": 0.71, "grad_norm": 1.7793784128833332, "learning_rate": 4.040361147016693e-06, "loss": 0.7615, "step": 7001 }, { "epoch": 0.71, "grad_norm": 1.7238975070132547, "learning_rate": 4.0377159857833235e-06, "loss": 0.6991, "step": 7002 }, { "epoch": 0.71, "grad_norm": 1.6611134034266273, "learning_rate": 4.035071471662641e-06, "loss": 0.7009, "step": 7003 }, { "epoch": 0.71, "grad_norm": 1.7967481323766012, "learning_rate": 4.032427604941671e-06, "loss": 0.6879, "step": 7004 }, { "epoch": 0.71, "grad_norm": 1.6103088335675344, "learning_rate": 4.029784385907359e-06, "loss": 0.6998, "step": 7005 }, { "epoch": 0.71, "grad_norm": 1.4755229277285078, "learning_rate": 4.027141814846588e-06, "loss": 0.6892, "step": 7006 }, { "epoch": 0.71, "grad_norm": 1.7663439626959418, "learning_rate": 4.024499892046172e-06, "loss": 0.6752, "step": 7007 }, { "epoch": 0.71, "grad_norm": 1.6168490704977603, "learning_rate": 4.021858617792843e-06, "loss": 0.6625, "step": 7008 }, { "epoch": 0.71, "grad_norm": 1.7391448458975851, "learning_rate": 4.019217992373277e-06, "loss": 0.7316, "step": 7009 }, { "epoch": 0.71, "grad_norm": 1.7057666540441399, "learning_rate": 4.016578016074064e-06, "loss": 0.7041, "step": 7010 }, { "epoch": 0.71, "grad_norm": 1.6059359526383257, "learning_rate": 4.0139386891817376e-06, "loss": 0.6557, "step": 7011 }, { "epoch": 0.71, "grad_norm": 1.773911345778964, "learning_rate": 4.011300011982759e-06, "loss": 0.7215, "step": 7012 }, { "epoch": 0.71, "grad_norm": 1.4984640233051176, "learning_rate": 4.0086619847635075e-06, "loss": 0.6175, "step": 7013 }, { "epoch": 0.71, "grad_norm": 1.7100583414353592, "learning_rate": 4.006024607810304e-06, "loss": 0.7669, "step": 7014 }, { "epoch": 0.71, "grad_norm": 1.4668525303618518, "learning_rate": 4.003387881409397e-06, "loss": 0.5917, "step": 7015 }, { "epoch": 0.71, "grad_norm": 1.4201488465349763, "learning_rate": 4.000751805846956e-06, "loss": 0.6535, "step": 7016 }, { "epoch": 0.71, "grad_norm": 1.7988283699217553, "learning_rate": 3.998116381409091e-06, "loss": 0.7743, "step": 7017 }, { "epoch": 0.71, "grad_norm": 1.761206047677621, "learning_rate": 3.995481608381833e-06, "loss": 0.7344, "step": 7018 }, { "epoch": 0.71, "grad_norm": 1.6422469485706646, "learning_rate": 3.992847487051144e-06, "loss": 0.7581, "step": 7019 }, { "epoch": 0.71, "grad_norm": 1.625424783824051, "learning_rate": 3.990214017702923e-06, "loss": 0.707, "step": 7020 }, { "epoch": 0.71, "grad_norm": 1.456008490060363, "learning_rate": 3.987581200622984e-06, "loss": 0.6372, "step": 7021 }, { "epoch": 0.71, "grad_norm": 1.7653640607260257, "learning_rate": 3.984949036097085e-06, "loss": 0.7238, "step": 7022 }, { "epoch": 0.71, "grad_norm": 1.70968000846081, "learning_rate": 3.982317524410901e-06, "loss": 0.6212, "step": 7023 }, { "epoch": 0.71, "grad_norm": 1.7276547862614866, "learning_rate": 3.979686665850043e-06, "loss": 0.7209, "step": 7024 }, { "epoch": 0.71, "grad_norm": 1.4841760329433793, "learning_rate": 3.977056460700054e-06, "loss": 0.8146, "step": 7025 }, { "epoch": 0.71, "grad_norm": 1.7281365965194087, "learning_rate": 3.974426909246395e-06, "loss": 0.6069, "step": 7026 }, { "epoch": 0.71, "grad_norm": 1.7366179797813863, "learning_rate": 3.971798011774468e-06, "loss": 0.681, "step": 7027 }, { "epoch": 0.71, "grad_norm": 1.7665892061525008, "learning_rate": 3.969169768569593e-06, "loss": 0.6869, "step": 7028 }, { "epoch": 0.71, "grad_norm": 1.6786218317981734, "learning_rate": 3.966542179917028e-06, "loss": 0.6813, "step": 7029 }, { "epoch": 0.72, "grad_norm": 1.4711116283729369, "learning_rate": 3.963915246101961e-06, "loss": 0.5982, "step": 7030 }, { "epoch": 0.72, "grad_norm": 1.6434577385828204, "learning_rate": 3.9612889674094955e-06, "loss": 0.7025, "step": 7031 }, { "epoch": 0.72, "grad_norm": 1.5554711330154485, "learning_rate": 3.958663344124678e-06, "loss": 0.6731, "step": 7032 }, { "epoch": 0.72, "grad_norm": 1.6676558574806524, "learning_rate": 3.956038376532482e-06, "loss": 0.6567, "step": 7033 }, { "epoch": 0.72, "grad_norm": 1.7124209469173053, "learning_rate": 3.9534140649177995e-06, "loss": 0.7216, "step": 7034 }, { "epoch": 0.72, "grad_norm": 1.7075703905836943, "learning_rate": 3.950790409565467e-06, "loss": 0.7102, "step": 7035 }, { "epoch": 0.72, "grad_norm": 1.5417142310579948, "learning_rate": 3.948167410760232e-06, "loss": 0.664, "step": 7036 }, { "epoch": 0.72, "grad_norm": 1.461830993842134, "learning_rate": 3.9455450687867845e-06, "loss": 0.6277, "step": 7037 }, { "epoch": 0.72, "grad_norm": 1.8940398474818356, "learning_rate": 3.942923383929743e-06, "loss": 0.7572, "step": 7038 }, { "epoch": 0.72, "grad_norm": 1.9355738539011664, "learning_rate": 3.940302356473642e-06, "loss": 0.7879, "step": 7039 }, { "epoch": 0.72, "grad_norm": 1.687447276435726, "learning_rate": 3.93768198670296e-06, "loss": 0.6419, "step": 7040 }, { "epoch": 0.72, "grad_norm": 1.5842593327172416, "learning_rate": 3.935062274902092e-06, "loss": 0.7642, "step": 7041 }, { "epoch": 0.72, "grad_norm": 1.5795026594057051, "learning_rate": 3.932443221355369e-06, "loss": 0.6418, "step": 7042 }, { "epoch": 0.72, "grad_norm": 1.5554013080977451, "learning_rate": 3.929824826347051e-06, "loss": 0.6573, "step": 7043 }, { "epoch": 0.72, "grad_norm": 1.5958131006786735, "learning_rate": 3.927207090161318e-06, "loss": 0.648, "step": 7044 }, { "epoch": 0.72, "grad_norm": 1.4619791680331131, "learning_rate": 3.9245900130822915e-06, "loss": 0.6172, "step": 7045 }, { "epoch": 0.72, "grad_norm": 1.751601619237473, "learning_rate": 3.921973595394005e-06, "loss": 0.711, "step": 7046 }, { "epoch": 0.72, "grad_norm": 1.7066907412184105, "learning_rate": 3.919357837380437e-06, "loss": 0.7086, "step": 7047 }, { "epoch": 0.72, "grad_norm": 1.573108845705456, "learning_rate": 3.916742739325488e-06, "loss": 0.7856, "step": 7048 }, { "epoch": 0.72, "grad_norm": 1.6505398116251775, "learning_rate": 3.91412830151298e-06, "loss": 0.6768, "step": 7049 }, { "epoch": 0.72, "grad_norm": 1.565935148019075, "learning_rate": 3.9115145242266714e-06, "loss": 0.7227, "step": 7050 }, { "epoch": 0.72, "grad_norm": 1.7356480172771545, "learning_rate": 3.9089014077502515e-06, "loss": 0.7685, "step": 7051 }, { "epoch": 0.72, "grad_norm": 1.6549946772587585, "learning_rate": 3.906288952367328e-06, "loss": 0.7455, "step": 7052 }, { "epoch": 0.72, "grad_norm": 1.6271433182157338, "learning_rate": 3.903677158361445e-06, "loss": 0.6842, "step": 7053 }, { "epoch": 0.72, "grad_norm": 1.5938354446085299, "learning_rate": 3.901066026016068e-06, "loss": 0.5795, "step": 7054 }, { "epoch": 0.72, "grad_norm": 1.612843481164936, "learning_rate": 3.898455555614598e-06, "loss": 0.7869, "step": 7055 }, { "epoch": 0.72, "grad_norm": 1.514332800714335, "learning_rate": 3.895845747440363e-06, "loss": 0.6233, "step": 7056 }, { "epoch": 0.72, "grad_norm": 1.7666438214701161, "learning_rate": 3.89323660177661e-06, "loss": 0.7276, "step": 7057 }, { "epoch": 0.72, "grad_norm": 1.7673801040896533, "learning_rate": 3.890628118906525e-06, "loss": 0.6719, "step": 7058 }, { "epoch": 0.72, "grad_norm": 1.618196212982791, "learning_rate": 3.888020299113221e-06, "loss": 0.6988, "step": 7059 }, { "epoch": 0.72, "grad_norm": 1.6641889014520415, "learning_rate": 3.885413142679729e-06, "loss": 0.6884, "step": 7060 }, { "epoch": 0.72, "grad_norm": 1.5790124585813679, "learning_rate": 3.882806649889024e-06, "loss": 0.7119, "step": 7061 }, { "epoch": 0.72, "grad_norm": 1.5846068340561161, "learning_rate": 3.8802008210239906e-06, "loss": 0.6524, "step": 7062 }, { "epoch": 0.72, "grad_norm": 1.519531275732951, "learning_rate": 3.877595656367454e-06, "loss": 0.638, "step": 7063 }, { "epoch": 0.72, "grad_norm": 1.5115523763776504, "learning_rate": 3.8749911562021705e-06, "loss": 0.6822, "step": 7064 }, { "epoch": 0.72, "grad_norm": 1.764434489462425, "learning_rate": 3.872387320810807e-06, "loss": 0.7642, "step": 7065 }, { "epoch": 0.72, "grad_norm": 1.5793773972876426, "learning_rate": 3.869784150475977e-06, "loss": 0.6887, "step": 7066 }, { "epoch": 0.72, "grad_norm": 1.7045250995664494, "learning_rate": 3.867181645480214e-06, "loss": 0.676, "step": 7067 }, { "epoch": 0.72, "grad_norm": 1.5399538449783368, "learning_rate": 3.864579806105974e-06, "loss": 0.6566, "step": 7068 }, { "epoch": 0.72, "grad_norm": 1.6457773798036648, "learning_rate": 3.861978632635648e-06, "loss": 0.6843, "step": 7069 }, { "epoch": 0.72, "grad_norm": 1.5939853478787616, "learning_rate": 3.859378125351556e-06, "loss": 0.7579, "step": 7070 }, { "epoch": 0.72, "grad_norm": 1.8740643030417445, "learning_rate": 3.856778284535938e-06, "loss": 0.7391, "step": 7071 }, { "epoch": 0.72, "grad_norm": 1.6024680692373516, "learning_rate": 3.854179110470968e-06, "loss": 0.7591, "step": 7072 }, { "epoch": 0.72, "grad_norm": 1.8009607749236867, "learning_rate": 3.851580603438748e-06, "loss": 0.8274, "step": 7073 }, { "epoch": 0.72, "grad_norm": 1.6075153854514934, "learning_rate": 3.8489827637213e-06, "loss": 0.7427, "step": 7074 }, { "epoch": 0.72, "grad_norm": 1.764959273544147, "learning_rate": 3.846385591600582e-06, "loss": 0.7193, "step": 7075 }, { "epoch": 0.72, "grad_norm": 1.5309895025303242, "learning_rate": 3.843789087358481e-06, "loss": 0.6079, "step": 7076 }, { "epoch": 0.72, "grad_norm": 1.7067092529866092, "learning_rate": 3.841193251276797e-06, "loss": 0.6646, "step": 7077 }, { "epoch": 0.72, "grad_norm": 1.54944657886839, "learning_rate": 3.838598083637273e-06, "loss": 0.598, "step": 7078 }, { "epoch": 0.72, "grad_norm": 1.7348919530284566, "learning_rate": 3.836003584721577e-06, "loss": 0.7491, "step": 7079 }, { "epoch": 0.72, "grad_norm": 1.404024633603411, "learning_rate": 3.833409754811295e-06, "loss": 0.6814, "step": 7080 }, { "epoch": 0.72, "grad_norm": 1.555957462060979, "learning_rate": 3.830816594187951e-06, "loss": 0.748, "step": 7081 }, { "epoch": 0.72, "grad_norm": 1.7083144056582373, "learning_rate": 3.828224103132989e-06, "loss": 0.6971, "step": 7082 }, { "epoch": 0.72, "grad_norm": 1.4838984338133978, "learning_rate": 3.825632281927784e-06, "loss": 0.6188, "step": 7083 }, { "epoch": 0.72, "grad_norm": 1.6210945937368182, "learning_rate": 3.823041130853642e-06, "loss": 0.7154, "step": 7084 }, { "epoch": 0.72, "grad_norm": 1.6502752504544569, "learning_rate": 3.820450650191785e-06, "loss": 0.609, "step": 7085 }, { "epoch": 0.72, "grad_norm": 1.8119556281486937, "learning_rate": 3.817860840223373e-06, "loss": 0.6798, "step": 7086 }, { "epoch": 0.72, "grad_norm": 1.6083606169328408, "learning_rate": 3.815271701229491e-06, "loss": 0.6415, "step": 7087 }, { "epoch": 0.72, "grad_norm": 1.7234006915737945, "learning_rate": 3.8126832334911457e-06, "loss": 0.7479, "step": 7088 }, { "epoch": 0.72, "grad_norm": 1.7823470873162353, "learning_rate": 3.8100954372892796e-06, "loss": 0.7111, "step": 7089 }, { "epoch": 0.72, "grad_norm": 1.54738129409935, "learning_rate": 3.8075083129047507e-06, "loss": 0.6821, "step": 7090 }, { "epoch": 0.72, "grad_norm": 1.6814801668504775, "learning_rate": 3.8049218606183547e-06, "loss": 0.616, "step": 7091 }, { "epoch": 0.72, "grad_norm": 1.8021188952408784, "learning_rate": 3.8023360807108135e-06, "loss": 0.7924, "step": 7092 }, { "epoch": 0.72, "grad_norm": 1.743377592740335, "learning_rate": 3.799750973462768e-06, "loss": 0.7971, "step": 7093 }, { "epoch": 0.72, "grad_norm": 1.6937420380740584, "learning_rate": 3.797166539154795e-06, "loss": 0.6509, "step": 7094 }, { "epoch": 0.72, "grad_norm": 1.6321361424032252, "learning_rate": 3.7945827780673904e-06, "loss": 0.6793, "step": 7095 }, { "epoch": 0.72, "grad_norm": 1.603854582495709, "learning_rate": 3.791999690480984e-06, "loss": 0.6254, "step": 7096 }, { "epoch": 0.72, "grad_norm": 1.593723128503487, "learning_rate": 3.7894172766759307e-06, "loss": 0.6594, "step": 7097 }, { "epoch": 0.72, "grad_norm": 1.5564792340475972, "learning_rate": 3.786835536932507e-06, "loss": 0.6598, "step": 7098 }, { "epoch": 0.72, "grad_norm": 1.5831785482522764, "learning_rate": 3.784254471530926e-06, "loss": 0.7586, "step": 7099 }, { "epoch": 0.72, "grad_norm": 1.7443724076857514, "learning_rate": 3.7816740807513154e-06, "loss": 0.6868, "step": 7100 }, { "epoch": 0.72, "grad_norm": 1.509927809623996, "learning_rate": 3.7790943648737397e-06, "loss": 0.6671, "step": 7101 }, { "epoch": 0.72, "grad_norm": 1.725056179937763, "learning_rate": 3.7765153241781893e-06, "loss": 0.7384, "step": 7102 }, { "epoch": 0.72, "grad_norm": 1.6151974285444082, "learning_rate": 3.773936958944574e-06, "loss": 0.649, "step": 7103 }, { "epoch": 0.72, "grad_norm": 1.6428686899132872, "learning_rate": 3.7713592694527357e-06, "loss": 0.6798, "step": 7104 }, { "epoch": 0.72, "grad_norm": 1.5815525541499031, "learning_rate": 3.768782255982447e-06, "loss": 0.6567, "step": 7105 }, { "epoch": 0.72, "grad_norm": 1.693255150657801, "learning_rate": 3.766205918813397e-06, "loss": 0.6504, "step": 7106 }, { "epoch": 0.72, "grad_norm": 1.7690595597513055, "learning_rate": 3.7636302582252114e-06, "loss": 0.7456, "step": 7107 }, { "epoch": 0.72, "grad_norm": 1.584515931711388, "learning_rate": 3.761055274497433e-06, "loss": 0.6839, "step": 7108 }, { "epoch": 0.72, "grad_norm": 1.5816360333856814, "learning_rate": 3.758480967909537e-06, "loss": 0.6714, "step": 7109 }, { "epoch": 0.72, "grad_norm": 1.566909665599923, "learning_rate": 3.7559073387409286e-06, "loss": 0.7095, "step": 7110 }, { "epoch": 0.72, "grad_norm": 1.4996558972408467, "learning_rate": 3.7533343872709295e-06, "loss": 0.7148, "step": 7111 }, { "epoch": 0.72, "grad_norm": 1.7782962742990462, "learning_rate": 3.7507621137787988e-06, "loss": 0.7736, "step": 7112 }, { "epoch": 0.72, "grad_norm": 1.5960500136687246, "learning_rate": 3.7481905185437105e-06, "loss": 0.7787, "step": 7113 }, { "epoch": 0.72, "grad_norm": 1.606927467079132, "learning_rate": 3.745619601844773e-06, "loss": 0.6923, "step": 7114 }, { "epoch": 0.72, "grad_norm": 1.455797703919836, "learning_rate": 3.7430493639610245e-06, "loss": 0.6307, "step": 7115 }, { "epoch": 0.72, "grad_norm": 1.5626622983287617, "learning_rate": 3.7404798051714165e-06, "loss": 0.695, "step": 7116 }, { "epoch": 0.72, "grad_norm": 1.6079084286137084, "learning_rate": 3.7379109257548406e-06, "loss": 0.7181, "step": 7117 }, { "epoch": 0.72, "grad_norm": 1.4823136615497665, "learning_rate": 3.7353427259901022e-06, "loss": 0.7078, "step": 7118 }, { "epoch": 0.72, "grad_norm": 1.6497796267623495, "learning_rate": 3.732775206155943e-06, "loss": 0.7823, "step": 7119 }, { "epoch": 0.72, "grad_norm": 1.5963666093680742, "learning_rate": 3.7302083665310295e-06, "loss": 0.5863, "step": 7120 }, { "epoch": 0.72, "grad_norm": 1.7135779255987058, "learning_rate": 3.7276422073939466e-06, "loss": 0.7041, "step": 7121 }, { "epoch": 0.72, "grad_norm": 1.729275030874236, "learning_rate": 3.7250767290232124e-06, "loss": 0.7154, "step": 7122 }, { "epoch": 0.72, "grad_norm": 1.5755392311483456, "learning_rate": 3.7225119316972747e-06, "loss": 0.6911, "step": 7123 }, { "epoch": 0.72, "grad_norm": 1.663429556378475, "learning_rate": 3.7199478156944947e-06, "loss": 0.6331, "step": 7124 }, { "epoch": 0.72, "grad_norm": 1.711823333115215, "learning_rate": 3.717384381293173e-06, "loss": 0.7237, "step": 7125 }, { "epoch": 0.72, "grad_norm": 1.6672270773065978, "learning_rate": 3.7148216287715243e-06, "loss": 0.7352, "step": 7126 }, { "epoch": 0.72, "grad_norm": 1.8010218071305932, "learning_rate": 3.712259558407698e-06, "loss": 0.638, "step": 7127 }, { "epoch": 0.73, "grad_norm": 1.7201988979672762, "learning_rate": 3.709698170479772e-06, "loss": 0.6107, "step": 7128 }, { "epoch": 0.73, "grad_norm": 1.7275487216334133, "learning_rate": 3.7071374652657366e-06, "loss": 0.8083, "step": 7129 }, { "epoch": 0.73, "grad_norm": 1.6241632422864194, "learning_rate": 3.704577443043523e-06, "loss": 0.6765, "step": 7130 }, { "epoch": 0.73, "grad_norm": 1.5499977286232118, "learning_rate": 3.7020181040909755e-06, "loss": 0.6343, "step": 7131 }, { "epoch": 0.73, "grad_norm": 1.6018603460961796, "learning_rate": 3.6994594486858724e-06, "loss": 0.6779, "step": 7132 }, { "epoch": 0.73, "grad_norm": 1.6908374645330553, "learning_rate": 3.696901477105921e-06, "loss": 0.6236, "step": 7133 }, { "epoch": 0.73, "grad_norm": 1.521919889461445, "learning_rate": 3.694344189628741e-06, "loss": 0.6738, "step": 7134 }, { "epoch": 0.73, "grad_norm": 1.7491997799433427, "learning_rate": 3.6917875865318944e-06, "loss": 0.6773, "step": 7135 }, { "epoch": 0.73, "grad_norm": 1.6868041845930088, "learning_rate": 3.6892316680928508e-06, "loss": 0.6554, "step": 7136 }, { "epoch": 0.73, "grad_norm": 1.556084069113492, "learning_rate": 3.6866764345890217e-06, "loss": 0.6641, "step": 7137 }, { "epoch": 0.73, "grad_norm": 1.4191396117041615, "learning_rate": 3.684121886297739e-06, "loss": 0.6768, "step": 7138 }, { "epoch": 0.73, "grad_norm": 1.6062587399945396, "learning_rate": 3.6815680234962527e-06, "loss": 0.5832, "step": 7139 }, { "epoch": 0.73, "grad_norm": 1.7773274881370058, "learning_rate": 3.6790148464617483e-06, "loss": 0.7483, "step": 7140 }, { "epoch": 0.73, "grad_norm": 1.7298207064959892, "learning_rate": 3.6764623554713364e-06, "loss": 0.7185, "step": 7141 }, { "epoch": 0.73, "grad_norm": 1.802729381434695, "learning_rate": 3.6739105508020433e-06, "loss": 0.7059, "step": 7142 }, { "epoch": 0.73, "grad_norm": 1.55814796727082, "learning_rate": 3.6713594327308343e-06, "loss": 0.6882, "step": 7143 }, { "epoch": 0.73, "grad_norm": 1.9841458785651938, "learning_rate": 3.6688090015345878e-06, "loss": 0.7185, "step": 7144 }, { "epoch": 0.73, "grad_norm": 1.5236445095995443, "learning_rate": 3.6662592574901147e-06, "loss": 0.763, "step": 7145 }, { "epoch": 0.73, "grad_norm": 1.667091898622156, "learning_rate": 3.663710200874153e-06, "loss": 0.6871, "step": 7146 }, { "epoch": 0.73, "grad_norm": 1.640283370489685, "learning_rate": 3.661161831963358e-06, "loss": 0.6242, "step": 7147 }, { "epoch": 0.73, "grad_norm": 1.5959959058927706, "learning_rate": 3.658614151034321e-06, "loss": 0.7617, "step": 7148 }, { "epoch": 0.73, "grad_norm": 1.7141601341216794, "learning_rate": 3.6560671583635467e-06, "loss": 0.6534, "step": 7149 }, { "epoch": 0.73, "grad_norm": 1.7300259568139287, "learning_rate": 3.653520854227475e-06, "loss": 0.7433, "step": 7150 }, { "epoch": 0.73, "grad_norm": 1.5532886686274163, "learning_rate": 3.6509752389024687e-06, "loss": 0.579, "step": 7151 }, { "epoch": 0.73, "grad_norm": 1.6833266687520791, "learning_rate": 3.648430312664809e-06, "loss": 0.6755, "step": 7152 }, { "epoch": 0.73, "grad_norm": 1.5564738321146763, "learning_rate": 3.6458860757907155e-06, "loss": 0.6367, "step": 7153 }, { "epoch": 0.73, "grad_norm": 1.7105058056686722, "learning_rate": 3.6433425285563186e-06, "loss": 0.6857, "step": 7154 }, { "epoch": 0.73, "grad_norm": 1.6487205971867094, "learning_rate": 3.6407996712376826e-06, "loss": 0.6234, "step": 7155 }, { "epoch": 0.73, "grad_norm": 1.8336973044214955, "learning_rate": 3.6382575041107982e-06, "loss": 0.735, "step": 7156 }, { "epoch": 0.73, "grad_norm": 1.6426974325555639, "learning_rate": 3.6357160274515723e-06, "loss": 0.6363, "step": 7157 }, { "epoch": 0.73, "grad_norm": 1.5300249782218753, "learning_rate": 3.633175241535849e-06, "loss": 0.6511, "step": 7158 }, { "epoch": 0.73, "grad_norm": 1.6712026210826763, "learning_rate": 3.630635146639384e-06, "loss": 0.7023, "step": 7159 }, { "epoch": 0.73, "grad_norm": 1.5512216505261869, "learning_rate": 3.6280957430378683e-06, "loss": 0.6601, "step": 7160 }, { "epoch": 0.73, "grad_norm": 1.6199132742303428, "learning_rate": 3.6255570310069166e-06, "loss": 0.6928, "step": 7161 }, { "epoch": 0.73, "grad_norm": 1.7001832013414149, "learning_rate": 3.6230190108220607e-06, "loss": 0.7222, "step": 7162 }, { "epoch": 0.73, "grad_norm": 1.5664502892848486, "learning_rate": 3.6204816827587676e-06, "loss": 0.6602, "step": 7163 }, { "epoch": 0.73, "grad_norm": 1.7106532424063383, "learning_rate": 3.617945047092426e-06, "loss": 0.6976, "step": 7164 }, { "epoch": 0.73, "grad_norm": 1.5093380311405362, "learning_rate": 3.615409104098342e-06, "loss": 0.5972, "step": 7165 }, { "epoch": 0.73, "grad_norm": 1.6008320342553601, "learning_rate": 3.612873854051759e-06, "loss": 0.6195, "step": 7166 }, { "epoch": 0.73, "grad_norm": 1.65846303644904, "learning_rate": 3.610339297227833e-06, "loss": 0.7601, "step": 7167 }, { "epoch": 0.73, "grad_norm": 1.6312980744454015, "learning_rate": 3.6078054339016523e-06, "loss": 0.7859, "step": 7168 }, { "epoch": 0.73, "grad_norm": 1.6668920120325388, "learning_rate": 3.605272264348233e-06, "loss": 0.697, "step": 7169 }, { "epoch": 0.73, "grad_norm": 1.581682435271611, "learning_rate": 3.602739788842503e-06, "loss": 0.6864, "step": 7170 }, { "epoch": 0.73, "grad_norm": 1.622319941173912, "learning_rate": 3.6002080076593273e-06, "loss": 0.732, "step": 7171 }, { "epoch": 0.73, "grad_norm": 1.6851021641033066, "learning_rate": 3.597676921073494e-06, "loss": 0.7444, "step": 7172 }, { "epoch": 0.73, "grad_norm": 1.7772998980522972, "learning_rate": 3.5951465293597067e-06, "loss": 0.7153, "step": 7173 }, { "epoch": 0.73, "grad_norm": 1.7090383637910858, "learning_rate": 3.5926168327926037e-06, "loss": 0.7717, "step": 7174 }, { "epoch": 0.73, "grad_norm": 1.5283316067335495, "learning_rate": 3.590087831646746e-06, "loss": 0.5902, "step": 7175 }, { "epoch": 0.73, "grad_norm": 1.70593382715811, "learning_rate": 3.58755952619661e-06, "loss": 0.7558, "step": 7176 }, { "epoch": 0.73, "grad_norm": 1.4769021537695464, "learning_rate": 3.585031916716609e-06, "loss": 0.5626, "step": 7177 }, { "epoch": 0.73, "grad_norm": 1.5193009552710477, "learning_rate": 3.5825050034810783e-06, "loss": 0.6463, "step": 7178 }, { "epoch": 0.73, "grad_norm": 1.5464268469172968, "learning_rate": 3.5799787867642668e-06, "loss": 0.6902, "step": 7179 }, { "epoch": 0.73, "grad_norm": 1.9070860927402575, "learning_rate": 3.5774532668403606e-06, "loss": 0.8036, "step": 7180 }, { "epoch": 0.73, "grad_norm": 1.677251422282328, "learning_rate": 3.5749284439834686e-06, "loss": 0.7145, "step": 7181 }, { "epoch": 0.73, "grad_norm": 1.5070874738175213, "learning_rate": 3.5724043184676127e-06, "loss": 0.6111, "step": 7182 }, { "epoch": 0.73, "grad_norm": 1.3679124163375487, "learning_rate": 3.5698808905667525e-06, "loss": 0.6639, "step": 7183 }, { "epoch": 0.73, "grad_norm": 1.6294192752826915, "learning_rate": 3.567358160554769e-06, "loss": 0.6482, "step": 7184 }, { "epoch": 0.73, "grad_norm": 1.6115377083728697, "learning_rate": 3.564836128705459e-06, "loss": 0.6533, "step": 7185 }, { "epoch": 0.73, "grad_norm": 1.9367670221869318, "learning_rate": 3.562314795292552e-06, "loss": 0.6958, "step": 7186 }, { "epoch": 0.73, "grad_norm": 1.5439196900733712, "learning_rate": 3.5597941605897037e-06, "loss": 0.6759, "step": 7187 }, { "epoch": 0.73, "grad_norm": 1.8954483496019165, "learning_rate": 3.5572742248704817e-06, "loss": 0.702, "step": 7188 }, { "epoch": 0.73, "grad_norm": 1.5821038172561497, "learning_rate": 3.554754988408393e-06, "loss": 0.5861, "step": 7189 }, { "epoch": 0.73, "grad_norm": 1.6938894940828406, "learning_rate": 3.552236451476855e-06, "loss": 0.7871, "step": 7190 }, { "epoch": 0.73, "grad_norm": 1.614556870374976, "learning_rate": 3.5497186143492178e-06, "loss": 0.7146, "step": 7191 }, { "epoch": 0.73, "grad_norm": 1.5828409470899927, "learning_rate": 3.5472014772987573e-06, "loss": 0.7527, "step": 7192 }, { "epoch": 0.73, "grad_norm": 1.6435787132195512, "learning_rate": 3.544685040598663e-06, "loss": 0.6479, "step": 7193 }, { "epoch": 0.73, "grad_norm": 1.7080262144710854, "learning_rate": 3.5421693045220617e-06, "loss": 0.6583, "step": 7194 }, { "epoch": 0.73, "grad_norm": 1.6662616193281345, "learning_rate": 3.539654269341989e-06, "loss": 0.7191, "step": 7195 }, { "epoch": 0.73, "grad_norm": 1.7599616953319472, "learning_rate": 3.5371399353314174e-06, "loss": 0.659, "step": 7196 }, { "epoch": 0.73, "grad_norm": 1.5910727310443404, "learning_rate": 3.534626302763241e-06, "loss": 0.6978, "step": 7197 }, { "epoch": 0.73, "grad_norm": 1.7541929338966649, "learning_rate": 3.5321133719102685e-06, "loss": 0.665, "step": 7198 }, { "epoch": 0.73, "grad_norm": 1.563447514004173, "learning_rate": 3.529601143045244e-06, "loss": 0.5738, "step": 7199 }, { "epoch": 0.73, "grad_norm": 1.6272858485757566, "learning_rate": 3.527089616440832e-06, "loss": 0.6693, "step": 7200 }, { "epoch": 0.73, "grad_norm": 1.6767842031362263, "learning_rate": 3.524578792369614e-06, "loss": 0.6784, "step": 7201 }, { "epoch": 0.73, "grad_norm": 1.741601124837262, "learning_rate": 3.5220686711041085e-06, "loss": 0.6576, "step": 7202 }, { "epoch": 0.73, "grad_norm": 1.6554235824440988, "learning_rate": 3.51955925291674e-06, "loss": 0.7596, "step": 7203 }, { "epoch": 0.73, "grad_norm": 1.8480889756347079, "learning_rate": 3.5170505380798736e-06, "loss": 0.7017, "step": 7204 }, { "epoch": 0.73, "grad_norm": 1.7918181666049604, "learning_rate": 3.5145425268657916e-06, "loss": 0.7029, "step": 7205 }, { "epoch": 0.73, "grad_norm": 1.790400059060796, "learning_rate": 3.5120352195466932e-06, "loss": 0.6969, "step": 7206 }, { "epoch": 0.73, "grad_norm": 1.6471726588298956, "learning_rate": 3.509528616394716e-06, "loss": 0.6233, "step": 7207 }, { "epoch": 0.73, "grad_norm": 1.4980354612253914, "learning_rate": 3.5070227176819036e-06, "loss": 0.7079, "step": 7208 }, { "epoch": 0.73, "grad_norm": 1.570210483481177, "learning_rate": 3.5045175236802363e-06, "loss": 0.7148, "step": 7209 }, { "epoch": 0.73, "grad_norm": 1.553566799177608, "learning_rate": 3.502013034661619e-06, "loss": 0.6731, "step": 7210 }, { "epoch": 0.73, "grad_norm": 1.8000474948425484, "learning_rate": 3.4995092508978647e-06, "loss": 0.7375, "step": 7211 }, { "epoch": 0.73, "grad_norm": 1.6913024060790467, "learning_rate": 3.4970061726607273e-06, "loss": 0.6878, "step": 7212 }, { "epoch": 0.73, "grad_norm": 1.7068333067586372, "learning_rate": 3.494503800221872e-06, "loss": 0.71, "step": 7213 }, { "epoch": 0.73, "grad_norm": 1.817682464669328, "learning_rate": 3.4920021338528955e-06, "loss": 0.672, "step": 7214 }, { "epoch": 0.73, "grad_norm": 1.6311735296816936, "learning_rate": 3.489501173825316e-06, "loss": 0.7779, "step": 7215 }, { "epoch": 0.73, "grad_norm": 1.50529393797726, "learning_rate": 3.487000920410568e-06, "loss": 0.6823, "step": 7216 }, { "epoch": 0.73, "grad_norm": 1.6558426245962694, "learning_rate": 3.4845013738800194e-06, "loss": 0.61, "step": 7217 }, { "epoch": 0.73, "grad_norm": 1.612431482351146, "learning_rate": 3.482002534504958e-06, "loss": 0.7768, "step": 7218 }, { "epoch": 0.73, "grad_norm": 1.7138504683026008, "learning_rate": 3.479504402556587e-06, "loss": 0.6834, "step": 7219 }, { "epoch": 0.73, "grad_norm": 1.5804252165412198, "learning_rate": 3.477006978306049e-06, "loss": 0.7372, "step": 7220 }, { "epoch": 0.73, "grad_norm": 1.5959790428928604, "learning_rate": 3.474510262024391e-06, "loss": 0.6769, "step": 7221 }, { "epoch": 0.73, "grad_norm": 1.6203807255279645, "learning_rate": 3.4720142539825975e-06, "loss": 0.691, "step": 7222 }, { "epoch": 0.73, "grad_norm": 1.7315453441096231, "learning_rate": 3.4695189544515727e-06, "loss": 0.6951, "step": 7223 }, { "epoch": 0.73, "grad_norm": 1.508462206764295, "learning_rate": 3.467024363702138e-06, "loss": 0.6509, "step": 7224 }, { "epoch": 0.73, "grad_norm": 1.7963743245623451, "learning_rate": 3.4645304820050473e-06, "loss": 0.6806, "step": 7225 }, { "epoch": 0.74, "grad_norm": 1.538399822037882, "learning_rate": 3.4620373096309656e-06, "loss": 0.6781, "step": 7226 }, { "epoch": 0.74, "grad_norm": 1.7490947649773623, "learning_rate": 3.459544846850492e-06, "loss": 0.6844, "step": 7227 }, { "epoch": 0.74, "grad_norm": 1.7330712725230233, "learning_rate": 3.4570530939341472e-06, "loss": 0.6949, "step": 7228 }, { "epoch": 0.74, "grad_norm": 1.6639851511795398, "learning_rate": 3.454562051152366e-06, "loss": 0.7073, "step": 7229 }, { "epoch": 0.74, "grad_norm": 1.667892739832757, "learning_rate": 3.452071718775518e-06, "loss": 0.7281, "step": 7230 }, { "epoch": 0.74, "grad_norm": 1.5559555783691887, "learning_rate": 3.4495820970738834e-06, "loss": 0.6977, "step": 7231 }, { "epoch": 0.74, "grad_norm": 1.8376998130377118, "learning_rate": 3.4470931863176747e-06, "loss": 0.6824, "step": 7232 }, { "epoch": 0.74, "grad_norm": 1.624520742992479, "learning_rate": 3.4446049867770283e-06, "loss": 0.642, "step": 7233 }, { "epoch": 0.74, "grad_norm": 1.665652967735254, "learning_rate": 3.442117498721992e-06, "loss": 0.6934, "step": 7234 }, { "epoch": 0.74, "grad_norm": 1.495900862109376, "learning_rate": 3.439630722422548e-06, "loss": 0.6932, "step": 7235 }, { "epoch": 0.74, "grad_norm": 1.6200504827919735, "learning_rate": 3.437144658148599e-06, "loss": 0.6874, "step": 7236 }, { "epoch": 0.74, "grad_norm": 1.6871382210453179, "learning_rate": 3.434659306169963e-06, "loss": 0.7193, "step": 7237 }, { "epoch": 0.74, "grad_norm": 1.6020384570556934, "learning_rate": 3.4321746667563925e-06, "loss": 0.6491, "step": 7238 }, { "epoch": 0.74, "grad_norm": 1.4852594541553965, "learning_rate": 3.4296907401775494e-06, "loss": 0.612, "step": 7239 }, { "epoch": 0.74, "grad_norm": 1.653249261782761, "learning_rate": 3.4272075267030267e-06, "loss": 0.6335, "step": 7240 }, { "epoch": 0.74, "grad_norm": 1.5842397819032858, "learning_rate": 3.4247250266023446e-06, "loss": 0.6908, "step": 7241 }, { "epoch": 0.74, "grad_norm": 1.5817026113440948, "learning_rate": 3.4222432401449313e-06, "loss": 0.7081, "step": 7242 }, { "epoch": 0.74, "grad_norm": 1.6773022328073637, "learning_rate": 3.4197621676001537e-06, "loss": 0.7217, "step": 7243 }, { "epoch": 0.74, "grad_norm": 1.582428050031458, "learning_rate": 3.417281809237285e-06, "loss": 0.6992, "step": 7244 }, { "epoch": 0.74, "grad_norm": 1.6711688415393713, "learning_rate": 3.4148021653255347e-06, "loss": 0.754, "step": 7245 }, { "epoch": 0.74, "grad_norm": 1.8417283173332568, "learning_rate": 3.412323236134032e-06, "loss": 0.6941, "step": 7246 }, { "epoch": 0.74, "grad_norm": 1.8817162021377392, "learning_rate": 3.409845021931818e-06, "loss": 0.695, "step": 7247 }, { "epoch": 0.74, "grad_norm": 1.5693407212436574, "learning_rate": 3.4073675229878724e-06, "loss": 0.6799, "step": 7248 }, { "epoch": 0.74, "grad_norm": 1.602686936513114, "learning_rate": 3.404890739571081e-06, "loss": 0.7407, "step": 7249 }, { "epoch": 0.74, "grad_norm": 1.5627001624359027, "learning_rate": 3.4024146719502636e-06, "loss": 0.6668, "step": 7250 }, { "epoch": 0.74, "grad_norm": 1.9331849062270565, "learning_rate": 3.3999393203941623e-06, "loss": 0.731, "step": 7251 }, { "epoch": 0.74, "grad_norm": 1.5206783627275, "learning_rate": 3.397464685171431e-06, "loss": 0.6608, "step": 7252 }, { "epoch": 0.74, "grad_norm": 1.7107663982664039, "learning_rate": 3.3949907665506564e-06, "loss": 0.73, "step": 7253 }, { "epoch": 0.74, "grad_norm": 1.8654410443640381, "learning_rate": 3.3925175648003473e-06, "loss": 0.7268, "step": 7254 }, { "epoch": 0.74, "grad_norm": 1.6870964608863164, "learning_rate": 3.390045080188923e-06, "loss": 0.6921, "step": 7255 }, { "epoch": 0.74, "grad_norm": 1.6475194906845443, "learning_rate": 3.38757331298474e-06, "loss": 0.7748, "step": 7256 }, { "epoch": 0.74, "grad_norm": 1.8213267285460604, "learning_rate": 3.385102263456065e-06, "loss": 0.5969, "step": 7257 }, { "epoch": 0.74, "grad_norm": 1.6459408638945416, "learning_rate": 3.3826319318710933e-06, "loss": 0.697, "step": 7258 }, { "epoch": 0.74, "grad_norm": 1.703788221164889, "learning_rate": 3.3801623184979437e-06, "loss": 0.6976, "step": 7259 }, { "epoch": 0.74, "grad_norm": 1.7504852878801893, "learning_rate": 3.377693423604649e-06, "loss": 0.7039, "step": 7260 }, { "epoch": 0.74, "grad_norm": 1.5253648258267434, "learning_rate": 3.3752252474591762e-06, "loss": 0.6294, "step": 7261 }, { "epoch": 0.74, "grad_norm": 1.5040219167465418, "learning_rate": 3.3727577903293985e-06, "loss": 0.631, "step": 7262 }, { "epoch": 0.74, "grad_norm": 1.7870140766431388, "learning_rate": 3.3702910524831246e-06, "loss": 0.7444, "step": 7263 }, { "epoch": 0.74, "grad_norm": 1.607093992786678, "learning_rate": 3.3678250341880825e-06, "loss": 0.7117, "step": 7264 }, { "epoch": 0.74, "grad_norm": 1.5595706608009965, "learning_rate": 3.365359735711915e-06, "loss": 0.6255, "step": 7265 }, { "epoch": 0.74, "grad_norm": 1.7040629735909656, "learning_rate": 3.3628951573221966e-06, "loss": 0.7388, "step": 7266 }, { "epoch": 0.74, "grad_norm": 1.7937033138858276, "learning_rate": 3.360431299286413e-06, "loss": 0.6807, "step": 7267 }, { "epoch": 0.74, "grad_norm": 1.6216251520551597, "learning_rate": 3.3579681618719805e-06, "loss": 0.7217, "step": 7268 }, { "epoch": 0.74, "grad_norm": 1.5615083234069798, "learning_rate": 3.355505745346238e-06, "loss": 0.6231, "step": 7269 }, { "epoch": 0.74, "grad_norm": 1.532634334500432, "learning_rate": 3.353044049976435e-06, "loss": 0.645, "step": 7270 }, { "epoch": 0.74, "grad_norm": 1.638134421357406, "learning_rate": 3.3505830760297543e-06, "loss": 0.6466, "step": 7271 }, { "epoch": 0.74, "grad_norm": 1.653769742345302, "learning_rate": 3.3481228237732975e-06, "loss": 0.7258, "step": 7272 }, { "epoch": 0.74, "grad_norm": 1.5837947311775986, "learning_rate": 3.345663293474083e-06, "loss": 0.6474, "step": 7273 }, { "epoch": 0.74, "grad_norm": 1.64849332588951, "learning_rate": 3.3432044853990576e-06, "loss": 0.6081, "step": 7274 }, { "epoch": 0.74, "grad_norm": 1.6257045316549825, "learning_rate": 3.3407463998150815e-06, "loss": 0.5885, "step": 7275 }, { "epoch": 0.74, "grad_norm": 1.9181770773628148, "learning_rate": 3.338289036988945e-06, "loss": 0.6925, "step": 7276 }, { "epoch": 0.74, "grad_norm": 1.6579765006359775, "learning_rate": 3.3358323971873586e-06, "loss": 0.7907, "step": 7277 }, { "epoch": 0.74, "grad_norm": 1.6223788293680472, "learning_rate": 3.3333764806769474e-06, "loss": 0.7057, "step": 7278 }, { "epoch": 0.74, "grad_norm": 1.5562895761921027, "learning_rate": 3.3309212877242635e-06, "loss": 0.6549, "step": 7279 }, { "epoch": 0.74, "grad_norm": 1.8141933918317033, "learning_rate": 3.3284668185957857e-06, "loss": 0.7398, "step": 7280 }, { "epoch": 0.74, "grad_norm": 1.54263284558655, "learning_rate": 3.3260130735579e-06, "loss": 0.6571, "step": 7281 }, { "epoch": 0.74, "grad_norm": 1.619131530660715, "learning_rate": 3.3235600528769253e-06, "loss": 0.7058, "step": 7282 }, { "epoch": 0.74, "grad_norm": 1.465021228430089, "learning_rate": 3.321107756819103e-06, "loss": 0.5369, "step": 7283 }, { "epoch": 0.74, "grad_norm": 1.6381336452278406, "learning_rate": 3.318656185650584e-06, "loss": 0.6229, "step": 7284 }, { "epoch": 0.74, "grad_norm": 1.5536313913242494, "learning_rate": 3.3162053396374504e-06, "loss": 0.6387, "step": 7285 }, { "epoch": 0.74, "grad_norm": 1.6618093267828662, "learning_rate": 3.313755219045709e-06, "loss": 0.6368, "step": 7286 }, { "epoch": 0.74, "grad_norm": 1.6517390858077126, "learning_rate": 3.311305824141273e-06, "loss": 0.6692, "step": 7287 }, { "epoch": 0.74, "grad_norm": 1.4256680556420662, "learning_rate": 3.308857155189992e-06, "loss": 0.7384, "step": 7288 }, { "epoch": 0.74, "grad_norm": 1.5064498910789945, "learning_rate": 3.306409212457631e-06, "loss": 0.6581, "step": 7289 }, { "epoch": 0.74, "grad_norm": 1.7608860865876834, "learning_rate": 3.30396199620987e-06, "loss": 0.6862, "step": 7290 }, { "epoch": 0.74, "grad_norm": 1.6037617675529898, "learning_rate": 3.301515506712322e-06, "loss": 0.7146, "step": 7291 }, { "epoch": 0.74, "grad_norm": 1.7593652994874995, "learning_rate": 3.2990697442305143e-06, "loss": 0.6931, "step": 7292 }, { "epoch": 0.74, "grad_norm": 1.5088961297220562, "learning_rate": 3.296624709029893e-06, "loss": 0.5407, "step": 7293 }, { "epoch": 0.74, "grad_norm": 1.549978531019286, "learning_rate": 3.2941804013758337e-06, "loss": 0.7049, "step": 7294 }, { "epoch": 0.74, "grad_norm": 1.6325363025182515, "learning_rate": 3.291736821533621e-06, "loss": 0.7165, "step": 7295 }, { "epoch": 0.74, "grad_norm": 1.6735142078675114, "learning_rate": 3.289293969768471e-06, "loss": 0.6012, "step": 7296 }, { "epoch": 0.74, "grad_norm": 1.7178665383223046, "learning_rate": 3.2868518463455203e-06, "loss": 0.6026, "step": 7297 }, { "epoch": 0.74, "grad_norm": 1.6928985980299383, "learning_rate": 3.284410451529816e-06, "loss": 0.65, "step": 7298 }, { "epoch": 0.74, "grad_norm": 1.7305481371698612, "learning_rate": 3.2819697855863374e-06, "loss": 0.7129, "step": 7299 }, { "epoch": 0.74, "grad_norm": 1.7258542451488783, "learning_rate": 3.279529848779983e-06, "loss": 0.6139, "step": 7300 }, { "epoch": 0.74, "grad_norm": 1.6523221761729956, "learning_rate": 3.2770906413755644e-06, "loss": 0.7421, "step": 7301 }, { "epoch": 0.74, "grad_norm": 1.534840481965886, "learning_rate": 3.274652163637826e-06, "loss": 0.6207, "step": 7302 }, { "epoch": 0.74, "grad_norm": 1.7502912095179775, "learning_rate": 3.2722144158314184e-06, "loss": 0.7288, "step": 7303 }, { "epoch": 0.74, "grad_norm": 1.6496887127591713, "learning_rate": 3.2697773982209257e-06, "loss": 0.6785, "step": 7304 }, { "epoch": 0.74, "grad_norm": 1.6368353847674915, "learning_rate": 3.2673411110708507e-06, "loss": 0.666, "step": 7305 }, { "epoch": 0.74, "grad_norm": 1.5451756159890135, "learning_rate": 3.2649055546456076e-06, "loss": 0.7173, "step": 7306 }, { "epoch": 0.74, "grad_norm": 1.7004419330703944, "learning_rate": 3.2624707292095446e-06, "loss": 0.7743, "step": 7307 }, { "epoch": 0.74, "grad_norm": 1.595740560798837, "learning_rate": 3.2600366350269176e-06, "loss": 0.5945, "step": 7308 }, { "epoch": 0.74, "grad_norm": 1.722661566376523, "learning_rate": 3.257603272361912e-06, "loss": 0.6278, "step": 7309 }, { "epoch": 0.74, "grad_norm": 1.6681602626098242, "learning_rate": 3.2551706414786366e-06, "loss": 0.6901, "step": 7310 }, { "epoch": 0.74, "grad_norm": 1.6689875835667756, "learning_rate": 3.252738742641106e-06, "loss": 0.7482, "step": 7311 }, { "epoch": 0.74, "grad_norm": 1.6118580407253362, "learning_rate": 3.2503075761132707e-06, "loss": 0.6896, "step": 7312 }, { "epoch": 0.74, "grad_norm": 1.6008888195431321, "learning_rate": 3.247877142158997e-06, "loss": 0.7138, "step": 7313 }, { "epoch": 0.74, "grad_norm": 1.6096208333217168, "learning_rate": 3.2454474410420657e-06, "loss": 0.6315, "step": 7314 }, { "epoch": 0.74, "grad_norm": 1.6230174119350946, "learning_rate": 3.2430184730261883e-06, "loss": 0.6744, "step": 7315 }, { "epoch": 0.74, "grad_norm": 1.685018707256806, "learning_rate": 3.2405902383749856e-06, "loss": 0.684, "step": 7316 }, { "epoch": 0.74, "grad_norm": 1.6803874750251426, "learning_rate": 3.2381627373520066e-06, "loss": 0.7465, "step": 7317 }, { "epoch": 0.74, "grad_norm": 1.6429226662638055, "learning_rate": 3.2357359702207216e-06, "loss": 0.7916, "step": 7318 }, { "epoch": 0.74, "grad_norm": 1.7563407976090484, "learning_rate": 3.233309937244513e-06, "loss": 0.736, "step": 7319 }, { "epoch": 0.74, "grad_norm": 1.5215856115164546, "learning_rate": 3.2308846386866954e-06, "loss": 0.6469, "step": 7320 }, { "epoch": 0.74, "grad_norm": 1.5886452708615701, "learning_rate": 3.228460074810489e-06, "loss": 0.7408, "step": 7321 }, { "epoch": 0.74, "grad_norm": 1.6400376738359026, "learning_rate": 3.226036245879047e-06, "loss": 0.6373, "step": 7322 }, { "epoch": 0.74, "grad_norm": 1.6690055408195899, "learning_rate": 3.2236131521554405e-06, "loss": 0.6703, "step": 7323 }, { "epoch": 0.74, "grad_norm": 1.5373330223676502, "learning_rate": 3.221190793902653e-06, "loss": 0.5949, "step": 7324 }, { "epoch": 0.75, "grad_norm": 1.6379524975379682, "learning_rate": 3.218769171383599e-06, "loss": 0.6989, "step": 7325 }, { "epoch": 0.75, "grad_norm": 1.819067342413871, "learning_rate": 3.2163482848611003e-06, "loss": 0.6376, "step": 7326 }, { "epoch": 0.75, "grad_norm": 1.7154236629745496, "learning_rate": 3.213928134597912e-06, "loss": 0.7656, "step": 7327 }, { "epoch": 0.75, "grad_norm": 1.6693865924740787, "learning_rate": 3.2115087208567064e-06, "loss": 0.7179, "step": 7328 }, { "epoch": 0.75, "grad_norm": 1.7508571742865737, "learning_rate": 3.209090043900065e-06, "loss": 0.696, "step": 7329 }, { "epoch": 0.75, "grad_norm": 1.5057491696391703, "learning_rate": 3.206672103990501e-06, "loss": 0.596, "step": 7330 }, { "epoch": 0.75, "grad_norm": 1.6512693921958572, "learning_rate": 3.2042549013904476e-06, "loss": 0.7441, "step": 7331 }, { "epoch": 0.75, "grad_norm": 1.7628128435052854, "learning_rate": 3.201838436362249e-06, "loss": 0.6603, "step": 7332 }, { "epoch": 0.75, "grad_norm": 1.6365190818788702, "learning_rate": 3.1994227091681783e-06, "loss": 0.6335, "step": 7333 }, { "epoch": 0.75, "grad_norm": 1.5780363922604819, "learning_rate": 3.197007720070422e-06, "loss": 0.6621, "step": 7334 }, { "epoch": 0.75, "grad_norm": 1.572747001069289, "learning_rate": 3.1945934693310897e-06, "loss": 0.6726, "step": 7335 }, { "epoch": 0.75, "grad_norm": 1.5666495089845731, "learning_rate": 3.192179957212215e-06, "loss": 0.6427, "step": 7336 }, { "epoch": 0.75, "grad_norm": 1.6216586270269608, "learning_rate": 3.1897671839757405e-06, "loss": 0.6585, "step": 7337 }, { "epoch": 0.75, "grad_norm": 1.5232294696713657, "learning_rate": 3.187355149883541e-06, "loss": 0.6636, "step": 7338 }, { "epoch": 0.75, "grad_norm": 1.5701600681094992, "learning_rate": 3.1849438551973986e-06, "loss": 0.6563, "step": 7339 }, { "epoch": 0.75, "grad_norm": 1.7231236105797698, "learning_rate": 3.182533300179026e-06, "loss": 0.672, "step": 7340 }, { "epoch": 0.75, "grad_norm": 1.660836767656615, "learning_rate": 3.180123485090053e-06, "loss": 0.6324, "step": 7341 }, { "epoch": 0.75, "grad_norm": 1.6430898224518655, "learning_rate": 3.177714410192022e-06, "loss": 0.6109, "step": 7342 }, { "epoch": 0.75, "grad_norm": 1.6269418239884736, "learning_rate": 3.175306075746406e-06, "loss": 0.7617, "step": 7343 }, { "epoch": 0.75, "grad_norm": 1.6439427634892574, "learning_rate": 3.172898482014586e-06, "loss": 0.6748, "step": 7344 }, { "epoch": 0.75, "grad_norm": 1.7691958231320482, "learning_rate": 3.1704916292578715e-06, "loss": 0.6396, "step": 7345 }, { "epoch": 0.75, "grad_norm": 1.637905279936545, "learning_rate": 3.1680855177374935e-06, "loss": 0.6897, "step": 7346 }, { "epoch": 0.75, "grad_norm": 1.6798196456772034, "learning_rate": 3.1656801477145893e-06, "loss": 0.7228, "step": 7347 }, { "epoch": 0.75, "grad_norm": 1.7038195826667484, "learning_rate": 3.1632755194502286e-06, "loss": 0.6807, "step": 7348 }, { "epoch": 0.75, "grad_norm": 1.6980391062663183, "learning_rate": 3.160871633205398e-06, "loss": 0.6299, "step": 7349 }, { "epoch": 0.75, "grad_norm": 1.4863795099644443, "learning_rate": 3.1584684892409977e-06, "loss": 0.5528, "step": 7350 }, { "epoch": 0.75, "grad_norm": 1.5142489059614048, "learning_rate": 3.156066087817856e-06, "loss": 0.6406, "step": 7351 }, { "epoch": 0.75, "grad_norm": 1.7486622198807542, "learning_rate": 3.15366442919671e-06, "loss": 0.6327, "step": 7352 }, { "epoch": 0.75, "grad_norm": 1.6055617758275391, "learning_rate": 3.1512635136382252e-06, "loss": 0.5865, "step": 7353 }, { "epoch": 0.75, "grad_norm": 1.5219472276420638, "learning_rate": 3.1488633414029877e-06, "loss": 0.6461, "step": 7354 }, { "epoch": 0.75, "grad_norm": 1.8176492653029488, "learning_rate": 3.146463912751491e-06, "loss": 0.7689, "step": 7355 }, { "epoch": 0.75, "grad_norm": 1.6826159035851425, "learning_rate": 3.144065227944163e-06, "loss": 0.7403, "step": 7356 }, { "epoch": 0.75, "grad_norm": 1.6247469531612129, "learning_rate": 3.1416672872413357e-06, "loss": 0.7106, "step": 7357 }, { "epoch": 0.75, "grad_norm": 1.5779106715212203, "learning_rate": 3.1392700909032724e-06, "loss": 0.6871, "step": 7358 }, { "epoch": 0.75, "grad_norm": 1.4920076107625604, "learning_rate": 3.1368736391901544e-06, "loss": 0.6714, "step": 7359 }, { "epoch": 0.75, "grad_norm": 1.961808390580555, "learning_rate": 3.134477932362072e-06, "loss": 0.7265, "step": 7360 }, { "epoch": 0.75, "grad_norm": 1.938920841767802, "learning_rate": 3.132082970679049e-06, "loss": 0.7427, "step": 7361 }, { "epoch": 0.75, "grad_norm": 1.567752759558388, "learning_rate": 3.1296887544010133e-06, "loss": 0.6539, "step": 7362 }, { "epoch": 0.75, "grad_norm": 1.5151465329107645, "learning_rate": 3.1272952837878245e-06, "loss": 0.6595, "step": 7363 }, { "epoch": 0.75, "grad_norm": 1.705679499416008, "learning_rate": 3.124902559099258e-06, "loss": 0.6997, "step": 7364 }, { "epoch": 0.75, "grad_norm": 1.8858540122131102, "learning_rate": 3.1225105805950017e-06, "loss": 0.7913, "step": 7365 }, { "epoch": 0.75, "grad_norm": 1.7419499972808716, "learning_rate": 3.120119348534669e-06, "loss": 0.7856, "step": 7366 }, { "epoch": 0.75, "grad_norm": 1.573465112824827, "learning_rate": 3.1177288631777956e-06, "loss": 0.6733, "step": 7367 }, { "epoch": 0.75, "grad_norm": 1.6325035220696849, "learning_rate": 3.115339124783825e-06, "loss": 0.6511, "step": 7368 }, { "epoch": 0.75, "grad_norm": 1.7307285220036097, "learning_rate": 3.1129501336121303e-06, "loss": 0.615, "step": 7369 }, { "epoch": 0.75, "grad_norm": 1.9298849037539572, "learning_rate": 3.1105618899219946e-06, "loss": 0.6952, "step": 7370 }, { "epoch": 0.75, "grad_norm": 1.8535262033904598, "learning_rate": 3.1081743939726262e-06, "loss": 0.7454, "step": 7371 }, { "epoch": 0.75, "grad_norm": 1.6615890889245795, "learning_rate": 3.105787646023155e-06, "loss": 0.803, "step": 7372 }, { "epoch": 0.75, "grad_norm": 1.7370173676111043, "learning_rate": 3.103401646332619e-06, "loss": 0.6846, "step": 7373 }, { "epoch": 0.75, "grad_norm": 1.810281054365534, "learning_rate": 3.101016395159986e-06, "loss": 0.7291, "step": 7374 }, { "epoch": 0.75, "grad_norm": 1.6837037222155278, "learning_rate": 3.0986318927641312e-06, "loss": 0.7795, "step": 7375 }, { "epoch": 0.75, "grad_norm": 1.7021722264339456, "learning_rate": 3.09624813940386e-06, "loss": 0.6884, "step": 7376 }, { "epoch": 0.75, "grad_norm": 1.6763761247294078, "learning_rate": 3.0938651353378946e-06, "loss": 0.7276, "step": 7377 }, { "epoch": 0.75, "grad_norm": 1.5836681850071694, "learning_rate": 3.091482880824864e-06, "loss": 0.6714, "step": 7378 }, { "epoch": 0.75, "grad_norm": 1.731965481202213, "learning_rate": 3.0891013761233345e-06, "loss": 0.6925, "step": 7379 }, { "epoch": 0.75, "grad_norm": 1.7464462818615787, "learning_rate": 3.0867206214917723e-06, "loss": 0.7193, "step": 7380 }, { "epoch": 0.75, "grad_norm": 1.849544297503174, "learning_rate": 3.0843406171885746e-06, "loss": 0.8064, "step": 7381 }, { "epoch": 0.75, "grad_norm": 1.540043474640889, "learning_rate": 3.081961363472058e-06, "loss": 0.7355, "step": 7382 }, { "epoch": 0.75, "grad_norm": 1.6909442617771338, "learning_rate": 3.0795828606004453e-06, "loss": 0.6626, "step": 7383 }, { "epoch": 0.75, "grad_norm": 1.7735932876842673, "learning_rate": 3.0772051088318912e-06, "loss": 0.6367, "step": 7384 }, { "epoch": 0.75, "grad_norm": 1.6463646066423563, "learning_rate": 3.0748281084244635e-06, "loss": 0.7147, "step": 7385 }, { "epoch": 0.75, "grad_norm": 1.5406094829194776, "learning_rate": 3.0724518596361452e-06, "loss": 0.6248, "step": 7386 }, { "epoch": 0.75, "grad_norm": 1.6502764101119622, "learning_rate": 3.070076362724843e-06, "loss": 0.5867, "step": 7387 }, { "epoch": 0.75, "grad_norm": 1.4455671784961945, "learning_rate": 3.0677016179483823e-06, "loss": 0.7, "step": 7388 }, { "epoch": 0.75, "grad_norm": 1.7474920723140612, "learning_rate": 3.065327625564498e-06, "loss": 0.6584, "step": 7389 }, { "epoch": 0.75, "grad_norm": 1.6490934938705888, "learning_rate": 3.0629543858308552e-06, "loss": 0.6237, "step": 7390 }, { "epoch": 0.75, "grad_norm": 1.620282671271724, "learning_rate": 3.0605818990050327e-06, "loss": 0.6551, "step": 7391 }, { "epoch": 0.75, "grad_norm": 1.6635610737609734, "learning_rate": 3.0582101653445227e-06, "loss": 0.6701, "step": 7392 }, { "epoch": 0.75, "grad_norm": 1.7532758162200444, "learning_rate": 3.0558391851067416e-06, "loss": 0.783, "step": 7393 }, { "epoch": 0.75, "grad_norm": 1.6603614563648805, "learning_rate": 3.053468958549025e-06, "loss": 0.6462, "step": 7394 }, { "epoch": 0.75, "grad_norm": 1.5306270774029103, "learning_rate": 3.051099485928618e-06, "loss": 0.6622, "step": 7395 }, { "epoch": 0.75, "grad_norm": 1.500571714029283, "learning_rate": 3.0487307675026967e-06, "loss": 0.631, "step": 7396 }, { "epoch": 0.75, "grad_norm": 1.3669527221428746, "learning_rate": 3.0463628035283423e-06, "loss": 0.6113, "step": 7397 }, { "epoch": 0.75, "grad_norm": 1.6656957768204146, "learning_rate": 3.043995594262562e-06, "loss": 0.612, "step": 7398 }, { "epoch": 0.75, "grad_norm": 1.7199610639093554, "learning_rate": 3.0416291399622834e-06, "loss": 0.7657, "step": 7399 }, { "epoch": 0.75, "grad_norm": 1.5742384138792822, "learning_rate": 3.039263440884341e-06, "loss": 0.6016, "step": 7400 }, { "epoch": 0.75, "grad_norm": 1.5696383649368324, "learning_rate": 3.0368984972854997e-06, "loss": 0.5948, "step": 7401 }, { "epoch": 0.75, "grad_norm": 1.5431594277390508, "learning_rate": 3.034534309422439e-06, "loss": 0.7673, "step": 7402 }, { "epoch": 0.75, "grad_norm": 1.7611851032651105, "learning_rate": 3.0321708775517476e-06, "loss": 0.7376, "step": 7403 }, { "epoch": 0.75, "grad_norm": 1.6782638099138119, "learning_rate": 3.0298082019299424e-06, "loss": 0.6546, "step": 7404 }, { "epoch": 0.75, "grad_norm": 1.655260128106923, "learning_rate": 3.0274462828134587e-06, "loss": 0.8029, "step": 7405 }, { "epoch": 0.75, "grad_norm": 1.7300337613368677, "learning_rate": 3.0250851204586394e-06, "loss": 0.64, "step": 7406 }, { "epoch": 0.75, "grad_norm": 1.6253057086422333, "learning_rate": 3.0227247151217553e-06, "loss": 0.6651, "step": 7407 }, { "epoch": 0.75, "grad_norm": 1.7339750324433152, "learning_rate": 3.0203650670589945e-06, "loss": 0.672, "step": 7408 }, { "epoch": 0.75, "grad_norm": 1.477359102111727, "learning_rate": 3.0180061765264523e-06, "loss": 0.658, "step": 7409 }, { "epoch": 0.75, "grad_norm": 1.621876733888337, "learning_rate": 3.015648043780156e-06, "loss": 0.5895, "step": 7410 }, { "epoch": 0.75, "grad_norm": 1.6668748997638514, "learning_rate": 3.0132906690760398e-06, "loss": 0.6339, "step": 7411 }, { "epoch": 0.75, "grad_norm": 1.6796738882866564, "learning_rate": 3.0109340526699604e-06, "loss": 0.5918, "step": 7412 }, { "epoch": 0.75, "grad_norm": 1.5084714791846123, "learning_rate": 3.0085781948176974e-06, "loss": 0.6234, "step": 7413 }, { "epoch": 0.75, "grad_norm": 1.7944341554365648, "learning_rate": 3.006223095774933e-06, "loss": 0.7622, "step": 7414 }, { "epoch": 0.75, "grad_norm": 1.7037480514842924, "learning_rate": 3.003868755797286e-06, "loss": 0.7089, "step": 7415 }, { "epoch": 0.75, "grad_norm": 1.7032944351810269, "learning_rate": 3.0015151751402725e-06, "loss": 0.6763, "step": 7416 }, { "epoch": 0.75, "grad_norm": 1.745469835993225, "learning_rate": 2.9991623540593453e-06, "loss": 0.71, "step": 7417 }, { "epoch": 0.75, "grad_norm": 1.6436550601102142, "learning_rate": 2.996810292809865e-06, "loss": 0.8204, "step": 7418 }, { "epoch": 0.75, "grad_norm": 1.6882237970692697, "learning_rate": 2.994458991647108e-06, "loss": 0.6516, "step": 7419 }, { "epoch": 0.75, "grad_norm": 1.6759976105871366, "learning_rate": 2.9921084508262722e-06, "loss": 0.7869, "step": 7420 }, { "epoch": 0.75, "grad_norm": 1.632224254599666, "learning_rate": 2.9897586706024763e-06, "loss": 0.5967, "step": 7421 }, { "epoch": 0.75, "grad_norm": 1.7319331472604649, "learning_rate": 2.987409651230746e-06, "loss": 0.6773, "step": 7422 }, { "epoch": 0.76, "grad_norm": 1.6792342451955644, "learning_rate": 2.9850613929660366e-06, "loss": 0.698, "step": 7423 }, { "epoch": 0.76, "grad_norm": 1.7380245477625635, "learning_rate": 2.9827138960632076e-06, "loss": 0.6786, "step": 7424 }, { "epoch": 0.76, "grad_norm": 1.9112873659596736, "learning_rate": 2.980367160777048e-06, "loss": 0.8075, "step": 7425 }, { "epoch": 0.76, "grad_norm": 1.4329946044809587, "learning_rate": 2.978021187362261e-06, "loss": 0.5534, "step": 7426 }, { "epoch": 0.76, "grad_norm": 1.4934816434283067, "learning_rate": 2.97567597607346e-06, "loss": 0.5699, "step": 7427 }, { "epoch": 0.76, "grad_norm": 1.6117630516190384, "learning_rate": 2.9733315271651863e-06, "loss": 0.6782, "step": 7428 }, { "epoch": 0.76, "grad_norm": 1.8126687606012002, "learning_rate": 2.9709878408918867e-06, "loss": 0.7555, "step": 7429 }, { "epoch": 0.76, "grad_norm": 1.6912605643295366, "learning_rate": 2.968644917507937e-06, "loss": 0.746, "step": 7430 }, { "epoch": 0.76, "grad_norm": 1.626182602352006, "learning_rate": 2.966302757267625e-06, "loss": 0.661, "step": 7431 }, { "epoch": 0.76, "grad_norm": 1.5037473745802517, "learning_rate": 2.963961360425153e-06, "loss": 0.8026, "step": 7432 }, { "epoch": 0.76, "grad_norm": 1.559354351532829, "learning_rate": 2.961620727234645e-06, "loss": 0.725, "step": 7433 }, { "epoch": 0.76, "grad_norm": 1.9363196714314468, "learning_rate": 2.9592808579501364e-06, "loss": 0.7002, "step": 7434 }, { "epoch": 0.76, "grad_norm": 1.5776840241249293, "learning_rate": 2.956941752825587e-06, "loss": 0.634, "step": 7435 }, { "epoch": 0.76, "grad_norm": 1.7381703510621536, "learning_rate": 2.9546034121148714e-06, "loss": 0.7127, "step": 7436 }, { "epoch": 0.76, "grad_norm": 1.7190856091505584, "learning_rate": 2.952265836071776e-06, "loss": 0.6978, "step": 7437 }, { "epoch": 0.76, "grad_norm": 1.6735200753638035, "learning_rate": 2.9499290249500078e-06, "loss": 0.6708, "step": 7438 }, { "epoch": 0.76, "grad_norm": 1.7328932677954882, "learning_rate": 2.9475929790031975e-06, "loss": 0.8166, "step": 7439 }, { "epoch": 0.76, "grad_norm": 1.5408188030507148, "learning_rate": 2.945257698484879e-06, "loss": 0.7174, "step": 7440 }, { "epoch": 0.76, "grad_norm": 1.5689412370963598, "learning_rate": 2.9429231836485174e-06, "loss": 0.6592, "step": 7441 }, { "epoch": 0.76, "grad_norm": 1.6627008111717447, "learning_rate": 2.9405894347474793e-06, "loss": 0.6254, "step": 7442 }, { "epoch": 0.76, "grad_norm": 1.837083781585254, "learning_rate": 2.938256452035062e-06, "loss": 0.6579, "step": 7443 }, { "epoch": 0.76, "grad_norm": 1.7666715035355245, "learning_rate": 2.9359242357644757e-06, "loss": 0.7378, "step": 7444 }, { "epoch": 0.76, "grad_norm": 1.6119800098653143, "learning_rate": 2.9335927861888414e-06, "loss": 0.6269, "step": 7445 }, { "epoch": 0.76, "grad_norm": 1.7211340131128936, "learning_rate": 2.9312621035612077e-06, "loss": 0.8051, "step": 7446 }, { "epoch": 0.76, "grad_norm": 1.6180189522856365, "learning_rate": 2.9289321881345257e-06, "loss": 0.7323, "step": 7447 }, { "epoch": 0.76, "grad_norm": 1.7639254750773472, "learning_rate": 2.9266030401616762e-06, "loss": 0.6893, "step": 7448 }, { "epoch": 0.76, "grad_norm": 1.7179222972882686, "learning_rate": 2.924274659895454e-06, "loss": 0.7707, "step": 7449 }, { "epoch": 0.76, "grad_norm": 1.6182082420055828, "learning_rate": 2.9219470475885626e-06, "loss": 0.7779, "step": 7450 }, { "epoch": 0.76, "grad_norm": 1.6851692872088693, "learning_rate": 2.919620203493634e-06, "loss": 0.6607, "step": 7451 }, { "epoch": 0.76, "grad_norm": 1.640265505425813, "learning_rate": 2.917294127863204e-06, "loss": 0.7147, "step": 7452 }, { "epoch": 0.76, "grad_norm": 1.8049523286991427, "learning_rate": 2.9149688209497353e-06, "loss": 0.7451, "step": 7453 }, { "epoch": 0.76, "grad_norm": 1.7823267160412626, "learning_rate": 2.9126442830056066e-06, "loss": 0.6662, "step": 7454 }, { "epoch": 0.76, "grad_norm": 1.5495471833684278, "learning_rate": 2.910320514283104e-06, "loss": 0.7109, "step": 7455 }, { "epoch": 0.76, "grad_norm": 1.5447094856687085, "learning_rate": 2.9079975150344433e-06, "loss": 0.6821, "step": 7456 }, { "epoch": 0.76, "grad_norm": 1.858363830339255, "learning_rate": 2.905675285511742e-06, "loss": 0.6572, "step": 7457 }, { "epoch": 0.76, "grad_norm": 1.822158149306591, "learning_rate": 2.9033538259670458e-06, "loss": 0.7401, "step": 7458 }, { "epoch": 0.76, "grad_norm": 1.5929794743645305, "learning_rate": 2.901033136652316e-06, "loss": 0.6964, "step": 7459 }, { "epoch": 0.76, "grad_norm": 1.5170105854274156, "learning_rate": 2.89871321781942e-06, "loss": 0.6719, "step": 7460 }, { "epoch": 0.76, "grad_norm": 1.6390930304050815, "learning_rate": 2.8963940697201532e-06, "loss": 0.7041, "step": 7461 }, { "epoch": 0.76, "grad_norm": 1.7532602718217476, "learning_rate": 2.894075692606225e-06, "loss": 0.6553, "step": 7462 }, { "epoch": 0.76, "grad_norm": 1.7868171723470738, "learning_rate": 2.891758086729253e-06, "loss": 0.8173, "step": 7463 }, { "epoch": 0.76, "grad_norm": 1.828714452933475, "learning_rate": 2.8894412523407833e-06, "loss": 0.7114, "step": 7464 }, { "epoch": 0.76, "grad_norm": 1.6684999336577175, "learning_rate": 2.8871251896922657e-06, "loss": 0.6909, "step": 7465 }, { "epoch": 0.76, "grad_norm": 1.6634060136535094, "learning_rate": 2.884809899035077e-06, "loss": 0.7066, "step": 7466 }, { "epoch": 0.76, "grad_norm": 1.5578753536708678, "learning_rate": 2.882495380620507e-06, "loss": 0.629, "step": 7467 }, { "epoch": 0.76, "grad_norm": 1.856286344143219, "learning_rate": 2.8801816346997547e-06, "loss": 0.767, "step": 7468 }, { "epoch": 0.76, "grad_norm": 1.7429771399973584, "learning_rate": 2.877868661523947e-06, "loss": 0.7595, "step": 7469 }, { "epoch": 0.76, "grad_norm": 1.6852910997798851, "learning_rate": 2.875556461344117e-06, "loss": 0.6857, "step": 7470 }, { "epoch": 0.76, "grad_norm": 1.7373081083150153, "learning_rate": 2.873245034411217e-06, "loss": 0.6821, "step": 7471 }, { "epoch": 0.76, "grad_norm": 1.725776716881047, "learning_rate": 2.870934380976124e-06, "loss": 0.6353, "step": 7472 }, { "epoch": 0.76, "grad_norm": 1.5202372783896243, "learning_rate": 2.868624501289613e-06, "loss": 0.6614, "step": 7473 }, { "epoch": 0.76, "grad_norm": 1.7779656779653663, "learning_rate": 2.8663153956023938e-06, "loss": 0.7451, "step": 7474 }, { "epoch": 0.76, "grad_norm": 1.755161174321142, "learning_rate": 2.8640070641650776e-06, "loss": 0.7465, "step": 7475 }, { "epoch": 0.76, "grad_norm": 1.6513658016609747, "learning_rate": 2.8616995072281996e-06, "loss": 0.7849, "step": 7476 }, { "epoch": 0.76, "grad_norm": 1.8120431805519797, "learning_rate": 2.8593927250422126e-06, "loss": 0.7905, "step": 7477 }, { "epoch": 0.76, "grad_norm": 1.5569497819936433, "learning_rate": 2.8570867178574757e-06, "loss": 0.7119, "step": 7478 }, { "epoch": 0.76, "grad_norm": 1.573670506635637, "learning_rate": 2.854781485924273e-06, "loss": 0.6166, "step": 7479 }, { "epoch": 0.76, "grad_norm": 1.6970547434025136, "learning_rate": 2.852477029492804e-06, "loss": 0.8129, "step": 7480 }, { "epoch": 0.76, "grad_norm": 1.6728848081611243, "learning_rate": 2.8501733488131755e-06, "loss": 0.6801, "step": 7481 }, { "epoch": 0.76, "grad_norm": 1.786059631586086, "learning_rate": 2.847870444135422e-06, "loss": 0.7519, "step": 7482 }, { "epoch": 0.76, "grad_norm": 1.6756514266528544, "learning_rate": 2.8455683157094827e-06, "loss": 0.7224, "step": 7483 }, { "epoch": 0.76, "grad_norm": 1.5194335545928992, "learning_rate": 2.8432669637852208e-06, "loss": 0.5874, "step": 7484 }, { "epoch": 0.76, "grad_norm": 1.6876242980806946, "learning_rate": 2.840966388612413e-06, "loss": 0.7177, "step": 7485 }, { "epoch": 0.76, "grad_norm": 1.8113180493913745, "learning_rate": 2.838666590440746e-06, "loss": 0.7245, "step": 7486 }, { "epoch": 0.76, "grad_norm": 1.6203584425862203, "learning_rate": 2.8363675695198322e-06, "loss": 0.6698, "step": 7487 }, { "epoch": 0.76, "grad_norm": 1.4849723093207292, "learning_rate": 2.83406932609919e-06, "loss": 0.6125, "step": 7488 }, { "epoch": 0.76, "grad_norm": 1.850867969596928, "learning_rate": 2.8317718604282596e-06, "loss": 0.6292, "step": 7489 }, { "epoch": 0.76, "grad_norm": 1.5714885693578153, "learning_rate": 2.8294751727563986e-06, "loss": 0.6172, "step": 7490 }, { "epoch": 0.76, "grad_norm": 1.609748364086573, "learning_rate": 2.82717926333287e-06, "loss": 0.5915, "step": 7491 }, { "epoch": 0.76, "grad_norm": 1.7166256303963017, "learning_rate": 2.824884132406862e-06, "loss": 0.6597, "step": 7492 }, { "epoch": 0.76, "grad_norm": 1.9578253922637714, "learning_rate": 2.8225897802274784e-06, "loss": 0.8443, "step": 7493 }, { "epoch": 0.76, "grad_norm": 1.815363824497048, "learning_rate": 2.820296207043728e-06, "loss": 0.7102, "step": 7494 }, { "epoch": 0.76, "grad_norm": 1.634902000615274, "learning_rate": 2.8180034131045463e-06, "loss": 0.7201, "step": 7495 }, { "epoch": 0.76, "grad_norm": 1.5772494222413496, "learning_rate": 2.8157113986587846e-06, "loss": 0.65, "step": 7496 }, { "epoch": 0.76, "grad_norm": 1.5834670448103767, "learning_rate": 2.8134201639551974e-06, "loss": 0.6062, "step": 7497 }, { "epoch": 0.76, "grad_norm": 1.7440391226487488, "learning_rate": 2.811129709242465e-06, "loss": 0.6941, "step": 7498 }, { "epoch": 0.76, "grad_norm": 1.7803969915245104, "learning_rate": 2.8088400347691845e-06, "loss": 0.6201, "step": 7499 }, { "epoch": 0.76, "grad_norm": 1.646599239304442, "learning_rate": 2.8065511407838584e-06, "loss": 0.6177, "step": 7500 }, { "epoch": 0.76, "grad_norm": 1.6726681217351695, "learning_rate": 2.8042630275349168e-06, "loss": 0.7302, "step": 7501 }, { "epoch": 0.76, "grad_norm": 1.6567772383571295, "learning_rate": 2.801975695270691e-06, "loss": 0.6778, "step": 7502 }, { "epoch": 0.76, "grad_norm": 1.7253150467123428, "learning_rate": 2.7996891442394393e-06, "loss": 0.7049, "step": 7503 }, { "epoch": 0.76, "grad_norm": 1.6743500942376304, "learning_rate": 2.7974033746893326e-06, "loss": 0.702, "step": 7504 }, { "epoch": 0.76, "grad_norm": 1.6545542816533727, "learning_rate": 2.7951183868684516e-06, "loss": 0.8051, "step": 7505 }, { "epoch": 0.76, "grad_norm": 1.5076080445687705, "learning_rate": 2.792834181024798e-06, "loss": 0.6293, "step": 7506 }, { "epoch": 0.76, "grad_norm": 1.7708757256910792, "learning_rate": 2.7905507574062895e-06, "loss": 0.7554, "step": 7507 }, { "epoch": 0.76, "grad_norm": 1.7475955529744212, "learning_rate": 2.7882681162607494e-06, "loss": 0.7865, "step": 7508 }, { "epoch": 0.76, "grad_norm": 1.472225771004639, "learning_rate": 2.7859862578359276e-06, "loss": 0.6569, "step": 7509 }, { "epoch": 0.76, "grad_norm": 1.6686558241730758, "learning_rate": 2.783705182379485e-06, "loss": 0.7829, "step": 7510 }, { "epoch": 0.76, "grad_norm": 1.695276979117581, "learning_rate": 2.781424890138992e-06, "loss": 0.7095, "step": 7511 }, { "epoch": 0.76, "grad_norm": 1.646657766974499, "learning_rate": 2.779145381361942e-06, "loss": 0.6725, "step": 7512 }, { "epoch": 0.76, "grad_norm": 1.783718666727429, "learning_rate": 2.7768666562957413e-06, "loss": 0.6186, "step": 7513 }, { "epoch": 0.76, "grad_norm": 1.4484205856045251, "learning_rate": 2.774588715187705e-06, "loss": 0.5485, "step": 7514 }, { "epoch": 0.76, "grad_norm": 1.5731511109970013, "learning_rate": 2.7723115582850714e-06, "loss": 0.7341, "step": 7515 }, { "epoch": 0.76, "grad_norm": 1.691000533782961, "learning_rate": 2.7700351858349917e-06, "loss": 0.6658, "step": 7516 }, { "epoch": 0.76, "grad_norm": 1.7362006623624997, "learning_rate": 2.767759598084526e-06, "loss": 0.729, "step": 7517 }, { "epoch": 0.76, "grad_norm": 1.8996942080774435, "learning_rate": 2.7654847952806597e-06, "loss": 0.7411, "step": 7518 }, { "epoch": 0.76, "grad_norm": 1.6757634471720677, "learning_rate": 2.763210777670281e-06, "loss": 0.7495, "step": 7519 }, { "epoch": 0.76, "grad_norm": 1.6398472456760649, "learning_rate": 2.7609375455002007e-06, "loss": 0.6899, "step": 7520 }, { "epoch": 0.77, "grad_norm": 1.6280316209407817, "learning_rate": 2.758665099017147e-06, "loss": 0.7565, "step": 7521 }, { "epoch": 0.77, "grad_norm": 1.7555060068451787, "learning_rate": 2.7563934384677525e-06, "loss": 0.6801, "step": 7522 }, { "epoch": 0.77, "grad_norm": 1.503827561698193, "learning_rate": 2.7541225640985757e-06, "loss": 0.6543, "step": 7523 }, { "epoch": 0.77, "grad_norm": 1.3849744538887527, "learning_rate": 2.7518524761560784e-06, "loss": 0.6735, "step": 7524 }, { "epoch": 0.77, "grad_norm": 1.4838992879970025, "learning_rate": 2.749583174886646e-06, "loss": 0.6396, "step": 7525 }, { "epoch": 0.77, "grad_norm": 1.6575210361822101, "learning_rate": 2.7473146605365807e-06, "loss": 0.7453, "step": 7526 }, { "epoch": 0.77, "grad_norm": 1.7375430861012504, "learning_rate": 2.7450469333520856e-06, "loss": 0.7819, "step": 7527 }, { "epoch": 0.77, "grad_norm": 1.6032934711359028, "learning_rate": 2.7427799935792943e-06, "loss": 0.7154, "step": 7528 }, { "epoch": 0.77, "grad_norm": 1.7372976790322099, "learning_rate": 2.7405138414642405e-06, "loss": 0.6819, "step": 7529 }, { "epoch": 0.77, "grad_norm": 1.734259457654482, "learning_rate": 2.738248477252885e-06, "loss": 0.762, "step": 7530 }, { "epoch": 0.77, "grad_norm": 1.5259134220595947, "learning_rate": 2.7359839011910983e-06, "loss": 0.6118, "step": 7531 }, { "epoch": 0.77, "grad_norm": 1.6101814510917998, "learning_rate": 2.7337201135246604e-06, "loss": 0.7868, "step": 7532 }, { "epoch": 0.77, "grad_norm": 1.536141528487024, "learning_rate": 2.731457114499272e-06, "loss": 0.6493, "step": 7533 }, { "epoch": 0.77, "grad_norm": 1.8158612352878118, "learning_rate": 2.729194904360549e-06, "loss": 0.7682, "step": 7534 }, { "epoch": 0.77, "grad_norm": 1.5484988134903386, "learning_rate": 2.726933483354014e-06, "loss": 0.7837, "step": 7535 }, { "epoch": 0.77, "grad_norm": 1.6539243202922078, "learning_rate": 2.7246728517251142e-06, "loss": 0.6873, "step": 7536 }, { "epoch": 0.77, "grad_norm": 1.6073679457280263, "learning_rate": 2.722413009719199e-06, "loss": 0.6061, "step": 7537 }, { "epoch": 0.77, "grad_norm": 1.6858862021617782, "learning_rate": 2.720153957581544e-06, "loss": 0.6199, "step": 7538 }, { "epoch": 0.77, "grad_norm": 1.6222117288119169, "learning_rate": 2.7178956955573367e-06, "loss": 0.7623, "step": 7539 }, { "epoch": 0.77, "grad_norm": 1.733992369513456, "learning_rate": 2.715638223891668e-06, "loss": 0.7118, "step": 7540 }, { "epoch": 0.77, "grad_norm": 1.4844849837150704, "learning_rate": 2.7133815428295585e-06, "loss": 0.6526, "step": 7541 }, { "epoch": 0.77, "grad_norm": 1.6291670828286082, "learning_rate": 2.711125652615929e-06, "loss": 0.7378, "step": 7542 }, { "epoch": 0.77, "grad_norm": 1.6348415045516584, "learning_rate": 2.708870553495625e-06, "loss": 0.7867, "step": 7543 }, { "epoch": 0.77, "grad_norm": 1.6860056402117856, "learning_rate": 2.706616245713406e-06, "loss": 0.6777, "step": 7544 }, { "epoch": 0.77, "grad_norm": 1.604620396060263, "learning_rate": 2.704362729513934e-06, "loss": 0.712, "step": 7545 }, { "epoch": 0.77, "grad_norm": 1.4305280672456266, "learning_rate": 2.7021100051417994e-06, "loss": 0.7249, "step": 7546 }, { "epoch": 0.77, "grad_norm": 1.7898274507982526, "learning_rate": 2.6998580728414948e-06, "loss": 0.7568, "step": 7547 }, { "epoch": 0.77, "grad_norm": 1.6675353834219373, "learning_rate": 2.697606932857435e-06, "loss": 0.705, "step": 7548 }, { "epoch": 0.77, "grad_norm": 1.6749144420557143, "learning_rate": 2.695356585433949e-06, "loss": 0.8273, "step": 7549 }, { "epoch": 0.77, "grad_norm": 1.6316908542704445, "learning_rate": 2.693107030815271e-06, "loss": 0.6924, "step": 7550 }, { "epoch": 0.77, "grad_norm": 1.7561594210981057, "learning_rate": 2.6908582692455575e-06, "loss": 0.7581, "step": 7551 }, { "epoch": 0.77, "grad_norm": 1.6024697973330937, "learning_rate": 2.688610300968879e-06, "loss": 0.6427, "step": 7552 }, { "epoch": 0.77, "grad_norm": 1.6147055907512202, "learning_rate": 2.6863631262292136e-06, "loss": 0.7513, "step": 7553 }, { "epoch": 0.77, "grad_norm": 1.652596883620534, "learning_rate": 2.6841167452704597e-06, "loss": 0.6232, "step": 7554 }, { "epoch": 0.77, "grad_norm": 1.783816422018661, "learning_rate": 2.681871158336423e-06, "loss": 0.7052, "step": 7555 }, { "epoch": 0.77, "grad_norm": 1.7142749273013935, "learning_rate": 2.6796263656708297e-06, "loss": 0.7456, "step": 7556 }, { "epoch": 0.77, "grad_norm": 1.6310600719430486, "learning_rate": 2.6773823675173195e-06, "loss": 0.6341, "step": 7557 }, { "epoch": 0.77, "grad_norm": 1.7806935404941204, "learning_rate": 2.6751391641194378e-06, "loss": 0.7257, "step": 7558 }, { "epoch": 0.77, "grad_norm": 1.6827389065638336, "learning_rate": 2.6728967557206544e-06, "loss": 0.6987, "step": 7559 }, { "epoch": 0.77, "grad_norm": 1.6514931921887344, "learning_rate": 2.670655142564341e-06, "loss": 0.7026, "step": 7560 }, { "epoch": 0.77, "grad_norm": 1.616645078206799, "learning_rate": 2.6684143248937955e-06, "loss": 0.7544, "step": 7561 }, { "epoch": 0.77, "grad_norm": 1.7718859583686468, "learning_rate": 2.6661743029522236e-06, "loss": 0.6108, "step": 7562 }, { "epoch": 0.77, "grad_norm": 1.68104639669912, "learning_rate": 2.663935076982741e-06, "loss": 0.8155, "step": 7563 }, { "epoch": 0.77, "grad_norm": 1.8021958735996275, "learning_rate": 2.6616966472283843e-06, "loss": 0.7101, "step": 7564 }, { "epoch": 0.77, "grad_norm": 1.5721868545560538, "learning_rate": 2.6594590139320962e-06, "loss": 0.6331, "step": 7565 }, { "epoch": 0.77, "grad_norm": 1.777546054232048, "learning_rate": 2.6572221773367392e-06, "loss": 0.7279, "step": 7566 }, { "epoch": 0.77, "grad_norm": 1.845961818905768, "learning_rate": 2.6549861376850883e-06, "loss": 0.7134, "step": 7567 }, { "epoch": 0.77, "grad_norm": 1.6594238425370311, "learning_rate": 2.6527508952198276e-06, "loss": 0.7744, "step": 7568 }, { "epoch": 0.77, "grad_norm": 1.5296321223944114, "learning_rate": 2.650516450183558e-06, "loss": 0.6848, "step": 7569 }, { "epoch": 0.77, "grad_norm": 1.5650590985861443, "learning_rate": 2.6482828028187977e-06, "loss": 0.6022, "step": 7570 }, { "epoch": 0.77, "grad_norm": 1.7019456186669477, "learning_rate": 2.6460499533679685e-06, "loss": 0.6716, "step": 7571 }, { "epoch": 0.77, "grad_norm": 1.716828661162016, "learning_rate": 2.6438179020734165e-06, "loss": 0.6754, "step": 7572 }, { "epoch": 0.77, "grad_norm": 1.5025504069938012, "learning_rate": 2.641586649177391e-06, "loss": 0.7557, "step": 7573 }, { "epoch": 0.77, "grad_norm": 1.6821351398535194, "learning_rate": 2.6393561949220625e-06, "loss": 0.6906, "step": 7574 }, { "epoch": 0.77, "grad_norm": 1.6593300718919421, "learning_rate": 2.637126539549514e-06, "loss": 0.7463, "step": 7575 }, { "epoch": 0.77, "grad_norm": 2.0347185767831557, "learning_rate": 2.6348976833017336e-06, "loss": 0.7112, "step": 7576 }, { "epoch": 0.77, "grad_norm": 1.7191074392544898, "learning_rate": 2.6326696264206352e-06, "loss": 0.8615, "step": 7577 }, { "epoch": 0.77, "grad_norm": 1.8354634059261874, "learning_rate": 2.630442369148034e-06, "loss": 0.7765, "step": 7578 }, { "epoch": 0.77, "grad_norm": 1.61215000399093, "learning_rate": 2.628215911725668e-06, "loss": 0.7777, "step": 7579 }, { "epoch": 0.77, "grad_norm": 1.6966535322409748, "learning_rate": 2.6259902543951844e-06, "loss": 0.7105, "step": 7580 }, { "epoch": 0.77, "grad_norm": 1.6420719462317774, "learning_rate": 2.6237653973981393e-06, "loss": 0.6423, "step": 7581 }, { "epoch": 0.77, "grad_norm": 1.729672866162764, "learning_rate": 2.6215413409760113e-06, "loss": 0.755, "step": 7582 }, { "epoch": 0.77, "grad_norm": 1.8109345442166387, "learning_rate": 2.6193180853701825e-06, "loss": 0.7652, "step": 7583 }, { "epoch": 0.77, "grad_norm": 1.670833498715669, "learning_rate": 2.617095630821955e-06, "loss": 0.7259, "step": 7584 }, { "epoch": 0.77, "grad_norm": 1.626731046412688, "learning_rate": 2.6148739775725427e-06, "loss": 0.6965, "step": 7585 }, { "epoch": 0.77, "grad_norm": 1.7216672219356004, "learning_rate": 2.6126531258630682e-06, "loss": 0.5594, "step": 7586 }, { "epoch": 0.77, "grad_norm": 1.6056511431005087, "learning_rate": 2.6104330759345707e-06, "loss": 0.7508, "step": 7587 }, { "epoch": 0.77, "grad_norm": 1.727656277588795, "learning_rate": 2.6082138280280068e-06, "loss": 0.703, "step": 7588 }, { "epoch": 0.77, "grad_norm": 1.5854088043978658, "learning_rate": 2.6059953823842333e-06, "loss": 0.6803, "step": 7589 }, { "epoch": 0.77, "grad_norm": 1.599543277038824, "learning_rate": 2.6037777392440346e-06, "loss": 0.6838, "step": 7590 }, { "epoch": 0.77, "grad_norm": 1.4815510408464663, "learning_rate": 2.6015608988480956e-06, "loss": 0.6471, "step": 7591 }, { "epoch": 0.77, "grad_norm": 1.789488671669432, "learning_rate": 2.599344861437023e-06, "loss": 0.8288, "step": 7592 }, { "epoch": 0.77, "grad_norm": 1.7897867030103445, "learning_rate": 2.5971296272513347e-06, "loss": 0.6958, "step": 7593 }, { "epoch": 0.77, "grad_norm": 1.7961417034922662, "learning_rate": 2.5949151965314547e-06, "loss": 0.6808, "step": 7594 }, { "epoch": 0.77, "grad_norm": 1.4180384475284087, "learning_rate": 2.5927015695177305e-06, "loss": 0.6841, "step": 7595 }, { "epoch": 0.77, "grad_norm": 1.6465686467448446, "learning_rate": 2.5904887464504115e-06, "loss": 0.6897, "step": 7596 }, { "epoch": 0.77, "grad_norm": 1.6561505477920748, "learning_rate": 2.5882767275696663e-06, "loss": 0.7551, "step": 7597 }, { "epoch": 0.77, "grad_norm": 1.6116171921167224, "learning_rate": 2.5860655131155788e-06, "loss": 0.6677, "step": 7598 }, { "epoch": 0.77, "grad_norm": 1.7015043649888153, "learning_rate": 2.5838551033281366e-06, "loss": 0.7358, "step": 7599 }, { "epoch": 0.77, "grad_norm": 1.6427765139219823, "learning_rate": 2.581645498447247e-06, "loss": 0.7323, "step": 7600 }, { "epoch": 0.77, "grad_norm": 1.6021299536685927, "learning_rate": 2.579436698712732e-06, "loss": 0.7018, "step": 7601 }, { "epoch": 0.77, "grad_norm": 1.544168120396529, "learning_rate": 2.577228704364314e-06, "loss": 0.7018, "step": 7602 }, { "epoch": 0.77, "grad_norm": 1.5393286785916402, "learning_rate": 2.575021515641646e-06, "loss": 0.6846, "step": 7603 }, { "epoch": 0.77, "grad_norm": 1.5298286568053745, "learning_rate": 2.572815132784274e-06, "loss": 0.56, "step": 7604 }, { "epoch": 0.77, "grad_norm": 1.846411894237571, "learning_rate": 2.570609556031672e-06, "loss": 0.7396, "step": 7605 }, { "epoch": 0.77, "grad_norm": 1.7371574415157183, "learning_rate": 2.568404785623224e-06, "loss": 0.7063, "step": 7606 }, { "epoch": 0.77, "grad_norm": 1.6308986377759827, "learning_rate": 2.566200821798216e-06, "loss": 0.6815, "step": 7607 }, { "epoch": 0.77, "grad_norm": 1.6082901733996213, "learning_rate": 2.5639976647958564e-06, "loss": 0.6715, "step": 7608 }, { "epoch": 0.77, "grad_norm": 1.7413064688558009, "learning_rate": 2.5617953148552677e-06, "loss": 0.7327, "step": 7609 }, { "epoch": 0.77, "grad_norm": 1.5715591340768313, "learning_rate": 2.559593772215474e-06, "loss": 0.6851, "step": 7610 }, { "epoch": 0.77, "grad_norm": 1.7550216930518252, "learning_rate": 2.557393037115421e-06, "loss": 0.7452, "step": 7611 }, { "epoch": 0.77, "grad_norm": 1.4456545312409386, "learning_rate": 2.5551931097939676e-06, "loss": 0.5707, "step": 7612 }, { "epoch": 0.77, "grad_norm": 1.5996138048373714, "learning_rate": 2.552993990489876e-06, "loss": 0.7159, "step": 7613 }, { "epoch": 0.77, "grad_norm": 1.526924196735615, "learning_rate": 2.5507956794418275e-06, "loss": 0.6277, "step": 7614 }, { "epoch": 0.77, "grad_norm": 1.6733090546228477, "learning_rate": 2.548598176888419e-06, "loss": 0.7264, "step": 7615 }, { "epoch": 0.77, "grad_norm": 1.5680082178643975, "learning_rate": 2.5464014830681496e-06, "loss": 0.7707, "step": 7616 }, { "epoch": 0.77, "grad_norm": 1.7232490509422664, "learning_rate": 2.544205598219437e-06, "loss": 0.7231, "step": 7617 }, { "epoch": 0.77, "grad_norm": 1.7050713253171732, "learning_rate": 2.5420105225806134e-06, "loss": 0.6982, "step": 7618 }, { "epoch": 0.77, "grad_norm": 1.6466518129271135, "learning_rate": 2.5398162563899154e-06, "loss": 0.7152, "step": 7619 }, { "epoch": 0.78, "grad_norm": 1.640540070141524, "learning_rate": 2.5376227998854987e-06, "loss": 0.6974, "step": 7620 }, { "epoch": 0.78, "grad_norm": 1.6753118861849827, "learning_rate": 2.5354301533054315e-06, "loss": 0.7223, "step": 7621 }, { "epoch": 0.78, "grad_norm": 1.6313335673420037, "learning_rate": 2.5332383168876853e-06, "loss": 0.6825, "step": 7622 }, { "epoch": 0.78, "grad_norm": 1.856265822522839, "learning_rate": 2.5310472908701555e-06, "loss": 0.7676, "step": 7623 }, { "epoch": 0.78, "grad_norm": 1.5648429901605199, "learning_rate": 2.5288570754906374e-06, "loss": 0.7181, "step": 7624 }, { "epoch": 0.78, "grad_norm": 1.5724235897268772, "learning_rate": 2.5266676709868486e-06, "loss": 0.697, "step": 7625 }, { "epoch": 0.78, "grad_norm": 1.6165299290103299, "learning_rate": 2.524479077596418e-06, "loss": 0.7445, "step": 7626 }, { "epoch": 0.78, "grad_norm": 1.7399987056517636, "learning_rate": 2.5222912955568757e-06, "loss": 0.7388, "step": 7627 }, { "epoch": 0.78, "grad_norm": 1.5705171211115758, "learning_rate": 2.5201043251056756e-06, "loss": 0.7054, "step": 7628 }, { "epoch": 0.78, "grad_norm": 1.7790089942542553, "learning_rate": 2.517918166480181e-06, "loss": 0.7001, "step": 7629 }, { "epoch": 0.78, "grad_norm": 1.6494147915260875, "learning_rate": 2.51573281991766e-06, "loss": 0.7247, "step": 7630 }, { "epoch": 0.78, "grad_norm": 1.5286186025147486, "learning_rate": 2.5135482856553027e-06, "loss": 0.6604, "step": 7631 }, { "epoch": 0.78, "grad_norm": 1.7716342052578273, "learning_rate": 2.511364563930202e-06, "loss": 0.7095, "step": 7632 }, { "epoch": 0.78, "grad_norm": 1.5227796689934394, "learning_rate": 2.509181654979368e-06, "loss": 0.6938, "step": 7633 }, { "epoch": 0.78, "grad_norm": 1.7793672610158484, "learning_rate": 2.5069995590397255e-06, "loss": 0.7239, "step": 7634 }, { "epoch": 0.78, "grad_norm": 1.7725492488200256, "learning_rate": 2.504818276348101e-06, "loss": 0.6739, "step": 7635 }, { "epoch": 0.78, "grad_norm": 1.6668210564804644, "learning_rate": 2.5026378071412428e-06, "loss": 0.653, "step": 7636 }, { "epoch": 0.78, "grad_norm": 1.543062332712173, "learning_rate": 2.5004581516558033e-06, "loss": 0.509, "step": 7637 }, { "epoch": 0.78, "grad_norm": 1.767271637712074, "learning_rate": 2.498279310128351e-06, "loss": 0.6499, "step": 7638 }, { "epoch": 0.78, "grad_norm": 1.6216565293780412, "learning_rate": 2.496101282795369e-06, "loss": 0.7518, "step": 7639 }, { "epoch": 0.78, "grad_norm": 1.85997349227327, "learning_rate": 2.493924069893241e-06, "loss": 0.6976, "step": 7640 }, { "epoch": 0.78, "grad_norm": 1.661397393733398, "learning_rate": 2.4917476716582776e-06, "loss": 0.7266, "step": 7641 }, { "epoch": 0.78, "grad_norm": 1.5895203634704989, "learning_rate": 2.4895720883266848e-06, "loss": 0.6325, "step": 7642 }, { "epoch": 0.78, "grad_norm": 1.680372868464412, "learning_rate": 2.4873973201345924e-06, "loss": 0.7073, "step": 7643 }, { "epoch": 0.78, "grad_norm": 1.6119049547484752, "learning_rate": 2.4852233673180394e-06, "loss": 0.5843, "step": 7644 }, { "epoch": 0.78, "grad_norm": 1.6759906839769962, "learning_rate": 2.483050230112969e-06, "loss": 0.7405, "step": 7645 }, { "epoch": 0.78, "grad_norm": 1.8364949521475253, "learning_rate": 2.480877908755245e-06, "loss": 0.72, "step": 7646 }, { "epoch": 0.78, "grad_norm": 1.6927374995881244, "learning_rate": 2.4787064034806397e-06, "loss": 0.779, "step": 7647 }, { "epoch": 0.78, "grad_norm": 1.5775482181187785, "learning_rate": 2.4765357145248327e-06, "loss": 0.6775, "step": 7648 }, { "epoch": 0.78, "grad_norm": 1.9862724855522034, "learning_rate": 2.474365842123422e-06, "loss": 0.7681, "step": 7649 }, { "epoch": 0.78, "grad_norm": 1.5724179992615193, "learning_rate": 2.472196786511909e-06, "loss": 0.6553, "step": 7650 }, { "epoch": 0.78, "grad_norm": 1.711717378165331, "learning_rate": 2.4700285479257125e-06, "loss": 0.6962, "step": 7651 }, { "epoch": 0.78, "grad_norm": 1.6741527308645596, "learning_rate": 2.467861126600165e-06, "loss": 0.6617, "step": 7652 }, { "epoch": 0.78, "grad_norm": 1.7115676529691866, "learning_rate": 2.4656945227705e-06, "loss": 0.7768, "step": 7653 }, { "epoch": 0.78, "grad_norm": 1.575870000219261, "learning_rate": 2.463528736671874e-06, "loss": 0.6564, "step": 7654 }, { "epoch": 0.78, "grad_norm": 1.6279772119924147, "learning_rate": 2.4613637685393433e-06, "loss": 0.704, "step": 7655 }, { "epoch": 0.78, "grad_norm": 1.601742728230388, "learning_rate": 2.459199618607885e-06, "loss": 0.6312, "step": 7656 }, { "epoch": 0.78, "grad_norm": 1.6462873763439139, "learning_rate": 2.4570362871123856e-06, "loss": 0.745, "step": 7657 }, { "epoch": 0.78, "grad_norm": 1.7267682492023038, "learning_rate": 2.454873774287635e-06, "loss": 0.7738, "step": 7658 }, { "epoch": 0.78, "grad_norm": 1.8839223215834573, "learning_rate": 2.452712080368348e-06, "loss": 0.7788, "step": 7659 }, { "epoch": 0.78, "grad_norm": 1.6246102550786494, "learning_rate": 2.4505512055891345e-06, "loss": 0.7169, "step": 7660 }, { "epoch": 0.78, "grad_norm": 1.586426427748253, "learning_rate": 2.448391150184527e-06, "loss": 0.7218, "step": 7661 }, { "epoch": 0.78, "grad_norm": 1.6988982131523829, "learning_rate": 2.44623191438897e-06, "loss": 0.6827, "step": 7662 }, { "epoch": 0.78, "grad_norm": 1.665934565217917, "learning_rate": 2.4440734984368075e-06, "loss": 0.596, "step": 7663 }, { "epoch": 0.78, "grad_norm": 1.603377046407116, "learning_rate": 2.441915902562305e-06, "loss": 0.6256, "step": 7664 }, { "epoch": 0.78, "grad_norm": 1.5480250415462409, "learning_rate": 2.439759126999639e-06, "loss": 0.6626, "step": 7665 }, { "epoch": 0.78, "grad_norm": 1.5877128801469451, "learning_rate": 2.4376031719828885e-06, "loss": 0.5916, "step": 7666 }, { "epoch": 0.78, "grad_norm": 1.631419448675794, "learning_rate": 2.4354480377460544e-06, "loss": 0.665, "step": 7667 }, { "epoch": 0.78, "grad_norm": 1.5382330429891682, "learning_rate": 2.433293724523036e-06, "loss": 0.6861, "step": 7668 }, { "epoch": 0.78, "grad_norm": 1.7312481669876751, "learning_rate": 2.431140232547653e-06, "loss": 0.7801, "step": 7669 }, { "epoch": 0.78, "grad_norm": 1.9421959679511438, "learning_rate": 2.4289875620536375e-06, "loss": 0.7, "step": 7670 }, { "epoch": 0.78, "grad_norm": 1.691814528916687, "learning_rate": 2.426835713274622e-06, "loss": 0.6839, "step": 7671 }, { "epoch": 0.78, "grad_norm": 1.5587877855016785, "learning_rate": 2.424684686444162e-06, "loss": 0.6143, "step": 7672 }, { "epoch": 0.78, "grad_norm": 1.6222870119900274, "learning_rate": 2.422534481795711e-06, "loss": 0.6179, "step": 7673 }, { "epoch": 0.78, "grad_norm": 1.6791909756698284, "learning_rate": 2.420385099562644e-06, "loss": 0.6535, "step": 7674 }, { "epoch": 0.78, "grad_norm": 1.6326619615601652, "learning_rate": 2.4182365399782448e-06, "loss": 0.7029, "step": 7675 }, { "epoch": 0.78, "grad_norm": 1.5625829982073207, "learning_rate": 2.4160888032757014e-06, "loss": 0.7023, "step": 7676 }, { "epoch": 0.78, "grad_norm": 1.5667168902829505, "learning_rate": 2.413941889688123e-06, "loss": 0.7171, "step": 7677 }, { "epoch": 0.78, "grad_norm": 1.6014556397474373, "learning_rate": 2.4117957994485154e-06, "loss": 0.692, "step": 7678 }, { "epoch": 0.78, "grad_norm": 1.8147424714482825, "learning_rate": 2.409650532789808e-06, "loss": 0.7188, "step": 7679 }, { "epoch": 0.78, "grad_norm": 1.645615433366562, "learning_rate": 2.4075060899448388e-06, "loss": 0.6687, "step": 7680 }, { "epoch": 0.78, "grad_norm": 1.6718486820675713, "learning_rate": 2.4053624711463476e-06, "loss": 0.6324, "step": 7681 }, { "epoch": 0.78, "grad_norm": 1.8500146601648426, "learning_rate": 2.4032196766269924e-06, "loss": 0.7548, "step": 7682 }, { "epoch": 0.78, "grad_norm": 1.580356225872168, "learning_rate": 2.4010777066193437e-06, "loss": 0.7034, "step": 7683 }, { "epoch": 0.78, "grad_norm": 1.5901655451727288, "learning_rate": 2.3989365613558745e-06, "loss": 0.8001, "step": 7684 }, { "epoch": 0.78, "grad_norm": 1.6629579059500412, "learning_rate": 2.396796241068976e-06, "loss": 0.696, "step": 7685 }, { "epoch": 0.78, "grad_norm": 1.822392458462985, "learning_rate": 2.394656745990942e-06, "loss": 0.7652, "step": 7686 }, { "epoch": 0.78, "grad_norm": 1.62098466809815, "learning_rate": 2.3925180763539847e-06, "loss": 0.6656, "step": 7687 }, { "epoch": 0.78, "grad_norm": 1.4926140404062171, "learning_rate": 2.3903802323902246e-06, "loss": 0.6894, "step": 7688 }, { "epoch": 0.78, "grad_norm": 1.5855805740664932, "learning_rate": 2.388243214331686e-06, "loss": 0.6984, "step": 7689 }, { "epoch": 0.78, "grad_norm": 1.6375661414055525, "learning_rate": 2.3861070224103156e-06, "loss": 0.5445, "step": 7690 }, { "epoch": 0.78, "grad_norm": 1.593248155857221, "learning_rate": 2.3839716568579563e-06, "loss": 0.6813, "step": 7691 }, { "epoch": 0.78, "grad_norm": 1.6421393986690094, "learning_rate": 2.3818371179063728e-06, "loss": 0.6451, "step": 7692 }, { "epoch": 0.78, "grad_norm": 1.7259233059031243, "learning_rate": 2.379703405787238e-06, "loss": 0.6793, "step": 7693 }, { "epoch": 0.78, "grad_norm": 1.7278740003752728, "learning_rate": 2.3775705207321275e-06, "loss": 0.6303, "step": 7694 }, { "epoch": 0.78, "grad_norm": 1.6994493656161245, "learning_rate": 2.3754384629725392e-06, "loss": 0.7465, "step": 7695 }, { "epoch": 0.78, "grad_norm": 1.3677942954050732, "learning_rate": 2.373307232739868e-06, "loss": 0.6011, "step": 7696 }, { "epoch": 0.78, "grad_norm": 1.5774428491429453, "learning_rate": 2.371176830265427e-06, "loss": 0.6296, "step": 7697 }, { "epoch": 0.78, "grad_norm": 1.6453817635216235, "learning_rate": 2.3690472557804436e-06, "loss": 0.6988, "step": 7698 }, { "epoch": 0.78, "grad_norm": 1.4742886891745193, "learning_rate": 2.366918509516043e-06, "loss": 0.676, "step": 7699 }, { "epoch": 0.78, "grad_norm": 1.7364301584235993, "learning_rate": 2.36479059170327e-06, "loss": 0.6986, "step": 7700 }, { "epoch": 0.78, "grad_norm": 1.6897643385036853, "learning_rate": 2.3626635025730803e-06, "loss": 0.7172, "step": 7701 }, { "epoch": 0.78, "grad_norm": 1.7457534652300224, "learning_rate": 2.360537242356329e-06, "loss": 0.6111, "step": 7702 }, { "epoch": 0.78, "grad_norm": 1.7300879418703208, "learning_rate": 2.358411811283796e-06, "loss": 0.8058, "step": 7703 }, { "epoch": 0.78, "grad_norm": 1.6630483822750175, "learning_rate": 2.356287209586157e-06, "loss": 0.6579, "step": 7704 }, { "epoch": 0.78, "grad_norm": 1.7146054640600066, "learning_rate": 2.354163437494006e-06, "loss": 0.6689, "step": 7705 }, { "epoch": 0.78, "grad_norm": 1.7399670370764073, "learning_rate": 2.3520404952378495e-06, "loss": 0.7783, "step": 7706 }, { "epoch": 0.78, "grad_norm": 1.82897046288453, "learning_rate": 2.349918383048094e-06, "loss": 0.6876, "step": 7707 }, { "epoch": 0.78, "grad_norm": 1.55184455103855, "learning_rate": 2.347797101155067e-06, "loss": 0.635, "step": 7708 }, { "epoch": 0.78, "grad_norm": 1.6004898009171504, "learning_rate": 2.345676649788995e-06, "loss": 0.6636, "step": 7709 }, { "epoch": 0.78, "grad_norm": 1.6645234770267519, "learning_rate": 2.3435570291800214e-06, "loss": 0.6275, "step": 7710 }, { "epoch": 0.78, "grad_norm": 1.6193108195328993, "learning_rate": 2.3414382395582024e-06, "loss": 0.6788, "step": 7711 }, { "epoch": 0.78, "grad_norm": 1.6830045676431642, "learning_rate": 2.339320281153494e-06, "loss": 0.6558, "step": 7712 }, { "epoch": 0.78, "grad_norm": 1.5169238635880982, "learning_rate": 2.3372031541957684e-06, "loss": 0.6622, "step": 7713 }, { "epoch": 0.78, "grad_norm": 1.6380575150031764, "learning_rate": 2.335086858914811e-06, "loss": 0.7452, "step": 7714 }, { "epoch": 0.78, "grad_norm": 1.834230091319366, "learning_rate": 2.3329713955403067e-06, "loss": 0.6683, "step": 7715 }, { "epoch": 0.78, "grad_norm": 1.585914593940905, "learning_rate": 2.3308567643018574e-06, "loss": 0.6325, "step": 7716 }, { "epoch": 0.78, "grad_norm": 1.5950944991975575, "learning_rate": 2.3287429654289774e-06, "loss": 0.7191, "step": 7717 }, { "epoch": 0.79, "grad_norm": 1.7535720455581632, "learning_rate": 2.326629999151081e-06, "loss": 0.6727, "step": 7718 }, { "epoch": 0.79, "grad_norm": 1.650372784385992, "learning_rate": 2.324517865697501e-06, "loss": 0.651, "step": 7719 }, { "epoch": 0.79, "grad_norm": 1.6714604758276976, "learning_rate": 2.322406565297477e-06, "loss": 0.6351, "step": 7720 }, { "epoch": 0.79, "grad_norm": 1.8328149534027336, "learning_rate": 2.320296098180154e-06, "loss": 0.6832, "step": 7721 }, { "epoch": 0.79, "grad_norm": 1.6831044613699842, "learning_rate": 2.318186464574592e-06, "loss": 0.6869, "step": 7722 }, { "epoch": 0.79, "grad_norm": 1.7370014886232752, "learning_rate": 2.316077664709763e-06, "loss": 0.7249, "step": 7723 }, { "epoch": 0.79, "grad_norm": 1.5167056453499903, "learning_rate": 2.3139696988145367e-06, "loss": 0.6399, "step": 7724 }, { "epoch": 0.79, "grad_norm": 1.7606882595223425, "learning_rate": 2.311862567117703e-06, "loss": 0.6867, "step": 7725 }, { "epoch": 0.79, "grad_norm": 1.7430394028378702, "learning_rate": 2.3097562698479625e-06, "loss": 0.722, "step": 7726 }, { "epoch": 0.79, "grad_norm": 1.8257985091538622, "learning_rate": 2.307650807233913e-06, "loss": 0.735, "step": 7727 }, { "epoch": 0.79, "grad_norm": 1.370233037411586, "learning_rate": 2.3055461795040745e-06, "loss": 0.6408, "step": 7728 }, { "epoch": 0.79, "grad_norm": 1.9713638047994393, "learning_rate": 2.3034423868868728e-06, "loss": 0.7296, "step": 7729 }, { "epoch": 0.79, "grad_norm": 1.5408962439710896, "learning_rate": 2.301339429610637e-06, "loss": 0.6946, "step": 7730 }, { "epoch": 0.79, "grad_norm": 1.7594833023077556, "learning_rate": 2.2992373079036146e-06, "loss": 0.7895, "step": 7731 }, { "epoch": 0.79, "grad_norm": 1.8144959438221029, "learning_rate": 2.2971360219939543e-06, "loss": 0.7169, "step": 7732 }, { "epoch": 0.79, "grad_norm": 1.6810027650585382, "learning_rate": 2.295035572109718e-06, "loss": 0.5644, "step": 7733 }, { "epoch": 0.79, "grad_norm": 1.7103028604041395, "learning_rate": 2.2929359584788814e-06, "loss": 0.74, "step": 7734 }, { "epoch": 0.79, "grad_norm": 1.66925295250059, "learning_rate": 2.2908371813293195e-06, "loss": 0.6676, "step": 7735 }, { "epoch": 0.79, "grad_norm": 1.7461573353125468, "learning_rate": 2.2887392408888233e-06, "loss": 0.8088, "step": 7736 }, { "epoch": 0.79, "grad_norm": 1.6376307701564305, "learning_rate": 2.2866421373850943e-06, "loss": 0.7629, "step": 7737 }, { "epoch": 0.79, "grad_norm": 1.536706107384853, "learning_rate": 2.284545871045736e-06, "loss": 0.669, "step": 7738 }, { "epoch": 0.79, "grad_norm": 1.5271382033522722, "learning_rate": 2.2824504420982683e-06, "loss": 0.5886, "step": 7739 }, { "epoch": 0.79, "grad_norm": 1.7006083419959541, "learning_rate": 2.280355850770115e-06, "loss": 0.7803, "step": 7740 }, { "epoch": 0.79, "grad_norm": 1.5722117495050347, "learning_rate": 2.2782620972886116e-06, "loss": 0.6991, "step": 7741 }, { "epoch": 0.79, "grad_norm": 1.4054192798777325, "learning_rate": 2.276169181881006e-06, "loss": 0.6412, "step": 7742 }, { "epoch": 0.79, "grad_norm": 1.6087736248439393, "learning_rate": 2.274077104774446e-06, "loss": 0.6813, "step": 7743 }, { "epoch": 0.79, "grad_norm": 1.7157752209095043, "learning_rate": 2.2719858661959994e-06, "loss": 0.7376, "step": 7744 }, { "epoch": 0.79, "grad_norm": 1.514585889933833, "learning_rate": 2.26989546637263e-06, "loss": 0.6464, "step": 7745 }, { "epoch": 0.79, "grad_norm": 1.4337615493436142, "learning_rate": 2.267805905531224e-06, "loss": 0.6228, "step": 7746 }, { "epoch": 0.79, "grad_norm": 1.6849361457896674, "learning_rate": 2.2657171838985714e-06, "loss": 0.722, "step": 7747 }, { "epoch": 0.79, "grad_norm": 1.6944312615856458, "learning_rate": 2.2636293017013646e-06, "loss": 0.7142, "step": 7748 }, { "epoch": 0.79, "grad_norm": 1.879071950573806, "learning_rate": 2.2615422591662175e-06, "loss": 0.7979, "step": 7749 }, { "epoch": 0.79, "grad_norm": 1.614926120698719, "learning_rate": 2.259456056519639e-06, "loss": 0.6204, "step": 7750 }, { "epoch": 0.79, "grad_norm": 1.8507881781454831, "learning_rate": 2.257370693988056e-06, "loss": 0.7752, "step": 7751 }, { "epoch": 0.79, "grad_norm": 1.6303593430397691, "learning_rate": 2.255286171797807e-06, "loss": 0.6652, "step": 7752 }, { "epoch": 0.79, "grad_norm": 1.7243678063678334, "learning_rate": 2.2532024901751273e-06, "loss": 0.7451, "step": 7753 }, { "epoch": 0.79, "grad_norm": 1.6780366300873248, "learning_rate": 2.251119649346173e-06, "loss": 0.676, "step": 7754 }, { "epoch": 0.79, "grad_norm": 1.7377231196349392, "learning_rate": 2.249037649536999e-06, "loss": 0.7033, "step": 7755 }, { "epoch": 0.79, "grad_norm": 1.4944129500941656, "learning_rate": 2.2469564909735773e-06, "loss": 0.6111, "step": 7756 }, { "epoch": 0.79, "grad_norm": 1.6804228090544804, "learning_rate": 2.244876173881787e-06, "loss": 0.7282, "step": 7757 }, { "epoch": 0.79, "grad_norm": 1.5289001691878608, "learning_rate": 2.2427966984874095e-06, "loss": 0.669, "step": 7758 }, { "epoch": 0.79, "grad_norm": 1.6648677967922552, "learning_rate": 2.240718065016141e-06, "loss": 0.7155, "step": 7759 }, { "epoch": 0.79, "grad_norm": 1.5616347105803132, "learning_rate": 2.2386402736935865e-06, "loss": 0.6067, "step": 7760 }, { "epoch": 0.79, "grad_norm": 1.6999639438907512, "learning_rate": 2.2365633247452546e-06, "loss": 0.645, "step": 7761 }, { "epoch": 0.79, "grad_norm": 1.7686905266747723, "learning_rate": 2.2344872183965694e-06, "loss": 0.6266, "step": 7762 }, { "epoch": 0.79, "grad_norm": 1.551309634097524, "learning_rate": 2.232411954872855e-06, "loss": 0.6743, "step": 7763 }, { "epoch": 0.79, "grad_norm": 1.683891809634969, "learning_rate": 2.2303375343993515e-06, "loss": 0.7688, "step": 7764 }, { "epoch": 0.79, "grad_norm": 1.585579032545016, "learning_rate": 2.2282639572012075e-06, "loss": 0.6695, "step": 7765 }, { "epoch": 0.79, "grad_norm": 1.6017477879438695, "learning_rate": 2.226191223503472e-06, "loss": 0.718, "step": 7766 }, { "epoch": 0.79, "grad_norm": 1.5631122799217767, "learning_rate": 2.2241193335311127e-06, "loss": 0.6414, "step": 7767 }, { "epoch": 0.79, "grad_norm": 1.6802047965642206, "learning_rate": 2.2220482875089965e-06, "loss": 0.629, "step": 7768 }, { "epoch": 0.79, "grad_norm": 1.7959499571883017, "learning_rate": 2.2199780856619045e-06, "loss": 0.7789, "step": 7769 }, { "epoch": 0.79, "grad_norm": 1.700554129412106, "learning_rate": 2.2179087282145283e-06, "loss": 0.7761, "step": 7770 }, { "epoch": 0.79, "grad_norm": 1.7018819641783725, "learning_rate": 2.2158402153914583e-06, "loss": 0.7092, "step": 7771 }, { "epoch": 0.79, "grad_norm": 2.092281198826054, "learning_rate": 2.2137725474172056e-06, "loss": 0.7933, "step": 7772 }, { "epoch": 0.79, "grad_norm": 1.6815014348058988, "learning_rate": 2.2117057245161767e-06, "loss": 0.669, "step": 7773 }, { "epoch": 0.79, "grad_norm": 1.7578458090852305, "learning_rate": 2.209639746912696e-06, "loss": 0.7681, "step": 7774 }, { "epoch": 0.79, "grad_norm": 1.6446787326998182, "learning_rate": 2.2075746148309964e-06, "loss": 0.6735, "step": 7775 }, { "epoch": 0.79, "grad_norm": 1.7808940087861367, "learning_rate": 2.2055103284952094e-06, "loss": 0.7404, "step": 7776 }, { "epoch": 0.79, "grad_norm": 1.5939379532221074, "learning_rate": 2.2034468881293845e-06, "loss": 0.6936, "step": 7777 }, { "epoch": 0.79, "grad_norm": 1.6748246487282306, "learning_rate": 2.2013842939574783e-06, "loss": 0.6473, "step": 7778 }, { "epoch": 0.79, "grad_norm": 1.6786313202255883, "learning_rate": 2.1993225462033465e-06, "loss": 0.6747, "step": 7779 }, { "epoch": 0.79, "grad_norm": 1.865528066751501, "learning_rate": 2.197261645090767e-06, "loss": 0.7479, "step": 7780 }, { "epoch": 0.79, "grad_norm": 1.5587845597683763, "learning_rate": 2.195201590843412e-06, "loss": 0.5715, "step": 7781 }, { "epoch": 0.79, "grad_norm": 1.6196625061757, "learning_rate": 2.1931423836848697e-06, "loss": 0.6109, "step": 7782 }, { "epoch": 0.79, "grad_norm": 1.7556785120047103, "learning_rate": 2.19108402383864e-06, "loss": 0.6765, "step": 7783 }, { "epoch": 0.79, "grad_norm": 1.7597770933828298, "learning_rate": 2.1890265115281185e-06, "loss": 0.7867, "step": 7784 }, { "epoch": 0.79, "grad_norm": 1.7962624226990016, "learning_rate": 2.186969846976623e-06, "loss": 0.6741, "step": 7785 }, { "epoch": 0.79, "grad_norm": 1.5725184446544542, "learning_rate": 2.1849140304073647e-06, "loss": 0.7164, "step": 7786 }, { "epoch": 0.79, "grad_norm": 1.6603822362173248, "learning_rate": 2.1828590620434742e-06, "loss": 0.7547, "step": 7787 }, { "epoch": 0.79, "grad_norm": 1.8415056371952363, "learning_rate": 2.180804942107989e-06, "loss": 0.6835, "step": 7788 }, { "epoch": 0.79, "grad_norm": 1.716076411318612, "learning_rate": 2.1787516708238454e-06, "loss": 0.7467, "step": 7789 }, { "epoch": 0.79, "grad_norm": 1.6953951498862214, "learning_rate": 2.1766992484139015e-06, "loss": 0.778, "step": 7790 }, { "epoch": 0.79, "grad_norm": 1.5108432572094461, "learning_rate": 2.174647675100907e-06, "loss": 0.7437, "step": 7791 }, { "epoch": 0.79, "grad_norm": 1.659610127336146, "learning_rate": 2.172596951107534e-06, "loss": 0.6632, "step": 7792 }, { "epoch": 0.79, "grad_norm": 1.6463493242617568, "learning_rate": 2.1705470766563573e-06, "loss": 0.7365, "step": 7793 }, { "epoch": 0.79, "grad_norm": 1.6967353791802484, "learning_rate": 2.168498051969854e-06, "loss": 0.6863, "step": 7794 }, { "epoch": 0.79, "grad_norm": 1.7089615810906633, "learning_rate": 2.166449877270416e-06, "loss": 0.6617, "step": 7795 }, { "epoch": 0.79, "grad_norm": 1.6528730277901496, "learning_rate": 2.1644025527803426e-06, "loss": 0.5896, "step": 7796 }, { "epoch": 0.79, "grad_norm": 1.7234934710601961, "learning_rate": 2.1623560787218355e-06, "loss": 0.6047, "step": 7797 }, { "epoch": 0.79, "grad_norm": 1.7342875782041085, "learning_rate": 2.1603104553170108e-06, "loss": 0.6267, "step": 7798 }, { "epoch": 0.79, "grad_norm": 1.806632183962203, "learning_rate": 2.1582656827878846e-06, "loss": 0.7972, "step": 7799 }, { "epoch": 0.79, "grad_norm": 1.5230796084491267, "learning_rate": 2.1562217613563885e-06, "loss": 0.6782, "step": 7800 }, { "epoch": 0.79, "grad_norm": 1.5813890671558295, "learning_rate": 2.1541786912443596e-06, "loss": 0.6945, "step": 7801 }, { "epoch": 0.79, "grad_norm": 1.9117029170190478, "learning_rate": 2.152136472673535e-06, "loss": 0.656, "step": 7802 }, { "epoch": 0.79, "grad_norm": 1.7405050503451025, "learning_rate": 2.1500951058655727e-06, "loss": 0.7311, "step": 7803 }, { "epoch": 0.79, "grad_norm": 1.8246200827578696, "learning_rate": 2.148054591042026e-06, "loss": 0.7639, "step": 7804 }, { "epoch": 0.79, "grad_norm": 1.5414344095577803, "learning_rate": 2.1460149284243625e-06, "loss": 0.6125, "step": 7805 }, { "epoch": 0.79, "grad_norm": 1.7695724635486085, "learning_rate": 2.143976118233958e-06, "loss": 0.7938, "step": 7806 }, { "epoch": 0.79, "grad_norm": 1.6712057945183567, "learning_rate": 2.1419381606920887e-06, "loss": 0.6595, "step": 7807 }, { "epoch": 0.79, "grad_norm": 1.7053677147235984, "learning_rate": 2.1399010560199494e-06, "loss": 0.749, "step": 7808 }, { "epoch": 0.79, "grad_norm": 1.588553082819756, "learning_rate": 2.1378648044386285e-06, "loss": 0.6854, "step": 7809 }, { "epoch": 0.79, "grad_norm": 1.6490243239056168, "learning_rate": 2.135829406169133e-06, "loss": 0.7192, "step": 7810 }, { "epoch": 0.79, "grad_norm": 1.829693884474, "learning_rate": 2.133794861432378e-06, "loss": 0.7406, "step": 7811 }, { "epoch": 0.79, "grad_norm": 1.7410750894386053, "learning_rate": 2.1317611704491727e-06, "loss": 0.6912, "step": 7812 }, { "epoch": 0.79, "grad_norm": 1.6367704761381865, "learning_rate": 2.1297283334402474e-06, "loss": 0.7254, "step": 7813 }, { "epoch": 0.79, "grad_norm": 1.62040426586708, "learning_rate": 2.127696350626236e-06, "loss": 0.7247, "step": 7814 }, { "epoch": 0.79, "grad_norm": 1.6298201574818223, "learning_rate": 2.1256652222276753e-06, "loss": 0.6358, "step": 7815 }, { "epoch": 0.8, "grad_norm": 1.701264821377545, "learning_rate": 2.1236349484650164e-06, "loss": 0.7002, "step": 7816 }, { "epoch": 0.8, "grad_norm": 1.687077714298927, "learning_rate": 2.121605529558608e-06, "loss": 0.6635, "step": 7817 }, { "epoch": 0.8, "grad_norm": 1.522822106403037, "learning_rate": 2.1195769657287145e-06, "loss": 0.5933, "step": 7818 }, { "epoch": 0.8, "grad_norm": 1.8707382783379618, "learning_rate": 2.117549257195509e-06, "loss": 0.7004, "step": 7819 }, { "epoch": 0.8, "grad_norm": 1.7169068537145045, "learning_rate": 2.1155224041790614e-06, "loss": 0.6212, "step": 7820 }, { "epoch": 0.8, "grad_norm": 1.7913526157090918, "learning_rate": 2.1134964068993568e-06, "loss": 0.6165, "step": 7821 }, { "epoch": 0.8, "grad_norm": 1.68792325925831, "learning_rate": 2.1114712655762893e-06, "loss": 0.7724, "step": 7822 }, { "epoch": 0.8, "grad_norm": 1.6619920318546355, "learning_rate": 2.10944698042965e-06, "loss": 0.7583, "step": 7823 }, { "epoch": 0.8, "grad_norm": 1.5725040329920634, "learning_rate": 2.1074235516791475e-06, "loss": 0.6454, "step": 7824 }, { "epoch": 0.8, "grad_norm": 1.776077694659041, "learning_rate": 2.105400979544394e-06, "loss": 0.6861, "step": 7825 }, { "epoch": 0.8, "grad_norm": 1.6810506472695297, "learning_rate": 2.1033792642449037e-06, "loss": 0.7583, "step": 7826 }, { "epoch": 0.8, "grad_norm": 1.6039372665750604, "learning_rate": 2.101358406000106e-06, "loss": 0.6526, "step": 7827 }, { "epoch": 0.8, "grad_norm": 1.5976535524612527, "learning_rate": 2.0993384050293343e-06, "loss": 0.714, "step": 7828 }, { "epoch": 0.8, "grad_norm": 1.8511945629783728, "learning_rate": 2.0973192615518234e-06, "loss": 0.6959, "step": 7829 }, { "epoch": 0.8, "grad_norm": 1.709399661423479, "learning_rate": 2.095300975786723e-06, "loss": 0.6782, "step": 7830 }, { "epoch": 0.8, "grad_norm": 1.7332709455559026, "learning_rate": 2.093283547953088e-06, "loss": 0.6648, "step": 7831 }, { "epoch": 0.8, "grad_norm": 1.6192736018667757, "learning_rate": 2.0912669782698737e-06, "loss": 0.6277, "step": 7832 }, { "epoch": 0.8, "grad_norm": 1.6947247635348386, "learning_rate": 2.0892512669559505e-06, "loss": 0.7146, "step": 7833 }, { "epoch": 0.8, "grad_norm": 1.654417397999407, "learning_rate": 2.087236414230096e-06, "loss": 0.7693, "step": 7834 }, { "epoch": 0.8, "grad_norm": 1.6487583791677305, "learning_rate": 2.0852224203109826e-06, "loss": 0.6617, "step": 7835 }, { "epoch": 0.8, "grad_norm": 1.556761764582044, "learning_rate": 2.083209285417206e-06, "loss": 0.7113, "step": 7836 }, { "epoch": 0.8, "grad_norm": 1.7912806691152143, "learning_rate": 2.0811970097672527e-06, "loss": 0.6975, "step": 7837 }, { "epoch": 0.8, "grad_norm": 1.717649994233873, "learning_rate": 2.079185593579529e-06, "loss": 0.7063, "step": 7838 }, { "epoch": 0.8, "grad_norm": 1.5437100663665961, "learning_rate": 2.0771750370723444e-06, "loss": 0.6612, "step": 7839 }, { "epoch": 0.8, "grad_norm": 1.7449689894327503, "learning_rate": 2.0751653404639073e-06, "loss": 0.6719, "step": 7840 }, { "epoch": 0.8, "grad_norm": 1.5018868498932967, "learning_rate": 2.073156503972341e-06, "loss": 0.6652, "step": 7841 }, { "epoch": 0.8, "grad_norm": 1.636787781130712, "learning_rate": 2.0711485278156774e-06, "loss": 0.6727, "step": 7842 }, { "epoch": 0.8, "grad_norm": 1.6536033634879708, "learning_rate": 2.069141412211846e-06, "loss": 0.7434, "step": 7843 }, { "epoch": 0.8, "grad_norm": 1.6512144230931152, "learning_rate": 2.067135157378691e-06, "loss": 0.7674, "step": 7844 }, { "epoch": 0.8, "grad_norm": 1.5484002894263678, "learning_rate": 2.065129763533956e-06, "loss": 0.7699, "step": 7845 }, { "epoch": 0.8, "grad_norm": 1.5211767111624397, "learning_rate": 2.0631252308952986e-06, "loss": 0.6553, "step": 7846 }, { "epoch": 0.8, "grad_norm": 1.6673412897745399, "learning_rate": 2.06112155968028e-06, "loss": 0.6335, "step": 7847 }, { "epoch": 0.8, "grad_norm": 1.6113060699168706, "learning_rate": 2.059118750106365e-06, "loss": 0.6341, "step": 7848 }, { "epoch": 0.8, "grad_norm": 1.6460704313269863, "learning_rate": 2.0571168023909272e-06, "loss": 0.6317, "step": 7849 }, { "epoch": 0.8, "grad_norm": 1.7483784351965763, "learning_rate": 2.0551157167512503e-06, "loss": 0.7829, "step": 7850 }, { "epoch": 0.8, "grad_norm": 1.8651438770398827, "learning_rate": 2.053115493404515e-06, "loss": 0.7116, "step": 7851 }, { "epoch": 0.8, "grad_norm": 1.7193660661457268, "learning_rate": 2.0511161325678206e-06, "loss": 0.6865, "step": 7852 }, { "epoch": 0.8, "grad_norm": 1.622650547540691, "learning_rate": 2.0491176344581608e-06, "loss": 0.6429, "step": 7853 }, { "epoch": 0.8, "grad_norm": 1.524886595091858, "learning_rate": 2.047119999292444e-06, "loss": 0.6307, "step": 7854 }, { "epoch": 0.8, "grad_norm": 1.6940262251806701, "learning_rate": 2.0451232272874845e-06, "loss": 0.6906, "step": 7855 }, { "epoch": 0.8, "grad_norm": 1.5474554934700888, "learning_rate": 2.0431273186599964e-06, "loss": 0.5805, "step": 7856 }, { "epoch": 0.8, "grad_norm": 1.5127942854920668, "learning_rate": 2.041132273626608e-06, "loss": 0.6684, "step": 7857 }, { "epoch": 0.8, "grad_norm": 1.5997306154029722, "learning_rate": 2.039138092403846e-06, "loss": 0.6363, "step": 7858 }, { "epoch": 0.8, "grad_norm": 1.875216592490199, "learning_rate": 2.037144775208151e-06, "loss": 0.5804, "step": 7859 }, { "epoch": 0.8, "grad_norm": 1.6753429884560422, "learning_rate": 2.035152322255868e-06, "loss": 0.6385, "step": 7860 }, { "epoch": 0.8, "grad_norm": 1.7548658352691162, "learning_rate": 2.0331607337632407e-06, "loss": 0.6925, "step": 7861 }, { "epoch": 0.8, "grad_norm": 1.5398454208128212, "learning_rate": 2.0311700099464315e-06, "loss": 0.6483, "step": 7862 }, { "epoch": 0.8, "grad_norm": 1.6031866344789765, "learning_rate": 2.029180151021496e-06, "loss": 0.6919, "step": 7863 }, { "epoch": 0.8, "grad_norm": 1.5614753142650077, "learning_rate": 2.027191157204406e-06, "loss": 0.7527, "step": 7864 }, { "epoch": 0.8, "grad_norm": 1.835885954488514, "learning_rate": 2.025203028711038e-06, "loss": 0.7981, "step": 7865 }, { "epoch": 0.8, "grad_norm": 1.6885653035275787, "learning_rate": 2.023215765757166e-06, "loss": 0.7251, "step": 7866 }, { "epoch": 0.8, "grad_norm": 1.8891035652301391, "learning_rate": 2.0212293685584794e-06, "loss": 0.7153, "step": 7867 }, { "epoch": 0.8, "grad_norm": 1.974342714359956, "learning_rate": 2.0192438373305747e-06, "loss": 0.7538, "step": 7868 }, { "epoch": 0.8, "grad_norm": 1.7252816726702502, "learning_rate": 2.0172591722889423e-06, "loss": 0.7309, "step": 7869 }, { "epoch": 0.8, "grad_norm": 1.791791144501412, "learning_rate": 2.015275373648994e-06, "loss": 0.7317, "step": 7870 }, { "epoch": 0.8, "grad_norm": 1.853562934767157, "learning_rate": 2.0132924416260347e-06, "loss": 0.8221, "step": 7871 }, { "epoch": 0.8, "grad_norm": 1.7272393998078934, "learning_rate": 2.0113103764352814e-06, "loss": 0.7894, "step": 7872 }, { "epoch": 0.8, "grad_norm": 1.6633087363346242, "learning_rate": 2.009329178291861e-06, "loss": 0.768, "step": 7873 }, { "epoch": 0.8, "grad_norm": 1.9128303825762558, "learning_rate": 2.0073488474107962e-06, "loss": 0.6392, "step": 7874 }, { "epoch": 0.8, "grad_norm": 1.7616167689658218, "learning_rate": 2.0053693840070242e-06, "loss": 0.6602, "step": 7875 }, { "epoch": 0.8, "grad_norm": 1.699303749633239, "learning_rate": 2.0033907882953818e-06, "loss": 0.7439, "step": 7876 }, { "epoch": 0.8, "grad_norm": 1.6070346725208509, "learning_rate": 2.0014130604906167e-06, "loss": 0.7094, "step": 7877 }, { "epoch": 0.8, "grad_norm": 1.741448073560214, "learning_rate": 1.999436200807382e-06, "loss": 0.7499, "step": 7878 }, { "epoch": 0.8, "grad_norm": 1.6944355963588895, "learning_rate": 1.99746020946023e-06, "loss": 0.7529, "step": 7879 }, { "epoch": 0.8, "grad_norm": 1.8895579187524152, "learning_rate": 1.99548508666363e-06, "loss": 0.7048, "step": 7880 }, { "epoch": 0.8, "grad_norm": 1.9920721222550972, "learning_rate": 1.993510832631944e-06, "loss": 0.7369, "step": 7881 }, { "epoch": 0.8, "grad_norm": 1.5922170424114257, "learning_rate": 1.9915374475794492e-06, "loss": 0.6501, "step": 7882 }, { "epoch": 0.8, "grad_norm": 1.5634638693807896, "learning_rate": 1.989564931720329e-06, "loss": 0.6196, "step": 7883 }, { "epoch": 0.8, "grad_norm": 1.5635383047081672, "learning_rate": 1.987593285268664e-06, "loss": 0.7479, "step": 7884 }, { "epoch": 0.8, "grad_norm": 1.4883323785564222, "learning_rate": 1.9856225084384484e-06, "loss": 0.7076, "step": 7885 }, { "epoch": 0.8, "grad_norm": 1.6282107550188445, "learning_rate": 1.983652601443581e-06, "loss": 0.6514, "step": 7886 }, { "epoch": 0.8, "grad_norm": 1.527130392726828, "learning_rate": 1.981683564497858e-06, "loss": 0.683, "step": 7887 }, { "epoch": 0.8, "grad_norm": 1.5838502653009983, "learning_rate": 1.979715397814996e-06, "loss": 0.737, "step": 7888 }, { "epoch": 0.8, "grad_norm": 1.6495537665659166, "learning_rate": 1.977748101608601e-06, "loss": 0.6973, "step": 7889 }, { "epoch": 0.8, "grad_norm": 1.7194286106142869, "learning_rate": 1.9757816760921955e-06, "loss": 0.7559, "step": 7890 }, { "epoch": 0.8, "grad_norm": 1.5641719812122743, "learning_rate": 1.973816121479207e-06, "loss": 0.6447, "step": 7891 }, { "epoch": 0.8, "grad_norm": 1.5147137650724953, "learning_rate": 1.971851437982961e-06, "loss": 0.5577, "step": 7892 }, { "epoch": 0.8, "grad_norm": 1.7297562578736958, "learning_rate": 1.9698876258166968e-06, "loss": 0.698, "step": 7893 }, { "epoch": 0.8, "grad_norm": 1.872273742313337, "learning_rate": 1.967924685193552e-06, "loss": 0.7417, "step": 7894 }, { "epoch": 0.8, "grad_norm": 1.5730148472491021, "learning_rate": 1.965962616326574e-06, "loss": 0.6882, "step": 7895 }, { "epoch": 0.8, "grad_norm": 1.7367694504986644, "learning_rate": 1.9640014194287206e-06, "loss": 0.7052, "step": 7896 }, { "epoch": 0.8, "grad_norm": 1.794095966158344, "learning_rate": 1.96204109471284e-06, "loss": 0.6673, "step": 7897 }, { "epoch": 0.8, "grad_norm": 1.6628464531180407, "learning_rate": 1.9600816423917013e-06, "loss": 0.7936, "step": 7898 }, { "epoch": 0.8, "grad_norm": 1.7052997907461107, "learning_rate": 1.958123062677968e-06, "loss": 0.7681, "step": 7899 }, { "epoch": 0.8, "grad_norm": 1.6754284096878078, "learning_rate": 1.9561653557842142e-06, "loss": 0.7549, "step": 7900 }, { "epoch": 0.8, "grad_norm": 1.5443046736290091, "learning_rate": 1.9542085219229235e-06, "loss": 0.682, "step": 7901 }, { "epoch": 0.8, "grad_norm": 1.7456115388925508, "learning_rate": 1.9522525613064723e-06, "loss": 0.6539, "step": 7902 }, { "epoch": 0.8, "grad_norm": 1.7079663646484742, "learning_rate": 1.950297474147156e-06, "loss": 0.6013, "step": 7903 }, { "epoch": 0.8, "grad_norm": 1.6242157401336126, "learning_rate": 1.9483432606571627e-06, "loss": 0.6876, "step": 7904 }, { "epoch": 0.8, "grad_norm": 1.6509617781874775, "learning_rate": 1.946389921048595e-06, "loss": 0.6657, "step": 7905 }, { "epoch": 0.8, "grad_norm": 1.5558128791049708, "learning_rate": 1.944437455533459e-06, "loss": 0.6951, "step": 7906 }, { "epoch": 0.8, "grad_norm": 1.5442432225603513, "learning_rate": 1.9424858643236598e-06, "loss": 0.6857, "step": 7907 }, { "epoch": 0.8, "grad_norm": 1.8953492852649612, "learning_rate": 1.9405351476310154e-06, "loss": 0.6654, "step": 7908 }, { "epoch": 0.8, "grad_norm": 1.6694936693614468, "learning_rate": 1.9385853056672467e-06, "loss": 0.6512, "step": 7909 }, { "epoch": 0.8, "grad_norm": 1.5791933585290685, "learning_rate": 1.9366363386439747e-06, "loss": 0.7247, "step": 7910 }, { "epoch": 0.8, "grad_norm": 1.7048568831881048, "learning_rate": 1.9346882467727323e-06, "loss": 0.7314, "step": 7911 }, { "epoch": 0.8, "grad_norm": 1.6997667202698497, "learning_rate": 1.932741030264952e-06, "loss": 0.7043, "step": 7912 }, { "epoch": 0.8, "grad_norm": 1.6307421070504355, "learning_rate": 1.930794689331975e-06, "loss": 0.6322, "step": 7913 }, { "epoch": 0.81, "grad_norm": 2.0600411136366907, "learning_rate": 1.9288492241850486e-06, "loss": 0.8343, "step": 7914 }, { "epoch": 0.81, "grad_norm": 1.6263978112754838, "learning_rate": 1.9269046350353184e-06, "loss": 0.5917, "step": 7915 }, { "epoch": 0.81, "grad_norm": 1.7212214231886043, "learning_rate": 1.9249609220938425e-06, "loss": 0.6988, "step": 7916 }, { "epoch": 0.81, "grad_norm": 1.7923502452489541, "learning_rate": 1.9230180855715765e-06, "loss": 0.7721, "step": 7917 }, { "epoch": 0.81, "grad_norm": 1.6510583005199575, "learning_rate": 1.9210761256793876e-06, "loss": 0.6751, "step": 7918 }, { "epoch": 0.81, "grad_norm": 1.6965142454396533, "learning_rate": 1.9191350426280476e-06, "loss": 0.7389, "step": 7919 }, { "epoch": 0.81, "grad_norm": 1.5883857332917075, "learning_rate": 1.9171948366282256e-06, "loss": 0.6797, "step": 7920 }, { "epoch": 0.81, "grad_norm": 1.7740325510562096, "learning_rate": 1.9152555078905054e-06, "loss": 0.6812, "step": 7921 }, { "epoch": 0.81, "grad_norm": 1.7902049614453566, "learning_rate": 1.9133170566253665e-06, "loss": 0.6932, "step": 7922 }, { "epoch": 0.81, "grad_norm": 1.7610525327182251, "learning_rate": 1.9113794830431998e-06, "loss": 0.7173, "step": 7923 }, { "epoch": 0.81, "grad_norm": 1.737127894919951, "learning_rate": 1.9094427873542997e-06, "loss": 0.7971, "step": 7924 }, { "epoch": 0.81, "grad_norm": 1.842918131941055, "learning_rate": 1.907506969768862e-06, "loss": 0.6717, "step": 7925 }, { "epoch": 0.81, "grad_norm": 1.8822392409548165, "learning_rate": 1.9055720304969894e-06, "loss": 0.757, "step": 7926 }, { "epoch": 0.81, "grad_norm": 1.72327856763112, "learning_rate": 1.903637969748693e-06, "loss": 0.5784, "step": 7927 }, { "epoch": 0.81, "grad_norm": 1.5513018101818548, "learning_rate": 1.901704787733879e-06, "loss": 0.6345, "step": 7928 }, { "epoch": 0.81, "grad_norm": 1.6058853335363181, "learning_rate": 1.8997724846623666e-06, "loss": 0.6129, "step": 7929 }, { "epoch": 0.81, "grad_norm": 1.732733524266622, "learning_rate": 1.8978410607438812e-06, "loss": 0.6901, "step": 7930 }, { "epoch": 0.81, "grad_norm": 1.7278955082537923, "learning_rate": 1.895910516188042e-06, "loss": 0.7151, "step": 7931 }, { "epoch": 0.81, "grad_norm": 1.7211882101735263, "learning_rate": 1.8939808512043822e-06, "loss": 0.7134, "step": 7932 }, { "epoch": 0.81, "grad_norm": 1.6376433863918067, "learning_rate": 1.8920520660023412e-06, "loss": 0.6377, "step": 7933 }, { "epoch": 0.81, "grad_norm": 1.9063083868432706, "learning_rate": 1.8901241607912502e-06, "loss": 0.6743, "step": 7934 }, { "epoch": 0.81, "grad_norm": 1.6773780116605972, "learning_rate": 1.8881971357803575e-06, "loss": 0.6674, "step": 7935 }, { "epoch": 0.81, "grad_norm": 1.756703253587125, "learning_rate": 1.8862709911788145e-06, "loss": 0.7528, "step": 7936 }, { "epoch": 0.81, "grad_norm": 1.8278159313895643, "learning_rate": 1.8843457271956679e-06, "loss": 0.6927, "step": 7937 }, { "epoch": 0.81, "grad_norm": 1.5222474436371374, "learning_rate": 1.8824213440398776e-06, "loss": 0.6128, "step": 7938 }, { "epoch": 0.81, "grad_norm": 1.5055087749630518, "learning_rate": 1.880497841920308e-06, "loss": 0.635, "step": 7939 }, { "epoch": 0.81, "grad_norm": 1.817582363757226, "learning_rate": 1.8785752210457208e-06, "loss": 0.6813, "step": 7940 }, { "epoch": 0.81, "grad_norm": 1.76457845246533, "learning_rate": 1.8766534816247917e-06, "loss": 0.6767, "step": 7941 }, { "epoch": 0.81, "grad_norm": 1.5273211143254652, "learning_rate": 1.874732623866089e-06, "loss": 0.6553, "step": 7942 }, { "epoch": 0.81, "grad_norm": 1.7697396621253008, "learning_rate": 1.8728126479780951e-06, "loss": 0.7514, "step": 7943 }, { "epoch": 0.81, "grad_norm": 1.8317315916672514, "learning_rate": 1.8708935541691964e-06, "loss": 0.7215, "step": 7944 }, { "epoch": 0.81, "grad_norm": 1.7793918921800997, "learning_rate": 1.868975342647673e-06, "loss": 0.6822, "step": 7945 }, { "epoch": 0.81, "grad_norm": 1.813369886642259, "learning_rate": 1.8670580136217232e-06, "loss": 0.7838, "step": 7946 }, { "epoch": 0.81, "grad_norm": 1.6914376056588731, "learning_rate": 1.865141567299442e-06, "loss": 0.637, "step": 7947 }, { "epoch": 0.81, "grad_norm": 1.8448253000887582, "learning_rate": 1.8632260038888272e-06, "loss": 0.7215, "step": 7948 }, { "epoch": 0.81, "grad_norm": 1.6885452785875725, "learning_rate": 1.8613113235977854e-06, "loss": 0.7254, "step": 7949 }, { "epoch": 0.81, "grad_norm": 1.786429205383274, "learning_rate": 1.8593975266341258e-06, "loss": 0.7589, "step": 7950 }, { "epoch": 0.81, "grad_norm": 1.6108942657093994, "learning_rate": 1.857484613205558e-06, "loss": 0.7062, "step": 7951 }, { "epoch": 0.81, "grad_norm": 1.7621078214450279, "learning_rate": 1.8555725835197024e-06, "loss": 0.6255, "step": 7952 }, { "epoch": 0.81, "grad_norm": 1.7650961049535976, "learning_rate": 1.8536614377840767e-06, "loss": 0.6909, "step": 7953 }, { "epoch": 0.81, "grad_norm": 1.6656129367567623, "learning_rate": 1.851751176206107e-06, "loss": 0.6863, "step": 7954 }, { "epoch": 0.81, "grad_norm": 1.745463279141544, "learning_rate": 1.849841798993126e-06, "loss": 0.8086, "step": 7955 }, { "epoch": 0.81, "grad_norm": 1.6093159687854266, "learning_rate": 1.8479333063523596e-06, "loss": 0.666, "step": 7956 }, { "epoch": 0.81, "grad_norm": 1.6844585878472074, "learning_rate": 1.846025698490952e-06, "loss": 0.7696, "step": 7957 }, { "epoch": 0.81, "grad_norm": 1.6175676462127535, "learning_rate": 1.8441189756159384e-06, "loss": 0.7112, "step": 7958 }, { "epoch": 0.81, "grad_norm": 1.6762261447013207, "learning_rate": 1.8422131379342668e-06, "loss": 0.5827, "step": 7959 }, { "epoch": 0.81, "grad_norm": 1.6781795279750065, "learning_rate": 1.840308185652787e-06, "loss": 0.6993, "step": 7960 }, { "epoch": 0.81, "grad_norm": 1.5292222257030226, "learning_rate": 1.8384041189782487e-06, "loss": 0.5964, "step": 7961 }, { "epoch": 0.81, "grad_norm": 1.781420757861504, "learning_rate": 1.8365009381173104e-06, "loss": 0.7575, "step": 7962 }, { "epoch": 0.81, "grad_norm": 1.6092231268001482, "learning_rate": 1.8345986432765338e-06, "loss": 0.6708, "step": 7963 }, { "epoch": 0.81, "grad_norm": 1.569521817337266, "learning_rate": 1.8326972346623806e-06, "loss": 0.6602, "step": 7964 }, { "epoch": 0.81, "grad_norm": 1.606489507221082, "learning_rate": 1.8307967124812221e-06, "loss": 0.7262, "step": 7965 }, { "epoch": 0.81, "grad_norm": 1.6885580857972373, "learning_rate": 1.8288970769393267e-06, "loss": 0.7442, "step": 7966 }, { "epoch": 0.81, "grad_norm": 1.730201721005992, "learning_rate": 1.8269983282428705e-06, "loss": 0.7575, "step": 7967 }, { "epoch": 0.81, "grad_norm": 1.6215040305929973, "learning_rate": 1.8251004665979378e-06, "loss": 0.6663, "step": 7968 }, { "epoch": 0.81, "grad_norm": 1.5726907533294119, "learning_rate": 1.8232034922105058e-06, "loss": 0.6834, "step": 7969 }, { "epoch": 0.81, "grad_norm": 1.6229999795058794, "learning_rate": 1.8213074052864654e-06, "loss": 0.76, "step": 7970 }, { "epoch": 0.81, "grad_norm": 1.5485299111513613, "learning_rate": 1.8194122060316044e-06, "loss": 0.6972, "step": 7971 }, { "epoch": 0.81, "grad_norm": 1.5521491925286681, "learning_rate": 1.817517894651617e-06, "loss": 0.593, "step": 7972 }, { "epoch": 0.81, "grad_norm": 1.7300321739270723, "learning_rate": 1.8156244713521065e-06, "loss": 0.7811, "step": 7973 }, { "epoch": 0.81, "grad_norm": 1.829781856253929, "learning_rate": 1.8137319363385664e-06, "loss": 0.6688, "step": 7974 }, { "epoch": 0.81, "grad_norm": 1.6007145369222389, "learning_rate": 1.811840289816409e-06, "loss": 0.7405, "step": 7975 }, { "epoch": 0.81, "grad_norm": 1.597049195511601, "learning_rate": 1.8099495319909377e-06, "loss": 0.6841, "step": 7976 }, { "epoch": 0.81, "grad_norm": 1.465891069369129, "learning_rate": 1.8080596630673652e-06, "loss": 0.6568, "step": 7977 }, { "epoch": 0.81, "grad_norm": 1.6293286430781235, "learning_rate": 1.806170683250813e-06, "loss": 0.6951, "step": 7978 }, { "epoch": 0.81, "grad_norm": 1.6922012635236878, "learning_rate": 1.8042825927462915e-06, "loss": 0.7233, "step": 7979 }, { "epoch": 0.81, "grad_norm": 1.5849598458552254, "learning_rate": 1.802395391758729e-06, "loss": 0.6481, "step": 7980 }, { "epoch": 0.81, "grad_norm": 1.8039497724725537, "learning_rate": 1.8005090804929525e-06, "loss": 0.6401, "step": 7981 }, { "epoch": 0.81, "grad_norm": 1.4792034062672803, "learning_rate": 1.7986236591536875e-06, "loss": 0.7558, "step": 7982 }, { "epoch": 0.81, "grad_norm": 1.6653008329867707, "learning_rate": 1.7967391279455715e-06, "loss": 0.6519, "step": 7983 }, { "epoch": 0.81, "grad_norm": 1.6590694279898175, "learning_rate": 1.794855487073136e-06, "loss": 0.7157, "step": 7984 }, { "epoch": 0.81, "grad_norm": 1.6776772981968426, "learning_rate": 1.7929727367408233e-06, "loss": 0.7889, "step": 7985 }, { "epoch": 0.81, "grad_norm": 1.6534317199844832, "learning_rate": 1.79109087715298e-06, "loss": 0.6789, "step": 7986 }, { "epoch": 0.81, "grad_norm": 1.6922990817285495, "learning_rate": 1.7892099085138448e-06, "loss": 0.6641, "step": 7987 }, { "epoch": 0.81, "grad_norm": 1.6048970808366634, "learning_rate": 1.7873298310275755e-06, "loss": 0.6557, "step": 7988 }, { "epoch": 0.81, "grad_norm": 1.690427387402649, "learning_rate": 1.7854506448982179e-06, "loss": 0.7262, "step": 7989 }, { "epoch": 0.81, "grad_norm": 1.691584509499384, "learning_rate": 1.783572350329732e-06, "loss": 0.736, "step": 7990 }, { "epoch": 0.81, "grad_norm": 1.7445444143992106, "learning_rate": 1.7816949475259793e-06, "loss": 0.7128, "step": 7991 }, { "epoch": 0.81, "grad_norm": 1.821762724528318, "learning_rate": 1.7798184366907167e-06, "loss": 0.7088, "step": 7992 }, { "epoch": 0.81, "grad_norm": 1.6726024812181441, "learning_rate": 1.777942818027617e-06, "loss": 0.7523, "step": 7993 }, { "epoch": 0.81, "grad_norm": 1.6375638362892346, "learning_rate": 1.776068091740244e-06, "loss": 0.703, "step": 7994 }, { "epoch": 0.81, "grad_norm": 1.7839059170132194, "learning_rate": 1.7741942580320704e-06, "loss": 0.7539, "step": 7995 }, { "epoch": 0.81, "grad_norm": 1.8828237716172438, "learning_rate": 1.7723213171064757e-06, "loss": 0.6589, "step": 7996 }, { "epoch": 0.81, "grad_norm": 1.5749266185283466, "learning_rate": 1.770449269166733e-06, "loss": 0.6152, "step": 7997 }, { "epoch": 0.81, "grad_norm": 1.637547348638589, "learning_rate": 1.7685781144160276e-06, "loss": 0.7638, "step": 7998 }, { "epoch": 0.81, "grad_norm": 1.6348983075135142, "learning_rate": 1.7667078530574432e-06, "loss": 0.6158, "step": 7999 }, { "epoch": 0.81, "grad_norm": 1.7768978652343665, "learning_rate": 1.764838485293966e-06, "loss": 0.6817, "step": 8000 }, { "epoch": 0.81, "grad_norm": 1.9138605814810459, "learning_rate": 1.7629700113284898e-06, "loss": 0.6924, "step": 8001 }, { "epoch": 0.81, "grad_norm": 1.8006453653947696, "learning_rate": 1.7611024313638036e-06, "loss": 0.685, "step": 8002 }, { "epoch": 0.81, "grad_norm": 1.621591850869959, "learning_rate": 1.7592357456026065e-06, "loss": 0.668, "step": 8003 }, { "epoch": 0.81, "grad_norm": 1.5252090986604172, "learning_rate": 1.7573699542475009e-06, "loss": 0.6939, "step": 8004 }, { "epoch": 0.81, "grad_norm": 1.594839187279194, "learning_rate": 1.7555050575009836e-06, "loss": 0.6475, "step": 8005 }, { "epoch": 0.81, "grad_norm": 1.7306387118407995, "learning_rate": 1.7536410555654658e-06, "loss": 0.6955, "step": 8006 }, { "epoch": 0.81, "grad_norm": 1.7925998469644877, "learning_rate": 1.7517779486432495e-06, "loss": 0.691, "step": 8007 }, { "epoch": 0.81, "grad_norm": 1.8125246527297612, "learning_rate": 1.7499157369365504e-06, "loss": 0.6745, "step": 8008 }, { "epoch": 0.81, "grad_norm": 1.6051500991727927, "learning_rate": 1.7480544206474824e-06, "loss": 0.6287, "step": 8009 }, { "epoch": 0.81, "grad_norm": 1.721327369042911, "learning_rate": 1.7461939999780586e-06, "loss": 0.685, "step": 8010 }, { "epoch": 0.81, "grad_norm": 1.8154002512357872, "learning_rate": 1.7443344751302048e-06, "loss": 0.7326, "step": 8011 }, { "epoch": 0.81, "grad_norm": 1.734009665808617, "learning_rate": 1.7424758463057356e-06, "loss": 0.574, "step": 8012 }, { "epoch": 0.82, "grad_norm": 1.5821619651938132, "learning_rate": 1.7406181137063804e-06, "loss": 0.5944, "step": 8013 }, { "epoch": 0.82, "grad_norm": 1.6127780706422297, "learning_rate": 1.7387612775337703e-06, "loss": 0.6382, "step": 8014 }, { "epoch": 0.82, "grad_norm": 1.7682106393097323, "learning_rate": 1.7369053379894285e-06, "loss": 0.7225, "step": 8015 }, { "epoch": 0.82, "grad_norm": 1.6834214431659507, "learning_rate": 1.7350502952747916e-06, "loss": 0.7673, "step": 8016 }, { "epoch": 0.82, "grad_norm": 1.7819707141660162, "learning_rate": 1.7331961495911997e-06, "loss": 0.6721, "step": 8017 }, { "epoch": 0.82, "grad_norm": 1.6772630357351819, "learning_rate": 1.731342901139884e-06, "loss": 0.6176, "step": 8018 }, { "epoch": 0.82, "grad_norm": 1.6709942471951538, "learning_rate": 1.7294905501219915e-06, "loss": 0.6154, "step": 8019 }, { "epoch": 0.82, "grad_norm": 1.7757882299108645, "learning_rate": 1.7276390967385614e-06, "loss": 0.758, "step": 8020 }, { "epoch": 0.82, "grad_norm": 1.7015110760484848, "learning_rate": 1.7257885411905416e-06, "loss": 0.7283, "step": 8021 }, { "epoch": 0.82, "grad_norm": 1.7701017096659257, "learning_rate": 1.723938883678784e-06, "loss": 0.7977, "step": 8022 }, { "epoch": 0.82, "grad_norm": 1.93781008303018, "learning_rate": 1.7220901244040355e-06, "loss": 0.6425, "step": 8023 }, { "epoch": 0.82, "grad_norm": 1.545177983002849, "learning_rate": 1.7202422635669536e-06, "loss": 0.7073, "step": 8024 }, { "epoch": 0.82, "grad_norm": 1.8987430084382586, "learning_rate": 1.718395301368091e-06, "loss": 0.764, "step": 8025 }, { "epoch": 0.82, "grad_norm": 1.4753951907006009, "learning_rate": 1.7165492380079084e-06, "loss": 0.682, "step": 8026 }, { "epoch": 0.82, "grad_norm": 1.8881432214443394, "learning_rate": 1.7147040736867704e-06, "loss": 0.7082, "step": 8027 }, { "epoch": 0.82, "grad_norm": 1.7149537130909085, "learning_rate": 1.7128598086049353e-06, "loss": 0.6842, "step": 8028 }, { "epoch": 0.82, "grad_norm": 1.612966254277709, "learning_rate": 1.711016442962573e-06, "loss": 0.6647, "step": 8029 }, { "epoch": 0.82, "grad_norm": 1.7496388997343648, "learning_rate": 1.7091739769597492e-06, "loss": 0.7958, "step": 8030 }, { "epoch": 0.82, "grad_norm": 1.6416089663102067, "learning_rate": 1.7073324107964363e-06, "loss": 0.6912, "step": 8031 }, { "epoch": 0.82, "grad_norm": 1.7959977773068057, "learning_rate": 1.7054917446725083e-06, "loss": 0.7446, "step": 8032 }, { "epoch": 0.82, "grad_norm": 1.758838901126341, "learning_rate": 1.7036519787877393e-06, "loss": 0.7721, "step": 8033 }, { "epoch": 0.82, "grad_norm": 1.6413026632094811, "learning_rate": 1.701813113341806e-06, "loss": 0.6721, "step": 8034 }, { "epoch": 0.82, "grad_norm": 1.6692477027547694, "learning_rate": 1.699975148534293e-06, "loss": 0.5955, "step": 8035 }, { "epoch": 0.82, "grad_norm": 1.641655359956405, "learning_rate": 1.6981380845646779e-06, "loss": 0.6715, "step": 8036 }, { "epoch": 0.82, "grad_norm": 1.6286894104599299, "learning_rate": 1.6963019216323472e-06, "loss": 0.6614, "step": 8037 }, { "epoch": 0.82, "grad_norm": 1.5692556277032406, "learning_rate": 1.69446665993659e-06, "loss": 0.7314, "step": 8038 }, { "epoch": 0.82, "grad_norm": 1.7951697399440905, "learning_rate": 1.6926322996765899e-06, "loss": 0.6347, "step": 8039 }, { "epoch": 0.82, "grad_norm": 1.76987488315246, "learning_rate": 1.6907988410514408e-06, "loss": 0.7911, "step": 8040 }, { "epoch": 0.82, "grad_norm": 1.6345706578407215, "learning_rate": 1.6889662842601384e-06, "loss": 0.6243, "step": 8041 }, { "epoch": 0.82, "grad_norm": 1.7310740912828402, "learning_rate": 1.6871346295015744e-06, "loss": 0.6846, "step": 8042 }, { "epoch": 0.82, "grad_norm": 1.6632516322796669, "learning_rate": 1.6853038769745466e-06, "loss": 0.6219, "step": 8043 }, { "epoch": 0.82, "grad_norm": 1.4446934371691147, "learning_rate": 1.6834740268777594e-06, "loss": 0.6495, "step": 8044 }, { "epoch": 0.82, "grad_norm": 1.7943768358241974, "learning_rate": 1.681645079409807e-06, "loss": 0.7088, "step": 8045 }, { "epoch": 0.82, "grad_norm": 1.619771756196256, "learning_rate": 1.6798170347692e-06, "loss": 0.7114, "step": 8046 }, { "epoch": 0.82, "grad_norm": 1.4099576457270464, "learning_rate": 1.6779898931543382e-06, "loss": 0.5875, "step": 8047 }, { "epoch": 0.82, "grad_norm": 1.7769641316748075, "learning_rate": 1.6761636547635308e-06, "loss": 0.6824, "step": 8048 }, { "epoch": 0.82, "grad_norm": 1.6713721303852005, "learning_rate": 1.6743383197949925e-06, "loss": 0.7363, "step": 8049 }, { "epoch": 0.82, "grad_norm": 1.5786940687358804, "learning_rate": 1.6725138884468273e-06, "loss": 0.6727, "step": 8050 }, { "epoch": 0.82, "grad_norm": 1.78872539059089, "learning_rate": 1.6706903609170522e-06, "loss": 0.7712, "step": 8051 }, { "epoch": 0.82, "grad_norm": 1.5147655598325824, "learning_rate": 1.6688677374035856e-06, "loss": 0.6125, "step": 8052 }, { "epoch": 0.82, "grad_norm": 1.6544674731977558, "learning_rate": 1.6670460181042381e-06, "loss": 0.7031, "step": 8053 }, { "epoch": 0.82, "grad_norm": 1.709302577018215, "learning_rate": 1.6652252032167337e-06, "loss": 0.6829, "step": 8054 }, { "epoch": 0.82, "grad_norm": 1.6225215150122227, "learning_rate": 1.6634052929386946e-06, "loss": 0.7626, "step": 8055 }, { "epoch": 0.82, "grad_norm": 1.5709590449037962, "learning_rate": 1.661586287467638e-06, "loss": 0.6229, "step": 8056 }, { "epoch": 0.82, "grad_norm": 1.8451733707947704, "learning_rate": 1.6597681870009917e-06, "loss": 0.7429, "step": 8057 }, { "epoch": 0.82, "grad_norm": 1.620867770846572, "learning_rate": 1.6579509917360859e-06, "loss": 0.6697, "step": 8058 }, { "epoch": 0.82, "grad_norm": 1.6393845757334204, "learning_rate": 1.6561347018701412e-06, "loss": 0.5999, "step": 8059 }, { "epoch": 0.82, "grad_norm": 1.7245146300654126, "learning_rate": 1.6543193176002936e-06, "loss": 0.6413, "step": 8060 }, { "epoch": 0.82, "grad_norm": 1.6501570293526682, "learning_rate": 1.6525048391235698e-06, "loss": 0.6118, "step": 8061 }, { "epoch": 0.82, "grad_norm": 1.5671003694062557, "learning_rate": 1.6506912666369067e-06, "loss": 0.7136, "step": 8062 }, { "epoch": 0.82, "grad_norm": 1.7856317394038808, "learning_rate": 1.6488786003371393e-06, "loss": 0.7216, "step": 8063 }, { "epoch": 0.82, "grad_norm": 1.673438416970208, "learning_rate": 1.647066840421001e-06, "loss": 0.6913, "step": 8064 }, { "epoch": 0.82, "grad_norm": 1.6861994386869004, "learning_rate": 1.6452559870851336e-06, "loss": 0.7163, "step": 8065 }, { "epoch": 0.82, "grad_norm": 1.6480156694754329, "learning_rate": 1.6434460405260733e-06, "loss": 0.7316, "step": 8066 }, { "epoch": 0.82, "grad_norm": 1.465375917084765, "learning_rate": 1.6416370009402627e-06, "loss": 0.6073, "step": 8067 }, { "epoch": 0.82, "grad_norm": 1.6110628327794576, "learning_rate": 1.6398288685240494e-06, "loss": 0.7929, "step": 8068 }, { "epoch": 0.82, "grad_norm": 1.6048075073218324, "learning_rate": 1.6380216434736706e-06, "loss": 0.6459, "step": 8069 }, { "epoch": 0.82, "grad_norm": 1.643815248003964, "learning_rate": 1.6362153259852775e-06, "loss": 0.6194, "step": 8070 }, { "epoch": 0.82, "grad_norm": 1.6452411850619555, "learning_rate": 1.6344099162549143e-06, "loss": 0.7063, "step": 8071 }, { "epoch": 0.82, "grad_norm": 1.7036325914469437, "learning_rate": 1.6326054144785319e-06, "loss": 0.6956, "step": 8072 }, { "epoch": 0.82, "grad_norm": 1.7277501315511394, "learning_rate": 1.6308018208519815e-06, "loss": 0.6394, "step": 8073 }, { "epoch": 0.82, "grad_norm": 1.7138251157363322, "learning_rate": 1.6289991355710121e-06, "loss": 0.7295, "step": 8074 }, { "epoch": 0.82, "grad_norm": 1.6265331835084222, "learning_rate": 1.6271973588312784e-06, "loss": 0.5779, "step": 8075 }, { "epoch": 0.82, "grad_norm": 1.6653808390691958, "learning_rate": 1.6253964908283382e-06, "loss": 0.7051, "step": 8076 }, { "epoch": 0.82, "grad_norm": 1.6804402910950127, "learning_rate": 1.6235965317576418e-06, "loss": 0.7356, "step": 8077 }, { "epoch": 0.82, "grad_norm": 1.7370222042757812, "learning_rate": 1.6217974818145532e-06, "loss": 0.7099, "step": 8078 }, { "epoch": 0.82, "grad_norm": 1.4747458546716434, "learning_rate": 1.6199993411943238e-06, "loss": 0.5782, "step": 8079 }, { "epoch": 0.82, "grad_norm": 1.6859470291193008, "learning_rate": 1.6182021100921185e-06, "loss": 0.7637, "step": 8080 }, { "epoch": 0.82, "grad_norm": 1.7702922525523404, "learning_rate": 1.6164057887029994e-06, "loss": 0.5905, "step": 8081 }, { "epoch": 0.82, "grad_norm": 1.6709702750784812, "learning_rate": 1.6146103772219246e-06, "loss": 0.6451, "step": 8082 }, { "epoch": 0.82, "grad_norm": 1.6401880834646672, "learning_rate": 1.612815875843763e-06, "loss": 0.5818, "step": 8083 }, { "epoch": 0.82, "grad_norm": 1.8810582744200497, "learning_rate": 1.611022284763274e-06, "loss": 0.7115, "step": 8084 }, { "epoch": 0.82, "grad_norm": 1.7683313979746031, "learning_rate": 1.6092296041751277e-06, "loss": 0.7298, "step": 8085 }, { "epoch": 0.82, "grad_norm": 1.7469266290104861, "learning_rate": 1.607437834273894e-06, "loss": 0.7389, "step": 8086 }, { "epoch": 0.82, "grad_norm": 1.5873409497942523, "learning_rate": 1.6056469752540349e-06, "loss": 0.7042, "step": 8087 }, { "epoch": 0.82, "grad_norm": 1.683437781534904, "learning_rate": 1.6038570273099273e-06, "loss": 0.6705, "step": 8088 }, { "epoch": 0.82, "grad_norm": 1.847510008683372, "learning_rate": 1.602067990635835e-06, "loss": 0.6703, "step": 8089 }, { "epoch": 0.82, "grad_norm": 1.5865058761843016, "learning_rate": 1.6002798654259333e-06, "loss": 0.6765, "step": 8090 }, { "epoch": 0.82, "grad_norm": 1.4981225938372302, "learning_rate": 1.5984926518742982e-06, "loss": 0.5621, "step": 8091 }, { "epoch": 0.82, "grad_norm": 1.6472692118142727, "learning_rate": 1.596706350174898e-06, "loss": 0.6508, "step": 8092 }, { "epoch": 0.82, "grad_norm": 1.645000625397688, "learning_rate": 1.594920960521611e-06, "loss": 0.756, "step": 8093 }, { "epoch": 0.82, "grad_norm": 1.628324434163129, "learning_rate": 1.5931364831082152e-06, "loss": 0.7143, "step": 8094 }, { "epoch": 0.82, "grad_norm": 1.8753708051209703, "learning_rate": 1.5913529181283837e-06, "loss": 0.7856, "step": 8095 }, { "epoch": 0.82, "grad_norm": 1.6221303737097068, "learning_rate": 1.5895702657756984e-06, "loss": 0.7017, "step": 8096 }, { "epoch": 0.82, "grad_norm": 1.601246385274789, "learning_rate": 1.5877885262436333e-06, "loss": 0.6951, "step": 8097 }, { "epoch": 0.82, "grad_norm": 1.5418810995917551, "learning_rate": 1.5860076997255725e-06, "loss": 0.6298, "step": 8098 }, { "epoch": 0.82, "grad_norm": 1.7361284550203793, "learning_rate": 1.5842277864147971e-06, "loss": 0.7036, "step": 8099 }, { "epoch": 0.82, "grad_norm": 1.5257967081619923, "learning_rate": 1.5824487865044857e-06, "loss": 0.5985, "step": 8100 }, { "epoch": 0.82, "grad_norm": 1.83644377022114, "learning_rate": 1.5806707001877253e-06, "loss": 0.7191, "step": 8101 }, { "epoch": 0.82, "grad_norm": 1.6539965105696637, "learning_rate": 1.5788935276574947e-06, "loss": 0.7151, "step": 8102 }, { "epoch": 0.82, "grad_norm": 1.7310246798846824, "learning_rate": 1.5771172691066793e-06, "loss": 0.6899, "step": 8103 }, { "epoch": 0.82, "grad_norm": 1.723225438860287, "learning_rate": 1.5753419247280676e-06, "loss": 0.7134, "step": 8104 }, { "epoch": 0.82, "grad_norm": 1.5959154250822916, "learning_rate": 1.573567494714342e-06, "loss": 0.677, "step": 8105 }, { "epoch": 0.82, "grad_norm": 1.742594180725846, "learning_rate": 1.5717939792580916e-06, "loss": 0.6746, "step": 8106 }, { "epoch": 0.82, "grad_norm": 1.709142755507124, "learning_rate": 1.5700213785518003e-06, "loss": 0.7439, "step": 8107 }, { "epoch": 0.82, "grad_norm": 1.6564530461284355, "learning_rate": 1.5682496927878577e-06, "loss": 0.6564, "step": 8108 }, { "epoch": 0.82, "grad_norm": 1.949082537417003, "learning_rate": 1.5664789221585552e-06, "loss": 0.6938, "step": 8109 }, { "epoch": 0.82, "grad_norm": 1.7131596161923532, "learning_rate": 1.5647090668560794e-06, "loss": 0.6444, "step": 8110 }, { "epoch": 0.83, "grad_norm": 1.6780301599326606, "learning_rate": 1.5629401270725197e-06, "loss": 0.6874, "step": 8111 }, { "epoch": 0.83, "grad_norm": 1.4744301947942198, "learning_rate": 1.5611721029998716e-06, "loss": 0.7268, "step": 8112 }, { "epoch": 0.83, "grad_norm": 1.6870236838857529, "learning_rate": 1.5594049948300205e-06, "loss": 0.6121, "step": 8113 }, { "epoch": 0.83, "grad_norm": 1.4872314938621514, "learning_rate": 1.557638802754763e-06, "loss": 0.6208, "step": 8114 }, { "epoch": 0.83, "grad_norm": 1.7013662140599177, "learning_rate": 1.5558735269657877e-06, "loss": 0.6237, "step": 8115 }, { "epoch": 0.83, "grad_norm": 1.7271219737991148, "learning_rate": 1.5541091676546904e-06, "loss": 0.6577, "step": 8116 }, { "epoch": 0.83, "grad_norm": 1.5893707443428777, "learning_rate": 1.5523457250129648e-06, "loss": 0.6867, "step": 8117 }, { "epoch": 0.83, "grad_norm": 1.9039068155013918, "learning_rate": 1.5505831992320031e-06, "loss": 0.665, "step": 8118 }, { "epoch": 0.83, "grad_norm": 1.7217487973886862, "learning_rate": 1.5488215905031033e-06, "loss": 0.7883, "step": 8119 }, { "epoch": 0.83, "grad_norm": 1.6230993010708583, "learning_rate": 1.5470608990174551e-06, "loss": 0.724, "step": 8120 }, { "epoch": 0.83, "grad_norm": 1.5198966086936623, "learning_rate": 1.5453011249661577e-06, "loss": 0.6361, "step": 8121 }, { "epoch": 0.83, "grad_norm": 1.7769794565911328, "learning_rate": 1.543542268540209e-06, "loss": 0.7086, "step": 8122 }, { "epoch": 0.83, "grad_norm": 1.7323610176209234, "learning_rate": 1.5417843299305002e-06, "loss": 0.6249, "step": 8123 }, { "epoch": 0.83, "grad_norm": 1.6130541426278338, "learning_rate": 1.5400273093278328e-06, "loss": 0.5961, "step": 8124 }, { "epoch": 0.83, "grad_norm": 1.8589668993459887, "learning_rate": 1.5382712069228999e-06, "loss": 0.6291, "step": 8125 }, { "epoch": 0.83, "grad_norm": 1.588078222019581, "learning_rate": 1.5365160229063004e-06, "loss": 0.6874, "step": 8126 }, { "epoch": 0.83, "grad_norm": 1.7482045277109108, "learning_rate": 1.5347617574685358e-06, "loss": 0.5672, "step": 8127 }, { "epoch": 0.83, "grad_norm": 1.6977910833089311, "learning_rate": 1.5330084107999976e-06, "loss": 0.6242, "step": 8128 }, { "epoch": 0.83, "grad_norm": 1.8183946031637237, "learning_rate": 1.5312559830909878e-06, "loss": 0.714, "step": 8129 }, { "epoch": 0.83, "grad_norm": 1.5263848763255998, "learning_rate": 1.5295044745317068e-06, "loss": 0.747, "step": 8130 }, { "epoch": 0.83, "grad_norm": 1.8710990805940892, "learning_rate": 1.5277538853122497e-06, "loss": 0.731, "step": 8131 }, { "epoch": 0.83, "grad_norm": 1.6603387288595752, "learning_rate": 1.5260042156226184e-06, "loss": 0.6077, "step": 8132 }, { "epoch": 0.83, "grad_norm": 1.671883523275829, "learning_rate": 1.5242554656527097e-06, "loss": 0.8259, "step": 8133 }, { "epoch": 0.83, "grad_norm": 1.840611579339051, "learning_rate": 1.522507635592324e-06, "loss": 0.609, "step": 8134 }, { "epoch": 0.83, "grad_norm": 1.851169885764712, "learning_rate": 1.5207607256311641e-06, "loss": 0.7148, "step": 8135 }, { "epoch": 0.83, "grad_norm": 1.619183101913956, "learning_rate": 1.5190147359588237e-06, "loss": 0.6515, "step": 8136 }, { "epoch": 0.83, "grad_norm": 1.7002117308407043, "learning_rate": 1.517269666764809e-06, "loss": 0.6969, "step": 8137 }, { "epoch": 0.83, "grad_norm": 1.7142362259110455, "learning_rate": 1.5155255182385143e-06, "loss": 0.6511, "step": 8138 }, { "epoch": 0.83, "grad_norm": 1.601914764339988, "learning_rate": 1.5137822905692423e-06, "loss": 0.6475, "step": 8139 }, { "epoch": 0.83, "grad_norm": 1.7644138479405698, "learning_rate": 1.5120399839461953e-06, "loss": 0.6861, "step": 8140 }, { "epoch": 0.83, "grad_norm": 1.7064974699464002, "learning_rate": 1.5102985985584695e-06, "loss": 0.7408, "step": 8141 }, { "epoch": 0.83, "grad_norm": 1.6246618466450624, "learning_rate": 1.508558134595066e-06, "loss": 0.6684, "step": 8142 }, { "epoch": 0.83, "grad_norm": 1.6562685064954157, "learning_rate": 1.5068185922448887e-06, "loss": 0.6657, "step": 8143 }, { "epoch": 0.83, "grad_norm": 1.9585946166025932, "learning_rate": 1.5050799716967313e-06, "loss": 0.6331, "step": 8144 }, { "epoch": 0.83, "grad_norm": 1.671683570356811, "learning_rate": 1.5033422731392977e-06, "loss": 0.7035, "step": 8145 }, { "epoch": 0.83, "grad_norm": 1.550323061271016, "learning_rate": 1.5016054967611904e-06, "loss": 0.6498, "step": 8146 }, { "epoch": 0.83, "grad_norm": 1.7797696570553185, "learning_rate": 1.499869642750904e-06, "loss": 0.747, "step": 8147 }, { "epoch": 0.83, "grad_norm": 1.7232419058262327, "learning_rate": 1.4981347112968426e-06, "loss": 0.7048, "step": 8148 }, { "epoch": 0.83, "grad_norm": 1.8706299110360487, "learning_rate": 1.4964007025873017e-06, "loss": 0.6893, "step": 8149 }, { "epoch": 0.83, "grad_norm": 1.604912756002985, "learning_rate": 1.4946676168104834e-06, "loss": 0.6486, "step": 8150 }, { "epoch": 0.83, "grad_norm": 1.7382472735836743, "learning_rate": 1.4929354541544882e-06, "loss": 0.7424, "step": 8151 }, { "epoch": 0.83, "grad_norm": 1.9145589612518605, "learning_rate": 1.4912042148073124e-06, "loss": 0.7238, "step": 8152 }, { "epoch": 0.83, "grad_norm": 1.5925140534896371, "learning_rate": 1.489473898956857e-06, "loss": 0.5963, "step": 8153 }, { "epoch": 0.83, "grad_norm": 1.7374229282832934, "learning_rate": 1.4877445067909213e-06, "loss": 0.7043, "step": 8154 }, { "epoch": 0.83, "grad_norm": 1.8373494978688998, "learning_rate": 1.4860160384972e-06, "loss": 0.6932, "step": 8155 }, { "epoch": 0.83, "grad_norm": 1.7972090772416303, "learning_rate": 1.4842884942632952e-06, "loss": 0.7046, "step": 8156 }, { "epoch": 0.83, "grad_norm": 1.9947400366471757, "learning_rate": 1.4825618742767045e-06, "loss": 0.7492, "step": 8157 }, { "epoch": 0.83, "grad_norm": 1.5968741451412138, "learning_rate": 1.4808361787248237e-06, "loss": 0.6735, "step": 8158 }, { "epoch": 0.83, "grad_norm": 1.7897791644317969, "learning_rate": 1.4791114077949497e-06, "loss": 0.6743, "step": 8159 }, { "epoch": 0.83, "grad_norm": 1.525728328072972, "learning_rate": 1.4773875616742828e-06, "loss": 0.6981, "step": 8160 }, { "epoch": 0.83, "grad_norm": 1.8020126122351128, "learning_rate": 1.4756646405499164e-06, "loss": 0.7265, "step": 8161 }, { "epoch": 0.83, "grad_norm": 1.7665628087081688, "learning_rate": 1.4739426446088467e-06, "loss": 0.7165, "step": 8162 }, { "epoch": 0.83, "grad_norm": 1.7674743044682641, "learning_rate": 1.472221574037972e-06, "loss": 0.6207, "step": 8163 }, { "epoch": 0.83, "grad_norm": 1.6395601025438578, "learning_rate": 1.4705014290240838e-06, "loss": 0.6381, "step": 8164 }, { "epoch": 0.83, "grad_norm": 1.5560174701688523, "learning_rate": 1.4687822097538796e-06, "loss": 0.5278, "step": 8165 }, { "epoch": 0.83, "grad_norm": 1.5288415132268074, "learning_rate": 1.4670639164139555e-06, "loss": 0.6048, "step": 8166 }, { "epoch": 0.83, "grad_norm": 1.805040181247626, "learning_rate": 1.4653465491908003e-06, "loss": 0.6458, "step": 8167 }, { "epoch": 0.83, "grad_norm": 1.552499177462516, "learning_rate": 1.4636301082708127e-06, "loss": 0.6218, "step": 8168 }, { "epoch": 0.83, "grad_norm": 1.6076231371219833, "learning_rate": 1.4619145938402813e-06, "loss": 0.6282, "step": 8169 }, { "epoch": 0.83, "grad_norm": 1.7702824958542995, "learning_rate": 1.4602000060853994e-06, "loss": 0.7116, "step": 8170 }, { "epoch": 0.83, "grad_norm": 1.8301214132381038, "learning_rate": 1.4584863451922615e-06, "loss": 0.7077, "step": 8171 }, { "epoch": 0.83, "grad_norm": 1.8228686877917728, "learning_rate": 1.4567736113468545e-06, "loss": 0.7512, "step": 8172 }, { "epoch": 0.83, "grad_norm": 1.7104293452984662, "learning_rate": 1.4550618047350728e-06, "loss": 0.7394, "step": 8173 }, { "epoch": 0.83, "grad_norm": 1.8864743707993548, "learning_rate": 1.4533509255427013e-06, "loss": 0.7376, "step": 8174 }, { "epoch": 0.83, "grad_norm": 1.7191844775183622, "learning_rate": 1.4516409739554338e-06, "loss": 0.6737, "step": 8175 }, { "epoch": 0.83, "grad_norm": 1.7633488087541436, "learning_rate": 1.4499319501588582e-06, "loss": 0.7298, "step": 8176 }, { "epoch": 0.83, "grad_norm": 1.7316191985407565, "learning_rate": 1.4482238543384596e-06, "loss": 0.6715, "step": 8177 }, { "epoch": 0.83, "grad_norm": 1.5948138141341184, "learning_rate": 1.4465166866796298e-06, "loss": 0.6769, "step": 8178 }, { "epoch": 0.83, "grad_norm": 1.6954440427739106, "learning_rate": 1.4448104473676483e-06, "loss": 0.7358, "step": 8179 }, { "epoch": 0.83, "grad_norm": 1.8078562613994282, "learning_rate": 1.4431051365877058e-06, "loss": 0.7675, "step": 8180 }, { "epoch": 0.83, "grad_norm": 1.6272987902402685, "learning_rate": 1.4414007545248875e-06, "loss": 0.6328, "step": 8181 }, { "epoch": 0.83, "grad_norm": 1.6629866223470973, "learning_rate": 1.4396973013641736e-06, "loss": 0.7112, "step": 8182 }, { "epoch": 0.83, "grad_norm": 1.7560931270102156, "learning_rate": 1.4379947772904502e-06, "loss": 0.6473, "step": 8183 }, { "epoch": 0.83, "grad_norm": 1.6533183558470694, "learning_rate": 1.4362931824884995e-06, "loss": 0.7396, "step": 8184 }, { "epoch": 0.83, "grad_norm": 1.7162203646747494, "learning_rate": 1.4345925171430019e-06, "loss": 0.6405, "step": 8185 }, { "epoch": 0.83, "grad_norm": 1.7371762319524415, "learning_rate": 1.4328927814385397e-06, "loss": 0.6946, "step": 8186 }, { "epoch": 0.83, "grad_norm": 1.6011820265500123, "learning_rate": 1.4311939755595904e-06, "loss": 0.6324, "step": 8187 }, { "epoch": 0.83, "grad_norm": 1.718557478458414, "learning_rate": 1.4294960996905328e-06, "loss": 0.7573, "step": 8188 }, { "epoch": 0.83, "grad_norm": 1.8507287971729904, "learning_rate": 1.4277991540156487e-06, "loss": 0.6725, "step": 8189 }, { "epoch": 0.83, "grad_norm": 1.6812252065295867, "learning_rate": 1.4261031387191093e-06, "loss": 0.6939, "step": 8190 }, { "epoch": 0.83, "grad_norm": 1.5990936705216816, "learning_rate": 1.424408053984997e-06, "loss": 0.6721, "step": 8191 }, { "epoch": 0.83, "grad_norm": 1.644941347595725, "learning_rate": 1.4227138999972801e-06, "loss": 0.6082, "step": 8192 }, { "epoch": 0.83, "grad_norm": 1.6488723534423682, "learning_rate": 1.421020676939835e-06, "loss": 0.6637, "step": 8193 }, { "epoch": 0.83, "grad_norm": 1.641270793531587, "learning_rate": 1.4193283849964379e-06, "loss": 0.7056, "step": 8194 }, { "epoch": 0.83, "grad_norm": 1.6060072990891627, "learning_rate": 1.4176370243507564e-06, "loss": 0.6986, "step": 8195 }, { "epoch": 0.83, "grad_norm": 1.7266934811155912, "learning_rate": 1.4159465951863638e-06, "loss": 0.7088, "step": 8196 }, { "epoch": 0.83, "grad_norm": 1.5718313566278994, "learning_rate": 1.4142570976867276e-06, "loss": 0.7139, "step": 8197 }, { "epoch": 0.83, "grad_norm": 1.7411643009476758, "learning_rate": 1.4125685320352167e-06, "loss": 0.6195, "step": 8198 }, { "epoch": 0.83, "grad_norm": 1.8999500432719307, "learning_rate": 1.4108808984151023e-06, "loss": 0.7139, "step": 8199 }, { "epoch": 0.83, "grad_norm": 1.493831320737935, "learning_rate": 1.4091941970095446e-06, "loss": 0.6281, "step": 8200 }, { "epoch": 0.83, "grad_norm": 1.7084876257684942, "learning_rate": 1.4075084280016138e-06, "loss": 0.7867, "step": 8201 }, { "epoch": 0.83, "grad_norm": 1.7936129382568762, "learning_rate": 1.4058235915742702e-06, "loss": 0.7075, "step": 8202 }, { "epoch": 0.83, "grad_norm": 1.6964101364789455, "learning_rate": 1.4041396879103763e-06, "loss": 0.6834, "step": 8203 }, { "epoch": 0.83, "grad_norm": 1.738053483920836, "learning_rate": 1.4024567171926984e-06, "loss": 0.7705, "step": 8204 }, { "epoch": 0.83, "grad_norm": 1.8014855759132964, "learning_rate": 1.4007746796038912e-06, "loss": 0.6916, "step": 8205 }, { "epoch": 0.83, "grad_norm": 1.7326603410539312, "learning_rate": 1.3990935753265155e-06, "loss": 0.816, "step": 8206 }, { "epoch": 0.83, "grad_norm": 1.7264773657111878, "learning_rate": 1.397413404543031e-06, "loss": 0.7231, "step": 8207 }, { "epoch": 0.83, "grad_norm": 1.5624102154971813, "learning_rate": 1.3957341674357904e-06, "loss": 0.7574, "step": 8208 }, { "epoch": 0.84, "grad_norm": 1.5545206389114075, "learning_rate": 1.3940558641870517e-06, "loss": 0.6499, "step": 8209 }, { "epoch": 0.84, "grad_norm": 1.6880268153759315, "learning_rate": 1.3923784949789654e-06, "loss": 0.7573, "step": 8210 }, { "epoch": 0.84, "grad_norm": 1.720853432426547, "learning_rate": 1.390702059993585e-06, "loss": 0.7196, "step": 8211 }, { "epoch": 0.84, "grad_norm": 1.6481869374763833, "learning_rate": 1.3890265594128638e-06, "loss": 0.6756, "step": 8212 }, { "epoch": 0.84, "grad_norm": 1.6094648483301166, "learning_rate": 1.3873519934186474e-06, "loss": 0.7078, "step": 8213 }, { "epoch": 0.84, "grad_norm": 1.618792032977977, "learning_rate": 1.3856783621926873e-06, "loss": 0.682, "step": 8214 }, { "epoch": 0.84, "grad_norm": 1.5878214192541364, "learning_rate": 1.3840056659166257e-06, "loss": 0.7473, "step": 8215 }, { "epoch": 0.84, "grad_norm": 1.5842895543466722, "learning_rate": 1.3823339047720096e-06, "loss": 0.6335, "step": 8216 }, { "epoch": 0.84, "grad_norm": 1.7782310880484486, "learning_rate": 1.3806630789402852e-06, "loss": 0.7653, "step": 8217 }, { "epoch": 0.84, "grad_norm": 1.6956498406697338, "learning_rate": 1.3789931886027907e-06, "loss": 0.5894, "step": 8218 }, { "epoch": 0.84, "grad_norm": 1.7475029519745575, "learning_rate": 1.37732423394077e-06, "loss": 0.7174, "step": 8219 }, { "epoch": 0.84, "grad_norm": 1.6154412892748924, "learning_rate": 1.375656215135358e-06, "loss": 0.6459, "step": 8220 }, { "epoch": 0.84, "grad_norm": 1.9434901904421764, "learning_rate": 1.3739891323675935e-06, "loss": 0.6698, "step": 8221 }, { "epoch": 0.84, "grad_norm": 1.6567625078813784, "learning_rate": 1.3723229858184162e-06, "loss": 0.6578, "step": 8222 }, { "epoch": 0.84, "grad_norm": 1.776408853955685, "learning_rate": 1.3706577756686545e-06, "loss": 0.7233, "step": 8223 }, { "epoch": 0.84, "grad_norm": 1.8514879954569197, "learning_rate": 1.368993502099043e-06, "loss": 0.7251, "step": 8224 }, { "epoch": 0.84, "grad_norm": 1.5768850107459567, "learning_rate": 1.367330165290215e-06, "loss": 0.6744, "step": 8225 }, { "epoch": 0.84, "grad_norm": 1.4427110331792115, "learning_rate": 1.3656677654226957e-06, "loss": 0.5715, "step": 8226 }, { "epoch": 0.84, "grad_norm": 1.7838598756278934, "learning_rate": 1.364006302676918e-06, "loss": 0.7783, "step": 8227 }, { "epoch": 0.84, "grad_norm": 1.6732822161714365, "learning_rate": 1.3623457772332005e-06, "loss": 0.6944, "step": 8228 }, { "epoch": 0.84, "grad_norm": 1.7225981120830656, "learning_rate": 1.3606861892717715e-06, "loss": 0.5786, "step": 8229 }, { "epoch": 0.84, "grad_norm": 1.7587114484153354, "learning_rate": 1.359027538972756e-06, "loss": 0.7033, "step": 8230 }, { "epoch": 0.84, "grad_norm": 1.850649807807029, "learning_rate": 1.3573698265161683e-06, "loss": 0.6353, "step": 8231 }, { "epoch": 0.84, "grad_norm": 1.6641610341874145, "learning_rate": 1.3557130520819328e-06, "loss": 0.7124, "step": 8232 }, { "epoch": 0.84, "grad_norm": 1.9149203507188295, "learning_rate": 1.3540572158498622e-06, "loss": 0.7167, "step": 8233 }, { "epoch": 0.84, "grad_norm": 1.7525317054907879, "learning_rate": 1.3524023179996725e-06, "loss": 0.767, "step": 8234 }, { "epoch": 0.84, "grad_norm": 1.7889664661142302, "learning_rate": 1.3507483587109805e-06, "loss": 0.7336, "step": 8235 }, { "epoch": 0.84, "grad_norm": 1.5378168604185913, "learning_rate": 1.3490953381632933e-06, "loss": 0.6962, "step": 8236 }, { "epoch": 0.84, "grad_norm": 1.8147784384972894, "learning_rate": 1.347443256536024e-06, "loss": 0.6153, "step": 8237 }, { "epoch": 0.84, "grad_norm": 1.7302525034904064, "learning_rate": 1.3457921140084761e-06, "loss": 0.7129, "step": 8238 }, { "epoch": 0.84, "grad_norm": 1.5835440184825371, "learning_rate": 1.3441419107598575e-06, "loss": 0.7461, "step": 8239 }, { "epoch": 0.84, "grad_norm": 1.9160856857123725, "learning_rate": 1.3424926469692734e-06, "loss": 0.7012, "step": 8240 }, { "epoch": 0.84, "grad_norm": 1.7432989014314013, "learning_rate": 1.340844322815723e-06, "loss": 0.698, "step": 8241 }, { "epoch": 0.84, "grad_norm": 1.826200832807161, "learning_rate": 1.3391969384781066e-06, "loss": 0.768, "step": 8242 }, { "epoch": 0.84, "grad_norm": 1.6947304695110976, "learning_rate": 1.3375504941352257e-06, "loss": 0.666, "step": 8243 }, { "epoch": 0.84, "grad_norm": 1.6355678464956755, "learning_rate": 1.3359049899657706e-06, "loss": 0.6915, "step": 8244 }, { "epoch": 0.84, "grad_norm": 1.7745025367531957, "learning_rate": 1.3342604261483406e-06, "loss": 0.7305, "step": 8245 }, { "epoch": 0.84, "grad_norm": 1.7418465569279045, "learning_rate": 1.3326168028614206e-06, "loss": 0.6738, "step": 8246 }, { "epoch": 0.84, "grad_norm": 1.7872437432117918, "learning_rate": 1.3309741202834047e-06, "loss": 0.6452, "step": 8247 }, { "epoch": 0.84, "grad_norm": 1.7614011552562099, "learning_rate": 1.3293323785925816e-06, "loss": 0.6973, "step": 8248 }, { "epoch": 0.84, "grad_norm": 1.8171301925702645, "learning_rate": 1.327691577967133e-06, "loss": 0.6592, "step": 8249 }, { "epoch": 0.84, "grad_norm": 1.7057564407848962, "learning_rate": 1.326051718585144e-06, "loss": 0.7125, "step": 8250 }, { "epoch": 0.84, "grad_norm": 1.9809910038871592, "learning_rate": 1.324412800624597e-06, "loss": 0.6787, "step": 8251 }, { "epoch": 0.84, "grad_norm": 1.6486996929916975, "learning_rate": 1.3227748242633675e-06, "loss": 0.6453, "step": 8252 }, { "epoch": 0.84, "grad_norm": 1.612309839141793, "learning_rate": 1.3211377896792365e-06, "loss": 0.6477, "step": 8253 }, { "epoch": 0.84, "grad_norm": 1.7284065002363935, "learning_rate": 1.319501697049874e-06, "loss": 0.6668, "step": 8254 }, { "epoch": 0.84, "grad_norm": 1.7727740544108945, "learning_rate": 1.3178665465528551e-06, "loss": 0.6749, "step": 8255 }, { "epoch": 0.84, "grad_norm": 1.7506034964250732, "learning_rate": 1.3162323383656506e-06, "loss": 0.5859, "step": 8256 }, { "epoch": 0.84, "grad_norm": 1.4653029512735753, "learning_rate": 1.3145990726656244e-06, "loss": 0.6423, "step": 8257 }, { "epoch": 0.84, "grad_norm": 1.7090647923838687, "learning_rate": 1.3129667496300446e-06, "loss": 0.6786, "step": 8258 }, { "epoch": 0.84, "grad_norm": 1.7074457410507926, "learning_rate": 1.3113353694360764e-06, "loss": 0.6967, "step": 8259 }, { "epoch": 0.84, "grad_norm": 1.469735361171108, "learning_rate": 1.309704932260777e-06, "loss": 0.6104, "step": 8260 }, { "epoch": 0.84, "grad_norm": 1.7563334951394989, "learning_rate": 1.3080754382811055e-06, "loss": 0.7081, "step": 8261 }, { "epoch": 0.84, "grad_norm": 1.9075491493123156, "learning_rate": 1.30644688767392e-06, "loss": 0.769, "step": 8262 }, { "epoch": 0.84, "grad_norm": 1.911303763578774, "learning_rate": 1.3048192806159721e-06, "loss": 0.598, "step": 8263 }, { "epoch": 0.84, "grad_norm": 1.7526459129348975, "learning_rate": 1.3031926172839126e-06, "loss": 0.7159, "step": 8264 }, { "epoch": 0.84, "grad_norm": 1.6665938973477412, "learning_rate": 1.301566897854295e-06, "loss": 0.6275, "step": 8265 }, { "epoch": 0.84, "grad_norm": 1.5753078101727518, "learning_rate": 1.2999421225035602e-06, "loss": 0.634, "step": 8266 }, { "epoch": 0.84, "grad_norm": 1.537542156790524, "learning_rate": 1.298318291408054e-06, "loss": 0.602, "step": 8267 }, { "epoch": 0.84, "grad_norm": 1.8364242230592978, "learning_rate": 1.2966954047440194e-06, "loss": 0.6858, "step": 8268 }, { "epoch": 0.84, "grad_norm": 1.5620594579846787, "learning_rate": 1.2950734626875927e-06, "loss": 0.6455, "step": 8269 }, { "epoch": 0.84, "grad_norm": 1.643541532600793, "learning_rate": 1.2934524654148118e-06, "loss": 0.6662, "step": 8270 }, { "epoch": 0.84, "grad_norm": 1.7780861869537536, "learning_rate": 1.2918324131016135e-06, "loss": 0.6691, "step": 8271 }, { "epoch": 0.84, "grad_norm": 1.477593720782914, "learning_rate": 1.290213305923823e-06, "loss": 0.6359, "step": 8272 }, { "epoch": 0.84, "grad_norm": 1.873980085787594, "learning_rate": 1.2885951440571754e-06, "loss": 0.6693, "step": 8273 }, { "epoch": 0.84, "grad_norm": 1.6247128817864538, "learning_rate": 1.286977927677291e-06, "loss": 0.6256, "step": 8274 }, { "epoch": 0.84, "grad_norm": 1.723425124139416, "learning_rate": 1.285361656959696e-06, "loss": 0.7455, "step": 8275 }, { "epoch": 0.84, "grad_norm": 1.5730964779506738, "learning_rate": 1.2837463320798138e-06, "loss": 0.7081, "step": 8276 }, { "epoch": 0.84, "grad_norm": 1.566190339493104, "learning_rate": 1.2821319532129584e-06, "loss": 0.6472, "step": 8277 }, { "epoch": 0.84, "grad_norm": 1.870457612150168, "learning_rate": 1.2805185205343462e-06, "loss": 0.6416, "step": 8278 }, { "epoch": 0.84, "grad_norm": 1.63866119536756, "learning_rate": 1.278906034219094e-06, "loss": 0.7834, "step": 8279 }, { "epoch": 0.84, "grad_norm": 1.8807338285351183, "learning_rate": 1.2772944944422073e-06, "loss": 0.721, "step": 8280 }, { "epoch": 0.84, "grad_norm": 1.8934271455408902, "learning_rate": 1.2756839013785971e-06, "loss": 0.7553, "step": 8281 }, { "epoch": 0.84, "grad_norm": 1.9750340584837798, "learning_rate": 1.274074255203065e-06, "loss": 0.7215, "step": 8282 }, { "epoch": 0.84, "grad_norm": 1.605310988428569, "learning_rate": 1.2724655560903133e-06, "loss": 0.7834, "step": 8283 }, { "epoch": 0.84, "grad_norm": 1.695518140987458, "learning_rate": 1.2708578042149444e-06, "loss": 0.7367, "step": 8284 }, { "epoch": 0.84, "grad_norm": 1.668408369362041, "learning_rate": 1.2692509997514513e-06, "loss": 0.5848, "step": 8285 }, { "epoch": 0.84, "grad_norm": 1.7085874099620402, "learning_rate": 1.2676451428742297e-06, "loss": 0.744, "step": 8286 }, { "epoch": 0.84, "grad_norm": 1.8688038523063668, "learning_rate": 1.2660402337575672e-06, "loss": 0.681, "step": 8287 }, { "epoch": 0.84, "grad_norm": 1.969306422104363, "learning_rate": 1.2644362725756531e-06, "loss": 0.7181, "step": 8288 }, { "epoch": 0.84, "grad_norm": 1.6681407177929888, "learning_rate": 1.262833259502575e-06, "loss": 0.7593, "step": 8289 }, { "epoch": 0.84, "grad_norm": 1.661293988860556, "learning_rate": 1.2612311947123102e-06, "loss": 0.7058, "step": 8290 }, { "epoch": 0.84, "grad_norm": 1.840624486989448, "learning_rate": 1.259630078378743e-06, "loss": 0.6597, "step": 8291 }, { "epoch": 0.84, "grad_norm": 1.7114529523092323, "learning_rate": 1.2580299106756444e-06, "loss": 0.6687, "step": 8292 }, { "epoch": 0.84, "grad_norm": 1.579609069103059, "learning_rate": 1.2564306917766888e-06, "loss": 0.6612, "step": 8293 }, { "epoch": 0.84, "grad_norm": 1.6687574529398108, "learning_rate": 1.2548324218554508e-06, "loss": 0.6822, "step": 8294 }, { "epoch": 0.84, "grad_norm": 1.6662587476244746, "learning_rate": 1.2532351010853916e-06, "loss": 0.701, "step": 8295 }, { "epoch": 0.84, "grad_norm": 1.780952607420748, "learning_rate": 1.251638729639878e-06, "loss": 0.6609, "step": 8296 }, { "epoch": 0.84, "grad_norm": 1.6701796692628006, "learning_rate": 1.2500433076921737e-06, "loss": 0.7236, "step": 8297 }, { "epoch": 0.84, "grad_norm": 1.6958271656639399, "learning_rate": 1.2484488354154322e-06, "loss": 0.7342, "step": 8298 }, { "epoch": 0.84, "grad_norm": 1.5776014698998455, "learning_rate": 1.2468553129827132e-06, "loss": 0.65, "step": 8299 }, { "epoch": 0.84, "grad_norm": 1.7931854484696272, "learning_rate": 1.2452627405669637e-06, "loss": 0.8245, "step": 8300 }, { "epoch": 0.84, "grad_norm": 1.7502794056969053, "learning_rate": 1.2436711183410344e-06, "loss": 0.5966, "step": 8301 }, { "epoch": 0.84, "grad_norm": 1.6829815801088328, "learning_rate": 1.2420804464776736e-06, "loss": 0.6106, "step": 8302 }, { "epoch": 0.84, "grad_norm": 1.519296170317051, "learning_rate": 1.24049072514952e-06, "loss": 0.6843, "step": 8303 }, { "epoch": 0.84, "grad_norm": 1.6324457551020213, "learning_rate": 1.238901954529117e-06, "loss": 0.6252, "step": 8304 }, { "epoch": 0.84, "grad_norm": 1.5249173181106488, "learning_rate": 1.2373141347888973e-06, "loss": 0.6963, "step": 8305 }, { "epoch": 0.84, "grad_norm": 2.622005851140321, "learning_rate": 1.2357272661011943e-06, "loss": 0.7062, "step": 8306 }, { "epoch": 0.84, "grad_norm": 1.8225506321546634, "learning_rate": 1.2341413486382404e-06, "loss": 0.6429, "step": 8307 }, { "epoch": 0.85, "grad_norm": 1.4583705955133515, "learning_rate": 1.2325563825721587e-06, "loss": 0.6684, "step": 8308 }, { "epoch": 0.85, "grad_norm": 1.7633612538154486, "learning_rate": 1.2309723680749763e-06, "loss": 0.6642, "step": 8309 }, { "epoch": 0.85, "grad_norm": 1.8176392806658217, "learning_rate": 1.229389305318609e-06, "loss": 0.7658, "step": 8310 }, { "epoch": 0.85, "grad_norm": 1.7129799594557809, "learning_rate": 1.2278071944748748e-06, "loss": 0.7848, "step": 8311 }, { "epoch": 0.85, "grad_norm": 1.6597211231973432, "learning_rate": 1.226226035715491e-06, "loss": 0.7077, "step": 8312 }, { "epoch": 0.85, "grad_norm": 1.509184909464347, "learning_rate": 1.2246458292120621e-06, "loss": 0.5519, "step": 8313 }, { "epoch": 0.85, "grad_norm": 1.7605713217212096, "learning_rate": 1.2230665751360983e-06, "loss": 0.691, "step": 8314 }, { "epoch": 0.85, "grad_norm": 1.6014344717266955, "learning_rate": 1.2214882736590029e-06, "loss": 0.6665, "step": 8315 }, { "epoch": 0.85, "grad_norm": 1.7419877348732173, "learning_rate": 1.2199109249520724e-06, "loss": 0.6899, "step": 8316 }, { "epoch": 0.85, "grad_norm": 1.726432291563157, "learning_rate": 1.2183345291865089e-06, "loss": 0.6821, "step": 8317 }, { "epoch": 0.85, "grad_norm": 1.8575890894392022, "learning_rate": 1.2167590865333988e-06, "loss": 0.7333, "step": 8318 }, { "epoch": 0.85, "grad_norm": 1.5533278448200125, "learning_rate": 1.2151845971637365e-06, "loss": 0.6554, "step": 8319 }, { "epoch": 0.85, "grad_norm": 1.7866467208283314, "learning_rate": 1.2136110612484086e-06, "loss": 0.7351, "step": 8320 }, { "epoch": 0.85, "grad_norm": 1.8386786003634514, "learning_rate": 1.2120384789581953e-06, "loss": 0.7588, "step": 8321 }, { "epoch": 0.85, "grad_norm": 1.5810040818908622, "learning_rate": 1.2104668504637773e-06, "loss": 0.6545, "step": 8322 }, { "epoch": 0.85, "grad_norm": 1.562322712822364, "learning_rate": 1.2088961759357287e-06, "loss": 0.6201, "step": 8323 }, { "epoch": 0.85, "grad_norm": 1.4897774268415849, "learning_rate": 1.207326455544522e-06, "loss": 0.6774, "step": 8324 }, { "epoch": 0.85, "grad_norm": 1.723186570192101, "learning_rate": 1.2057576894605294e-06, "loss": 0.7934, "step": 8325 }, { "epoch": 0.85, "grad_norm": 1.6427088516879875, "learning_rate": 1.2041898778540096e-06, "loss": 0.6514, "step": 8326 }, { "epoch": 0.85, "grad_norm": 1.6999594989840934, "learning_rate": 1.2026230208951307e-06, "loss": 0.7262, "step": 8327 }, { "epoch": 0.85, "grad_norm": 1.5443055445751726, "learning_rate": 1.2010571187539454e-06, "loss": 0.8429, "step": 8328 }, { "epoch": 0.85, "grad_norm": 1.7848686000066742, "learning_rate": 1.1994921716004093e-06, "loss": 0.6515, "step": 8329 }, { "epoch": 0.85, "grad_norm": 1.7576592002961013, "learning_rate": 1.1979281796043752e-06, "loss": 0.7112, "step": 8330 }, { "epoch": 0.85, "grad_norm": 1.7057954207529185, "learning_rate": 1.1963651429355871e-06, "loss": 0.6285, "step": 8331 }, { "epoch": 0.85, "grad_norm": 1.8488761820970288, "learning_rate": 1.1948030617636885e-06, "loss": 0.6229, "step": 8332 }, { "epoch": 0.85, "grad_norm": 1.8905900773570874, "learning_rate": 1.1932419362582215e-06, "loss": 0.8246, "step": 8333 }, { "epoch": 0.85, "grad_norm": 1.8718436174150623, "learning_rate": 1.1916817665886183e-06, "loss": 0.6485, "step": 8334 }, { "epoch": 0.85, "grad_norm": 1.912184871813602, "learning_rate": 1.1901225529242145e-06, "loss": 0.7346, "step": 8335 }, { "epoch": 0.85, "grad_norm": 1.7569317956831032, "learning_rate": 1.1885642954342347e-06, "loss": 0.8115, "step": 8336 }, { "epoch": 0.85, "grad_norm": 1.5771053460843896, "learning_rate": 1.187006994287806e-06, "loss": 0.6744, "step": 8337 }, { "epoch": 0.85, "grad_norm": 1.8762386730077556, "learning_rate": 1.1854506496539485e-06, "loss": 0.6746, "step": 8338 }, { "epoch": 0.85, "grad_norm": 1.664766180889851, "learning_rate": 1.1838952617015786e-06, "loss": 0.7055, "step": 8339 }, { "epoch": 0.85, "grad_norm": 1.8189649427529284, "learning_rate": 1.182340830599511e-06, "loss": 0.7021, "step": 8340 }, { "epoch": 0.85, "grad_norm": 1.5837208446038582, "learning_rate": 1.1807873565164507e-06, "loss": 0.5835, "step": 8341 }, { "epoch": 0.85, "grad_norm": 1.592398960952057, "learning_rate": 1.1792348396210064e-06, "loss": 0.6298, "step": 8342 }, { "epoch": 0.85, "grad_norm": 1.7031410402159544, "learning_rate": 1.1776832800816807e-06, "loss": 0.6705, "step": 8343 }, { "epoch": 0.85, "grad_norm": 1.39023837442245, "learning_rate": 1.176132678066868e-06, "loss": 0.5857, "step": 8344 }, { "epoch": 0.85, "grad_norm": 1.5811862674391624, "learning_rate": 1.174583033744865e-06, "loss": 0.6163, "step": 8345 }, { "epoch": 0.85, "grad_norm": 1.7060951684814682, "learning_rate": 1.1730343472838568e-06, "loss": 0.7131, "step": 8346 }, { "epoch": 0.85, "grad_norm": 1.672908454020415, "learning_rate": 1.1714866188519325e-06, "loss": 0.6297, "step": 8347 }, { "epoch": 0.85, "grad_norm": 1.6320128241024243, "learning_rate": 1.1699398486170755e-06, "loss": 0.5627, "step": 8348 }, { "epoch": 0.85, "grad_norm": 1.6833927256365844, "learning_rate": 1.1683940367471592e-06, "loss": 0.6213, "step": 8349 }, { "epoch": 0.85, "grad_norm": 1.722046538171397, "learning_rate": 1.1668491834099606e-06, "loss": 0.7279, "step": 8350 }, { "epoch": 0.85, "grad_norm": 1.6351716824061726, "learning_rate": 1.1653052887731465e-06, "loss": 0.5468, "step": 8351 }, { "epoch": 0.85, "grad_norm": 1.9516902571195383, "learning_rate": 1.1637623530042842e-06, "loss": 0.6836, "step": 8352 }, { "epoch": 0.85, "grad_norm": 1.5973794419071494, "learning_rate": 1.1622203762708374e-06, "loss": 0.6042, "step": 8353 }, { "epoch": 0.85, "grad_norm": 1.7871573535980738, "learning_rate": 1.1606793587401588e-06, "loss": 0.7547, "step": 8354 }, { "epoch": 0.85, "grad_norm": 1.593337729041243, "learning_rate": 1.1591393005795049e-06, "loss": 0.6016, "step": 8355 }, { "epoch": 0.85, "grad_norm": 1.6813138223461972, "learning_rate": 1.1576002019560261e-06, "loss": 0.6707, "step": 8356 }, { "epoch": 0.85, "grad_norm": 1.782124641021241, "learning_rate": 1.1560620630367635e-06, "loss": 0.6561, "step": 8357 }, { "epoch": 0.85, "grad_norm": 1.6552514721499414, "learning_rate": 1.1545248839886624e-06, "loss": 0.6258, "step": 8358 }, { "epoch": 0.85, "grad_norm": 1.7510283040904677, "learning_rate": 1.152988664978556e-06, "loss": 0.6952, "step": 8359 }, { "epoch": 0.85, "grad_norm": 1.6683898421711663, "learning_rate": 1.1514534061731774e-06, "loss": 0.6231, "step": 8360 }, { "epoch": 0.85, "grad_norm": 1.7258501235344073, "learning_rate": 1.149919107739158e-06, "loss": 0.6234, "step": 8361 }, { "epoch": 0.85, "grad_norm": 1.5408850111580221, "learning_rate": 1.1483857698430178e-06, "loss": 0.6398, "step": 8362 }, { "epoch": 0.85, "grad_norm": 1.6650248318563607, "learning_rate": 1.1468533926511783e-06, "loss": 0.7315, "step": 8363 }, { "epoch": 0.85, "grad_norm": 1.6331762688693068, "learning_rate": 1.1453219763299572e-06, "loss": 0.7218, "step": 8364 }, { "epoch": 0.85, "grad_norm": 1.6692634858355666, "learning_rate": 1.143791521045562e-06, "loss": 0.662, "step": 8365 }, { "epoch": 0.85, "grad_norm": 1.7281442159501406, "learning_rate": 1.1422620269641015e-06, "loss": 0.6767, "step": 8366 }, { "epoch": 0.85, "grad_norm": 1.8944519361838665, "learning_rate": 1.1407334942515801e-06, "loss": 0.7541, "step": 8367 }, { "epoch": 0.85, "grad_norm": 1.72952315228512, "learning_rate": 1.139205923073894e-06, "loss": 0.7566, "step": 8368 }, { "epoch": 0.85, "grad_norm": 1.615345607547421, "learning_rate": 1.1376793135968357e-06, "loss": 0.694, "step": 8369 }, { "epoch": 0.85, "grad_norm": 1.5608929827737215, "learning_rate": 1.1361536659861005e-06, "loss": 0.7242, "step": 8370 }, { "epoch": 0.85, "grad_norm": 1.814474961764568, "learning_rate": 1.1346289804072664e-06, "loss": 0.7095, "step": 8371 }, { "epoch": 0.85, "grad_norm": 1.6189912166838378, "learning_rate": 1.1331052570258183e-06, "loss": 0.7125, "step": 8372 }, { "epoch": 0.85, "grad_norm": 1.7259453310980772, "learning_rate": 1.131582496007133e-06, "loss": 0.7864, "step": 8373 }, { "epoch": 0.85, "grad_norm": 1.5270498544236337, "learning_rate": 1.1300606975164807e-06, "loss": 0.6883, "step": 8374 }, { "epoch": 0.85, "grad_norm": 1.9169050697498091, "learning_rate": 1.128539861719028e-06, "loss": 0.8026, "step": 8375 }, { "epoch": 0.85, "grad_norm": 1.7362936142752212, "learning_rate": 1.1270199887798417e-06, "loss": 0.6875, "step": 8376 }, { "epoch": 0.85, "grad_norm": 1.6440481486462275, "learning_rate": 1.1255010788638753e-06, "loss": 0.6966, "step": 8377 }, { "epoch": 0.85, "grad_norm": 1.646675454268616, "learning_rate": 1.1239831321359862e-06, "loss": 0.6603, "step": 8378 }, { "epoch": 0.85, "grad_norm": 1.725336742038386, "learning_rate": 1.1224661487609234e-06, "loss": 0.7253, "step": 8379 }, { "epoch": 0.85, "grad_norm": 1.995017255507201, "learning_rate": 1.1209501289033298e-06, "loss": 0.732, "step": 8380 }, { "epoch": 0.85, "grad_norm": 1.7151332610577539, "learning_rate": 1.1194350727277493e-06, "loss": 0.6279, "step": 8381 }, { "epoch": 0.85, "grad_norm": 1.488836247487096, "learning_rate": 1.1179209803986124e-06, "loss": 0.5818, "step": 8382 }, { "epoch": 0.85, "grad_norm": 1.7011228341231284, "learning_rate": 1.1164078520802535e-06, "loss": 0.7556, "step": 8383 }, { "epoch": 0.85, "grad_norm": 1.5593983291258506, "learning_rate": 1.1148956879369e-06, "loss": 0.6098, "step": 8384 }, { "epoch": 0.85, "grad_norm": 1.5765603798333052, "learning_rate": 1.1133844881326706e-06, "loss": 0.6282, "step": 8385 }, { "epoch": 0.85, "grad_norm": 1.480114990831892, "learning_rate": 1.111874252831585e-06, "loss": 0.4523, "step": 8386 }, { "epoch": 0.85, "grad_norm": 1.6096880954934176, "learning_rate": 1.1103649821975527e-06, "loss": 0.6659, "step": 8387 }, { "epoch": 0.85, "grad_norm": 1.7778688660947681, "learning_rate": 1.1088566763943843e-06, "loss": 0.7247, "step": 8388 }, { "epoch": 0.85, "grad_norm": 1.803483749854071, "learning_rate": 1.1073493355857823e-06, "loss": 0.6551, "step": 8389 }, { "epoch": 0.85, "grad_norm": 1.6709014820841264, "learning_rate": 1.1058429599353426e-06, "loss": 0.6616, "step": 8390 }, { "epoch": 0.85, "grad_norm": 1.9903915640329826, "learning_rate": 1.104337549606561e-06, "loss": 0.8037, "step": 8391 }, { "epoch": 0.85, "grad_norm": 1.5789512190681958, "learning_rate": 1.1028331047628282e-06, "loss": 0.7054, "step": 8392 }, { "epoch": 0.85, "grad_norm": 1.7115052153277104, "learning_rate": 1.1013296255674233e-06, "loss": 0.7137, "step": 8393 }, { "epoch": 0.85, "grad_norm": 1.5073715688022733, "learning_rate": 1.0998271121835302e-06, "loss": 0.595, "step": 8394 }, { "epoch": 0.85, "grad_norm": 1.887979213898425, "learning_rate": 1.0983255647742185e-06, "loss": 0.707, "step": 8395 }, { "epoch": 0.85, "grad_norm": 1.701860423331792, "learning_rate": 1.0968249835024603e-06, "loss": 0.7961, "step": 8396 }, { "epoch": 0.85, "grad_norm": 1.619199574800476, "learning_rate": 1.0953253685311227e-06, "loss": 0.5768, "step": 8397 }, { "epoch": 0.85, "grad_norm": 1.987229741846533, "learning_rate": 1.0938267200229603e-06, "loss": 0.6983, "step": 8398 }, { "epoch": 0.85, "grad_norm": 1.8108338151674153, "learning_rate": 1.0923290381406316e-06, "loss": 0.7066, "step": 8399 }, { "epoch": 0.85, "grad_norm": 1.7244286465155514, "learning_rate": 1.0908323230466845e-06, "loss": 0.6901, "step": 8400 }, { "epoch": 0.85, "grad_norm": 1.6688581986336752, "learning_rate": 1.0893365749035646e-06, "loss": 0.6577, "step": 8401 }, { "epoch": 0.85, "grad_norm": 1.601690834802701, "learning_rate": 1.0878417938736142e-06, "loss": 0.6563, "step": 8402 }, { "epoch": 0.85, "grad_norm": 1.653747397467601, "learning_rate": 1.0863479801190645e-06, "loss": 0.7102, "step": 8403 }, { "epoch": 0.85, "grad_norm": 1.6179264464657146, "learning_rate": 1.0848551338020497e-06, "loss": 0.702, "step": 8404 }, { "epoch": 0.85, "grad_norm": 1.6147101757367943, "learning_rate": 1.0833632550845907e-06, "loss": 0.6662, "step": 8405 }, { "epoch": 0.86, "grad_norm": 1.8821238740303694, "learning_rate": 1.0818723441286105e-06, "loss": 0.7495, "step": 8406 }, { "epoch": 0.86, "grad_norm": 1.6447027933493943, "learning_rate": 1.0803824010959253e-06, "loss": 0.7016, "step": 8407 }, { "epoch": 0.86, "grad_norm": 1.7410971803330708, "learning_rate": 1.078893426148241e-06, "loss": 0.6797, "step": 8408 }, { "epoch": 0.86, "grad_norm": 1.609515505448147, "learning_rate": 1.0774054194471638e-06, "loss": 0.6954, "step": 8409 }, { "epoch": 0.86, "grad_norm": 1.8647808273584001, "learning_rate": 1.0759183811541973e-06, "loss": 0.7015, "step": 8410 }, { "epoch": 0.86, "grad_norm": 1.656207416925414, "learning_rate": 1.074432311430732e-06, "loss": 0.7844, "step": 8411 }, { "epoch": 0.86, "grad_norm": 1.5818580586379434, "learning_rate": 1.0729472104380601e-06, "loss": 0.6853, "step": 8412 }, { "epoch": 0.86, "grad_norm": 1.6628763789830707, "learning_rate": 1.0714630783373636e-06, "loss": 0.7416, "step": 8413 }, { "epoch": 0.86, "grad_norm": 1.8792780484068097, "learning_rate": 1.0699799152897238e-06, "loss": 0.8003, "step": 8414 }, { "epoch": 0.86, "grad_norm": 1.6385320775186256, "learning_rate": 1.0684977214561155e-06, "loss": 0.6372, "step": 8415 }, { "epoch": 0.86, "grad_norm": 1.3279034371879015, "learning_rate": 1.067016496997404e-06, "loss": 0.5391, "step": 8416 }, { "epoch": 0.86, "grad_norm": 1.7867343700510006, "learning_rate": 1.0655362420743587e-06, "loss": 0.7462, "step": 8417 }, { "epoch": 0.86, "grad_norm": 1.5461375040632466, "learning_rate": 1.0640569568476323e-06, "loss": 0.5684, "step": 8418 }, { "epoch": 0.86, "grad_norm": 1.6583070609877475, "learning_rate": 1.0625786414777805e-06, "loss": 0.6747, "step": 8419 }, { "epoch": 0.86, "grad_norm": 1.703374935104682, "learning_rate": 1.0611012961252543e-06, "loss": 0.755, "step": 8420 }, { "epoch": 0.86, "grad_norm": 1.66739442848443, "learning_rate": 1.0596249209503906e-06, "loss": 0.8473, "step": 8421 }, { "epoch": 0.86, "grad_norm": 1.8210961467259086, "learning_rate": 1.0581495161134325e-06, "loss": 0.6847, "step": 8422 }, { "epoch": 0.86, "grad_norm": 1.5643904053537698, "learning_rate": 1.0566750817745076e-06, "loss": 0.6154, "step": 8423 }, { "epoch": 0.86, "grad_norm": 1.6738856312856583, "learning_rate": 1.0552016180936442e-06, "loss": 0.648, "step": 8424 }, { "epoch": 0.86, "grad_norm": 1.7984497698344761, "learning_rate": 1.0537291252307668e-06, "loss": 0.7255, "step": 8425 }, { "epoch": 0.86, "grad_norm": 1.611425019120535, "learning_rate": 1.0522576033456865e-06, "loss": 0.5955, "step": 8426 }, { "epoch": 0.86, "grad_norm": 1.5985930161310276, "learning_rate": 1.0507870525981166e-06, "loss": 0.647, "step": 8427 }, { "epoch": 0.86, "grad_norm": 1.602373516978655, "learning_rate": 1.0493174731476641e-06, "loss": 0.6878, "step": 8428 }, { "epoch": 0.86, "grad_norm": 1.684892750947452, "learning_rate": 1.0478488651538244e-06, "loss": 0.6671, "step": 8429 }, { "epoch": 0.86, "grad_norm": 1.751413082663372, "learning_rate": 1.0463812287759967e-06, "loss": 0.8063, "step": 8430 }, { "epoch": 0.86, "grad_norm": 1.6046268645591264, "learning_rate": 1.0449145641734648e-06, "loss": 0.5918, "step": 8431 }, { "epoch": 0.86, "grad_norm": 1.7159864744314377, "learning_rate": 1.0434488715054158e-06, "loss": 0.6877, "step": 8432 }, { "epoch": 0.86, "grad_norm": 1.631233184520412, "learning_rate": 1.0419841509309293e-06, "loss": 0.7765, "step": 8433 }, { "epoch": 0.86, "grad_norm": 1.532792792300549, "learning_rate": 1.0405204026089732e-06, "loss": 0.5547, "step": 8434 }, { "epoch": 0.86, "grad_norm": 1.6352150853995833, "learning_rate": 1.0390576266984186e-06, "loss": 0.785, "step": 8435 }, { "epoch": 0.86, "grad_norm": 1.621107971810868, "learning_rate": 1.0375958233580241e-06, "loss": 0.6549, "step": 8436 }, { "epoch": 0.86, "grad_norm": 1.7423638741067684, "learning_rate": 1.0361349927464459e-06, "loss": 0.671, "step": 8437 }, { "epoch": 0.86, "grad_norm": 1.6553449685180674, "learning_rate": 1.0346751350222373e-06, "loss": 0.7222, "step": 8438 }, { "epoch": 0.86, "grad_norm": 1.69168484776965, "learning_rate": 1.0332162503438382e-06, "loss": 0.7429, "step": 8439 }, { "epoch": 0.86, "grad_norm": 1.7137645740993, "learning_rate": 1.031758338869593e-06, "loss": 0.7141, "step": 8440 }, { "epoch": 0.86, "grad_norm": 1.7547540988136974, "learning_rate": 1.0303014007577306e-06, "loss": 0.6966, "step": 8441 }, { "epoch": 0.86, "grad_norm": 1.6525465199540916, "learning_rate": 1.0288454361663802e-06, "loss": 0.6479, "step": 8442 }, { "epoch": 0.86, "grad_norm": 1.5072249642946518, "learning_rate": 1.0273904452535666e-06, "loss": 0.6614, "step": 8443 }, { "epoch": 0.86, "grad_norm": 1.5417080513159942, "learning_rate": 1.0259364281772023e-06, "loss": 0.6281, "step": 8444 }, { "epoch": 0.86, "grad_norm": 1.586239143977994, "learning_rate": 1.024483385095101e-06, "loss": 0.739, "step": 8445 }, { "epoch": 0.86, "grad_norm": 1.6299264084653204, "learning_rate": 1.0230313161649674e-06, "loss": 0.71, "step": 8446 }, { "epoch": 0.86, "grad_norm": 1.652423537669246, "learning_rate": 1.0215802215443993e-06, "loss": 0.6331, "step": 8447 }, { "epoch": 0.86, "grad_norm": 1.486458058211073, "learning_rate": 1.0201301013908926e-06, "loss": 0.6625, "step": 8448 }, { "epoch": 0.86, "grad_norm": 1.7202949780514047, "learning_rate": 1.0186809558618327e-06, "loss": 0.7799, "step": 8449 }, { "epoch": 0.86, "grad_norm": 1.851452722138668, "learning_rate": 1.0172327851145026e-06, "loss": 0.713, "step": 8450 }, { "epoch": 0.86, "grad_norm": 1.7589655012461458, "learning_rate": 1.0157855893060809e-06, "loss": 0.706, "step": 8451 }, { "epoch": 0.86, "grad_norm": 1.7341879348494615, "learning_rate": 1.0143393685936342e-06, "loss": 0.7593, "step": 8452 }, { "epoch": 0.86, "grad_norm": 1.9346470924362966, "learning_rate": 1.0128941231341305e-06, "loss": 0.7522, "step": 8453 }, { "epoch": 0.86, "grad_norm": 1.6773163911161024, "learning_rate": 1.0114498530844253e-06, "loss": 0.732, "step": 8454 }, { "epoch": 0.86, "grad_norm": 1.6882931865366573, "learning_rate": 1.010006558601274e-06, "loss": 0.6606, "step": 8455 }, { "epoch": 0.86, "grad_norm": 1.6757261676598967, "learning_rate": 1.0085642398413243e-06, "loss": 0.5838, "step": 8456 }, { "epoch": 0.86, "grad_norm": 1.8452775158196029, "learning_rate": 1.0071228969611146e-06, "loss": 0.7233, "step": 8457 }, { "epoch": 0.86, "grad_norm": 1.5740820266658542, "learning_rate": 1.0056825301170836e-06, "loss": 0.5603, "step": 8458 }, { "epoch": 0.86, "grad_norm": 1.6426284893961631, "learning_rate": 1.0042431394655562e-06, "loss": 0.671, "step": 8459 }, { "epoch": 0.86, "grad_norm": 1.6699412335146264, "learning_rate": 1.0028047251627583e-06, "loss": 0.6582, "step": 8460 }, { "epoch": 0.86, "grad_norm": 1.6824431706589318, "learning_rate": 1.0013672873648083e-06, "loss": 0.7196, "step": 8461 }, { "epoch": 0.86, "grad_norm": 1.963283484565238, "learning_rate": 9.999308262277152e-07, "loss": 0.7811, "step": 8462 }, { "epoch": 0.86, "grad_norm": 1.5592969312288327, "learning_rate": 9.98495341907385e-07, "loss": 0.5841, "step": 8463 }, { "epoch": 0.86, "grad_norm": 1.578809012565993, "learning_rate": 9.9706083455962e-07, "loss": 0.6566, "step": 8464 }, { "epoch": 0.86, "grad_norm": 1.7662396613126716, "learning_rate": 9.9562730434011e-07, "loss": 0.7146, "step": 8465 }, { "epoch": 0.86, "grad_norm": 1.564066354381204, "learning_rate": 9.941947514044437e-07, "loss": 0.6503, "step": 8466 }, { "epoch": 0.86, "grad_norm": 1.74140264632033, "learning_rate": 9.927631759081013e-07, "loss": 0.7223, "step": 8467 }, { "epoch": 0.86, "grad_norm": 1.6125573691640296, "learning_rate": 9.913325780064586e-07, "loss": 0.7539, "step": 8468 }, { "epoch": 0.86, "grad_norm": 1.7445756450530683, "learning_rate": 9.89902957854787e-07, "loss": 0.5746, "step": 8469 }, { "epoch": 0.86, "grad_norm": 1.6232908198351832, "learning_rate": 9.884743156082445e-07, "loss": 0.6638, "step": 8470 }, { "epoch": 0.86, "grad_norm": 1.6749833434917487, "learning_rate": 9.870466514218912e-07, "loss": 0.6728, "step": 8471 }, { "epoch": 0.86, "grad_norm": 1.7949419456953852, "learning_rate": 9.856199654506781e-07, "loss": 0.6282, "step": 8472 }, { "epoch": 0.86, "grad_norm": 1.5343271623648493, "learning_rate": 9.84194257849448e-07, "loss": 0.6372, "step": 8473 }, { "epoch": 0.86, "grad_norm": 1.715316359108728, "learning_rate": 9.827695287729389e-07, "loss": 0.5719, "step": 8474 }, { "epoch": 0.86, "grad_norm": 1.6786352216456508, "learning_rate": 9.813457783757852e-07, "loss": 0.6733, "step": 8475 }, { "epoch": 0.86, "grad_norm": 1.6605747453901634, "learning_rate": 9.7992300681251e-07, "loss": 0.7578, "step": 8476 }, { "epoch": 0.86, "grad_norm": 1.675631402409404, "learning_rate": 9.78501214237535e-07, "loss": 0.694, "step": 8477 }, { "epoch": 0.86, "grad_norm": 1.5661955438528077, "learning_rate": 9.770804008051738e-07, "loss": 0.6373, "step": 8478 }, { "epoch": 0.86, "grad_norm": 1.583340621840412, "learning_rate": 9.756605666696305e-07, "loss": 0.7857, "step": 8479 }, { "epoch": 0.86, "grad_norm": 1.639875934388625, "learning_rate": 9.742417119850078e-07, "loss": 0.6977, "step": 8480 }, { "epoch": 0.86, "grad_norm": 1.572932766498734, "learning_rate": 9.72823836905301e-07, "loss": 0.643, "step": 8481 }, { "epoch": 0.86, "grad_norm": 1.7348617566859073, "learning_rate": 9.714069415843975e-07, "loss": 0.6105, "step": 8482 }, { "epoch": 0.86, "grad_norm": 1.5129019769957652, "learning_rate": 9.699910261760781e-07, "loss": 0.6587, "step": 8483 }, { "epoch": 0.86, "grad_norm": 1.7286293367869179, "learning_rate": 9.685760908340215e-07, "loss": 0.6924, "step": 8484 }, { "epoch": 0.86, "grad_norm": 1.6286863062115362, "learning_rate": 9.671621357117923e-07, "loss": 0.6642, "step": 8485 }, { "epoch": 0.86, "grad_norm": 1.7886869240578565, "learning_rate": 9.657491609628577e-07, "loss": 0.6126, "step": 8486 }, { "epoch": 0.86, "grad_norm": 1.6579069465573368, "learning_rate": 9.6433716674057e-07, "loss": 0.6803, "step": 8487 }, { "epoch": 0.86, "grad_norm": 1.6616465793411628, "learning_rate": 9.629261531981803e-07, "loss": 0.5699, "step": 8488 }, { "epoch": 0.86, "grad_norm": 1.6520740195369406, "learning_rate": 9.615161204888346e-07, "loss": 0.6986, "step": 8489 }, { "epoch": 0.86, "grad_norm": 1.7609516169236654, "learning_rate": 9.601070687655667e-07, "loss": 0.6555, "step": 8490 }, { "epoch": 0.86, "grad_norm": 1.7617317793956033, "learning_rate": 9.586989981813078e-07, "loss": 0.6843, "step": 8491 }, { "epoch": 0.86, "grad_norm": 1.6235398129125809, "learning_rate": 9.572919088888844e-07, "loss": 0.7243, "step": 8492 }, { "epoch": 0.86, "grad_norm": 1.618641132828586, "learning_rate": 9.55885801041011e-07, "loss": 0.675, "step": 8493 }, { "epoch": 0.86, "grad_norm": 1.7389103985815473, "learning_rate": 9.544806747903013e-07, "loss": 0.7018, "step": 8494 }, { "epoch": 0.86, "grad_norm": 1.6473771952026806, "learning_rate": 9.530765302892553e-07, "loss": 0.586, "step": 8495 }, { "epoch": 0.86, "grad_norm": 1.5652406055054269, "learning_rate": 9.516733676902756e-07, "loss": 0.6362, "step": 8496 }, { "epoch": 0.86, "grad_norm": 1.5478809662387278, "learning_rate": 9.502711871456527e-07, "loss": 0.5799, "step": 8497 }, { "epoch": 0.86, "grad_norm": 1.5968218731230266, "learning_rate": 9.488699888075681e-07, "loss": 0.695, "step": 8498 }, { "epoch": 0.86, "grad_norm": 1.9413116133308845, "learning_rate": 9.474697728281058e-07, "loss": 0.6953, "step": 8499 }, { "epoch": 0.86, "grad_norm": 1.7387764741853589, "learning_rate": 9.460705393592307e-07, "loss": 0.5861, "step": 8500 }, { "epoch": 0.86, "grad_norm": 1.6879635537094295, "learning_rate": 9.446722885528115e-07, "loss": 0.7975, "step": 8501 }, { "epoch": 0.86, "grad_norm": 1.7870935928368428, "learning_rate": 9.432750205606079e-07, "loss": 0.6651, "step": 8502 }, { "epoch": 0.86, "grad_norm": 1.7000098571909354, "learning_rate": 9.418787355342674e-07, "loss": 0.6625, "step": 8503 }, { "epoch": 0.87, "grad_norm": 1.6898540784275544, "learning_rate": 9.404834336253366e-07, "loss": 0.6732, "step": 8504 }, { "epoch": 0.87, "grad_norm": 1.5420769396071596, "learning_rate": 9.390891149852566e-07, "loss": 0.5964, "step": 8505 }, { "epoch": 0.87, "grad_norm": 1.8262952647153974, "learning_rate": 9.376957797653541e-07, "loss": 0.6322, "step": 8506 }, { "epoch": 0.87, "grad_norm": 1.6713233513580645, "learning_rate": 9.363034281168581e-07, "loss": 0.7156, "step": 8507 }, { "epoch": 0.87, "grad_norm": 1.558887253750619, "learning_rate": 9.349120601908834e-07, "loss": 0.6006, "step": 8508 }, { "epoch": 0.87, "grad_norm": 1.5578407002530088, "learning_rate": 9.335216761384414e-07, "loss": 0.6743, "step": 8509 }, { "epoch": 0.87, "grad_norm": 1.4893998244873128, "learning_rate": 9.321322761104401e-07, "loss": 0.5171, "step": 8510 }, { "epoch": 0.87, "grad_norm": 1.8732182263707253, "learning_rate": 9.307438602576724e-07, "loss": 0.7097, "step": 8511 }, { "epoch": 0.87, "grad_norm": 1.6579487951787695, "learning_rate": 9.293564287308332e-07, "loss": 0.6302, "step": 8512 }, { "epoch": 0.87, "grad_norm": 1.5898531234427316, "learning_rate": 9.279699816805032e-07, "loss": 0.7759, "step": 8513 }, { "epoch": 0.87, "grad_norm": 1.5373161200871173, "learning_rate": 9.26584519257161e-07, "loss": 0.5888, "step": 8514 }, { "epoch": 0.87, "grad_norm": 1.5786124799035222, "learning_rate": 9.252000416111784e-07, "loss": 0.6454, "step": 8515 }, { "epoch": 0.87, "grad_norm": 1.5839456176787687, "learning_rate": 9.238165488928152e-07, "loss": 0.6728, "step": 8516 }, { "epoch": 0.87, "grad_norm": 1.7281204784134618, "learning_rate": 9.224340412522325e-07, "loss": 0.6542, "step": 8517 }, { "epoch": 0.87, "grad_norm": 1.507704352537572, "learning_rate": 9.210525188394747e-07, "loss": 0.6282, "step": 8518 }, { "epoch": 0.87, "grad_norm": 1.6918760455004862, "learning_rate": 9.196719818044886e-07, "loss": 0.6208, "step": 8519 }, { "epoch": 0.87, "grad_norm": 1.7589913626033948, "learning_rate": 9.182924302971086e-07, "loss": 0.6981, "step": 8520 }, { "epoch": 0.87, "grad_norm": 1.6422584710703088, "learning_rate": 9.169138644670627e-07, "loss": 0.6739, "step": 8521 }, { "epoch": 0.87, "grad_norm": 1.767490173430962, "learning_rate": 9.155362844639725e-07, "loss": 0.6822, "step": 8522 }, { "epoch": 0.87, "grad_norm": 1.8181788312046294, "learning_rate": 9.14159690437355e-07, "loss": 0.7665, "step": 8523 }, { "epoch": 0.87, "grad_norm": 1.7480484857115235, "learning_rate": 9.127840825366152e-07, "loss": 0.7395, "step": 8524 }, { "epoch": 0.87, "grad_norm": 1.8396355344859228, "learning_rate": 9.114094609110569e-07, "loss": 0.582, "step": 8525 }, { "epoch": 0.87, "grad_norm": 1.6794017948340934, "learning_rate": 9.100358257098707e-07, "loss": 0.8449, "step": 8526 }, { "epoch": 0.87, "grad_norm": 1.7382601173910843, "learning_rate": 9.086631770821431e-07, "loss": 0.7845, "step": 8527 }, { "epoch": 0.87, "grad_norm": 1.6555141456200846, "learning_rate": 9.07291515176858e-07, "loss": 0.7532, "step": 8528 }, { "epoch": 0.87, "grad_norm": 1.4531404575168607, "learning_rate": 9.05920840142882e-07, "loss": 0.6653, "step": 8529 }, { "epoch": 0.87, "grad_norm": 1.9042978793688385, "learning_rate": 9.045511521289862e-07, "loss": 0.6411, "step": 8530 }, { "epoch": 0.87, "grad_norm": 1.6141730691986127, "learning_rate": 9.031824512838239e-07, "loss": 0.6548, "step": 8531 }, { "epoch": 0.87, "grad_norm": 1.7912378321838687, "learning_rate": 9.018147377559483e-07, "loss": 0.6434, "step": 8532 }, { "epoch": 0.87, "grad_norm": 1.6969882014294067, "learning_rate": 9.004480116938041e-07, "loss": 0.6282, "step": 8533 }, { "epoch": 0.87, "grad_norm": 1.7507718131371228, "learning_rate": 8.99082273245726e-07, "loss": 0.65, "step": 8534 }, { "epoch": 0.87, "grad_norm": 1.7382108953727158, "learning_rate": 8.977175225599466e-07, "loss": 0.7068, "step": 8535 }, { "epoch": 0.87, "grad_norm": 1.8785085612766732, "learning_rate": 8.963537597845839e-07, "loss": 0.6955, "step": 8536 }, { "epoch": 0.87, "grad_norm": 1.9876005931416723, "learning_rate": 8.949909850676564e-07, "loss": 0.796, "step": 8537 }, { "epoch": 0.87, "grad_norm": 1.8335946031497297, "learning_rate": 8.936291985570722e-07, "loss": 0.5972, "step": 8538 }, { "epoch": 0.87, "grad_norm": 1.6787740376123934, "learning_rate": 8.922684004006299e-07, "loss": 0.7053, "step": 8539 }, { "epoch": 0.87, "grad_norm": 1.7260937405636285, "learning_rate": 8.909085907460224e-07, "loss": 0.7055, "step": 8540 }, { "epoch": 0.87, "grad_norm": 1.8156357333218092, "learning_rate": 8.895497697408395e-07, "loss": 0.7014, "step": 8541 }, { "epoch": 0.87, "grad_norm": 1.8502892872932764, "learning_rate": 8.881919375325565e-07, "loss": 0.7066, "step": 8542 }, { "epoch": 0.87, "grad_norm": 1.7520094569059954, "learning_rate": 8.868350942685467e-07, "loss": 0.7422, "step": 8543 }, { "epoch": 0.87, "grad_norm": 1.6762712922095546, "learning_rate": 8.854792400960721e-07, "loss": 0.6672, "step": 8544 }, { "epoch": 0.87, "grad_norm": 1.7009066805680277, "learning_rate": 8.841243751622908e-07, "loss": 0.7102, "step": 8545 }, { "epoch": 0.87, "grad_norm": 1.7598393690465033, "learning_rate": 8.827704996142539e-07, "loss": 0.6883, "step": 8546 }, { "epoch": 0.87, "grad_norm": 1.6953232913217238, "learning_rate": 8.814176135989006e-07, "loss": 0.6376, "step": 8547 }, { "epoch": 0.87, "grad_norm": 1.7558576220517113, "learning_rate": 8.800657172630678e-07, "loss": 0.7404, "step": 8548 }, { "epoch": 0.87, "grad_norm": 1.8020573268309257, "learning_rate": 8.787148107534793e-07, "loss": 0.6439, "step": 8549 }, { "epoch": 0.87, "grad_norm": 1.7623781627660229, "learning_rate": 8.773648942167578e-07, "loss": 0.7491, "step": 8550 }, { "epoch": 0.87, "grad_norm": 1.5599459156928088, "learning_rate": 8.760159677994174e-07, "loss": 0.6676, "step": 8551 }, { "epoch": 0.87, "grad_norm": 1.7111379677657992, "learning_rate": 8.746680316478573e-07, "loss": 0.7308, "step": 8552 }, { "epoch": 0.87, "grad_norm": 1.7050018071892619, "learning_rate": 8.733210859083807e-07, "loss": 0.7254, "step": 8553 }, { "epoch": 0.87, "grad_norm": 1.6601874396647305, "learning_rate": 8.719751307271739e-07, "loss": 0.6913, "step": 8554 }, { "epoch": 0.87, "grad_norm": 1.7903128696098336, "learning_rate": 8.706301662503191e-07, "loss": 0.7505, "step": 8555 }, { "epoch": 0.87, "grad_norm": 1.6630933552740055, "learning_rate": 8.69286192623795e-07, "loss": 0.6685, "step": 8556 }, { "epoch": 0.87, "grad_norm": 1.8305512837202105, "learning_rate": 8.679432099934637e-07, "loss": 0.7627, "step": 8557 }, { "epoch": 0.87, "grad_norm": 1.644433408519988, "learning_rate": 8.666012185050876e-07, "loss": 0.7553, "step": 8558 }, { "epoch": 0.87, "grad_norm": 1.7837745416315152, "learning_rate": 8.652602183043213e-07, "loss": 0.7348, "step": 8559 }, { "epoch": 0.87, "grad_norm": 1.6394931714912497, "learning_rate": 8.63920209536705e-07, "loss": 0.7273, "step": 8560 }, { "epoch": 0.87, "grad_norm": 1.759179568705848, "learning_rate": 8.62581192347679e-07, "loss": 0.8114, "step": 8561 }, { "epoch": 0.87, "grad_norm": 1.9408825514066173, "learning_rate": 8.612431668825705e-07, "loss": 0.7462, "step": 8562 }, { "epoch": 0.87, "grad_norm": 1.6359865231405288, "learning_rate": 8.59906133286601e-07, "loss": 0.6535, "step": 8563 }, { "epoch": 0.87, "grad_norm": 1.7203917526452943, "learning_rate": 8.585700917048879e-07, "loss": 0.6331, "step": 8564 }, { "epoch": 0.87, "grad_norm": 1.715284772326796, "learning_rate": 8.572350422824338e-07, "loss": 0.7302, "step": 8565 }, { "epoch": 0.87, "grad_norm": 1.9761566130879007, "learning_rate": 8.559009851641397e-07, "loss": 0.7234, "step": 8566 }, { "epoch": 0.87, "grad_norm": 1.60364359740448, "learning_rate": 8.545679204947954e-07, "loss": 0.6343, "step": 8567 }, { "epoch": 0.87, "grad_norm": 1.7097803626771462, "learning_rate": 8.532358484190851e-07, "loss": 0.6143, "step": 8568 }, { "epoch": 0.87, "grad_norm": 1.6386662138382595, "learning_rate": 8.519047690815841e-07, "loss": 0.7445, "step": 8569 }, { "epoch": 0.87, "grad_norm": 1.692997496401604, "learning_rate": 8.505746826267602e-07, "loss": 0.7129, "step": 8570 }, { "epoch": 0.87, "grad_norm": 1.7807662897180458, "learning_rate": 8.492455891989737e-07, "loss": 0.7059, "step": 8571 }, { "epoch": 0.87, "grad_norm": 1.6263058459356212, "learning_rate": 8.479174889424758e-07, "loss": 0.7066, "step": 8572 }, { "epoch": 0.87, "grad_norm": 1.7883197031359765, "learning_rate": 8.465903820014121e-07, "loss": 0.7816, "step": 8573 }, { "epoch": 0.87, "grad_norm": 1.6248322732286755, "learning_rate": 8.452642685198209e-07, "loss": 0.8102, "step": 8574 }, { "epoch": 0.87, "grad_norm": 1.6366081161488895, "learning_rate": 8.43939148641627e-07, "loss": 0.7074, "step": 8575 }, { "epoch": 0.87, "grad_norm": 1.6256476717105108, "learning_rate": 8.426150225106544e-07, "loss": 0.7085, "step": 8576 }, { "epoch": 0.87, "grad_norm": 1.6823461597388514, "learning_rate": 8.412918902706169e-07, "loss": 0.8242, "step": 8577 }, { "epoch": 0.87, "grad_norm": 1.599186246228301, "learning_rate": 8.399697520651163e-07, "loss": 0.6813, "step": 8578 }, { "epoch": 0.87, "grad_norm": 2.0396075459147673, "learning_rate": 8.386486080376532e-07, "loss": 0.736, "step": 8579 }, { "epoch": 0.87, "grad_norm": 1.8394322681952746, "learning_rate": 8.373284583316176e-07, "loss": 0.6502, "step": 8580 }, { "epoch": 0.87, "grad_norm": 1.682872999505341, "learning_rate": 8.360093030902871e-07, "loss": 0.6431, "step": 8581 }, { "epoch": 0.87, "grad_norm": 1.7358414224590166, "learning_rate": 8.346911424568394e-07, "loss": 0.5806, "step": 8582 }, { "epoch": 0.87, "grad_norm": 1.7439800889886863, "learning_rate": 8.333739765743399e-07, "loss": 0.7109, "step": 8583 }, { "epoch": 0.87, "grad_norm": 1.699991984763425, "learning_rate": 8.320578055857432e-07, "loss": 0.7801, "step": 8584 }, { "epoch": 0.87, "grad_norm": 1.6099028565865006, "learning_rate": 8.307426296339017e-07, "loss": 0.6203, "step": 8585 }, { "epoch": 0.87, "grad_norm": 1.6382126778019257, "learning_rate": 8.294284488615578e-07, "loss": 0.6541, "step": 8586 }, { "epoch": 0.87, "grad_norm": 1.74805337997645, "learning_rate": 8.281152634113432e-07, "loss": 0.7318, "step": 8587 }, { "epoch": 0.87, "grad_norm": 1.6270872487229284, "learning_rate": 8.268030734257848e-07, "loss": 0.6896, "step": 8588 }, { "epoch": 0.87, "grad_norm": 1.5915860465129363, "learning_rate": 8.254918790472999e-07, "loss": 0.6669, "step": 8589 }, { "epoch": 0.87, "grad_norm": 1.5851282974358303, "learning_rate": 8.241816804181968e-07, "loss": 0.5469, "step": 8590 }, { "epoch": 0.87, "grad_norm": 1.6259615558647373, "learning_rate": 8.228724776806818e-07, "loss": 0.666, "step": 8591 }, { "epoch": 0.87, "grad_norm": 1.501919932252582, "learning_rate": 8.215642709768423e-07, "loss": 0.7237, "step": 8592 }, { "epoch": 0.87, "grad_norm": 1.5567954460938576, "learning_rate": 8.20257060448667e-07, "loss": 0.6374, "step": 8593 }, { "epoch": 0.87, "grad_norm": 1.837719264984136, "learning_rate": 8.189508462380335e-07, "loss": 0.7212, "step": 8594 }, { "epoch": 0.87, "grad_norm": 1.61006922062226, "learning_rate": 8.176456284867096e-07, "loss": 0.7692, "step": 8595 }, { "epoch": 0.87, "grad_norm": 1.5625071262859511, "learning_rate": 8.163414073363562e-07, "loss": 0.7378, "step": 8596 }, { "epoch": 0.87, "grad_norm": 1.731467292703643, "learning_rate": 8.150381829285282e-07, "loss": 0.6837, "step": 8597 }, { "epoch": 0.87, "grad_norm": 1.7203878203226242, "learning_rate": 8.137359554046676e-07, "loss": 0.6837, "step": 8598 }, { "epoch": 0.87, "grad_norm": 1.7322407343324207, "learning_rate": 8.124347249061115e-07, "loss": 0.5974, "step": 8599 }, { "epoch": 0.87, "grad_norm": 1.708328769120278, "learning_rate": 8.111344915740893e-07, "loss": 0.7015, "step": 8600 }, { "epoch": 0.87, "grad_norm": 1.492761598293974, "learning_rate": 8.098352555497202e-07, "loss": 0.6527, "step": 8601 }, { "epoch": 0.87, "grad_norm": 1.4512688790266, "learning_rate": 8.085370169740169e-07, "loss": 0.602, "step": 8602 }, { "epoch": 0.88, "grad_norm": 1.5763027777487575, "learning_rate": 8.072397759878803e-07, "loss": 0.6192, "step": 8603 }, { "epoch": 0.88, "grad_norm": 1.7378737574428034, "learning_rate": 8.059435327321074e-07, "loss": 0.7466, "step": 8604 }, { "epoch": 0.88, "grad_norm": 1.7063685731013865, "learning_rate": 8.046482873473871e-07, "loss": 0.6301, "step": 8605 }, { "epoch": 0.88, "grad_norm": 1.7740020706333472, "learning_rate": 8.033540399742945e-07, "loss": 0.7094, "step": 8606 }, { "epoch": 0.88, "grad_norm": 1.5270984069291418, "learning_rate": 8.020607907533017e-07, "loss": 0.6618, "step": 8607 }, { "epoch": 0.88, "grad_norm": 1.597389958525877, "learning_rate": 8.0076853982477e-07, "loss": 0.699, "step": 8608 }, { "epoch": 0.88, "grad_norm": 1.5897788497630603, "learning_rate": 7.994772873289536e-07, "loss": 0.76, "step": 8609 }, { "epoch": 0.88, "grad_norm": 1.6962932147366152, "learning_rate": 7.981870334059983e-07, "loss": 0.6271, "step": 8610 }, { "epoch": 0.88, "grad_norm": 1.667922649893392, "learning_rate": 7.968977781959387e-07, "loss": 0.6334, "step": 8611 }, { "epoch": 0.88, "grad_norm": 1.5968713655185358, "learning_rate": 7.95609521838705e-07, "loss": 0.6992, "step": 8612 }, { "epoch": 0.88, "grad_norm": 1.5791815893923733, "learning_rate": 7.943222644741189e-07, "loss": 0.6811, "step": 8613 }, { "epoch": 0.88, "grad_norm": 1.7572013151258459, "learning_rate": 7.930360062418896e-07, "loss": 0.6329, "step": 8614 }, { "epoch": 0.88, "grad_norm": 1.6938162903103777, "learning_rate": 7.91750747281621e-07, "loss": 0.7315, "step": 8615 }, { "epoch": 0.88, "grad_norm": 1.6832955244066146, "learning_rate": 7.904664877328072e-07, "loss": 0.6837, "step": 8616 }, { "epoch": 0.88, "grad_norm": 1.8860479223720243, "learning_rate": 7.891832277348344e-07, "loss": 0.7744, "step": 8617 }, { "epoch": 0.88, "grad_norm": 1.7369431625286054, "learning_rate": 7.879009674269845e-07, "loss": 0.7475, "step": 8618 }, { "epoch": 0.88, "grad_norm": 1.5136845094116347, "learning_rate": 7.866197069484205e-07, "loss": 0.6582, "step": 8619 }, { "epoch": 0.88, "grad_norm": 1.7750629918797527, "learning_rate": 7.85339446438208e-07, "loss": 0.7294, "step": 8620 }, { "epoch": 0.88, "grad_norm": 1.587406446054958, "learning_rate": 7.840601860352947e-07, "loss": 0.6335, "step": 8621 }, { "epoch": 0.88, "grad_norm": 1.6404575019637098, "learning_rate": 7.827819258785285e-07, "loss": 0.7823, "step": 8622 }, { "epoch": 0.88, "grad_norm": 1.5917314155579665, "learning_rate": 7.815046661066439e-07, "loss": 0.6375, "step": 8623 }, { "epoch": 0.88, "grad_norm": 1.8137847228022808, "learning_rate": 7.802284068582655e-07, "loss": 0.707, "step": 8624 }, { "epoch": 0.88, "grad_norm": 1.6504081878106207, "learning_rate": 7.789531482719148e-07, "loss": 0.6542, "step": 8625 }, { "epoch": 0.88, "grad_norm": 1.623803955785065, "learning_rate": 7.776788904859956e-07, "loss": 0.6638, "step": 8626 }, { "epoch": 0.88, "grad_norm": 1.6243223053097606, "learning_rate": 7.764056336388127e-07, "loss": 0.6989, "step": 8627 }, { "epoch": 0.88, "grad_norm": 1.798107487378668, "learning_rate": 7.751333778685588e-07, "loss": 0.814, "step": 8628 }, { "epoch": 0.88, "grad_norm": 1.6166916424175415, "learning_rate": 7.738621233133148e-07, "loss": 0.6075, "step": 8629 }, { "epoch": 0.88, "grad_norm": 1.4663527224002506, "learning_rate": 7.725918701110557e-07, "loss": 0.5568, "step": 8630 }, { "epoch": 0.88, "grad_norm": 1.78406648879962, "learning_rate": 7.713226183996513e-07, "loss": 0.7568, "step": 8631 }, { "epoch": 0.88, "grad_norm": 1.718185309054279, "learning_rate": 7.700543683168537e-07, "loss": 0.6125, "step": 8632 }, { "epoch": 0.88, "grad_norm": 1.6783471847093223, "learning_rate": 7.687871200003172e-07, "loss": 0.5707, "step": 8633 }, { "epoch": 0.88, "grad_norm": 1.8218426363881475, "learning_rate": 7.675208735875761e-07, "loss": 0.6897, "step": 8634 }, { "epoch": 0.88, "grad_norm": 1.7452221894583109, "learning_rate": 7.662556292160639e-07, "loss": 0.7302, "step": 8635 }, { "epoch": 0.88, "grad_norm": 1.8217020412534808, "learning_rate": 7.649913870231063e-07, "loss": 0.8073, "step": 8636 }, { "epoch": 0.88, "grad_norm": 1.9268201950458284, "learning_rate": 7.637281471459124e-07, "loss": 0.6536, "step": 8637 }, { "epoch": 0.88, "grad_norm": 1.7537251047143585, "learning_rate": 7.624659097215903e-07, "loss": 0.7079, "step": 8638 }, { "epoch": 0.88, "grad_norm": 1.7704002735144673, "learning_rate": 7.612046748871327e-07, "loss": 0.6428, "step": 8639 }, { "epoch": 0.88, "grad_norm": 1.7637132366632193, "learning_rate": 7.599444427794301e-07, "loss": 0.6163, "step": 8640 }, { "epoch": 0.88, "grad_norm": 1.6060984338985305, "learning_rate": 7.586852135352607e-07, "loss": 0.6644, "step": 8641 }, { "epoch": 0.88, "grad_norm": 1.557104348665908, "learning_rate": 7.574269872912921e-07, "loss": 0.6215, "step": 8642 }, { "epoch": 0.88, "grad_norm": 1.5334753071785745, "learning_rate": 7.561697641840882e-07, "loss": 0.7568, "step": 8643 }, { "epoch": 0.88, "grad_norm": 1.7956769060024518, "learning_rate": 7.549135443500966e-07, "loss": 0.653, "step": 8644 }, { "epoch": 0.88, "grad_norm": 1.6257566675549924, "learning_rate": 7.536583279256638e-07, "loss": 0.712, "step": 8645 }, { "epoch": 0.88, "grad_norm": 1.7807779192366155, "learning_rate": 7.524041150470241e-07, "loss": 0.7125, "step": 8646 }, { "epoch": 0.88, "grad_norm": 1.595006510159101, "learning_rate": 7.511509058502997e-07, "loss": 0.6043, "step": 8647 }, { "epoch": 0.88, "grad_norm": 1.7246952203941068, "learning_rate": 7.498987004715108e-07, "loss": 0.67, "step": 8648 }, { "epoch": 0.88, "grad_norm": 1.7245351538870213, "learning_rate": 7.486474990465598e-07, "loss": 0.6304, "step": 8649 }, { "epoch": 0.88, "grad_norm": 1.6393893794984247, "learning_rate": 7.47397301711249e-07, "loss": 0.5836, "step": 8650 }, { "epoch": 0.88, "grad_norm": 1.6812533149537447, "learning_rate": 7.461481086012679e-07, "loss": 0.796, "step": 8651 }, { "epoch": 0.88, "grad_norm": 1.6652425440486422, "learning_rate": 7.448999198521934e-07, "loss": 0.5835, "step": 8652 }, { "epoch": 0.88, "grad_norm": 1.7745124037983675, "learning_rate": 7.436527355994994e-07, "loss": 0.7556, "step": 8653 }, { "epoch": 0.88, "grad_norm": 1.6955759539400153, "learning_rate": 7.424065559785498e-07, "loss": 0.758, "step": 8654 }, { "epoch": 0.88, "grad_norm": 1.7262393580932531, "learning_rate": 7.411613811245944e-07, "loss": 0.696, "step": 8655 }, { "epoch": 0.88, "grad_norm": 1.7838213988096059, "learning_rate": 7.399172111727804e-07, "loss": 0.6724, "step": 8656 }, { "epoch": 0.88, "grad_norm": 1.7588142980376191, "learning_rate": 7.386740462581399e-07, "loss": 0.7651, "step": 8657 }, { "epoch": 0.88, "grad_norm": 1.5901706489580727, "learning_rate": 7.374318865156005e-07, "loss": 0.6707, "step": 8658 }, { "epoch": 0.88, "grad_norm": 1.7232413404704006, "learning_rate": 7.361907320799811e-07, "loss": 0.6283, "step": 8659 }, { "epoch": 0.88, "grad_norm": 1.6051860022865811, "learning_rate": 7.349505830859871e-07, "loss": 0.5946, "step": 8660 }, { "epoch": 0.88, "grad_norm": 1.5686212020624424, "learning_rate": 7.337114396682187e-07, "loss": 0.666, "step": 8661 }, { "epoch": 0.88, "grad_norm": 1.7657274710165263, "learning_rate": 7.324733019611641e-07, "loss": 0.6962, "step": 8662 }, { "epoch": 0.88, "grad_norm": 1.8283228142017602, "learning_rate": 7.312361700992043e-07, "loss": 0.7268, "step": 8663 }, { "epoch": 0.88, "grad_norm": 1.545472626591203, "learning_rate": 7.300000442166133e-07, "loss": 0.7287, "step": 8664 }, { "epoch": 0.88, "grad_norm": 1.5451732294601264, "learning_rate": 7.287649244475481e-07, "loss": 0.6112, "step": 8665 }, { "epoch": 0.88, "grad_norm": 1.6576688661857788, "learning_rate": 7.27530810926067e-07, "loss": 0.6033, "step": 8666 }, { "epoch": 0.88, "grad_norm": 1.8983431471455487, "learning_rate": 7.262977037861096e-07, "loss": 0.6632, "step": 8667 }, { "epoch": 0.88, "grad_norm": 1.539195718199749, "learning_rate": 7.250656031615111e-07, "loss": 0.6502, "step": 8668 }, { "epoch": 0.88, "grad_norm": 1.6100641613515339, "learning_rate": 7.23834509186e-07, "loss": 0.6516, "step": 8669 }, { "epoch": 0.88, "grad_norm": 1.712817735434763, "learning_rate": 7.226044219931883e-07, "loss": 0.6382, "step": 8670 }, { "epoch": 0.88, "grad_norm": 1.75885847360641, "learning_rate": 7.213753417165836e-07, "loss": 0.7896, "step": 8671 }, { "epoch": 0.88, "grad_norm": 1.7738662827225833, "learning_rate": 7.201472684895872e-07, "loss": 0.7514, "step": 8672 }, { "epoch": 0.88, "grad_norm": 1.5742849695052534, "learning_rate": 7.189202024454811e-07, "loss": 0.6635, "step": 8673 }, { "epoch": 0.88, "grad_norm": 1.6258377366677337, "learning_rate": 7.176941437174489e-07, "loss": 0.6817, "step": 8674 }, { "epoch": 0.88, "grad_norm": 1.6906760253031172, "learning_rate": 7.164690924385576e-07, "loss": 0.7801, "step": 8675 }, { "epoch": 0.88, "grad_norm": 1.5194349350543774, "learning_rate": 7.152450487417673e-07, "loss": 0.6269, "step": 8676 }, { "epoch": 0.88, "grad_norm": 1.6111332506431482, "learning_rate": 7.140220127599318e-07, "loss": 0.5915, "step": 8677 }, { "epoch": 0.88, "grad_norm": 1.6512927420232688, "learning_rate": 7.127999846257893e-07, "loss": 0.6012, "step": 8678 }, { "epoch": 0.88, "grad_norm": 1.5815162841761377, "learning_rate": 7.115789644719728e-07, "loss": 0.6751, "step": 8679 }, { "epoch": 0.88, "grad_norm": 1.5351316643915993, "learning_rate": 7.103589524310051e-07, "loss": 0.608, "step": 8680 }, { "epoch": 0.88, "grad_norm": 1.7310893799644735, "learning_rate": 7.09139948635299e-07, "loss": 0.7092, "step": 8681 }, { "epoch": 0.88, "grad_norm": 1.744228287786194, "learning_rate": 7.079219532171599e-07, "loss": 0.722, "step": 8682 }, { "epoch": 0.88, "grad_norm": 1.6453074599655961, "learning_rate": 7.067049663087788e-07, "loss": 0.7212, "step": 8683 }, { "epoch": 0.88, "grad_norm": 1.721431416393383, "learning_rate": 7.054889880422433e-07, "loss": 0.6608, "step": 8684 }, { "epoch": 0.88, "grad_norm": 1.603757868663997, "learning_rate": 7.0427401854953e-07, "loss": 0.717, "step": 8685 }, { "epoch": 0.88, "grad_norm": 1.9247365371113263, "learning_rate": 7.030600579625014e-07, "loss": 0.6379, "step": 8686 }, { "epoch": 0.88, "grad_norm": 1.8475447505522269, "learning_rate": 7.018471064129162e-07, "loss": 0.7608, "step": 8687 }, { "epoch": 0.88, "grad_norm": 1.6599123438996048, "learning_rate": 7.006351640324215e-07, "loss": 0.7324, "step": 8688 }, { "epoch": 0.88, "grad_norm": 1.5674875844829181, "learning_rate": 6.994242309525523e-07, "loss": 0.7213, "step": 8689 }, { "epoch": 0.88, "grad_norm": 1.9027885332859753, "learning_rate": 6.982143073047387e-07, "loss": 0.7656, "step": 8690 }, { "epoch": 0.88, "grad_norm": 1.8030207958156752, "learning_rate": 6.970053932202991e-07, "loss": 0.686, "step": 8691 }, { "epoch": 0.88, "grad_norm": 1.7440038038627508, "learning_rate": 6.957974888304408e-07, "loss": 0.7075, "step": 8692 }, { "epoch": 0.88, "grad_norm": 1.9613385767698652, "learning_rate": 6.945905942662646e-07, "loss": 0.7465, "step": 8693 }, { "epoch": 0.88, "grad_norm": 1.828627449456084, "learning_rate": 6.933847096587575e-07, "loss": 0.7298, "step": 8694 }, { "epoch": 0.88, "grad_norm": 1.552784833683063, "learning_rate": 6.92179835138802e-07, "loss": 0.6045, "step": 8695 }, { "epoch": 0.88, "grad_norm": 1.7410167562173768, "learning_rate": 6.909759708371678e-07, "loss": 0.6272, "step": 8696 }, { "epoch": 0.88, "grad_norm": 1.5793295105967549, "learning_rate": 6.897731168845145e-07, "loss": 0.6606, "step": 8697 }, { "epoch": 0.88, "grad_norm": 1.6323565095980503, "learning_rate": 6.885712734113925e-07, "loss": 0.6883, "step": 8698 }, { "epoch": 0.88, "grad_norm": 1.460791374584712, "learning_rate": 6.873704405482473e-07, "loss": 0.6915, "step": 8699 }, { "epoch": 0.88, "grad_norm": 1.5657541778249906, "learning_rate": 6.861706184254046e-07, "loss": 0.6578, "step": 8700 }, { "epoch": 0.89, "grad_norm": 1.754491594995093, "learning_rate": 6.849718071730905e-07, "loss": 0.6067, "step": 8701 }, { "epoch": 0.89, "grad_norm": 1.697738919853421, "learning_rate": 6.837740069214161e-07, "loss": 0.6951, "step": 8702 }, { "epoch": 0.89, "grad_norm": 1.6302173531920074, "learning_rate": 6.825772178003831e-07, "loss": 0.7389, "step": 8703 }, { "epoch": 0.89, "grad_norm": 1.9243450934911965, "learning_rate": 6.813814399398855e-07, "loss": 0.745, "step": 8704 }, { "epoch": 0.89, "grad_norm": 1.7761708127088411, "learning_rate": 6.80186673469706e-07, "loss": 0.7524, "step": 8705 }, { "epoch": 0.89, "grad_norm": 1.614586091489057, "learning_rate": 6.789929185195166e-07, "loss": 0.7115, "step": 8706 }, { "epoch": 0.89, "grad_norm": 1.676059358685063, "learning_rate": 6.778001752188823e-07, "loss": 0.6826, "step": 8707 }, { "epoch": 0.89, "grad_norm": 1.8001808657182368, "learning_rate": 6.766084436972564e-07, "loss": 0.6911, "step": 8708 }, { "epoch": 0.89, "grad_norm": 1.7749660708941688, "learning_rate": 6.75417724083981e-07, "loss": 0.6824, "step": 8709 }, { "epoch": 0.89, "grad_norm": 1.716846862284347, "learning_rate": 6.742280165082937e-07, "loss": 0.6694, "step": 8710 }, { "epoch": 0.89, "grad_norm": 1.6762582029546438, "learning_rate": 6.730393210993147e-07, "loss": 0.6982, "step": 8711 }, { "epoch": 0.89, "grad_norm": 1.7126512371411937, "learning_rate": 6.718516379860595e-07, "loss": 0.6423, "step": 8712 }, { "epoch": 0.89, "grad_norm": 1.6213435185517746, "learning_rate": 6.706649672974353e-07, "loss": 0.6024, "step": 8713 }, { "epoch": 0.89, "grad_norm": 1.7864519428301608, "learning_rate": 6.694793091622331e-07, "loss": 0.7844, "step": 8714 }, { "epoch": 0.89, "grad_norm": 1.778521136186733, "learning_rate": 6.682946637091404e-07, "loss": 0.6831, "step": 8715 }, { "epoch": 0.89, "grad_norm": 1.54754015717152, "learning_rate": 6.671110310667283e-07, "loss": 0.7726, "step": 8716 }, { "epoch": 0.89, "grad_norm": 1.6000257477234023, "learning_rate": 6.659284113634634e-07, "loss": 0.6161, "step": 8717 }, { "epoch": 0.89, "grad_norm": 1.5372503227345617, "learning_rate": 6.647468047277029e-07, "loss": 0.5754, "step": 8718 }, { "epoch": 0.89, "grad_norm": 1.704517683947603, "learning_rate": 6.635662112876884e-07, "loss": 0.6452, "step": 8719 }, { "epoch": 0.89, "grad_norm": 1.6588122662892621, "learning_rate": 6.623866311715576e-07, "loss": 0.7267, "step": 8720 }, { "epoch": 0.89, "grad_norm": 1.459853858961382, "learning_rate": 6.612080645073316e-07, "loss": 0.5757, "step": 8721 }, { "epoch": 0.89, "grad_norm": 1.5936972799654623, "learning_rate": 6.600305114229288e-07, "loss": 0.6597, "step": 8722 }, { "epoch": 0.89, "grad_norm": 1.8774277055499191, "learning_rate": 6.588539720461551e-07, "loss": 0.7892, "step": 8723 }, { "epoch": 0.89, "grad_norm": 1.7669374648720841, "learning_rate": 6.576784465047014e-07, "loss": 0.7177, "step": 8724 }, { "epoch": 0.89, "grad_norm": 1.560008410701888, "learning_rate": 6.565039349261548e-07, "loss": 0.6919, "step": 8725 }, { "epoch": 0.89, "grad_norm": 1.7644269109338624, "learning_rate": 6.55330437437991e-07, "loss": 0.6835, "step": 8726 }, { "epoch": 0.89, "grad_norm": 1.5372222677673302, "learning_rate": 6.541579541675736e-07, "loss": 0.5947, "step": 8727 }, { "epoch": 0.89, "grad_norm": 1.8259938415601342, "learning_rate": 6.529864852421586e-07, "loss": 0.7568, "step": 8728 }, { "epoch": 0.89, "grad_norm": 1.4769277739964026, "learning_rate": 6.518160307888877e-07, "loss": 0.5646, "step": 8729 }, { "epoch": 0.89, "grad_norm": 1.7987995664627618, "learning_rate": 6.506465909347981e-07, "loss": 0.6142, "step": 8730 }, { "epoch": 0.89, "grad_norm": 1.477685080019239, "learning_rate": 6.49478165806815e-07, "loss": 0.5199, "step": 8731 }, { "epoch": 0.89, "grad_norm": 1.7147832264792515, "learning_rate": 6.483107555317491e-07, "loss": 0.6818, "step": 8732 }, { "epoch": 0.89, "grad_norm": 1.7458082121307719, "learning_rate": 6.471443602363093e-07, "loss": 0.7152, "step": 8733 }, { "epoch": 0.89, "grad_norm": 1.745055777773359, "learning_rate": 6.45978980047085e-07, "loss": 0.5758, "step": 8734 }, { "epoch": 0.89, "grad_norm": 1.6966452521157125, "learning_rate": 6.44814615090561e-07, "loss": 0.6355, "step": 8735 }, { "epoch": 0.89, "grad_norm": 1.633730183689762, "learning_rate": 6.436512654931138e-07, "loss": 0.6368, "step": 8736 }, { "epoch": 0.89, "grad_norm": 1.6072410354623625, "learning_rate": 6.424889313810034e-07, "loss": 0.6961, "step": 8737 }, { "epoch": 0.89, "grad_norm": 1.8507298257170395, "learning_rate": 6.413276128803858e-07, "loss": 0.6825, "step": 8738 }, { "epoch": 0.89, "grad_norm": 1.746313217113024, "learning_rate": 6.401673101173012e-07, "loss": 0.7445, "step": 8739 }, { "epoch": 0.89, "grad_norm": 1.6390561996209958, "learning_rate": 6.390080232176832e-07, "loss": 0.6135, "step": 8740 }, { "epoch": 0.89, "grad_norm": 1.5743849846487263, "learning_rate": 6.378497523073568e-07, "loss": 0.6436, "step": 8741 }, { "epoch": 0.89, "grad_norm": 1.9545625466793428, "learning_rate": 6.366924975120303e-07, "loss": 0.7543, "step": 8742 }, { "epoch": 0.89, "grad_norm": 1.6648504342011303, "learning_rate": 6.355362589573078e-07, "loss": 0.6485, "step": 8743 }, { "epoch": 0.89, "grad_norm": 1.716423230872207, "learning_rate": 6.34381036768682e-07, "loss": 0.6911, "step": 8744 }, { "epoch": 0.89, "grad_norm": 1.65445994435496, "learning_rate": 6.332268310715306e-07, "loss": 0.7274, "step": 8745 }, { "epoch": 0.89, "grad_norm": 1.6244381998771817, "learning_rate": 6.320736419911289e-07, "loss": 0.5986, "step": 8746 }, { "epoch": 0.89, "grad_norm": 1.893230528199649, "learning_rate": 6.309214696526345e-07, "loss": 0.7471, "step": 8747 }, { "epoch": 0.89, "grad_norm": 1.6760771995237522, "learning_rate": 6.297703141810973e-07, "loss": 0.7281, "step": 8748 }, { "epoch": 0.89, "grad_norm": 1.758727507648121, "learning_rate": 6.286201757014609e-07, "loss": 0.673, "step": 8749 }, { "epoch": 0.89, "grad_norm": 1.6252006539545014, "learning_rate": 6.274710543385498e-07, "loss": 0.6644, "step": 8750 }, { "epoch": 0.89, "grad_norm": 1.711993065300969, "learning_rate": 6.263229502170887e-07, "loss": 0.72, "step": 8751 }, { "epoch": 0.89, "grad_norm": 1.598204332420291, "learning_rate": 6.251758634616811e-07, "loss": 0.6851, "step": 8752 }, { "epoch": 0.89, "grad_norm": 1.7099373984607578, "learning_rate": 6.240297941968276e-07, "loss": 0.6626, "step": 8753 }, { "epoch": 0.89, "grad_norm": 2.067578511288023, "learning_rate": 6.228847425469176e-07, "loss": 0.7337, "step": 8754 }, { "epoch": 0.89, "grad_norm": 1.6329116330234184, "learning_rate": 6.217407086362259e-07, "loss": 0.5908, "step": 8755 }, { "epoch": 0.89, "grad_norm": 1.8695458114514938, "learning_rate": 6.205976925889223e-07, "loss": 0.6287, "step": 8756 }, { "epoch": 0.89, "grad_norm": 1.6009515502837723, "learning_rate": 6.194556945290597e-07, "loss": 0.5986, "step": 8757 }, { "epoch": 0.89, "grad_norm": 1.7133996021572895, "learning_rate": 6.183147145805868e-07, "loss": 0.6671, "step": 8758 }, { "epoch": 0.89, "grad_norm": 1.547406379390001, "learning_rate": 6.171747528673399e-07, "loss": 0.5963, "step": 8759 }, { "epoch": 0.89, "grad_norm": 1.8441570176178421, "learning_rate": 6.160358095130414e-07, "loss": 0.7358, "step": 8760 }, { "epoch": 0.89, "grad_norm": 1.5426579135934184, "learning_rate": 6.148978846413067e-07, "loss": 0.6821, "step": 8761 }, { "epoch": 0.89, "grad_norm": 1.5955286906448458, "learning_rate": 6.137609783756415e-07, "loss": 0.703, "step": 8762 }, { "epoch": 0.89, "grad_norm": 1.9207348627068204, "learning_rate": 6.12625090839436e-07, "loss": 0.761, "step": 8763 }, { "epoch": 0.89, "grad_norm": 1.6145845753468058, "learning_rate": 6.11490222155976e-07, "loss": 0.7207, "step": 8764 }, { "epoch": 0.89, "grad_norm": 1.8111292561758374, "learning_rate": 6.103563724484318e-07, "loss": 0.5999, "step": 8765 }, { "epoch": 0.89, "grad_norm": 1.6732627046727127, "learning_rate": 6.092235418398651e-07, "loss": 0.6734, "step": 8766 }, { "epoch": 0.89, "grad_norm": 1.8411285377702618, "learning_rate": 6.080917304532297e-07, "loss": 0.6682, "step": 8767 }, { "epoch": 0.89, "grad_norm": 1.6387647062242647, "learning_rate": 6.069609384113617e-07, "loss": 0.629, "step": 8768 }, { "epoch": 0.89, "grad_norm": 1.6815980212566355, "learning_rate": 6.058311658369954e-07, "loss": 0.6013, "step": 8769 }, { "epoch": 0.89, "grad_norm": 1.706867933567596, "learning_rate": 6.047024128527456e-07, "loss": 0.7289, "step": 8770 }, { "epoch": 0.89, "grad_norm": 1.716827005321816, "learning_rate": 6.035746795811226e-07, "loss": 0.6986, "step": 8771 }, { "epoch": 0.89, "grad_norm": 1.5820734195546344, "learning_rate": 6.024479661445271e-07, "loss": 0.6539, "step": 8772 }, { "epoch": 0.89, "grad_norm": 1.6433261887082913, "learning_rate": 6.013222726652412e-07, "loss": 0.5254, "step": 8773 }, { "epoch": 0.89, "grad_norm": 1.6064092532647984, "learning_rate": 6.001975992654452e-07, "loss": 0.5876, "step": 8774 }, { "epoch": 0.89, "grad_norm": 1.6371089009253044, "learning_rate": 5.990739460672024e-07, "loss": 0.6335, "step": 8775 }, { "epoch": 0.89, "grad_norm": 1.9318912881798678, "learning_rate": 5.979513131924686e-07, "loss": 0.7516, "step": 8776 }, { "epoch": 0.89, "grad_norm": 1.696039269204613, "learning_rate": 5.968297007630897e-07, "loss": 0.7741, "step": 8777 }, { "epoch": 0.89, "grad_norm": 1.704697818598336, "learning_rate": 5.95709108900796e-07, "loss": 0.6288, "step": 8778 }, { "epoch": 0.89, "grad_norm": 1.6282649929871835, "learning_rate": 5.945895377272115e-07, "loss": 0.6602, "step": 8779 }, { "epoch": 0.89, "grad_norm": 1.7299319787649439, "learning_rate": 5.9347098736385e-07, "loss": 0.6479, "step": 8780 }, { "epoch": 0.89, "grad_norm": 1.5855476532183754, "learning_rate": 5.9235345793211e-07, "loss": 0.7551, "step": 8781 }, { "epoch": 0.89, "grad_norm": 1.9476339944739738, "learning_rate": 5.912369495532844e-07, "loss": 0.8062, "step": 8782 }, { "epoch": 0.89, "grad_norm": 1.8287390115049857, "learning_rate": 5.901214623485507e-07, "loss": 0.7113, "step": 8783 }, { "epoch": 0.89, "grad_norm": 1.7171646753440621, "learning_rate": 5.890069964389766e-07, "loss": 0.8326, "step": 8784 }, { "epoch": 0.89, "grad_norm": 1.6001861328056501, "learning_rate": 5.878935519455243e-07, "loss": 0.6403, "step": 8785 }, { "epoch": 0.89, "grad_norm": 1.8358706633985116, "learning_rate": 5.867811289890357e-07, "loss": 0.6297, "step": 8786 }, { "epoch": 0.89, "grad_norm": 1.8183021473531173, "learning_rate": 5.856697276902512e-07, "loss": 0.7184, "step": 8787 }, { "epoch": 0.89, "grad_norm": 1.7328689608493777, "learning_rate": 5.845593481697931e-07, "loss": 0.6518, "step": 8788 }, { "epoch": 0.89, "grad_norm": 1.7449931725599808, "learning_rate": 5.834499905481761e-07, "loss": 0.6504, "step": 8789 }, { "epoch": 0.89, "grad_norm": 1.7985757893626173, "learning_rate": 5.823416549458061e-07, "loss": 0.768, "step": 8790 }, { "epoch": 0.89, "grad_norm": 1.7058853899108344, "learning_rate": 5.812343414829725e-07, "loss": 0.6564, "step": 8791 }, { "epoch": 0.89, "grad_norm": 1.5406487918403375, "learning_rate": 5.801280502798578e-07, "loss": 0.6524, "step": 8792 }, { "epoch": 0.89, "grad_norm": 1.7917898832172519, "learning_rate": 5.790227814565342e-07, "loss": 0.6613, "step": 8793 }, { "epoch": 0.89, "grad_norm": 1.6094745785554516, "learning_rate": 5.779185351329586e-07, "loss": 0.677, "step": 8794 }, { "epoch": 0.89, "grad_norm": 1.6648861762187483, "learning_rate": 5.76815311428982e-07, "loss": 0.7195, "step": 8795 }, { "epoch": 0.89, "grad_norm": 1.7563619296436717, "learning_rate": 5.75713110464341e-07, "loss": 0.7305, "step": 8796 }, { "epoch": 0.89, "grad_norm": 1.7070913640267633, "learning_rate": 5.746119323586619e-07, "loss": 0.5819, "step": 8797 }, { "epoch": 0.89, "grad_norm": 1.791324973188892, "learning_rate": 5.735117772314625e-07, "loss": 0.6723, "step": 8798 }, { "epoch": 0.9, "grad_norm": 1.5459587913165356, "learning_rate": 5.724126452021439e-07, "loss": 0.6511, "step": 8799 }, { "epoch": 0.9, "grad_norm": 1.7139177229387026, "learning_rate": 5.713145363900019e-07, "loss": 0.6557, "step": 8800 }, { "epoch": 0.9, "grad_norm": 1.6248645438784817, "learning_rate": 5.702174509142211e-07, "loss": 0.6548, "step": 8801 }, { "epoch": 0.9, "grad_norm": 1.5587540887616815, "learning_rate": 5.691213888938684e-07, "loss": 0.7533, "step": 8802 }, { "epoch": 0.9, "grad_norm": 1.8162267140969848, "learning_rate": 5.680263504479067e-07, "loss": 0.691, "step": 8803 }, { "epoch": 0.9, "grad_norm": 1.9258082565468186, "learning_rate": 5.669323356951873e-07, "loss": 0.7071, "step": 8804 }, { "epoch": 0.9, "grad_norm": 1.8151940950777297, "learning_rate": 5.658393447544441e-07, "loss": 0.7587, "step": 8805 }, { "epoch": 0.9, "grad_norm": 1.5083937395603395, "learning_rate": 5.647473777443057e-07, "loss": 0.6167, "step": 8806 }, { "epoch": 0.9, "grad_norm": 1.6689479666754305, "learning_rate": 5.636564347832907e-07, "loss": 0.645, "step": 8807 }, { "epoch": 0.9, "grad_norm": 1.6817830151070023, "learning_rate": 5.625665159897997e-07, "loss": 0.6561, "step": 8808 }, { "epoch": 0.9, "grad_norm": 1.5860333611721988, "learning_rate": 5.614776214821293e-07, "loss": 0.6598, "step": 8809 }, { "epoch": 0.9, "grad_norm": 1.5677057151523508, "learning_rate": 5.603897513784628e-07, "loss": 0.6577, "step": 8810 }, { "epoch": 0.9, "grad_norm": 1.7430558459350933, "learning_rate": 5.593029057968668e-07, "loss": 0.7343, "step": 8811 }, { "epoch": 0.9, "grad_norm": 1.8275938824697127, "learning_rate": 5.582170848553059e-07, "loss": 0.7346, "step": 8812 }, { "epoch": 0.9, "grad_norm": 1.6236573604172517, "learning_rate": 5.571322886716279e-07, "loss": 0.6967, "step": 8813 }, { "epoch": 0.9, "grad_norm": 1.8136108917306695, "learning_rate": 5.560485173635699e-07, "loss": 0.6979, "step": 8814 }, { "epoch": 0.9, "grad_norm": 1.5996349472778744, "learning_rate": 5.549657710487588e-07, "loss": 0.6484, "step": 8815 }, { "epoch": 0.9, "grad_norm": 1.9571357824539959, "learning_rate": 5.538840498447084e-07, "loss": 0.7165, "step": 8816 }, { "epoch": 0.9, "grad_norm": 1.7955011318416891, "learning_rate": 5.528033538688227e-07, "loss": 0.723, "step": 8817 }, { "epoch": 0.9, "grad_norm": 1.6455737570632334, "learning_rate": 5.517236832383976e-07, "loss": 0.6117, "step": 8818 }, { "epoch": 0.9, "grad_norm": 1.5258600663929245, "learning_rate": 5.506450380706108e-07, "loss": 0.5365, "step": 8819 }, { "epoch": 0.9, "grad_norm": 1.5290508706361885, "learning_rate": 5.495674184825339e-07, "loss": 0.6308, "step": 8820 }, { "epoch": 0.9, "grad_norm": 1.4450450071362761, "learning_rate": 5.484908245911269e-07, "loss": 0.5544, "step": 8821 }, { "epoch": 0.9, "grad_norm": 1.861407949886194, "learning_rate": 5.47415256513234e-07, "loss": 0.7452, "step": 8822 }, { "epoch": 0.9, "grad_norm": 1.6397086283679179, "learning_rate": 5.463407143655941e-07, "loss": 0.6864, "step": 8823 }, { "epoch": 0.9, "grad_norm": 1.6945030783026827, "learning_rate": 5.452671982648306e-07, "loss": 0.5986, "step": 8824 }, { "epoch": 0.9, "grad_norm": 2.1115877118850843, "learning_rate": 5.441947083274568e-07, "loss": 0.6233, "step": 8825 }, { "epoch": 0.9, "grad_norm": 1.6672585918040428, "learning_rate": 5.431232446698764e-07, "loss": 0.68, "step": 8826 }, { "epoch": 0.9, "grad_norm": 1.7129379266769043, "learning_rate": 5.420528074083775e-07, "loss": 0.7318, "step": 8827 }, { "epoch": 0.9, "grad_norm": 1.7850490370440566, "learning_rate": 5.409833966591416e-07, "loss": 0.7114, "step": 8828 }, { "epoch": 0.9, "grad_norm": 1.6887809896724186, "learning_rate": 5.399150125382346e-07, "loss": 0.619, "step": 8829 }, { "epoch": 0.9, "grad_norm": 1.7080850987518374, "learning_rate": 5.38847655161614e-07, "loss": 0.7035, "step": 8830 }, { "epoch": 0.9, "grad_norm": 1.5918728979806458, "learning_rate": 5.377813246451258e-07, "loss": 0.6376, "step": 8831 }, { "epoch": 0.9, "grad_norm": 1.6662792606891546, "learning_rate": 5.367160211044997e-07, "loss": 0.5893, "step": 8832 }, { "epoch": 0.9, "grad_norm": 2.052849741595967, "learning_rate": 5.356517446553632e-07, "loss": 0.6952, "step": 8833 }, { "epoch": 0.9, "grad_norm": 1.8600307295170615, "learning_rate": 5.345884954132219e-07, "loss": 0.8211, "step": 8834 }, { "epoch": 0.9, "grad_norm": 1.7060491542482932, "learning_rate": 5.335262734934777e-07, "loss": 0.7927, "step": 8835 }, { "epoch": 0.9, "grad_norm": 1.6548610817413254, "learning_rate": 5.324650790114183e-07, "loss": 0.6175, "step": 8836 }, { "epoch": 0.9, "grad_norm": 1.5534172736814402, "learning_rate": 5.314049120822173e-07, "loss": 0.6578, "step": 8837 }, { "epoch": 0.9, "grad_norm": 1.6066570013867365, "learning_rate": 5.303457728209405e-07, "loss": 0.6216, "step": 8838 }, { "epoch": 0.9, "grad_norm": 1.7314963661007452, "learning_rate": 5.292876613425435e-07, "loss": 0.5863, "step": 8839 }, { "epoch": 0.9, "grad_norm": 1.8081504341128891, "learning_rate": 5.282305777618635e-07, "loss": 0.6855, "step": 8840 }, { "epoch": 0.9, "grad_norm": 1.6112349432475361, "learning_rate": 5.271745221936342e-07, "loss": 0.7426, "step": 8841 }, { "epoch": 0.9, "grad_norm": 1.8055474774673523, "learning_rate": 5.261194947524706e-07, "loss": 0.7428, "step": 8842 }, { "epoch": 0.9, "grad_norm": 1.5931208961495853, "learning_rate": 5.2506549555288e-07, "loss": 0.6494, "step": 8843 }, { "epoch": 0.9, "grad_norm": 1.640245601508315, "learning_rate": 5.240125247092609e-07, "loss": 0.6547, "step": 8844 }, { "epoch": 0.9, "grad_norm": 1.6570113616081257, "learning_rate": 5.22960582335893e-07, "loss": 0.6057, "step": 8845 }, { "epoch": 0.9, "grad_norm": 1.7830829445931253, "learning_rate": 5.219096685469505e-07, "loss": 0.6648, "step": 8846 }, { "epoch": 0.9, "grad_norm": 1.5182028049341616, "learning_rate": 5.20859783456491e-07, "loss": 0.6532, "step": 8847 }, { "epoch": 0.9, "grad_norm": 1.6825405089912515, "learning_rate": 5.198109271784657e-07, "loss": 0.7685, "step": 8848 }, { "epoch": 0.9, "grad_norm": 1.5108201298654471, "learning_rate": 5.187630998267112e-07, "loss": 0.6859, "step": 8849 }, { "epoch": 0.9, "grad_norm": 1.7284137246283864, "learning_rate": 5.177163015149522e-07, "loss": 0.7895, "step": 8850 }, { "epoch": 0.9, "grad_norm": 1.6011817453650048, "learning_rate": 5.166705323568022e-07, "loss": 0.7195, "step": 8851 }, { "epoch": 0.9, "grad_norm": 1.6879633198914614, "learning_rate": 5.156257924657626e-07, "loss": 0.6305, "step": 8852 }, { "epoch": 0.9, "grad_norm": 1.5509576011661719, "learning_rate": 5.145820819552239e-07, "loss": 0.5572, "step": 8853 }, { "epoch": 0.9, "grad_norm": 1.6281892283913746, "learning_rate": 5.135394009384665e-07, "loss": 0.7006, "step": 8854 }, { "epoch": 0.9, "grad_norm": 1.5648948142189403, "learning_rate": 5.124977495286543e-07, "loss": 0.634, "step": 8855 }, { "epoch": 0.9, "grad_norm": 1.6985997006874525, "learning_rate": 5.114571278388436e-07, "loss": 0.662, "step": 8856 }, { "epoch": 0.9, "grad_norm": 1.7954802063819606, "learning_rate": 5.104175359819785e-07, "loss": 0.7451, "step": 8857 }, { "epoch": 0.9, "grad_norm": 1.6763239305455557, "learning_rate": 5.093789740708877e-07, "loss": 0.6913, "step": 8858 }, { "epoch": 0.9, "grad_norm": 1.7869358727317133, "learning_rate": 5.083414422182942e-07, "loss": 0.7111, "step": 8859 }, { "epoch": 0.9, "grad_norm": 1.7085254944971797, "learning_rate": 5.073049405368025e-07, "loss": 0.8868, "step": 8860 }, { "epoch": 0.9, "grad_norm": 1.7339717725846475, "learning_rate": 5.062694691389114e-07, "loss": 0.7482, "step": 8861 }, { "epoch": 0.9, "grad_norm": 1.5826376971671454, "learning_rate": 5.052350281370045e-07, "loss": 0.7272, "step": 8862 }, { "epoch": 0.9, "grad_norm": 1.7025735469207472, "learning_rate": 5.042016176433529e-07, "loss": 0.7245, "step": 8863 }, { "epoch": 0.9, "grad_norm": 1.7507929274931837, "learning_rate": 5.031692377701192e-07, "loss": 0.7461, "step": 8864 }, { "epoch": 0.9, "grad_norm": 1.7509772240735764, "learning_rate": 5.021378886293493e-07, "loss": 0.7327, "step": 8865 }, { "epoch": 0.9, "grad_norm": 1.5796503675823719, "learning_rate": 5.011075703329816e-07, "loss": 0.6357, "step": 8866 }, { "epoch": 0.9, "grad_norm": 1.6355729150637843, "learning_rate": 5.00078282992843e-07, "loss": 0.6922, "step": 8867 }, { "epoch": 0.9, "grad_norm": 1.9593713687066243, "learning_rate": 4.990500267206422e-07, "loss": 0.6745, "step": 8868 }, { "epoch": 0.9, "grad_norm": 1.8007290348212777, "learning_rate": 4.980228016279853e-07, "loss": 0.6228, "step": 8869 }, { "epoch": 0.9, "grad_norm": 1.5854190455395145, "learning_rate": 4.969966078263578e-07, "loss": 0.6157, "step": 8870 }, { "epoch": 0.9, "grad_norm": 1.7029363611122843, "learning_rate": 4.95971445427137e-07, "loss": 0.6451, "step": 8871 }, { "epoch": 0.9, "grad_norm": 1.7041474249843223, "learning_rate": 4.949473145415917e-07, "loss": 0.6162, "step": 8872 }, { "epoch": 0.9, "grad_norm": 1.9193299614643426, "learning_rate": 4.939242152808709e-07, "loss": 0.6908, "step": 8873 }, { "epoch": 0.9, "grad_norm": 1.6604203951924335, "learning_rate": 4.929021477560181e-07, "loss": 0.6437, "step": 8874 }, { "epoch": 0.9, "grad_norm": 1.7807366861294014, "learning_rate": 4.918811120779655e-07, "loss": 0.7067, "step": 8875 }, { "epoch": 0.9, "grad_norm": 1.8913639048565587, "learning_rate": 4.908611083575254e-07, "loss": 0.6881, "step": 8876 }, { "epoch": 0.9, "grad_norm": 1.605533038984582, "learning_rate": 4.898421367054063e-07, "loss": 0.6292, "step": 8877 }, { "epoch": 0.9, "grad_norm": 1.7706508305498252, "learning_rate": 4.888241972322005e-07, "loss": 0.7253, "step": 8878 }, { "epoch": 0.9, "grad_norm": 1.7098943647143408, "learning_rate": 4.878072900483899e-07, "loss": 0.6648, "step": 8879 }, { "epoch": 0.9, "grad_norm": 1.7194533016750646, "learning_rate": 4.86791415264346e-07, "loss": 0.7055, "step": 8880 }, { "epoch": 0.9, "grad_norm": 1.6214507345058031, "learning_rate": 4.857765729903219e-07, "loss": 0.6689, "step": 8881 }, { "epoch": 0.9, "grad_norm": 1.5346747157802176, "learning_rate": 4.84762763336466e-07, "loss": 0.641, "step": 8882 }, { "epoch": 0.9, "grad_norm": 1.5087485363749233, "learning_rate": 4.837499864128104e-07, "loss": 0.573, "step": 8883 }, { "epoch": 0.9, "grad_norm": 1.7344990112796763, "learning_rate": 4.827382423292748e-07, "loss": 0.723, "step": 8884 }, { "epoch": 0.9, "grad_norm": 1.7580447093681373, "learning_rate": 4.817275311956715e-07, "loss": 0.6838, "step": 8885 }, { "epoch": 0.9, "grad_norm": 1.7700545438137756, "learning_rate": 4.80717853121695e-07, "loss": 0.8107, "step": 8886 }, { "epoch": 0.9, "grad_norm": 1.608306797865548, "learning_rate": 4.797092082169308e-07, "loss": 0.6479, "step": 8887 }, { "epoch": 0.9, "grad_norm": 1.8590731878633593, "learning_rate": 4.787015965908504e-07, "loss": 0.7478, "step": 8888 }, { "epoch": 0.9, "grad_norm": 1.9107457527445748, "learning_rate": 4.776950183528161e-07, "loss": 0.6514, "step": 8889 }, { "epoch": 0.9, "grad_norm": 1.7168042445704972, "learning_rate": 4.766894736120753e-07, "loss": 0.6907, "step": 8890 }, { "epoch": 0.9, "grad_norm": 1.8278719101055414, "learning_rate": 4.7568496247776373e-07, "loss": 0.7917, "step": 8891 }, { "epoch": 0.9, "grad_norm": 1.530010529849326, "learning_rate": 4.746814850589054e-07, "loss": 0.6842, "step": 8892 }, { "epoch": 0.9, "grad_norm": 1.488161536929116, "learning_rate": 4.7367904146441435e-07, "loss": 0.6651, "step": 8893 }, { "epoch": 0.9, "grad_norm": 1.8409654711792904, "learning_rate": 4.7267763180308676e-07, "loss": 0.719, "step": 8894 }, { "epoch": 0.9, "grad_norm": 1.883523084424903, "learning_rate": 4.716772561836136e-07, "loss": 0.7707, "step": 8895 }, { "epoch": 0.9, "grad_norm": 1.771972446027783, "learning_rate": 4.706779147145657e-07, "loss": 0.6725, "step": 8896 }, { "epoch": 0.9, "grad_norm": 1.6792519467501912, "learning_rate": 4.6967960750440855e-07, "loss": 0.6237, "step": 8897 }, { "epoch": 0.91, "grad_norm": 1.7675836119266148, "learning_rate": 4.6868233466149327e-07, "loss": 0.7123, "step": 8898 }, { "epoch": 0.91, "grad_norm": 1.6269745798391906, "learning_rate": 4.676860962940577e-07, "loss": 0.6876, "step": 8899 }, { "epoch": 0.91, "grad_norm": 1.7769618813041788, "learning_rate": 4.6669089251022757e-07, "loss": 0.6378, "step": 8900 }, { "epoch": 0.91, "grad_norm": 1.5590363379953227, "learning_rate": 4.6569672341801544e-07, "loss": 0.7152, "step": 8901 }, { "epoch": 0.91, "grad_norm": 1.8800667696896778, "learning_rate": 4.647035891253249e-07, "loss": 0.68, "step": 8902 }, { "epoch": 0.91, "grad_norm": 1.7554247388764033, "learning_rate": 4.637114897399453e-07, "loss": 0.7968, "step": 8903 }, { "epoch": 0.91, "grad_norm": 1.5907387453462725, "learning_rate": 4.6272042536955164e-07, "loss": 0.7354, "step": 8904 }, { "epoch": 0.91, "grad_norm": 1.718431267879274, "learning_rate": 4.61730396121709e-07, "loss": 0.6396, "step": 8905 }, { "epoch": 0.91, "grad_norm": 1.7295315815770955, "learning_rate": 4.607414021038714e-07, "loss": 0.695, "step": 8906 }, { "epoch": 0.91, "grad_norm": 1.66657712292234, "learning_rate": 4.5975344342337525e-07, "loss": 0.646, "step": 8907 }, { "epoch": 0.91, "grad_norm": 1.620845024767798, "learning_rate": 4.5876652018745136e-07, "loss": 0.6812, "step": 8908 }, { "epoch": 0.91, "grad_norm": 1.5818359888527045, "learning_rate": 4.5778063250321415e-07, "loss": 0.6347, "step": 8909 }, { "epoch": 0.91, "grad_norm": 1.575993128204435, "learning_rate": 4.567957804776646e-07, "loss": 0.6201, "step": 8910 }, { "epoch": 0.91, "grad_norm": 1.71544634015368, "learning_rate": 4.558119642176939e-07, "loss": 0.6866, "step": 8911 }, { "epoch": 0.91, "grad_norm": 1.8006115963235978, "learning_rate": 4.548291838300811e-07, "loss": 0.6697, "step": 8912 }, { "epoch": 0.91, "grad_norm": 1.6565075655767947, "learning_rate": 4.5384743942148977e-07, "loss": 0.7009, "step": 8913 }, { "epoch": 0.91, "grad_norm": 1.7720099961704778, "learning_rate": 4.5286673109847357e-07, "loss": 0.6587, "step": 8914 }, { "epoch": 0.91, "grad_norm": 1.7665613437823307, "learning_rate": 4.518870589674751e-07, "loss": 0.6843, "step": 8915 }, { "epoch": 0.91, "grad_norm": 1.6085851494760013, "learning_rate": 4.509084231348182e-07, "loss": 0.6389, "step": 8916 }, { "epoch": 0.91, "grad_norm": 1.5926002358110811, "learning_rate": 4.499308237067213e-07, "loss": 0.7338, "step": 8917 }, { "epoch": 0.91, "grad_norm": 1.6111636451724387, "learning_rate": 4.489542607892894e-07, "loss": 0.6652, "step": 8918 }, { "epoch": 0.91, "grad_norm": 1.7833895836947011, "learning_rate": 4.479787344885078e-07, "loss": 0.8026, "step": 8919 }, { "epoch": 0.91, "grad_norm": 1.6879517925619654, "learning_rate": 4.470042449102596e-07, "loss": 0.6402, "step": 8920 }, { "epoch": 0.91, "grad_norm": 1.8778262970244906, "learning_rate": 4.4603079216030797e-07, "loss": 0.7691, "step": 8921 }, { "epoch": 0.91, "grad_norm": 1.7471342930414981, "learning_rate": 4.4505837634430616e-07, "loss": 0.7868, "step": 8922 }, { "epoch": 0.91, "grad_norm": 1.4882165634147697, "learning_rate": 4.440869975677964e-07, "loss": 0.6551, "step": 8923 }, { "epoch": 0.91, "grad_norm": 1.7413866953660853, "learning_rate": 4.4311665593620323e-07, "loss": 0.6163, "step": 8924 }, { "epoch": 0.91, "grad_norm": 1.5929686827582712, "learning_rate": 4.421473515548447e-07, "loss": 0.6359, "step": 8925 }, { "epoch": 0.91, "grad_norm": 1.7485565665955185, "learning_rate": 4.411790845289243e-07, "loss": 0.6752, "step": 8926 }, { "epoch": 0.91, "grad_norm": 1.5022991327522344, "learning_rate": 4.4021185496353036e-07, "loss": 0.537, "step": 8927 }, { "epoch": 0.91, "grad_norm": 1.7332070844888663, "learning_rate": 4.392456629636399e-07, "loss": 0.6884, "step": 8928 }, { "epoch": 0.91, "grad_norm": 1.6941139000307985, "learning_rate": 4.382805086341213e-07, "loss": 0.6324, "step": 8929 }, { "epoch": 0.91, "grad_norm": 1.8277559933369407, "learning_rate": 4.373163920797241e-07, "loss": 0.6778, "step": 8930 }, { "epoch": 0.91, "grad_norm": 1.8101863523173403, "learning_rate": 4.36353313405089e-07, "loss": 0.7158, "step": 8931 }, { "epoch": 0.91, "grad_norm": 1.6692541425579466, "learning_rate": 4.353912727147425e-07, "loss": 0.7753, "step": 8932 }, { "epoch": 0.91, "grad_norm": 1.718180299453628, "learning_rate": 4.344302701130998e-07, "loss": 0.6736, "step": 8933 }, { "epoch": 0.91, "grad_norm": 1.7976225939029056, "learning_rate": 4.3347030570446314e-07, "loss": 0.6929, "step": 8934 }, { "epoch": 0.91, "grad_norm": 2.0948448179186894, "learning_rate": 4.3251137959302023e-07, "loss": 0.7218, "step": 8935 }, { "epoch": 0.91, "grad_norm": 1.6384943101690133, "learning_rate": 4.3155349188284903e-07, "loss": 0.7612, "step": 8936 }, { "epoch": 0.91, "grad_norm": 1.7067810498067775, "learning_rate": 4.305966426779118e-07, "loss": 0.6568, "step": 8937 }, { "epoch": 0.91, "grad_norm": 1.7004994744578605, "learning_rate": 4.2964083208206e-07, "loss": 0.5903, "step": 8938 }, { "epoch": 0.91, "grad_norm": 1.7022825552040735, "learning_rate": 4.286860601990328e-07, "loss": 0.7522, "step": 8939 }, { "epoch": 0.91, "grad_norm": 1.6869400366291563, "learning_rate": 4.2773232713245515e-07, "loss": 0.7511, "step": 8940 }, { "epoch": 0.91, "grad_norm": 1.6267627208860855, "learning_rate": 4.267796329858398e-07, "loss": 0.6479, "step": 8941 }, { "epoch": 0.91, "grad_norm": 1.5098530524214537, "learning_rate": 4.2582797786258735e-07, "loss": 0.5881, "step": 8942 }, { "epoch": 0.91, "grad_norm": 1.678415811161424, "learning_rate": 4.2487736186598297e-07, "loss": 0.7102, "step": 8943 }, { "epoch": 0.91, "grad_norm": 1.811580603149009, "learning_rate": 4.2392778509920516e-07, "loss": 0.6838, "step": 8944 }, { "epoch": 0.91, "grad_norm": 1.9324127880015471, "learning_rate": 4.229792476653116e-07, "loss": 0.8057, "step": 8945 }, { "epoch": 0.91, "grad_norm": 1.8912088902745035, "learning_rate": 4.2203174966725324e-07, "loss": 0.7762, "step": 8946 }, { "epoch": 0.91, "grad_norm": 1.607190207534778, "learning_rate": 4.210852912078656e-07, "loss": 0.643, "step": 8947 }, { "epoch": 0.91, "grad_norm": 1.7417460029699203, "learning_rate": 4.2013987238987197e-07, "loss": 0.7045, "step": 8948 }, { "epoch": 0.91, "grad_norm": 1.7337059018397725, "learning_rate": 4.1919549331588374e-07, "loss": 0.599, "step": 8949 }, { "epoch": 0.91, "grad_norm": 1.7286472884952369, "learning_rate": 4.182521540883966e-07, "loss": 0.7168, "step": 8950 }, { "epoch": 0.91, "grad_norm": 1.6656075871975395, "learning_rate": 4.1730985480979645e-07, "loss": 0.6924, "step": 8951 }, { "epoch": 0.91, "grad_norm": 1.6302672025258802, "learning_rate": 4.1636859558235707e-07, "loss": 0.7471, "step": 8952 }, { "epoch": 0.91, "grad_norm": 1.6921406858909696, "learning_rate": 4.154283765082334e-07, "loss": 0.7204, "step": 8953 }, { "epoch": 0.91, "grad_norm": 1.7806522406686651, "learning_rate": 4.144891976894749e-07, "loss": 0.6436, "step": 8954 }, { "epoch": 0.91, "grad_norm": 1.872644394026765, "learning_rate": 4.135510592280112e-07, "loss": 0.7537, "step": 8955 }, { "epoch": 0.91, "grad_norm": 1.6822983087884402, "learning_rate": 4.126139612256663e-07, "loss": 0.632, "step": 8956 }, { "epoch": 0.91, "grad_norm": 1.6924687648307566, "learning_rate": 4.1167790378414674e-07, "loss": 0.601, "step": 8957 }, { "epoch": 0.91, "grad_norm": 1.6927001381944409, "learning_rate": 4.107428870050445e-07, "loss": 0.7585, "step": 8958 }, { "epoch": 0.91, "grad_norm": 1.6511589387598713, "learning_rate": 4.09808910989844e-07, "loss": 0.6367, "step": 8959 }, { "epoch": 0.91, "grad_norm": 1.7125663757115257, "learning_rate": 4.088759758399108e-07, "loss": 0.681, "step": 8960 }, { "epoch": 0.91, "grad_norm": 1.8104751149832377, "learning_rate": 4.079440816565028e-07, "loss": 0.6205, "step": 8961 }, { "epoch": 0.91, "grad_norm": 1.6243580652214884, "learning_rate": 4.070132285407624e-07, "loss": 0.7469, "step": 8962 }, { "epoch": 0.91, "grad_norm": 1.8825980210289652, "learning_rate": 4.0608341659371774e-07, "loss": 0.7058, "step": 8963 }, { "epoch": 0.91, "grad_norm": 1.6623960622591962, "learning_rate": 4.0515464591628696e-07, "loss": 0.6874, "step": 8964 }, { "epoch": 0.91, "grad_norm": 1.5681784036517072, "learning_rate": 4.042269166092716e-07, "loss": 0.8016, "step": 8965 }, { "epoch": 0.91, "grad_norm": 1.7266043370859399, "learning_rate": 4.0330022877336337e-07, "loss": 0.6505, "step": 8966 }, { "epoch": 0.91, "grad_norm": 1.781958601565994, "learning_rate": 4.023745825091407e-07, "loss": 0.628, "step": 8967 }, { "epoch": 0.91, "grad_norm": 1.7222770015039668, "learning_rate": 4.0144997791706664e-07, "loss": 0.6065, "step": 8968 }, { "epoch": 0.91, "grad_norm": 1.7610167368399874, "learning_rate": 4.005264150974919e-07, "loss": 0.7912, "step": 8969 }, { "epoch": 0.91, "grad_norm": 1.6324699798367595, "learning_rate": 3.9960389415065746e-07, "loss": 0.6925, "step": 8970 }, { "epoch": 0.91, "grad_norm": 2.0978893687631777, "learning_rate": 3.986824151766855e-07, "loss": 0.708, "step": 8971 }, { "epoch": 0.91, "grad_norm": 1.5194927759176355, "learning_rate": 3.977619782755915e-07, "loss": 0.5378, "step": 8972 }, { "epoch": 0.91, "grad_norm": 1.7378334005633167, "learning_rate": 3.968425835472711e-07, "loss": 0.6148, "step": 8973 }, { "epoch": 0.91, "grad_norm": 1.8144345071554742, "learning_rate": 3.959242310915112e-07, "loss": 0.7466, "step": 8974 }, { "epoch": 0.91, "grad_norm": 1.8078036734219984, "learning_rate": 3.9500692100798656e-07, "loss": 0.7714, "step": 8975 }, { "epoch": 0.91, "grad_norm": 1.617085545757556, "learning_rate": 3.9409065339625407e-07, "loss": 0.6702, "step": 8976 }, { "epoch": 0.91, "grad_norm": 1.7290694019525905, "learning_rate": 3.9317542835576317e-07, "loss": 0.707, "step": 8977 }, { "epoch": 0.91, "grad_norm": 1.6922559342798456, "learning_rate": 3.9226124598584323e-07, "loss": 0.6726, "step": 8978 }, { "epoch": 0.91, "grad_norm": 1.9071775216199967, "learning_rate": 3.913481063857183e-07, "loss": 0.6511, "step": 8979 }, { "epoch": 0.91, "grad_norm": 1.5998712830431994, "learning_rate": 3.904360096544935e-07, "loss": 0.7038, "step": 8980 }, { "epoch": 0.91, "grad_norm": 1.6427375601725964, "learning_rate": 3.895249558911629e-07, "loss": 0.672, "step": 8981 }, { "epoch": 0.91, "grad_norm": 1.6755330097592833, "learning_rate": 3.8861494519460865e-07, "loss": 0.6397, "step": 8982 }, { "epoch": 0.91, "grad_norm": 1.6342444584998612, "learning_rate": 3.87705977663595e-07, "loss": 0.6957, "step": 8983 }, { "epoch": 0.91, "grad_norm": 1.619564578301826, "learning_rate": 3.867980533967786e-07, "loss": 0.6582, "step": 8984 }, { "epoch": 0.91, "grad_norm": 1.5857180219289881, "learning_rate": 3.858911724927006e-07, "loss": 0.6964, "step": 8985 }, { "epoch": 0.91, "grad_norm": 1.6850104647723942, "learning_rate": 3.849853350497879e-07, "loss": 0.7092, "step": 8986 }, { "epoch": 0.91, "grad_norm": 1.6564782792292456, "learning_rate": 3.84080541166354e-07, "loss": 0.7242, "step": 8987 }, { "epoch": 0.91, "grad_norm": 1.7271970757460506, "learning_rate": 3.831767909406026e-07, "loss": 0.6899, "step": 8988 }, { "epoch": 0.91, "grad_norm": 1.8320539782625036, "learning_rate": 3.8227408447061853e-07, "loss": 0.774, "step": 8989 }, { "epoch": 0.91, "grad_norm": 1.8335983391878803, "learning_rate": 3.813724218543802e-07, "loss": 0.7207, "step": 8990 }, { "epoch": 0.91, "grad_norm": 1.6308164696229996, "learning_rate": 3.8047180318974474e-07, "loss": 0.7297, "step": 8991 }, { "epoch": 0.91, "grad_norm": 1.3703166795086241, "learning_rate": 3.7957222857446297e-07, "loss": 0.6287, "step": 8992 }, { "epoch": 0.91, "grad_norm": 1.7678726313798627, "learning_rate": 3.7867369810617114e-07, "loss": 0.7225, "step": 8993 }, { "epoch": 0.91, "grad_norm": 1.561412053470494, "learning_rate": 3.777762118823869e-07, "loss": 0.6146, "step": 8994 }, { "epoch": 0.91, "grad_norm": 1.7741039057431165, "learning_rate": 3.768797700005211e-07, "loss": 0.7123, "step": 8995 }, { "epoch": 0.92, "grad_norm": 1.809479864639746, "learning_rate": 3.75984372557866e-07, "loss": 0.7771, "step": 8996 }, { "epoch": 0.92, "grad_norm": 2.029375389380463, "learning_rate": 3.7509001965160494e-07, "loss": 0.6924, "step": 8997 }, { "epoch": 0.92, "grad_norm": 1.7141203015363913, "learning_rate": 3.74196711378807e-07, "loss": 0.6498, "step": 8998 }, { "epoch": 0.92, "grad_norm": 1.5722400717490779, "learning_rate": 3.733044478364234e-07, "loss": 0.6627, "step": 8999 }, { "epoch": 0.92, "grad_norm": 1.7598986440797793, "learning_rate": 3.72413229121299e-07, "loss": 0.6553, "step": 9000 }, { "epoch": 0.92, "grad_norm": 1.7653352445711585, "learning_rate": 3.715230553301585e-07, "loss": 0.689, "step": 9001 }, { "epoch": 0.92, "grad_norm": 1.887705746303693, "learning_rate": 3.706339265596182e-07, "loss": 0.8099, "step": 9002 }, { "epoch": 0.92, "grad_norm": 1.5972384367430816, "learning_rate": 3.6974584290617955e-07, "loss": 0.6685, "step": 9003 }, { "epoch": 0.92, "grad_norm": 1.7070696333151447, "learning_rate": 3.688588044662289e-07, "loss": 0.6961, "step": 9004 }, { "epoch": 0.92, "grad_norm": 1.7616232130572633, "learning_rate": 3.6797281133603926e-07, "loss": 0.6333, "step": 9005 }, { "epoch": 0.92, "grad_norm": 1.7008064665959346, "learning_rate": 3.6708786361177586e-07, "loss": 0.725, "step": 9006 }, { "epoch": 0.92, "grad_norm": 1.6544849763586182, "learning_rate": 3.662039613894808e-07, "loss": 0.6224, "step": 9007 }, { "epoch": 0.92, "grad_norm": 1.6644316689410115, "learning_rate": 3.6532110476509064e-07, "loss": 0.6801, "step": 9008 }, { "epoch": 0.92, "grad_norm": 1.7592494890729364, "learning_rate": 3.644392938344254e-07, "loss": 0.7281, "step": 9009 }, { "epoch": 0.92, "grad_norm": 1.7409770219731049, "learning_rate": 3.6355852869318976e-07, "loss": 0.6187, "step": 9010 }, { "epoch": 0.92, "grad_norm": 1.7447307456725696, "learning_rate": 3.6267880943698153e-07, "loss": 0.6459, "step": 9011 }, { "epoch": 0.92, "grad_norm": 1.76726525973141, "learning_rate": 3.6180013616127554e-07, "loss": 0.7181, "step": 9012 }, { "epoch": 0.92, "grad_norm": 1.74741714153782, "learning_rate": 3.6092250896143986e-07, "loss": 0.6133, "step": 9013 }, { "epoch": 0.92, "grad_norm": 1.8751732333083337, "learning_rate": 3.6004592793272954e-07, "loss": 0.7615, "step": 9014 }, { "epoch": 0.92, "grad_norm": 1.6830093484207342, "learning_rate": 3.5917039317028057e-07, "loss": 0.7213, "step": 9015 }, { "epoch": 0.92, "grad_norm": 1.7216518180084097, "learning_rate": 3.5829590476911925e-07, "loss": 0.8031, "step": 9016 }, { "epoch": 0.92, "grad_norm": 1.6066940758025394, "learning_rate": 3.574224628241596e-07, "loss": 0.6596, "step": 9017 }, { "epoch": 0.92, "grad_norm": 1.741718527853461, "learning_rate": 3.5655006743019695e-07, "loss": 0.6479, "step": 9018 }, { "epoch": 0.92, "grad_norm": 1.956259263388941, "learning_rate": 3.556787186819177e-07, "loss": 0.7758, "step": 9019 }, { "epoch": 0.92, "grad_norm": 1.664427510137072, "learning_rate": 3.548084166738952e-07, "loss": 0.682, "step": 9020 }, { "epoch": 0.92, "grad_norm": 1.393762831262044, "learning_rate": 3.5393916150058274e-07, "loss": 0.5206, "step": 9021 }, { "epoch": 0.92, "grad_norm": 1.6615508318160366, "learning_rate": 3.5307095325632814e-07, "loss": 0.7007, "step": 9022 }, { "epoch": 0.92, "grad_norm": 1.8205337937904904, "learning_rate": 3.522037920353605e-07, "loss": 0.669, "step": 9023 }, { "epoch": 0.92, "grad_norm": 1.8779304425650445, "learning_rate": 3.5133767793179676e-07, "loss": 0.7525, "step": 9024 }, { "epoch": 0.92, "grad_norm": 1.7115456730628904, "learning_rate": 3.5047261103963837e-07, "loss": 0.6273, "step": 9025 }, { "epoch": 0.92, "grad_norm": 1.7026022015340625, "learning_rate": 3.49608591452778e-07, "loss": 0.7278, "step": 9026 }, { "epoch": 0.92, "grad_norm": 1.5533481880995001, "learning_rate": 3.4874561926498964e-07, "loss": 0.52, "step": 9027 }, { "epoch": 0.92, "grad_norm": 1.6861445365830685, "learning_rate": 3.478836945699349e-07, "loss": 0.706, "step": 9028 }, { "epoch": 0.92, "grad_norm": 1.7246204553515043, "learning_rate": 3.470228174611634e-07, "loss": 0.6582, "step": 9029 }, { "epoch": 0.92, "grad_norm": 1.7259912400155208, "learning_rate": 3.461629880321082e-07, "loss": 0.6964, "step": 9030 }, { "epoch": 0.92, "grad_norm": 1.5258391955713864, "learning_rate": 3.4530420637609365e-07, "loss": 0.6223, "step": 9031 }, { "epoch": 0.92, "grad_norm": 1.9501077686189958, "learning_rate": 3.444464725863228e-07, "loss": 0.7221, "step": 9032 }, { "epoch": 0.92, "grad_norm": 1.519065257951722, "learning_rate": 3.435897867558924e-07, "loss": 0.7602, "step": 9033 }, { "epoch": 0.92, "grad_norm": 1.6559610091291315, "learning_rate": 3.4273414897778133e-07, "loss": 0.6338, "step": 9034 }, { "epoch": 0.92, "grad_norm": 1.8860713373495857, "learning_rate": 3.418795593448554e-07, "loss": 0.6843, "step": 9035 }, { "epoch": 0.92, "grad_norm": 1.7613812992796087, "learning_rate": 3.4102601794986813e-07, "loss": 0.6712, "step": 9036 }, { "epoch": 0.92, "grad_norm": 1.6267258861120946, "learning_rate": 3.401735248854554e-07, "loss": 0.6966, "step": 9037 }, { "epoch": 0.92, "grad_norm": 1.7168945312821067, "learning_rate": 3.3932208024414435e-07, "loss": 0.8096, "step": 9038 }, { "epoch": 0.92, "grad_norm": 1.8824721722015318, "learning_rate": 3.3847168411834666e-07, "loss": 0.7043, "step": 9039 }, { "epoch": 0.92, "grad_norm": 1.644816582797017, "learning_rate": 3.3762233660035724e-07, "loss": 0.7229, "step": 9040 }, { "epoch": 0.92, "grad_norm": 1.6122426967789936, "learning_rate": 3.367740377823603e-07, "loss": 0.6559, "step": 9041 }, { "epoch": 0.92, "grad_norm": 1.7948017004198267, "learning_rate": 3.3592678775642653e-07, "loss": 0.7649, "step": 9042 }, { "epoch": 0.92, "grad_norm": 1.6036517898197797, "learning_rate": 3.35080586614509e-07, "loss": 0.7244, "step": 9043 }, { "epoch": 0.92, "grad_norm": 1.5588599713340119, "learning_rate": 3.342354344484533e-07, "loss": 0.6241, "step": 9044 }, { "epoch": 0.92, "grad_norm": 1.7085589108728771, "learning_rate": 3.333913313499848e-07, "loss": 0.642, "step": 9045 }, { "epoch": 0.92, "grad_norm": 1.756713623597841, "learning_rate": 3.3254827741071806e-07, "loss": 0.6681, "step": 9046 }, { "epoch": 0.92, "grad_norm": 1.4899810166224117, "learning_rate": 3.3170627272215427e-07, "loss": 0.6863, "step": 9047 }, { "epoch": 0.92, "grad_norm": 1.531364479212317, "learning_rate": 3.308653173756793e-07, "loss": 0.755, "step": 9048 }, { "epoch": 0.92, "grad_norm": 1.5168149460058475, "learning_rate": 3.300254114625656e-07, "loss": 0.7084, "step": 9049 }, { "epoch": 0.92, "grad_norm": 1.7459672256196828, "learning_rate": 3.2918655507397144e-07, "loss": 0.7533, "step": 9050 }, { "epoch": 0.92, "grad_norm": 1.7158933208599039, "learning_rate": 3.283487483009429e-07, "loss": 0.6339, "step": 9051 }, { "epoch": 0.92, "grad_norm": 1.7430776876204692, "learning_rate": 3.2751199123441046e-07, "loss": 0.6434, "step": 9052 }, { "epoch": 0.92, "grad_norm": 1.5704904697330333, "learning_rate": 3.2667628396518936e-07, "loss": 0.5842, "step": 9053 }, { "epoch": 0.92, "grad_norm": 1.7800244667694278, "learning_rate": 3.258416265839848e-07, "loss": 0.7669, "step": 9054 }, { "epoch": 0.92, "grad_norm": 1.7638354518588866, "learning_rate": 3.2500801918138425e-07, "loss": 0.5612, "step": 9055 }, { "epoch": 0.92, "grad_norm": 1.8344341984251336, "learning_rate": 3.241754618478632e-07, "loss": 0.647, "step": 9056 }, { "epoch": 0.92, "grad_norm": 1.7039427641608778, "learning_rate": 3.2334395467378266e-07, "loss": 0.6021, "step": 9057 }, { "epoch": 0.92, "grad_norm": 1.7322214223361194, "learning_rate": 3.2251349774939046e-07, "loss": 0.6456, "step": 9058 }, { "epoch": 0.92, "grad_norm": 1.7517206466744195, "learning_rate": 3.216840911648178e-07, "loss": 0.7309, "step": 9059 }, { "epoch": 0.92, "grad_norm": 1.6778998604568132, "learning_rate": 3.2085573501008717e-07, "loss": 0.6758, "step": 9060 }, { "epoch": 0.92, "grad_norm": 1.8758051183890794, "learning_rate": 3.200284293750999e-07, "loss": 0.7864, "step": 9061 }, { "epoch": 0.92, "grad_norm": 1.5772792146727868, "learning_rate": 3.1920217434964985e-07, "loss": 0.6732, "step": 9062 }, { "epoch": 0.92, "grad_norm": 1.8914308924735, "learning_rate": 3.1837697002341293e-07, "loss": 0.6585, "step": 9063 }, { "epoch": 0.92, "grad_norm": 1.7453450043856464, "learning_rate": 3.1755281648595093e-07, "loss": 0.7913, "step": 9064 }, { "epoch": 0.92, "grad_norm": 1.3858743411899503, "learning_rate": 3.1672971382671556e-07, "loss": 0.6196, "step": 9065 }, { "epoch": 0.92, "grad_norm": 1.8338183072323526, "learning_rate": 3.159076621350399e-07, "loss": 0.7064, "step": 9066 }, { "epoch": 0.92, "grad_norm": 1.685855771910675, "learning_rate": 3.1508666150014575e-07, "loss": 0.6349, "step": 9067 }, { "epoch": 0.92, "grad_norm": 1.6489397211084076, "learning_rate": 3.142667120111387e-07, "loss": 0.6519, "step": 9068 }, { "epoch": 0.92, "grad_norm": 1.49196056325252, "learning_rate": 3.134478137570118e-07, "loss": 0.6841, "step": 9069 }, { "epoch": 0.92, "grad_norm": 1.6392600812772116, "learning_rate": 3.126299668266453e-07, "loss": 0.6858, "step": 9070 }, { "epoch": 0.92, "grad_norm": 1.6652442805073417, "learning_rate": 3.1181317130880127e-07, "loss": 0.6811, "step": 9071 }, { "epoch": 0.92, "grad_norm": 1.708651031293616, "learning_rate": 3.1099742729213235e-07, "loss": 0.71, "step": 9072 }, { "epoch": 0.92, "grad_norm": 1.754886721993718, "learning_rate": 3.101827348651731e-07, "loss": 0.6408, "step": 9073 }, { "epoch": 0.92, "grad_norm": 1.6962840997056352, "learning_rate": 3.093690941163452e-07, "loss": 0.6917, "step": 9074 }, { "epoch": 0.92, "grad_norm": 1.5700005927580967, "learning_rate": 3.08556505133959e-07, "loss": 0.6271, "step": 9075 }, { "epoch": 0.92, "grad_norm": 1.581948249429852, "learning_rate": 3.077449680062061e-07, "loss": 0.6254, "step": 9076 }, { "epoch": 0.92, "grad_norm": 1.51273905759815, "learning_rate": 3.069344828211662e-07, "loss": 0.6443, "step": 9077 }, { "epoch": 0.92, "grad_norm": 1.7028985474900407, "learning_rate": 3.061250496668078e-07, "loss": 0.749, "step": 9078 }, { "epoch": 0.92, "grad_norm": 1.6677452775571646, "learning_rate": 3.053166686309783e-07, "loss": 0.6592, "step": 9079 }, { "epoch": 0.92, "grad_norm": 1.6062980603348134, "learning_rate": 3.0450933980141763e-07, "loss": 0.6491, "step": 9080 }, { "epoch": 0.92, "grad_norm": 1.5340881022151536, "learning_rate": 3.0370306326574673e-07, "loss": 0.6637, "step": 9081 }, { "epoch": 0.92, "grad_norm": 1.6672173189261768, "learning_rate": 3.028978391114745e-07, "loss": 0.7864, "step": 9082 }, { "epoch": 0.92, "grad_norm": 1.736912952197448, "learning_rate": 3.020936674259989e-07, "loss": 0.6965, "step": 9083 }, { "epoch": 0.92, "grad_norm": 1.688420218707243, "learning_rate": 3.012905482965944e-07, "loss": 0.6519, "step": 9084 }, { "epoch": 0.92, "grad_norm": 1.735607676551278, "learning_rate": 3.0048848181043256e-07, "loss": 0.673, "step": 9085 }, { "epoch": 0.92, "grad_norm": 1.7040912601415914, "learning_rate": 2.996874680545603e-07, "loss": 0.7363, "step": 9086 }, { "epoch": 0.92, "grad_norm": 1.6373307067637413, "learning_rate": 2.9888750711591805e-07, "loss": 0.6322, "step": 9087 }, { "epoch": 0.92, "grad_norm": 1.8062957317799482, "learning_rate": 2.980885990813298e-07, "loss": 0.6328, "step": 9088 }, { "epoch": 0.92, "grad_norm": 1.5429622083207397, "learning_rate": 2.972907440375017e-07, "loss": 0.6703, "step": 9089 }, { "epoch": 0.92, "grad_norm": 1.7175453590035683, "learning_rate": 2.964939420710311e-07, "loss": 0.7612, "step": 9090 }, { "epoch": 0.92, "grad_norm": 1.6746756006078176, "learning_rate": 2.956981932683967e-07, "loss": 0.697, "step": 9091 }, { "epoch": 0.92, "grad_norm": 1.7654789376597413, "learning_rate": 2.949034977159648e-07, "loss": 0.6529, "step": 9092 }, { "epoch": 0.92, "grad_norm": 1.5521819010013271, "learning_rate": 2.941098554999877e-07, "loss": 0.6326, "step": 9093 }, { "epoch": 0.93, "grad_norm": 1.7965320659002997, "learning_rate": 2.933172667066031e-07, "loss": 0.6972, "step": 9094 }, { "epoch": 0.93, "grad_norm": 1.5800734066412943, "learning_rate": 2.9252573142183327e-07, "loss": 0.6148, "step": 9095 }, { "epoch": 0.93, "grad_norm": 1.8429652754631338, "learning_rate": 2.917352497315873e-07, "loss": 0.6747, "step": 9096 }, { "epoch": 0.93, "grad_norm": 1.6399270165705966, "learning_rate": 2.9094582172165876e-07, "loss": 0.6913, "step": 9097 }, { "epoch": 0.93, "grad_norm": 1.6281374340831094, "learning_rate": 2.9015744747773024e-07, "loss": 0.7202, "step": 9098 }, { "epoch": 0.93, "grad_norm": 1.552728970873856, "learning_rate": 2.893701270853655e-07, "loss": 0.752, "step": 9099 }, { "epoch": 0.93, "grad_norm": 1.65165872718086, "learning_rate": 2.885838606300151e-07, "loss": 0.6869, "step": 9100 }, { "epoch": 0.93, "grad_norm": 1.7091754810620192, "learning_rate": 2.8779864819701853e-07, "loss": 0.6345, "step": 9101 }, { "epoch": 0.93, "grad_norm": 1.6686871905612022, "learning_rate": 2.8701448987159654e-07, "loss": 0.5509, "step": 9102 }, { "epoch": 0.93, "grad_norm": 1.8173373699523554, "learning_rate": 2.8623138573885767e-07, "loss": 0.7899, "step": 9103 }, { "epoch": 0.93, "grad_norm": 1.7692576744780697, "learning_rate": 2.854493358837951e-07, "loss": 0.6359, "step": 9104 }, { "epoch": 0.93, "grad_norm": 1.8505447600940077, "learning_rate": 2.8466834039128754e-07, "loss": 0.6465, "step": 9105 }, { "epoch": 0.93, "grad_norm": 1.7751451897404646, "learning_rate": 2.838883993461028e-07, "loss": 0.7206, "step": 9106 }, { "epoch": 0.93, "grad_norm": 1.8723784794316785, "learning_rate": 2.831095128328876e-07, "loss": 0.6643, "step": 9107 }, { "epoch": 0.93, "grad_norm": 1.6200263587394934, "learning_rate": 2.823316809361809e-07, "loss": 0.5683, "step": 9108 }, { "epoch": 0.93, "grad_norm": 1.702563720865403, "learning_rate": 2.8155490374040196e-07, "loss": 0.7154, "step": 9109 }, { "epoch": 0.93, "grad_norm": 1.7451961706558843, "learning_rate": 2.807791813298588e-07, "loss": 0.7522, "step": 9110 }, { "epoch": 0.93, "grad_norm": 1.6920928919156772, "learning_rate": 2.8000451378874525e-07, "loss": 0.6407, "step": 9111 }, { "epoch": 0.93, "grad_norm": 1.7605926944594885, "learning_rate": 2.7923090120113626e-07, "loss": 0.6672, "step": 9112 }, { "epoch": 0.93, "grad_norm": 1.5573327560649162, "learning_rate": 2.7845834365099913e-07, "loss": 0.6749, "step": 9113 }, { "epoch": 0.93, "grad_norm": 1.616019278808511, "learning_rate": 2.7768684122217893e-07, "loss": 0.6522, "step": 9114 }, { "epoch": 0.93, "grad_norm": 1.7863348392332858, "learning_rate": 2.769163939984121e-07, "loss": 0.6702, "step": 9115 }, { "epoch": 0.93, "grad_norm": 1.9669771441858406, "learning_rate": 2.7614700206332056e-07, "loss": 0.7218, "step": 9116 }, { "epoch": 0.93, "grad_norm": 1.6853909847735868, "learning_rate": 2.7537866550040647e-07, "loss": 0.5787, "step": 9117 }, { "epoch": 0.93, "grad_norm": 1.7771673876183924, "learning_rate": 2.74611384393062e-07, "loss": 0.698, "step": 9118 }, { "epoch": 0.93, "grad_norm": 1.5863480135223418, "learning_rate": 2.7384515882456386e-07, "loss": 0.685, "step": 9119 }, { "epoch": 0.93, "grad_norm": 1.6153180520052017, "learning_rate": 2.730799888780744e-07, "loss": 0.6658, "step": 9120 }, { "epoch": 0.93, "grad_norm": 1.5779497539387755, "learning_rate": 2.7231587463663945e-07, "loss": 0.627, "step": 9121 }, { "epoch": 0.93, "grad_norm": 1.8041035551184352, "learning_rate": 2.7155281618319265e-07, "loss": 0.7443, "step": 9122 }, { "epoch": 0.93, "grad_norm": 1.8765336984772065, "learning_rate": 2.7079081360055106e-07, "loss": 0.6708, "step": 9123 }, { "epoch": 0.93, "grad_norm": 1.5160141983328415, "learning_rate": 2.7002986697141966e-07, "loss": 0.5983, "step": 9124 }, { "epoch": 0.93, "grad_norm": 1.854303661617946, "learning_rate": 2.692699763783868e-07, "loss": 0.7064, "step": 9125 }, { "epoch": 0.93, "grad_norm": 1.5622552568194452, "learning_rate": 2.685111419039255e-07, "loss": 0.6858, "step": 9126 }, { "epoch": 0.93, "grad_norm": 1.5987403896005212, "learning_rate": 2.677533636303964e-07, "loss": 0.6825, "step": 9127 }, { "epoch": 0.93, "grad_norm": 1.6486100461672395, "learning_rate": 2.669966416400449e-07, "loss": 0.7009, "step": 9128 }, { "epoch": 0.93, "grad_norm": 1.8947303431333133, "learning_rate": 2.6624097601499976e-07, "loss": 0.7161, "step": 9129 }, { "epoch": 0.93, "grad_norm": 1.6099671906456852, "learning_rate": 2.654863668372787e-07, "loss": 0.5869, "step": 9130 }, { "epoch": 0.93, "grad_norm": 1.8616912431349213, "learning_rate": 2.6473281418878173e-07, "loss": 0.6781, "step": 9131 }, { "epoch": 0.93, "grad_norm": 1.87333267111817, "learning_rate": 2.6398031815129454e-07, "loss": 0.8606, "step": 9132 }, { "epoch": 0.93, "grad_norm": 1.7026026627353354, "learning_rate": 2.6322887880649074e-07, "loss": 0.5823, "step": 9133 }, { "epoch": 0.93, "grad_norm": 1.663095785623857, "learning_rate": 2.6247849623592504e-07, "loss": 0.613, "step": 9134 }, { "epoch": 0.93, "grad_norm": 1.7113272331280165, "learning_rate": 2.617291705210412e-07, "loss": 0.691, "step": 9135 }, { "epoch": 0.93, "grad_norm": 1.8262977928445028, "learning_rate": 2.6098090174316636e-07, "loss": 0.6946, "step": 9136 }, { "epoch": 0.93, "grad_norm": 1.7582436591373753, "learning_rate": 2.602336899835134e-07, "loss": 0.6886, "step": 9137 }, { "epoch": 0.93, "grad_norm": 1.8419965747676532, "learning_rate": 2.594875353231796e-07, "loss": 0.7765, "step": 9138 }, { "epoch": 0.93, "grad_norm": 1.622396277431157, "learning_rate": 2.5874243784315025e-07, "loss": 0.5949, "step": 9139 }, { "epoch": 0.93, "grad_norm": 1.635845730948259, "learning_rate": 2.579983976242928e-07, "loss": 0.6032, "step": 9140 }, { "epoch": 0.93, "grad_norm": 1.9015091477785202, "learning_rate": 2.572554147473616e-07, "loss": 0.7317, "step": 9141 }, { "epoch": 0.93, "grad_norm": 1.6575673802244377, "learning_rate": 2.565134892929955e-07, "loss": 0.6806, "step": 9142 }, { "epoch": 0.93, "grad_norm": 1.5589129509589466, "learning_rate": 2.5577262134171887e-07, "loss": 0.666, "step": 9143 }, { "epoch": 0.93, "grad_norm": 1.7090507291266002, "learning_rate": 2.5503281097394194e-07, "loss": 0.6619, "step": 9144 }, { "epoch": 0.93, "grad_norm": 1.7464178687329157, "learning_rate": 2.5429405826995934e-07, "loss": 0.688, "step": 9145 }, { "epoch": 0.93, "grad_norm": 1.585521708580421, "learning_rate": 2.5355636330994915e-07, "loss": 0.6253, "step": 9146 }, { "epoch": 0.93, "grad_norm": 1.6957372162455264, "learning_rate": 2.5281972617398064e-07, "loss": 0.7472, "step": 9147 }, { "epoch": 0.93, "grad_norm": 1.7136949583543848, "learning_rate": 2.52084146942001e-07, "loss": 0.6604, "step": 9148 }, { "epoch": 0.93, "grad_norm": 1.5735246095230109, "learning_rate": 2.5134962569384746e-07, "loss": 0.7197, "step": 9149 }, { "epoch": 0.93, "grad_norm": 1.4968907551306718, "learning_rate": 2.506161625092396e-07, "loss": 0.5578, "step": 9150 }, { "epoch": 0.93, "grad_norm": 1.9557448471366423, "learning_rate": 2.498837574677837e-07, "loss": 0.7901, "step": 9151 }, { "epoch": 0.93, "grad_norm": 1.6133938766958, "learning_rate": 2.491524106489718e-07, "loss": 0.792, "step": 9152 }, { "epoch": 0.93, "grad_norm": 1.734088575672457, "learning_rate": 2.484221221321792e-07, "loss": 0.7127, "step": 9153 }, { "epoch": 0.93, "grad_norm": 1.7922240857189475, "learning_rate": 2.47692891996667e-07, "loss": 0.6896, "step": 9154 }, { "epoch": 0.93, "grad_norm": 1.5268419657742744, "learning_rate": 2.4696472032158305e-07, "loss": 0.5813, "step": 9155 }, { "epoch": 0.93, "grad_norm": 1.7239929918768928, "learning_rate": 2.462376071859585e-07, "loss": 0.7521, "step": 9156 }, { "epoch": 0.93, "grad_norm": 1.6846588000318006, "learning_rate": 2.4551155266871017e-07, "loss": 0.6519, "step": 9157 }, { "epoch": 0.93, "grad_norm": 1.8150620464602065, "learning_rate": 2.447865568486385e-07, "loss": 0.6218, "step": 9158 }, { "epoch": 0.93, "grad_norm": 1.7133394743606516, "learning_rate": 2.440626198044327e-07, "loss": 0.6594, "step": 9159 }, { "epoch": 0.93, "grad_norm": 1.9240325579831237, "learning_rate": 2.4333974161466324e-07, "loss": 0.7677, "step": 9160 }, { "epoch": 0.93, "grad_norm": 1.8672205907680561, "learning_rate": 2.4261792235778737e-07, "loss": 0.6923, "step": 9161 }, { "epoch": 0.93, "grad_norm": 1.5890577955916911, "learning_rate": 2.418971621121491e-07, "loss": 0.6073, "step": 9162 }, { "epoch": 0.93, "grad_norm": 1.6869253072456738, "learning_rate": 2.411774609559725e-07, "loss": 0.6317, "step": 9163 }, { "epoch": 0.93, "grad_norm": 1.6117386715023836, "learning_rate": 2.404588189673718e-07, "loss": 0.5953, "step": 9164 }, { "epoch": 0.93, "grad_norm": 1.695397700532162, "learning_rate": 2.3974123622434566e-07, "loss": 0.6871, "step": 9165 }, { "epoch": 0.93, "grad_norm": 1.6048962661185335, "learning_rate": 2.390247128047729e-07, "loss": 0.6849, "step": 9166 }, { "epoch": 0.93, "grad_norm": 1.4696291515897013, "learning_rate": 2.383092487864247e-07, "loss": 0.6443, "step": 9167 }, { "epoch": 0.93, "grad_norm": 1.4987936697717255, "learning_rate": 2.3759484424695113e-07, "loss": 0.6706, "step": 9168 }, { "epoch": 0.93, "grad_norm": 1.7643556140864138, "learning_rate": 2.3688149926389015e-07, "loss": 0.6497, "step": 9169 }, { "epoch": 0.93, "grad_norm": 1.6598835699626207, "learning_rate": 2.361692139146643e-07, "loss": 0.6873, "step": 9170 }, { "epoch": 0.93, "grad_norm": 1.6054352567438275, "learning_rate": 2.354579882765806e-07, "loss": 0.6001, "step": 9171 }, { "epoch": 0.93, "grad_norm": 1.8758251422175432, "learning_rate": 2.3474782242683292e-07, "loss": 0.7149, "step": 9172 }, { "epoch": 0.93, "grad_norm": 2.1173561572795223, "learning_rate": 2.3403871644249731e-07, "loss": 0.6189, "step": 9173 }, { "epoch": 0.93, "grad_norm": 1.6994143128015073, "learning_rate": 2.3333067040053558e-07, "loss": 0.6513, "step": 9174 }, { "epoch": 0.93, "grad_norm": 1.6467310548387322, "learning_rate": 2.3262368437779736e-07, "loss": 0.7023, "step": 9175 }, { "epoch": 0.93, "grad_norm": 1.601730453794561, "learning_rate": 2.3191775845101238e-07, "loss": 0.6781, "step": 9176 }, { "epoch": 0.93, "grad_norm": 1.6270212271280904, "learning_rate": 2.3121289269679937e-07, "loss": 0.5984, "step": 9177 }, { "epoch": 0.93, "grad_norm": 1.6570884566322033, "learning_rate": 2.3050908719166155e-07, "loss": 0.6897, "step": 9178 }, { "epoch": 0.93, "grad_norm": 1.6162197492295876, "learning_rate": 2.2980634201198227e-07, "loss": 0.6794, "step": 9179 }, { "epoch": 0.93, "grad_norm": 1.8218868665569699, "learning_rate": 2.2910465723403717e-07, "loss": 0.7722, "step": 9180 }, { "epoch": 0.93, "grad_norm": 1.6672965841302891, "learning_rate": 2.2840403293398095e-07, "loss": 0.7197, "step": 9181 }, { "epoch": 0.93, "grad_norm": 1.7340015665776833, "learning_rate": 2.2770446918785716e-07, "loss": 0.7679, "step": 9182 }, { "epoch": 0.93, "grad_norm": 1.6038653974037864, "learning_rate": 2.2700596607159175e-07, "loss": 0.6896, "step": 9183 }, { "epoch": 0.93, "grad_norm": 1.7365201637290837, "learning_rate": 2.263085236609952e-07, "loss": 0.6372, "step": 9184 }, { "epoch": 0.93, "grad_norm": 1.7596204753893094, "learning_rate": 2.2561214203176583e-07, "loss": 0.7103, "step": 9185 }, { "epoch": 0.93, "grad_norm": 1.7182996150070722, "learning_rate": 2.249168212594832e-07, "loss": 0.6525, "step": 9186 }, { "epoch": 0.93, "grad_norm": 1.8044017196554798, "learning_rate": 2.2422256141961473e-07, "loss": 0.7075, "step": 9187 }, { "epoch": 0.93, "grad_norm": 1.6296108621493695, "learning_rate": 2.2352936258751235e-07, "loss": 0.5933, "step": 9188 }, { "epoch": 0.93, "grad_norm": 1.587229110833575, "learning_rate": 2.228372248384092e-07, "loss": 0.6692, "step": 9189 }, { "epoch": 0.93, "grad_norm": 1.4784135454097325, "learning_rate": 2.2214614824742853e-07, "loss": 0.6358, "step": 9190 }, { "epoch": 0.93, "grad_norm": 1.7090710335488055, "learning_rate": 2.214561328895748e-07, "loss": 0.6261, "step": 9191 }, { "epoch": 0.94, "grad_norm": 1.842598291770892, "learning_rate": 2.2076717883973808e-07, "loss": 0.6602, "step": 9192 }, { "epoch": 0.94, "grad_norm": 1.7598264214121744, "learning_rate": 2.2007928617269414e-07, "loss": 0.6436, "step": 9193 }, { "epoch": 0.94, "grad_norm": 1.8035073777880144, "learning_rate": 2.1939245496310324e-07, "loss": 0.7111, "step": 9194 }, { "epoch": 0.94, "grad_norm": 1.6420986225961434, "learning_rate": 2.187066852855091e-07, "loss": 0.6878, "step": 9195 }, { "epoch": 0.94, "grad_norm": 1.6271321324358616, "learning_rate": 2.1802197721434215e-07, "loss": 0.6896, "step": 9196 }, { "epoch": 0.94, "grad_norm": 1.7339393507898984, "learning_rate": 2.1733833082391632e-07, "loss": 0.7423, "step": 9197 }, { "epoch": 0.94, "grad_norm": 1.8347943762378955, "learning_rate": 2.166557461884322e-07, "loss": 0.7663, "step": 9198 }, { "epoch": 0.94, "grad_norm": 1.6972038780825494, "learning_rate": 2.1597422338197172e-07, "loss": 0.7218, "step": 9199 }, { "epoch": 0.94, "grad_norm": 2.034996875430456, "learning_rate": 2.1529376247850342e-07, "loss": 0.6533, "step": 9200 }, { "epoch": 0.94, "grad_norm": 1.603699345889999, "learning_rate": 2.146143635518827e-07, "loss": 0.7444, "step": 9201 }, { "epoch": 0.94, "grad_norm": 1.763026468203593, "learning_rate": 2.139360266758461e-07, "loss": 0.6964, "step": 9202 }, { "epoch": 0.94, "grad_norm": 1.5337279377060986, "learning_rate": 2.13258751924017e-07, "loss": 0.7364, "step": 9203 }, { "epoch": 0.94, "grad_norm": 1.6065746501014624, "learning_rate": 2.1258253936990213e-07, "loss": 0.7114, "step": 9204 }, { "epoch": 0.94, "grad_norm": 1.6610368388773546, "learning_rate": 2.1190738908689502e-07, "loss": 0.63, "step": 9205 }, { "epoch": 0.94, "grad_norm": 1.6533510918146666, "learning_rate": 2.1123330114827256e-07, "loss": 0.6356, "step": 9206 }, { "epoch": 0.94, "grad_norm": 1.7435137130120424, "learning_rate": 2.1056027562719517e-07, "loss": 0.7129, "step": 9207 }, { "epoch": 0.94, "grad_norm": 1.5918102940992866, "learning_rate": 2.0988831259670994e-07, "loss": 0.5952, "step": 9208 }, { "epoch": 0.94, "grad_norm": 1.7703162917156607, "learning_rate": 2.0921741212974967e-07, "loss": 0.6956, "step": 9209 }, { "epoch": 0.94, "grad_norm": 1.725491826595931, "learning_rate": 2.085475742991272e-07, "loss": 0.6831, "step": 9210 }, { "epoch": 0.94, "grad_norm": 1.522459509380362, "learning_rate": 2.078787991775455e-07, "loss": 0.5566, "step": 9211 }, { "epoch": 0.94, "grad_norm": 1.5164115294750733, "learning_rate": 2.072110868375876e-07, "loss": 0.629, "step": 9212 }, { "epoch": 0.94, "grad_norm": 1.6799430968173943, "learning_rate": 2.0654443735172447e-07, "loss": 0.6195, "step": 9213 }, { "epoch": 0.94, "grad_norm": 1.7529038428627008, "learning_rate": 2.058788507923104e-07, "loss": 0.6232, "step": 9214 }, { "epoch": 0.94, "grad_norm": 1.6082086073174993, "learning_rate": 2.052143272315843e-07, "loss": 0.6703, "step": 9215 }, { "epoch": 0.94, "grad_norm": 1.6880699939080532, "learning_rate": 2.0455086674166957e-07, "loss": 0.7222, "step": 9216 }, { "epoch": 0.94, "grad_norm": 1.6500289904255732, "learning_rate": 2.038884693945742e-07, "loss": 0.6659, "step": 9217 }, { "epoch": 0.94, "grad_norm": 1.5552756948520066, "learning_rate": 2.0322713526219062e-07, "loss": 0.6413, "step": 9218 }, { "epoch": 0.94, "grad_norm": 1.6993894982632298, "learning_rate": 2.0256686441629814e-07, "loss": 0.694, "step": 9219 }, { "epoch": 0.94, "grad_norm": 1.7077427956672064, "learning_rate": 2.019076569285583e-07, "loss": 0.6796, "step": 9220 }, { "epoch": 0.94, "grad_norm": 1.700456720601353, "learning_rate": 2.0124951287051718e-07, "loss": 0.6839, "step": 9221 }, { "epoch": 0.94, "grad_norm": 1.7073823170278, "learning_rate": 2.0059243231360437e-07, "loss": 0.63, "step": 9222 }, { "epoch": 0.94, "grad_norm": 1.7442798621212803, "learning_rate": 1.999364153291383e-07, "loss": 0.6994, "step": 9223 }, { "epoch": 0.94, "grad_norm": 2.003575890582916, "learning_rate": 1.9928146198831987e-07, "loss": 0.7964, "step": 9224 }, { "epoch": 0.94, "grad_norm": 1.7730842780328422, "learning_rate": 1.9862757236223108e-07, "loss": 0.7864, "step": 9225 }, { "epoch": 0.94, "grad_norm": 1.5536572444139543, "learning_rate": 1.979747465218429e-07, "loss": 0.6897, "step": 9226 }, { "epoch": 0.94, "grad_norm": 1.7481819975287163, "learning_rate": 1.9732298453801092e-07, "loss": 0.6845, "step": 9227 }, { "epoch": 0.94, "grad_norm": 1.744509227646625, "learning_rate": 1.9667228648147074e-07, "loss": 0.6998, "step": 9228 }, { "epoch": 0.94, "grad_norm": 1.7519756322022366, "learning_rate": 1.9602265242284813e-07, "loss": 0.7248, "step": 9229 }, { "epoch": 0.94, "grad_norm": 1.6632754313037552, "learning_rate": 1.9537408243264888e-07, "loss": 0.673, "step": 9230 }, { "epoch": 0.94, "grad_norm": 1.8540429133686656, "learning_rate": 1.947265765812656e-07, "loss": 0.7019, "step": 9231 }, { "epoch": 0.94, "grad_norm": 1.7358934404834045, "learning_rate": 1.9408013493897537e-07, "loss": 0.7499, "step": 9232 }, { "epoch": 0.94, "grad_norm": 1.579191613274508, "learning_rate": 1.9343475757593987e-07, "loss": 0.6206, "step": 9233 }, { "epoch": 0.94, "grad_norm": 2.1684428344582893, "learning_rate": 1.9279044456220307e-07, "loss": 0.6809, "step": 9234 }, { "epoch": 0.94, "grad_norm": 1.7471847525721256, "learning_rate": 1.921471959676957e-07, "loss": 0.6492, "step": 9235 }, { "epoch": 0.94, "grad_norm": 1.679965055034842, "learning_rate": 1.91505011862233e-07, "loss": 0.6013, "step": 9236 }, { "epoch": 0.94, "grad_norm": 1.6388236682172188, "learning_rate": 1.908638923155126e-07, "loss": 0.6785, "step": 9237 }, { "epoch": 0.94, "grad_norm": 1.5508458601759716, "learning_rate": 1.9022383739712104e-07, "loss": 0.7022, "step": 9238 }, { "epoch": 0.94, "grad_norm": 1.7548631919890791, "learning_rate": 1.895848471765227e-07, "loss": 0.733, "step": 9239 }, { "epoch": 0.94, "grad_norm": 1.663254244472624, "learning_rate": 1.8894692172307106e-07, "loss": 0.6702, "step": 9240 }, { "epoch": 0.94, "grad_norm": 1.6462632236988877, "learning_rate": 1.8831006110600404e-07, "loss": 0.8247, "step": 9241 }, { "epoch": 0.94, "grad_norm": 1.6436328578247896, "learning_rate": 1.8767426539444188e-07, "loss": 0.6637, "step": 9242 }, { "epoch": 0.94, "grad_norm": 1.692688762963928, "learning_rate": 1.8703953465739055e-07, "loss": 0.735, "step": 9243 }, { "epoch": 0.94, "grad_norm": 1.6500218020690514, "learning_rate": 1.8640586896374157e-07, "loss": 0.5977, "step": 9244 }, { "epoch": 0.94, "grad_norm": 1.7399479592499723, "learning_rate": 1.857732683822666e-07, "loss": 0.6792, "step": 9245 }, { "epoch": 0.94, "grad_norm": 1.8599038601472067, "learning_rate": 1.851417329816263e-07, "loss": 0.6901, "step": 9246 }, { "epoch": 0.94, "grad_norm": 1.572795620263803, "learning_rate": 1.8451126283036358e-07, "loss": 0.5979, "step": 9247 }, { "epoch": 0.94, "grad_norm": 1.7503420322332646, "learning_rate": 1.8388185799690705e-07, "loss": 0.7422, "step": 9248 }, { "epoch": 0.94, "grad_norm": 1.8567826988069571, "learning_rate": 1.8325351854956652e-07, "loss": 0.7061, "step": 9249 }, { "epoch": 0.94, "grad_norm": 1.7429096529080754, "learning_rate": 1.8262624455654077e-07, "loss": 0.65, "step": 9250 }, { "epoch": 0.94, "grad_norm": 1.6255268881969265, "learning_rate": 1.8200003608590977e-07, "loss": 0.6084, "step": 9251 }, { "epoch": 0.94, "grad_norm": 1.602696923514729, "learning_rate": 1.8137489320563806e-07, "loss": 0.6819, "step": 9252 }, { "epoch": 0.94, "grad_norm": 1.8004520193464986, "learning_rate": 1.807508159835758e-07, "loss": 0.6275, "step": 9253 }, { "epoch": 0.94, "grad_norm": 1.8567473176405347, "learning_rate": 1.8012780448745548e-07, "loss": 0.7466, "step": 9254 }, { "epoch": 0.94, "grad_norm": 1.7660728132743209, "learning_rate": 1.7950585878489856e-07, "loss": 0.6696, "step": 9255 }, { "epoch": 0.94, "grad_norm": 1.6680895285956447, "learning_rate": 1.7888497894340328e-07, "loss": 0.6764, "step": 9256 }, { "epoch": 0.94, "grad_norm": 1.8890462461888413, "learning_rate": 1.7826516503036018e-07, "loss": 0.6705, "step": 9257 }, { "epoch": 0.94, "grad_norm": 1.6092121708191458, "learning_rate": 1.7764641711303764e-07, "loss": 0.6481, "step": 9258 }, { "epoch": 0.94, "grad_norm": 1.853872643159592, "learning_rate": 1.77028735258592e-07, "loss": 0.6965, "step": 9259 }, { "epoch": 0.94, "grad_norm": 1.6994009901835503, "learning_rate": 1.764121195340629e-07, "loss": 0.657, "step": 9260 }, { "epoch": 0.94, "grad_norm": 1.6002015651261523, "learning_rate": 1.7579657000637464e-07, "loss": 0.6356, "step": 9261 }, { "epoch": 0.94, "grad_norm": 1.7882959951825168, "learning_rate": 1.7518208674233595e-07, "loss": 0.8028, "step": 9262 }, { "epoch": 0.94, "grad_norm": 1.5715626462526937, "learning_rate": 1.7456866980863795e-07, "loss": 0.5792, "step": 9263 }, { "epoch": 0.94, "grad_norm": 1.606745758826103, "learning_rate": 1.7395631927185853e-07, "loss": 0.6333, "step": 9264 }, { "epoch": 0.94, "grad_norm": 1.588132832958181, "learning_rate": 1.7334503519846002e-07, "loss": 0.7629, "step": 9265 }, { "epoch": 0.94, "grad_norm": 1.5870608189041842, "learning_rate": 1.7273481765478383e-07, "loss": 0.6958, "step": 9266 }, { "epoch": 0.94, "grad_norm": 1.7650485962558329, "learning_rate": 1.7212566670706366e-07, "loss": 0.7763, "step": 9267 }, { "epoch": 0.94, "grad_norm": 1.6304775041889885, "learning_rate": 1.7151758242141102e-07, "loss": 0.8069, "step": 9268 }, { "epoch": 0.94, "grad_norm": 1.6757597922240886, "learning_rate": 1.7091056486382428e-07, "loss": 0.7548, "step": 9269 }, { "epoch": 0.94, "grad_norm": 1.6025496608955452, "learning_rate": 1.7030461410018628e-07, "loss": 0.6505, "step": 9270 }, { "epoch": 0.94, "grad_norm": 1.609210244897395, "learning_rate": 1.696997301962633e-07, "loss": 0.6027, "step": 9271 }, { "epoch": 0.94, "grad_norm": 1.56754740181272, "learning_rate": 1.69095913217705e-07, "loss": 0.6646, "step": 9272 }, { "epoch": 0.94, "grad_norm": 1.5314525225926743, "learning_rate": 1.6849316323004794e-07, "loss": 0.5952, "step": 9273 }, { "epoch": 0.94, "grad_norm": 1.704004564930101, "learning_rate": 1.6789148029871083e-07, "loss": 0.7793, "step": 9274 }, { "epoch": 0.94, "grad_norm": 1.5067426360899583, "learning_rate": 1.672908644889959e-07, "loss": 0.5874, "step": 9275 }, { "epoch": 0.94, "grad_norm": 1.7121136462489004, "learning_rate": 1.666913158660921e-07, "loss": 0.6971, "step": 9276 }, { "epoch": 0.94, "grad_norm": 1.5541399878664655, "learning_rate": 1.6609283449506853e-07, "loss": 0.6459, "step": 9277 }, { "epoch": 0.94, "grad_norm": 1.651047465860301, "learning_rate": 1.654954204408843e-07, "loss": 0.7469, "step": 9278 }, { "epoch": 0.94, "grad_norm": 1.7380456494653596, "learning_rate": 1.6489907376837644e-07, "loss": 0.6762, "step": 9279 }, { "epoch": 0.94, "grad_norm": 1.8908809620929607, "learning_rate": 1.643037945422721e-07, "loss": 0.7793, "step": 9280 }, { "epoch": 0.94, "grad_norm": 1.5530664329085104, "learning_rate": 1.6370958282717619e-07, "loss": 0.716, "step": 9281 }, { "epoch": 0.94, "grad_norm": 1.9011341123308512, "learning_rate": 1.6311643868758274e-07, "loss": 0.6896, "step": 9282 }, { "epoch": 0.94, "grad_norm": 1.7537647607023992, "learning_rate": 1.6252436218786915e-07, "loss": 0.712, "step": 9283 }, { "epoch": 0.94, "grad_norm": 1.756264117673827, "learning_rate": 1.619333533922951e-07, "loss": 0.7068, "step": 9284 }, { "epoch": 0.94, "grad_norm": 1.7453694164411129, "learning_rate": 1.6134341236500373e-07, "loss": 0.625, "step": 9285 }, { "epoch": 0.94, "grad_norm": 1.612092023051016, "learning_rate": 1.6075453917002827e-07, "loss": 0.68, "step": 9286 }, { "epoch": 0.94, "grad_norm": 1.4284971486950722, "learning_rate": 1.6016673387127645e-07, "loss": 0.6504, "step": 9287 }, { "epoch": 0.94, "grad_norm": 1.8336605844552831, "learning_rate": 1.595799965325495e-07, "loss": 0.7225, "step": 9288 }, { "epoch": 0.94, "grad_norm": 1.7925815124293898, "learning_rate": 1.589943272175265e-07, "loss": 0.6753, "step": 9289 }, { "epoch": 0.94, "grad_norm": 1.833749887623125, "learning_rate": 1.5840972598977212e-07, "loss": 0.6691, "step": 9290 }, { "epoch": 0.95, "grad_norm": 1.7636615851990618, "learning_rate": 1.5782619291273894e-07, "loss": 0.6865, "step": 9291 }, { "epoch": 0.95, "grad_norm": 1.7029992810691348, "learning_rate": 1.572437280497563e-07, "loss": 0.7755, "step": 9292 }, { "epoch": 0.95, "grad_norm": 1.6787600559848652, "learning_rate": 1.5666233146404474e-07, "loss": 0.5814, "step": 9293 }, { "epoch": 0.95, "grad_norm": 1.6399835606911737, "learning_rate": 1.5608200321870382e-07, "loss": 0.5949, "step": 9294 }, { "epoch": 0.95, "grad_norm": 1.9466123957984622, "learning_rate": 1.5550274337671868e-07, "loss": 0.6788, "step": 9295 }, { "epoch": 0.95, "grad_norm": 1.7304152725692181, "learning_rate": 1.5492455200096234e-07, "loss": 0.7364, "step": 9296 }, { "epoch": 0.95, "grad_norm": 1.8101918901000436, "learning_rate": 1.5434742915418466e-07, "loss": 0.7217, "step": 9297 }, { "epoch": 0.95, "grad_norm": 1.6226630353045093, "learning_rate": 1.5377137489902548e-07, "loss": 0.6549, "step": 9298 }, { "epoch": 0.95, "grad_norm": 1.8351805596438682, "learning_rate": 1.5319638929800485e-07, "loss": 0.7632, "step": 9299 }, { "epoch": 0.95, "grad_norm": 1.8081645554575871, "learning_rate": 1.5262247241352945e-07, "loss": 0.6583, "step": 9300 }, { "epoch": 0.95, "grad_norm": 1.6134327541969915, "learning_rate": 1.520496243078895e-07, "loss": 0.6681, "step": 9301 }, { "epoch": 0.95, "grad_norm": 1.5380435585216454, "learning_rate": 1.5147784504325746e-07, "loss": 0.6873, "step": 9302 }, { "epoch": 0.95, "grad_norm": 1.7978338298694088, "learning_rate": 1.509071346816926e-07, "loss": 0.7485, "step": 9303 }, { "epoch": 0.95, "grad_norm": 1.5582241308265918, "learning_rate": 1.5033749328513537e-07, "loss": 0.7597, "step": 9304 }, { "epoch": 0.95, "grad_norm": 1.5338763086526532, "learning_rate": 1.4976892091541185e-07, "loss": 0.6083, "step": 9305 }, { "epoch": 0.95, "grad_norm": 1.8149298068655273, "learning_rate": 1.4920141763423158e-07, "loss": 0.6472, "step": 9306 }, { "epoch": 0.95, "grad_norm": 1.7713512451374713, "learning_rate": 1.486349835031875e-07, "loss": 0.7545, "step": 9307 }, { "epoch": 0.95, "grad_norm": 1.6077642775620882, "learning_rate": 1.4806961858375824e-07, "loss": 0.6135, "step": 9308 }, { "epoch": 0.95, "grad_norm": 1.695935839933717, "learning_rate": 1.475053229373047e-07, "loss": 0.6744, "step": 9309 }, { "epoch": 0.95, "grad_norm": 1.7030861479239043, "learning_rate": 1.469420966250723e-07, "loss": 0.7265, "step": 9310 }, { "epoch": 0.95, "grad_norm": 1.8379397844754426, "learning_rate": 1.4637993970819219e-07, "loss": 0.6841, "step": 9311 }, { "epoch": 0.95, "grad_norm": 1.58928195504432, "learning_rate": 1.4581885224767557e-07, "loss": 0.6871, "step": 9312 }, { "epoch": 0.95, "grad_norm": 1.6598694335921578, "learning_rate": 1.4525883430441922e-07, "loss": 0.7469, "step": 9313 }, { "epoch": 0.95, "grad_norm": 1.847375042284769, "learning_rate": 1.446998859392068e-07, "loss": 0.7513, "step": 9314 }, { "epoch": 0.95, "grad_norm": 1.6196942469957114, "learning_rate": 1.4414200721270198e-07, "loss": 0.7, "step": 9315 }, { "epoch": 0.95, "grad_norm": 1.651722372283378, "learning_rate": 1.4358519818545302e-07, "loss": 0.6817, "step": 9316 }, { "epoch": 0.95, "grad_norm": 1.4620185736229712, "learning_rate": 1.4302945891789487e-07, "loss": 0.6987, "step": 9317 }, { "epoch": 0.95, "grad_norm": 1.5969563156865756, "learning_rate": 1.4247478947034155e-07, "loss": 0.6432, "step": 9318 }, { "epoch": 0.95, "grad_norm": 1.7740376885253586, "learning_rate": 1.419211899029971e-07, "loss": 0.7709, "step": 9319 }, { "epoch": 0.95, "grad_norm": 1.5724361492200893, "learning_rate": 1.4136866027594341e-07, "loss": 0.7302, "step": 9320 }, { "epoch": 0.95, "grad_norm": 1.762529061812301, "learning_rate": 1.4081720064915037e-07, "loss": 0.6829, "step": 9321 }, { "epoch": 0.95, "grad_norm": 1.732177495747799, "learning_rate": 1.402668110824701e-07, "loss": 0.7528, "step": 9322 }, { "epoch": 0.95, "grad_norm": 1.5117135004060325, "learning_rate": 1.3971749163563696e-07, "loss": 0.7106, "step": 9323 }, { "epoch": 0.95, "grad_norm": 1.936486960015918, "learning_rate": 1.3916924236827444e-07, "loss": 0.7927, "step": 9324 }, { "epoch": 0.95, "grad_norm": 1.6315407141874998, "learning_rate": 1.3862206333988383e-07, "loss": 0.5763, "step": 9325 }, { "epoch": 0.95, "grad_norm": 1.6832826870046844, "learning_rate": 1.3807595460985312e-07, "loss": 0.6027, "step": 9326 }, { "epoch": 0.95, "grad_norm": 1.633657285099894, "learning_rate": 1.3753091623745497e-07, "loss": 0.7159, "step": 9327 }, { "epoch": 0.95, "grad_norm": 1.5657399469617896, "learning_rate": 1.3698694828184312e-07, "loss": 0.5689, "step": 9328 }, { "epoch": 0.95, "grad_norm": 1.5576252990477029, "learning_rate": 1.3644405080205925e-07, "loss": 0.6211, "step": 9329 }, { "epoch": 0.95, "grad_norm": 1.6803219682719743, "learning_rate": 1.359022238570229e-07, "loss": 0.7426, "step": 9330 }, { "epoch": 0.95, "grad_norm": 1.7206962427488917, "learning_rate": 1.353614675055437e-07, "loss": 0.7504, "step": 9331 }, { "epoch": 0.95, "grad_norm": 1.695239140095105, "learning_rate": 1.3482178180631243e-07, "loss": 0.7181, "step": 9332 }, { "epoch": 0.95, "grad_norm": 1.5461146588452048, "learning_rate": 1.3428316681790232e-07, "loss": 0.6312, "step": 9333 }, { "epoch": 0.95, "grad_norm": 1.634849581054608, "learning_rate": 1.33745622598771e-07, "loss": 0.7263, "step": 9334 }, { "epoch": 0.95, "grad_norm": 1.7191373351509227, "learning_rate": 1.3320914920726292e-07, "loss": 0.7151, "step": 9335 }, { "epoch": 0.95, "grad_norm": 1.806927458293626, "learning_rate": 1.3267374670160037e-07, "loss": 0.7173, "step": 9336 }, { "epoch": 0.95, "grad_norm": 1.847358132459803, "learning_rate": 1.3213941513989582e-07, "loss": 0.6338, "step": 9337 }, { "epoch": 0.95, "grad_norm": 1.6792896756264375, "learning_rate": 1.316061545801417e-07, "loss": 0.6552, "step": 9338 }, { "epoch": 0.95, "grad_norm": 1.7975803650935682, "learning_rate": 1.3107396508021508e-07, "loss": 0.649, "step": 9339 }, { "epoch": 0.95, "grad_norm": 1.7320047443084134, "learning_rate": 1.3054284669787754e-07, "loss": 0.757, "step": 9340 }, { "epoch": 0.95, "grad_norm": 1.7064991477573936, "learning_rate": 1.3001279949077184e-07, "loss": 0.7516, "step": 9341 }, { "epoch": 0.95, "grad_norm": 1.8049030315444434, "learning_rate": 1.294838235164275e-07, "loss": 0.7043, "step": 9342 }, { "epoch": 0.95, "grad_norm": 1.783648924170492, "learning_rate": 1.2895591883225754e-07, "loss": 0.7772, "step": 9343 }, { "epoch": 0.95, "grad_norm": 1.661130797786391, "learning_rate": 1.284290854955561e-07, "loss": 0.6693, "step": 9344 }, { "epoch": 0.95, "grad_norm": 1.682829079794363, "learning_rate": 1.27903323563503e-07, "loss": 0.6384, "step": 9345 }, { "epoch": 0.95, "grad_norm": 1.7792700905516068, "learning_rate": 1.2737863309316257e-07, "loss": 0.6837, "step": 9346 }, { "epoch": 0.95, "grad_norm": 1.8187133886049904, "learning_rate": 1.268550141414804e-07, "loss": 0.753, "step": 9347 }, { "epoch": 0.95, "grad_norm": 1.758836973325861, "learning_rate": 1.2633246676528764e-07, "loss": 0.6515, "step": 9348 }, { "epoch": 0.95, "grad_norm": 1.7328795167778492, "learning_rate": 1.2581099102129902e-07, "loss": 0.7636, "step": 9349 }, { "epoch": 0.95, "grad_norm": 1.8121824781438312, "learning_rate": 1.252905869661114e-07, "loss": 0.6873, "step": 9350 }, { "epoch": 0.95, "grad_norm": 1.741948485860588, "learning_rate": 1.2477125465620854e-07, "loss": 0.6576, "step": 9351 }, { "epoch": 0.95, "grad_norm": 1.6665539985254938, "learning_rate": 1.242529941479542e-07, "loss": 0.6524, "step": 9352 }, { "epoch": 0.95, "grad_norm": 1.6435808873911482, "learning_rate": 1.237358054975979e-07, "loss": 0.7197, "step": 9353 }, { "epoch": 0.95, "grad_norm": 1.6404318516170804, "learning_rate": 1.2321968876127244e-07, "loss": 0.723, "step": 9354 }, { "epoch": 0.95, "grad_norm": 1.831279473076429, "learning_rate": 1.2270464399499416e-07, "loss": 0.8039, "step": 9355 }, { "epoch": 0.95, "grad_norm": 1.6247816181543355, "learning_rate": 1.2219067125466388e-07, "loss": 0.6802, "step": 9356 }, { "epoch": 0.95, "grad_norm": 1.7632533356131936, "learning_rate": 1.2167777059606367e-07, "loss": 0.715, "step": 9357 }, { "epoch": 0.95, "grad_norm": 1.656251792043068, "learning_rate": 1.2116594207486232e-07, "loss": 0.5863, "step": 9358 }, { "epoch": 0.95, "grad_norm": 1.6262676141905716, "learning_rate": 1.2065518574660983e-07, "loss": 0.653, "step": 9359 }, { "epoch": 0.95, "grad_norm": 1.8073241511420874, "learning_rate": 1.201455016667419e-07, "loss": 0.6898, "step": 9360 }, { "epoch": 0.95, "grad_norm": 1.6183084246280248, "learning_rate": 1.196368898905753e-07, "loss": 0.655, "step": 9361 }, { "epoch": 0.95, "grad_norm": 1.616972179558239, "learning_rate": 1.1912935047331265e-07, "loss": 0.6692, "step": 9362 }, { "epoch": 0.95, "grad_norm": 1.7388663060507172, "learning_rate": 1.1862288347004091e-07, "loss": 0.6522, "step": 9363 }, { "epoch": 0.95, "grad_norm": 1.5250952476420325, "learning_rate": 1.1811748893572616e-07, "loss": 0.686, "step": 9364 }, { "epoch": 0.95, "grad_norm": 1.6662472338063785, "learning_rate": 1.176131669252234e-07, "loss": 0.6984, "step": 9365 }, { "epoch": 0.95, "grad_norm": 1.5549764713843681, "learning_rate": 1.1710991749326772e-07, "loss": 0.6081, "step": 9366 }, { "epoch": 0.95, "grad_norm": 1.5991869130619443, "learning_rate": 1.1660774069447877e-07, "loss": 0.6039, "step": 9367 }, { "epoch": 0.95, "grad_norm": 1.7864587149948645, "learning_rate": 1.1610663658336186e-07, "loss": 0.7464, "step": 9368 }, { "epoch": 0.95, "grad_norm": 1.9867487428760133, "learning_rate": 1.1560660521430233e-07, "loss": 0.6608, "step": 9369 }, { "epoch": 0.95, "grad_norm": 1.6219424473624813, "learning_rate": 1.1510764664157126e-07, "loss": 0.7251, "step": 9370 }, { "epoch": 0.95, "grad_norm": 1.6957119355691972, "learning_rate": 1.1460976091932307e-07, "loss": 0.6999, "step": 9371 }, { "epoch": 0.95, "grad_norm": 1.6944825952036535, "learning_rate": 1.1411294810159457e-07, "loss": 0.6964, "step": 9372 }, { "epoch": 0.95, "grad_norm": 1.7535197596326562, "learning_rate": 1.1361720824230704e-07, "loss": 0.6444, "step": 9373 }, { "epoch": 0.95, "grad_norm": 1.8297612829240806, "learning_rate": 1.1312254139526635e-07, "loss": 0.7812, "step": 9374 }, { "epoch": 0.95, "grad_norm": 1.8623045745422762, "learning_rate": 1.1262894761416066e-07, "loss": 0.797, "step": 9375 }, { "epoch": 0.95, "grad_norm": 1.801772300675876, "learning_rate": 1.1213642695256156e-07, "loss": 0.7934, "step": 9376 }, { "epoch": 0.95, "grad_norm": 1.6568338935636044, "learning_rate": 1.1164497946392406e-07, "loss": 0.6191, "step": 9377 }, { "epoch": 0.95, "grad_norm": 1.629710990885931, "learning_rate": 1.1115460520158772e-07, "loss": 0.5813, "step": 9378 }, { "epoch": 0.95, "grad_norm": 1.6004914877405867, "learning_rate": 1.1066530421877442e-07, "loss": 0.5771, "step": 9379 }, { "epoch": 0.95, "grad_norm": 1.7226413707376684, "learning_rate": 1.1017707656859055e-07, "loss": 0.6676, "step": 9380 }, { "epoch": 0.95, "grad_norm": 1.8615447304953088, "learning_rate": 1.0968992230402598e-07, "loss": 0.7985, "step": 9381 }, { "epoch": 0.95, "grad_norm": 1.6921151869689077, "learning_rate": 1.0920384147795172e-07, "loss": 0.7771, "step": 9382 }, { "epoch": 0.95, "grad_norm": 1.6221705984273835, "learning_rate": 1.0871883414312778e-07, "loss": 0.6666, "step": 9383 }, { "epoch": 0.95, "grad_norm": 1.5705962467314438, "learning_rate": 1.0823490035218986e-07, "loss": 0.6474, "step": 9384 }, { "epoch": 0.95, "grad_norm": 1.7104006167842383, "learning_rate": 1.0775204015766483e-07, "loss": 0.8605, "step": 9385 }, { "epoch": 0.95, "grad_norm": 1.6798442346632074, "learning_rate": 1.0727025361195853e-07, "loss": 0.7488, "step": 9386 }, { "epoch": 0.95, "grad_norm": 1.8652450432846577, "learning_rate": 1.0678954076736136e-07, "loss": 0.7213, "step": 9387 }, { "epoch": 0.95, "grad_norm": 1.7415937054078674, "learning_rate": 1.0630990167604716e-07, "loss": 0.5931, "step": 9388 }, { "epoch": 0.96, "grad_norm": 1.8481711044022067, "learning_rate": 1.0583133639007203e-07, "loss": 0.706, "step": 9389 }, { "epoch": 0.96, "grad_norm": 1.7516193585064288, "learning_rate": 1.053538449613778e-07, "loss": 0.7088, "step": 9390 }, { "epoch": 0.96, "grad_norm": 1.620252240786182, "learning_rate": 1.0487742744178964e-07, "loss": 0.6785, "step": 9391 }, { "epoch": 0.96, "grad_norm": 1.7365674839200946, "learning_rate": 1.0440208388301399e-07, "loss": 0.732, "step": 9392 }, { "epoch": 0.96, "grad_norm": 1.637589589718617, "learning_rate": 1.039278143366429e-07, "loss": 0.6338, "step": 9393 }, { "epoch": 0.96, "grad_norm": 1.8376359016842168, "learning_rate": 1.0345461885414965e-07, "loss": 0.6336, "step": 9394 }, { "epoch": 0.96, "grad_norm": 1.7308196052245322, "learning_rate": 1.0298249748689204e-07, "loss": 0.7295, "step": 9395 }, { "epoch": 0.96, "grad_norm": 1.6808380277400239, "learning_rate": 1.0251145028611464e-07, "loss": 0.6251, "step": 9396 }, { "epoch": 0.96, "grad_norm": 1.6294942373354988, "learning_rate": 1.0204147730293767e-07, "loss": 0.6, "step": 9397 }, { "epoch": 0.96, "grad_norm": 1.7041566127035381, "learning_rate": 1.0157257858837255e-07, "loss": 0.7238, "step": 9398 }, { "epoch": 0.96, "grad_norm": 1.6222572131657285, "learning_rate": 1.0110475419330967e-07, "loss": 0.7622, "step": 9399 }, { "epoch": 0.96, "grad_norm": 2.0364432927850973, "learning_rate": 1.0063800416852399e-07, "loss": 0.768, "step": 9400 }, { "epoch": 0.96, "grad_norm": 1.7975191300854436, "learning_rate": 1.0017232856467495e-07, "loss": 0.7176, "step": 9401 }, { "epoch": 0.96, "grad_norm": 1.5093214742141057, "learning_rate": 9.970772743230329e-08, "loss": 0.649, "step": 9402 }, { "epoch": 0.96, "grad_norm": 1.4343705311458053, "learning_rate": 9.924420082183416e-08, "loss": 0.6965, "step": 9403 }, { "epoch": 0.96, "grad_norm": 1.6913536743725621, "learning_rate": 9.878174878357738e-08, "loss": 0.6269, "step": 9404 }, { "epoch": 0.96, "grad_norm": 1.7215768900124486, "learning_rate": 9.832037136772387e-08, "loss": 0.6705, "step": 9405 }, { "epoch": 0.96, "grad_norm": 1.565162152804664, "learning_rate": 9.786006862434916e-08, "loss": 0.6139, "step": 9406 }, { "epoch": 0.96, "grad_norm": 1.6399417139822021, "learning_rate": 9.740084060341104e-08, "loss": 0.6996, "step": 9407 }, { "epoch": 0.96, "grad_norm": 1.671760353975966, "learning_rate": 9.694268735475299e-08, "loss": 0.6743, "step": 9408 }, { "epoch": 0.96, "grad_norm": 1.6602976217731225, "learning_rate": 9.648560892809967e-08, "loss": 0.7442, "step": 9409 }, { "epoch": 0.96, "grad_norm": 1.7598413490910223, "learning_rate": 9.602960537306027e-08, "loss": 0.7322, "step": 9410 }, { "epoch": 0.96, "grad_norm": 1.7576825593121441, "learning_rate": 9.557467673912635e-08, "loss": 0.6904, "step": 9411 }, { "epoch": 0.96, "grad_norm": 1.6407909634932698, "learning_rate": 9.512082307567283e-08, "loss": 0.6866, "step": 9412 }, { "epoch": 0.96, "grad_norm": 1.8693682646759682, "learning_rate": 9.46680444319581e-08, "loss": 0.7631, "step": 9413 }, { "epoch": 0.96, "grad_norm": 1.78065338467431, "learning_rate": 9.421634085712728e-08, "loss": 0.6334, "step": 9414 }, { "epoch": 0.96, "grad_norm": 1.6792038849549156, "learning_rate": 9.376571240020227e-08, "loss": 0.6723, "step": 9415 }, { "epoch": 0.96, "grad_norm": 1.74326436533906, "learning_rate": 9.331615911009284e-08, "loss": 0.682, "step": 9416 }, { "epoch": 0.96, "grad_norm": 1.7774904025160487, "learning_rate": 9.286768103559107e-08, "loss": 0.8439, "step": 9417 }, { "epoch": 0.96, "grad_norm": 1.6803450951448946, "learning_rate": 9.242027822537247e-08, "loss": 0.6999, "step": 9418 }, { "epoch": 0.96, "grad_norm": 1.626514736365701, "learning_rate": 9.197395072799597e-08, "loss": 0.7507, "step": 9419 }, { "epoch": 0.96, "grad_norm": 1.9147094916880236, "learning_rate": 9.152869859190283e-08, "loss": 0.5663, "step": 9420 }, { "epoch": 0.96, "grad_norm": 1.645184270408986, "learning_rate": 9.108452186541771e-08, "loss": 0.7222, "step": 9421 }, { "epoch": 0.96, "grad_norm": 1.745259445087553, "learning_rate": 9.064142059674985e-08, "loss": 0.7458, "step": 9422 }, { "epoch": 0.96, "grad_norm": 1.6908506915448078, "learning_rate": 9.019939483399076e-08, "loss": 0.6719, "step": 9423 }, { "epoch": 0.96, "grad_norm": 1.8034257047030355, "learning_rate": 8.975844462511652e-08, "loss": 0.7185, "step": 9424 }, { "epoch": 0.96, "grad_norm": 1.7462225913889697, "learning_rate": 8.931857001798216e-08, "loss": 0.6562, "step": 9425 }, { "epoch": 0.96, "grad_norm": 1.700078979326214, "learning_rate": 8.887977106033285e-08, "loss": 0.6466, "step": 9426 }, { "epoch": 0.96, "grad_norm": 1.8522994307817322, "learning_rate": 8.84420477997916e-08, "loss": 0.7504, "step": 9427 }, { "epoch": 0.96, "grad_norm": 1.6080686398108495, "learning_rate": 8.800540028386595e-08, "loss": 0.6406, "step": 9428 }, { "epoch": 0.96, "grad_norm": 1.7004782602843376, "learning_rate": 8.756982855994911e-08, "loss": 0.6996, "step": 9429 }, { "epoch": 0.96, "grad_norm": 1.8207728151070466, "learning_rate": 8.713533267531326e-08, "loss": 0.7679, "step": 9430 }, { "epoch": 0.96, "grad_norm": 1.7478520752621098, "learning_rate": 8.670191267711736e-08, "loss": 0.715, "step": 9431 }, { "epoch": 0.96, "grad_norm": 1.66706241086095, "learning_rate": 8.626956861240265e-08, "loss": 0.6582, "step": 9432 }, { "epoch": 0.96, "grad_norm": 1.6439981567690312, "learning_rate": 8.58383005280916e-08, "loss": 0.5828, "step": 9433 }, { "epoch": 0.96, "grad_norm": 1.7405589455947643, "learning_rate": 8.540810847099345e-08, "loss": 0.7158, "step": 9434 }, { "epoch": 0.96, "grad_norm": 1.7367774924168071, "learning_rate": 8.497899248779862e-08, "loss": 0.757, "step": 9435 }, { "epoch": 0.96, "grad_norm": 1.6737570175181034, "learning_rate": 8.455095262508095e-08, "loss": 0.7011, "step": 9436 }, { "epoch": 0.96, "grad_norm": 1.6452133588641074, "learning_rate": 8.412398892929663e-08, "loss": 0.6645, "step": 9437 }, { "epoch": 0.96, "grad_norm": 1.681366338145333, "learning_rate": 8.369810144678636e-08, "loss": 0.77, "step": 9438 }, { "epoch": 0.96, "grad_norm": 1.550502000121852, "learning_rate": 8.327329022377317e-08, "loss": 0.6442, "step": 9439 }, { "epoch": 0.96, "grad_norm": 1.6022409815852374, "learning_rate": 8.284955530636462e-08, "loss": 0.6408, "step": 9440 }, { "epoch": 0.96, "grad_norm": 1.5452686011312082, "learning_rate": 8.242689674054949e-08, "loss": 0.6169, "step": 9441 }, { "epoch": 0.96, "grad_norm": 1.8167821762833238, "learning_rate": 8.200531457220218e-08, "loss": 0.7093, "step": 9442 }, { "epoch": 0.96, "grad_norm": 1.7396942787558083, "learning_rate": 8.15848088470772e-08, "loss": 0.7135, "step": 9443 }, { "epoch": 0.96, "grad_norm": 1.9776231560210884, "learning_rate": 8.116537961081473e-08, "loss": 0.6921, "step": 9444 }, { "epoch": 0.96, "grad_norm": 1.75401788492991, "learning_rate": 8.074702690893722e-08, "loss": 0.6245, "step": 9445 }, { "epoch": 0.96, "grad_norm": 1.638899199915196, "learning_rate": 8.032975078684945e-08, "loss": 0.6686, "step": 9446 }, { "epoch": 0.96, "grad_norm": 1.6823860072883687, "learning_rate": 7.99135512898408e-08, "loss": 0.7626, "step": 9447 }, { "epoch": 0.96, "grad_norm": 1.5854351031807767, "learning_rate": 7.949842846308398e-08, "loss": 0.6313, "step": 9448 }, { "epoch": 0.96, "grad_norm": 1.5310605391572794, "learning_rate": 7.908438235163407e-08, "loss": 0.5886, "step": 9449 }, { "epoch": 0.96, "grad_norm": 1.8708623272585712, "learning_rate": 7.867141300042736e-08, "loss": 0.7019, "step": 9450 }, { "epoch": 0.96, "grad_norm": 1.3911077288709224, "learning_rate": 7.825952045428797e-08, "loss": 0.5764, "step": 9451 }, { "epoch": 0.96, "grad_norm": 1.6768022829466143, "learning_rate": 7.784870475791794e-08, "loss": 0.6353, "step": 9452 }, { "epoch": 0.96, "grad_norm": 1.5242529784224401, "learning_rate": 7.743896595590605e-08, "loss": 0.6805, "step": 9453 }, { "epoch": 0.96, "grad_norm": 1.8249475019113455, "learning_rate": 7.703030409272339e-08, "loss": 0.744, "step": 9454 }, { "epoch": 0.96, "grad_norm": 1.7586110111232993, "learning_rate": 7.662271921272224e-08, "loss": 0.6442, "step": 9455 }, { "epoch": 0.96, "grad_norm": 1.7143857110798468, "learning_rate": 7.621621136014168e-08, "loss": 0.5942, "step": 9456 }, { "epoch": 0.96, "grad_norm": 1.6898773689992317, "learning_rate": 7.581078057909974e-08, "loss": 0.6309, "step": 9457 }, { "epoch": 0.96, "grad_norm": 1.634801653449755, "learning_rate": 7.540642691360123e-08, "loss": 0.6671, "step": 9458 }, { "epoch": 0.96, "grad_norm": 1.8680966984373397, "learning_rate": 7.500315040753214e-08, "loss": 0.6873, "step": 9459 }, { "epoch": 0.96, "grad_norm": 1.6719597635551595, "learning_rate": 7.460095110466192e-08, "loss": 0.6266, "step": 9460 }, { "epoch": 0.96, "grad_norm": 1.6156246547794542, "learning_rate": 7.41998290486412e-08, "loss": 0.6576, "step": 9461 }, { "epoch": 0.96, "grad_norm": 1.4988781002438796, "learning_rate": 7.379978428300738e-08, "loss": 0.5995, "step": 9462 }, { "epoch": 0.96, "grad_norm": 1.6309987376080932, "learning_rate": 7.340081685117906e-08, "loss": 0.6821, "step": 9463 }, { "epoch": 0.96, "grad_norm": 1.8204765391454656, "learning_rate": 7.300292679645716e-08, "loss": 0.715, "step": 9464 }, { "epoch": 0.96, "grad_norm": 1.781509439351711, "learning_rate": 7.260611416202712e-08, "loss": 0.8175, "step": 9465 }, { "epoch": 0.96, "grad_norm": 1.7769442386139096, "learning_rate": 7.221037899095561e-08, "loss": 0.6977, "step": 9466 }, { "epoch": 0.96, "grad_norm": 1.884030339551365, "learning_rate": 7.181572132619385e-08, "loss": 0.645, "step": 9467 }, { "epoch": 0.96, "grad_norm": 1.5657382868794574, "learning_rate": 7.142214121057755e-08, "loss": 0.6959, "step": 9468 }, { "epoch": 0.96, "grad_norm": 1.7086982932887396, "learning_rate": 7.102963868682034e-08, "loss": 0.5867, "step": 9469 }, { "epoch": 0.96, "grad_norm": 1.4962641401982082, "learning_rate": 7.063821379752589e-08, "loss": 0.5343, "step": 9470 }, { "epoch": 0.96, "grad_norm": 1.7827013511340437, "learning_rate": 7.024786658517468e-08, "loss": 0.7294, "step": 9471 }, { "epoch": 0.96, "grad_norm": 1.7029572387275005, "learning_rate": 6.985859709213283e-08, "loss": 0.6746, "step": 9472 }, { "epoch": 0.96, "grad_norm": 1.528253388269664, "learning_rate": 6.947040536065208e-08, "loss": 0.6944, "step": 9473 }, { "epoch": 0.96, "grad_norm": 1.866562625793677, "learning_rate": 6.908329143286096e-08, "loss": 0.6758, "step": 9474 }, { "epoch": 0.96, "grad_norm": 1.6755032350387424, "learning_rate": 6.869725535077698e-08, "loss": 0.7056, "step": 9475 }, { "epoch": 0.96, "grad_norm": 1.7746080634624017, "learning_rate": 6.831229715629884e-08, "loss": 0.6918, "step": 9476 }, { "epoch": 0.96, "grad_norm": 1.710479335270481, "learning_rate": 6.792841689120533e-08, "loss": 0.8261, "step": 9477 }, { "epoch": 0.96, "grad_norm": 1.633912267250495, "learning_rate": 6.754561459716202e-08, "loss": 0.7372, "step": 9478 }, { "epoch": 0.96, "grad_norm": 1.8059470626753045, "learning_rate": 6.716389031571568e-08, "loss": 0.6856, "step": 9479 }, { "epoch": 0.96, "grad_norm": 1.7177100237104101, "learning_rate": 6.678324408829762e-08, "loss": 0.6838, "step": 9480 }, { "epoch": 0.96, "grad_norm": 1.6373046738415207, "learning_rate": 6.640367595622033e-08, "loss": 0.7585, "step": 9481 }, { "epoch": 0.96, "grad_norm": 1.8719583939143258, "learning_rate": 6.602518596067975e-08, "loss": 0.6701, "step": 9482 }, { "epoch": 0.96, "grad_norm": 1.7507052228301732, "learning_rate": 6.564777414275525e-08, "loss": 0.7428, "step": 9483 }, { "epoch": 0.96, "grad_norm": 1.6062017308145797, "learning_rate": 6.52714405434085e-08, "loss": 0.65, "step": 9484 }, { "epoch": 0.96, "grad_norm": 1.8043004451790021, "learning_rate": 6.489618520348573e-08, "loss": 0.7727, "step": 9485 }, { "epoch": 0.96, "grad_norm": 1.6345253892914255, "learning_rate": 6.452200816371435e-08, "loss": 0.7133, "step": 9486 }, { "epoch": 0.97, "grad_norm": 1.6192129197947587, "learning_rate": 6.41489094647052e-08, "loss": 0.705, "step": 9487 }, { "epoch": 0.97, "grad_norm": 1.7888330879688503, "learning_rate": 6.377688914695256e-08, "loss": 0.7332, "step": 9488 }, { "epoch": 0.97, "grad_norm": 1.5641196925584768, "learning_rate": 6.340594725083415e-08, "loss": 0.734, "step": 9489 }, { "epoch": 0.97, "grad_norm": 1.9912579431641058, "learning_rate": 6.303608381660887e-08, "loss": 0.7217, "step": 9490 }, { "epoch": 0.97, "grad_norm": 1.5776840514556638, "learning_rate": 6.266729888442013e-08, "loss": 0.6663, "step": 9491 }, { "epoch": 0.97, "grad_norm": 1.7249885914365968, "learning_rate": 6.229959249429263e-08, "loss": 0.6978, "step": 9492 }, { "epoch": 0.97, "grad_norm": 1.8158158270080882, "learning_rate": 6.193296468613663e-08, "loss": 0.6448, "step": 9493 }, { "epoch": 0.97, "grad_norm": 1.6464321238990238, "learning_rate": 6.156741549974365e-08, "loss": 0.6866, "step": 9494 }, { "epoch": 0.97, "grad_norm": 1.6656953848178833, "learning_rate": 6.120294497478752e-08, "loss": 0.6183, "step": 9495 }, { "epoch": 0.97, "grad_norm": 1.9088722193655174, "learning_rate": 6.083955315082657e-08, "loss": 0.6723, "step": 9496 }, { "epoch": 0.97, "grad_norm": 1.5716830550858858, "learning_rate": 6.04772400673015e-08, "loss": 0.6995, "step": 9497 }, { "epoch": 0.97, "grad_norm": 1.792143791691787, "learning_rate": 6.011600576353416e-08, "loss": 0.7165, "step": 9498 }, { "epoch": 0.97, "grad_norm": 1.6182212968308853, "learning_rate": 5.97558502787332e-08, "loss": 0.7434, "step": 9499 }, { "epoch": 0.97, "grad_norm": 1.7037892359301718, "learning_rate": 5.939677365198626e-08, "loss": 0.6117, "step": 9500 }, { "epoch": 0.97, "grad_norm": 1.707700336540344, "learning_rate": 5.903877592226548e-08, "loss": 0.7474, "step": 9501 }, { "epoch": 0.97, "grad_norm": 1.6940925472575223, "learning_rate": 5.868185712842645e-08, "loss": 0.6836, "step": 9502 }, { "epoch": 0.97, "grad_norm": 1.6486427451514054, "learning_rate": 5.832601730920706e-08, "loss": 0.7138, "step": 9503 }, { "epoch": 0.97, "grad_norm": 1.5679311999282515, "learning_rate": 5.797125650322866e-08, "loss": 0.5144, "step": 9504 }, { "epoch": 0.97, "grad_norm": 1.6840151836203492, "learning_rate": 5.7617574748993764e-08, "loss": 0.6978, "step": 9505 }, { "epoch": 0.97, "grad_norm": 1.7141209646309552, "learning_rate": 5.726497208488946e-08, "loss": 0.6544, "step": 9506 }, { "epoch": 0.97, "grad_norm": 1.7961651134883347, "learning_rate": 5.691344854918623e-08, "loss": 0.8223, "step": 9507 }, { "epoch": 0.97, "grad_norm": 1.721568167980704, "learning_rate": 5.6563004180034685e-08, "loss": 0.7074, "step": 9508 }, { "epoch": 0.97, "grad_norm": 1.7756903237314543, "learning_rate": 5.6213639015472166e-08, "loss": 0.729, "step": 9509 }, { "epoch": 0.97, "grad_norm": 1.7530493444827209, "learning_rate": 5.586535309341501e-08, "loss": 0.6859, "step": 9510 }, { "epoch": 0.97, "grad_norm": 1.8363965552736352, "learning_rate": 5.55181464516652e-08, "loss": 0.7653, "step": 9511 }, { "epoch": 0.97, "grad_norm": 1.632990922923886, "learning_rate": 5.517201912790593e-08, "loss": 0.6096, "step": 9512 }, { "epoch": 0.97, "grad_norm": 1.6297590626113077, "learning_rate": 5.4826971159704925e-08, "loss": 0.6823, "step": 9513 }, { "epoch": 0.97, "grad_norm": 2.001130699233612, "learning_rate": 5.448300258451111e-08, "loss": 0.7525, "step": 9514 }, { "epoch": 0.97, "grad_norm": 1.5364424660076623, "learning_rate": 5.4140113439655753e-08, "loss": 0.6215, "step": 9515 }, { "epoch": 0.97, "grad_norm": 1.7204574771548173, "learning_rate": 5.379830376235573e-08, "loss": 0.6571, "step": 9516 }, { "epoch": 0.97, "grad_norm": 1.6017778895293966, "learning_rate": 5.3457573589709156e-08, "loss": 0.682, "step": 9517 }, { "epoch": 0.97, "grad_norm": 1.5577355583757446, "learning_rate": 5.311792295869644e-08, "loss": 0.63, "step": 9518 }, { "epoch": 0.97, "grad_norm": 1.682049891016354, "learning_rate": 5.2779351906181445e-08, "loss": 0.6591, "step": 9519 }, { "epoch": 0.97, "grad_norm": 1.5620593916862504, "learning_rate": 5.24418604689092e-08, "loss": 0.5631, "step": 9520 }, { "epoch": 0.97, "grad_norm": 1.681310877232562, "learning_rate": 5.210544868351153e-08, "loss": 0.5983, "step": 9521 }, { "epoch": 0.97, "grad_norm": 1.8030213916103963, "learning_rate": 5.177011658650033e-08, "loss": 0.6789, "step": 9522 }, { "epoch": 0.97, "grad_norm": 1.8096040261391229, "learning_rate": 5.143586421426982e-08, "loss": 0.6906, "step": 9523 }, { "epoch": 0.97, "grad_norm": 1.836173510001181, "learning_rate": 5.1102691603097664e-08, "loss": 0.7915, "step": 9524 }, { "epoch": 0.97, "grad_norm": 1.6777462206542368, "learning_rate": 5.077059878914492e-08, "loss": 0.7249, "step": 9525 }, { "epoch": 0.97, "grad_norm": 1.7752150890688336, "learning_rate": 5.043958580845498e-08, "loss": 0.6774, "step": 9526 }, { "epoch": 0.97, "grad_norm": 1.935721956313621, "learning_rate": 5.010965269695578e-08, "loss": 0.7067, "step": 9527 }, { "epoch": 0.97, "grad_norm": 1.6102260340258132, "learning_rate": 4.978079949045311e-08, "loss": 0.7253, "step": 9528 }, { "epoch": 0.97, "grad_norm": 1.659590054401639, "learning_rate": 4.945302622464177e-08, "loss": 0.619, "step": 9529 }, { "epoch": 0.97, "grad_norm": 1.5797987844636674, "learning_rate": 4.912633293509439e-08, "loss": 0.7114, "step": 9530 }, { "epoch": 0.97, "grad_norm": 1.651232052055731, "learning_rate": 4.8800719657270404e-08, "loss": 0.6429, "step": 9531 }, { "epoch": 0.97, "grad_norm": 1.7290046370594587, "learning_rate": 4.84761864265082e-08, "loss": 0.734, "step": 9532 }, { "epoch": 0.97, "grad_norm": 1.7180351129100842, "learning_rate": 4.815273327803183e-08, "loss": 0.6383, "step": 9533 }, { "epoch": 0.97, "grad_norm": 1.5391683968772378, "learning_rate": 4.783036024694543e-08, "loss": 0.6444, "step": 9534 }, { "epoch": 0.97, "grad_norm": 1.7809726189745998, "learning_rate": 4.750906736824101e-08, "loss": 0.6574, "step": 9535 }, { "epoch": 0.97, "grad_norm": 1.6544166039332864, "learning_rate": 4.7188854676786246e-08, "loss": 0.7301, "step": 9536 }, { "epoch": 0.97, "grad_norm": 1.5079155368243176, "learning_rate": 4.6869722207337763e-08, "loss": 0.6591, "step": 9537 }, { "epoch": 0.97, "grad_norm": 1.6571193553186434, "learning_rate": 4.6551669994531204e-08, "loss": 0.6755, "step": 9538 }, { "epoch": 0.97, "grad_norm": 1.7825879578730972, "learning_rate": 4.623469807288561e-08, "loss": 0.7396, "step": 9539 }, { "epoch": 0.97, "grad_norm": 1.7840446572194073, "learning_rate": 4.591880647680458e-08, "loss": 0.657, "step": 9540 }, { "epoch": 0.97, "grad_norm": 1.9065872718206953, "learning_rate": 4.5603995240572906e-08, "loss": 0.7672, "step": 9541 }, { "epoch": 0.97, "grad_norm": 1.696836293573375, "learning_rate": 4.529026439835771e-08, "loss": 0.72, "step": 9542 }, { "epoch": 0.97, "grad_norm": 1.739553332302951, "learning_rate": 4.4977613984210634e-08, "loss": 0.6855, "step": 9543 }, { "epoch": 0.97, "grad_norm": 1.6854862459503848, "learning_rate": 4.4666044032063425e-08, "loss": 0.7009, "step": 9544 }, { "epoch": 0.97, "grad_norm": 1.6963922444075805, "learning_rate": 4.4355554575734594e-08, "loss": 0.6945, "step": 9545 }, { "epoch": 0.97, "grad_norm": 1.5800756558416964, "learning_rate": 4.404614564892051e-08, "loss": 0.588, "step": 9546 }, { "epoch": 0.97, "grad_norm": 1.6478454303775683, "learning_rate": 4.373781728520321e-08, "loss": 0.6437, "step": 9547 }, { "epoch": 0.97, "grad_norm": 1.5023842441634008, "learning_rate": 4.3430569518048135e-08, "loss": 0.6218, "step": 9548 }, { "epoch": 0.97, "grad_norm": 1.5924937602995637, "learning_rate": 4.3124402380800846e-08, "loss": 0.6583, "step": 9549 }, { "epoch": 0.97, "grad_norm": 1.6325314575288126, "learning_rate": 4.281931590669253e-08, "loss": 0.5673, "step": 9550 }, { "epoch": 0.97, "grad_norm": 1.5829462492317814, "learning_rate": 4.251531012883337e-08, "loss": 0.6106, "step": 9551 }, { "epoch": 0.97, "grad_norm": 1.5521949626144838, "learning_rate": 4.2212385080220295e-08, "loss": 0.6161, "step": 9552 }, { "epoch": 0.97, "grad_norm": 1.5820619083433027, "learning_rate": 4.191054079373036e-08, "loss": 0.7234, "step": 9553 }, { "epoch": 0.97, "grad_norm": 1.6808104992758746, "learning_rate": 4.160977730212401e-08, "loss": 0.6616, "step": 9554 }, { "epoch": 0.97, "grad_norm": 1.924873928536549, "learning_rate": 4.131009463804403e-08, "loss": 0.8662, "step": 9555 }, { "epoch": 0.97, "grad_norm": 1.4615127567216981, "learning_rate": 4.101149283401773e-08, "loss": 0.7099, "step": 9556 }, { "epoch": 0.97, "grad_norm": 1.615602544070035, "learning_rate": 4.071397192245252e-08, "loss": 0.6222, "step": 9557 }, { "epoch": 0.97, "grad_norm": 1.954739249642568, "learning_rate": 4.041753193563924e-08, "loss": 0.7783, "step": 9558 }, { "epoch": 0.97, "grad_norm": 1.6038174186370082, "learning_rate": 4.0122172905753264e-08, "loss": 0.7212, "step": 9559 }, { "epoch": 0.97, "grad_norm": 1.5473590138448612, "learning_rate": 3.982789486485006e-08, "loss": 0.5879, "step": 9560 }, { "epoch": 0.97, "grad_norm": 1.5495334233350668, "learning_rate": 3.953469784486852e-08, "loss": 0.6746, "step": 9561 }, { "epoch": 0.97, "grad_norm": 1.5247979141054615, "learning_rate": 3.924258187763208e-08, "loss": 0.6481, "step": 9562 }, { "epoch": 0.97, "grad_norm": 1.923834001273207, "learning_rate": 3.895154699484427e-08, "loss": 0.7229, "step": 9563 }, { "epoch": 0.97, "grad_norm": 1.9727940095964407, "learning_rate": 3.866159322809315e-08, "loss": 0.7823, "step": 9564 }, { "epoch": 0.97, "grad_norm": 1.6465939082770047, "learning_rate": 3.8372720608848e-08, "loss": 0.7266, "step": 9565 }, { "epoch": 0.97, "grad_norm": 1.6971371645450848, "learning_rate": 3.808492916846041e-08, "loss": 0.7288, "step": 9566 }, { "epoch": 0.97, "grad_norm": 1.7647054382027547, "learning_rate": 3.77982189381676e-08, "loss": 0.6905, "step": 9567 }, { "epoch": 0.97, "grad_norm": 1.7337785020115393, "learning_rate": 3.751258994908691e-08, "loss": 0.7073, "step": 9568 }, { "epoch": 0.97, "grad_norm": 1.9913478597915608, "learning_rate": 3.72280422322191e-08, "loss": 0.7697, "step": 9569 }, { "epoch": 0.97, "grad_norm": 1.7756431875942662, "learning_rate": 3.6944575818446126e-08, "loss": 0.69, "step": 9570 }, { "epoch": 0.97, "grad_norm": 1.6794395528746324, "learning_rate": 3.6662190738535606e-08, "loss": 0.7857, "step": 9571 }, { "epoch": 0.97, "grad_norm": 1.7678436916808111, "learning_rate": 3.638088702313414e-08, "loss": 0.646, "step": 9572 }, { "epoch": 0.97, "grad_norm": 1.7993167160166388, "learning_rate": 3.610066470277507e-08, "loss": 0.676, "step": 9573 }, { "epoch": 0.97, "grad_norm": 1.5841152046243077, "learning_rate": 3.582152380786963e-08, "loss": 0.5893, "step": 9574 }, { "epoch": 0.97, "grad_norm": 1.6703393696365203, "learning_rate": 3.554346436871581e-08, "loss": 0.7292, "step": 9575 }, { "epoch": 0.97, "grad_norm": 1.6449459827254167, "learning_rate": 3.52664864154928e-08, "loss": 0.6117, "step": 9576 }, { "epoch": 0.97, "grad_norm": 1.9219088939810345, "learning_rate": 3.499058997826099e-08, "loss": 0.7298, "step": 9577 }, { "epoch": 0.97, "grad_norm": 1.5935011107809127, "learning_rate": 3.471577508696533e-08, "loss": 0.5968, "step": 9578 }, { "epoch": 0.97, "grad_norm": 1.5925932403139895, "learning_rate": 3.444204177143307e-08, "loss": 0.623, "step": 9579 }, { "epoch": 0.97, "grad_norm": 1.782416756084071, "learning_rate": 3.416939006137265e-08, "loss": 0.7689, "step": 9580 }, { "epoch": 0.97, "grad_norm": 1.7447214103114526, "learning_rate": 3.389781998637709e-08, "loss": 0.6998, "step": 9581 }, { "epoch": 0.97, "grad_norm": 1.6536370107942204, "learning_rate": 3.3627331575921686e-08, "loss": 0.6672, "step": 9582 }, { "epoch": 0.97, "grad_norm": 1.7187325303429446, "learning_rate": 3.3357924859361845e-08, "loss": 0.7012, "step": 9583 }, { "epoch": 0.97, "grad_norm": 1.8955684515759144, "learning_rate": 3.3089599865938625e-08, "loss": 0.6829, "step": 9584 }, { "epoch": 0.97, "grad_norm": 1.7153222583829413, "learning_rate": 3.282235662477429e-08, "loss": 0.7687, "step": 9585 }, { "epoch": 0.98, "grad_norm": 1.770308421957577, "learning_rate": 3.2556195164873405e-08, "loss": 0.6477, "step": 9586 }, { "epoch": 0.98, "grad_norm": 1.629486287642064, "learning_rate": 3.2291115515125093e-08, "loss": 0.6571, "step": 9587 }, { "epoch": 0.98, "grad_norm": 1.7828578010716696, "learning_rate": 3.202711770429745e-08, "loss": 0.7029, "step": 9588 }, { "epoch": 0.98, "grad_norm": 1.783499374620443, "learning_rate": 3.176420176104533e-08, "loss": 0.6968, "step": 9589 }, { "epoch": 0.98, "grad_norm": 1.7423644115712555, "learning_rate": 3.1502367713903695e-08, "loss": 0.7301, "step": 9590 }, { "epoch": 0.98, "grad_norm": 1.7655218329922557, "learning_rate": 3.12416155912898e-08, "loss": 0.6931, "step": 9591 }, { "epoch": 0.98, "grad_norm": 1.7484148997191309, "learning_rate": 3.0981945421504345e-08, "loss": 0.742, "step": 9592 }, { "epoch": 0.98, "grad_norm": 1.6889592247363676, "learning_rate": 3.0723357232731455e-08, "loss": 0.6322, "step": 9593 }, { "epoch": 0.98, "grad_norm": 1.6742068227188056, "learning_rate": 3.0465851053035345e-08, "loss": 0.5973, "step": 9594 }, { "epoch": 0.98, "grad_norm": 1.658300994042665, "learning_rate": 3.0209426910364766e-08, "loss": 0.7638, "step": 9595 }, { "epoch": 0.98, "grad_norm": 1.561736898588684, "learning_rate": 2.995408483255191e-08, "loss": 0.6049, "step": 9596 }, { "epoch": 0.98, "grad_norm": 1.7656308985229812, "learning_rate": 2.9699824847307933e-08, "loss": 0.7248, "step": 9597 }, { "epoch": 0.98, "grad_norm": 1.5919797364329935, "learning_rate": 2.9446646982230763e-08, "loss": 0.6782, "step": 9598 }, { "epoch": 0.98, "grad_norm": 1.7407948021472628, "learning_rate": 2.919455126479731e-08, "loss": 0.7177, "step": 9599 }, { "epoch": 0.98, "grad_norm": 1.6195766093351134, "learning_rate": 2.894353772237013e-08, "loss": 0.567, "step": 9600 }, { "epoch": 0.98, "grad_norm": 1.8401774040526315, "learning_rate": 2.8693606382191876e-08, "loss": 0.8156, "step": 9601 }, { "epoch": 0.98, "grad_norm": 1.586921656800293, "learning_rate": 2.8444757271388625e-08, "loss": 0.7336, "step": 9602 }, { "epoch": 0.98, "grad_norm": 1.7564114273771738, "learning_rate": 2.819699041696877e-08, "loss": 0.7562, "step": 9603 }, { "epoch": 0.98, "grad_norm": 1.7466936053595925, "learning_rate": 2.7950305845825254e-08, "loss": 0.6833, "step": 9604 }, { "epoch": 0.98, "grad_norm": 1.7033376017472277, "learning_rate": 2.7704703584729985e-08, "loss": 0.7689, "step": 9605 }, { "epoch": 0.98, "grad_norm": 1.5863377258152949, "learning_rate": 2.7460183660339422e-08, "loss": 0.6984, "step": 9606 }, { "epoch": 0.98, "grad_norm": 2.0272524378790626, "learning_rate": 2.7216746099193448e-08, "loss": 0.6807, "step": 9607 }, { "epoch": 0.98, "grad_norm": 1.5975543790019915, "learning_rate": 2.6974390927712034e-08, "loss": 0.6843, "step": 9608 }, { "epoch": 0.98, "grad_norm": 1.6388291697059447, "learning_rate": 2.6733118172200812e-08, "loss": 0.6857, "step": 9609 }, { "epoch": 0.98, "grad_norm": 1.5592279432842686, "learning_rate": 2.6492927858844386e-08, "loss": 0.6555, "step": 9610 }, { "epoch": 0.98, "grad_norm": 1.6034240035241638, "learning_rate": 2.6253820013713017e-08, "loss": 0.6381, "step": 9611 }, { "epoch": 0.98, "grad_norm": 1.6946540536432775, "learning_rate": 2.6015794662757055e-08, "loss": 0.6846, "step": 9612 }, { "epoch": 0.98, "grad_norm": 1.7225744072643763, "learning_rate": 2.5778851831811392e-08, "loss": 0.7251, "step": 9613 }, { "epoch": 0.98, "grad_norm": 1.6038628126765848, "learning_rate": 2.554299154659212e-08, "loss": 0.6683, "step": 9614 }, { "epoch": 0.98, "grad_norm": 1.673729472783224, "learning_rate": 2.5308213832697658e-08, "loss": 0.7261, "step": 9615 }, { "epoch": 0.98, "grad_norm": 1.6990401901929215, "learning_rate": 2.5074518715609843e-08, "loss": 0.7117, "step": 9616 }, { "epoch": 0.98, "grad_norm": 1.6665138770587684, "learning_rate": 2.4841906220692825e-08, "loss": 0.6896, "step": 9617 }, { "epoch": 0.98, "grad_norm": 1.7577331276681882, "learning_rate": 2.461037637319308e-08, "loss": 0.6992, "step": 9618 }, { "epoch": 0.98, "grad_norm": 1.832379629273432, "learning_rate": 2.4379929198238285e-08, "loss": 0.7282, "step": 9619 }, { "epoch": 0.98, "grad_norm": 1.752051759087949, "learning_rate": 2.4150564720841763e-08, "loss": 0.748, "step": 9620 }, { "epoch": 0.98, "grad_norm": 1.7443054920924745, "learning_rate": 2.3922282965896936e-08, "loss": 0.6948, "step": 9621 }, { "epoch": 0.98, "grad_norm": 1.563872679925648, "learning_rate": 2.3695083958179543e-08, "loss": 0.654, "step": 9622 }, { "epoch": 0.98, "grad_norm": 1.789764500311137, "learning_rate": 2.3468967722347635e-08, "loss": 0.7633, "step": 9623 }, { "epoch": 0.98, "grad_norm": 1.7614250422498046, "learning_rate": 2.3243934282944912e-08, "loss": 0.6923, "step": 9624 }, { "epoch": 0.98, "grad_norm": 1.7086680323811136, "learning_rate": 2.3019983664394064e-08, "loss": 0.7576, "step": 9625 }, { "epoch": 0.98, "grad_norm": 1.8243468464401964, "learning_rate": 2.2797115891002308e-08, "loss": 0.7506, "step": 9626 }, { "epoch": 0.98, "grad_norm": 1.8382359181164998, "learning_rate": 2.2575330986956968e-08, "loss": 0.6899, "step": 9627 }, { "epoch": 0.98, "grad_norm": 1.711964555016779, "learning_rate": 2.2354628976328786e-08, "loss": 0.6424, "step": 9628 }, { "epoch": 0.98, "grad_norm": 1.6574125129336976, "learning_rate": 2.2135009883074155e-08, "loss": 0.5457, "step": 9629 }, { "epoch": 0.98, "grad_norm": 1.470929810445323, "learning_rate": 2.1916473731027343e-08, "loss": 0.6801, "step": 9630 }, { "epoch": 0.98, "grad_norm": 1.7670042059843676, "learning_rate": 2.1699020543907157e-08, "loss": 0.6014, "step": 9631 }, { "epoch": 0.98, "grad_norm": 1.8821606786857161, "learning_rate": 2.1482650345315826e-08, "loss": 0.665, "step": 9632 }, { "epoch": 0.98, "grad_norm": 1.6664184114715632, "learning_rate": 2.1267363158735676e-08, "loss": 0.7083, "step": 9633 }, { "epoch": 0.98, "grad_norm": 1.705911715860228, "learning_rate": 2.1053159007533575e-08, "loss": 0.6824, "step": 9634 }, { "epoch": 0.98, "grad_norm": 1.7456083344015636, "learning_rate": 2.0840037914958698e-08, "loss": 0.6966, "step": 9635 }, { "epoch": 0.98, "grad_norm": 1.6545046167332191, "learning_rate": 2.0627999904139218e-08, "loss": 0.6038, "step": 9636 }, { "epoch": 0.98, "grad_norm": 1.8839922766007815, "learning_rate": 2.041704499809227e-08, "loss": 0.7633, "step": 9637 }, { "epoch": 0.98, "grad_norm": 1.6442871165201445, "learning_rate": 2.0207173219710664e-08, "loss": 0.7207, "step": 9638 }, { "epoch": 0.98, "grad_norm": 1.8057249181482182, "learning_rate": 1.9998384591773945e-08, "loss": 0.7916, "step": 9639 }, { "epoch": 0.98, "grad_norm": 1.6379867021567172, "learning_rate": 1.979067913694399e-08, "loss": 0.714, "step": 9640 }, { "epoch": 0.98, "grad_norm": 1.7978173995445093, "learning_rate": 1.9584056877761658e-08, "loss": 0.6308, "step": 9641 }, { "epoch": 0.98, "grad_norm": 1.8522532972135437, "learning_rate": 1.9378517836653454e-08, "loss": 0.6179, "step": 9642 }, { "epoch": 0.98, "grad_norm": 1.6906803348048336, "learning_rate": 1.91740620359282e-08, "loss": 0.6019, "step": 9643 }, { "epoch": 0.98, "grad_norm": 1.7531966996092696, "learning_rate": 1.8970689497775917e-08, "loss": 0.675, "step": 9644 }, { "epoch": 0.98, "grad_norm": 1.728651057249247, "learning_rate": 1.876840024427007e-08, "loss": 0.7092, "step": 9645 }, { "epoch": 0.98, "grad_norm": 1.5379281297714842, "learning_rate": 1.85671942973642e-08, "loss": 0.6308, "step": 9646 }, { "epoch": 0.98, "grad_norm": 1.7675827172247032, "learning_rate": 1.8367071678897507e-08, "loss": 0.6974, "step": 9647 }, { "epoch": 0.98, "grad_norm": 1.7158119619808374, "learning_rate": 1.8168032410590398e-08, "loss": 0.6727, "step": 9648 }, { "epoch": 0.98, "grad_norm": 1.643652158228756, "learning_rate": 1.7970076514044476e-08, "loss": 0.8226, "step": 9649 }, { "epoch": 0.98, "grad_norm": 1.877227745255658, "learning_rate": 1.7773204010745892e-08, "loss": 0.6955, "step": 9650 }, { "epoch": 0.98, "grad_norm": 1.8893676527357877, "learning_rate": 1.7577414922060888e-08, "loss": 0.7242, "step": 9651 }, { "epoch": 0.98, "grad_norm": 1.627296937520523, "learning_rate": 1.738270926924024e-08, "loss": 0.6402, "step": 9652 }, { "epoch": 0.98, "grad_norm": 1.616663575398968, "learning_rate": 1.7189087073415933e-08, "loss": 0.7571, "step": 9653 }, { "epoch": 0.98, "grad_norm": 1.6853571802209921, "learning_rate": 1.699654835560116e-08, "loss": 0.6763, "step": 9654 }, { "epoch": 0.98, "grad_norm": 1.854656056250111, "learning_rate": 1.680509313669587e-08, "loss": 0.6867, "step": 9655 }, { "epoch": 0.98, "grad_norm": 1.4804921632252286, "learning_rate": 1.6614721437477887e-08, "loss": 0.6332, "step": 9656 }, { "epoch": 0.98, "grad_norm": 1.6700954955835952, "learning_rate": 1.642543327860846e-08, "loss": 0.6996, "step": 9657 }, { "epoch": 0.98, "grad_norm": 1.592121528151136, "learning_rate": 1.6237228680633376e-08, "loss": 0.6434, "step": 9658 }, { "epoch": 0.98, "grad_norm": 1.6533858632363638, "learning_rate": 1.605010766397741e-08, "loss": 0.8153, "step": 9659 }, { "epoch": 0.98, "grad_norm": 1.804843545317245, "learning_rate": 1.586407024895209e-08, "loss": 0.6773, "step": 9660 }, { "epoch": 0.98, "grad_norm": 1.4865888180732818, "learning_rate": 1.5679116455746823e-08, "loss": 0.6879, "step": 9661 }, { "epoch": 0.98, "grad_norm": 1.7862992328554177, "learning_rate": 1.5495246304435552e-08, "loss": 0.7552, "step": 9662 }, { "epoch": 0.98, "grad_norm": 1.8059227397518534, "learning_rate": 1.5312459814975644e-08, "loss": 0.6882, "step": 9663 }, { "epoch": 0.98, "grad_norm": 1.5367676202214664, "learning_rate": 1.5130757007205676e-08, "loss": 0.5667, "step": 9664 }, { "epoch": 0.98, "grad_norm": 1.836586511875315, "learning_rate": 1.495013790084654e-08, "loss": 0.7366, "step": 9665 }, { "epoch": 0.98, "grad_norm": 1.7310378350968094, "learning_rate": 1.4770602515500332e-08, "loss": 0.6892, "step": 9666 }, { "epoch": 0.98, "grad_norm": 1.7893982283088452, "learning_rate": 1.4592150870653688e-08, "loss": 0.7379, "step": 9667 }, { "epoch": 0.98, "grad_norm": 1.6391847665314565, "learning_rate": 1.4414782985674447e-08, "loss": 0.6567, "step": 9668 }, { "epoch": 0.98, "grad_norm": 1.648479239098497, "learning_rate": 1.4238498879813878e-08, "loss": 0.6808, "step": 9669 }, { "epoch": 0.98, "grad_norm": 1.6315940355015648, "learning_rate": 1.4063298572204454e-08, "loss": 0.7107, "step": 9670 }, { "epoch": 0.98, "grad_norm": 1.652497062929891, "learning_rate": 1.3889182081860963e-08, "loss": 0.7429, "step": 9671 }, { "epoch": 0.98, "grad_norm": 1.646812479137008, "learning_rate": 1.3716149427682734e-08, "loss": 0.6522, "step": 9672 }, { "epoch": 0.98, "grad_norm": 1.6886811009695795, "learning_rate": 1.3544200628446968e-08, "loss": 0.6619, "step": 9673 }, { "epoch": 0.98, "grad_norm": 1.7001777649396592, "learning_rate": 1.3373335702818735e-08, "loss": 0.7377, "step": 9674 }, { "epoch": 0.98, "grad_norm": 1.5931986521862718, "learning_rate": 1.3203554669339868e-08, "loss": 0.6784, "step": 9675 }, { "epoch": 0.98, "grad_norm": 1.5438624429645642, "learning_rate": 1.3034857546441182e-08, "loss": 0.5341, "step": 9676 }, { "epoch": 0.98, "grad_norm": 1.4887640945970237, "learning_rate": 1.2867244352428033e-08, "loss": 0.6961, "step": 9677 }, { "epoch": 0.98, "grad_norm": 1.7453192466708156, "learning_rate": 1.2700715105495865e-08, "loss": 0.6643, "step": 9678 }, { "epoch": 0.98, "grad_norm": 1.9154223364917178, "learning_rate": 1.2535269823716889e-08, "loss": 0.7022, "step": 9679 }, { "epoch": 0.98, "grad_norm": 1.6207314273673628, "learning_rate": 1.2370908525046744e-08, "loss": 0.7324, "step": 9680 }, { "epoch": 0.98, "grad_norm": 1.781701331962845, "learning_rate": 1.2207631227326711e-08, "loss": 0.6468, "step": 9681 }, { "epoch": 0.98, "grad_norm": 1.766644934566483, "learning_rate": 1.2045437948275952e-08, "loss": 0.7227, "step": 9682 }, { "epoch": 0.98, "grad_norm": 1.7699821522867707, "learning_rate": 1.188432870549927e-08, "loss": 0.7304, "step": 9683 }, { "epoch": 0.99, "grad_norm": 1.7396705355516582, "learning_rate": 1.1724303516481572e-08, "loss": 0.6502, "step": 9684 }, { "epoch": 0.99, "grad_norm": 1.6905569226968575, "learning_rate": 1.1565362398592295e-08, "loss": 0.8001, "step": 9685 }, { "epoch": 0.99, "grad_norm": 1.6747194976240847, "learning_rate": 1.1407505369080973e-08, "loss": 0.6568, "step": 9686 }, { "epoch": 0.99, "grad_norm": 1.642429861783825, "learning_rate": 1.1250732445080569e-08, "loss": 0.6395, "step": 9687 }, { "epoch": 0.99, "grad_norm": 1.6875176860247043, "learning_rate": 1.1095043643606363e-08, "loss": 0.7886, "step": 9688 }, { "epoch": 0.99, "grad_norm": 1.8228693514058845, "learning_rate": 1.0940438981555945e-08, "loss": 0.7219, "step": 9689 }, { "epoch": 0.99, "grad_norm": 1.7675485509518638, "learning_rate": 1.0786918475710339e-08, "loss": 0.6434, "step": 9690 }, { "epoch": 0.99, "grad_norm": 1.647818234835014, "learning_rate": 1.0634482142730662e-08, "loss": 0.7144, "step": 9691 }, { "epoch": 0.99, "grad_norm": 1.7718676572281018, "learning_rate": 1.0483129999161456e-08, "loss": 0.6408, "step": 9692 }, { "epoch": 0.99, "grad_norm": 1.6318067602617774, "learning_rate": 1.0332862061429583e-08, "loss": 0.6004, "step": 9693 }, { "epoch": 0.99, "grad_norm": 1.6351165397625582, "learning_rate": 1.0183678345845328e-08, "loss": 0.7552, "step": 9694 }, { "epoch": 0.99, "grad_norm": 1.5644902946973864, "learning_rate": 1.0035578868600182e-08, "loss": 0.6444, "step": 9695 }, { "epoch": 0.99, "grad_norm": 1.6428946667146398, "learning_rate": 9.888563645765736e-09, "loss": 0.5811, "step": 9696 }, { "epoch": 0.99, "grad_norm": 1.5742527303659848, "learning_rate": 9.742632693301445e-09, "loss": 0.692, "step": 9697 }, { "epoch": 0.99, "grad_norm": 1.7736699263772455, "learning_rate": 9.597786027042422e-09, "loss": 0.7327, "step": 9698 }, { "epoch": 0.99, "grad_norm": 1.9491332009997895, "learning_rate": 9.454023662712752e-09, "loss": 0.6928, "step": 9699 }, { "epoch": 0.99, "grad_norm": 1.7185475542191888, "learning_rate": 9.311345615913291e-09, "loss": 0.6323, "step": 9700 }, { "epoch": 0.99, "grad_norm": 1.5105438695817894, "learning_rate": 9.169751902131652e-09, "loss": 0.6499, "step": 9701 }, { "epoch": 0.99, "grad_norm": 1.514827773029279, "learning_rate": 9.029242536733318e-09, "loss": 0.6234, "step": 9702 }, { "epoch": 0.99, "grad_norm": 1.6461197241040941, "learning_rate": 8.889817534969425e-09, "loss": 0.6747, "step": 9703 }, { "epoch": 0.99, "grad_norm": 1.7957496588047202, "learning_rate": 8.751476911972313e-09, "loss": 0.7837, "step": 9704 }, { "epoch": 0.99, "grad_norm": 1.6629104613611898, "learning_rate": 8.614220682756635e-09, "loss": 0.6097, "step": 9705 }, { "epoch": 0.99, "grad_norm": 1.6919638021174255, "learning_rate": 8.478048862219368e-09, "loss": 0.7037, "step": 9706 }, { "epoch": 0.99, "grad_norm": 1.5271956567781249, "learning_rate": 8.342961465140908e-09, "loss": 0.5894, "step": 9707 }, { "epoch": 0.99, "grad_norm": 1.6611920184684048, "learning_rate": 8.208958506181752e-09, "loss": 0.7251, "step": 9708 }, { "epoch": 0.99, "grad_norm": 1.7030516189999174, "learning_rate": 8.076039999885821e-09, "loss": 0.7094, "step": 9709 }, { "epoch": 0.99, "grad_norm": 1.5774737119575863, "learning_rate": 7.944205960678242e-09, "loss": 0.6555, "step": 9710 }, { "epoch": 0.99, "grad_norm": 1.5424960825862553, "learning_rate": 7.813456402870901e-09, "loss": 0.7548, "step": 9711 }, { "epoch": 0.99, "grad_norm": 1.9115060508268176, "learning_rate": 7.683791340651337e-09, "loss": 0.6898, "step": 9712 }, { "epoch": 0.99, "grad_norm": 1.5520360612107897, "learning_rate": 7.555210788093847e-09, "loss": 0.7203, "step": 9713 }, { "epoch": 0.99, "grad_norm": 1.73846643821148, "learning_rate": 7.427714759153937e-09, "loss": 0.7294, "step": 9714 }, { "epoch": 0.99, "grad_norm": 1.7606361370390762, "learning_rate": 7.301303267669424e-09, "loss": 0.6608, "step": 9715 }, { "epoch": 0.99, "grad_norm": 1.8115631849090241, "learning_rate": 7.1759763273604454e-09, "loss": 0.814, "step": 9716 }, { "epoch": 0.99, "grad_norm": 1.6783185631212085, "learning_rate": 7.051733951828343e-09, "loss": 0.6538, "step": 9717 }, { "epoch": 0.99, "grad_norm": 1.618824346005183, "learning_rate": 6.928576154558997e-09, "loss": 0.5846, "step": 9718 }, { "epoch": 0.99, "grad_norm": 1.8248486875496646, "learning_rate": 6.806502948918381e-09, "loss": 0.7861, "step": 9719 }, { "epoch": 0.99, "grad_norm": 1.6413599961753038, "learning_rate": 6.685514348154787e-09, "loss": 0.6407, "step": 9720 }, { "epoch": 0.99, "grad_norm": 1.8528942467367489, "learning_rate": 6.565610365402153e-09, "loss": 0.738, "step": 9721 }, { "epoch": 0.99, "grad_norm": 1.818498691949131, "learning_rate": 6.446791013671183e-09, "loss": 0.7306, "step": 9722 }, { "epoch": 0.99, "grad_norm": 1.7186762257651678, "learning_rate": 6.329056305860448e-09, "loss": 0.6696, "step": 9723 }, { "epoch": 0.99, "grad_norm": 1.757030635077287, "learning_rate": 6.212406254746395e-09, "loss": 0.7701, "step": 9724 }, { "epoch": 0.99, "grad_norm": 1.5623043797000618, "learning_rate": 6.096840872991117e-09, "loss": 0.662, "step": 9725 }, { "epoch": 0.99, "grad_norm": 1.5841618328268379, "learning_rate": 5.9823601731356925e-09, "loss": 0.748, "step": 9726 }, { "epoch": 0.99, "grad_norm": 1.6043392083126362, "learning_rate": 5.86896416760685e-09, "loss": 0.579, "step": 9727 }, { "epoch": 0.99, "grad_norm": 1.9265682270468332, "learning_rate": 5.7566528687114095e-09, "loss": 0.7247, "step": 9728 }, { "epoch": 0.99, "grad_norm": 1.691440943069805, "learning_rate": 5.645426288638511e-09, "loss": 0.7415, "step": 9729 }, { "epoch": 0.99, "grad_norm": 1.6450176668324605, "learning_rate": 5.53528443945961e-09, "loss": 0.6279, "step": 9730 }, { "epoch": 0.99, "grad_norm": 1.7378168961948732, "learning_rate": 5.426227333130696e-09, "loss": 0.697, "step": 9731 }, { "epoch": 0.99, "grad_norm": 1.7252926169952139, "learning_rate": 5.318254981486748e-09, "loss": 0.6545, "step": 9732 }, { "epoch": 0.99, "grad_norm": 1.7953360616753136, "learning_rate": 5.21136739624617e-09, "loss": 0.7161, "step": 9733 }, { "epoch": 0.99, "grad_norm": 1.6194759118926911, "learning_rate": 5.105564589011902e-09, "loss": 0.677, "step": 9734 }, { "epoch": 0.99, "grad_norm": 1.7126316233812235, "learning_rate": 5.000846571264762e-09, "loss": 0.6515, "step": 9735 }, { "epoch": 0.99, "grad_norm": 1.9346810637478329, "learning_rate": 4.897213354372321e-09, "loss": 0.6876, "step": 9736 }, { "epoch": 0.99, "grad_norm": 2.209066745028407, "learning_rate": 4.7946649495811405e-09, "loss": 0.7986, "step": 9737 }, { "epoch": 0.99, "grad_norm": 1.8773927683013005, "learning_rate": 4.693201368021205e-09, "loss": 0.7163, "step": 9738 }, { "epoch": 0.99, "grad_norm": 1.8109093998592674, "learning_rate": 4.592822620705928e-09, "loss": 0.7481, "step": 9739 }, { "epoch": 0.99, "grad_norm": 1.5717984737809685, "learning_rate": 4.493528718528817e-09, "loss": 0.6516, "step": 9740 }, { "epoch": 0.99, "grad_norm": 1.6172161663400688, "learning_rate": 4.395319672266807e-09, "loss": 0.6364, "step": 9741 }, { "epoch": 0.99, "grad_norm": 1.6682760972502344, "learning_rate": 4.2981954925780385e-09, "loss": 0.7554, "step": 9742 }, { "epoch": 0.99, "grad_norm": 1.868953172176466, "learning_rate": 4.202156190006301e-09, "loss": 0.7247, "step": 9743 }, { "epoch": 0.99, "grad_norm": 1.7716773681131424, "learning_rate": 4.1072017749732574e-09, "loss": 0.6867, "step": 9744 }, { "epoch": 0.99, "grad_norm": 1.5398947349645191, "learning_rate": 4.013332257785107e-09, "loss": 0.5854, "step": 9745 }, { "epoch": 0.99, "grad_norm": 1.52256409465704, "learning_rate": 3.920547648630368e-09, "loss": 0.6266, "step": 9746 }, { "epoch": 0.99, "grad_norm": 1.7581089180332012, "learning_rate": 3.828847957577653e-09, "loss": 0.6372, "step": 9747 }, { "epoch": 0.99, "grad_norm": 1.596383318497569, "learning_rate": 3.73823319458233e-09, "loss": 0.6177, "step": 9748 }, { "epoch": 0.99, "grad_norm": 1.6520114837204025, "learning_rate": 3.6487033694776466e-09, "loss": 0.6509, "step": 9749 }, { "epoch": 0.99, "grad_norm": 1.7583325520760777, "learning_rate": 3.560258491980273e-09, "loss": 0.7194, "step": 9750 }, { "epoch": 0.99, "grad_norm": 1.596475826131414, "learning_rate": 3.4728985716903083e-09, "loss": 0.7752, "step": 9751 }, { "epoch": 0.99, "grad_norm": 1.436844800858631, "learning_rate": 3.3866236180879476e-09, "loss": 0.6588, "step": 9752 }, { "epoch": 0.99, "grad_norm": 1.8173776459488546, "learning_rate": 3.3014336405390313e-09, "loss": 0.7334, "step": 9753 }, { "epoch": 0.99, "grad_norm": 1.7236920470298163, "learning_rate": 3.2173286482883868e-09, "loss": 0.7585, "step": 9754 }, { "epoch": 0.99, "grad_norm": 1.6707557679087048, "learning_rate": 3.1343086504653785e-09, "loss": 0.6727, "step": 9755 }, { "epoch": 0.99, "grad_norm": 1.559160997871232, "learning_rate": 3.0523736560783558e-09, "loss": 0.6343, "step": 9756 }, { "epoch": 0.99, "grad_norm": 1.6592056636731305, "learning_rate": 2.971523674022425e-09, "loss": 0.6621, "step": 9757 }, { "epoch": 0.99, "grad_norm": 1.7959185329193494, "learning_rate": 2.8917587130705695e-09, "loss": 0.7023, "step": 9758 }, { "epoch": 0.99, "grad_norm": 1.5316612752649197, "learning_rate": 2.8130787818814177e-09, "loss": 0.7049, "step": 9759 }, { "epoch": 0.99, "grad_norm": 1.5013465330785167, "learning_rate": 2.7354838889948055e-09, "loss": 0.676, "step": 9760 }, { "epoch": 0.99, "grad_norm": 1.6680553354247996, "learning_rate": 2.6589740428306644e-09, "loss": 0.7261, "step": 9761 }, { "epoch": 0.99, "grad_norm": 1.5739749500286022, "learning_rate": 2.5835492516945725e-09, "loss": 0.5215, "step": 9762 }, { "epoch": 0.99, "grad_norm": 1.80329783598832, "learning_rate": 2.5092095237722048e-09, "loss": 0.7339, "step": 9763 }, { "epoch": 0.99, "grad_norm": 1.7697361499388937, "learning_rate": 2.4359548671315515e-09, "loss": 0.783, "step": 9764 }, { "epoch": 0.99, "grad_norm": 1.8060799010027864, "learning_rate": 2.36378528972292e-09, "loss": 0.6659, "step": 9765 }, { "epoch": 0.99, "grad_norm": 1.5320767192304863, "learning_rate": 2.2927007993811535e-09, "loss": 0.6163, "step": 9766 }, { "epoch": 0.99, "grad_norm": 1.8341164287247476, "learning_rate": 2.222701403818972e-09, "loss": 0.7484, "step": 9767 }, { "epoch": 0.99, "grad_norm": 1.6898445181700266, "learning_rate": 2.153787110634742e-09, "loss": 0.7118, "step": 9768 }, { "epoch": 0.99, "grad_norm": 1.73467212577717, "learning_rate": 2.0859579273091457e-09, "loss": 0.6583, "step": 9769 }, { "epoch": 0.99, "grad_norm": 1.8342421843135626, "learning_rate": 2.019213861201852e-09, "loss": 0.8027, "step": 9770 }, { "epoch": 0.99, "grad_norm": 1.7248601263057854, "learning_rate": 1.953554919559286e-09, "loss": 0.7182, "step": 9771 }, { "epoch": 0.99, "grad_norm": 1.7852646128943321, "learning_rate": 1.8889811095046396e-09, "loss": 0.6439, "step": 9772 }, { "epoch": 0.99, "grad_norm": 1.797497617117918, "learning_rate": 1.8254924380489702e-09, "loss": 0.7096, "step": 9773 }, { "epoch": 0.99, "grad_norm": 1.6025539122176873, "learning_rate": 1.7630889120823224e-09, "loss": 0.6443, "step": 9774 }, { "epoch": 0.99, "grad_norm": 1.5688322495059948, "learning_rate": 1.7017705383781668e-09, "loss": 0.7314, "step": 9775 }, { "epoch": 0.99, "grad_norm": 1.7617953713834467, "learning_rate": 1.64153732359007e-09, "loss": 0.6941, "step": 9776 }, { "epoch": 0.99, "grad_norm": 1.8437779372800862, "learning_rate": 1.5823892742561352e-09, "loss": 0.6198, "step": 9777 }, { "epoch": 0.99, "grad_norm": 1.5327724689221702, "learning_rate": 1.5243263967956722e-09, "loss": 0.7511, "step": 9778 }, { "epoch": 0.99, "grad_norm": 1.632644536650647, "learning_rate": 1.467348697511417e-09, "loss": 0.7554, "step": 9779 }, { "epoch": 0.99, "grad_norm": 1.8949552888377248, "learning_rate": 1.411456182587312e-09, "loss": 0.7169, "step": 9780 }, { "epoch": 0.99, "grad_norm": 1.6767940261577814, "learning_rate": 1.356648858088505e-09, "loss": 0.6894, "step": 9781 }, { "epoch": 1.0, "grad_norm": 1.7349273531608402, "learning_rate": 1.302926729964682e-09, "loss": 0.6867, "step": 9782 }, { "epoch": 1.0, "grad_norm": 1.6491775044986772, "learning_rate": 1.2502898040456235e-09, "loss": 0.6245, "step": 9783 }, { "epoch": 1.0, "grad_norm": 1.5181692314067778, "learning_rate": 1.1987380860456477e-09, "loss": 0.6693, "step": 9784 }, { "epoch": 1.0, "grad_norm": 1.5327161046836033, "learning_rate": 1.148271581558058e-09, "loss": 0.6834, "step": 9785 }, { "epoch": 1.0, "grad_norm": 1.6414243607298673, "learning_rate": 1.098890296060695e-09, "loss": 0.6154, "step": 9786 }, { "epoch": 1.0, "grad_norm": 1.844885336324718, "learning_rate": 1.0505942349137155e-09, "loss": 0.8566, "step": 9787 }, { "epoch": 1.0, "grad_norm": 1.5735819481588773, "learning_rate": 1.003383403358482e-09, "loss": 0.7284, "step": 9788 }, { "epoch": 1.0, "grad_norm": 1.7929415551134282, "learning_rate": 9.572578065197846e-10, "loss": 0.6794, "step": 9789 }, { "epoch": 1.0, "grad_norm": 1.8147303747186208, "learning_rate": 9.122174494025082e-10, "loss": 0.6762, "step": 9790 }, { "epoch": 1.0, "grad_norm": 1.8998667504295066, "learning_rate": 8.682623368971854e-10, "loss": 0.7023, "step": 9791 }, { "epoch": 1.0, "grad_norm": 1.5999912018017683, "learning_rate": 8.253924737711139e-10, "loss": 0.6024, "step": 9792 }, { "epoch": 1.0, "grad_norm": 1.6802611122367843, "learning_rate": 7.83607864680569e-10, "loss": 0.677, "step": 9793 }, { "epoch": 1.0, "grad_norm": 1.9299673966168387, "learning_rate": 7.429085141585912e-10, "loss": 0.6575, "step": 9794 }, { "epoch": 1.0, "grad_norm": 1.8079801054976152, "learning_rate": 7.032944266227582e-10, "loss": 0.7263, "step": 9795 }, { "epoch": 1.0, "grad_norm": 1.7053584530993402, "learning_rate": 6.647656063729635e-10, "loss": 0.7107, "step": 9796 }, { "epoch": 1.0, "grad_norm": 1.752559768160014, "learning_rate": 6.273220575914174e-10, "loss": 0.6197, "step": 9797 }, { "epoch": 1.0, "grad_norm": 1.6022948579476681, "learning_rate": 5.909637843404259e-10, "loss": 0.7039, "step": 9798 }, { "epoch": 1.0, "grad_norm": 1.6826875055361312, "learning_rate": 5.556907905679421e-10, "loss": 0.6946, "step": 9799 }, { "epoch": 1.0, "grad_norm": 1.6338947651630946, "learning_rate": 5.215030801009046e-10, "loss": 0.7023, "step": 9800 }, { "epoch": 1.0, "grad_norm": 1.669612829015242, "learning_rate": 4.884006566496791e-10, "loss": 0.7131, "step": 9801 }, { "epoch": 1.0, "grad_norm": 1.595339314335866, "learning_rate": 4.5638352380805716e-10, "loss": 0.5837, "step": 9802 }, { "epoch": 1.0, "grad_norm": 1.7544681428451936, "learning_rate": 4.2545168505103707e-10, "loss": 0.6499, "step": 9803 }, { "epoch": 1.0, "grad_norm": 1.6948058791459002, "learning_rate": 3.9560514373593317e-10, "loss": 0.6961, "step": 9804 }, { "epoch": 1.0, "grad_norm": 1.7957334177664486, "learning_rate": 3.6684390310015584e-10, "loss": 0.7444, "step": 9805 }, { "epoch": 1.0, "grad_norm": 1.5597201209989984, "learning_rate": 3.391679662678726e-10, "loss": 0.6784, "step": 9806 }, { "epoch": 1.0, "grad_norm": 1.8096052129609148, "learning_rate": 3.1257733624112665e-10, "loss": 0.6936, "step": 9807 }, { "epoch": 1.0, "grad_norm": 1.6849798366893498, "learning_rate": 2.8707201590649767e-10, "loss": 0.6742, "step": 9808 }, { "epoch": 1.0, "grad_norm": 1.838312020306633, "learning_rate": 2.6265200803177184e-10, "loss": 0.7065, "step": 9809 }, { "epoch": 1.0, "grad_norm": 1.767900214133964, "learning_rate": 2.3931731526927183e-10, "loss": 0.8026, "step": 9810 }, { "epoch": 1.0, "grad_norm": 1.5904778245566455, "learning_rate": 2.1706794014919596e-10, "loss": 0.7111, "step": 9811 }, { "epoch": 1.0, "grad_norm": 1.5468126215436857, "learning_rate": 1.9590388508627934e-10, "loss": 0.7569, "step": 9812 }, { "epoch": 1.0, "grad_norm": 1.5375661558070106, "learning_rate": 1.758251523797938e-10, "loss": 0.611, "step": 9813 }, { "epoch": 1.0, "grad_norm": 1.8357286117999407, "learning_rate": 1.5683174420799696e-10, "loss": 0.7772, "step": 9814 }, { "epoch": 1.0, "grad_norm": 2.1584454747868387, "learning_rate": 1.3892366263146273e-10, "loss": 0.77, "step": 9815 }, { "epoch": 1.0, "grad_norm": 1.674939245192305, "learning_rate": 1.2210090959419162e-10, "loss": 0.609, "step": 9816 }, { "epoch": 1.0, "grad_norm": 1.5174189719877236, "learning_rate": 1.0636348692250054e-10, "loss": 0.585, "step": 9817 }, { "epoch": 1.0, "grad_norm": 1.6701519955880866, "learning_rate": 9.171139632391245e-11, "loss": 0.748, "step": 9818 }, { "epoch": 1.0, "grad_norm": 1.5132318407758787, "learning_rate": 7.8144639389377e-11, "loss": 0.5916, "step": 9819 }, { "epoch": 1.0, "grad_norm": 1.6711317998681496, "learning_rate": 6.566321759104987e-11, "loss": 0.7035, "step": 9820 }, { "epoch": 1.0, "grad_norm": 1.7089305325553747, "learning_rate": 5.426713228340319e-11, "loss": 0.6716, "step": 9821 }, { "epoch": 1.0, "grad_norm": 1.7533657894024617, "learning_rate": 4.3956384703225384e-11, "loss": 0.7033, "step": 9822 }, { "epoch": 1.0, "grad_norm": 1.7866771307941718, "learning_rate": 3.4730975969621275e-11, "loss": 0.7179, "step": 9823 }, { "epoch": 1.0, "grad_norm": 1.7858168287448422, "learning_rate": 2.659090708401202e-11, "loss": 0.6644, "step": 9824 }, { "epoch": 1.0, "grad_norm": 1.849749275071706, "learning_rate": 1.9536178930135152e-11, "loss": 0.586, "step": 9825 }, { "epoch": 1.0, "grad_norm": 1.779817554106832, "learning_rate": 1.356679227404456e-11, "loss": 0.6623, "step": 9826 }, { "epoch": 1.0, "grad_norm": 1.7365065100153105, "learning_rate": 8.682747761890042e-12, "loss": 0.6541, "step": 9827 }, { "epoch": 1.0, "grad_norm": 1.7159852243362201, "learning_rate": 4.884045925468428e-12, "loss": 0.6903, "step": 9828 }, { "epoch": 1.0, "grad_norm": 1.9049278490404138, "learning_rate": 2.170687175562236e-12, "loss": 0.5288, "step": 9829 }, { "epoch": 1.0, "grad_norm": 1.6298596172428135, "learning_rate": 5.426718086010141e-13, "loss": 0.7138, "step": 9830 }, { "epoch": 1.0, "grad_norm": 1.6170033821053835, "learning_rate": 0.0, "loss": 0.7636, "step": 9831 }, { "epoch": 1.0, "step": 9831, "total_flos": 1617537550008320.0, "train_loss": 0.7271600768035261, "train_runtime": 125510.4499, "train_samples_per_second": 10.025, "train_steps_per_second": 0.078 } ], "logging_steps": 1.0, "max_steps": 9831, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 1617537550008320.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }