|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 0, |
|
"global_step": 1042, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0009596928982725527, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 4.9952015355086376e-05, |
|
"loss": 1.3404, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0019193857965451055, |
|
"grad_norm": 0.07568359375, |
|
"learning_rate": 4.990403071017274e-05, |
|
"loss": 1.4738, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0028790786948176585, |
|
"grad_norm": 0.080078125, |
|
"learning_rate": 4.985604606525912e-05, |
|
"loss": 1.2836, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.003838771593090211, |
|
"grad_norm": 0.107421875, |
|
"learning_rate": 4.980806142034549e-05, |
|
"loss": 1.3877, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0047984644913627635, |
|
"grad_norm": 0.130859375, |
|
"learning_rate": 4.976007677543186e-05, |
|
"loss": 1.4109, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.005758157389635317, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 4.9712092130518236e-05, |
|
"loss": 1.3444, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0067178502879078695, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 4.966410748560461e-05, |
|
"loss": 1.3984, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.007677543186180422, |
|
"grad_norm": 0.1748046875, |
|
"learning_rate": 4.961612284069098e-05, |
|
"loss": 1.2668, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.008637236084452975, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 4.9568138195777356e-05, |
|
"loss": 1.3121, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.009596928982725527, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 4.952015355086372e-05, |
|
"loss": 1.2488, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01055662188099808, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 4.94721689059501e-05, |
|
"loss": 1.3309, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.011516314779270634, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 4.942418426103647e-05, |
|
"loss": 1.3018, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.012476007677543186, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 4.937619961612284e-05, |
|
"loss": 1.407, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.013435700575815739, |
|
"grad_norm": 0.1484375, |
|
"learning_rate": 4.9328214971209215e-05, |
|
"loss": 1.2003, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.014395393474088292, |
|
"grad_norm": 0.1455078125, |
|
"learning_rate": 4.928023032629559e-05, |
|
"loss": 1.179, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.015355086372360844, |
|
"grad_norm": 0.13671875, |
|
"learning_rate": 4.923224568138196e-05, |
|
"loss": 1.127, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.016314779270633396, |
|
"grad_norm": 0.1279296875, |
|
"learning_rate": 4.9184261036468335e-05, |
|
"loss": 1.0714, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.01727447216890595, |
|
"grad_norm": 0.150390625, |
|
"learning_rate": 4.91362763915547e-05, |
|
"loss": 1.1533, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.018234165067178502, |
|
"grad_norm": 0.142578125, |
|
"learning_rate": 4.908829174664108e-05, |
|
"loss": 1.1575, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.019193857965451054, |
|
"grad_norm": 0.11865234375, |
|
"learning_rate": 4.904030710172745e-05, |
|
"loss": 1.0691, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02015355086372361, |
|
"grad_norm": 0.1455078125, |
|
"learning_rate": 4.899232245681382e-05, |
|
"loss": 1.151, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.02111324376199616, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 4.8944337811900195e-05, |
|
"loss": 1.1148, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.022072936660268713, |
|
"grad_norm": 0.10791015625, |
|
"learning_rate": 4.889635316698657e-05, |
|
"loss": 1.0852, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.023032629558541268, |
|
"grad_norm": 0.11083984375, |
|
"learning_rate": 4.884836852207294e-05, |
|
"loss": 1.123, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.02399232245681382, |
|
"grad_norm": 0.11865234375, |
|
"learning_rate": 4.880038387715931e-05, |
|
"loss": 1.1012, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.02495201535508637, |
|
"grad_norm": 0.1083984375, |
|
"learning_rate": 4.875239923224568e-05, |
|
"loss": 1.1545, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.025911708253358926, |
|
"grad_norm": 0.11328125, |
|
"learning_rate": 4.8704414587332055e-05, |
|
"loss": 1.0973, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.026871401151631478, |
|
"grad_norm": 0.099609375, |
|
"learning_rate": 4.865642994241843e-05, |
|
"loss": 1.083, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.02783109404990403, |
|
"grad_norm": 0.09619140625, |
|
"learning_rate": 4.86084452975048e-05, |
|
"loss": 1.0999, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.028790786948176585, |
|
"grad_norm": 0.0927734375, |
|
"learning_rate": 4.8560460652591175e-05, |
|
"loss": 0.9142, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.029750479846449136, |
|
"grad_norm": 0.1044921875, |
|
"learning_rate": 4.851247600767754e-05, |
|
"loss": 1.0528, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.030710172744721688, |
|
"grad_norm": 0.1083984375, |
|
"learning_rate": 4.846449136276392e-05, |
|
"loss": 0.9556, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.03166986564299424, |
|
"grad_norm": 0.0908203125, |
|
"learning_rate": 4.841650671785029e-05, |
|
"loss": 0.9572, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.03262955854126679, |
|
"grad_norm": 0.09375, |
|
"learning_rate": 4.836852207293666e-05, |
|
"loss": 0.9687, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.03358925143953935, |
|
"grad_norm": 0.0986328125, |
|
"learning_rate": 4.8320537428023035e-05, |
|
"loss": 0.9755, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0345489443378119, |
|
"grad_norm": 0.09521484375, |
|
"learning_rate": 4.827255278310941e-05, |
|
"loss": 0.991, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.03550863723608445, |
|
"grad_norm": 0.1064453125, |
|
"learning_rate": 4.822456813819578e-05, |
|
"loss": 1.0293, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.036468330134357005, |
|
"grad_norm": 0.08837890625, |
|
"learning_rate": 4.817658349328215e-05, |
|
"loss": 0.929, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.03742802303262956, |
|
"grad_norm": 0.09765625, |
|
"learning_rate": 4.812859884836852e-05, |
|
"loss": 0.9814, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.03838771593090211, |
|
"grad_norm": 0.0947265625, |
|
"learning_rate": 4.80806142034549e-05, |
|
"loss": 0.978, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03934740882917467, |
|
"grad_norm": 0.091796875, |
|
"learning_rate": 4.803262955854127e-05, |
|
"loss": 1.0056, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.04030710172744722, |
|
"grad_norm": 0.09814453125, |
|
"learning_rate": 4.798464491362764e-05, |
|
"loss": 1.0102, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.04126679462571977, |
|
"grad_norm": 0.08837890625, |
|
"learning_rate": 4.7936660268714015e-05, |
|
"loss": 0.9454, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.04222648752399232, |
|
"grad_norm": 0.08837890625, |
|
"learning_rate": 4.788867562380039e-05, |
|
"loss": 0.9257, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.04318618042226487, |
|
"grad_norm": 0.080078125, |
|
"learning_rate": 4.784069097888676e-05, |
|
"loss": 0.943, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.044145873320537425, |
|
"grad_norm": 0.08642578125, |
|
"learning_rate": 4.779270633397313e-05, |
|
"loss": 0.9437, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.045105566218809984, |
|
"grad_norm": 0.08740234375, |
|
"learning_rate": 4.77447216890595e-05, |
|
"loss": 0.9291, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.046065259117082535, |
|
"grad_norm": 0.09130859375, |
|
"learning_rate": 4.769673704414588e-05, |
|
"loss": 0.8921, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.04702495201535509, |
|
"grad_norm": 0.119140625, |
|
"learning_rate": 4.764875239923225e-05, |
|
"loss": 0.9966, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.04798464491362764, |
|
"grad_norm": 0.09130859375, |
|
"learning_rate": 4.760076775431862e-05, |
|
"loss": 0.8992, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04894433781190019, |
|
"grad_norm": 0.076171875, |
|
"learning_rate": 4.7552783109404995e-05, |
|
"loss": 0.8931, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.04990403071017274, |
|
"grad_norm": 0.091796875, |
|
"learning_rate": 4.750479846449136e-05, |
|
"loss": 0.8923, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.0508637236084453, |
|
"grad_norm": 0.08056640625, |
|
"learning_rate": 4.745681381957774e-05, |
|
"loss": 0.9124, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.05182341650671785, |
|
"grad_norm": 0.07958984375, |
|
"learning_rate": 4.740882917466411e-05, |
|
"loss": 0.8448, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.052783109404990404, |
|
"grad_norm": 0.08642578125, |
|
"learning_rate": 4.736084452975048e-05, |
|
"loss": 0.8974, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.053742802303262956, |
|
"grad_norm": 0.1015625, |
|
"learning_rate": 4.7312859884836855e-05, |
|
"loss": 1.0322, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.05470249520153551, |
|
"grad_norm": 0.0830078125, |
|
"learning_rate": 4.726487523992323e-05, |
|
"loss": 0.8886, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.05566218809980806, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 4.72168905950096e-05, |
|
"loss": 0.8811, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.05662188099808062, |
|
"grad_norm": 0.09423828125, |
|
"learning_rate": 4.716890595009597e-05, |
|
"loss": 1.0552, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.05758157389635317, |
|
"grad_norm": 0.083984375, |
|
"learning_rate": 4.712092130518234e-05, |
|
"loss": 0.8823, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05854126679462572, |
|
"grad_norm": 0.07958984375, |
|
"learning_rate": 4.707293666026872e-05, |
|
"loss": 0.9149, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.05950095969289827, |
|
"grad_norm": 0.08642578125, |
|
"learning_rate": 4.702495201535509e-05, |
|
"loss": 0.8472, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.060460652591170824, |
|
"grad_norm": 0.08349609375, |
|
"learning_rate": 4.697696737044146e-05, |
|
"loss": 0.9021, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.061420345489443376, |
|
"grad_norm": 0.08251953125, |
|
"learning_rate": 4.6928982725527834e-05, |
|
"loss": 0.9205, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.06238003838771593, |
|
"grad_norm": 0.09619140625, |
|
"learning_rate": 4.688099808061421e-05, |
|
"loss": 0.8649, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.06333973128598848, |
|
"grad_norm": 0.08447265625, |
|
"learning_rate": 4.683301343570058e-05, |
|
"loss": 0.8204, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.06429942418426103, |
|
"grad_norm": 0.0810546875, |
|
"learning_rate": 4.678502879078695e-05, |
|
"loss": 0.8614, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.06525911708253358, |
|
"grad_norm": 0.1201171875, |
|
"learning_rate": 4.673704414587332e-05, |
|
"loss": 1.0054, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.06621880998080615, |
|
"grad_norm": 0.1201171875, |
|
"learning_rate": 4.6689059500959694e-05, |
|
"loss": 0.965, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.0671785028790787, |
|
"grad_norm": 0.083984375, |
|
"learning_rate": 4.664107485604607e-05, |
|
"loss": 0.8924, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06813819577735125, |
|
"grad_norm": 0.083984375, |
|
"learning_rate": 4.659309021113244e-05, |
|
"loss": 0.7907, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.0690978886756238, |
|
"grad_norm": 0.08251953125, |
|
"learning_rate": 4.654510556621881e-05, |
|
"loss": 0.841, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.07005758157389635, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 4.649712092130519e-05, |
|
"loss": 0.8276, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.0710172744721689, |
|
"grad_norm": 0.138671875, |
|
"learning_rate": 4.644913627639156e-05, |
|
"loss": 0.8203, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.07197696737044146, |
|
"grad_norm": 0.08740234375, |
|
"learning_rate": 4.640115163147793e-05, |
|
"loss": 0.9008, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.07293666026871401, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 4.63531669865643e-05, |
|
"loss": 0.8216, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.07389635316698656, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 4.6305182341650674e-05, |
|
"loss": 0.825, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.07485604606525911, |
|
"grad_norm": 0.0888671875, |
|
"learning_rate": 4.625719769673705e-05, |
|
"loss": 0.7909, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.07581573896353166, |
|
"grad_norm": 0.0927734375, |
|
"learning_rate": 4.620921305182342e-05, |
|
"loss": 0.8424, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.07677543186180422, |
|
"grad_norm": 0.0927734375, |
|
"learning_rate": 4.616122840690979e-05, |
|
"loss": 0.8197, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07773512476007678, |
|
"grad_norm": 0.10400390625, |
|
"learning_rate": 4.611324376199616e-05, |
|
"loss": 0.8275, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.07869481765834933, |
|
"grad_norm": 0.08837890625, |
|
"learning_rate": 4.606525911708254e-05, |
|
"loss": 0.8048, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.07965451055662189, |
|
"grad_norm": 0.09375, |
|
"learning_rate": 4.601727447216891e-05, |
|
"loss": 0.8103, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.08061420345489444, |
|
"grad_norm": 0.10693359375, |
|
"learning_rate": 4.596928982725528e-05, |
|
"loss": 0.7741, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.08157389635316699, |
|
"grad_norm": 0.09375, |
|
"learning_rate": 4.5921305182341654e-05, |
|
"loss": 0.8088, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.08253358925143954, |
|
"grad_norm": 0.0927734375, |
|
"learning_rate": 4.587332053742803e-05, |
|
"loss": 0.8514, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.08349328214971209, |
|
"grad_norm": 0.09375, |
|
"learning_rate": 4.58253358925144e-05, |
|
"loss": 0.8502, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.08445297504798464, |
|
"grad_norm": 0.09521484375, |
|
"learning_rate": 4.577735124760077e-05, |
|
"loss": 0.8378, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.0854126679462572, |
|
"grad_norm": 0.0908203125, |
|
"learning_rate": 4.572936660268714e-05, |
|
"loss": 0.8144, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.08637236084452975, |
|
"grad_norm": 0.1005859375, |
|
"learning_rate": 4.5681381957773514e-05, |
|
"loss": 0.8769, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0873320537428023, |
|
"grad_norm": 0.08984375, |
|
"learning_rate": 4.563339731285989e-05, |
|
"loss": 0.7952, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.08829174664107485, |
|
"grad_norm": 0.09912109375, |
|
"learning_rate": 4.558541266794626e-05, |
|
"loss": 0.8802, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.0892514395393474, |
|
"grad_norm": 0.09228515625, |
|
"learning_rate": 4.553742802303263e-05, |
|
"loss": 0.8435, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.09021113243761997, |
|
"grad_norm": 0.0986328125, |
|
"learning_rate": 4.548944337811901e-05, |
|
"loss": 0.8348, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.09117082533589252, |
|
"grad_norm": 0.10107421875, |
|
"learning_rate": 4.544145873320538e-05, |
|
"loss": 0.8354, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.09213051823416507, |
|
"grad_norm": 0.0986328125, |
|
"learning_rate": 4.539347408829175e-05, |
|
"loss": 0.804, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.09309021113243762, |
|
"grad_norm": 0.09326171875, |
|
"learning_rate": 4.534548944337812e-05, |
|
"loss": 0.8968, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.09404990403071017, |
|
"grad_norm": 0.09375, |
|
"learning_rate": 4.5297504798464494e-05, |
|
"loss": 0.8628, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.09500959692898273, |
|
"grad_norm": 0.11865234375, |
|
"learning_rate": 4.524952015355087e-05, |
|
"loss": 0.8004, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.09596928982725528, |
|
"grad_norm": 0.09521484375, |
|
"learning_rate": 4.520153550863724e-05, |
|
"loss": 0.7847, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09692898272552783, |
|
"grad_norm": 0.10400390625, |
|
"learning_rate": 4.515355086372361e-05, |
|
"loss": 0.7901, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.09788867562380038, |
|
"grad_norm": 0.0966796875, |
|
"learning_rate": 4.510556621880998e-05, |
|
"loss": 0.854, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.09884836852207293, |
|
"grad_norm": 0.12109375, |
|
"learning_rate": 4.505758157389636e-05, |
|
"loss": 0.7811, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.09980806142034548, |
|
"grad_norm": 0.10595703125, |
|
"learning_rate": 4.500959692898273e-05, |
|
"loss": 0.8829, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.10076775431861804, |
|
"grad_norm": 0.09619140625, |
|
"learning_rate": 4.49616122840691e-05, |
|
"loss": 0.8016, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.1017274472168906, |
|
"grad_norm": 0.09814453125, |
|
"learning_rate": 4.491362763915547e-05, |
|
"loss": 0.7894, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.10268714011516315, |
|
"grad_norm": 0.0986328125, |
|
"learning_rate": 4.486564299424185e-05, |
|
"loss": 0.7981, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.1036468330134357, |
|
"grad_norm": 0.1083984375, |
|
"learning_rate": 4.481765834932822e-05, |
|
"loss": 0.8251, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.10460652591170826, |
|
"grad_norm": 0.1669921875, |
|
"learning_rate": 4.476967370441459e-05, |
|
"loss": 0.9373, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.10556621880998081, |
|
"grad_norm": 0.10400390625, |
|
"learning_rate": 4.472168905950096e-05, |
|
"loss": 0.7491, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.10652591170825336, |
|
"grad_norm": 0.099609375, |
|
"learning_rate": 4.467370441458733e-05, |
|
"loss": 0.7829, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.10748560460652591, |
|
"grad_norm": 0.1044921875, |
|
"learning_rate": 4.462571976967371e-05, |
|
"loss": 0.8941, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.10844529750479846, |
|
"grad_norm": 0.10546875, |
|
"learning_rate": 4.457773512476008e-05, |
|
"loss": 0.8423, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.10940499040307101, |
|
"grad_norm": 0.1064453125, |
|
"learning_rate": 4.4529750479846447e-05, |
|
"loss": 0.7553, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.11036468330134357, |
|
"grad_norm": 0.1669921875, |
|
"learning_rate": 4.448176583493283e-05, |
|
"loss": 0.8334, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.11132437619961612, |
|
"grad_norm": 0.10595703125, |
|
"learning_rate": 4.44337811900192e-05, |
|
"loss": 0.7987, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.11228406909788867, |
|
"grad_norm": 0.11767578125, |
|
"learning_rate": 4.4385796545105567e-05, |
|
"loss": 0.8109, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.11324376199616124, |
|
"grad_norm": 0.10986328125, |
|
"learning_rate": 4.433781190019194e-05, |
|
"loss": 0.7601, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.11420345489443379, |
|
"grad_norm": 0.130859375, |
|
"learning_rate": 4.428982725527831e-05, |
|
"loss": 0.8111, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.11516314779270634, |
|
"grad_norm": 0.109375, |
|
"learning_rate": 4.4241842610364687e-05, |
|
"loss": 0.8731, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.11612284069097889, |
|
"grad_norm": 0.10595703125, |
|
"learning_rate": 4.419385796545106e-05, |
|
"loss": 0.7185, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.11708253358925144, |
|
"grad_norm": 0.10693359375, |
|
"learning_rate": 4.4145873320537426e-05, |
|
"loss": 0.7751, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.118042226487524, |
|
"grad_norm": 0.10595703125, |
|
"learning_rate": 4.4097888675623806e-05, |
|
"loss": 0.7975, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.11900191938579655, |
|
"grad_norm": 0.115234375, |
|
"learning_rate": 4.404990403071017e-05, |
|
"loss": 0.8107, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.1199616122840691, |
|
"grad_norm": 0.134765625, |
|
"learning_rate": 4.4001919385796546e-05, |
|
"loss": 0.8543, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.12092130518234165, |
|
"grad_norm": 0.1123046875, |
|
"learning_rate": 4.395393474088292e-05, |
|
"loss": 0.8406, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.1218809980806142, |
|
"grad_norm": 0.1025390625, |
|
"learning_rate": 4.3905950095969286e-05, |
|
"loss": 0.8258, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.12284069097888675, |
|
"grad_norm": 0.11279296875, |
|
"learning_rate": 4.3857965451055666e-05, |
|
"loss": 0.817, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.1238003838771593, |
|
"grad_norm": 0.1396484375, |
|
"learning_rate": 4.380998080614204e-05, |
|
"loss": 0.7262, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.12476007677543186, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 4.3761996161228406e-05, |
|
"loss": 0.76, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1257197696737044, |
|
"grad_norm": 0.11083984375, |
|
"learning_rate": 4.371401151631478e-05, |
|
"loss": 0.7845, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.12667946257197696, |
|
"grad_norm": 0.1259765625, |
|
"learning_rate": 4.366602687140115e-05, |
|
"loss": 0.8156, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.1276391554702495, |
|
"grad_norm": 0.11474609375, |
|
"learning_rate": 4.3618042226487526e-05, |
|
"loss": 0.8062, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.12859884836852206, |
|
"grad_norm": 0.11279296875, |
|
"learning_rate": 4.35700575815739e-05, |
|
"loss": 0.7609, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.1295585412667946, |
|
"grad_norm": 0.11328125, |
|
"learning_rate": 4.3522072936660266e-05, |
|
"loss": 0.8428, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.13051823416506717, |
|
"grad_norm": 0.1083984375, |
|
"learning_rate": 4.3474088291746646e-05, |
|
"loss": 0.7752, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.13147792706333974, |
|
"grad_norm": 0.12158203125, |
|
"learning_rate": 4.342610364683302e-05, |
|
"loss": 0.7556, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.1324376199616123, |
|
"grad_norm": 0.11083984375, |
|
"learning_rate": 4.3378119001919386e-05, |
|
"loss": 0.8069, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.13339731285988485, |
|
"grad_norm": 0.130859375, |
|
"learning_rate": 4.333013435700576e-05, |
|
"loss": 0.8006, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.1343570057581574, |
|
"grad_norm": 0.11865234375, |
|
"learning_rate": 4.328214971209213e-05, |
|
"loss": 0.8035, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.13531669865642995, |
|
"grad_norm": 0.1181640625, |
|
"learning_rate": 4.3234165067178506e-05, |
|
"loss": 0.792, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.1362763915547025, |
|
"grad_norm": 0.12890625, |
|
"learning_rate": 4.318618042226488e-05, |
|
"loss": 0.741, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.13723608445297505, |
|
"grad_norm": 0.11962890625, |
|
"learning_rate": 4.3138195777351246e-05, |
|
"loss": 0.7722, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.1381957773512476, |
|
"grad_norm": 0.1240234375, |
|
"learning_rate": 4.3090211132437626e-05, |
|
"loss": 0.7554, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.13915547024952016, |
|
"grad_norm": 0.11279296875, |
|
"learning_rate": 4.304222648752399e-05, |
|
"loss": 0.7693, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.1401151631477927, |
|
"grad_norm": 0.1142578125, |
|
"learning_rate": 4.2994241842610366e-05, |
|
"loss": 0.7235, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.14107485604606526, |
|
"grad_norm": 0.1123046875, |
|
"learning_rate": 4.294625719769674e-05, |
|
"loss": 0.7351, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.1420345489443378, |
|
"grad_norm": 0.11376953125, |
|
"learning_rate": 4.289827255278311e-05, |
|
"loss": 0.743, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.14299424184261036, |
|
"grad_norm": 0.1640625, |
|
"learning_rate": 4.2850287907869486e-05, |
|
"loss": 0.7349, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.14395393474088292, |
|
"grad_norm": 0.11474609375, |
|
"learning_rate": 4.280230326295586e-05, |
|
"loss": 0.8031, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.14491362763915547, |
|
"grad_norm": 0.1220703125, |
|
"learning_rate": 4.2754318618042226e-05, |
|
"loss": 0.8145, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.14587332053742802, |
|
"grad_norm": 0.1201171875, |
|
"learning_rate": 4.27063339731286e-05, |
|
"loss": 0.7043, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.14683301343570057, |
|
"grad_norm": 0.12255859375, |
|
"learning_rate": 4.265834932821497e-05, |
|
"loss": 0.7998, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.14779270633397312, |
|
"grad_norm": 0.140625, |
|
"learning_rate": 4.2610364683301346e-05, |
|
"loss": 0.7619, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.14875239923224567, |
|
"grad_norm": 0.119140625, |
|
"learning_rate": 4.256238003838772e-05, |
|
"loss": 0.7511, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.14971209213051823, |
|
"grad_norm": 0.11767578125, |
|
"learning_rate": 4.2514395393474086e-05, |
|
"loss": 0.7492, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.15067178502879078, |
|
"grad_norm": 0.1298828125, |
|
"learning_rate": 4.2466410748560466e-05, |
|
"loss": 0.7702, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.15163147792706333, |
|
"grad_norm": 0.1162109375, |
|
"learning_rate": 4.241842610364683e-05, |
|
"loss": 0.7038, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.15259117082533588, |
|
"grad_norm": 0.1181640625, |
|
"learning_rate": 4.2370441458733206e-05, |
|
"loss": 0.8124, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.15355086372360843, |
|
"grad_norm": 0.12890625, |
|
"learning_rate": 4.232245681381958e-05, |
|
"loss": 0.7566, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.15451055662188098, |
|
"grad_norm": 0.11474609375, |
|
"learning_rate": 4.227447216890595e-05, |
|
"loss": 0.7076, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.15547024952015356, |
|
"grad_norm": 0.1220703125, |
|
"learning_rate": 4.2226487523992326e-05, |
|
"loss": 0.7593, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.15642994241842612, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 4.21785028790787e-05, |
|
"loss": 0.7716, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.15738963531669867, |
|
"grad_norm": 0.1376953125, |
|
"learning_rate": 4.2130518234165065e-05, |
|
"loss": 0.8253, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.15834932821497122, |
|
"grad_norm": 0.119140625, |
|
"learning_rate": 4.2082533589251446e-05, |
|
"loss": 0.7395, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.15930902111324377, |
|
"grad_norm": 0.1142578125, |
|
"learning_rate": 4.203454894433781e-05, |
|
"loss": 0.7586, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.16026871401151632, |
|
"grad_norm": 0.12353515625, |
|
"learning_rate": 4.1986564299424185e-05, |
|
"loss": 0.7453, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.16122840690978887, |
|
"grad_norm": 0.123046875, |
|
"learning_rate": 4.193857965451056e-05, |
|
"loss": 0.7521, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.16218809980806143, |
|
"grad_norm": 0.12109375, |
|
"learning_rate": 4.189059500959693e-05, |
|
"loss": 0.7548, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.16314779270633398, |
|
"grad_norm": 0.1298828125, |
|
"learning_rate": 4.1842610364683305e-05, |
|
"loss": 0.7302, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.16410748560460653, |
|
"grad_norm": 0.12255859375, |
|
"learning_rate": 4.179462571976968e-05, |
|
"loss": 0.7698, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.16506717850287908, |
|
"grad_norm": 0.12255859375, |
|
"learning_rate": 4.1746641074856045e-05, |
|
"loss": 0.7054, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.16602687140115163, |
|
"grad_norm": 0.1240234375, |
|
"learning_rate": 4.1698656429942425e-05, |
|
"loss": 0.8326, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.16698656429942418, |
|
"grad_norm": 0.1240234375, |
|
"learning_rate": 4.165067178502879e-05, |
|
"loss": 0.7553, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.16794625719769674, |
|
"grad_norm": 0.1318359375, |
|
"learning_rate": 4.1602687140115165e-05, |
|
"loss": 0.7239, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.1689059500959693, |
|
"grad_norm": 0.1552734375, |
|
"learning_rate": 4.155470249520154e-05, |
|
"loss": 0.7776, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.16986564299424184, |
|
"grad_norm": 0.12158203125, |
|
"learning_rate": 4.1506717850287905e-05, |
|
"loss": 0.7774, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.1708253358925144, |
|
"grad_norm": 0.1201171875, |
|
"learning_rate": 4.1458733205374285e-05, |
|
"loss": 0.7945, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.17178502879078694, |
|
"grad_norm": 0.12451171875, |
|
"learning_rate": 4.141074856046065e-05, |
|
"loss": 0.8037, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.1727447216890595, |
|
"grad_norm": 0.12109375, |
|
"learning_rate": 4.1362763915547025e-05, |
|
"loss": 0.8721, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.17370441458733205, |
|
"grad_norm": 0.1328125, |
|
"learning_rate": 4.13147792706334e-05, |
|
"loss": 0.7226, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.1746641074856046, |
|
"grad_norm": 0.1337890625, |
|
"learning_rate": 4.126679462571977e-05, |
|
"loss": 0.7682, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.17562380038387715, |
|
"grad_norm": 0.119140625, |
|
"learning_rate": 4.1218809980806145e-05, |
|
"loss": 0.8327, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.1765834932821497, |
|
"grad_norm": 0.1259765625, |
|
"learning_rate": 4.117082533589252e-05, |
|
"loss": 0.8166, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.17754318618042225, |
|
"grad_norm": 0.11474609375, |
|
"learning_rate": 4.1122840690978885e-05, |
|
"loss": 0.8122, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.1785028790786948, |
|
"grad_norm": 0.1279296875, |
|
"learning_rate": 4.1074856046065265e-05, |
|
"loss": 0.7033, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.17946257197696738, |
|
"grad_norm": 0.1376953125, |
|
"learning_rate": 4.102687140115163e-05, |
|
"loss": 0.7136, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.18042226487523993, |
|
"grad_norm": 0.1259765625, |
|
"learning_rate": 4.0978886756238005e-05, |
|
"loss": 0.7105, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.1813819577735125, |
|
"grad_norm": 0.126953125, |
|
"learning_rate": 4.093090211132438e-05, |
|
"loss": 0.6925, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.18234165067178504, |
|
"grad_norm": 0.1376953125, |
|
"learning_rate": 4.088291746641075e-05, |
|
"loss": 0.7665, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1833013435700576, |
|
"grad_norm": 0.1259765625, |
|
"learning_rate": 4.0834932821497125e-05, |
|
"loss": 0.7447, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.18426103646833014, |
|
"grad_norm": 0.1279296875, |
|
"learning_rate": 4.078694817658349e-05, |
|
"loss": 0.8261, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.1852207293666027, |
|
"grad_norm": 0.1396484375, |
|
"learning_rate": 4.0738963531669865e-05, |
|
"loss": 0.747, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.18618042226487524, |
|
"grad_norm": 0.1337890625, |
|
"learning_rate": 4.0690978886756245e-05, |
|
"loss": 0.8633, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.1871401151631478, |
|
"grad_norm": 0.1328125, |
|
"learning_rate": 4.064299424184261e-05, |
|
"loss": 0.75, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.18809980806142035, |
|
"grad_norm": 0.1298828125, |
|
"learning_rate": 4.0595009596928985e-05, |
|
"loss": 0.7286, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.1890595009596929, |
|
"grad_norm": 0.134765625, |
|
"learning_rate": 4.054702495201536e-05, |
|
"loss": 0.7934, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.19001919385796545, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 4.049904030710173e-05, |
|
"loss": 0.7847, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.190978886756238, |
|
"grad_norm": 0.1376953125, |
|
"learning_rate": 4.0451055662188105e-05, |
|
"loss": 0.7316, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.19193857965451055, |
|
"grad_norm": 0.1396484375, |
|
"learning_rate": 4.040307101727447e-05, |
|
"loss": 0.6962, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1928982725527831, |
|
"grad_norm": 0.1328125, |
|
"learning_rate": 4.0355086372360845e-05, |
|
"loss": 0.7105, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.19385796545105566, |
|
"grad_norm": 0.1279296875, |
|
"learning_rate": 4.030710172744722e-05, |
|
"loss": 0.7288, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.1948176583493282, |
|
"grad_norm": 0.1259765625, |
|
"learning_rate": 4.025911708253359e-05, |
|
"loss": 0.7149, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.19577735124760076, |
|
"grad_norm": 0.1328125, |
|
"learning_rate": 4.0211132437619965e-05, |
|
"loss": 0.7202, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.1967370441458733, |
|
"grad_norm": 0.1298828125, |
|
"learning_rate": 4.016314779270634e-05, |
|
"loss": 0.6825, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.19769673704414586, |
|
"grad_norm": 0.1328125, |
|
"learning_rate": 4.0115163147792705e-05, |
|
"loss": 0.722, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.19865642994241842, |
|
"grad_norm": 0.1376953125, |
|
"learning_rate": 4.0067178502879085e-05, |
|
"loss": 0.683, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.19961612284069097, |
|
"grad_norm": 0.1484375, |
|
"learning_rate": 4.001919385796545e-05, |
|
"loss": 0.7062, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.20057581573896352, |
|
"grad_norm": 0.1328125, |
|
"learning_rate": 3.9971209213051825e-05, |
|
"loss": 0.7325, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.20153550863723607, |
|
"grad_norm": 0.12353515625, |
|
"learning_rate": 3.99232245681382e-05, |
|
"loss": 0.8078, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.20249520153550865, |
|
"grad_norm": 0.1337890625, |
|
"learning_rate": 3.987523992322457e-05, |
|
"loss": 0.6984, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.2034548944337812, |
|
"grad_norm": 0.13671875, |
|
"learning_rate": 3.9827255278310945e-05, |
|
"loss": 0.8541, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.20441458733205375, |
|
"grad_norm": 0.154296875, |
|
"learning_rate": 3.977927063339731e-05, |
|
"loss": 0.7189, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.2053742802303263, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 3.9731285988483684e-05, |
|
"loss": 0.6897, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.20633397312859886, |
|
"grad_norm": 0.138671875, |
|
"learning_rate": 3.9683301343570065e-05, |
|
"loss": 0.7874, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.2072936660268714, |
|
"grad_norm": 0.1328125, |
|
"learning_rate": 3.963531669865643e-05, |
|
"loss": 0.9131, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.20825335892514396, |
|
"grad_norm": 0.126953125, |
|
"learning_rate": 3.9587332053742804e-05, |
|
"loss": 0.6953, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.2092130518234165, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 3.953934740882918e-05, |
|
"loss": 0.7278, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.21017274472168906, |
|
"grad_norm": 0.203125, |
|
"learning_rate": 3.949136276391555e-05, |
|
"loss": 0.6261, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.21113243761996162, |
|
"grad_norm": 0.130859375, |
|
"learning_rate": 3.9443378119001924e-05, |
|
"loss": 0.7201, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.21209213051823417, |
|
"grad_norm": 0.134765625, |
|
"learning_rate": 3.939539347408829e-05, |
|
"loss": 0.7117, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.21305182341650672, |
|
"grad_norm": 0.1357421875, |
|
"learning_rate": 3.9347408829174664e-05, |
|
"loss": 0.795, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.21401151631477927, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 3.9299424184261044e-05, |
|
"loss": 0.698, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.21497120921305182, |
|
"grad_norm": 0.140625, |
|
"learning_rate": 3.925143953934741e-05, |
|
"loss": 0.7643, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.21593090211132437, |
|
"grad_norm": 0.1630859375, |
|
"learning_rate": 3.9203454894433784e-05, |
|
"loss": 0.7322, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.21689059500959693, |
|
"grad_norm": 0.1337890625, |
|
"learning_rate": 3.915547024952016e-05, |
|
"loss": 0.7173, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.21785028790786948, |
|
"grad_norm": 0.150390625, |
|
"learning_rate": 3.9107485604606524e-05, |
|
"loss": 0.8099, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.21880998080614203, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 3.9059500959692904e-05, |
|
"loss": 0.7695, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.21976967370441458, |
|
"grad_norm": 0.1357421875, |
|
"learning_rate": 3.901151631477927e-05, |
|
"loss": 0.7597, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.22072936660268713, |
|
"grad_norm": 0.138671875, |
|
"learning_rate": 3.8963531669865644e-05, |
|
"loss": 0.6964, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.22168905950095968, |
|
"grad_norm": 0.1455078125, |
|
"learning_rate": 3.891554702495202e-05, |
|
"loss": 0.7146, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.22264875239923224, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 3.886756238003839e-05, |
|
"loss": 0.7936, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.2236084452975048, |
|
"grad_norm": 0.1416015625, |
|
"learning_rate": 3.8819577735124764e-05, |
|
"loss": 0.7637, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.22456813819577734, |
|
"grad_norm": 0.154296875, |
|
"learning_rate": 3.877159309021113e-05, |
|
"loss": 0.7628, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.2255278310940499, |
|
"grad_norm": 0.1455078125, |
|
"learning_rate": 3.8723608445297504e-05, |
|
"loss": 0.6949, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.22648752399232247, |
|
"grad_norm": 0.1494140625, |
|
"learning_rate": 3.8675623800383884e-05, |
|
"loss": 0.7897, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.22744721689059502, |
|
"grad_norm": 0.1396484375, |
|
"learning_rate": 3.862763915547025e-05, |
|
"loss": 0.7913, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.22840690978886757, |
|
"grad_norm": 0.138671875, |
|
"learning_rate": 3.8579654510556624e-05, |
|
"loss": 0.748, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.22936660268714013, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 3.8531669865643e-05, |
|
"loss": 0.7325, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.23032629558541268, |
|
"grad_norm": 0.1513671875, |
|
"learning_rate": 3.848368522072937e-05, |
|
"loss": 0.7363, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.23128598848368523, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 3.8435700575815744e-05, |
|
"loss": 0.6879, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.23224568138195778, |
|
"grad_norm": 0.140625, |
|
"learning_rate": 3.838771593090211e-05, |
|
"loss": 0.6557, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.23320537428023033, |
|
"grad_norm": 0.1328125, |
|
"learning_rate": 3.8339731285988484e-05, |
|
"loss": 0.7875, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.23416506717850288, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 3.829174664107486e-05, |
|
"loss": 0.6932, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.23512476007677544, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 3.824376199616123e-05, |
|
"loss": 0.7278, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.236084452975048, |
|
"grad_norm": 0.142578125, |
|
"learning_rate": 3.8195777351247604e-05, |
|
"loss": 0.7493, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.23704414587332054, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 3.814779270633397e-05, |
|
"loss": 0.6941, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.2380038387715931, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 3.809980806142035e-05, |
|
"loss": 0.7274, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.23896353166986564, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 3.8051823416506724e-05, |
|
"loss": 0.7197, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.2399232245681382, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 3.800383877159309e-05, |
|
"loss": 0.7782, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.24088291746641075, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 3.7955854126679464e-05, |
|
"loss": 0.6752, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.2418426103646833, |
|
"grad_norm": 0.1416015625, |
|
"learning_rate": 3.790786948176584e-05, |
|
"loss": 0.7021, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.24280230326295585, |
|
"grad_norm": 0.1396484375, |
|
"learning_rate": 3.785988483685221e-05, |
|
"loss": 0.6937, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.2437619961612284, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 3.7811900191938584e-05, |
|
"loss": 0.8093, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.24472168905950095, |
|
"grad_norm": 0.142578125, |
|
"learning_rate": 3.776391554702495e-05, |
|
"loss": 0.7016, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.2456813819577735, |
|
"grad_norm": 0.14453125, |
|
"learning_rate": 3.7715930902111324e-05, |
|
"loss": 0.6506, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.24664107485604606, |
|
"grad_norm": 0.1376953125, |
|
"learning_rate": 3.7667946257197704e-05, |
|
"loss": 0.7127, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.2476007677543186, |
|
"grad_norm": 0.1513671875, |
|
"learning_rate": 3.761996161228407e-05, |
|
"loss": 0.7209, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.24856046065259116, |
|
"grad_norm": 0.13671875, |
|
"learning_rate": 3.7571976967370443e-05, |
|
"loss": 0.7729, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.2495201535508637, |
|
"grad_norm": 0.15234375, |
|
"learning_rate": 3.752399232245682e-05, |
|
"loss": 0.7553, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2504798464491363, |
|
"grad_norm": 0.1513671875, |
|
"learning_rate": 3.747600767754319e-05, |
|
"loss": 0.7793, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.2514395393474088, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 3.7428023032629563e-05, |
|
"loss": 0.6608, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.2523992322456814, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 3.738003838771593e-05, |
|
"loss": 0.7076, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.2533589251439539, |
|
"grad_norm": 0.1787109375, |
|
"learning_rate": 3.73320537428023e-05, |
|
"loss": 0.772, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.2543186180422265, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 3.728406909788868e-05, |
|
"loss": 0.6851, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.255278310940499, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 3.723608445297505e-05, |
|
"loss": 0.6781, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.2562380038387716, |
|
"grad_norm": 0.14453125, |
|
"learning_rate": 3.718809980806142e-05, |
|
"loss": 0.6604, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.2571976967370441, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 3.714011516314779e-05, |
|
"loss": 0.6484, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.2581573896353167, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 3.709213051823417e-05, |
|
"loss": 0.7034, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.2591170825335892, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 3.704414587332054e-05, |
|
"loss": 0.7504, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2600767754318618, |
|
"grad_norm": 0.15625, |
|
"learning_rate": 3.699616122840691e-05, |
|
"loss": 0.7586, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.26103646833013433, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 3.694817658349328e-05, |
|
"loss": 0.741, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.2619961612284069, |
|
"grad_norm": 0.1513671875, |
|
"learning_rate": 3.6900191938579657e-05, |
|
"loss": 0.6887, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.2629558541266795, |
|
"grad_norm": 0.1552734375, |
|
"learning_rate": 3.685220729366603e-05, |
|
"loss": 0.6369, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.263915547024952, |
|
"grad_norm": 0.15625, |
|
"learning_rate": 3.68042226487524e-05, |
|
"loss": 0.7094, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.2648752399232246, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 3.675623800383877e-05, |
|
"loss": 0.7673, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.2658349328214971, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 3.670825335892514e-05, |
|
"loss": 0.7441, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.2667946257197697, |
|
"grad_norm": 0.150390625, |
|
"learning_rate": 3.6660268714011516e-05, |
|
"loss": 0.6746, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.2677543186180422, |
|
"grad_norm": 0.15234375, |
|
"learning_rate": 3.661228406909789e-05, |
|
"loss": 0.7387, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.2687140115163148, |
|
"grad_norm": 0.15625, |
|
"learning_rate": 3.656429942418426e-05, |
|
"loss": 0.6934, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.2696737044145873, |
|
"grad_norm": 0.1552734375, |
|
"learning_rate": 3.651631477927063e-05, |
|
"loss": 0.64, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.2706333973128599, |
|
"grad_norm": 0.171875, |
|
"learning_rate": 3.646833013435701e-05, |
|
"loss": 0.9662, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.2715930902111324, |
|
"grad_norm": 0.1455078125, |
|
"learning_rate": 3.642034548944338e-05, |
|
"loss": 0.7063, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.272552783109405, |
|
"grad_norm": 0.1513671875, |
|
"learning_rate": 3.637236084452975e-05, |
|
"loss": 0.7988, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.27351247600767753, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 3.632437619961612e-05, |
|
"loss": 0.7435, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.2744721689059501, |
|
"grad_norm": 0.1513671875, |
|
"learning_rate": 3.6276391554702496e-05, |
|
"loss": 0.7163, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.27543186180422263, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 3.622840690978887e-05, |
|
"loss": 0.6569, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.2763915547024952, |
|
"grad_norm": 0.158203125, |
|
"learning_rate": 3.618042226487524e-05, |
|
"loss": 0.699, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.27735124760076774, |
|
"grad_norm": 0.154296875, |
|
"learning_rate": 3.613243761996161e-05, |
|
"loss": 0.7237, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.2783109404990403, |
|
"grad_norm": 0.1533203125, |
|
"learning_rate": 3.608445297504799e-05, |
|
"loss": 0.6712, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.27927063339731284, |
|
"grad_norm": 0.1484375, |
|
"learning_rate": 3.603646833013436e-05, |
|
"loss": 0.6856, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.2802303262955854, |
|
"grad_norm": 0.154296875, |
|
"learning_rate": 3.598848368522073e-05, |
|
"loss": 0.7346, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.28119001919385794, |
|
"grad_norm": 0.1630859375, |
|
"learning_rate": 3.59404990403071e-05, |
|
"loss": 0.6807, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.2821497120921305, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 3.5892514395393476e-05, |
|
"loss": 0.6471, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.28310940499040305, |
|
"grad_norm": 0.1552734375, |
|
"learning_rate": 3.584452975047985e-05, |
|
"loss": 0.6942, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.2840690978886756, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 3.579654510556622e-05, |
|
"loss": 0.7439, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.28502879078694815, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 3.574856046065259e-05, |
|
"loss": 0.7056, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.28598848368522073, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 3.570057581573896e-05, |
|
"loss": 0.6869, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.2869481765834933, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 3.5652591170825336e-05, |
|
"loss": 0.6763, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.28790786948176583, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 3.560460652591171e-05, |
|
"loss": 0.7642, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2888675623800384, |
|
"grad_norm": 0.154296875, |
|
"learning_rate": 3.555662188099808e-05, |
|
"loss": 0.7297, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.28982725527831094, |
|
"grad_norm": 0.15234375, |
|
"learning_rate": 3.550863723608445e-05, |
|
"loss": 0.6708, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.2907869481765835, |
|
"grad_norm": 0.1611328125, |
|
"learning_rate": 3.546065259117083e-05, |
|
"loss": 0.6544, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.29174664107485604, |
|
"grad_norm": 0.14453125, |
|
"learning_rate": 3.54126679462572e-05, |
|
"loss": 0.6653, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.2927063339731286, |
|
"grad_norm": 0.2265625, |
|
"learning_rate": 3.536468330134357e-05, |
|
"loss": 0.6902, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.29366602687140114, |
|
"grad_norm": 0.150390625, |
|
"learning_rate": 3.531669865642994e-05, |
|
"loss": 0.6777, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.2946257197696737, |
|
"grad_norm": 0.1552734375, |
|
"learning_rate": 3.5268714011516316e-05, |
|
"loss": 0.6928, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.29558541266794625, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 3.522072936660269e-05, |
|
"loss": 0.7738, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.2965451055662188, |
|
"grad_norm": 0.1533203125, |
|
"learning_rate": 3.517274472168906e-05, |
|
"loss": 0.6672, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.29750479846449135, |
|
"grad_norm": 0.1572265625, |
|
"learning_rate": 3.512476007677543e-05, |
|
"loss": 0.6526, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.29846449136276393, |
|
"grad_norm": 0.1552734375, |
|
"learning_rate": 3.507677543186181e-05, |
|
"loss": 0.7665, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.29942418426103645, |
|
"grad_norm": 0.154296875, |
|
"learning_rate": 3.502879078694818e-05, |
|
"loss": 0.7359, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.30038387715930903, |
|
"grad_norm": 0.1494140625, |
|
"learning_rate": 3.498080614203455e-05, |
|
"loss": 0.6592, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.30134357005758156, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 3.493282149712092e-05, |
|
"loss": 0.752, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.30230326295585414, |
|
"grad_norm": 0.162109375, |
|
"learning_rate": 3.4884836852207296e-05, |
|
"loss": 0.6995, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.30326295585412666, |
|
"grad_norm": 0.15234375, |
|
"learning_rate": 3.483685220729367e-05, |
|
"loss": 0.7031, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.30422264875239924, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 3.478886756238004e-05, |
|
"loss": 0.7046, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.30518234165067176, |
|
"grad_norm": 0.15234375, |
|
"learning_rate": 3.474088291746641e-05, |
|
"loss": 0.6898, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.30614203454894434, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 3.469289827255279e-05, |
|
"loss": 0.6746, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.30710172744721687, |
|
"grad_norm": 0.142578125, |
|
"learning_rate": 3.4644913627639155e-05, |
|
"loss": 0.8099, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.30806142034548945, |
|
"grad_norm": 0.1611328125, |
|
"learning_rate": 3.459692898272553e-05, |
|
"loss": 0.6901, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.30902111324376197, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 3.45489443378119e-05, |
|
"loss": 0.6712, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.30998080614203455, |
|
"grad_norm": 0.1494140625, |
|
"learning_rate": 3.450095969289827e-05, |
|
"loss": 0.6829, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.31094049904030713, |
|
"grad_norm": 0.1572265625, |
|
"learning_rate": 3.445297504798465e-05, |
|
"loss": 0.6831, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.31190019193857965, |
|
"grad_norm": 0.16015625, |
|
"learning_rate": 3.440499040307102e-05, |
|
"loss": 0.6814, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.31285988483685223, |
|
"grad_norm": 0.1552734375, |
|
"learning_rate": 3.435700575815739e-05, |
|
"loss": 0.7156, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.31381957773512476, |
|
"grad_norm": 0.166015625, |
|
"learning_rate": 3.430902111324376e-05, |
|
"loss": 0.7648, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.31477927063339733, |
|
"grad_norm": 0.1455078125, |
|
"learning_rate": 3.4261036468330135e-05, |
|
"loss": 0.7224, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.31573896353166986, |
|
"grad_norm": 0.154296875, |
|
"learning_rate": 3.421305182341651e-05, |
|
"loss": 0.6945, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.31669865642994244, |
|
"grad_norm": 0.1533203125, |
|
"learning_rate": 3.416506717850288e-05, |
|
"loss": 0.7545, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.31765834932821496, |
|
"grad_norm": 0.1484375, |
|
"learning_rate": 3.411708253358925e-05, |
|
"loss": 0.7285, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.31861804222648754, |
|
"grad_norm": 0.1513671875, |
|
"learning_rate": 3.406909788867563e-05, |
|
"loss": 0.7181, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.31957773512476007, |
|
"grad_norm": 0.154296875, |
|
"learning_rate": 3.4021113243761995e-05, |
|
"loss": 0.7902, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.32053742802303264, |
|
"grad_norm": 0.1962890625, |
|
"learning_rate": 3.397312859884837e-05, |
|
"loss": 0.6529, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.32149712092130517, |
|
"grad_norm": 0.16015625, |
|
"learning_rate": 3.392514395393474e-05, |
|
"loss": 0.7397, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.32245681381957775, |
|
"grad_norm": 0.1552734375, |
|
"learning_rate": 3.3877159309021115e-05, |
|
"loss": 0.6852, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.32341650671785027, |
|
"grad_norm": 0.1640625, |
|
"learning_rate": 3.382917466410749e-05, |
|
"loss": 0.7339, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.32437619961612285, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 3.378119001919386e-05, |
|
"loss": 0.6874, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.3253358925143954, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 3.373320537428023e-05, |
|
"loss": 0.6913, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.32629558541266795, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 3.368522072936661e-05, |
|
"loss": 0.6271, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.3272552783109405, |
|
"grad_norm": 0.166015625, |
|
"learning_rate": 3.3637236084452975e-05, |
|
"loss": 0.6797, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.32821497120921306, |
|
"grad_norm": 0.1494140625, |
|
"learning_rate": 3.358925143953935e-05, |
|
"loss": 0.6866, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.3291746641074856, |
|
"grad_norm": 0.1669921875, |
|
"learning_rate": 3.354126679462572e-05, |
|
"loss": 0.7679, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.33013435700575816, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 3.3493282149712095e-05, |
|
"loss": 0.7125, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.3310940499040307, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 3.344529750479847e-05, |
|
"loss": 0.7532, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.33205374280230326, |
|
"grad_norm": 0.1484375, |
|
"learning_rate": 3.339731285988484e-05, |
|
"loss": 0.6617, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.3330134357005758, |
|
"grad_norm": 0.1611328125, |
|
"learning_rate": 3.334932821497121e-05, |
|
"loss": 0.6764, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.33397312859884837, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 3.330134357005758e-05, |
|
"loss": 0.6953, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.33493282149712095, |
|
"grad_norm": 0.1640625, |
|
"learning_rate": 3.3253358925143955e-05, |
|
"loss": 0.6981, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.33589251439539347, |
|
"grad_norm": 0.21875, |
|
"learning_rate": 3.320537428023033e-05, |
|
"loss": 0.7343, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.33685220729366605, |
|
"grad_norm": 0.166015625, |
|
"learning_rate": 3.31573896353167e-05, |
|
"loss": 0.7237, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.3378119001919386, |
|
"grad_norm": 0.166015625, |
|
"learning_rate": 3.310940499040307e-05, |
|
"loss": 0.7092, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.33877159309021115, |
|
"grad_norm": 0.173828125, |
|
"learning_rate": 3.306142034548945e-05, |
|
"loss": 0.6969, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.3397312859884837, |
|
"grad_norm": 0.166015625, |
|
"learning_rate": 3.3013435700575815e-05, |
|
"loss": 0.6786, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.34069097888675626, |
|
"grad_norm": 0.1572265625, |
|
"learning_rate": 3.296545105566219e-05, |
|
"loss": 0.7467, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.3416506717850288, |
|
"grad_norm": 0.169921875, |
|
"learning_rate": 3.291746641074856e-05, |
|
"loss": 0.6644, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.34261036468330136, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 3.2869481765834935e-05, |
|
"loss": 0.6865, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.3435700575815739, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 3.282149712092131e-05, |
|
"loss": 0.7558, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.34452975047984646, |
|
"grad_norm": 0.1552734375, |
|
"learning_rate": 3.277351247600768e-05, |
|
"loss": 0.6625, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.345489443378119, |
|
"grad_norm": 0.1630859375, |
|
"learning_rate": 3.272552783109405e-05, |
|
"loss": 0.7669, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.34644913627639157, |
|
"grad_norm": 0.1572265625, |
|
"learning_rate": 3.267754318618043e-05, |
|
"loss": 0.6358, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.3474088291746641, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 3.2629558541266795e-05, |
|
"loss": 0.7083, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.34836852207293667, |
|
"grad_norm": 0.1708984375, |
|
"learning_rate": 3.258157389635317e-05, |
|
"loss": 0.7058, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.3493282149712092, |
|
"grad_norm": 0.1533203125, |
|
"learning_rate": 3.253358925143954e-05, |
|
"loss": 0.6559, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.3502879078694818, |
|
"grad_norm": 0.162109375, |
|
"learning_rate": 3.2485604606525915e-05, |
|
"loss": 0.7451, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.3512476007677543, |
|
"grad_norm": 0.169921875, |
|
"learning_rate": 3.243761996161229e-05, |
|
"loss": 0.626, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.3522072936660269, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 3.2389635316698654e-05, |
|
"loss": 0.8474, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.3531669865642994, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 3.234165067178503e-05, |
|
"loss": 0.7122, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.354126679462572, |
|
"grad_norm": 0.15625, |
|
"learning_rate": 3.229366602687141e-05, |
|
"loss": 0.6793, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.3550863723608445, |
|
"grad_norm": 0.173828125, |
|
"learning_rate": 3.2245681381957774e-05, |
|
"loss": 0.676, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3560460652591171, |
|
"grad_norm": 0.1630859375, |
|
"learning_rate": 3.219769673704415e-05, |
|
"loss": 0.6879, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.3570057581573896, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 3.214971209213052e-05, |
|
"loss": 0.794, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.3579654510556622, |
|
"grad_norm": 0.1572265625, |
|
"learning_rate": 3.210172744721689e-05, |
|
"loss": 0.6602, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.35892514395393477, |
|
"grad_norm": 0.15234375, |
|
"learning_rate": 3.205374280230327e-05, |
|
"loss": 0.6773, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.3598848368522073, |
|
"grad_norm": 0.1640625, |
|
"learning_rate": 3.2005758157389634e-05, |
|
"loss": 0.7207, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.36084452975047987, |
|
"grad_norm": 0.2001953125, |
|
"learning_rate": 3.195777351247601e-05, |
|
"loss": 0.7465, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.3618042226487524, |
|
"grad_norm": 0.154296875, |
|
"learning_rate": 3.190978886756238e-05, |
|
"loss": 0.63, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.362763915547025, |
|
"grad_norm": 0.16796875, |
|
"learning_rate": 3.1861804222648754e-05, |
|
"loss": 0.6951, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.3637236084452975, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 3.181381957773513e-05, |
|
"loss": 0.6942, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.3646833013435701, |
|
"grad_norm": 0.171875, |
|
"learning_rate": 3.17658349328215e-05, |
|
"loss": 0.7014, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.3656429942418426, |
|
"grad_norm": 0.1748046875, |
|
"learning_rate": 3.171785028790787e-05, |
|
"loss": 0.7354, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.3666026871401152, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 3.166986564299425e-05, |
|
"loss": 0.6438, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.3675623800383877, |
|
"grad_norm": 0.169921875, |
|
"learning_rate": 3.1621880998080614e-05, |
|
"loss": 0.6779, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.3685220729366603, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 3.157389635316699e-05, |
|
"loss": 0.7349, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.3694817658349328, |
|
"grad_norm": 0.162109375, |
|
"learning_rate": 3.152591170825336e-05, |
|
"loss": 0.6854, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.3704414587332054, |
|
"grad_norm": 0.150390625, |
|
"learning_rate": 3.1477927063339734e-05, |
|
"loss": 0.6163, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.3714011516314779, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 3.142994241842611e-05, |
|
"loss": 0.7172, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.3723608445297505, |
|
"grad_norm": 0.166015625, |
|
"learning_rate": 3.1381957773512474e-05, |
|
"loss": 0.6633, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.373320537428023, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 3.133397312859885e-05, |
|
"loss": 0.6357, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.3742802303262956, |
|
"grad_norm": 0.16015625, |
|
"learning_rate": 3.128598848368523e-05, |
|
"loss": 0.6354, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.3752399232245681, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 3.1238003838771594e-05, |
|
"loss": 0.6848, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.3761996161228407, |
|
"grad_norm": 0.15234375, |
|
"learning_rate": 3.119001919385797e-05, |
|
"loss": 0.6624, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.3771593090211132, |
|
"grad_norm": 0.173828125, |
|
"learning_rate": 3.114203454894434e-05, |
|
"loss": 0.7023, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.3781190019193858, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 3.1094049904030714e-05, |
|
"loss": 0.6766, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.3790786948176583, |
|
"grad_norm": 0.1748046875, |
|
"learning_rate": 3.104606525911709e-05, |
|
"loss": 0.7, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.3800383877159309, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 3.0998080614203454e-05, |
|
"loss": 0.6905, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.3809980806142035, |
|
"grad_norm": 0.16796875, |
|
"learning_rate": 3.095009596928983e-05, |
|
"loss": 0.6854, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.381957773512476, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 3.09021113243762e-05, |
|
"loss": 0.6882, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.3829174664107486, |
|
"grad_norm": 0.162109375, |
|
"learning_rate": 3.0854126679462574e-05, |
|
"loss": 0.6951, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.3838771593090211, |
|
"grad_norm": 0.2314453125, |
|
"learning_rate": 3.080614203454895e-05, |
|
"loss": 0.6999, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3848368522072937, |
|
"grad_norm": 0.1708984375, |
|
"learning_rate": 3.0758157389635314e-05, |
|
"loss": 0.6821, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.3857965451055662, |
|
"grad_norm": 0.2021484375, |
|
"learning_rate": 3.071017274472169e-05, |
|
"loss": 0.6445, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.3867562380038388, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 3.066218809980807e-05, |
|
"loss": 0.6687, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.3877159309021113, |
|
"grad_norm": 0.1708984375, |
|
"learning_rate": 3.0614203454894434e-05, |
|
"loss": 0.6485, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.3886756238003839, |
|
"grad_norm": 0.1640625, |
|
"learning_rate": 3.056621880998081e-05, |
|
"loss": 0.6737, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.3896353166986564, |
|
"grad_norm": 0.166015625, |
|
"learning_rate": 3.051823416506718e-05, |
|
"loss": 0.6588, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.390595009596929, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 3.0470249520153554e-05, |
|
"loss": 0.6528, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.3915547024952015, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 3.0422264875239924e-05, |
|
"loss": 0.6638, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.3925143953934741, |
|
"grad_norm": 0.1669921875, |
|
"learning_rate": 3.0374280230326297e-05, |
|
"loss": 0.6735, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.3934740882917466, |
|
"grad_norm": 0.1669921875, |
|
"learning_rate": 3.0326295585412667e-05, |
|
"loss": 0.7035, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.3944337811900192, |
|
"grad_norm": 0.16796875, |
|
"learning_rate": 3.0278310940499044e-05, |
|
"loss": 0.6549, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.39539347408829173, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 3.0230326295585414e-05, |
|
"loss": 0.7402, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.3963531669865643, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 3.0182341650671787e-05, |
|
"loss": 0.7682, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.39731285988483683, |
|
"grad_norm": 0.1572265625, |
|
"learning_rate": 3.0134357005758157e-05, |
|
"loss": 0.6547, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.3982725527831094, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 3.0086372360844534e-05, |
|
"loss": 0.6486, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.39923224568138194, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 3.0038387715930903e-05, |
|
"loss": 0.6471, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.4001919385796545, |
|
"grad_norm": 0.1748046875, |
|
"learning_rate": 2.9990403071017277e-05, |
|
"loss": 0.6749, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.40115163147792704, |
|
"grad_norm": 0.162109375, |
|
"learning_rate": 2.9942418426103647e-05, |
|
"loss": 0.6779, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.4021113243761996, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 2.9894433781190023e-05, |
|
"loss": 0.6719, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.40307101727447214, |
|
"grad_norm": 0.171875, |
|
"learning_rate": 2.9846449136276393e-05, |
|
"loss": 0.6665, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.4040307101727447, |
|
"grad_norm": 0.171875, |
|
"learning_rate": 2.9798464491362767e-05, |
|
"loss": 0.6897, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.4049904030710173, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 2.9750479846449137e-05, |
|
"loss": 0.6689, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.4059500959692898, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 2.9702495201535507e-05, |
|
"loss": 0.6594, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.4069097888675624, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 2.9654510556621883e-05, |
|
"loss": 0.6481, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.40786948176583493, |
|
"grad_norm": 0.1640625, |
|
"learning_rate": 2.9606525911708257e-05, |
|
"loss": 0.5975, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.4088291746641075, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 2.9558541266794627e-05, |
|
"loss": 0.6538, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.40978886756238003, |
|
"grad_norm": 0.162109375, |
|
"learning_rate": 2.9510556621880997e-05, |
|
"loss": 0.7139, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.4107485604606526, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 2.9462571976967373e-05, |
|
"loss": 0.6408, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.41170825335892514, |
|
"grad_norm": 0.2001953125, |
|
"learning_rate": 2.9414587332053743e-05, |
|
"loss": 0.7105, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.4126679462571977, |
|
"grad_norm": 0.2197265625, |
|
"learning_rate": 2.9366602687140116e-05, |
|
"loss": 0.7465, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.41362763915547024, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 2.9318618042226486e-05, |
|
"loss": 0.6594, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.4145873320537428, |
|
"grad_norm": 0.1708984375, |
|
"learning_rate": 2.9270633397312863e-05, |
|
"loss": 0.6646, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.41554702495201534, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 2.9222648752399233e-05, |
|
"loss": 0.6537, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.4165067178502879, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 2.9174664107485606e-05, |
|
"loss": 0.7188, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.41746641074856045, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 2.9126679462571976e-05, |
|
"loss": 0.7669, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.418426103646833, |
|
"grad_norm": 0.169921875, |
|
"learning_rate": 2.9078694817658353e-05, |
|
"loss": 0.6955, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.41938579654510555, |
|
"grad_norm": 0.166015625, |
|
"learning_rate": 2.9030710172744723e-05, |
|
"loss": 0.668, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.42034548944337813, |
|
"grad_norm": 0.166015625, |
|
"learning_rate": 2.8982725527831096e-05, |
|
"loss": 0.675, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.42130518234165065, |
|
"grad_norm": 0.1630859375, |
|
"learning_rate": 2.8934740882917466e-05, |
|
"loss": 0.7466, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.42226487523992323, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 2.8886756238003843e-05, |
|
"loss": 0.6396, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.42322456813819576, |
|
"grad_norm": 0.169921875, |
|
"learning_rate": 2.8838771593090213e-05, |
|
"loss": 0.675, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.42418426103646834, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 2.8790786948176586e-05, |
|
"loss": 0.7135, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.42514395393474086, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 2.8742802303262956e-05, |
|
"loss": 0.6642, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.42610364683301344, |
|
"grad_norm": 0.16796875, |
|
"learning_rate": 2.8694817658349333e-05, |
|
"loss": 0.6373, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.42706333973128596, |
|
"grad_norm": 0.1787109375, |
|
"learning_rate": 2.8646833013435703e-05, |
|
"loss": 0.6906, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.42802303262955854, |
|
"grad_norm": 0.1669921875, |
|
"learning_rate": 2.8598848368522073e-05, |
|
"loss": 0.6607, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.4289827255278311, |
|
"grad_norm": 0.171875, |
|
"learning_rate": 2.8550863723608446e-05, |
|
"loss": 0.7029, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.42994241842610365, |
|
"grad_norm": 0.1669921875, |
|
"learning_rate": 2.8502879078694816e-05, |
|
"loss": 0.6377, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.4309021113243762, |
|
"grad_norm": 0.1787109375, |
|
"learning_rate": 2.8454894433781193e-05, |
|
"loss": 0.7336, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.43186180422264875, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 2.8406909788867563e-05, |
|
"loss": 0.6323, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.43282149712092133, |
|
"grad_norm": 0.1708984375, |
|
"learning_rate": 2.8358925143953936e-05, |
|
"loss": 0.6363, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.43378119001919385, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 2.8310940499040306e-05, |
|
"loss": 0.6749, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.43474088291746643, |
|
"grad_norm": 0.1748046875, |
|
"learning_rate": 2.8262955854126683e-05, |
|
"loss": 0.7227, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.43570057581573896, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 2.8214971209213053e-05, |
|
"loss": 0.7482, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.43666026871401153, |
|
"grad_norm": 0.169921875, |
|
"learning_rate": 2.8166986564299426e-05, |
|
"loss": 0.6636, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.43761996161228406, |
|
"grad_norm": 0.1748046875, |
|
"learning_rate": 2.8119001919385796e-05, |
|
"loss": 0.6363, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.43857965451055664, |
|
"grad_norm": 0.1787109375, |
|
"learning_rate": 2.8071017274472173e-05, |
|
"loss": 0.6765, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.43953934740882916, |
|
"grad_norm": 0.166015625, |
|
"learning_rate": 2.8023032629558543e-05, |
|
"loss": 0.7481, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.44049904030710174, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 2.7975047984644916e-05, |
|
"loss": 0.6333, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.44145873320537427, |
|
"grad_norm": 0.1708984375, |
|
"learning_rate": 2.7927063339731286e-05, |
|
"loss": 0.6644, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.44241842610364684, |
|
"grad_norm": 0.1787109375, |
|
"learning_rate": 2.7879078694817663e-05, |
|
"loss": 0.6443, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.44337811900191937, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 2.7831094049904032e-05, |
|
"loss": 0.7137, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.44433781190019195, |
|
"grad_norm": 0.1748046875, |
|
"learning_rate": 2.7783109404990402e-05, |
|
"loss": 0.653, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.44529750479846447, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 2.7735124760076776e-05, |
|
"loss": 0.6663, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.44625719769673705, |
|
"grad_norm": 0.173828125, |
|
"learning_rate": 2.7687140115163152e-05, |
|
"loss": 0.7799, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.4472168905950096, |
|
"grad_norm": 0.1708984375, |
|
"learning_rate": 2.7639155470249522e-05, |
|
"loss": 0.7356, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.44817658349328215, |
|
"grad_norm": 0.1630859375, |
|
"learning_rate": 2.7591170825335892e-05, |
|
"loss": 0.622, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.4491362763915547, |
|
"grad_norm": 0.16796875, |
|
"learning_rate": 2.7543186180422266e-05, |
|
"loss": 0.7582, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.45009596928982726, |
|
"grad_norm": 0.173828125, |
|
"learning_rate": 2.7495201535508642e-05, |
|
"loss": 0.6162, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.4510556621880998, |
|
"grad_norm": 0.1748046875, |
|
"learning_rate": 2.7447216890595012e-05, |
|
"loss": 0.6388, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.45201535508637236, |
|
"grad_norm": 0.16796875, |
|
"learning_rate": 2.7399232245681382e-05, |
|
"loss": 0.6759, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.45297504798464494, |
|
"grad_norm": 0.1748046875, |
|
"learning_rate": 2.7351247600767756e-05, |
|
"loss": 0.6981, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.45393474088291746, |
|
"grad_norm": 0.1611328125, |
|
"learning_rate": 2.7303262955854126e-05, |
|
"loss": 0.6393, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.45489443378119004, |
|
"grad_norm": 0.1708984375, |
|
"learning_rate": 2.7255278310940502e-05, |
|
"loss": 0.6688, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.45585412667946257, |
|
"grad_norm": 0.171875, |
|
"learning_rate": 2.7207293666026872e-05, |
|
"loss": 0.6113, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.45681381957773515, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 2.7159309021113246e-05, |
|
"loss": 0.6047, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.45777351247600767, |
|
"grad_norm": 0.19921875, |
|
"learning_rate": 2.7111324376199615e-05, |
|
"loss": 0.6615, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.45873320537428025, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 2.7063339731285992e-05, |
|
"loss": 0.6283, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.4596928982725528, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 2.7015355086372362e-05, |
|
"loss": 0.7262, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.46065259117082535, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 2.6967370441458732e-05, |
|
"loss": 0.6462, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.4616122840690979, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 2.6919385796545105e-05, |
|
"loss": 0.6287, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.46257197696737046, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 2.6871401151631482e-05, |
|
"loss": 0.6341, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.463531669865643, |
|
"grad_norm": 0.16796875, |
|
"learning_rate": 2.6823416506717852e-05, |
|
"loss": 0.6403, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.46449136276391556, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 2.6775431861804222e-05, |
|
"loss": 0.6479, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.4654510556621881, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 2.6727447216890595e-05, |
|
"loss": 0.6622, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.46641074856046066, |
|
"grad_norm": 0.171875, |
|
"learning_rate": 2.6679462571976972e-05, |
|
"loss": 0.7203, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.4673704414587332, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 2.6631477927063342e-05, |
|
"loss": 0.6908, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.46833013435700577, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 2.6583493282149712e-05, |
|
"loss": 0.6072, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.4692898272552783, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 2.6535508637236085e-05, |
|
"loss": 0.6831, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.47024952015355087, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 2.6487523992322462e-05, |
|
"loss": 0.6283, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.4712092130518234, |
|
"grad_norm": 0.173828125, |
|
"learning_rate": 2.6439539347408832e-05, |
|
"loss": 0.7183, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.472168905950096, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 2.6391554702495202e-05, |
|
"loss": 0.6332, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.4731285988483685, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 2.6343570057581575e-05, |
|
"loss": 0.629, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.4740882917466411, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 2.6295585412667952e-05, |
|
"loss": 0.6857, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.4750479846449136, |
|
"grad_norm": 0.169921875, |
|
"learning_rate": 2.6247600767754322e-05, |
|
"loss": 0.6298, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.4760076775431862, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 2.6199616122840692e-05, |
|
"loss": 0.6572, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.47696737044145876, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 2.615163147792706e-05, |
|
"loss": 0.5846, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.4779270633397313, |
|
"grad_norm": 0.2138671875, |
|
"learning_rate": 2.6103646833013435e-05, |
|
"loss": 0.7872, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.47888675623800386, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 2.6055662188099812e-05, |
|
"loss": 0.6542, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.4798464491362764, |
|
"grad_norm": 0.173828125, |
|
"learning_rate": 2.600767754318618e-05, |
|
"loss": 0.6299, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.48080614203454897, |
|
"grad_norm": 0.2392578125, |
|
"learning_rate": 2.595969289827255e-05, |
|
"loss": 0.6197, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.4817658349328215, |
|
"grad_norm": 0.19921875, |
|
"learning_rate": 2.5911708253358925e-05, |
|
"loss": 0.6573, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.48272552783109407, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 2.58637236084453e-05, |
|
"loss": 0.6847, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.4836852207293666, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 2.581573896353167e-05, |
|
"loss": 0.6969, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.4846449136276392, |
|
"grad_norm": 0.1748046875, |
|
"learning_rate": 2.576775431861804e-05, |
|
"loss": 0.6578, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.4856046065259117, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 2.5719769673704415e-05, |
|
"loss": 0.712, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.4865642994241843, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 2.567178502879079e-05, |
|
"loss": 0.6483, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.4875239923224568, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 2.562380038387716e-05, |
|
"loss": 0.751, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.4884836852207294, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 2.557581573896353e-05, |
|
"loss": 0.7356, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.4894433781190019, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 2.5527831094049905e-05, |
|
"loss": 0.6905, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.4904030710172745, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 2.547984644913628e-05, |
|
"loss": 0.6421, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.491362763915547, |
|
"grad_norm": 0.23046875, |
|
"learning_rate": 2.543186180422265e-05, |
|
"loss": 0.7447, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.4923224568138196, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 2.538387715930902e-05, |
|
"loss": 0.63, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.4932821497120921, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 2.533589251439539e-05, |
|
"loss": 0.6252, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.4942418426103647, |
|
"grad_norm": 0.1787109375, |
|
"learning_rate": 2.5287907869481768e-05, |
|
"loss": 0.6607, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.4952015355086372, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 2.523992322456814e-05, |
|
"loss": 0.6498, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.4961612284069098, |
|
"grad_norm": 0.171875, |
|
"learning_rate": 2.519193857965451e-05, |
|
"loss": 0.7057, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.4971209213051823, |
|
"grad_norm": 0.173828125, |
|
"learning_rate": 2.514395393474088e-05, |
|
"loss": 0.6589, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.4980806142034549, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 2.5095969289827258e-05, |
|
"loss": 0.6719, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.4990403071017274, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 2.504798464491363e-05, |
|
"loss": 0.7305, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.6557, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.5009596928982726, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 2.495201535508637e-05, |
|
"loss": 0.7, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.5019193857965452, |
|
"grad_norm": 0.1708984375, |
|
"learning_rate": 2.4904030710172744e-05, |
|
"loss": 0.6396, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.5028790786948176, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 2.4856046065259118e-05, |
|
"loss": 0.6105, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.5038387715930902, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 2.480806142034549e-05, |
|
"loss": 0.6552, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.5047984644913628, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 2.476007677543186e-05, |
|
"loss": 0.6455, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.5057581573896354, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 2.4712092130518234e-05, |
|
"loss": 0.6799, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.5067178502879078, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 2.4664107485604608e-05, |
|
"loss": 0.7056, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.5076775431861804, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 2.461612284069098e-05, |
|
"loss": 0.6064, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.508637236084453, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 2.456813819577735e-05, |
|
"loss": 0.6832, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.5095969289827256, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 2.4520153550863724e-05, |
|
"loss": 0.6416, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.510556621880998, |
|
"grad_norm": 0.171875, |
|
"learning_rate": 2.4472168905950098e-05, |
|
"loss": 0.6261, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.5115163147792706, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 2.442418426103647e-05, |
|
"loss": 0.648, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.5124760076775432, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 2.437619961612284e-05, |
|
"loss": 0.6793, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.5134357005758158, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 2.4328214971209214e-05, |
|
"loss": 0.711, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.5143953934740882, |
|
"grad_norm": 0.1787109375, |
|
"learning_rate": 2.4280230326295588e-05, |
|
"loss": 0.6517, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.5153550863723608, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 2.423224568138196e-05, |
|
"loss": 0.6468, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.5163147792706334, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 2.418426103646833e-05, |
|
"loss": 0.6369, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.517274472168906, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 2.4136276391554704e-05, |
|
"loss": 0.6717, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.5182341650671785, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 2.4088291746641074e-05, |
|
"loss": 0.7289, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.519193857965451, |
|
"grad_norm": 0.1787109375, |
|
"learning_rate": 2.404030710172745e-05, |
|
"loss": 0.6263, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.5201535508637236, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 2.399232245681382e-05, |
|
"loss": 0.6856, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.5211132437619962, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 2.3944337811900194e-05, |
|
"loss": 0.6376, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.5220729366602687, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 2.3896353166986564e-05, |
|
"loss": 0.5946, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.5230326295585412, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 2.384836852207294e-05, |
|
"loss": 0.661, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.5239923224568138, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 2.380038387715931e-05, |
|
"loss": 0.7157, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.5249520153550864, |
|
"grad_norm": 0.2041015625, |
|
"learning_rate": 2.375239923224568e-05, |
|
"loss": 0.7163, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.525911708253359, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 2.3704414587332054e-05, |
|
"loss": 0.612, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.5268714011516314, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 2.3656429942418427e-05, |
|
"loss": 0.6513, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.527831094049904, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 2.36084452975048e-05, |
|
"loss": 0.6722, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5287907869481766, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 2.356046065259117e-05, |
|
"loss": 0.6462, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.5297504798464492, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 2.3512476007677544e-05, |
|
"loss": 0.6398, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.5307101727447217, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 2.3464491362763917e-05, |
|
"loss": 0.6747, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.5316698656429942, |
|
"grad_norm": 0.2021484375, |
|
"learning_rate": 2.341650671785029e-05, |
|
"loss": 0.795, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.5326295585412668, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 2.336852207293666e-05, |
|
"loss": 0.6755, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.5335892514395394, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 2.3320537428023034e-05, |
|
"loss": 0.6751, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.5345489443378119, |
|
"grad_norm": 0.2001953125, |
|
"learning_rate": 2.3272552783109404e-05, |
|
"loss": 0.7003, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.5355086372360844, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 2.322456813819578e-05, |
|
"loss": 0.6791, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.536468330134357, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 2.317658349328215e-05, |
|
"loss": 0.6676, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.5374280230326296, |
|
"grad_norm": 0.1962890625, |
|
"learning_rate": 2.3128598848368524e-05, |
|
"loss": 0.695, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5383877159309021, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 2.3080614203454894e-05, |
|
"loss": 0.6326, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.5393474088291746, |
|
"grad_norm": 0.1962890625, |
|
"learning_rate": 2.303262955854127e-05, |
|
"loss": 0.6717, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.5403071017274472, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 2.298464491362764e-05, |
|
"loss": 0.6424, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.5412667946257198, |
|
"grad_norm": 0.173828125, |
|
"learning_rate": 2.2936660268714014e-05, |
|
"loss": 0.6316, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.5422264875239923, |
|
"grad_norm": 0.2890625, |
|
"learning_rate": 2.2888675623800384e-05, |
|
"loss": 0.6362, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.5431861804222649, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 2.2840690978886757e-05, |
|
"loss": 0.6916, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.5441458733205374, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 2.279270633397313e-05, |
|
"loss": 0.6169, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.54510556621881, |
|
"grad_norm": 0.201171875, |
|
"learning_rate": 2.2744721689059504e-05, |
|
"loss": 0.6349, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.5460652591170825, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 2.2696737044145873e-05, |
|
"loss": 0.7681, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.5470249520153551, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 2.2648752399232247e-05, |
|
"loss": 0.6704, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.5479846449136276, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 2.260076775431862e-05, |
|
"loss": 0.657, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.5489443378119002, |
|
"grad_norm": 0.21484375, |
|
"learning_rate": 2.255278310940499e-05, |
|
"loss": 0.6165, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.5499040307101728, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 2.2504798464491363e-05, |
|
"loss": 0.6614, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.5508637236084453, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 2.2456813819577733e-05, |
|
"loss": 0.7066, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.5518234165067178, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 2.240882917466411e-05, |
|
"loss": 0.7623, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.5527831094049904, |
|
"grad_norm": 0.1669921875, |
|
"learning_rate": 2.236084452975048e-05, |
|
"loss": 0.6818, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.553742802303263, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 2.2312859884836853e-05, |
|
"loss": 0.6859, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.5547024952015355, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 2.2264875239923223e-05, |
|
"loss": 0.7791, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.555662188099808, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 2.22168905950096e-05, |
|
"loss": 0.6485, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.5566218809980806, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 2.216890595009597e-05, |
|
"loss": 0.6189, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.5575815738963532, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 2.2120921305182343e-05, |
|
"loss": 0.6253, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.5585412667946257, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 2.2072936660268713e-05, |
|
"loss": 0.6484, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.5595009596928983, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 2.2024952015355087e-05, |
|
"loss": 0.6793, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.5604606525911708, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 2.197696737044146e-05, |
|
"loss": 0.7062, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.5614203454894434, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 2.1928982725527833e-05, |
|
"loss": 0.6422, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.5623800383877159, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 2.1880998080614203e-05, |
|
"loss": 0.6656, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.5633397312859885, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 2.1833013435700576e-05, |
|
"loss": 0.6325, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.564299424184261, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 2.178502879078695e-05, |
|
"loss": 0.5826, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.5652591170825336, |
|
"grad_norm": 0.216796875, |
|
"learning_rate": 2.1737044145873323e-05, |
|
"loss": 0.6637, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.5662188099808061, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 2.1689059500959693e-05, |
|
"loss": 0.7056, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.5671785028790787, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 2.1641074856046066e-05, |
|
"loss": 0.6351, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.5681381957773513, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 2.159309021113244e-05, |
|
"loss": 0.6376, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.5690978886756238, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 2.1545105566218813e-05, |
|
"loss": 0.7302, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.5700575815738963, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 2.1497120921305183e-05, |
|
"loss": 0.6534, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.5710172744721689, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 2.1449136276391556e-05, |
|
"loss": 0.7276, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.5719769673704415, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 2.140115163147793e-05, |
|
"loss": 0.7175, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.572936660268714, |
|
"grad_norm": 0.1787109375, |
|
"learning_rate": 2.13531669865643e-05, |
|
"loss": 0.6521, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.5738963531669866, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 2.1305182341650673e-05, |
|
"loss": 0.6608, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.5748560460652591, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 2.1257197696737043e-05, |
|
"loss": 0.6794, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.5758157389635317, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 2.1209213051823416e-05, |
|
"loss": 0.6665, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5767754318618042, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 2.116122840690979e-05, |
|
"loss": 0.7426, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.5777351247600768, |
|
"grad_norm": 0.2021484375, |
|
"learning_rate": 2.1113243761996163e-05, |
|
"loss": 0.7074, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.5786948176583493, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 2.1065259117082533e-05, |
|
"loss": 0.6269, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.5796545105566219, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 2.1017274472168906e-05, |
|
"loss": 0.6592, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.5806142034548945, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 2.096928982725528e-05, |
|
"loss": 0.653, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.581573896353167, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 2.0921305182341653e-05, |
|
"loss": 0.6614, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.5825335892514395, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 2.0873320537428023e-05, |
|
"loss": 0.695, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.5834932821497121, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 2.0825335892514396e-05, |
|
"loss": 0.6617, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.5844529750479847, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 2.077735124760077e-05, |
|
"loss": 0.6105, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.5854126679462572, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 2.0729366602687143e-05, |
|
"loss": 0.6862, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.5863723608445297, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 2.0681381957773513e-05, |
|
"loss": 0.6923, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.5873320537428023, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 2.0633397312859886e-05, |
|
"loss": 0.6701, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.5882917466410749, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 2.058541266794626e-05, |
|
"loss": 0.7386, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.5892514395393474, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 2.0537428023032633e-05, |
|
"loss": 0.6568, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.5902111324376199, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 2.0489443378119003e-05, |
|
"loss": 0.64, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.5911708253358925, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 2.0441458733205376e-05, |
|
"loss": 0.633, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.5921305182341651, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 2.0393474088291746e-05, |
|
"loss": 0.5945, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.5930902111324377, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 2.0345489443378122e-05, |
|
"loss": 0.6024, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.5940499040307101, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 2.0297504798464492e-05, |
|
"loss": 0.6571, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.5950095969289827, |
|
"grad_norm": 0.208984375, |
|
"learning_rate": 2.0249520153550866e-05, |
|
"loss": 0.6426, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.5959692898272553, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 2.0201535508637236e-05, |
|
"loss": 0.6087, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.5969289827255279, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 2.015355086372361e-05, |
|
"loss": 0.6481, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.5978886756238004, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 2.0105566218809982e-05, |
|
"loss": 0.712, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.5988483685220729, |
|
"grad_norm": 0.2099609375, |
|
"learning_rate": 2.0057581573896352e-05, |
|
"loss": 0.6061, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.5998080614203455, |
|
"grad_norm": 0.1787109375, |
|
"learning_rate": 2.0009596928982726e-05, |
|
"loss": 0.6566, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.6007677543186181, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 1.99616122840691e-05, |
|
"loss": 0.682, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.6017274472168906, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 1.9913627639155472e-05, |
|
"loss": 0.6136, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.6026871401151631, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 1.9865642994241842e-05, |
|
"loss": 0.6502, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.6036468330134357, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 1.9817658349328216e-05, |
|
"loss": 0.6402, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.6046065259117083, |
|
"grad_norm": 0.208984375, |
|
"learning_rate": 1.976967370441459e-05, |
|
"loss": 0.7516, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6055662188099808, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 1.9721689059500962e-05, |
|
"loss": 0.7179, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.6065259117082533, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 1.9673704414587332e-05, |
|
"loss": 0.6649, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.6074856046065259, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 1.9625719769673705e-05, |
|
"loss": 0.6738, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.6084452975047985, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 1.957773512476008e-05, |
|
"loss": 0.6455, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.6094049904030711, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 1.9529750479846452e-05, |
|
"loss": 0.6375, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.6103646833013435, |
|
"grad_norm": 0.201171875, |
|
"learning_rate": 1.9481765834932822e-05, |
|
"loss": 0.6934, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.6113243761996161, |
|
"grad_norm": 0.173828125, |
|
"learning_rate": 1.9433781190019195e-05, |
|
"loss": 0.702, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.6122840690978887, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 1.9385796545105565e-05, |
|
"loss": 0.6226, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.6132437619961613, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 1.9337811900191942e-05, |
|
"loss": 0.6289, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.6142034548944337, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 1.9289827255278312e-05, |
|
"loss": 0.6697, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6151631477927063, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 1.9241842610364685e-05, |
|
"loss": 0.6255, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.6161228406909789, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 1.9193857965451055e-05, |
|
"loss": 0.6304, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.6170825335892515, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 1.914587332053743e-05, |
|
"loss": 0.6377, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.6180422264875239, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 1.9097888675623802e-05, |
|
"loss": 0.6029, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.6190019193857965, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 1.9049904030710175e-05, |
|
"loss": 0.6464, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.6199616122840691, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 1.9001919385796545e-05, |
|
"loss": 0.6886, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.6209213051823417, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 1.895393474088292e-05, |
|
"loss": 0.6223, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.6218809980806143, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 1.8905950095969292e-05, |
|
"loss": 0.6896, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.6228406909788867, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 1.8857965451055662e-05, |
|
"loss": 0.6286, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.6238003838771593, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 1.8809980806142035e-05, |
|
"loss": 0.6154, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6247600767754319, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 1.876199616122841e-05, |
|
"loss": 0.6716, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.6257197696737045, |
|
"grad_norm": 0.2001953125, |
|
"learning_rate": 1.8714011516314782e-05, |
|
"loss": 0.6663, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.6266794625719769, |
|
"grad_norm": 0.173828125, |
|
"learning_rate": 1.866602687140115e-05, |
|
"loss": 0.6428, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.6276391554702495, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 1.8618042226487525e-05, |
|
"loss": 0.6735, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.6285988483685221, |
|
"grad_norm": 0.1962890625, |
|
"learning_rate": 1.8570057581573895e-05, |
|
"loss": 0.6826, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.6295585412667947, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 1.852207293666027e-05, |
|
"loss": 0.6801, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.6305182341650671, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 1.847408829174664e-05, |
|
"loss": 0.6533, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.6314779270633397, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 1.8426103646833015e-05, |
|
"loss": 0.7034, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.6324376199616123, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 1.8378119001919385e-05, |
|
"loss": 0.5994, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.6333973128598849, |
|
"grad_norm": 0.173828125, |
|
"learning_rate": 1.8330134357005758e-05, |
|
"loss": 0.6696, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.6343570057581573, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 1.828214971209213e-05, |
|
"loss": 0.6168, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.6353166986564299, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 1.8234165067178505e-05, |
|
"loss": 0.6458, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.6362763915547025, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 1.8186180422264875e-05, |
|
"loss": 0.6651, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.6372360844529751, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 1.8138195777351248e-05, |
|
"loss": 0.5914, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.6381957773512476, |
|
"grad_norm": 0.23828125, |
|
"learning_rate": 1.809021113243762e-05, |
|
"loss": 0.6907, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.6391554702495201, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 1.8042226487523995e-05, |
|
"loss": 0.6052, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.6401151631477927, |
|
"grad_norm": 0.2236328125, |
|
"learning_rate": 1.7994241842610365e-05, |
|
"loss": 0.602, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.6410748560460653, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 1.7946257197696738e-05, |
|
"loss": 0.6327, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.6420345489443378, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 1.789827255278311e-05, |
|
"loss": 0.6137, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.6429942418426103, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 1.785028790786948e-05, |
|
"loss": 0.6465, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.6439539347408829, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 1.7802303262955855e-05, |
|
"loss": 0.606, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.6449136276391555, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 1.7754318618042225e-05, |
|
"loss": 0.6425, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.6458733205374281, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 1.77063339731286e-05, |
|
"loss": 0.7085, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.6468330134357005, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 1.765834932821497e-05, |
|
"loss": 0.6124, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.6477927063339731, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 1.7610364683301345e-05, |
|
"loss": 0.6603, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.6487523992322457, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 1.7562380038387714e-05, |
|
"loss": 0.6491, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.6497120921305183, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 1.751439539347409e-05, |
|
"loss": 0.6662, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.6506717850287908, |
|
"grad_norm": 0.1982421875, |
|
"learning_rate": 1.746641074856046e-05, |
|
"loss": 0.5911, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.6516314779270633, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 1.7418426103646834e-05, |
|
"loss": 0.6353, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.6525911708253359, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 1.7370441458733204e-05, |
|
"loss": 0.6741, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.6535508637236085, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 1.7322456813819578e-05, |
|
"loss": 0.6571, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.654510556621881, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 1.727447216890595e-05, |
|
"loss": 0.6054, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.6554702495201535, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 1.7226487523992324e-05, |
|
"loss": 0.6947, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.6564299424184261, |
|
"grad_norm": 0.1787109375, |
|
"learning_rate": 1.7178502879078694e-05, |
|
"loss": 0.618, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.6573896353166987, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 1.7130518234165068e-05, |
|
"loss": 0.6234, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.6583493282149712, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 1.708253358925144e-05, |
|
"loss": 0.7353, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.6593090211132437, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 1.7034548944337814e-05, |
|
"loss": 0.7295, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.6602687140115163, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 1.6986564299424184e-05, |
|
"loss": 0.6263, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.6612284069097889, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 1.6938579654510558e-05, |
|
"loss": 0.7171, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.6621880998080614, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 1.689059500959693e-05, |
|
"loss": 0.6646, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.663147792706334, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 1.6842610364683304e-05, |
|
"loss": 0.7177, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.6641074856046065, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 1.6794625719769674e-05, |
|
"loss": 0.6303, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.6650671785028791, |
|
"grad_norm": 0.201171875, |
|
"learning_rate": 1.6746641074856048e-05, |
|
"loss": 0.6422, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.6660268714011516, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 1.669865642994242e-05, |
|
"loss": 0.6577, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.6669865642994242, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 1.665067178502879e-05, |
|
"loss": 0.7325, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.6679462571976967, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 1.6602687140115164e-05, |
|
"loss": 0.6445, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.6689059500959693, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 1.6554702495201534e-05, |
|
"loss": 0.6456, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.6698656429942419, |
|
"grad_norm": 0.1787109375, |
|
"learning_rate": 1.6506717850287907e-05, |
|
"loss": 0.5991, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.6708253358925144, |
|
"grad_norm": 0.2216796875, |
|
"learning_rate": 1.645873320537428e-05, |
|
"loss": 0.7291, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.6717850287907869, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 1.6410748560460654e-05, |
|
"loss": 0.6138, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6727447216890595, |
|
"grad_norm": 0.234375, |
|
"learning_rate": 1.6362763915547024e-05, |
|
"loss": 0.7448, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.6737044145873321, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 1.6314779270633397e-05, |
|
"loss": 0.6448, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.6746641074856046, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 1.626679462571977e-05, |
|
"loss": 0.6737, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.6756238003838771, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 1.6218809980806144e-05, |
|
"loss": 0.6544, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.6765834932821497, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 1.6170825335892514e-05, |
|
"loss": 0.6924, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.6775431861804223, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 1.6122840690978887e-05, |
|
"loss": 0.6291, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.6785028790786948, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 1.607485604606526e-05, |
|
"loss": 0.6303, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.6794625719769674, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 1.6026871401151634e-05, |
|
"loss": 0.6373, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.6804222648752399, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 1.5978886756238004e-05, |
|
"loss": 0.5909, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.6813819577735125, |
|
"grad_norm": 0.1787109375, |
|
"learning_rate": 1.5930902111324377e-05, |
|
"loss": 0.6442, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.682341650671785, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 1.588291746641075e-05, |
|
"loss": 0.7233, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.6833013435700576, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 1.5834932821497124e-05, |
|
"loss": 0.6688, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.6842610364683301, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 1.5786948176583494e-05, |
|
"loss": 0.6431, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.6852207293666027, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 1.5738963531669867e-05, |
|
"loss": 0.6273, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.6861804222648752, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 1.5690978886756237e-05, |
|
"loss": 0.6353, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.6871401151631478, |
|
"grad_norm": 0.208984375, |
|
"learning_rate": 1.5642994241842614e-05, |
|
"loss": 0.7089, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.6880998080614203, |
|
"grad_norm": 0.1787109375, |
|
"learning_rate": 1.5595009596928984e-05, |
|
"loss": 0.6341, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.6890595009596929, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 1.5547024952015357e-05, |
|
"loss": 0.6427, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.6900191938579654, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 1.5499040307101727e-05, |
|
"loss": 0.6742, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.690978886756238, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 1.54510556621881e-05, |
|
"loss": 0.6445, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.6919385796545106, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 1.5403071017274474e-05, |
|
"loss": 0.6915, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.6928982725527831, |
|
"grad_norm": 0.1787109375, |
|
"learning_rate": 1.5355086372360844e-05, |
|
"loss": 0.6877, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.6938579654510557, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 1.5307101727447217e-05, |
|
"loss": 0.6345, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.6948176583493282, |
|
"grad_norm": 0.240234375, |
|
"learning_rate": 1.525911708253359e-05, |
|
"loss": 0.6741, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.6957773512476008, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 1.5211132437619962e-05, |
|
"loss": 0.5774, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.6967370441458733, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 1.5163147792706333e-05, |
|
"loss": 0.6195, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.6976967370441459, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 1.5115163147792707e-05, |
|
"loss": 0.6503, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.6986564299424184, |
|
"grad_norm": 0.201171875, |
|
"learning_rate": 1.5067178502879078e-05, |
|
"loss": 0.72, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.699616122840691, |
|
"grad_norm": 0.2099609375, |
|
"learning_rate": 1.5019193857965452e-05, |
|
"loss": 0.6388, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.7005758157389635, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 1.4971209213051823e-05, |
|
"loss": 0.722, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7015355086372361, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 1.4923224568138197e-05, |
|
"loss": 0.7685, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.7024952015355086, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 1.4875239923224568e-05, |
|
"loss": 0.6498, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.7034548944337812, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 1.4827255278310942e-05, |
|
"loss": 0.6546, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.7044145873320538, |
|
"grad_norm": 0.2080078125, |
|
"learning_rate": 1.4779270633397313e-05, |
|
"loss": 0.7309, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.7053742802303263, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 1.4731285988483687e-05, |
|
"loss": 0.6422, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.7063339731285988, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 1.4683301343570058e-05, |
|
"loss": 0.6917, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.7072936660268714, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 1.4635316698656432e-05, |
|
"loss": 0.6573, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.708253358925144, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 1.4587332053742803e-05, |
|
"loss": 0.6355, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.7092130518234165, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 1.4539347408829177e-05, |
|
"loss": 0.6352, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.710172744721689, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 1.4491362763915548e-05, |
|
"loss": 0.6415, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7111324376199616, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 1.4443378119001921e-05, |
|
"loss": 0.6732, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.7120921305182342, |
|
"grad_norm": 0.2177734375, |
|
"learning_rate": 1.4395393474088293e-05, |
|
"loss": 0.6681, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.7130518234165067, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 1.4347408829174666e-05, |
|
"loss": 0.6902, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.7140115163147792, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 1.4299424184261036e-05, |
|
"loss": 0.7067, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.7149712092130518, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 1.4251439539347408e-05, |
|
"loss": 0.6437, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.7159309021113244, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 1.4203454894433781e-05, |
|
"loss": 0.6672, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.716890595009597, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 1.4155470249520153e-05, |
|
"loss": 0.7454, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.7178502879078695, |
|
"grad_norm": 0.20703125, |
|
"learning_rate": 1.4107485604606526e-05, |
|
"loss": 0.6786, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.718809980806142, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 1.4059500959692898e-05, |
|
"loss": 0.6255, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.7197696737044146, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 1.4011516314779271e-05, |
|
"loss": 0.7794, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7207293666026872, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 1.3963531669865643e-05, |
|
"loss": 0.6911, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.7216890595009597, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 1.3915547024952016e-05, |
|
"loss": 0.6235, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.7226487523992322, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 1.3867562380038388e-05, |
|
"loss": 0.6387, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.7236084452975048, |
|
"grad_norm": 0.2158203125, |
|
"learning_rate": 1.3819577735124761e-05, |
|
"loss": 0.7356, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.7245681381957774, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 1.3771593090211133e-05, |
|
"loss": 0.6322, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.72552783109405, |
|
"grad_norm": 0.2080078125, |
|
"learning_rate": 1.3723608445297506e-05, |
|
"loss": 0.6258, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.7264875239923224, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 1.3675623800383878e-05, |
|
"loss": 0.6248, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.727447216890595, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 1.3627639155470251e-05, |
|
"loss": 0.6328, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.7284069097888676, |
|
"grad_norm": 0.1787109375, |
|
"learning_rate": 1.3579654510556623e-05, |
|
"loss": 0.6624, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.7293666026871402, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 1.3531669865642996e-05, |
|
"loss": 0.6588, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.7303262955854126, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 1.3483685220729366e-05, |
|
"loss": 0.6972, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.7312859884836852, |
|
"grad_norm": 0.20703125, |
|
"learning_rate": 1.3435700575815741e-05, |
|
"loss": 0.5945, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.7322456813819578, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 1.3387715930902111e-05, |
|
"loss": 0.6462, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.7332053742802304, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 1.3339731285988486e-05, |
|
"loss": 0.6181, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.7341650671785028, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 1.3291746641074856e-05, |
|
"loss": 0.7334, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.7351247600767754, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 1.3243761996161231e-05, |
|
"loss": 0.6741, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.736084452975048, |
|
"grad_norm": 0.1962890625, |
|
"learning_rate": 1.3195777351247601e-05, |
|
"loss": 0.6938, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.7370441458733206, |
|
"grad_norm": 0.19921875, |
|
"learning_rate": 1.3147792706333976e-05, |
|
"loss": 0.6204, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.738003838771593, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 1.3099808061420346e-05, |
|
"loss": 0.6083, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.7389635316698656, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 1.3051823416506717e-05, |
|
"loss": 0.6224, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.7399232245681382, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 1.300383877159309e-05, |
|
"loss": 0.6501, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.7408829174664108, |
|
"grad_norm": 0.25, |
|
"learning_rate": 1.2955854126679462e-05, |
|
"loss": 0.6908, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.7418426103646834, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 1.2907869481765836e-05, |
|
"loss": 0.6874, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.7428023032629558, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 1.2859884836852207e-05, |
|
"loss": 0.6379, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.7437619961612284, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 1.281190019193858e-05, |
|
"loss": 0.6241, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.744721689059501, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 1.2763915547024952e-05, |
|
"loss": 0.7043, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.7456813819577736, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 1.2715930902111326e-05, |
|
"loss": 0.6731, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.746641074856046, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 1.2667946257197696e-05, |
|
"loss": 0.6525, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.7476007677543186, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 1.261996161228407e-05, |
|
"loss": 0.6071, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.7485604606525912, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 1.257197696737044e-05, |
|
"loss": 0.6466, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.7495201535508638, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 1.2523992322456816e-05, |
|
"loss": 0.6391, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.7504798464491362, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 1.2476007677543186e-05, |
|
"loss": 0.5748, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.7514395393474088, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 1.2428023032629559e-05, |
|
"loss": 0.704, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.7523992322456814, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 1.238003838771593e-05, |
|
"loss": 0.6755, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.753358925143954, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 1.2332053742802304e-05, |
|
"loss": 0.7469, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.7543186180422264, |
|
"grad_norm": 0.19921875, |
|
"learning_rate": 1.2284069097888675e-05, |
|
"loss": 0.6035, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.755278310940499, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 1.2236084452975049e-05, |
|
"loss": 0.6198, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.7562380038387716, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 1.218809980806142e-05, |
|
"loss": 0.6353, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.7571976967370442, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 1.2140115163147794e-05, |
|
"loss": 0.601, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.7581573896353166, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 1.2092130518234165e-05, |
|
"loss": 0.6355, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.7591170825335892, |
|
"grad_norm": 0.1962890625, |
|
"learning_rate": 1.2044145873320537e-05, |
|
"loss": 0.6715, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.7600767754318618, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 1.199616122840691e-05, |
|
"loss": 0.6421, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.7610364683301344, |
|
"grad_norm": 0.201171875, |
|
"learning_rate": 1.1948176583493282e-05, |
|
"loss": 0.657, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.761996161228407, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 1.1900191938579655e-05, |
|
"loss": 0.6295, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.7629558541266794, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 1.1852207293666027e-05, |
|
"loss": 0.6543, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.763915547024952, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 1.18042226487524e-05, |
|
"loss": 0.6595, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.7648752399232246, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 1.1756238003838772e-05, |
|
"loss": 0.6413, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.7658349328214972, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 1.1708253358925145e-05, |
|
"loss": 0.6076, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.7667946257197696, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 1.1660268714011517e-05, |
|
"loss": 0.6734, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.7677543186180422, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 1.161228406909789e-05, |
|
"loss": 0.687, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.7687140115163148, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 1.1564299424184262e-05, |
|
"loss": 0.6757, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.7696737044145874, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 1.1516314779270635e-05, |
|
"loss": 0.6202, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.7706333973128598, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 1.1468330134357007e-05, |
|
"loss": 0.6245, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.7715930902111324, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 1.1420345489443378e-05, |
|
"loss": 0.6626, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.772552783109405, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 1.1372360844529752e-05, |
|
"loss": 0.7026, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.7735124760076776, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 1.1324376199616123e-05, |
|
"loss": 0.6806, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.77447216890595, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 1.1276391554702495e-05, |
|
"loss": 0.7397, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.7754318618042226, |
|
"grad_norm": 0.1982421875, |
|
"learning_rate": 1.1228406909788867e-05, |
|
"loss": 0.6077, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.7763915547024952, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 1.118042226487524e-05, |
|
"loss": 0.5914, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.7773512476007678, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 1.1132437619961612e-05, |
|
"loss": 0.5846, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.7783109404990403, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 1.1084452975047985e-05, |
|
"loss": 0.6449, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.7792706333973128, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 1.1036468330134357e-05, |
|
"loss": 0.6712, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.7802303262955854, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 1.098848368522073e-05, |
|
"loss": 0.6806, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.781190019193858, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 1.0940499040307102e-05, |
|
"loss": 0.6603, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.7821497120921305, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 1.0892514395393475e-05, |
|
"loss": 0.6292, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.783109404990403, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 1.0844529750479847e-05, |
|
"loss": 0.6088, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.7840690978886756, |
|
"grad_norm": 0.2119140625, |
|
"learning_rate": 1.079654510556622e-05, |
|
"loss": 0.6789, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.7850287907869482, |
|
"grad_norm": 0.201171875, |
|
"learning_rate": 1.0748560460652591e-05, |
|
"loss": 0.6616, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.7859884836852208, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 1.0700575815738965e-05, |
|
"loss": 0.6143, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.7869481765834933, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 1.0652591170825336e-05, |
|
"loss": 0.6689, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.7879078694817658, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 1.0604606525911708e-05, |
|
"loss": 0.704, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.7888675623800384, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 1.0556621880998081e-05, |
|
"loss": 0.5669, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.789827255278311, |
|
"grad_norm": 0.23828125, |
|
"learning_rate": 1.0508637236084453e-05, |
|
"loss": 0.5997, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.7907869481765835, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 1.0460652591170826e-05, |
|
"loss": 0.6471, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.791746641074856, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 1.0412667946257198e-05, |
|
"loss": 0.5961, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.7927063339731286, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 1.0364683301343571e-05, |
|
"loss": 0.7073, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.7936660268714012, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 1.0316698656429943e-05, |
|
"loss": 0.6735, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.7946257197696737, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 1.0268714011516316e-05, |
|
"loss": 0.6317, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.7955854126679462, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 1.0220729366602688e-05, |
|
"loss": 0.6551, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.7965451055662188, |
|
"grad_norm": 0.173828125, |
|
"learning_rate": 1.0172744721689061e-05, |
|
"loss": 0.6298, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.7975047984644914, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 1.0124760076775433e-05, |
|
"loss": 0.6228, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.7984644913627639, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 1.0076775431861805e-05, |
|
"loss": 0.6468, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.7994241842610365, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 1.0028790786948176e-05, |
|
"loss": 0.6267, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.800383877159309, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 9.98080614203455e-06, |
|
"loss": 0.6171, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.8013435700575816, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 9.932821497120921e-06, |
|
"loss": 0.6174, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.8023032629558541, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 9.884836852207294e-06, |
|
"loss": 0.7022, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.8032629558541267, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 9.836852207293666e-06, |
|
"loss": 0.6331, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.8042226487523992, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 9.78886756238004e-06, |
|
"loss": 0.6398, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.8051823416506718, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 9.740882917466411e-06, |
|
"loss": 0.6572, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.8061420345489443, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 9.692898272552783e-06, |
|
"loss": 0.6728, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8071017274472169, |
|
"grad_norm": 0.2275390625, |
|
"learning_rate": 9.644913627639156e-06, |
|
"loss": 0.7734, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.8080614203454894, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 9.596928982725528e-06, |
|
"loss": 0.6028, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.809021113243762, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 9.548944337811901e-06, |
|
"loss": 0.6531, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.8099808061420346, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 9.500959692898273e-06, |
|
"loss": 0.7045, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.8109404990403071, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 9.452975047984646e-06, |
|
"loss": 0.636, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.8119001919385797, |
|
"grad_norm": 0.205078125, |
|
"learning_rate": 9.404990403071018e-06, |
|
"loss": 0.632, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.8128598848368522, |
|
"grad_norm": 0.2109375, |
|
"learning_rate": 9.357005758157391e-06, |
|
"loss": 0.6391, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.8138195777351248, |
|
"grad_norm": 0.1962890625, |
|
"learning_rate": 9.309021113243763e-06, |
|
"loss": 0.6591, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.8147792706333973, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 9.261036468330136e-06, |
|
"loss": 0.6406, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.8157389635316699, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 9.213051823416507e-06, |
|
"loss": 0.6303, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.8166986564299424, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 9.165067178502879e-06, |
|
"loss": 0.6677, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.817658349328215, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 9.117082533589252e-06, |
|
"loss": 0.6476, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.8186180422264875, |
|
"grad_norm": 0.1982421875, |
|
"learning_rate": 9.069097888675624e-06, |
|
"loss": 0.7144, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.8195777351247601, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 9.021113243761997e-06, |
|
"loss": 0.6014, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.8205374280230326, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 8.973128598848369e-06, |
|
"loss": 0.6275, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.8214971209213052, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 8.92514395393474e-06, |
|
"loss": 0.7131, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.8224568138195777, |
|
"grad_norm": 0.2373046875, |
|
"learning_rate": 8.877159309021112e-06, |
|
"loss": 0.7126, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.8234165067178503, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 8.829174664107486e-06, |
|
"loss": 0.6512, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.8243761996161229, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 8.781190019193857e-06, |
|
"loss": 0.6421, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.8253358925143954, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 8.73320537428023e-06, |
|
"loss": 0.7562, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.8262955854126679, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 8.685220729366602e-06, |
|
"loss": 0.676, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.8272552783109405, |
|
"grad_norm": 0.1982421875, |
|
"learning_rate": 8.637236084452976e-06, |
|
"loss": 0.6452, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.8282149712092131, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 8.589251439539347e-06, |
|
"loss": 0.6486, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.8291746641074856, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 8.54126679462572e-06, |
|
"loss": 0.6452, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.8301343570057581, |
|
"grad_norm": 0.2275390625, |
|
"learning_rate": 8.493282149712092e-06, |
|
"loss": 0.6602, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.8310940499040307, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 8.445297504798465e-06, |
|
"loss": 0.6752, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.8320537428023033, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 8.397312859884837e-06, |
|
"loss": 0.6572, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.8330134357005758, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 8.34932821497121e-06, |
|
"loss": 0.6129, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.8339731285988484, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 8.301343570057582e-06, |
|
"loss": 0.6541, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.8349328214971209, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 8.253358925143954e-06, |
|
"loss": 0.7138, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.8358925143953935, |
|
"grad_norm": 0.2021484375, |
|
"learning_rate": 8.205374280230327e-06, |
|
"loss": 0.5956, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.836852207293666, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 8.157389635316699e-06, |
|
"loss": 0.6929, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.8378119001919386, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 8.109404990403072e-06, |
|
"loss": 0.6584, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.8387715930902111, |
|
"grad_norm": 0.19921875, |
|
"learning_rate": 8.061420345489444e-06, |
|
"loss": 0.706, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.8397312859884837, |
|
"grad_norm": 0.2021484375, |
|
"learning_rate": 8.013435700575817e-06, |
|
"loss": 0.6272, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.8406909788867563, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 7.965451055662189e-06, |
|
"loss": 0.6094, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.8416506717850288, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 7.917466410748562e-06, |
|
"loss": 0.6238, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.8426103646833013, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 7.869481765834934e-06, |
|
"loss": 0.6495, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.8435700575815739, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 7.821497120921307e-06, |
|
"loss": 0.623, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.8445297504798465, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 7.773512476007678e-06, |
|
"loss": 0.6089, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.845489443378119, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 7.72552783109405e-06, |
|
"loss": 0.6583, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.8464491362763915, |
|
"grad_norm": 0.208984375, |
|
"learning_rate": 7.677543186180422e-06, |
|
"loss": 0.5981, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.8474088291746641, |
|
"grad_norm": 0.2001953125, |
|
"learning_rate": 7.629558541266795e-06, |
|
"loss": 0.6403, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.8483685220729367, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 7.581573896353167e-06, |
|
"loss": 0.6056, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.8493282149712092, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 7.533589251439539e-06, |
|
"loss": 0.641, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.8502879078694817, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 7.485604606525912e-06, |
|
"loss": 0.6395, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.8512476007677543, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 7.437619961612284e-06, |
|
"loss": 0.6442, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.8522072936660269, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 7.389635316698657e-06, |
|
"loss": 0.7474, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.8531669865642995, |
|
"grad_norm": 0.2060546875, |
|
"learning_rate": 7.341650671785029e-06, |
|
"loss": 0.7601, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.8541266794625719, |
|
"grad_norm": 0.19921875, |
|
"learning_rate": 7.293666026871402e-06, |
|
"loss": 0.6018, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.8550863723608445, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 7.245681381957774e-06, |
|
"loss": 0.678, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.8560460652591171, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 7.1976967370441466e-06, |
|
"loss": 0.6355, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.8570057581573897, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 7.149712092130518e-06, |
|
"loss": 0.6635, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.8579654510556622, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 7.101727447216891e-06, |
|
"loss": 0.6118, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.8589251439539347, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 7.053742802303263e-06, |
|
"loss": 0.6892, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.8598848368522073, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 7.005758157389636e-06, |
|
"loss": 0.6305, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.8608445297504799, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 6.957773512476008e-06, |
|
"loss": 0.617, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.8618042226487524, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 6.909788867562381e-06, |
|
"loss": 0.6202, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.8627639155470249, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 6.861804222648753e-06, |
|
"loss": 0.6548, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.8637236084452975, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 6.8138195777351256e-06, |
|
"loss": 0.6238, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.8646833013435701, |
|
"grad_norm": 0.2392578125, |
|
"learning_rate": 6.765834932821498e-06, |
|
"loss": 0.6827, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.8656429942418427, |
|
"grad_norm": 0.201171875, |
|
"learning_rate": 6.7178502879078705e-06, |
|
"loss": 0.6924, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.8666026871401151, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 6.669865642994243e-06, |
|
"loss": 0.6776, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.8675623800383877, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 6.6218809980806155e-06, |
|
"loss": 0.6477, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.8685220729366603, |
|
"grad_norm": 0.2333984375, |
|
"learning_rate": 6.573896353166988e-06, |
|
"loss": 0.6498, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.8694817658349329, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 6.525911708253359e-06, |
|
"loss": 0.6242, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.8704414587332053, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 6.477927063339731e-06, |
|
"loss": 0.6607, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.8714011516314779, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 6.429942418426104e-06, |
|
"loss": 0.6355, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.8723608445297505, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 6.381957773512476e-06, |
|
"loss": 0.7027, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.8733205374280231, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 6.333973128598848e-06, |
|
"loss": 0.6315, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.8742802303262955, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 6.28598848368522e-06, |
|
"loss": 0.5826, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.8752399232245681, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 6.238003838771593e-06, |
|
"loss": 0.6937, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.8761996161228407, |
|
"grad_norm": 0.205078125, |
|
"learning_rate": 6.190019193857965e-06, |
|
"loss": 0.6438, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.8771593090211133, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 6.142034548944338e-06, |
|
"loss": 0.6252, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.8781190019193857, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 6.09404990403071e-06, |
|
"loss": 0.6701, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.8790786948176583, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 6.046065259117083e-06, |
|
"loss": 0.6044, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.8800383877159309, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 5.998080614203455e-06, |
|
"loss": 0.6991, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.8809980806142035, |
|
"grad_norm": 0.2001953125, |
|
"learning_rate": 5.950095969289828e-06, |
|
"loss": 0.6853, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.8819577735124761, |
|
"grad_norm": 0.208984375, |
|
"learning_rate": 5.9021113243762e-06, |
|
"loss": 0.5957, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.8829174664107485, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 5.854126679462573e-06, |
|
"loss": 0.6777, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.8838771593090211, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 5.806142034548945e-06, |
|
"loss": 0.6569, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.8848368522072937, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 5.758157389635318e-06, |
|
"loss": 0.6315, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.8857965451055663, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 5.710172744721689e-06, |
|
"loss": 0.655, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.8867562380038387, |
|
"grad_norm": 0.2158203125, |
|
"learning_rate": 5.662188099808062e-06, |
|
"loss": 0.6469, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.8877159309021113, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 5.614203454894433e-06, |
|
"loss": 0.615, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.8886756238003839, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 5.566218809980806e-06, |
|
"loss": 0.6481, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.8896353166986565, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 5.518234165067178e-06, |
|
"loss": 0.6018, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.8905950095969289, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 5.470249520153551e-06, |
|
"loss": 0.6369, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.8915547024952015, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 5.422264875239923e-06, |
|
"loss": 0.614, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.8925143953934741, |
|
"grad_norm": 0.2021484375, |
|
"learning_rate": 5.374280230326296e-06, |
|
"loss": 0.6087, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.8934740882917467, |
|
"grad_norm": 0.1953125, |
|
"learning_rate": 5.326295585412668e-06, |
|
"loss": 0.6486, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.8944337811900192, |
|
"grad_norm": 0.2080078125, |
|
"learning_rate": 5.278310940499041e-06, |
|
"loss": 0.6569, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.8953934740882917, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 5.230326295585413e-06, |
|
"loss": 0.6594, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.8963531669865643, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 5.182341650671786e-06, |
|
"loss": 0.6214, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.8973128598848369, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 5.134357005758158e-06, |
|
"loss": 0.6567, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.8982725527831094, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 5.086372360844531e-06, |
|
"loss": 0.7011, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.8992322456813819, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 5.038387715930902e-06, |
|
"loss": 0.664, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.9001919385796545, |
|
"grad_norm": 0.2177734375, |
|
"learning_rate": 4.990403071017275e-06, |
|
"loss": 0.6461, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.9011516314779271, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 4.942418426103647e-06, |
|
"loss": 0.7291, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.9021113243761996, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 4.89443378119002e-06, |
|
"loss": 0.645, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.9030710172744721, |
|
"grad_norm": 0.201171875, |
|
"learning_rate": 4.846449136276391e-06, |
|
"loss": 0.6564, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.9040307101727447, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 4.798464491362764e-06, |
|
"loss": 0.6059, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.9049904030710173, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 4.750479846449136e-06, |
|
"loss": 0.6725, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.9059500959692899, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 4.702495201535509e-06, |
|
"loss": 0.696, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.9069097888675623, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 4.654510556621881e-06, |
|
"loss": 0.7019, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.9078694817658349, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 4.606525911708254e-06, |
|
"loss": 0.6446, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.9088291746641075, |
|
"grad_norm": 0.203125, |
|
"learning_rate": 4.558541266794626e-06, |
|
"loss": 0.6118, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.9097888675623801, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 4.510556621880999e-06, |
|
"loss": 0.7057, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.9107485604606526, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 4.46257197696737e-06, |
|
"loss": 0.5763, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.9117082533589251, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 4.414587332053743e-06, |
|
"loss": 0.7103, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.9126679462571977, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 4.366602687140115e-06, |
|
"loss": 0.6363, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.9136276391554703, |
|
"grad_norm": 0.234375, |
|
"learning_rate": 4.318618042226488e-06, |
|
"loss": 0.7067, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.9145873320537428, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 4.27063339731286e-06, |
|
"loss": 0.5826, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.9155470249520153, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 4.222648752399233e-06, |
|
"loss": 0.6715, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.9165067178502879, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 4.174664107485605e-06, |
|
"loss": 0.752, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.9174664107485605, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 4.126679462571977e-06, |
|
"loss": 0.6337, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.918426103646833, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 4.078694817658349e-06, |
|
"loss": 0.6485, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.9193857965451055, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 4.030710172744722e-06, |
|
"loss": 0.6394, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.9203454894433781, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 3.982725527831094e-06, |
|
"loss": 0.6874, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.9213051823416507, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 3.934740882917467e-06, |
|
"loss": 0.6199, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.9222648752399232, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 3.886756238003839e-06, |
|
"loss": 0.6185, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.9232245681381958, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 3.838771593090211e-06, |
|
"loss": 0.6295, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.9241842610364683, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 3.7907869481765834e-06, |
|
"loss": 0.6538, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.9251439539347409, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 3.742802303262956e-06, |
|
"loss": 0.6412, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.9261036468330134, |
|
"grad_norm": 0.1748046875, |
|
"learning_rate": 3.6948176583493283e-06, |
|
"loss": 0.7516, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.927063339731286, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 3.646833013435701e-06, |
|
"loss": 0.6511, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.9280230326295585, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 3.5988483685220733e-06, |
|
"loss": 0.7215, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.9289827255278311, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 3.5508637236084453e-06, |
|
"loss": 0.6221, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.9299424184261037, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 3.502879078694818e-06, |
|
"loss": 0.6791, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.9309021113243762, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 3.4548944337811903e-06, |
|
"loss": 0.631, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.9318618042226487, |
|
"grad_norm": 0.240234375, |
|
"learning_rate": 3.4069097888675628e-06, |
|
"loss": 0.7288, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.9328214971209213, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 3.3589251439539353e-06, |
|
"loss": 0.6897, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.9337811900191939, |
|
"grad_norm": 0.1962890625, |
|
"learning_rate": 3.3109404990403077e-06, |
|
"loss": 0.6469, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.9347408829174664, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 3.2629558541266794e-06, |
|
"loss": 0.6288, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.935700575815739, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 3.214971209213052e-06, |
|
"loss": 0.634, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.9366602687140115, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 3.166986564299424e-06, |
|
"loss": 0.6274, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.9376199616122841, |
|
"grad_norm": 0.20703125, |
|
"learning_rate": 3.1190019193857964e-06, |
|
"loss": 0.6977, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.9385796545105566, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 3.071017274472169e-06, |
|
"loss": 0.6045, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.9395393474088292, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 3.0230326295585414e-06, |
|
"loss": 0.6448, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.9404990403071017, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 2.975047984644914e-06, |
|
"loss": 0.6329, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.9414587332053743, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 2.9270633397312863e-06, |
|
"loss": 0.5696, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.9424184261036468, |
|
"grad_norm": 0.1962890625, |
|
"learning_rate": 2.879078694817659e-06, |
|
"loss": 0.671, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.9433781190019194, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 2.831094049904031e-06, |
|
"loss": 0.6083, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.944337811900192, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 2.783109404990403e-06, |
|
"loss": 0.6567, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.9452975047984645, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 2.7351247600767754e-06, |
|
"loss": 0.6734, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.946257197696737, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 2.687140115163148e-06, |
|
"loss": 0.596, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.9472168905950096, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 2.6391554702495203e-06, |
|
"loss": 0.6234, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.9481765834932822, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 2.591170825335893e-06, |
|
"loss": 0.6006, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.9491362763915547, |
|
"grad_norm": 0.212890625, |
|
"learning_rate": 2.5431861804222653e-06, |
|
"loss": 0.7036, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.9500959692898272, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 2.4952015355086374e-06, |
|
"loss": 0.6851, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.9510556621880998, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 2.44721689059501e-06, |
|
"loss": 0.6142, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.9520153550863724, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 2.399232245681382e-06, |
|
"loss": 0.6545, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.9529750479846449, |
|
"grad_norm": 0.2333984375, |
|
"learning_rate": 2.3512476007677544e-06, |
|
"loss": 0.652, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.9539347408829175, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 2.303262955854127e-06, |
|
"loss": 0.604, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.95489443378119, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 2.2552783109404993e-06, |
|
"loss": 0.6274, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.9558541266794626, |
|
"grad_norm": 0.205078125, |
|
"learning_rate": 2.2072936660268714e-06, |
|
"loss": 0.6445, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.9568138195777351, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 2.159309021113244e-06, |
|
"loss": 0.6626, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.9577735124760077, |
|
"grad_norm": 0.2265625, |
|
"learning_rate": 2.1113243761996164e-06, |
|
"loss": 0.6961, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.9587332053742802, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 2.0633397312859884e-06, |
|
"loss": 0.575, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.9596928982725528, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 2.015355086372361e-06, |
|
"loss": 0.5857, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.9606525911708254, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 1.9673704414587334e-06, |
|
"loss": 0.6737, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.9616122840690979, |
|
"grad_norm": 0.19921875, |
|
"learning_rate": 1.9193857965451054e-06, |
|
"loss": 0.7174, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.9625719769673704, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 1.871401151631478e-06, |
|
"loss": 0.5874, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.963531669865643, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 1.8234165067178504e-06, |
|
"loss": 0.5958, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.9644913627639156, |
|
"grad_norm": 0.23828125, |
|
"learning_rate": 1.7754318618042227e-06, |
|
"loss": 0.6721, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.9654510556621881, |
|
"grad_norm": 0.201171875, |
|
"learning_rate": 1.7274472168905951e-06, |
|
"loss": 0.6471, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.9664107485604606, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 1.6794625719769676e-06, |
|
"loss": 0.6962, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.9673704414587332, |
|
"grad_norm": 0.1962890625, |
|
"learning_rate": 1.6314779270633397e-06, |
|
"loss": 0.6519, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.9683301343570058, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 1.583493282149712e-06, |
|
"loss": 0.6298, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.9692898272552783, |
|
"grad_norm": 0.23046875, |
|
"learning_rate": 1.5355086372360844e-06, |
|
"loss": 0.768, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.9702495201535508, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 1.487523992322457e-06, |
|
"loss": 0.6426, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.9712092130518234, |
|
"grad_norm": 0.2353515625, |
|
"learning_rate": 1.4395393474088294e-06, |
|
"loss": 0.6598, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.972168905950096, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 1.3915547024952015e-06, |
|
"loss": 0.5888, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.9731285988483686, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 1.343570057581574e-06, |
|
"loss": 0.6282, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.974088291746641, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 1.2955854126679464e-06, |
|
"loss": 0.654, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.9750479846449136, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 1.2476007677543187e-06, |
|
"loss": 0.7413, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.9760076775431862, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 1.199616122840691e-06, |
|
"loss": 0.5933, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.9769673704414588, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 1.1516314779270634e-06, |
|
"loss": 0.6595, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.9779270633397313, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 1.1036468330134357e-06, |
|
"loss": 0.6091, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.9788867562380038, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 1.0556621880998082e-06, |
|
"loss": 0.6702, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.9798464491362764, |
|
"grad_norm": 0.1962890625, |
|
"learning_rate": 1.0076775431861805e-06, |
|
"loss": 0.6348, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.980806142034549, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 9.596928982725527e-07, |
|
"loss": 0.7239, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.9817658349328215, |
|
"grad_norm": 0.212890625, |
|
"learning_rate": 9.117082533589252e-07, |
|
"loss": 0.6252, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.982725527831094, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 8.637236084452976e-07, |
|
"loss": 0.6495, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.9836852207293666, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 8.157389635316698e-07, |
|
"loss": 0.6229, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.9846449136276392, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 7.677543186180422e-07, |
|
"loss": 0.6672, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.9856046065259118, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 7.197696737044147e-07, |
|
"loss": 0.6027, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.9865642994241842, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 6.71785028790787e-07, |
|
"loss": 0.6075, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.9875239923224568, |
|
"grad_norm": 0.1982421875, |
|
"learning_rate": 6.238003838771593e-07, |
|
"loss": 0.6318, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.9884836852207294, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 5.758157389635317e-07, |
|
"loss": 0.6317, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.989443378119002, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 5.278310940499041e-07, |
|
"loss": 0.6753, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.9904030710172744, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 4.798464491362764e-07, |
|
"loss": 0.6664, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.991362763915547, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 4.318618042226488e-07, |
|
"loss": 0.6223, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.9923224568138196, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 3.838771593090211e-07, |
|
"loss": 0.657, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.9932821497120922, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 3.358925143953935e-07, |
|
"loss": 0.7154, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.9942418426103646, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 2.8790786948176586e-07, |
|
"loss": 0.6366, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.9952015355086372, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 2.399232245681382e-07, |
|
"loss": 0.7238, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.9961612284069098, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 1.9193857965451055e-07, |
|
"loss": 0.7089, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.9971209213051824, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 1.4395393474088293e-07, |
|
"loss": 0.6592, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.9980806142034548, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 9.596928982725528e-08, |
|
"loss": 0.6543, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.9990403071017274, |
|
"grad_norm": 0.1962890625, |
|
"learning_rate": 4.798464491362764e-08, |
|
"loss": 0.7084, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 0.0, |
|
"loss": 0.6073, |
|
"step": 1042 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1042, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 0, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.362946722077606e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|