{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.998067259373792,
  "eval_steps": 500,
  "global_step": 1292,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0015461925009663702, "grad_norm": 0.305040568113327, "learning_rate": 5.000000000000001e-07, "loss": 0.9801, "step": 1 },
    { "epoch": 0.0030923850019327404, "grad_norm": 0.3316026031970978, "learning_rate": 1.0000000000000002e-06, "loss": 1.0582, "step": 2 },
    { "epoch": 0.004638577502899111, "grad_norm": 0.321511834859848, "learning_rate": 1.5e-06, "loss": 1.0877, "step": 3 },
    { "epoch": 0.006184770003865481, "grad_norm": 0.32884451746940613, "learning_rate": 2.0000000000000003e-06, "loss": 1.1082, "step": 4 },
    { "epoch": 0.007730962504831852, "grad_norm": 0.3244039714336395, "learning_rate": 2.5e-06, "loss": 1.1352, "step": 5 },
    { "epoch": 0.009277155005798222, "grad_norm": 0.319381445646286, "learning_rate": 3e-06, "loss": 1.0808, "step": 6 },
    { "epoch": 0.010823347506764593, "grad_norm": 0.3265005946159363, "learning_rate": 3.5000000000000004e-06, "loss": 1.0554, "step": 7 },
    { "epoch": 0.012369540007730962, "grad_norm": 0.3248363137245178, "learning_rate": 4.000000000000001e-06, "loss": 1.0834, "step": 8 },
    { "epoch": 0.013915732508697333, "grad_norm": 0.3369300365447998, "learning_rate": 4.5e-06, "loss": 1.1304, "step": 9 },
    { "epoch": 0.015461925009663703, "grad_norm": 0.3302851915359497, "learning_rate": 5e-06, "loss": 1.0683, "step": 10 },
    { "epoch": 0.017008117510630073, "grad_norm": 0.3330378830432892, "learning_rate": 5.500000000000001e-06, "loss": 1.1028, "step": 11 },
    { "epoch": 0.018554310011596443, "grad_norm": 0.332900732755661, "learning_rate": 6e-06, "loss": 1.1557, "step": 12 },
    { "epoch": 0.020100502512562814, "grad_norm": 0.36040395498275757, "learning_rate": 6.5000000000000004e-06, "loss": 1.1344, "step": 13 },
    { "epoch": 0.021646695013529185, "grad_norm": 0.37165355682373047, "learning_rate": 7.000000000000001e-06, "loss": 1.1522, "step": 14 },
    { "epoch": 0.023192887514495556, "grad_norm": 0.35057052969932556, "learning_rate": 7.5e-06, "loss": 1.1397, "step": 15 },
    { "epoch": 0.024739080015461924, "grad_norm": 0.3664647042751312, "learning_rate": 8.000000000000001e-06, "loss": 1.1833, "step": 16 },
    { "epoch": 0.026285272516428294, "grad_norm": 0.38355353474617004, "learning_rate": 8.500000000000002e-06, "loss": 1.1925, "step": 17 },
    { "epoch": 0.027831465017394665, "grad_norm": 0.3568861186504364, "learning_rate": 9e-06, "loss": 1.1113, "step": 18 },
    { "epoch": 0.029377657518361036, "grad_norm": 0.3547118604183197, "learning_rate": 9.5e-06, "loss": 1.1403, "step": 19 },
    { "epoch": 0.030923850019327407, "grad_norm": 0.3746045231819153, "learning_rate": 1e-05, "loss": 1.1356, "step": 20 },
    { "epoch": 0.03247004252029378, "grad_norm": 0.38165998458862305, "learning_rate": 1.05e-05, "loss": 1.1239, "step": 21 },
    { "epoch": 0.034016235021260145, "grad_norm": 0.4228748679161072, "learning_rate": 1.1000000000000001e-05, "loss": 1.1263, "step": 22 },
    { "epoch": 0.03556242752222652, "grad_norm": 0.36566364765167236, "learning_rate": 1.1500000000000002e-05, "loss": 1.1362, "step": 23 },
    { "epoch": 0.03710862002319289, "grad_norm": 0.37338364124298096, "learning_rate": 1.2e-05, "loss": 1.1816, "step": 24 },
    { "epoch": 0.038654812524159254, "grad_norm": 0.3412342667579651, "learning_rate": 1.25e-05, "loss": 1.1341, "step": 25 },
    { "epoch": 0.04020100502512563, "grad_norm": 0.38790470361709595, "learning_rate": 1.3000000000000001e-05, "loss": 1.2407, "step": 26 },
    { "epoch": 0.041747197526091996, "grad_norm": 0.38183456659317017, "learning_rate": 1.3500000000000001e-05, "loss": 1.1206, "step": 27 },
    { "epoch": 0.04329339002705837, "grad_norm": 0.4037930965423584, "learning_rate": 1.4000000000000001e-05, "loss": 1.1111, "step": 28 },
    { "epoch": 0.04483958252802474, "grad_norm": 0.3747173547744751, "learning_rate": 1.45e-05, "loss": 1.14, "step": 29 },
    { "epoch": 0.04638577502899111, "grad_norm": 0.37349933385849, "learning_rate": 1.5e-05, "loss": 1.1629, "step": 30 },
    { "epoch": 0.04793196752995748, "grad_norm": 0.3699789345264435, "learning_rate": 1.55e-05, "loss": 1.1417, "step": 31 },
    { "epoch": 0.04947816003092385, "grad_norm": 0.37245669960975647, "learning_rate": 1.6000000000000003e-05, "loss": 1.1563, "step": 32 },
    { "epoch": 0.05102435253189022, "grad_norm": 0.3655848503112793, "learning_rate": 1.65e-05, "loss": 1.1556, "step": 33 },
    { "epoch": 0.05257054503285659, "grad_norm": 0.3637336194515228, "learning_rate": 1.7000000000000003e-05, "loss": 1.1589, "step": 34 },
    { "epoch": 0.05411673753382296, "grad_norm": 0.35633373260498047, "learning_rate": 1.75e-05, "loss": 1.0969, "step": 35 },
    { "epoch": 0.05566293003478933, "grad_norm": 0.35284438729286194, "learning_rate": 1.8e-05, "loss": 1.0921, "step": 36 },
    { "epoch": 0.057209122535755705, "grad_norm": 0.3530278205871582, "learning_rate": 1.85e-05, "loss": 1.0982, "step": 37 },
    { "epoch": 0.05875531503672207, "grad_norm": 0.3533940315246582, "learning_rate": 1.9e-05, "loss": 1.1329, "step": 38 },
    { "epoch": 0.06030150753768844, "grad_norm": 0.35752803087234497, "learning_rate": 1.9500000000000003e-05, "loss": 1.1617, "step": 39 },
    { "epoch": 0.061847700038654814, "grad_norm": 0.34077584743499756, "learning_rate": 2e-05, "loss": 1.0956, "step": 40 },
    { "epoch": 0.06339389253962119, "grad_norm": 0.36975380778312683, "learning_rate": 2.05e-05, "loss": 1.0757, "step": 41 },
    { "epoch": 0.06494008504058756, "grad_norm": 0.36623820662498474, "learning_rate": 2.1e-05, "loss": 1.1465, "step": 42 },
    { "epoch": 0.06648627754155392, "grad_norm": 0.3988489508628845, "learning_rate": 2.15e-05, "loss": 1.1701, "step": 43 },
    { "epoch": 0.06803247004252029, "grad_norm": 0.37758833169937134, "learning_rate": 2.2000000000000003e-05, "loss": 1.0647, "step": 44 },
    { "epoch": 0.06957866254348666, "grad_norm": 0.4047185182571411, "learning_rate": 2.25e-05, "loss": 1.1057, "step": 45 },
    { "epoch": 0.07112485504445304, "grad_norm": 0.39636239409446716, "learning_rate": 2.3000000000000003e-05, "loss": 1.1294, "step": 46 },
    { "epoch": 0.0726710475454194, "grad_norm": 0.4347302317619324, "learning_rate": 2.35e-05, "loss": 1.152, "step": 47 },
    { "epoch": 0.07421724004638577, "grad_norm": 0.4483806788921356, "learning_rate": 2.4e-05, "loss": 1.1676, "step": 48 },
    { "epoch": 0.07576343254735214, "grad_norm": 0.5394858121871948, "learning_rate": 2.45e-05, "loss": 1.2228, "step": 49 },
    { "epoch": 0.07730962504831851, "grad_norm": 0.7282954454421997, "learning_rate": 2.5e-05, "loss": 1.2397, "step": 50 },
    { "epoch": 0.07885581754928489, "grad_norm": 0.3573731482028961, "learning_rate": 2.5500000000000003e-05, "loss": 0.9119, "step": 51 },
    { "epoch": 0.08040201005025126, "grad_norm": 0.36452800035476685, "learning_rate": 2.6000000000000002e-05, "loss": 0.983, "step": 52 },
    { "epoch": 0.08194820255121762, "grad_norm": 0.3743703067302704, "learning_rate": 2.6500000000000004e-05, "loss": 0.9428, "step": 53 },
    { "epoch": 0.08349439505218399, "grad_norm": 0.3590448498725891, "learning_rate": 2.7000000000000002e-05, "loss": 0.9967, "step": 54 },
    { "epoch": 0.08504058755315037, "grad_norm": 0.35900965332984924, "learning_rate": 2.7500000000000004e-05, "loss": 0.9687, "step": 55 },
    { "epoch": 0.08658678005411674, "grad_norm": 0.3654124140739441, "learning_rate": 2.8000000000000003e-05, "loss": 0.9342, "step": 56 },
    { "epoch": 0.08813297255508311, "grad_norm": 0.3421430289745331, "learning_rate": 2.8499999999999998e-05, "loss": 0.9207, "step": 57 },
    { "epoch": 0.08967916505604948, "grad_norm": 0.3365703523159027, "learning_rate": 2.9e-05, "loss": 0.9487, "step": 58 },
    { "epoch": 0.09122535755701584, "grad_norm": 0.32828226685523987, "learning_rate": 2.95e-05, "loss": 0.9236, "step": 59 },
    { "epoch": 0.09277155005798222, "grad_norm": 0.31865429878234863, "learning_rate": 3e-05, "loss": 0.9071, "step": 60 },
    { "epoch": 0.09431774255894859, "grad_norm": 0.3250292241573334, "learning_rate": 3.05e-05, "loss": 0.9609, "step": 61 },
    { "epoch": 0.09586393505991496, "grad_norm": 0.3177807033061981, "learning_rate": 3.1e-05, "loss": 0.9727, "step": 62 },
    { "epoch": 0.09741012756088133, "grad_norm": 0.3273405134677887, "learning_rate": 3.15e-05, "loss": 1.0691, "step": 63 },
    { "epoch": 0.0989563200618477, "grad_norm": 0.30870744585990906, "learning_rate": 3.2000000000000005e-05, "loss": 0.9726, "step": 64 },
    { "epoch": 0.10050251256281408, "grad_norm": 0.3052951395511627, "learning_rate": 3.2500000000000004e-05, "loss": 0.8804, "step": 65 },
    { "epoch": 0.10204870506378044, "grad_norm": 0.2952975928783417, "learning_rate": 3.3e-05, "loss": 0.9563, "step": 66 },
    { "epoch": 0.10359489756474681, "grad_norm": 0.29250308871269226, "learning_rate": 3.35e-05, "loss": 0.9601, "step": 67 },
    { "epoch": 0.10514109006571318, "grad_norm": 0.2929418683052063, "learning_rate": 3.4000000000000007e-05, "loss": 0.8986, "step": 68 },
    { "epoch": 0.10668728256667954, "grad_norm": 0.28594040870666504, "learning_rate": 3.45e-05, "loss": 0.8887, "step": 69 },
    { "epoch": 0.10823347506764593, "grad_norm": 0.2964857816696167, "learning_rate": 3.5e-05, "loss": 0.9407, "step": 70 },
    { "epoch": 0.1097796675686123, "grad_norm": 0.31084346771240234, "learning_rate": 3.55e-05, "loss": 0.9907, "step": 71 },
    { "epoch": 0.11132586006957866, "grad_norm": 0.31780946254730225, "learning_rate": 3.6e-05, "loss": 0.9753, "step": 72 },
    { "epoch": 0.11287205257054503, "grad_norm": 0.31742170453071594, "learning_rate": 3.65e-05, "loss": 0.9411, "step": 73 },
    { "epoch": 0.11441824507151141, "grad_norm": 0.3252294361591339, "learning_rate": 3.7e-05, "loss": 0.9973, "step": 74 },
    { "epoch": 0.11596443757247778, "grad_norm": 0.3107585608959198, "learning_rate": 3.7500000000000003e-05, "loss": 0.9186, "step": 75 },
    { "epoch": 0.11751063007344414, "grad_norm": 0.32054993510246277, "learning_rate": 3.8e-05, "loss": 0.9264, "step": 76 },
    { "epoch": 0.11905682257441051, "grad_norm": 0.31898629665374756, "learning_rate": 3.85e-05, "loss": 0.9958, "step": 77 },
    { "epoch": 0.12060301507537688, "grad_norm": 0.3400716185569763, "learning_rate": 3.9000000000000006e-05, "loss": 0.9793, "step": 78 },
    { "epoch": 0.12214920757634326, "grad_norm": 0.347918838262558, "learning_rate": 3.9500000000000005e-05, "loss": 0.914, "step": 79 },
    { "epoch": 0.12369540007730963, "grad_norm": 0.33906570076942444, "learning_rate": 4e-05, "loss": 0.93, "step": 80 },
    { "epoch": 0.125241592578276, "grad_norm": 0.31842949986457825, "learning_rate": 4.05e-05, "loss": 0.8957, "step": 81 },
    { "epoch": 0.12678778507924238, "grad_norm": 0.3301747739315033, "learning_rate": 4.1e-05, "loss": 0.9606, "step": 82 },
    { "epoch": 0.12833397758020873, "grad_norm": 0.33865636587142944, "learning_rate": 4.15e-05, "loss": 0.9846, "step": 83 },
    { "epoch": 0.1298801700811751, "grad_norm": 0.31991609930992126, "learning_rate": 4.2e-05, "loss": 0.9574, "step": 84 },
    { "epoch": 0.13142636258214146, "grad_norm": 0.3442661762237549, "learning_rate": 4.25e-05, "loss": 0.9398, "step": 85 },
    { "epoch": 0.13297255508310785, "grad_norm": 0.33919069170951843, "learning_rate": 4.3e-05, "loss": 0.9736, "step": 86 },
    { "epoch": 0.13451874758407423, "grad_norm": 0.3447560966014862, "learning_rate": 4.35e-05, "loss": 0.9589, "step": 87 },
    { "epoch": 0.13606494008504058, "grad_norm": 0.3504810333251953, "learning_rate": 4.4000000000000006e-05, "loss": 1.0156, "step": 88 },
    { "epoch": 0.13761113258600696, "grad_norm": 0.35089215636253357, "learning_rate": 4.4500000000000004e-05, "loss": 0.9732, "step": 89 },
    { "epoch": 0.13915732508697332, "grad_norm": 0.346599280834198, "learning_rate": 4.5e-05, "loss": 0.9705, "step": 90 },
    { "epoch": 0.1407035175879397, "grad_norm": 0.3983485698699951, "learning_rate": 4.55e-05, "loss": 1.0236, "step": 91 },
    { "epoch": 0.14224971008890608, "grad_norm": 0.38158902525901794, "learning_rate": 4.600000000000001e-05, "loss": 1.0174, "step": 92 },
    { "epoch": 0.14379590258987243, "grad_norm": 0.3771804869174957, "learning_rate": 4.6500000000000005e-05, "loss": 1.012, "step": 93 },
    { "epoch": 0.1453420950908388, "grad_norm": 0.38228315114974976, "learning_rate": 4.7e-05, "loss": 0.9786, "step": 94 },
    { "epoch": 0.14688828759180517, "grad_norm": 0.3989880681037903, "learning_rate": 4.75e-05, "loss": 0.98, "step": 95 },
    { "epoch": 0.14843448009277155, "grad_norm": 0.41765791177749634, "learning_rate": 4.8e-05, "loss": 1.0502, "step": 96 },
    { "epoch": 0.14998067259373793, "grad_norm": 0.4242067039012909, "learning_rate": 4.85e-05, "loss": 1.0911, "step": 97 },
    { "epoch": 0.15152686509470428, "grad_norm": 0.4490616023540497, "learning_rate": 4.9e-05, "loss": 1.0791, "step": 98 },
    { "epoch": 0.15307305759567066, "grad_norm": 0.4694664478302002, "learning_rate": 4.9500000000000004e-05, "loss": 1.0374, "step": 99 },
    { "epoch": 0.15461925009663702, "grad_norm": 0.7335464954376221, "learning_rate": 5e-05, "loss": 1.093, "step": 100 },
    { "epoch": 0.1561654425976034, "grad_norm": 0.3335883915424347, "learning_rate": 4.995805369127517e-05, "loss": 0.8461, "step": 101 },
    { "epoch": 0.15771163509856978, "grad_norm": 0.3368653357028961, "learning_rate": 4.9916107382550336e-05, "loss": 0.8619, "step": 102 },
    { "epoch": 0.15925782759953613, "grad_norm": 0.34530943632125854, "learning_rate": 4.9874161073825505e-05, "loss": 0.8399, "step": 103 },
    { "epoch": 0.16080402010050251, "grad_norm": 0.3303908407688141, "learning_rate": 4.983221476510067e-05, "loss": 0.891, "step": 104 },
    { "epoch": 0.1623502126014689, "grad_norm": 0.3104991614818573, "learning_rate": 4.9790268456375845e-05, "loss": 0.8851, "step": 105 },
    { "epoch": 0.16389640510243525, "grad_norm": 0.3138999342918396, "learning_rate": 4.974832214765101e-05, "loss": 0.8668, "step": 106 },
    { "epoch": 0.16544259760340163, "grad_norm": 0.3183957040309906, "learning_rate": 4.970637583892618e-05, "loss": 0.8494, "step": 107 },
    { "epoch": 0.16698879010436798, "grad_norm": 0.337296724319458, "learning_rate": 4.966442953020135e-05, "loss": 0.872, "step": 108 },
    { "epoch": 0.16853498260533437, "grad_norm": 0.29680609703063965, "learning_rate": 4.962248322147651e-05, "loss": 0.8473, "step": 109 },
    { "epoch": 0.17008117510630075, "grad_norm": 0.31526410579681396, "learning_rate": 4.958053691275168e-05, "loss": 0.9042, "step": 110 },
    { "epoch": 0.1716273676072671, "grad_norm": 0.3531287610530853, "learning_rate": 4.9538590604026845e-05, "loss": 0.9444, "step": 111 },
    { "epoch": 0.17317356010823348, "grad_norm": 0.324305921792984, "learning_rate": 4.9496644295302015e-05, "loss": 0.8867, "step": 112 },
    { "epoch": 0.17471975260919984, "grad_norm": 0.32134464383125305, "learning_rate": 4.945469798657718e-05, "loss": 0.8575, "step": 113 },
    { "epoch": 0.17626594511016622, "grad_norm": 0.32409024238586426, "learning_rate": 4.9412751677852355e-05, "loss": 0.8937, "step": 114 },
    { "epoch": 0.1778121376111326, "grad_norm": 0.3491647243499756, "learning_rate": 4.937080536912752e-05, "loss": 0.8902, "step": 115 },
    { "epoch": 0.17935833011209895, "grad_norm": 0.3334656357765198, "learning_rate": 4.932885906040269e-05, "loss": 0.8788, "step": 116 },
    { "epoch": 0.18090452261306533, "grad_norm": 0.3333323299884796, "learning_rate": 4.928691275167786e-05, "loss": 0.907, "step": 117 },
    { "epoch": 0.1824507151140317, "grad_norm": 0.3563230335712433, "learning_rate": 4.924496644295302e-05, "loss": 0.9043, "step": 118 },
    { "epoch": 0.18399690761499807, "grad_norm": 0.35443171858787537, "learning_rate": 4.920302013422819e-05, "loss": 0.8966, "step": 119 },
    { "epoch": 0.18554310011596445, "grad_norm": 0.36155420541763306, "learning_rate": 4.9161073825503354e-05, "loss": 0.903, "step": 120 },
    { "epoch": 0.1870892926169308, "grad_norm": 0.3701721131801605, "learning_rate": 4.9119127516778524e-05, "loss": 0.8786, "step": 121 },
    { "epoch": 0.18863548511789718, "grad_norm": 0.3697488307952881, "learning_rate": 4.9077181208053694e-05, "loss": 0.9279, "step": 122 },
    { "epoch": 0.19018167761886354, "grad_norm": 0.35044190287590027, "learning_rate": 4.9035234899328864e-05, "loss": 0.8577, "step": 123 },
    { "epoch": 0.19172787011982992, "grad_norm": 0.3510192632675171, "learning_rate": 4.8993288590604034e-05, "loss": 0.8831, "step": 124 },
    { "epoch": 0.1932740626207963, "grad_norm": 0.3509821593761444, "learning_rate": 4.89513422818792e-05, "loss": 0.8507, "step": 125 },
    { "epoch": 0.19482025512176265, "grad_norm": 0.3761618137359619, "learning_rate": 4.890939597315437e-05, "loss": 0.9151, "step": 126 },
    { "epoch": 0.19636644762272903, "grad_norm": 0.3758131265640259, "learning_rate": 4.886744966442953e-05, "loss": 0.9067, "step": 127 },
    { "epoch": 0.1979126401236954, "grad_norm": 0.39383259415626526, "learning_rate": 4.88255033557047e-05, "loss": 0.9002, "step": 128 },
    { "epoch": 0.19945883262466177, "grad_norm": 0.37084588408470154, "learning_rate": 4.878355704697986e-05, "loss": 0.8459, "step": 129 },
    { "epoch": 0.20100502512562815, "grad_norm": 0.389466792345047, "learning_rate": 4.874161073825503e-05, "loss": 0.9218, "step": 130 },
    { "epoch": 0.2025512176265945, "grad_norm": 0.38749396800994873, "learning_rate": 4.86996644295302e-05, "loss": 0.892, "step": 131 },
    { "epoch": 0.20409741012756089, "grad_norm": 0.36285966634750366, "learning_rate": 4.865771812080537e-05, "loss": 0.8458, "step": 132 },
    { "epoch": 0.20564360262852724, "grad_norm": 0.38966134190559387, "learning_rate": 4.861577181208054e-05, "loss": 0.9189, "step": 133 },
    { "epoch": 0.20718979512949362, "grad_norm": 0.3792301118373871, "learning_rate": 4.8573825503355706e-05, "loss": 0.891, "step": 134 },
    { "epoch": 0.20873598763046, "grad_norm": 0.39519718289375305, "learning_rate": 4.8531879194630876e-05, "loss": 0.8946, "step": 135 },
    { "epoch": 0.21028218013142636, "grad_norm": 0.399034708738327, "learning_rate": 4.848993288590604e-05, "loss": 0.8692, "step": 136 },
    { "epoch": 0.21182837263239274, "grad_norm": 0.451168954372406, "learning_rate": 4.844798657718121e-05, "loss": 0.9005, "step": 137 },
    { "epoch": 0.2133745651333591, "grad_norm": 0.41019031405448914, "learning_rate": 4.840604026845638e-05, "loss": 0.9766, "step": 138 },
    { "epoch": 0.21492075763432547, "grad_norm": 0.4109886586666107, "learning_rate": 4.836409395973154e-05, "loss": 0.9078, "step": 139 },
    { "epoch": 0.21646695013529185, "grad_norm": 0.4546094834804535, "learning_rate": 4.832214765100672e-05, "loss": 0.9621, "step": 140 },
    { "epoch": 0.2180131426362582, "grad_norm": 0.44215071201324463, "learning_rate": 4.828020134228188e-05, "loss": 0.9965, "step": 141 },
    { "epoch": 0.2195593351372246, "grad_norm": 0.4271162748336792, "learning_rate": 4.823825503355705e-05, "loss": 0.9277, "step": 142 },
    { "epoch": 0.22110552763819097, "grad_norm": 0.43438541889190674, "learning_rate": 4.8196308724832215e-05, "loss": 0.9727, "step": 143 },
    { "epoch": 0.22265172013915732, "grad_norm": 0.45643556118011475, "learning_rate": 4.8154362416107385e-05, "loss": 0.9284, "step": 144 },
    { "epoch": 0.2241979126401237, "grad_norm": 0.5079519748687744, "learning_rate": 4.8112416107382555e-05, "loss": 1.004, "step": 145 },
    { "epoch": 0.22574410514109006, "grad_norm": 0.4708511531352997, "learning_rate": 4.807046979865772e-05, "loss": 1.001, "step": 146 },
    { "epoch": 0.22729029764205644, "grad_norm": 0.510336697101593, "learning_rate": 4.802852348993289e-05, "loss": 0.9808, "step": 147 },
    { "epoch": 0.22883649014302282, "grad_norm": 0.5595572590827942, "learning_rate": 4.798657718120805e-05, "loss": 0.9983, "step": 148 },
    { "epoch": 0.23038268264398917, "grad_norm": 0.5763404965400696, "learning_rate": 4.794463087248323e-05, "loss": 0.9841, "step": 149 },
    { "epoch": 0.23192887514495555, "grad_norm": 0.9527286887168884, "learning_rate": 4.790268456375839e-05, "loss": 0.9827, "step": 150 },
    { "epoch": 0.2334750676459219, "grad_norm": 0.35568949580192566, "learning_rate": 4.786073825503356e-05, "loss": 0.8084, "step": 151 },
    { "epoch": 0.2350212601468883, "grad_norm": 0.3866717517375946, "learning_rate": 4.7818791946308725e-05, "loss": 0.8538, "step": 152 },
    { "epoch": 0.23656745264785467, "grad_norm": 0.3884953260421753, "learning_rate": 4.7776845637583895e-05, "loss": 0.853, "step": 153 },
    { "epoch": 0.23811364514882102, "grad_norm": 0.40036800503730774, "learning_rate": 4.7734899328859064e-05, "loss": 0.8604, "step": 154 },
    { "epoch": 0.2396598376497874, "grad_norm": 0.4019627273082733, "learning_rate": 4.769295302013423e-05, "loss": 0.8409, "step": 155 },
    { "epoch": 0.24120603015075376, "grad_norm": 0.3811728358268738, "learning_rate": 4.76510067114094e-05, "loss": 0.8602, "step": 156 },
    { "epoch": 0.24275222265172014, "grad_norm": 0.39744654297828674, "learning_rate": 4.760906040268457e-05, "loss": 0.8011, "step": 157 },
    { "epoch": 0.24429841515268652, "grad_norm": 0.38136810064315796, "learning_rate": 4.756711409395974e-05, "loss": 0.9004, "step": 158 },
    { "epoch": 0.24584460765365287, "grad_norm": 0.3670859932899475, "learning_rate": 4.75251677852349e-05, "loss": 0.8506, "step": 159 },
    { "epoch": 0.24739080015461926, "grad_norm": 0.4202471375465393, "learning_rate": 4.748322147651007e-05, "loss": 0.7832, "step": 160 },
    { "epoch": 0.2489369926555856, "grad_norm": 0.40722745656967163, "learning_rate": 4.744127516778524e-05, "loss": 0.8813, "step": 161 },
    { "epoch": 0.250483185156552, "grad_norm": 0.4037550389766693, "learning_rate": 4.7399328859060404e-05, "loss": 0.8328, "step": 162 },
    { "epoch": 0.25202937765751837, "grad_norm": 0.4262521266937256, "learning_rate": 4.7357382550335574e-05, "loss": 0.9148, "step": 163 },
    { "epoch": 0.25357557015848475, "grad_norm": 0.40966907143592834, "learning_rate": 4.731543624161074e-05, "loss": 0.8689, "step": 164 },
    { "epoch": 0.2551217626594511, "grad_norm": 0.41350632905960083, "learning_rate": 4.727348993288591e-05, "loss": 0.8253, "step": 165 },
    { "epoch": 0.25666795516041746, "grad_norm": 0.3822355568408966, "learning_rate": 4.723154362416108e-05, "loss": 0.8176, "step": 166 },
    { "epoch": 0.25821414766138384, "grad_norm": 0.4168022572994232, "learning_rate": 4.718959731543625e-05, "loss": 0.8821, "step": 167 },
    { "epoch": 0.2597603401623502, "grad_norm": 0.4205927848815918, "learning_rate": 4.714765100671141e-05, "loss": 0.9162, "step": 168 },
    { "epoch": 0.2613065326633166, "grad_norm": 0.41793638467788696, "learning_rate": 4.710570469798658e-05, "loss": 0.9045, "step": 169 },
    { "epoch": 0.26285272516428293, "grad_norm": 0.41607388854026794, "learning_rate": 4.706375838926175e-05, "loss": 0.8474, "step": 170 },
    { "epoch": 0.2643989176652493, "grad_norm": 0.3936135172843933, "learning_rate": 4.702181208053691e-05, "loss": 0.8859, "step": 171 },
    { "epoch": 0.2659451101662157, "grad_norm": 0.39022210240364075, "learning_rate": 4.697986577181208e-05, "loss": 0.8455, "step": 172 },
    { "epoch": 0.2674913026671821, "grad_norm": 0.40289798378944397, "learning_rate": 4.6937919463087246e-05, "loss": 0.9244, "step": 173 },
    { "epoch": 0.26903749516814845, "grad_norm": 0.4135661721229553, "learning_rate": 4.6895973154362416e-05, "loss": 0.8427, "step": 174 },
    { "epoch": 0.2705836876691148, "grad_norm": 0.4445662796497345, "learning_rate": 4.6854026845637586e-05, "loss": 0.8042, "step": 175 },
    { "epoch": 0.27212988017008116, "grad_norm": 0.42175382375717163, "learning_rate": 4.6812080536912756e-05, "loss": 0.8447, "step": 176 },
    { "epoch": 0.27367607267104754, "grad_norm": 0.42236700654029846, "learning_rate": 4.6770134228187926e-05, "loss": 0.8964, "step": 177 },
    { "epoch": 0.2752222651720139, "grad_norm": 0.44100165367126465, "learning_rate": 4.672818791946309e-05, "loss": 0.7989, "step": 178 },
    { "epoch": 0.2767684576729803, "grad_norm": 0.43265581130981445, "learning_rate": 4.668624161073826e-05, "loss": 0.8795, "step": 179 },
    { "epoch": 0.27831465017394663, "grad_norm": 0.43812161684036255, "learning_rate": 4.664429530201342e-05, "loss": 0.8984, "step": 180 },
    { "epoch": 0.279860842674913, "grad_norm": 0.4735831916332245, "learning_rate": 4.660234899328859e-05, "loss": 0.8868, "step": 181 },
    { "epoch": 0.2814070351758794, "grad_norm": 0.4555661678314209, "learning_rate": 4.6560402684563755e-05, "loss": 0.9057, "step": 182 },
    { "epoch": 0.2829532276768458, "grad_norm": 0.44536617398262024, "learning_rate": 4.6518456375838925e-05, "loss": 0.8956, "step": 183 },
    { "epoch": 0.28449942017781216, "grad_norm": 0.4430801272392273, "learning_rate": 4.6476510067114095e-05, "loss": 0.9171, "step": 184 },
    { "epoch": 0.2860456126787785, "grad_norm": 0.4534691572189331, "learning_rate": 4.6434563758389265e-05, "loss": 0.7871, "step": 185 },
    { "epoch": 0.28759180517974486, "grad_norm": 0.46288105845451355, "learning_rate": 4.6392617449664435e-05, "loss": 0.8819, "step": 186 },
    { "epoch": 0.28913799768071125, "grad_norm": 0.4744343161582947, "learning_rate": 4.63506711409396e-05, "loss": 0.9415, "step": 187 },
    { "epoch": 0.2906841901816776, "grad_norm": 0.46916016936302185, "learning_rate": 4.630872483221477e-05, "loss": 0.9436, "step": 188 },
    { "epoch": 0.292230382682644, "grad_norm": 0.4746655821800232, "learning_rate": 4.626677852348993e-05, "loss": 0.9039, "step": 189 },
    { "epoch": 0.29377657518361033, "grad_norm": 0.48333853483200073, "learning_rate": 4.62248322147651e-05, "loss": 0.9466, "step": 190 },
    { "epoch": 0.2953227676845767, "grad_norm": 0.4735361933708191, "learning_rate": 4.618288590604027e-05, "loss": 0.9088, "step": 191 },
    { "epoch": 0.2968689601855431, "grad_norm": 0.484523206949234, "learning_rate": 4.6140939597315434e-05, "loss": 0.909, "step": 192 },
    { "epoch": 0.2984151526865095, "grad_norm": 0.5111984610557556, "learning_rate": 4.609899328859061e-05, "loss": 0.9451, "step": 193 },
    { "epoch": 0.29996134518747586, "grad_norm": 0.5157277584075928, "learning_rate": 4.6057046979865774e-05, "loss": 0.9768, "step": 194 },
    { "epoch": 0.3015075376884422, "grad_norm": 0.5363951325416565, "learning_rate": 4.6015100671140944e-05, "loss": 0.9411, "step": 195 },
    { "epoch": 0.30305373018940857, "grad_norm": 0.5306119918823242, "learning_rate": 4.597315436241611e-05, "loss": 0.9238, "step": 196 },
    { "epoch": 0.30459992269037495, "grad_norm": 0.5576231479644775, "learning_rate": 4.593120805369128e-05, "loss": 0.9866, "step": 197 },
    { "epoch": 0.30614611519134133, "grad_norm": 0.6054885983467102, "learning_rate": 4.588926174496645e-05, "loss": 0.9987, "step": 198 },
    { "epoch": 0.3076923076923077, "grad_norm": 0.6553575396537781, "learning_rate": 4.584731543624161e-05, "loss": 1.0413, "step": 199 },
    { "epoch": 0.30923850019327404, "grad_norm": 1.023345708847046, "learning_rate": 4.580536912751678e-05, "loss": 1.0355, "step": 200 },
    { "epoch": 0.3107846926942404, "grad_norm": 0.4243873059749603, "learning_rate": 4.576342281879195e-05, "loss": 0.7642, "step": 201 },
    { "epoch": 0.3123308851952068, "grad_norm": 0.4183518886566162, "learning_rate": 4.572147651006712e-05, "loss": 0.8115, "step": 202 },
    { "epoch": 0.3138770776961732, "grad_norm": 0.43979495763778687, "learning_rate": 4.5679530201342284e-05, "loss": 0.7705, "step": 203 },
    { "epoch": 0.31542327019713956, "grad_norm": 0.44140326976776123, "learning_rate": 4.5637583892617453e-05, "loss": 0.7784, "step": 204 },
    { "epoch": 0.31696946269810594, "grad_norm": 0.4253062903881073, "learning_rate": 4.559563758389262e-05, "loss": 0.8006, "step": 205 },
    { "epoch": 0.31851565519907227, "grad_norm": 0.41898655891418457, "learning_rate": 4.5553691275167787e-05, "loss": 0.8304, "step": 206 },
    { "epoch": 0.32006184770003865, "grad_norm": 0.3986593186855316, "learning_rate": 4.5511744966442957e-05, "loss": 0.7815, "step": 207 },
    { "epoch": 0.32160804020100503, "grad_norm": 0.42180249094963074, "learning_rate": 4.546979865771812e-05, "loss": 0.859, "step": 208 },
    { "epoch": 0.3231542327019714, "grad_norm": 0.429800808429718, "learning_rate": 4.542785234899329e-05, "loss": 0.8188, "step": 209 },
    { "epoch": 0.3247004252029378, "grad_norm": 0.4582015573978424, "learning_rate": 4.538590604026846e-05, "loss": 0.8069, "step": 210 },
    { "epoch": 0.3262466177039041, "grad_norm": 0.44153645634651184, "learning_rate": 4.534395973154363e-05, "loss": 0.8107, "step": 211 },
    { "epoch": 0.3277928102048705, "grad_norm": 0.4466538429260254, "learning_rate": 4.530201342281879e-05, "loss": 0.8076, "step": 212 },
    { "epoch": 0.3293390027058369, "grad_norm": 0.4263439178466797, "learning_rate": 4.526006711409396e-05, "loss": 0.8532, "step": 213 },
    { "epoch": 0.33088519520680326, "grad_norm": 0.469927042722702, "learning_rate": 4.521812080536913e-05, "loss": 0.8264, "step": 214 },
    { "epoch": 0.33243138770776964, "grad_norm": 0.45597830414772034, "learning_rate": 4.5176174496644296e-05, "loss": 0.8125, "step": 215 },
    { "epoch": 0.33397758020873597, "grad_norm": 0.43289533257484436, "learning_rate": 4.5134228187919466e-05, "loss": 0.8223, "step": 216 },
    { "epoch": 0.33552377270970235, "grad_norm": 0.4476693272590637, "learning_rate": 4.509228187919463e-05, "loss": 0.8206, "step": 217 },
    { "epoch": 0.33706996521066873, "grad_norm": 0.43755078315734863, "learning_rate": 4.50503355704698e-05, "loss": 0.8084, "step": 218 },
    { "epoch": 0.3386161577116351, "grad_norm": 0.4320957064628601, "learning_rate": 4.500838926174497e-05, "loss": 0.8127, "step": 219 },
    { "epoch": 0.3401623502126015, "grad_norm": 0.45105233788490295, "learning_rate": 4.496644295302014e-05, "loss": 0.8208, "step": 220 },
    { "epoch": 0.3417085427135678, "grad_norm": 0.42372819781303406, "learning_rate": 4.49244966442953e-05, "loss": 0.8685, "step": 221 },
    { "epoch": 0.3432547352145342, "grad_norm": 0.4444120526313782, "learning_rate": 4.488255033557047e-05, "loss": 0.8501, "step": 222 },
    { "epoch": 0.3448009277155006, "grad_norm": 0.4355757236480713, "learning_rate": 4.484060402684564e-05, "loss": 0.8507, "step": 223 },
    { "epoch": 0.34634712021646696, "grad_norm": 0.45914098620414734, "learning_rate": 4.4798657718120805e-05, "loss": 0.9056, "step": 224 },
    { "epoch": 0.34789331271743335, "grad_norm": 0.4455774128437042, "learning_rate": 4.4756711409395975e-05, "loss": 0.8716, "step": 225 },
    { "epoch": 0.34943950521839967, "grad_norm": 0.4548904299736023, "learning_rate": 4.471476510067114e-05, "loss": 0.7938, "step": 226 },
    { "epoch": 0.35098569771936605, "grad_norm": 0.44709107279777527, "learning_rate": 4.467281879194631e-05, "loss": 0.8248, "step": 227 },
    { "epoch": 0.35253189022033243, "grad_norm": 0.4717913866043091, "learning_rate": 4.463087248322148e-05, "loss": 0.864, "step": 228 },
    { "epoch": 0.3540780827212988, "grad_norm": 0.48902633786201477, "learning_rate": 4.458892617449665e-05, "loss": 0.7545, "step": 229 },
    { "epoch": 0.3556242752222652, "grad_norm": 0.4466339945793152, "learning_rate": 4.454697986577182e-05, "loss": 0.8869, "step": 230 },
    { "epoch": 0.3571704677232315, "grad_norm": 0.4503350257873535, "learning_rate": 4.450503355704698e-05, "loss": 0.8719, "step": 231 },
    { "epoch": 0.3587166602241979, "grad_norm": 0.4927978217601776, "learning_rate": 4.446308724832215e-05, "loss": 0.9214, "step": 232 },
    { "epoch": 0.3602628527251643, "grad_norm": 0.4751432240009308, "learning_rate": 4.4421140939597314e-05, "loss": 0.8735, "step": 233 },
    { "epoch": 0.36180904522613067, "grad_norm": 0.4933432638645172, "learning_rate": 4.4379194630872484e-05, "loss": 0.8353, "step": 234 },
    { "epoch": 0.36335523772709705, "grad_norm": 0.5034083127975464, "learning_rate": 4.4337248322147654e-05, "loss": 0.9168, "step": 235 },
    { "epoch": 0.3649014302280634, "grad_norm": 0.5039856433868408, "learning_rate": 4.4295302013422824e-05, "loss": 0.9183, "step": 236 },
    { "epoch": 0.36644762272902975, "grad_norm": 0.47999468445777893, "learning_rate": 4.4253355704697994e-05, "loss": 0.9402, "step": 237 },
    { "epoch": 0.36799381522999614, "grad_norm": 0.5011245608329773, "learning_rate": 4.421140939597316e-05, "loss": 0.9587, "step": 238 },
    { "epoch": 0.3695400077309625, "grad_norm": 0.49425458908081055, "learning_rate": 4.416946308724833e-05, "loss": 0.8474, "step": 239 },
    { "epoch": 0.3710862002319289, "grad_norm": 0.514880359172821, "learning_rate": 4.412751677852349e-05, "loss": 0.9016, "step": 240 },
    { "epoch": 0.3726323927328952, "grad_norm": 0.5200977325439453, "learning_rate": 4.408557046979866e-05, "loss": 0.9552, "step": 241 },
    { "epoch": 0.3741785852338616, "grad_norm": 0.5420643091201782, "learning_rate": 4.4043624161073823e-05, "loss": 0.9023, "step": 242 },
    { "epoch": 0.375724777734828, "grad_norm": 0.5034189820289612, "learning_rate": 4.4001677852348993e-05, "loss": 0.9255, "step": 243 },
    { "epoch": 0.37727097023579437, "grad_norm": 0.529698371887207, "learning_rate": 4.395973154362416e-05, "loss": 0.9337, "step": 244 },
    { "epoch": 0.37881716273676075, "grad_norm": 0.5248023867607117, "learning_rate": 4.391778523489933e-05, "loss": 0.9626, "step": 245 },
    { "epoch": 0.3803633552377271, "grad_norm": 0.5809698700904846, "learning_rate": 4.38758389261745e-05, "loss": 0.9598, "step": 246 },
    { "epoch": 0.38190954773869346, "grad_norm": 0.5663429498672485, "learning_rate": 4.3833892617449666e-05, "loss": 0.979, "step": 247 },
    { "epoch": 0.38345574023965984, "grad_norm": 0.6147710680961609, "learning_rate": 4.3791946308724836e-05, "loss": 0.9747, "step": 248 },
    { "epoch": 0.3850019327406262, "grad_norm": 0.7502771615982056, "learning_rate": 4.375e-05, "loss": 0.9344, "step": 249 },
    { "epoch": 0.3865481252415926, "grad_norm": 1.0911471843719482, "learning_rate": 4.370805369127517e-05, "loss": 1.0075, "step": 250 },
    { "epoch": 0.3880943177425589, "grad_norm": 0.4232114255428314, "learning_rate": 4.366610738255034e-05, "loss": 0.7643, "step": 251 },
    { "epoch": 0.3896405102435253, "grad_norm": 0.423627644777298, "learning_rate": 4.36241610738255e-05, "loss": 0.7551, "step": 252 },
    { "epoch": 0.3911867027444917, "grad_norm": 0.4198389947414398, "learning_rate": 4.358221476510067e-05, "loss": 0.7888, "step": 253 },
    { "epoch": 0.39273289524545807, "grad_norm": 0.43714022636413574, "learning_rate": 4.354026845637584e-05, "loss": 0.7958, "step": 254 },
    { "epoch": 0.39427908774642445, "grad_norm": 0.4278389513492584, "learning_rate": 4.349832214765101e-05, "loss": 0.7959, "step": 255 },
    { "epoch": 0.3958252802473908, "grad_norm": 0.45499542355537415, "learning_rate": 4.3456375838926176e-05, "loss": 0.7461, "step": 256 },
    { "epoch": 0.39737147274835716, "grad_norm": 0.46723300218582153, "learning_rate": 4.3414429530201346e-05, "loss": 0.8312, "step": 257 },
    { "epoch": 0.39891766524932354, "grad_norm": 0.44024282693862915, "learning_rate": 4.337248322147651e-05, "loss": 0.8517, "step": 258 },
    { "epoch": 0.4004638577502899, "grad_norm": 0.42471960186958313, "learning_rate": 4.333053691275168e-05, "loss": 0.8312, "step": 259 },
    { "epoch": 0.4020100502512563, "grad_norm": 0.47083520889282227, "learning_rate": 4.328859060402685e-05, "loss": 0.8618, "step": 260 },
    { "epoch": 0.4035562427522226, "grad_norm": 0.45681217312812805, "learning_rate": 4.324664429530201e-05, "loss": 0.8986, "step": 261 },
    { "epoch": 0.405102435253189, "grad_norm": 0.42533236742019653, "learning_rate": 4.320469798657718e-05, "loss": 0.824, "step": 262 },
    { "epoch": 0.4066486277541554, "grad_norm": 0.4372871220111847, "learning_rate": 4.316275167785235e-05, "loss": 0.8077, "step": 263 },
    { "epoch": 0.40819482025512177, "grad_norm": 0.45690760016441345, "learning_rate": 4.312080536912752e-05, "loss": 0.8735, "step": 264 },
    { "epoch": 0.40974101275608815, "grad_norm": 0.46814388036727905, "learning_rate": 4.3078859060402685e-05, "loss": 0.8161, "step": 265 },
    { "epoch": 0.4112872052570545, "grad_norm": 0.4442387521266937, "learning_rate": 4.3036912751677855e-05, "loss": 0.8352, "step": 266 },
    { "epoch": 0.41283339775802086, "grad_norm": 0.4814305305480957, "learning_rate": 4.2994966442953025e-05, "loss": 0.7508, "step": 267 },
    { "epoch": 0.41437959025898724, "grad_norm": 0.4563637673854828, "learning_rate": 4.295302013422819e-05, "loss": 0.8639, "step": 268 },
    { "epoch": 0.4159257827599536, "grad_norm": 0.4439164698123932, "learning_rate": 4.291107382550336e-05, "loss": 0.8466, "step": 269 },
    { "epoch": 0.41747197526092, "grad_norm": 0.45698022842407227, "learning_rate": 4.286912751677852e-05, "loss": 0.8784, "step": 270 },
    { "epoch": 0.41901816776188633, "grad_norm": 0.46446678042411804, "learning_rate": 4.28271812080537e-05, "loss": 0.7856, "step": 271 },
    { "epoch": 0.4205643602628527, "grad_norm": 0.4599681496620178, "learning_rate": 4.278523489932886e-05, "loss": 0.8241, "step": 272 },
    { "epoch": 0.4221105527638191, "grad_norm": 0.4912761449813843, "learning_rate": 4.274328859060403e-05, "loss": 0.8697, "step": 273 },
    { "epoch": 0.4236567452647855, "grad_norm": 0.4875286817550659, "learning_rate": 4.27013422818792e-05, "loss": 0.7999, "step": 274 },
    { "epoch": 0.42520293776575185, "grad_norm": 0.4821873903274536, "learning_rate": 4.2659395973154364e-05, "loss": 0.7944, "step": 275 },
    { "epoch": 0.4267491302667182, "grad_norm": 0.4670597016811371, "learning_rate": 4.2617449664429534e-05, "loss": 0.9423, "step": 276 },
    { "epoch": 0.42829532276768456, "grad_norm": 0.5004227757453918, "learning_rate": 4.25755033557047e-05, "loss": 0.8073, "step": 277 },
    { "epoch": 0.42984151526865094, "grad_norm": 0.510180652141571, "learning_rate": 4.253355704697987e-05, "loss": 0.8658, "step": 278 },
    { "epoch": 0.4313877077696173, "grad_norm": 0.4866536855697632, "learning_rate": 4.249161073825503e-05, "loss": 0.7532, "step": 279 },
    { "epoch": 0.4329339002705837, "grad_norm": 0.5010313987731934, "learning_rate": 4.244966442953021e-05, "loss": 0.9368, "step": 280 },
    { "epoch": 0.43448009277155003, "grad_norm": 0.5455735325813293, "learning_rate": 4.240771812080537e-05, "loss": 0.9058, "step": 281 },
    { "epoch": 0.4360262852725164, "grad_norm": 0.47950518131256104, "learning_rate": 4.236577181208054e-05, "loss": 0.8709, "step": 282 },
    { "epoch": 0.4375724777734828, "grad_norm": 0.5143994092941284, "learning_rate": 4.232382550335571e-05, "loss": 0.8528, "step": 283 },
    { "epoch": 0.4391186702744492, "grad_norm": 0.5421144366264343, "learning_rate": 4.228187919463087e-05, "loss": 0.8989, "step": 284 },
    { "epoch": 0.44066486277541556, "grad_norm": 0.5018422603607178, "learning_rate": 4.223993288590604e-05, "loss": 0.8903, "step": 285 },
    { "epoch": 0.44221105527638194, "grad_norm": 0.4975266754627228, "learning_rate": 4.2197986577181206e-05, "loss": 0.9568, "step": 286 },
    { "epoch": 0.44375724777734826, "grad_norm": 0.4982987642288208, "learning_rate": 4.2156040268456376e-05, "loss": 0.946, "step": 287 },
    { "epoch": 0.44530344027831464, "grad_norm": 0.5241461992263794, "learning_rate": 4.2114093959731546e-05, "loss": 0.9359, "step": 288 },
    { "epoch": 0.446849632779281, "grad_norm": 0.4898720681667328, "learning_rate": 4.2072147651006716e-05, "loss": 0.9798, "step": 289 },
    { "epoch": 0.4483958252802474, "grad_norm": 0.5274215936660767, "learning_rate": 4.2030201342281886e-05, "loss": 0.8603, "step": 290 },
    { "epoch": 0.4499420177812138, "grad_norm": 0.5538789629936218, "learning_rate": 4.198825503355705e-05, "loss": 0.8873, "step": 291 },
    { "epoch": 0.4514882102821801, "grad_norm": 0.5374253392219543, "learning_rate": 4.194630872483222e-05, "loss": 1.0174, "step": 292 },
    { "epoch": 0.4530344027831465, "grad_norm": 0.5286028385162354, "learning_rate": 4.190436241610738e-05, "loss": 0.9891, "step": 293 },
    { "epoch": 0.4545805952841129, "grad_norm": 0.5447548627853394, "learning_rate": 4.186241610738255e-05, "loss": 0.9587, "step": 294 },
    { "epoch": 0.45612678778507926, "grad_norm": 0.5515534281730652, "learning_rate": 4.1820469798657716e-05, "loss": 0.9663, "step": 295 },
    { "epoch": 0.45767298028604564, "grad_norm": 0.6086519956588745, "learning_rate": 4.1778523489932886e-05, "loss": 1.0357, "step": 296 },
    { "epoch": 0.45921917278701196, "grad_norm": 0.6332388520240784, "learning_rate": 4.1736577181208055e-05, "loss": 0.9999, "step": 297 },
    { "epoch": 0.46076536528797835, "grad_norm": 0.6928420066833496, "learning_rate": 4.1694630872483225e-05, "loss": 1.0243, "step": 298 },
    { "epoch": 0.4623115577889447, "grad_norm": 0.7226160168647766, "learning_rate": 4.1652684563758395e-05, "loss": 0.9209, "step": 299 },
    { "epoch": 0.4638577502899111, "grad_norm": 1.0083510875701904, "learning_rate": 4.161073825503356e-05, "loss": 0.9493, "step": 300 },
    { "epoch": 0.4654039427908775, "grad_norm": 0.4649132192134857, "learning_rate": 4.156879194630873e-05, "loss": 0.7743, "step": 301 },
    { "epoch": 0.4669501352918438, "grad_norm": 0.43430233001708984, "learning_rate": 4.152684563758389e-05, "loss": 0.8197, "step": 302 },
    { "epoch": 0.4684963277928102, "grad_norm": 0.44487112760543823, "learning_rate": 4.148489932885906e-05, "loss": 0.7632, "step": 303 },
    { "epoch": 0.4700425202937766, "grad_norm": 0.4654642343521118, "learning_rate": 4.144295302013423e-05, "loss": 0.7637, "step": 304 },
    { "epoch": 0.47158871279474296, "grad_norm": 0.48062166571617126, "learning_rate": 4.1401006711409395e-05, "loss": 0.7771, "step": 305 },
    { "epoch": 0.47313490529570934, "grad_norm": 0.4307418167591095, "learning_rate": 4.135906040268457e-05, "loss": 0.8336, "step": 306 },
    { "epoch": 0.47468109779667567, "grad_norm": 0.4675520658493042, "learning_rate": 4.1317114093959735e-05, "loss": 0.7969, "step": 307 },
    { "epoch": 0.47622729029764205, "grad_norm": 0.48613134026527405, "learning_rate": 4.1275167785234905e-05, "loss": 0.8821, "step": 308 },
    { "epoch": 0.47777348279860843, "grad_norm": 0.46094274520874023, "learning_rate": 4.123322147651007e-05, "loss": 0.8103, "step": 309 },
    { "epoch": 0.4793196752995748, "grad_norm": 0.47018101811408997, "learning_rate": 4.119127516778524e-05, "loss": 0.8515, "step": 310 },
    { "epoch": 0.4808658678005412, "grad_norm": 0.43754515051841736, "learning_rate": 4.11493288590604e-05, "loss": 0.8218, "step": 311 },
    { "epoch": 0.4824120603015075, "grad_norm": 0.49217140674591064, "learning_rate": 4.110738255033557e-05, "loss": 0.7866, "step": 312 },
    { "epoch": 0.4839582528024739, "grad_norm": 0.47612541913986206, "learning_rate": 4.106543624161074e-05, "loss": 0.8088, "step": 313 },
    { "epoch": 0.4855044453034403, "grad_norm": 0.475328654050827, "learning_rate": 4.1023489932885904e-05, "loss": 0.8165, "step": 314 },
    { "epoch": 0.48705063780440666, "grad_norm": 0.4439482092857361, "learning_rate": 4.098154362416108e-05, "loss": 0.7833, "step": 315 },
    { "epoch": 0.48859683030537304, "grad_norm": 0.5019133687019348, "learning_rate": 4.0939597315436244e-05, "loss": 0.7942, "step": 316 },
    { "epoch": 0.49014302280633937, "grad_norm": 0.451492577791214, "learning_rate": 4.0897651006711414e-05, "loss": 0.8039, "step": 317 },
    { "epoch": 0.49168921530730575, "grad_norm": 0.46894291043281555, "learning_rate": 4.085570469798658e-05, "loss": 0.843, "step": 318 },
    { "epoch": 0.49323540780827213, "grad_norm": 0.49084803462028503, "learning_rate": 4.081375838926175e-05, "loss": 0.8121, "step": 319 },
    { "epoch": 0.4947816003092385, "grad_norm": 0.46862879395484924, "learning_rate": 4.077181208053692e-05, "loss": 0.901, "step": 320 },
    { "epoch": 0.4963277928102049, "grad_norm": 0.4881606698036194, "learning_rate": 4.072986577181208e-05, "loss": 0.9001, "step": 321 },
    { "epoch": 0.4978739853111712, "grad_norm": 0.5040379166603088, "learning_rate": 4.068791946308725e-05, "loss": 0.801, "step": 322 },
    { "epoch": 0.4994201778121376, "grad_norm": 0.46498745679855347, "learning_rate": 4.064597315436241e-05, "loss": 0.8473, "step": 323 },
    { "epoch": 0.500966370313104, "grad_norm": 0.4876708388328552, "learning_rate": 4.060402684563759e-05, "loss": 0.8418, "step": 324 },
    { "epoch": 0.5025125628140703, "grad_norm": 0.4962271451950073, "learning_rate": 4.056208053691275e-05, "loss": 0.7611, "step": 325 },
    { "epoch": 0.5040587553150367, "grad_norm": 0.48774364590644836, "learning_rate": 4.052013422818792e-05, "loss": 0.8825, "step": 326 },
    { "epoch": 0.5056049478160031, "grad_norm": 0.5011964440345764, "learning_rate": 4.047818791946309e-05, "loss": 0.7659, "step": 327 },
    { "epoch": 0.5071511403169695, "grad_norm": 0.5307598114013672, "learning_rate": 4.0436241610738256e-05, "loss": 0.8629, "step": 328 },
    { "epoch": 0.5086973328179358, "grad_norm": 0.49158573150634766, "learning_rate": 4.0394295302013426e-05, "loss": 0.9071, "step": 329 },
    { "epoch": 0.5102435253189022, "grad_norm": 0.5126063227653503, "learning_rate": 4.035234899328859e-05, "loss": 0.7998, "step": 330 },
    { "epoch": 0.5117897178198686, "grad_norm": 0.5247028470039368, "learning_rate": 4.031040268456376e-05, "loss": 0.8443, "step": 331 },
    { "epoch": 0.5133359103208349, "grad_norm": 0.5511295199394226, "learning_rate": 4.026845637583892e-05, "loss": 0.8946, "step": 332 },
    { "epoch": 0.5148821028218014, "grad_norm": 0.5266992449760437, "learning_rate": 4.02265100671141e-05, "loss": 0.8172, "step": 333 },
    { "epoch": 0.5164282953227677, "grad_norm": 0.5326645374298096, "learning_rate": 4.018456375838926e-05, "loss": 0.8407, "step": 334 },
    { "epoch": 0.517974487823734, "grad_norm": 0.5065395832061768, "learning_rate": 4.014261744966443e-05, "loss": 0.8751, "step": 335 },
    { "epoch": 0.5195206803247004, "grad_norm": 0.53061443567276, "learning_rate": 4.01006711409396e-05, "loss": 0.9204, "step": 336 },
    { "epoch": 0.5210668728256668, "grad_norm": 0.5223432779312134, "learning_rate": 4.0058724832214765e-05, "loss": 0.8933, "step": 337 },
    { "epoch": 0.5226130653266332, "grad_norm": 0.5290891528129578, "learning_rate": 4.0016778523489935e-05, "loss": 0.977, "step": 338 },
    { "epoch": 0.5241592578275995, "grad_norm": 0.5419663786888123, "learning_rate": 3.99748322147651e-05, "loss": 0.9521, "step": 339 },
    { "epoch": 0.5257054503285659, "grad_norm": 0.5404860973358154, "learning_rate": 3.993288590604027e-05, "loss": 0.8907, "step": 340 },
    { "epoch": 0.5272516428295323, "grad_norm": 0.5783061385154724, "learning_rate": 3.989093959731544e-05, "loss": 0.9162, "step": 341 },
    { "epoch": 0.5287978353304986, "grad_norm": 0.5513641834259033, "learning_rate": 3.984899328859061e-05, "loss": 0.8912, "step": 342 },
    { "epoch": 0.5303440278314651, "grad_norm": 0.5670628547668457, "learning_rate": 3.980704697986578e-05, "loss": 0.9502, "step": 343 },
    { "epoch": 0.5318902203324314, "grad_norm": 0.5960517525672913, "learning_rate": 3.976510067114094e-05, "loss": 0.9976, "step": 344 },
    { "epoch": 0.5334364128333977, "grad_norm": 0.6074213981628418, "learning_rate": 3.972315436241611e-05, "loss": 0.9861, "step": 345 },
    { "epoch": 0.5349826053343641, "grad_norm": 0.6100422143936157, "learning_rate": 3.9681208053691275e-05, "loss": 0.9483, "step": 346 },
    { "epoch": 0.5365287978353305, "grad_norm": 0.6308846473693848, "learning_rate": 3.9639261744966445e-05, "loss": 0.9455, "step": 347 },
    { "epoch": 0.5380749903362969, "grad_norm": 0.6239264607429504, "learning_rate": 3.959731543624161e-05, "loss": 0.9551, "step": 348 },
    { "epoch": 0.5396211828372632, "grad_norm": 0.6669812798500061, "learning_rate": 3.955536912751678e-05, "loss": 0.9251, "step": 349 },
    { "epoch": 0.5411673753382296, "grad_norm": 0.8175333738327026, "learning_rate": 3.951342281879195e-05, "loss": 0.9795, "step": 350 },
    { "epoch": 0.542713567839196, "grad_norm": 0.44942405819892883, "learning_rate": 3.947147651006712e-05, "loss": 0.7184, "step": 351 },
    { "epoch": 0.5442597603401623, "grad_norm": 0.46283286809921265, "learning_rate": 3.942953020134229e-05, "loss": 0.7251, "step": 352 },
    { "epoch": 0.5458059528411288, "grad_norm": 0.47923749685287476, "learning_rate": 3.938758389261745e-05, "loss": 0.766, "step": 353 },
    { "epoch": 0.5473521453420951, "grad_norm": 0.4575538635253906, "learning_rate": 3.934563758389262e-05, "loss": 0.7539, "step": 354 },
    { "epoch": 0.5488983378430614, "grad_norm": 0.48839524388313293, "learning_rate": 3.9303691275167784e-05, "loss": 0.7633, "step": 355 },
    { "epoch": 0.5504445303440278, "grad_norm": 0.45980286598205566, "learning_rate": 3.9261744966442954e-05, "loss": 0.774, "step": 356 },
    { "epoch": 0.5519907228449942, "grad_norm": 0.4767687916755676, "learning_rate": 3.9219798657718124e-05, "loss": 0.7588, "step": 357 },
    { "epoch": 0.5535369153459606, "grad_norm": 0.4907895028591156, "learning_rate": 3.917785234899329e-05, "loss": 0.7305, "step": 358 },
    { "epoch": 0.5550831078469269, "grad_norm": 0.46510565280914307, "learning_rate": 3.9135906040268464e-05, "loss": 0.8727, "step": 359 },
    { "epoch": 0.5566293003478933, "grad_norm": 0.4917638599872589, "learning_rate": 3.909395973154363e-05, "loss": 0.8013, "step": 360 },
    { "epoch": 0.5581754928488597, "grad_norm": 0.49160364270210266, "learning_rate": 3.90520134228188e-05, "loss": 0.8871, "step": 361 },
    { "epoch": 0.559721685349826, "grad_norm": 0.5044489502906799, "learning_rate": 3.901006711409396e-05, "loss": 0.7945, "step": 362 },
    { "epoch": 0.5612678778507925, "grad_norm": 0.49622687697410583, "learning_rate": 3.896812080536913e-05, "loss": 0.7616, "step": 363 },
    { "epoch": 0.5628140703517588, "grad_norm": 0.5114305019378662, "learning_rate": 3.89261744966443e-05, "loss": 0.7593, "step": 364 },
    { "epoch": 0.5643602628527251, "grad_norm": 0.499931663274765, "learning_rate": 3.888422818791946e-05, "loss": 0.7935, "step": 365 },
    { "epoch": 0.5659064553536916, "grad_norm": 0.511566698551178, "learning_rate": 3.884228187919463e-05, "loss": 0.7665, "step": 366 },
    { "epoch": 0.5674526478546579, "grad_norm": 0.49260082840919495, "learning_rate": 3.8800335570469796e-05, "loss": 0.8207, "step": 367 },
    { "epoch": 0.5689988403556243, "grad_norm": 0.48072633147239685, "learning_rate": 3.875838926174497e-05, "loss": 0.7739, "step": 368 },
    { "epoch": 0.5705450328565906, "grad_norm": 0.4969918727874756, "learning_rate": 3.8716442953020136e-05, "loss": 0.8155, "step": 369 },
    { "epoch": 0.572091225357557, "grad_norm": 0.49780312180519104, "learning_rate": 3.8674496644295306e-05, "loss": 0.8397, "step": 370 },
    { "epoch": 0.5736374178585234, "grad_norm": 0.5126276612281799, "learning_rate": 3.863255033557047e-05, "loss": 0.8101, "step": 371 },
    { "epoch": 0.5751836103594897, "grad_norm": 0.48839470744132996, "learning_rate": 3.859060402684564e-05, "loss": 0.7629, "step": 372 },
    { "epoch": 0.5767298028604562, "grad_norm": 0.4904743432998657, "learning_rate": 3.854865771812081e-05, "loss": 0.8126,
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.5782759953614225, |
|
"grad_norm": 0.48648014664649963, |
|
"learning_rate": 3.850671140939597e-05, |
|
"loss": 0.845, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.5798221878623888, |
|
"grad_norm": 0.537968635559082, |
|
"learning_rate": 3.846476510067114e-05, |
|
"loss": 0.863, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.5813683803633553, |
|
"grad_norm": 0.5034117102622986, |
|
"learning_rate": 3.8422818791946305e-05, |
|
"loss": 0.8333, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.5829145728643216, |
|
"grad_norm": 0.529244601726532, |
|
"learning_rate": 3.838087248322148e-05, |
|
"loss": 0.7756, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.584460765365288, |
|
"grad_norm": 0.5105958580970764, |
|
"learning_rate": 3.8338926174496645e-05, |
|
"loss": 0.8372, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.5860069578662543, |
|
"grad_norm": 0.5209059715270996, |
|
"learning_rate": 3.8296979865771815e-05, |
|
"loss": 0.8347, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.5875531503672207, |
|
"grad_norm": 0.5022969841957092, |
|
"learning_rate": 3.8255033557046985e-05, |
|
"loss": 0.8657, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.5890993428681871, |
|
"grad_norm": 0.5265902876853943, |
|
"learning_rate": 3.821308724832215e-05, |
|
"loss": 0.8333, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.5906455353691534, |
|
"grad_norm": 0.5082066655158997, |
|
"learning_rate": 3.817114093959732e-05, |
|
"loss": 0.8834, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.5921917278701199, |
|
"grad_norm": 0.5264511704444885, |
|
"learning_rate": 3.812919463087248e-05, |
|
"loss": 0.908, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.5937379203710862, |
|
"grad_norm": 0.5576880574226379, |
|
"learning_rate": 3.808724832214765e-05, |
|
"loss": 0.9174, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.5952841128720525, |
|
"grad_norm": 0.5170852541923523, |
|
"learning_rate": 3.804530201342282e-05, |
|
"loss": 0.8983, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.596830305373019, |
|
"grad_norm": 0.5212023854255676, |
|
"learning_rate": 3.800335570469799e-05, |
|
"loss": 0.8684, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.5983764978739853, |
|
"grad_norm": 0.5252931714057922, |
|
"learning_rate": 3.7961409395973154e-05, |
|
"loss": 0.9016, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.5999226903749517, |
|
"grad_norm": 0.5745819807052612, |
|
"learning_rate": 3.7919463087248324e-05, |
|
"loss": 0.8521, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.601468882875918, |
|
"grad_norm": 0.5259801149368286, |
|
"learning_rate": 3.7877516778523494e-05, |
|
"loss": 0.9101, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.6030150753768844, |
|
"grad_norm": 0.5453861355781555, |
|
"learning_rate": 3.783557046979866e-05, |
|
"loss": 0.8816, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6045612678778508, |
|
"grad_norm": 0.5567570328712463, |
|
"learning_rate": 3.779362416107383e-05, |
|
"loss": 0.9126, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.6061074603788171, |
|
"grad_norm": 0.5654810070991516, |
|
"learning_rate": 3.775167785234899e-05, |
|
"loss": 0.8147, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.6076536528797836, |
|
"grad_norm": 0.5371072888374329, |
|
"learning_rate": 3.770973154362416e-05, |
|
"loss": 0.936, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.6091998453807499, |
|
"grad_norm": 0.6840121746063232, |
|
"learning_rate": 3.766778523489933e-05, |
|
"loss": 1.0121, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.6107460378817162, |
|
"grad_norm": 0.6007458567619324, |
|
"learning_rate": 3.76258389261745e-05, |
|
"loss": 0.895, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.6122922303826827, |
|
"grad_norm": 0.6644576191902161, |
|
"learning_rate": 3.758389261744967e-05, |
|
"loss": 0.9978, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.613838422883649, |
|
"grad_norm": 0.6520909667015076, |
|
"learning_rate": 3.7541946308724834e-05, |
|
"loss": 0.9551, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.6153846153846154, |
|
"grad_norm": 0.66536945104599, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.9551, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.6169308078855817, |
|
"grad_norm": 0.7479529976844788, |
|
"learning_rate": 3.745805369127517e-05, |
|
"loss": 1.022, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.6184770003865481, |
|
"grad_norm": 1.1065740585327148, |
|
"learning_rate": 3.741610738255034e-05, |
|
"loss": 0.9431, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6200231928875145, |
|
"grad_norm": 0.4878085255622864, |
|
"learning_rate": 3.7374161073825507e-05, |
|
"loss": 0.728, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.6215693853884808, |
|
"grad_norm": 0.4753767251968384, |
|
"learning_rate": 3.733221476510067e-05, |
|
"loss": 0.7575, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.6231155778894473, |
|
"grad_norm": 0.4956800043582916, |
|
"learning_rate": 3.7290268456375846e-05, |
|
"loss": 0.7292, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.6246617703904136, |
|
"grad_norm": 0.48405370116233826, |
|
"learning_rate": 3.724832214765101e-05, |
|
"loss": 0.7879, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.62620796289138, |
|
"grad_norm": 0.46832674741744995, |
|
"learning_rate": 3.720637583892618e-05, |
|
"loss": 0.7819, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.6277541553923464, |
|
"grad_norm": 0.4691636264324188, |
|
"learning_rate": 3.716442953020134e-05, |
|
"loss": 0.7984, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.6293003478933127, |
|
"grad_norm": 0.5165214538574219, |
|
"learning_rate": 3.712248322147651e-05, |
|
"loss": 0.7296, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.6308465403942791, |
|
"grad_norm": 0.5001758933067322, |
|
"learning_rate": 3.7080536912751676e-05, |
|
"loss": 0.808, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.6323927328952454, |
|
"grad_norm": 0.4783785045146942, |
|
"learning_rate": 3.7038590604026846e-05, |
|
"loss": 0.7856, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.6339389253962119, |
|
"grad_norm": 0.5511770248413086, |
|
"learning_rate": 3.6996644295302016e-05, |
|
"loss": 0.711, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6354851178971782, |
|
"grad_norm": 0.5434517860412598, |
|
"learning_rate": 3.695469798657718e-05, |
|
"loss": 0.8498, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.6370313103981445, |
|
"grad_norm": 0.5018695592880249, |
|
"learning_rate": 3.6912751677852356e-05, |
|
"loss": 0.7898, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.638577502899111, |
|
"grad_norm": 0.5000431537628174, |
|
"learning_rate": 3.687080536912752e-05, |
|
"loss": 0.7803, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.6401236954000773, |
|
"grad_norm": 0.5340814590454102, |
|
"learning_rate": 3.682885906040269e-05, |
|
"loss": 0.7671, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.6416698879010437, |
|
"grad_norm": 0.5046684741973877, |
|
"learning_rate": 3.678691275167785e-05, |
|
"loss": 0.7876, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.6432160804020101, |
|
"grad_norm": 0.5036927461624146, |
|
"learning_rate": 3.674496644295302e-05, |
|
"loss": 0.8726, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.6447622729029764, |
|
"grad_norm": 0.5092120170593262, |
|
"learning_rate": 3.670302013422819e-05, |
|
"loss": 0.806, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.6463084654039428, |
|
"grad_norm": 0.5002840757369995, |
|
"learning_rate": 3.6661073825503355e-05, |
|
"loss": 0.8814, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.6478546579049091, |
|
"grad_norm": 0.5107703804969788, |
|
"learning_rate": 3.6619127516778525e-05, |
|
"loss": 0.7984, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.6494008504058756, |
|
"grad_norm": 0.5407206416130066, |
|
"learning_rate": 3.6577181208053695e-05, |
|
"loss": 0.7979, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6509470429068419, |
|
"grad_norm": 0.5402006506919861, |
|
"learning_rate": 3.6535234899328865e-05, |
|
"loss": 0.7547, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.6524932354078082, |
|
"grad_norm": 0.5221443176269531, |
|
"learning_rate": 3.649328859060403e-05, |
|
"loss": 0.8426, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.6540394279087747, |
|
"grad_norm": 0.5319470167160034, |
|
"learning_rate": 3.64513422818792e-05, |
|
"loss": 0.7866, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.655585620409741, |
|
"grad_norm": 0.5060355067253113, |
|
"learning_rate": 3.640939597315436e-05, |
|
"loss": 0.8102, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.6571318129107074, |
|
"grad_norm": 0.5067129731178284, |
|
"learning_rate": 3.636744966442953e-05, |
|
"loss": 0.8316, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.6586780054116738, |
|
"grad_norm": 0.5815935730934143, |
|
"learning_rate": 3.63255033557047e-05, |
|
"loss": 0.7207, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.6602241979126401, |
|
"grad_norm": 0.518981397151947, |
|
"learning_rate": 3.6283557046979864e-05, |
|
"loss": 0.7909, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.6617703904136065, |
|
"grad_norm": 0.5797117948532104, |
|
"learning_rate": 3.6241610738255034e-05, |
|
"loss": 0.8346, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.6633165829145728, |
|
"grad_norm": 0.565744936466217, |
|
"learning_rate": 3.6199664429530204e-05, |
|
"loss": 0.8366, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.6648627754155393, |
|
"grad_norm": 0.5538972020149231, |
|
"learning_rate": 3.6157718120805374e-05, |
|
"loss": 0.8346, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.6664089679165056, |
|
"grad_norm": 0.561184823513031, |
|
"learning_rate": 3.611577181208054e-05, |
|
"loss": 0.8389, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.6679551604174719, |
|
"grad_norm": 0.5446305871009827, |
|
"learning_rate": 3.607382550335571e-05, |
|
"loss": 0.7981, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.6695013529184384, |
|
"grad_norm": 0.6099398136138916, |
|
"learning_rate": 3.603187919463088e-05, |
|
"loss": 0.8988, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.6710475454194047, |
|
"grad_norm": 0.572850227355957, |
|
"learning_rate": 3.598993288590604e-05, |
|
"loss": 0.8981, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.6725937379203711, |
|
"grad_norm": 0.5837070941925049, |
|
"learning_rate": 3.594798657718121e-05, |
|
"loss": 0.9049, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.6741399304213375, |
|
"grad_norm": 0.5394341945648193, |
|
"learning_rate": 3.5906040268456373e-05, |
|
"loss": 0.8744, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.6756861229223038, |
|
"grad_norm": 0.5494775772094727, |
|
"learning_rate": 3.5864093959731543e-05, |
|
"loss": 0.9612, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.6772323154232702, |
|
"grad_norm": 0.5773348808288574, |
|
"learning_rate": 3.582214765100671e-05, |
|
"loss": 0.9005, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.6787785079242366, |
|
"grad_norm": 0.5757828950881958, |
|
"learning_rate": 3.578020134228188e-05, |
|
"loss": 0.9121, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.680324700425203, |
|
"grad_norm": 0.6010167598724365, |
|
"learning_rate": 3.5738255033557046e-05, |
|
"loss": 0.8575, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.6818708929261693, |
|
"grad_norm": 0.621435284614563, |
|
"learning_rate": 3.5696308724832216e-05, |
|
"loss": 0.9356, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.6834170854271356, |
|
"grad_norm": 0.5952706933021545, |
|
"learning_rate": 3.5654362416107386e-05, |
|
"loss": 0.9653, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.6849632779281021, |
|
"grad_norm": 0.59757000207901, |
|
"learning_rate": 3.561241610738255e-05, |
|
"loss": 0.9147, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.6865094704290684, |
|
"grad_norm": 0.6352919936180115, |
|
"learning_rate": 3.557046979865772e-05, |
|
"loss": 0.8931, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.6880556629300348, |
|
"grad_norm": 0.5814509987831116, |
|
"learning_rate": 3.552852348993288e-05, |
|
"loss": 0.9158, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.6896018554310012, |
|
"grad_norm": 0.7028055787086487, |
|
"learning_rate": 3.548657718120805e-05, |
|
"loss": 0.8891, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.6911480479319675, |
|
"grad_norm": 0.659831166267395, |
|
"learning_rate": 3.544463087248322e-05, |
|
"loss": 0.9629, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.6926942404329339, |
|
"grad_norm": 0.7297990918159485, |
|
"learning_rate": 3.540268456375839e-05, |
|
"loss": 0.9856, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.6942404329339003, |
|
"grad_norm": 0.8682158589363098, |
|
"learning_rate": 3.536073825503356e-05, |
|
"loss": 1.0006, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.6957866254348667, |
|
"grad_norm": 1.2786856889724731, |
|
"learning_rate": 3.5318791946308726e-05, |
|
"loss": 0.9672, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.697332817935833, |
|
"grad_norm": 0.5570630431175232, |
|
"learning_rate": 3.5276845637583896e-05, |
|
"loss": 0.6907, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.6988790104367993, |
|
"grad_norm": 0.4869362413883209, |
|
"learning_rate": 3.523489932885906e-05, |
|
"loss": 0.7276, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.7004252029377658, |
|
"grad_norm": 0.4926219582557678, |
|
"learning_rate": 3.519295302013423e-05, |
|
"loss": 0.7988, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.7019713954387321, |
|
"grad_norm": 0.49141383171081543, |
|
"learning_rate": 3.51510067114094e-05, |
|
"loss": 0.7625, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.7035175879396985, |
|
"grad_norm": 0.48099425435066223, |
|
"learning_rate": 3.510906040268457e-05, |
|
"loss": 0.7932, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.7050637804406649, |
|
"grad_norm": 0.48948779702186584, |
|
"learning_rate": 3.506711409395974e-05, |
|
"loss": 0.725, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.7066099729416312, |
|
"grad_norm": 0.5123636722564697, |
|
"learning_rate": 3.50251677852349e-05, |
|
"loss": 0.7704, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.7081561654425976, |
|
"grad_norm": 0.49425482749938965, |
|
"learning_rate": 3.498322147651007e-05, |
|
"loss": 0.8, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.709702357943564, |
|
"grad_norm": 0.5385423898696899, |
|
"learning_rate": 3.4941275167785235e-05, |
|
"loss": 0.8093, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.7112485504445304, |
|
"grad_norm": 0.5466816425323486, |
|
"learning_rate": 3.4899328859060405e-05, |
|
"loss": 0.7991, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7127947429454967, |
|
"grad_norm": 0.5069779753684998, |
|
"learning_rate": 3.485738255033557e-05, |
|
"loss": 0.7989, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.714340935446463, |
|
"grad_norm": 0.5083027482032776, |
|
"learning_rate": 3.481543624161074e-05, |
|
"loss": 0.7891, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.7158871279474295, |
|
"grad_norm": 0.5162233710289001, |
|
"learning_rate": 3.477348993288591e-05, |
|
"loss": 0.7208, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.7174333204483958, |
|
"grad_norm": 0.5323002338409424, |
|
"learning_rate": 3.473154362416108e-05, |
|
"loss": 0.7471, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.7189795129493622, |
|
"grad_norm": 0.5144820809364319, |
|
"learning_rate": 3.468959731543625e-05, |
|
"loss": 0.8335, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.7205257054503286, |
|
"grad_norm": 0.5344799160957336, |
|
"learning_rate": 3.464765100671141e-05, |
|
"loss": 0.7575, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.7220718979512949, |
|
"grad_norm": 0.5163140296936035, |
|
"learning_rate": 3.460570469798658e-05, |
|
"loss": 0.7471, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.7236180904522613, |
|
"grad_norm": 0.537284791469574, |
|
"learning_rate": 3.4563758389261744e-05, |
|
"loss": 0.8013, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.7251642829532277, |
|
"grad_norm": 0.5194945335388184, |
|
"learning_rate": 3.4521812080536914e-05, |
|
"loss": 0.7855, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.7267104754541941, |
|
"grad_norm": 0.5371651649475098, |
|
"learning_rate": 3.4479865771812084e-05, |
|
"loss": 0.7345, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.7282566679551604, |
|
"grad_norm": 0.5294081568717957, |
|
"learning_rate": 3.443791946308725e-05, |
|
"loss": 0.854, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.7298028604561267, |
|
"grad_norm": 0.5522701144218445, |
|
"learning_rate": 3.439597315436242e-05, |
|
"loss": 0.8338, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.7313490529570932, |
|
"grad_norm": 0.5342947244644165, |
|
"learning_rate": 3.435402684563759e-05, |
|
"loss": 0.8429, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.7328952454580595, |
|
"grad_norm": 0.5579586029052734, |
|
"learning_rate": 3.431208053691276e-05, |
|
"loss": 0.77, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.734441437959026, |
|
"grad_norm": 0.5210466384887695, |
|
"learning_rate": 3.427013422818792e-05, |
|
"loss": 0.8538, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.7359876304599923, |
|
"grad_norm": 0.5218535661697388, |
|
"learning_rate": 3.422818791946309e-05, |
|
"loss": 0.8771, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.7375338229609586, |
|
"grad_norm": 0.5211417078971863, |
|
"learning_rate": 3.418624161073825e-05, |
|
"loss": 0.827, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.739080015461925, |
|
"grad_norm": 0.5523113012313843, |
|
"learning_rate": 3.414429530201342e-05, |
|
"loss": 0.8367, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.7406262079628914, |
|
"grad_norm": 0.5218014717102051, |
|
"learning_rate": 3.410234899328859e-05, |
|
"loss": 0.8319, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.7421724004638578, |
|
"grad_norm": 0.5514734387397766, |
|
"learning_rate": 3.4060402684563756e-05, |
|
"loss": 0.8703, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.7437185929648241, |
|
"grad_norm": 0.5442553162574768, |
|
"learning_rate": 3.4018456375838926e-05, |
|
"loss": 0.8094, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.7452647854657904, |
|
"grad_norm": 0.5472526550292969, |
|
"learning_rate": 3.3976510067114096e-05, |
|
"loss": 0.8419, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.7468109779667569, |
|
"grad_norm": 0.5330756306648254, |
|
"learning_rate": 3.3934563758389266e-05, |
|
"loss": 0.8497, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.7483571704677232, |
|
"grad_norm": 0.5805166363716125, |
|
"learning_rate": 3.389261744966443e-05, |
|
"loss": 0.8505, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.7499033629686896, |
|
"grad_norm": 0.5910758376121521, |
|
"learning_rate": 3.38506711409396e-05, |
|
"loss": 0.8154, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.751449555469656, |
|
"grad_norm": 0.579165518283844, |
|
"learning_rate": 3.380872483221477e-05, |
|
"loss": 0.8398, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.7529957479706223, |
|
"grad_norm": 0.5517799258232117, |
|
"learning_rate": 3.376677852348993e-05, |
|
"loss": 0.8426, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.7545419404715887, |
|
"grad_norm": 0.588138997554779, |
|
"learning_rate": 3.37248322147651e-05, |
|
"loss": 0.8658, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.7560881329725551, |
|
"grad_norm": 0.5962609648704529, |
|
"learning_rate": 3.3682885906040266e-05, |
|
"loss": 0.9157, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.7576343254735215, |
|
"grad_norm": 0.5821203589439392, |
|
"learning_rate": 3.3640939597315436e-05, |
|
"loss": 0.9158, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.7591805179744878, |
|
"grad_norm": 0.5653342008590698, |
|
"learning_rate": 3.3598993288590605e-05, |
|
"loss": 0.924, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.7607267104754541, |
|
"grad_norm": 0.6114529967308044, |
|
"learning_rate": 3.3557046979865775e-05, |
|
"loss": 0.9249, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.7622729029764206, |
|
"grad_norm": 0.6291983723640442, |
|
"learning_rate": 3.3515100671140945e-05, |
|
"loss": 0.8665, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.7638190954773869, |
|
"grad_norm": 0.6541888117790222, |
|
"learning_rate": 3.347315436241611e-05, |
|
"loss": 0.9678, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.7653652879783533, |
|
"grad_norm": 0.6252798438072205, |
|
"learning_rate": 3.343120805369128e-05, |
|
"loss": 0.8691, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.7669114804793197, |
|
"grad_norm": 0.6714550852775574, |
|
"learning_rate": 3.338926174496644e-05, |
|
"loss": 0.9581, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.768457672980286, |
|
"grad_norm": 0.7336750626564026, |
|
"learning_rate": 3.334731543624161e-05, |
|
"loss": 0.9024, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.7700038654812524, |
|
"grad_norm": 0.7459555864334106, |
|
"learning_rate": 3.3305369127516775e-05, |
|
"loss": 0.9153, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.7715500579822188, |
|
"grad_norm": 0.7960460186004639, |
|
"learning_rate": 3.326342281879195e-05, |
|
"loss": 0.9776, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.7730962504831852, |
|
"grad_norm": 1.188610315322876, |
|
"learning_rate": 3.3221476510067115e-05, |
|
"loss": 0.9963, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7746424429841515, |
|
"grad_norm": 0.5373630523681641, |
|
"learning_rate": 3.3179530201342285e-05, |
|
"loss": 0.7497, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.7761886354851179, |
|
"grad_norm": 0.5297138094902039, |
|
"learning_rate": 3.3137583892617455e-05, |
|
"loss": 0.6842, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.7777348279860843, |
|
"grad_norm": 0.5116965770721436, |
|
"learning_rate": 3.309563758389262e-05, |
|
"loss": 0.7081, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.7792810204870506, |
|
"grad_norm": 0.523707389831543, |
|
"learning_rate": 3.305369127516779e-05, |
|
"loss": 0.7354, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.780827212988017, |
|
"grad_norm": 0.5127305388450623, |
|
"learning_rate": 3.301174496644295e-05, |
|
"loss": 0.7478, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.7823734054889834, |
|
"grad_norm": 0.5249508619308472, |
|
"learning_rate": 3.296979865771812e-05, |
|
"loss": 0.7198, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.7839195979899497, |
|
"grad_norm": 0.4998358190059662, |
|
"learning_rate": 3.292785234899329e-05, |
|
"loss": 0.7726, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.7854657904909161, |
|
"grad_norm": 0.48964107036590576, |
|
"learning_rate": 3.288590604026846e-05, |
|
"loss": 0.7643, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.7870119829918825, |
|
"grad_norm": 0.4985421299934387, |
|
"learning_rate": 3.284395973154363e-05, |
|
"loss": 0.7807, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.7885581754928489, |
|
"grad_norm": 0.5199535489082336, |
|
"learning_rate": 3.2802013422818794e-05, |
|
"loss": 0.7899, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.7901043679938152, |
|
"grad_norm": 0.5251364707946777, |
|
"learning_rate": 3.2760067114093964e-05, |
|
"loss": 0.7619, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.7916505604947816, |
|
"grad_norm": 0.5302333831787109, |
|
"learning_rate": 3.271812080536913e-05, |
|
"loss": 0.7561, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.793196752995748, |
|
"grad_norm": 0.544916033744812, |
|
"learning_rate": 3.26761744966443e-05, |
|
"loss": 0.7497, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.7947429454967143, |
|
"grad_norm": 0.5568458437919617, |
|
"learning_rate": 3.263422818791946e-05, |
|
"loss": 0.7872, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.7962891379976808, |
|
"grad_norm": 0.5136657357215881, |
|
"learning_rate": 3.259228187919463e-05, |
|
"loss": 0.8583, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.7978353304986471, |
|
"grad_norm": 0.5261164903640747, |
|
"learning_rate": 3.25503355704698e-05, |
|
"loss": 0.7806, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.7993815229996134, |
|
"grad_norm": 0.5282236933708191, |
|
"learning_rate": 3.250838926174497e-05, |
|
"loss": 0.7391, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.8009277155005798, |
|
"grad_norm": 0.5518252849578857, |
|
"learning_rate": 3.246644295302014e-05, |
|
"loss": 0.7981, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.8024739080015462, |
|
"grad_norm": 0.5146390199661255, |
|
"learning_rate": 3.24244966442953e-05, |
|
"loss": 0.7831, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.8040201005025126, |
|
"grad_norm": 0.5058282017707825, |
|
"learning_rate": 3.238255033557047e-05, |
|
"loss": 0.807, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.8055662930034789, |
|
"grad_norm": 0.5313502550125122, |
|
"learning_rate": 3.2340604026845636e-05, |
|
"loss": 0.8176, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.8071124855044453, |
|
"grad_norm": 0.5471131801605225, |
|
"learning_rate": 3.2298657718120806e-05, |
|
"loss": 0.81, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.8086586780054117, |
|
"grad_norm": 0.5153350830078125, |
|
"learning_rate": 3.2256711409395976e-05, |
|
"loss": 0.8729, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.810204870506378, |
|
"grad_norm": 0.5148741006851196, |
|
"learning_rate": 3.221476510067114e-05, |
|
"loss": 0.7776, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.8117510630073445, |
|
"grad_norm": 0.5151890516281128, |
|
"learning_rate": 3.217281879194631e-05, |
|
"loss": 0.8355, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.8132972555083108, |
|
"grad_norm": 0.5539215207099915, |
|
"learning_rate": 3.213087248322148e-05, |
|
"loss": 0.8074, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.8148434480092771, |
|
"grad_norm": 0.5416978597640991, |
|
"learning_rate": 3.208892617449665e-05, |
|
"loss": 0.8077, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.8163896405102435, |
|
"grad_norm": 0.5648258924484253, |
|
"learning_rate": 3.204697986577181e-05, |
|
"loss": 0.7928, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.8179358330112099, |
|
"grad_norm": 0.549947202205658, |
|
"learning_rate": 3.200503355704698e-05, |
|
"loss": 0.8371, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.8194820255121763, |
|
"grad_norm": 0.5815473198890686, |
|
"learning_rate": 3.196308724832215e-05, |
|
"loss": 0.8236, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.8210282180131426, |
|
"grad_norm": 0.5924739241600037, |
|
"learning_rate": 3.1921140939597315e-05, |
|
"loss": 0.7953, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.822574410514109, |
|
"grad_norm": 0.5691911578178406, |
|
"learning_rate": 3.1879194630872485e-05, |
|
"loss": 0.8312, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.8241206030150754, |
|
"grad_norm": 0.5568677186965942, |
|
"learning_rate": 3.183724832214765e-05, |
|
"loss": 0.839, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.8256667955160417, |
|
"grad_norm": 0.584894061088562, |
|
"learning_rate": 3.1795302013422825e-05, |
|
"loss": 0.9014, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.8272129880170082, |
|
"grad_norm": 0.5695962309837341, |
|
"learning_rate": 3.175335570469799e-05, |
|
"loss": 0.8177, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.8287591805179745, |
|
"grad_norm": 0.5662206411361694, |
|
"learning_rate": 3.171140939597316e-05, |
|
"loss": 0.8666, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.8303053730189408, |
|
"grad_norm": 0.6044044494628906, |
|
"learning_rate": 3.166946308724832e-05, |
|
"loss": 0.8757, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.8318515655199072, |
|
"grad_norm": 0.5584191083908081, |
|
"learning_rate": 3.162751677852349e-05, |
|
"loss": 0.8701, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.8333977580208736, |
|
"grad_norm": 0.6086747646331787, |
|
"learning_rate": 3.158557046979866e-05, |
|
"loss": 0.9369, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.83494395052184, |
|
"grad_norm": 0.5706461071968079, |
|
"learning_rate": 3.1543624161073825e-05, |
|
"loss": 0.9749, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.8364901430228063, |
|
"grad_norm": 0.5996837615966797, |
|
"learning_rate": 3.1501677852348995e-05, |
|
"loss": 0.881, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.8380363355237727, |
|
"grad_norm": 0.645283043384552, |
|
"learning_rate": 3.145973154362416e-05, |
|
"loss": 0.9201, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.8395825280247391, |
|
"grad_norm": 0.6247162222862244, |
|
"learning_rate": 3.1417785234899334e-05, |
|
"loss": 0.9159, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.8411287205257054, |
|
"grad_norm": 0.6042696833610535, |
|
"learning_rate": 3.13758389261745e-05, |
|
"loss": 0.9362, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.8426749130266719, |
|
"grad_norm": 0.6593182682991028, |
|
"learning_rate": 3.133389261744967e-05, |
|
"loss": 0.9722, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.8442211055276382, |
|
"grad_norm": 0.665708601474762, |
|
"learning_rate": 3.129194630872484e-05, |
|
"loss": 0.9028, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.8457672980286045, |
|
"grad_norm": 0.6491990089416504, |
|
"learning_rate": 3.125e-05, |
|
"loss": 0.9169, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.847313490529571, |
|
"grad_norm": 0.7431784868240356, |
|
"learning_rate": 3.120805369127517e-05, |
|
"loss": 0.9927, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.8488596830305373, |
|
"grad_norm": 0.8240005970001221, |
|
"learning_rate": 3.1166107382550334e-05, |
|
"loss": 1.0051, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.8504058755315037, |
|
"grad_norm": 1.1392122507095337, |
|
"learning_rate": 3.1124161073825504e-05, |
|
"loss": 0.9428, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.85195206803247, |
|
"grad_norm": 0.48860234022140503, |
|
"learning_rate": 3.108221476510067e-05, |
|
"loss": 0.7069, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.8534982605334364, |
|
"grad_norm": 0.4950990378856659, |
|
"learning_rate": 3.1040268456375844e-05, |
|
"loss": 0.7463, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.8550444530344028, |
|
"grad_norm": 0.48706600069999695, |
|
"learning_rate": 3.099832214765101e-05, |
|
"loss": 0.786, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.8565906455353691, |
|
"grad_norm": 0.5027382373809814, |
|
"learning_rate": 3.095637583892618e-05, |
|
"loss": 0.7882, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.8581368380363356, |
|
"grad_norm": 0.5104815363883972, |
|
"learning_rate": 3.091442953020135e-05, |
|
"loss": 0.7301, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.8596830305373019, |
|
"grad_norm": 0.49334749579429626, |
|
"learning_rate": 3.087248322147651e-05, |
|
"loss": 0.786, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.8612292230382682, |
|
"grad_norm": 0.5204259157180786, |
|
"learning_rate": 3.083053691275168e-05, |
|
"loss": 0.7809, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.8627754155392346, |
|
"grad_norm": 0.5358408093452454, |
|
"learning_rate": 3.078859060402684e-05, |
|
"loss": 0.7887, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.864321608040201, |
|
"grad_norm": 0.515990674495697, |
|
"learning_rate": 3.074664429530201e-05, |
|
"loss": 0.8452, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.8658678005411674, |
|
"grad_norm": 0.5604992508888245, |
|
"learning_rate": 3.070469798657718e-05, |
|
"loss": 0.7178, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.8674139930421337, |
|
"grad_norm": 0.5511056184768677, |
|
"learning_rate": 3.066275167785235e-05, |
|
"loss": 0.7577, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.8689601855431001, |
|
"grad_norm": 0.5570728778839111, |
|
"learning_rate": 3.062080536912752e-05, |
|
"loss": 0.7799, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.8705063780440665, |
|
"grad_norm": 0.535253643989563, |
|
"learning_rate": 3.0578859060402686e-05, |
|
"loss": 0.85, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.8720525705450328, |
|
"grad_norm": 0.5327886343002319, |
|
"learning_rate": 3.0536912751677856e-05, |
|
"loss": 0.8057, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.8735987630459993, |
|
"grad_norm": 0.5440667867660522, |
|
"learning_rate": 3.0494966442953022e-05, |
|
"loss": 0.7407, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.8751449555469656, |
|
"grad_norm": 0.5552430152893066, |
|
"learning_rate": 3.045302013422819e-05, |
|
"loss": 0.8022, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.876691148047932, |
|
"grad_norm": 0.523145854473114, |
|
"learning_rate": 3.0411073825503356e-05, |
|
"loss": 0.7694, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.8782373405488983, |
|
"grad_norm": 0.5461344122886658, |
|
"learning_rate": 3.0369127516778522e-05, |
|
"loss": 0.7849, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.8797835330498647, |
|
"grad_norm": 0.548468828201294, |
|
"learning_rate": 3.0327181208053695e-05, |
|
"loss": 0.8127, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.8813297255508311, |
|
"grad_norm": 0.5320367813110352, |
|
"learning_rate": 3.0285234899328862e-05, |
|
"loss": 0.8243, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.8828759180517974, |
|
"grad_norm": 0.5147125124931335, |
|
"learning_rate": 3.024328859060403e-05, |
|
"loss": 0.8784, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.8844221105527639, |
|
"grad_norm": 0.5353872776031494, |
|
"learning_rate": 3.02013422818792e-05, |
|
"loss": 0.7841, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.8859683030537302, |
|
"grad_norm": 0.5554389953613281, |
|
"learning_rate": 3.0159395973154365e-05, |
|
"loss": 0.8601, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.8875144955546965, |
|
"grad_norm": 0.5762984752655029, |
|
"learning_rate": 3.011744966442953e-05, |
|
"loss": 0.7359, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.889060688055663, |
|
"grad_norm": 0.5287367701530457, |
|
"learning_rate": 3.0075503355704698e-05, |
|
"loss": 0.8063, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.8906068805566293, |
|
"grad_norm": 0.5730248689651489, |
|
"learning_rate": 3.0033557046979865e-05, |
|
"loss": 0.735, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.8921530730575957, |
|
"grad_norm": 0.557384192943573, |
|
"learning_rate": 2.999161073825503e-05, |
|
"loss": 0.7962, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.893699265558562, |
|
"grad_norm": 0.5956771373748779, |
|
"learning_rate": 2.9949664429530205e-05, |
|
"loss": 0.8867, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.8952454580595284, |
|
"grad_norm": 0.5796382427215576, |
|
"learning_rate": 2.990771812080537e-05, |
|
"loss": 0.858, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.8967916505604948, |
|
"grad_norm": 0.5635401010513306, |
|
"learning_rate": 2.986577181208054e-05, |
|
"loss": 0.7987, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.8983378430614611, |
|
"grad_norm": 0.6214528679847717, |
|
"learning_rate": 2.9823825503355708e-05, |
|
"loss": 0.8397, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.8998840355624276, |
|
"grad_norm": 0.5945339202880859, |
|
"learning_rate": 2.9781879194630874e-05, |
|
"loss": 0.8428, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.9014302280633939, |
|
"grad_norm": 0.5704767107963562, |
|
"learning_rate": 2.973993288590604e-05, |
|
"loss": 0.8291, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.9029764205643602, |
|
"grad_norm": 0.5803176164627075, |
|
"learning_rate": 2.9697986577181207e-05, |
|
"loss": 0.8792, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.9045226130653267, |
|
"grad_norm": 0.6039038300514221, |
|
"learning_rate": 2.9656040268456374e-05, |
|
"loss": 0.7788, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.906068805566293, |
|
"grad_norm": 0.5550165176391602, |
|
"learning_rate": 2.9614093959731544e-05, |
|
"loss": 0.8658, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.9076149980672594, |
|
"grad_norm": 0.6096512079238892, |
|
"learning_rate": 2.9572147651006714e-05, |
|
"loss": 0.9237, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.9091611905682258, |
|
"grad_norm": 0.5784430503845215, |
|
"learning_rate": 2.9530201342281884e-05, |
|
"loss": 0.8538, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.9107073830691921, |
|
"grad_norm": 0.5854966044425964, |
|
"learning_rate": 2.948825503355705e-05, |
|
"loss": 0.9009, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.9122535755701585, |
|
"grad_norm": 0.5904499292373657, |
|
"learning_rate": 2.9446308724832217e-05, |
|
"loss": 0.9064, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.9137997680711248, |
|
"grad_norm": 0.6125240325927734, |
|
"learning_rate": 2.9404362416107384e-05, |
|
"loss": 0.8539, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.9153459605720913, |
|
"grad_norm": 0.6209454536437988, |
|
"learning_rate": 2.936241610738255e-05, |
|
"loss": 0.8864, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.9168921530730576, |
|
"grad_norm": 0.634355902671814, |
|
"learning_rate": 2.9320469798657717e-05, |
|
"loss": 0.8999, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.9184383455740239, |
|
"grad_norm": 0.644378125667572, |
|
"learning_rate": 2.9278523489932887e-05, |
|
"loss": 0.9351, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.9199845380749904, |
|
"grad_norm": 0.638783872127533, |
|
"learning_rate": 2.9236577181208053e-05, |
|
"loss": 0.8522, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.9215307305759567, |
|
"grad_norm": 0.6960675716400146, |
|
"learning_rate": 2.9194630872483227e-05, |
|
"loss": 0.8183, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"grad_norm": 0.7445054054260254, |
|
"learning_rate": 2.9152684563758393e-05, |
|
"loss": 0.9749, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.9246231155778895, |
|
"grad_norm": 0.7296366095542908, |
|
"learning_rate": 2.911073825503356e-05, |
|
"loss": 0.9535, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.9261693080788558, |
|
"grad_norm": 0.8242074251174927, |
|
"learning_rate": 2.9068791946308726e-05, |
|
"loss": 0.9661, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.9277155005798222, |
|
"grad_norm": 0.983094334602356, |
|
"learning_rate": 2.9026845637583893e-05, |
|
"loss": 0.8454, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9292616930807885, |
|
"grad_norm": 0.4916326403617859, |
|
"learning_rate": 2.898489932885906e-05, |
|
"loss": 0.7178, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.930807885581755, |
|
"grad_norm": 0.5236021876335144, |
|
"learning_rate": 2.894295302013423e-05, |
|
"loss": 0.7586, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.9323540780827213, |
|
"grad_norm": 0.5226188898086548, |
|
"learning_rate": 2.8901006711409396e-05, |
|
"loss": 0.7287, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.9339002705836876, |
|
"grad_norm": 0.5189059376716614, |
|
"learning_rate": 2.885906040268457e-05, |
|
"loss": 0.7176, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.9354464630846541, |
|
"grad_norm": 0.5282127857208252, |
|
"learning_rate": 2.8817114093959736e-05, |
|
"loss": 0.7487, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.9369926555856204, |
|
"grad_norm": 0.5156176090240479, |
|
"learning_rate": 2.8775167785234902e-05, |
|
"loss": 0.7929, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.9385388480865868, |
|
"grad_norm": 0.5219593644142151, |
|
"learning_rate": 2.873322147651007e-05, |
|
"loss": 0.7992, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.9400850405875532, |
|
"grad_norm": 0.5400338768959045, |
|
"learning_rate": 2.8691275167785235e-05, |
|
"loss": 0.7738, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.9416312330885195, |
|
"grad_norm": 0.5067276954650879, |
|
"learning_rate": 2.8649328859060402e-05, |
|
"loss": 0.8046, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.9431774255894859, |
|
"grad_norm": 0.5286040902137756, |
|
"learning_rate": 2.8607382550335572e-05, |
|
"loss": 0.6818, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.9447236180904522, |
|
"grad_norm": 0.5325278043746948, |
|
"learning_rate": 2.856543624161074e-05, |
|
"loss": 0.8112, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.9462698105914187, |
|
"grad_norm": 0.5434727668762207, |
|
"learning_rate": 2.8523489932885905e-05, |
|
"loss": 0.8245, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.947816003092385, |
|
"grad_norm": 0.5526994466781616, |
|
"learning_rate": 2.848154362416108e-05, |
|
"loss": 0.7493, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.9493621955933513, |
|
"grad_norm": 0.5655114054679871, |
|
"learning_rate": 2.8439597315436245e-05, |
|
"loss": 0.7514, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.9509083880943178, |
|
"grad_norm": 0.5636076331138611, |
|
"learning_rate": 2.839765100671141e-05, |
|
"loss": 0.7992, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.9524545805952841, |
|
"grad_norm": 0.5688204169273376, |
|
"learning_rate": 2.8355704697986578e-05, |
|
"loss": 0.7747, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.9540007730962505, |
|
"grad_norm": 0.5534058809280396, |
|
"learning_rate": 2.8313758389261748e-05, |
|
"loss": 0.7511, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.9555469655972169, |
|
"grad_norm": 0.5112160444259644, |
|
"learning_rate": 2.8271812080536915e-05, |
|
"loss": 0.7875, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.9570931580981832, |
|
"grad_norm": 0.553830623626709, |
|
"learning_rate": 2.822986577181208e-05, |
|
"loss": 0.8652, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.9586393505991496, |
|
"grad_norm": 0.5614729523658752, |
|
"learning_rate": 2.8187919463087248e-05, |
|
"loss": 0.8693, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.960185543100116, |
|
"grad_norm": 0.5519264340400696, |
|
"learning_rate": 2.8145973154362414e-05, |
|
"loss": 0.8102, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.9617317356010824, |
|
"grad_norm": 0.5544281601905823, |
|
"learning_rate": 2.8104026845637588e-05, |
|
"loss": 0.8263, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.9632779281020487, |
|
"grad_norm": 0.5747584700584412, |
|
"learning_rate": 2.8062080536912754e-05, |
|
"loss": 0.7718, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.964824120603015, |
|
"grad_norm": 0.5676540732383728, |
|
"learning_rate": 2.802013422818792e-05, |
|
"loss": 0.7616, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.9663703131039815, |
|
"grad_norm": 0.5307291150093079, |
|
"learning_rate": 2.797818791946309e-05, |
|
"loss": 0.8401, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.9679165056049478, |
|
"grad_norm": 0.5527417063713074, |
|
"learning_rate": 2.7936241610738257e-05, |
|
"loss": 0.8818, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.9694626981059142, |
|
"grad_norm": 0.545058012008667, |
|
"learning_rate": 2.7894295302013424e-05, |
|
"loss": 0.8606, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.9710088906068806, |
|
"grad_norm": 0.5928349494934082, |
|
"learning_rate": 2.785234899328859e-05, |
|
"loss": 0.7728, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.9725550831078469, |
|
"grad_norm": 0.5348992943763733, |
|
"learning_rate": 2.7810402684563757e-05, |
|
"loss": 0.8101, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.9741012756088133, |
|
"grad_norm": 0.583490788936615, |
|
"learning_rate": 2.7768456375838923e-05, |
|
"loss": 0.8648, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.9756474681097796, |
|
"grad_norm": 0.6235148906707764, |
|
"learning_rate": 2.7726510067114097e-05, |
|
"loss": 0.8425, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.9771936606107461, |
|
"grad_norm": 0.5755742788314819, |
|
"learning_rate": 2.7684563758389263e-05, |
|
"loss": 0.8765, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.9787398531117124, |
|
"grad_norm": 0.5636020302772522, |
|
"learning_rate": 2.7642617449664433e-05, |
|
"loss": 0.8114, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.9802860456126787, |
|
"grad_norm": 0.5665507316589355, |
|
"learning_rate": 2.76006711409396e-05, |
|
"loss": 0.9008, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.9818322381136452, |
|
"grad_norm": 0.5966489911079407, |
|
"learning_rate": 2.7558724832214766e-05, |
|
"loss": 0.846, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.9833784306146115, |
|
"grad_norm": 0.5960109829902649, |
|
"learning_rate": 2.7516778523489933e-05, |
|
"loss": 0.8962, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.9849246231155779, |
|
"grad_norm": 0.5626753568649292, |
|
"learning_rate": 2.74748322147651e-05, |
|
"loss": 0.9062, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.9864708156165443, |
|
"grad_norm": 0.6764492392539978, |
|
"learning_rate": 2.7432885906040266e-05, |
|
"loss": 0.8961, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.9880170081175106, |
|
"grad_norm": 0.5922832489013672, |
|
"learning_rate": 2.7390939597315436e-05, |
|
"loss": 0.8525, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.989563200618477, |
|
"grad_norm": 0.6102508902549744, |
|
"learning_rate": 2.7348993288590606e-05, |
|
"loss": 0.8887, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.9911093931194433, |
|
"grad_norm": 0.6205296516418457, |
|
"learning_rate": 2.7307046979865776e-05, |
|
"loss": 0.9007, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.9926555856204098, |
|
"grad_norm": 0.6284985542297363, |
|
"learning_rate": 2.7265100671140943e-05, |
|
"loss": 0.9228, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.9942017781213761, |
|
"grad_norm": 0.6276938915252686, |
|
"learning_rate": 2.722315436241611e-05, |
|
"loss": 0.9066, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.9957479706223424, |
|
"grad_norm": 0.6849061250686646, |
|
"learning_rate": 2.7181208053691276e-05, |
|
"loss": 0.9204, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.9972941631233089, |
|
"grad_norm": 0.7061152458190918, |
|
"learning_rate": 2.7139261744966442e-05, |
|
"loss": 0.9711, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.9988403556242752, |
|
"grad_norm": 0.7999619841575623, |
|
"learning_rate": 2.709731543624161e-05, |
|
"loss": 0.9722, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.0007730962504833, |
|
"grad_norm": 1.3448657989501953, |
|
"learning_rate": 2.705536912751678e-05, |
|
"loss": 1.2285, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.0023192887514496, |
|
"grad_norm": 0.4756757318973541, |
|
"learning_rate": 2.701342281879195e-05, |
|
"loss": 0.7208, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.003865481252416, |
|
"grad_norm": 0.495257169008255, |
|
"learning_rate": 2.697147651006712e-05, |
|
"loss": 0.7645, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.0054116737533823, |
|
"grad_norm": 0.5022267699241638, |
|
"learning_rate": 2.6929530201342285e-05, |
|
"loss": 0.7099, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.0069578662543486, |
|
"grad_norm": 0.5082698464393616, |
|
"learning_rate": 2.6887583892617452e-05, |
|
"loss": 0.7451, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.0085040587553151, |
|
"grad_norm": 0.5273095369338989, |
|
"learning_rate": 2.6845637583892618e-05, |
|
"loss": 0.7665, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.0100502512562815, |
|
"grad_norm": 0.5531541109085083, |
|
"learning_rate": 2.6803691275167785e-05, |
|
"loss": 0.7041, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.0115964437572478, |
|
"grad_norm": 0.5290402173995972, |
|
"learning_rate": 2.6761744966442955e-05, |
|
"loss": 0.812, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.013142636258214, |
|
"grad_norm": 0.556932270526886, |
|
"learning_rate": 2.671979865771812e-05, |
|
"loss": 0.7414, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.0146888287591804, |
|
"grad_norm": 0.501987874507904, |
|
"learning_rate": 2.6677852348993288e-05, |
|
"loss": 0.7446, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.016235021260147, |
|
"grad_norm": 0.5225424766540527, |
|
"learning_rate": 2.663590604026846e-05, |
|
"loss": 0.8439, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.0177812137611133, |
|
"grad_norm": 0.5627469420433044, |
|
"learning_rate": 2.6593959731543628e-05, |
|
"loss": 0.6966, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.0193274062620796, |
|
"grad_norm": 0.6055929660797119, |
|
"learning_rate": 2.6552013422818794e-05, |
|
"loss": 0.7859, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.020873598763046, |
|
"grad_norm": 0.6422880291938782, |
|
"learning_rate": 2.651006711409396e-05, |
|
"loss": 0.7733, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.0224197912640123, |
|
"grad_norm": 0.5887860059738159, |
|
"learning_rate": 2.6468120805369128e-05, |
|
"loss": 0.7041, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.0239659837649788, |
|
"grad_norm": 0.5853235125541687, |
|
"learning_rate": 2.6426174496644297e-05, |
|
"loss": 0.7268, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.0255121762659452, |
|
"grad_norm": 0.5295597910881042, |
|
"learning_rate": 2.6384228187919464e-05, |
|
"loss": 0.7482, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.0270583687669115, |
|
"grad_norm": 0.5481401085853577, |
|
"learning_rate": 2.634228187919463e-05, |
|
"loss": 0.7548, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.0286045612678778, |
|
"grad_norm": 0.538827121257782, |
|
"learning_rate": 2.6300335570469797e-05, |
|
"loss": 0.764, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.0301507537688441, |
|
"grad_norm": 0.577368974685669, |
|
"learning_rate": 2.625838926174497e-05, |
|
"loss": 0.6835, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.0316969462698107, |
|
"grad_norm": 0.5659049153327942, |
|
"learning_rate": 2.6216442953020137e-05, |
|
"loss": 0.7264, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.033243138770777, |
|
"grad_norm": 0.5179746150970459, |
|
"learning_rate": 2.6174496644295304e-05, |
|
"loss": 0.7911, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.0347893312717433, |
|
"grad_norm": 0.5719809532165527, |
|
"learning_rate": 2.613255033557047e-05, |
|
"loss": 0.7479, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.0363355237727097, |
|
"grad_norm": 0.5943763852119446, |
|
"learning_rate": 2.609060402684564e-05, |
|
"loss": 0.7802, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.037881716273676, |
|
"grad_norm": 0.5392094850540161, |
|
"learning_rate": 2.6048657718120807e-05, |
|
"loss": 0.7625, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.0394279087746425, |
|
"grad_norm": 0.5679749250411987, |
|
"learning_rate": 2.6006711409395973e-05, |
|
"loss": 0.7911, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.0409741012756089, |
|
"grad_norm": 0.5740141272544861, |
|
"learning_rate": 2.596476510067114e-05, |
|
"loss": 0.7477, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.0425202937765752, |
|
"grad_norm": 0.607397198677063, |
|
"learning_rate": 2.5922818791946306e-05, |
|
"loss": 0.852, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.0440664862775415, |
|
"grad_norm": 0.5706917643547058, |
|
"learning_rate": 2.588087248322148e-05, |
|
"loss": 0.6778, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.0456126787785078, |
|
"grad_norm": 0.5882996320724487, |
|
"learning_rate": 2.5838926174496646e-05, |
|
"loss": 0.7071, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.0471588712794744, |
|
"grad_norm": 0.5978296995162964, |
|
"learning_rate": 2.5796979865771813e-05, |
|
"loss": 0.7593, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.0487050637804407, |
|
"grad_norm": 0.6237056255340576, |
|
"learning_rate": 2.5755033557046983e-05, |
|
"loss": 0.8099, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.050251256281407, |
|
"grad_norm": 0.6156934499740601, |
|
"learning_rate": 2.571308724832215e-05, |
|
"loss": 0.8256, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.0517974487823734, |
|
"grad_norm": 0.6217848062515259, |
|
"learning_rate": 2.5671140939597316e-05, |
|
"loss": 0.8445, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.0533436412833397, |
|
"grad_norm": 0.6550363898277283, |
|
"learning_rate": 2.5629194630872482e-05, |
|
"loss": 0.7931, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.0548898337843062, |
|
"grad_norm": 0.6072224378585815, |
|
"learning_rate": 2.558724832214765e-05, |
|
"loss": 0.7985, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.0564360262852726, |
|
"grad_norm": 0.6478685140609741, |
|
"learning_rate": 2.5545302013422822e-05, |
|
"loss": 0.8055, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.0579822187862389, |
|
"grad_norm": 0.6033689975738525, |
|
"learning_rate": 2.550335570469799e-05, |
|
"loss": 0.9145, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.0595284112872052, |
|
"grad_norm": 0.5850486755371094, |
|
"learning_rate": 2.5461409395973155e-05, |
|
"loss": 0.8129, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.0610746037881715, |
|
"grad_norm": 0.6233928799629211, |
|
"learning_rate": 2.5419463087248325e-05, |
|
"loss": 0.9122, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.062620796289138, |
|
"grad_norm": 0.6058603525161743, |
|
"learning_rate": 2.5377516778523492e-05, |
|
"loss": 0.7968, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.0641669887901044, |
|
"grad_norm": 0.6053382754325867, |
|
"learning_rate": 2.533557046979866e-05, |
|
"loss": 0.8349, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.0657131812910707, |
|
"grad_norm": 0.6354022026062012, |
|
"learning_rate": 2.5293624161073825e-05, |
|
"loss": 0.8879, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.067259373792037, |
|
"grad_norm": 0.6736825108528137, |
|
"learning_rate": 2.525167785234899e-05, |
|
"loss": 0.8317, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.0688055662930034, |
|
"grad_norm": 0.6729496717453003, |
|
"learning_rate": 2.5209731543624158e-05, |
|
"loss": 0.8247, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.07035175879397, |
|
"grad_norm": 0.7000686526298523, |
|
"learning_rate": 2.516778523489933e-05, |
|
"loss": 0.9076, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.0718979512949363, |
|
"grad_norm": 0.7471379041671753, |
|
"learning_rate": 2.5125838926174498e-05, |
|
"loss": 0.861, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.0734441437959026, |
|
"grad_norm": 0.7505892515182495, |
|
"learning_rate": 2.5083892617449668e-05, |
|
"loss": 0.9747, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.074990336296869, |
|
"grad_norm": 0.7872920632362366, |
|
"learning_rate": 2.5041946308724835e-05, |
|
"loss": 0.8623, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.0765365287978352, |
|
"grad_norm": 0.97450190782547, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.8574, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.0780827212988018, |
|
"grad_norm": 0.6834471225738525, |
|
"learning_rate": 2.4958053691275168e-05, |
|
"loss": 0.7284, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.079628913799768, |
|
"grad_norm": 0.5371273756027222, |
|
"learning_rate": 2.4916107382550334e-05, |
|
"loss": 0.6604, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.0811751063007344, |
|
"grad_norm": 0.5436398983001709, |
|
"learning_rate": 2.4874161073825504e-05, |
|
"loss": 0.7106, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.0827212988017008, |
|
"grad_norm": 0.5626257061958313, |
|
"learning_rate": 2.4832214765100674e-05, |
|
"loss": 0.7199, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.084267491302667, |
|
"grad_norm": 0.5859701037406921, |
|
"learning_rate": 2.479026845637584e-05, |
|
"loss": 0.727, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.0858136838036336, |
|
"grad_norm": 0.5619367957115173, |
|
"learning_rate": 2.4748322147651007e-05, |
|
"loss": 0.7721, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.0873598763046, |
|
"grad_norm": 0.5503877997398376, |
|
"learning_rate": 2.4706375838926177e-05, |
|
"loss": 0.6971, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.0889060688055663, |
|
"grad_norm": 0.5587684512138367, |
|
"learning_rate": 2.4664429530201344e-05, |
|
"loss": 0.7868, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.0904522613065326, |
|
"grad_norm": 0.5771764516830444, |
|
"learning_rate": 2.462248322147651e-05, |
|
"loss": 0.7831, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.091998453807499, |
|
"grad_norm": 0.5702334046363831, |
|
"learning_rate": 2.4580536912751677e-05, |
|
"loss": 0.6751, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.0935446463084655, |
|
"grad_norm": 0.5503116250038147, |
|
"learning_rate": 2.4538590604026847e-05, |
|
"loss": 0.7732, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.0950908388094318, |
|
"grad_norm": 0.5595600008964539, |
|
"learning_rate": 2.4496644295302017e-05, |
|
"loss": 0.7168, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.0966370313103981, |
|
"grad_norm": 0.5581438541412354, |
|
"learning_rate": 2.4454697986577183e-05, |
|
"loss": 0.7462, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.0981832238113645, |
|
"grad_norm": 0.5454738736152649, |
|
"learning_rate": 2.441275167785235e-05, |
|
"loss": 0.8809, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.0997294163123308, |
|
"grad_norm": 0.5763128399848938, |
|
"learning_rate": 2.4370805369127517e-05, |
|
"loss": 0.6942, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.1012756088132973, |
|
"grad_norm": 0.5813524127006531, |
|
"learning_rate": 2.4328859060402687e-05, |
|
"loss": 0.6836, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.1028218013142637, |
|
"grad_norm": 0.5554409623146057, |
|
"learning_rate": 2.4286912751677853e-05, |
|
"loss": 0.7616, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.10436799381523, |
|
"grad_norm": 0.5576358437538147, |
|
"learning_rate": 2.424496644295302e-05, |
|
"loss": 0.7835, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.1059141863161963, |
|
"grad_norm": 0.5760726928710938, |
|
"learning_rate": 2.420302013422819e-05, |
|
"loss": 0.7866, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.1074603788171626, |
|
"grad_norm": 0.5910109877586365, |
|
"learning_rate": 2.416107382550336e-05, |
|
"loss": 0.7893, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.1090065713181292, |
|
"grad_norm": 0.6322896480560303, |
|
"learning_rate": 2.4119127516778526e-05, |
|
"loss": 0.7235, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.1105527638190955, |
|
"grad_norm": 0.5939295887947083, |
|
"learning_rate": 2.4077181208053693e-05, |
|
"loss": 0.7816, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.1120989563200618, |
|
"grad_norm": 0.5953226089477539, |
|
"learning_rate": 2.403523489932886e-05, |
|
"loss": 0.7444, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.1136451488210282, |
|
"grad_norm": 0.60584956407547, |
|
"learning_rate": 2.3993288590604026e-05, |
|
"loss": 0.7525, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.1151913413219945, |
|
"grad_norm": 0.6163296699523926, |
|
"learning_rate": 2.3951342281879196e-05, |
|
"loss": 0.772, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.116737533822961, |
|
"grad_norm": 0.5978072881698608, |
|
"learning_rate": 2.3909395973154362e-05, |
|
"loss": 0.7421, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.1182837263239274, |
|
"grad_norm": 0.5980417132377625, |
|
"learning_rate": 2.3867449664429532e-05, |
|
"loss": 0.7942, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.1198299188248937, |
|
"grad_norm": 0.6046664118766785, |
|
"learning_rate": 2.38255033557047e-05, |
|
"loss": 0.8131, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.12137611132586, |
|
"grad_norm": 0.5973670482635498, |
|
"learning_rate": 2.378355704697987e-05, |
|
"loss": 0.7857, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.1229223038268263, |
|
"grad_norm": 0.6115438342094421, |
|
"learning_rate": 2.3741610738255035e-05, |
|
"loss": 0.7942, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.1244684963277929, |
|
"grad_norm": 0.5946105718612671, |
|
"learning_rate": 2.3699664429530202e-05, |
|
"loss": 0.8088, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.1260146888287592, |
|
"grad_norm": 0.6095959544181824, |
|
"learning_rate": 2.365771812080537e-05, |
|
"loss": 0.8034, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.1275608813297255, |
|
"grad_norm": 0.6506893634796143, |
|
"learning_rate": 2.361577181208054e-05, |
|
"loss": 0.8469, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.1291070738306919, |
|
"grad_norm": 0.6179336309432983, |
|
"learning_rate": 2.3573825503355705e-05, |
|
"loss": 0.7918, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.1306532663316582, |
|
"grad_norm": 0.6253457069396973, |
|
"learning_rate": 2.3531879194630875e-05, |
|
"loss": 0.845, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.1321994588326247, |
|
"grad_norm": 0.6577156186103821, |
|
"learning_rate": 2.348993288590604e-05, |
|
"loss": 0.7475, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.133745651333591, |
|
"grad_norm": 0.6993891596794128, |
|
"learning_rate": 2.3447986577181208e-05, |
|
"loss": 0.7988, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.1352918438345574, |
|
"grad_norm": 0.6977733373641968, |
|
"learning_rate": 2.3406040268456378e-05, |
|
"loss": 0.7899, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.1368380363355237, |
|
"grad_norm": 0.6664114594459534, |
|
"learning_rate": 2.3364093959731545e-05, |
|
"loss": 0.8609, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.13838422883649, |
|
"grad_norm": 0.6565979719161987, |
|
"learning_rate": 2.332214765100671e-05, |
|
"loss": 0.8592, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.1399304213374566, |
|
"grad_norm": 0.6794628500938416, |
|
"learning_rate": 2.3280201342281878e-05, |
|
"loss": 0.8709, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.141476613838423, |
|
"grad_norm": 0.6833226084709167, |
|
"learning_rate": 2.3238255033557048e-05, |
|
"loss": 0.8121, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.1430228063393892, |
|
"grad_norm": 0.6899168491363525, |
|
"learning_rate": 2.3196308724832218e-05, |
|
"loss": 0.8362, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.1445689988403556, |
|
"grad_norm": 0.7028947472572327, |
|
"learning_rate": 2.3154362416107384e-05, |
|
"loss": 0.7995, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.146115191341322, |
|
"grad_norm": 0.7305999994277954, |
|
"learning_rate": 2.311241610738255e-05, |
|
"loss": 0.832, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.1476613838422884, |
|
"grad_norm": 0.7404617667198181, |
|
"learning_rate": 2.3070469798657717e-05, |
|
"loss": 0.9275, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.1492075763432548, |
|
"grad_norm": 0.7118339538574219, |
|
"learning_rate": 2.3028523489932887e-05, |
|
"loss": 0.8852, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.150753768844221, |
|
"grad_norm": 0.8329970240592957, |
|
"learning_rate": 2.2986577181208054e-05, |
|
"loss": 0.8482, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.1522999613451874, |
|
"grad_norm": 0.8370991945266724, |
|
"learning_rate": 2.2944630872483224e-05, |
|
"loss": 1.0003, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.1538461538461537, |
|
"grad_norm": 1.0701762437820435, |
|
"learning_rate": 2.290268456375839e-05, |
|
"loss": 1.0102, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.1553923463471203, |
|
"grad_norm": 0.7740350961685181, |
|
"learning_rate": 2.286073825503356e-05, |
|
"loss": 0.617, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.1569385388480866, |
|
"grad_norm": 0.5996577739715576, |
|
"learning_rate": 2.2818791946308727e-05, |
|
"loss": 0.7698, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.158484731349053, |
|
"grad_norm": 0.597896933555603, |
|
"learning_rate": 2.2776845637583893e-05, |
|
"loss": 0.7018, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.1600309238500193, |
|
"grad_norm": 0.5840704441070557, |
|
"learning_rate": 2.273489932885906e-05, |
|
"loss": 0.7634, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.1615771163509856, |
|
"grad_norm": 0.5987460613250732, |
|
"learning_rate": 2.269295302013423e-05, |
|
"loss": 0.7973, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.1631233088519521, |
|
"grad_norm": 0.5817953944206238, |
|
"learning_rate": 2.2651006711409396e-05, |
|
"loss": 0.7703, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.1646695013529185, |
|
"grad_norm": 0.5792746543884277, |
|
"learning_rate": 2.2609060402684566e-05, |
|
"loss": 0.7425, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.1662156938538848, |
|
"grad_norm": 0.5840611457824707, |
|
"learning_rate": 2.2567114093959733e-05, |
|
"loss": 0.715, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.1677618863548511, |
|
"grad_norm": 0.534000039100647, |
|
"learning_rate": 2.25251677852349e-05, |
|
"loss": 0.7764, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.1693080788558174, |
|
"grad_norm": 0.6020839214324951, |
|
"learning_rate": 2.248322147651007e-05, |
|
"loss": 0.7502, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.170854271356784, |
|
"grad_norm": 0.5764395594596863, |
|
"learning_rate": 2.2441275167785236e-05, |
|
"loss": 0.7298, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.1724004638577503, |
|
"grad_norm": 0.5840582251548767, |
|
"learning_rate": 2.2399328859060403e-05, |
|
"loss": 0.7836, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.1739466563587166, |
|
"grad_norm": 0.5759351253509521, |
|
"learning_rate": 2.235738255033557e-05, |
|
"loss": 0.7646, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.175492848859683, |
|
"grad_norm": 0.5777841806411743, |
|
"learning_rate": 2.231543624161074e-05, |
|
"loss": 0.8351, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.1770390413606493, |
|
"grad_norm": 0.6419858932495117, |
|
"learning_rate": 2.227348993288591e-05, |
|
"loss": 0.6833, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.1785852338616158, |
|
"grad_norm": 0.5851649045944214, |
|
"learning_rate": 2.2231543624161076e-05, |
|
"loss": 0.7559, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.1801314263625822, |
|
"grad_norm": 0.6243789196014404, |
|
"learning_rate": 2.2189597315436242e-05, |
|
"loss": 0.6587, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.1816776188635485, |
|
"grad_norm": 0.6107107996940613, |
|
"learning_rate": 2.2147651006711412e-05, |
|
"loss": 0.7724, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.1832238113645148, |
|
"grad_norm": 0.6164106130599976, |
|
"learning_rate": 2.210570469798658e-05, |
|
"loss": 0.6949, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.1847700038654811, |
|
"grad_norm": 0.6295919418334961, |
|
"learning_rate": 2.2063758389261745e-05, |
|
"loss": 0.7239, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.1863161963664477, |
|
"grad_norm": 0.582129955291748, |
|
"learning_rate": 2.2021812080536912e-05, |
|
"loss": 0.7619, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.187862388867414, |
|
"grad_norm": 0.639700174331665, |
|
"learning_rate": 2.197986577181208e-05, |
|
"loss": 0.7788, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.1894085813683803, |
|
"grad_norm": 0.6275160908699036, |
|
"learning_rate": 2.193791946308725e-05, |
|
"loss": 0.7457, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.1909547738693467, |
|
"grad_norm": 0.5969827175140381, |
|
"learning_rate": 2.1895973154362418e-05, |
|
"loss": 0.7981, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.192500966370313, |
|
"grad_norm": 0.5809218883514404, |
|
"learning_rate": 2.1854026845637585e-05, |
|
"loss": 0.8194, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.1940471588712795, |
|
"grad_norm": 0.5926761627197266, |
|
"learning_rate": 2.181208053691275e-05, |
|
"loss": 0.7808, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.1955933513722459, |
|
"grad_norm": 0.6315684914588928, |
|
"learning_rate": 2.177013422818792e-05, |
|
"loss": 0.8342, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.1971395438732122, |
|
"grad_norm": 0.5824242830276489, |
|
"learning_rate": 2.1728187919463088e-05, |
|
"loss": 0.8041, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.1986857363741785, |
|
"grad_norm": 0.6280264258384705, |
|
"learning_rate": 2.1686241610738254e-05, |
|
"loss": 0.7976, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.2002319288751448, |
|
"grad_norm": 0.6388978958129883, |
|
"learning_rate": 2.1644295302013424e-05, |
|
"loss": 0.7253, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.2017781213761114, |
|
"grad_norm": 0.6550062894821167, |
|
"learning_rate": 2.160234899328859e-05, |
|
"loss": 0.7531, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.2033243138770777, |
|
"grad_norm": 0.6625634431838989, |
|
"learning_rate": 2.156040268456376e-05, |
|
"loss": 0.7869, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.204870506378044, |
|
"grad_norm": 0.6421430706977844, |
|
"learning_rate": 2.1518456375838927e-05, |
|
"loss": 0.8198, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.2064166988790104, |
|
"grad_norm": 0.6213207840919495, |
|
"learning_rate": 2.1476510067114094e-05, |
|
"loss": 0.8565, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.2079628913799767, |
|
"grad_norm": 0.6527850031852722, |
|
"learning_rate": 2.143456375838926e-05, |
|
"loss": 0.8084, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.2095090838809432, |
|
"grad_norm": 0.6330167651176453, |
|
"learning_rate": 2.139261744966443e-05, |
|
"loss": 0.8062, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.2110552763819096, |
|
"grad_norm": 0.6649383902549744, |
|
"learning_rate": 2.13506711409396e-05, |
|
"loss": 0.7586, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.212601468882876, |
|
"grad_norm": 0.6256992220878601, |
|
"learning_rate": 2.1308724832214767e-05, |
|
"loss": 0.7785, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.2141476613838422, |
|
"grad_norm": 0.622163712978363, |
|
"learning_rate": 2.1266778523489934e-05, |
|
"loss": 0.9157, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.2156938538848086, |
|
"grad_norm": 0.6411583423614502, |
|
"learning_rate": 2.1224832214765103e-05, |
|
"loss": 0.8916, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.217240046385775, |
|
"grad_norm": 0.6612896919250488, |
|
"learning_rate": 2.118288590604027e-05, |
|
"loss": 0.8234, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.2187862388867414, |
|
"grad_norm": 0.7050024271011353, |
|
"learning_rate": 2.1140939597315437e-05, |
|
"loss": 0.9097, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.2203324313877078, |
|
"grad_norm": 0.7046983242034912, |
|
"learning_rate": 2.1098993288590603e-05, |
|
"loss": 0.8503, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.221878623888674, |
|
"grad_norm": 0.7282384634017944, |
|
"learning_rate": 2.1057046979865773e-05, |
|
"loss": 0.8317, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.2234248163896404, |
|
"grad_norm": 0.7514353394508362, |
|
"learning_rate": 2.1015100671140943e-05, |
|
"loss": 0.8562, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.224971008890607, |
|
"grad_norm": 0.6923422813415527, |
|
"learning_rate": 2.097315436241611e-05, |
|
"loss": 0.8234, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.2265172013915733, |
|
"grad_norm": 0.7773630023002625, |
|
"learning_rate": 2.0931208053691276e-05, |
|
"loss": 0.8988, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.2280633938925396, |
|
"grad_norm": 0.8075311183929443, |
|
"learning_rate": 2.0889261744966443e-05, |
|
"loss": 0.8421, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.229609586393506, |
|
"grad_norm": 0.8537722826004028, |
|
"learning_rate": 2.0847315436241613e-05, |
|
"loss": 0.835, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.2311557788944723, |
|
"grad_norm": 1.1471562385559082, |
|
"learning_rate": 2.080536912751678e-05, |
|
"loss": 0.9985, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.2327019713954388, |
|
"grad_norm": 0.8220142126083374, |
|
"learning_rate": 2.0763422818791946e-05, |
|
"loss": 0.6295, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.2342481638964051, |
|
"grad_norm": 0.6230762600898743, |
|
"learning_rate": 2.0721476510067116e-05, |
|
"loss": 0.6921, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.2357943563973715, |
|
"grad_norm": 0.596222996711731, |
|
"learning_rate": 2.0679530201342286e-05, |
|
"loss": 0.7503, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.2373405488983378, |
|
"grad_norm": 0.5620123744010925, |
|
"learning_rate": 2.0637583892617452e-05, |
|
"loss": 0.6849, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.238886741399304, |
|
"grad_norm": 0.5710748434066772, |
|
"learning_rate": 2.059563758389262e-05, |
|
"loss": 0.7431, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.2404329339002707, |
|
"grad_norm": 0.584814727306366, |
|
"learning_rate": 2.0553691275167785e-05, |
|
"loss": 0.721, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.241979126401237, |
|
"grad_norm": 0.576964795589447, |
|
"learning_rate": 2.0511744966442952e-05, |
|
"loss": 0.6897, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.2435253189022033, |
|
"grad_norm": 0.6063031554222107, |
|
"learning_rate": 2.0469798657718122e-05, |
|
"loss": 0.7516, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.2450715114031696, |
|
"grad_norm": 0.596976101398468, |
|
"learning_rate": 2.042785234899329e-05, |
|
"loss": 0.7488, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.246617703904136, |
|
"grad_norm": 0.6499916911125183, |
|
"learning_rate": 2.038590604026846e-05, |
|
"loss": 0.7554, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.2481638964051025, |
|
"grad_norm": 0.5964358448982239, |
|
"learning_rate": 2.0343959731543625e-05, |
|
"loss": 0.7417, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.2497100889060688, |
|
"grad_norm": 0.5935381650924683, |
|
"learning_rate": 2.0302013422818795e-05, |
|
"loss": 0.7308, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.2512562814070352, |
|
"grad_norm": 0.6293373703956604, |
|
"learning_rate": 2.026006711409396e-05, |
|
"loss": 0.734, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.2528024739080015, |
|
"grad_norm": 0.592042088508606, |
|
"learning_rate": 2.0218120805369128e-05, |
|
"loss": 0.7412, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.2543486664089678, |
|
"grad_norm": 0.5861983895301819, |
|
"learning_rate": 2.0176174496644295e-05, |
|
"loss": 0.8101, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.2558948589099344, |
|
"grad_norm": 0.6101320385932922, |
|
"learning_rate": 2.013422818791946e-05, |
|
"loss": 0.7394, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.2574410514109007, |
|
"grad_norm": 0.6127662062644958, |
|
"learning_rate": 2.009228187919463e-05, |
|
"loss": 0.7941, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.258987243911867, |
|
"grad_norm": 0.5885155200958252, |
|
"learning_rate": 2.00503355704698e-05, |
|
"loss": 0.7987, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.2605334364128333, |
|
"grad_norm": 0.6043751835823059, |
|
"learning_rate": 2.0008389261744968e-05, |
|
"loss": 0.7918, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.2620796289137997, |
|
"grad_norm": 0.6351797580718994, |
|
"learning_rate": 1.9966442953020134e-05, |
|
"loss": 0.7218, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.2636258214147662, |
|
"grad_norm": 0.6218336224555969, |
|
"learning_rate": 1.9924496644295304e-05, |
|
"loss": 0.8294, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.2651720139157325, |
|
"grad_norm": 0.6148021817207336, |
|
"learning_rate": 1.988255033557047e-05, |
|
"loss": 0.7767, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.2667182064166989, |
|
"grad_norm": 0.6303946375846863, |
|
"learning_rate": 1.9840604026845637e-05, |
|
"loss": 0.7816, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.2682643989176652, |
|
"grad_norm": 0.6076005101203918, |
|
"learning_rate": 1.9798657718120804e-05, |
|
"loss": 0.8023, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.2698105914186315, |
|
"grad_norm": 0.6453294157981873, |
|
"learning_rate": 1.9756711409395974e-05, |
|
"loss": 0.7728, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 1.271356783919598, |
|
"grad_norm": 0.6279742121696472, |
|
"learning_rate": 1.9714765100671144e-05, |
|
"loss": 0.7564, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 1.2729029764205644, |
|
"grad_norm": 0.6434690356254578, |
|
"learning_rate": 1.967281879194631e-05, |
|
"loss": 0.7989, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 1.2744491689215307, |
|
"grad_norm": 0.6816707253456116, |
|
"learning_rate": 1.9630872483221477e-05, |
|
"loss": 0.7964, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.275995361422497, |
|
"grad_norm": 0.6387498378753662, |
|
"learning_rate": 1.9588926174496643e-05, |
|
"loss": 0.7621, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.2775415539234634, |
|
"grad_norm": 0.6609524488449097, |
|
"learning_rate": 1.9546979865771813e-05, |
|
"loss": 0.8252, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.27908774642443, |
|
"grad_norm": 0.6324920654296875, |
|
"learning_rate": 1.950503355704698e-05, |
|
"loss": 0.7502, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 1.2806339389253962, |
|
"grad_norm": 0.6569236516952515, |
|
"learning_rate": 1.946308724832215e-05, |
|
"loss": 0.8148, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.2821801314263626, |
|
"grad_norm": 0.625497043132782, |
|
"learning_rate": 1.9421140939597316e-05, |
|
"loss": 0.8663, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 1.2837263239273289, |
|
"grad_norm": 0.6833832263946533, |
|
"learning_rate": 1.9379194630872486e-05, |
|
"loss": 0.692, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.2852725164282952, |
|
"grad_norm": 0.6534375548362732, |
|
"learning_rate": 1.9337248322147653e-05, |
|
"loss": 0.7731, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 1.2868187089292618, |
|
"grad_norm": 0.6668411493301392, |
|
"learning_rate": 1.929530201342282e-05, |
|
"loss": 0.758, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.288364901430228, |
|
"grad_norm": 0.6552398204803467, |
|
"learning_rate": 1.9253355704697986e-05, |
|
"loss": 0.8387, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 1.2899110939311944, |
|
"grad_norm": 0.6804989576339722, |
|
"learning_rate": 1.9211409395973153e-05, |
|
"loss": 0.823, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 1.2914572864321607, |
|
"grad_norm": 0.6738516092300415, |
|
"learning_rate": 1.9169463087248323e-05, |
|
"loss": 0.8759, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.293003478933127, |
|
"grad_norm": 0.6957226991653442, |
|
"learning_rate": 1.9127516778523493e-05, |
|
"loss": 0.8465, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.2945496714340936, |
|
"grad_norm": 0.7255749106407166, |
|
"learning_rate": 1.908557046979866e-05, |
|
"loss": 0.7872, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 1.29609586393506, |
|
"grad_norm": 0.7283822894096375, |
|
"learning_rate": 1.9043624161073826e-05, |
|
"loss": 0.8759, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.2976420564360263, |
|
"grad_norm": 0.7126689553260803, |
|
"learning_rate": 1.9001677852348996e-05, |
|
"loss": 0.8252, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 1.2991882489369926, |
|
"grad_norm": 0.7381671071052551, |
|
"learning_rate": 1.8959731543624162e-05, |
|
"loss": 0.9227, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.300734441437959, |
|
"grad_norm": 0.721825897693634, |
|
"learning_rate": 1.891778523489933e-05, |
|
"loss": 0.9147, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 1.3022806339389255, |
|
"grad_norm": 0.719792902469635, |
|
"learning_rate": 1.8875838926174495e-05, |
|
"loss": 0.9038, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.3038268264398918, |
|
"grad_norm": 0.7600436806678772, |
|
"learning_rate": 1.8833892617449665e-05, |
|
"loss": 0.903, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 1.305373018940858, |
|
"grad_norm": 0.743240475654602, |
|
"learning_rate": 1.8791946308724835e-05, |
|
"loss": 0.8799, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.3069192114418244, |
|
"grad_norm": 0.8364670872688293, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 0.9059, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.3084654039427908, |
|
"grad_norm": 0.9684616327285767, |
|
"learning_rate": 1.870805369127517e-05, |
|
"loss": 0.9468, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.3100115964437573, |
|
"grad_norm": 0.7947605848312378, |
|
"learning_rate": 1.8666107382550335e-05, |
|
"loss": 0.6681, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 1.3115577889447236, |
|
"grad_norm": 0.5737844109535217, |
|
"learning_rate": 1.8624161073825505e-05, |
|
"loss": 0.6837, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.31310398144569, |
|
"grad_norm": 0.5890055298805237, |
|
"learning_rate": 1.858221476510067e-05, |
|
"loss": 0.7115, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 1.3146501739466563, |
|
"grad_norm": 0.6102372407913208, |
|
"learning_rate": 1.8540268456375838e-05, |
|
"loss": 0.7131, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.3161963664476226, |
|
"grad_norm": 0.5825210809707642, |
|
"learning_rate": 1.8498322147651008e-05, |
|
"loss": 0.6873, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 1.3177425589485892, |
|
"grad_norm": 0.5857095718383789, |
|
"learning_rate": 1.8456375838926178e-05, |
|
"loss": 0.7435, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.3192887514495555, |
|
"grad_norm": 0.60005122423172, |
|
"learning_rate": 1.8414429530201344e-05, |
|
"loss": 0.6988, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 1.3208349439505218, |
|
"grad_norm": 0.6294332146644592, |
|
"learning_rate": 1.837248322147651e-05, |
|
"loss": 0.7796, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.3223811364514881, |
|
"grad_norm": 0.6006381511688232, |
|
"learning_rate": 1.8330536912751678e-05, |
|
"loss": 0.7677, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.3239273289524545, |
|
"grad_norm": 0.5754826664924622, |
|
"learning_rate": 1.8288590604026847e-05, |
|
"loss": 0.7845, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.325473521453421, |
|
"grad_norm": 0.6118499040603638, |
|
"learning_rate": 1.8246644295302014e-05, |
|
"loss": 0.6819, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 1.3270197139543873, |
|
"grad_norm": 0.5924245119094849, |
|
"learning_rate": 1.820469798657718e-05, |
|
"loss": 0.8092, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.3285659064553537, |
|
"grad_norm": 0.6202556490898132, |
|
"learning_rate": 1.816275167785235e-05, |
|
"loss": 0.7992, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 1.33011209895632, |
|
"grad_norm": 0.6356550455093384, |
|
"learning_rate": 1.8120805369127517e-05, |
|
"loss": 0.7731, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.3316582914572863, |
|
"grad_norm": 0.6205728650093079, |
|
"learning_rate": 1.8078859060402687e-05, |
|
"loss": 0.7951, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 1.3332044839582529, |
|
"grad_norm": 0.6306980848312378, |
|
"learning_rate": 1.8036912751677854e-05, |
|
"loss": 0.7904, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.3347506764592192, |
|
"grad_norm": 0.5935050249099731, |
|
"learning_rate": 1.799496644295302e-05, |
|
"loss": 0.8029, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 1.3362968689601855, |
|
"grad_norm": 0.6275442838668823, |
|
"learning_rate": 1.7953020134228187e-05, |
|
"loss": 0.7376, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.3378430614611518, |
|
"grad_norm": 0.6528346538543701, |
|
"learning_rate": 1.7911073825503357e-05, |
|
"loss": 0.7286, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.3393892539621182, |
|
"grad_norm": 0.6417142748832703, |
|
"learning_rate": 1.7869127516778523e-05, |
|
"loss": 0.7632, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.3409354464630847, |
|
"grad_norm": 0.5968343615531921, |
|
"learning_rate": 1.7827181208053693e-05, |
|
"loss": 0.8156, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.342481638964051, |
|
"grad_norm": 0.5820614695549011, |
|
"learning_rate": 1.778523489932886e-05, |
|
"loss": 0.7957, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.3440278314650174, |
|
"grad_norm": 0.576564610004425, |
|
"learning_rate": 1.7743288590604026e-05, |
|
"loss": 0.7799, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 1.3455740239659837, |
|
"grad_norm": 0.6096657514572144, |
|
"learning_rate": 1.7701342281879196e-05, |
|
"loss": 0.7929, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.34712021646695, |
|
"grad_norm": 0.6413806080818176, |
|
"learning_rate": 1.7659395973154363e-05, |
|
"loss": 0.7468, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 1.3486664089679166, |
|
"grad_norm": 0.6387087106704712, |
|
"learning_rate": 1.761744966442953e-05, |
|
"loss": 0.7342, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.350212601468883, |
|
"grad_norm": 0.6342037320137024, |
|
"learning_rate": 1.75755033557047e-05, |
|
"loss": 0.829, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 1.3517587939698492, |
|
"grad_norm": 0.6201750636100769, |
|
"learning_rate": 1.753355704697987e-05, |
|
"loss": 0.7289, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.3533049864708155, |
|
"grad_norm": 0.6321792006492615, |
|
"learning_rate": 1.7491610738255036e-05, |
|
"loss": 0.7253, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.3548511789717819, |
|
"grad_norm": 0.6561968922615051, |
|
"learning_rate": 1.7449664429530202e-05, |
|
"loss": 0.7157, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.3563973714727484, |
|
"grad_norm": 0.6112854480743408, |
|
"learning_rate": 1.740771812080537e-05, |
|
"loss": 0.7421, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 1.3579435639737147, |
|
"grad_norm": 0.6668578386306763, |
|
"learning_rate": 1.736577181208054e-05, |
|
"loss": 0.7849, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.359489756474681, |
|
"grad_norm": 0.6496410965919495, |
|
"learning_rate": 1.7323825503355705e-05, |
|
"loss": 0.8031, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 1.3610359489756474, |
|
"grad_norm": 0.6386198401451111, |
|
"learning_rate": 1.7281879194630872e-05, |
|
"loss": 0.8812, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.3625821414766137, |
|
"grad_norm": 0.6844744682312012, |
|
"learning_rate": 1.7239932885906042e-05, |
|
"loss": 0.8245, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 1.3641283339775803, |
|
"grad_norm": 0.6815952658653259, |
|
"learning_rate": 1.719798657718121e-05, |
|
"loss": 0.8751, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.3656745264785466, |
|
"grad_norm": 0.6746396422386169, |
|
"learning_rate": 1.715604026845638e-05, |
|
"loss": 0.812, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 1.367220718979513, |
|
"grad_norm": 0.6984376907348633, |
|
"learning_rate": 1.7114093959731545e-05, |
|
"loss": 0.8418, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.3687669114804792, |
|
"grad_norm": 0.6949034929275513, |
|
"learning_rate": 1.707214765100671e-05, |
|
"loss": 0.8032, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.3703131039814456, |
|
"grad_norm": 0.6689850091934204, |
|
"learning_rate": 1.7030201342281878e-05, |
|
"loss": 0.8363, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.3718592964824121, |
|
"grad_norm": 0.6734853386878967, |
|
"learning_rate": 1.6988255033557048e-05, |
|
"loss": 0.8643, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 1.3734054889833784, |
|
"grad_norm": 0.7207046747207642, |
|
"learning_rate": 1.6946308724832215e-05, |
|
"loss": 0.853, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.3749516814843448, |
|
"grad_norm": 0.7100691199302673, |
|
"learning_rate": 1.6904362416107385e-05, |
|
"loss": 0.8776, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 1.376497873985311, |
|
"grad_norm": 0.7267347574234009, |
|
"learning_rate": 1.686241610738255e-05, |
|
"loss": 0.817, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.3780440664862774, |
|
"grad_norm": 0.7453994154930115, |
|
"learning_rate": 1.6820469798657718e-05, |
|
"loss": 0.7864, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 1.379590258987244, |
|
"grad_norm": 0.7423779368400574, |
|
"learning_rate": 1.6778523489932888e-05, |
|
"loss": 0.8475, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.3811364514882103, |
|
"grad_norm": 0.7435901165008545, |
|
"learning_rate": 1.6736577181208054e-05, |
|
"loss": 0.8409, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 1.3826826439891766, |
|
"grad_norm": 0.8185101747512817, |
|
"learning_rate": 1.669463087248322e-05, |
|
"loss": 0.9255, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.384228836490143, |
|
"grad_norm": 0.8886857628822327, |
|
"learning_rate": 1.6652684563758387e-05, |
|
"loss": 0.9095, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.3857750289911093, |
|
"grad_norm": 1.2426859140396118, |
|
"learning_rate": 1.6610738255033557e-05, |
|
"loss": 0.9899, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.3873212214920758, |
|
"grad_norm": 0.7389244437217712, |
|
"learning_rate": 1.6568791946308727e-05, |
|
"loss": 0.5189, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 1.3888674139930421, |
|
"grad_norm": 0.5934030413627625, |
|
"learning_rate": 1.6526845637583894e-05, |
|
"loss": 0.6868, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 1.3904136064940085, |
|
"grad_norm": 0.5986191630363464, |
|
"learning_rate": 1.648489932885906e-05, |
|
"loss": 0.7075, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 1.3919597989949748, |
|
"grad_norm": 0.6108096241950989, |
|
"learning_rate": 1.644295302013423e-05, |
|
"loss": 0.6855, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.3935059914959411, |
|
"grad_norm": 0.5924285650253296, |
|
"learning_rate": 1.6401006711409397e-05, |
|
"loss": 0.7459, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 1.3950521839969077, |
|
"grad_norm": 0.627100944519043, |
|
"learning_rate": 1.6359060402684563e-05, |
|
"loss": 0.71, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 1.396598376497874, |
|
"grad_norm": 0.6151503920555115, |
|
"learning_rate": 1.631711409395973e-05, |
|
"loss": 0.6844, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 1.3981445689988403, |
|
"grad_norm": 0.6181209683418274, |
|
"learning_rate": 1.62751677852349e-05, |
|
"loss": 0.7635, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.3996907614998066, |
|
"grad_norm": 0.5866500735282898, |
|
"learning_rate": 1.623322147651007e-05, |
|
"loss": 0.742, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.401236954000773, |
|
"grad_norm": 0.6311929225921631, |
|
"learning_rate": 1.6191275167785237e-05, |
|
"loss": 0.734, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 1.4027831465017395, |
|
"grad_norm": 0.6104720830917358, |
|
"learning_rate": 1.6149328859060403e-05, |
|
"loss": 0.7187, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 1.4043293390027058, |
|
"grad_norm": 0.6044167280197144, |
|
"learning_rate": 1.610738255033557e-05, |
|
"loss": 0.717, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.4058755315036722, |
|
"grad_norm": 0.5984567999839783, |
|
"learning_rate": 1.606543624161074e-05, |
|
"loss": 0.8021, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 1.4074217240046385, |
|
"grad_norm": 0.6355494260787964, |
|
"learning_rate": 1.6023489932885906e-05, |
|
"loss": 0.8507, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.4089679165056048, |
|
"grad_norm": 0.6177673935890198, |
|
"learning_rate": 1.5981543624161076e-05, |
|
"loss": 0.7205, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 1.4105141090065714, |
|
"grad_norm": 0.6486150026321411, |
|
"learning_rate": 1.5939597315436243e-05, |
|
"loss": 0.8078, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.4120603015075377, |
|
"grad_norm": 0.6070188879966736, |
|
"learning_rate": 1.5897651006711413e-05, |
|
"loss": 0.7067, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 1.413606494008504, |
|
"grad_norm": 0.6363996267318726, |
|
"learning_rate": 1.585570469798658e-05, |
|
"loss": 0.7621, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 1.4151526865094703, |
|
"grad_norm": 0.6805376410484314, |
|
"learning_rate": 1.5813758389261746e-05, |
|
"loss": 0.7354, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.4166988790104367, |
|
"grad_norm": 0.6029914617538452, |
|
"learning_rate": 1.5771812080536912e-05, |
|
"loss": 0.7425, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.4182450715114032, |
|
"grad_norm": 0.6197159290313721, |
|
"learning_rate": 1.572986577181208e-05, |
|
"loss": 0.769, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 1.4197912640123695, |
|
"grad_norm": 0.6450138092041016, |
|
"learning_rate": 1.568791946308725e-05, |
|
"loss": 0.7455, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 1.4213374565133359, |
|
"grad_norm": 0.6024583578109741, |
|
"learning_rate": 1.564597315436242e-05, |
|
"loss": 0.783, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 1.4228836490143022, |
|
"grad_norm": 0.6291201114654541, |
|
"learning_rate": 1.5604026845637585e-05, |
|
"loss": 0.7888, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.4244298415152685, |
|
"grad_norm": 0.6113951802253723, |
|
"learning_rate": 1.5562080536912752e-05, |
|
"loss": 0.7682, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 1.425976034016235, |
|
"grad_norm": 0.6421045064926147, |
|
"learning_rate": 1.5520134228187922e-05, |
|
"loss": 0.7691, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 1.4275222265172014, |
|
"grad_norm": 0.6352230906486511, |
|
"learning_rate": 1.547818791946309e-05, |
|
"loss": 0.7902, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 1.4290684190181677, |
|
"grad_norm": 0.677621066570282, |
|
"learning_rate": 1.5436241610738255e-05, |
|
"loss": 0.765, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 1.430614611519134, |
|
"grad_norm": 0.6393842697143555, |
|
"learning_rate": 1.539429530201342e-05, |
|
"loss": 0.6943, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.4321608040201004, |
|
"grad_norm": 0.6991260051727295, |
|
"learning_rate": 1.535234899328859e-05, |
|
"loss": 0.7966, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 1.433706996521067, |
|
"grad_norm": 0.642305314540863, |
|
"learning_rate": 1.531040268456376e-05, |
|
"loss": 0.8245, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 1.4352531890220332, |
|
"grad_norm": 0.6572315692901611, |
|
"learning_rate": 1.5268456375838928e-05, |
|
"loss": 0.7562, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 1.4367993815229996, |
|
"grad_norm": 0.6849440336227417, |
|
"learning_rate": 1.5226510067114095e-05, |
|
"loss": 0.8522, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 1.4383455740239661, |
|
"grad_norm": 0.7153504490852356, |
|
"learning_rate": 1.5184563758389261e-05, |
|
"loss": 0.7843, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.4398917665249322, |
|
"grad_norm": 0.6780909895896912, |
|
"learning_rate": 1.5142617449664431e-05, |
|
"loss": 0.8062, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 1.4414379590258988, |
|
"grad_norm": 0.6425846815109253, |
|
"learning_rate": 1.51006711409396e-05, |
|
"loss": 0.7866, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 1.442984151526865, |
|
"grad_norm": 0.6629062294960022, |
|
"learning_rate": 1.5058724832214766e-05, |
|
"loss": 0.8047, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 1.4445303440278314, |
|
"grad_norm": 0.6615095734596252, |
|
"learning_rate": 1.5016778523489932e-05, |
|
"loss": 0.8117, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 1.446076536528798, |
|
"grad_norm": 0.718272864818573, |
|
"learning_rate": 1.4974832214765102e-05, |
|
"loss": 0.8416, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.447622729029764, |
|
"grad_norm": 0.7286227941513062, |
|
"learning_rate": 1.493288590604027e-05, |
|
"loss": 0.8347, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 1.4491689215307306, |
|
"grad_norm": 0.7250097990036011, |
|
"learning_rate": 1.4890939597315437e-05, |
|
"loss": 0.8095, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 1.450715114031697, |
|
"grad_norm": 0.7696204781532288, |
|
"learning_rate": 1.4848993288590604e-05, |
|
"loss": 0.8611, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 1.4522613065326633, |
|
"grad_norm": 0.7375919222831726, |
|
"learning_rate": 1.4807046979865772e-05, |
|
"loss": 0.8903, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 1.4538074990336298, |
|
"grad_norm": 0.7759246230125427, |
|
"learning_rate": 1.4765100671140942e-05, |
|
"loss": 0.8005, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.455353691534596, |
|
"grad_norm": 0.8237205743789673, |
|
"learning_rate": 1.4723154362416108e-05, |
|
"loss": 0.8204, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 1.4568998840355625, |
|
"grad_norm": 0.7622566819190979, |
|
"learning_rate": 1.4681208053691275e-05, |
|
"loss": 0.8673, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 1.4584460765365288, |
|
"grad_norm": 0.7782172560691833, |
|
"learning_rate": 1.4639261744966443e-05, |
|
"loss": 0.9434, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 1.4599922690374951, |
|
"grad_norm": 0.856338381767273, |
|
"learning_rate": 1.4597315436241613e-05, |
|
"loss": 0.9032, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 1.4615384615384617, |
|
"grad_norm": 0.9459933042526245, |
|
"learning_rate": 1.455536912751678e-05, |
|
"loss": 0.8599, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.4630846540394278, |
|
"grad_norm": 1.1424592733383179, |
|
"learning_rate": 1.4513422818791946e-05, |
|
"loss": 0.879, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 1.4646308465403943, |
|
"grad_norm": 0.9243542551994324, |
|
"learning_rate": 1.4471476510067115e-05, |
|
"loss": 0.6613, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 1.4661770390413607, |
|
"grad_norm": 0.5729179978370667, |
|
"learning_rate": 1.4429530201342285e-05, |
|
"loss": 0.6693, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 1.467723231542327, |
|
"grad_norm": 0.6040563583374023, |
|
"learning_rate": 1.4387583892617451e-05, |
|
"loss": 0.663, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 1.4692694240432935, |
|
"grad_norm": 0.6559063196182251, |
|
"learning_rate": 1.4345637583892618e-05, |
|
"loss": 0.691, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.4708156165442596, |
|
"grad_norm": 0.5916107296943665, |
|
"learning_rate": 1.4303691275167786e-05, |
|
"loss": 0.7474, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 1.4723618090452262, |
|
"grad_norm": 0.5876263380050659, |
|
"learning_rate": 1.4261744966442953e-05, |
|
"loss": 0.7287, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 1.4739080015461925, |
|
"grad_norm": 0.5855628848075867, |
|
"learning_rate": 1.4219798657718122e-05, |
|
"loss": 0.7254, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 1.4754541940471588, |
|
"grad_norm": 0.5839198231697083, |
|
"learning_rate": 1.4177852348993289e-05, |
|
"loss": 0.7462, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 1.4770003865481254, |
|
"grad_norm": 0.5933511257171631, |
|
"learning_rate": 1.4135906040268457e-05, |
|
"loss": 0.7303, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.4785465790490915, |
|
"grad_norm": 0.620617687702179, |
|
"learning_rate": 1.4093959731543624e-05, |
|
"loss": 0.7287, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 1.480092771550058, |
|
"grad_norm": 0.6549968719482422, |
|
"learning_rate": 1.4052013422818794e-05, |
|
"loss": 0.7426, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 1.4816389640510244, |
|
"grad_norm": 0.6639398336410522, |
|
"learning_rate": 1.401006711409396e-05, |
|
"loss": 0.7329, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 1.4831851565519907, |
|
"grad_norm": 0.6285966634750366, |
|
"learning_rate": 1.3968120805369129e-05, |
|
"loss": 0.7302, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 1.4847313490529572, |
|
"grad_norm": 0.6673871278762817, |
|
"learning_rate": 1.3926174496644295e-05, |
|
"loss": 0.7165, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.4862775415539233, |
|
"grad_norm": 0.6557399034500122, |
|
"learning_rate": 1.3884228187919462e-05, |
|
"loss": 0.7465, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 1.4878237340548899, |
|
"grad_norm": 0.615143358707428, |
|
"learning_rate": 1.3842281879194632e-05, |
|
"loss": 0.7531, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 1.4893699265558562, |
|
"grad_norm": 0.6445402503013611, |
|
"learning_rate": 1.38003355704698e-05, |
|
"loss": 0.7088, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 1.4909161190568225, |
|
"grad_norm": 0.6428129076957703, |
|
"learning_rate": 1.3758389261744966e-05, |
|
"loss": 0.7011, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 1.492462311557789, |
|
"grad_norm": 0.6472491025924683, |
|
"learning_rate": 1.3716442953020133e-05, |
|
"loss": 0.8012, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.4940085040587552, |
|
"grad_norm": 0.6467485427856445, |
|
"learning_rate": 1.3674496644295303e-05, |
|
"loss": 0.7108, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 1.4955546965597217, |
|
"grad_norm": 0.648501455783844, |
|
"learning_rate": 1.3632550335570471e-05, |
|
"loss": 0.7786, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 1.497100889060688, |
|
"grad_norm": 0.6614555716514587, |
|
"learning_rate": 1.3590604026845638e-05, |
|
"loss": 0.6745, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.4986470815616544, |
|
"grad_norm": 0.6633491516113281, |
|
"learning_rate": 1.3548657718120804e-05, |
|
"loss": 0.7064, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 1.500193274062621, |
|
"grad_norm": 0.658253014087677, |
|
"learning_rate": 1.3506711409395974e-05, |
|
"loss": 0.7438, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.501739466563587, |
|
"grad_norm": 0.6717640161514282, |
|
"learning_rate": 1.3464765100671143e-05, |
|
"loss": 0.7792, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 1.5032856590645536, |
|
"grad_norm": 0.6208140850067139, |
|
"learning_rate": 1.3422818791946309e-05, |
|
"loss": 0.7973, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 1.50483185156552, |
|
"grad_norm": 0.6302463412284851, |
|
"learning_rate": 1.3380872483221477e-05, |
|
"loss": 0.785, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 1.5063780440664862, |
|
"grad_norm": 0.6479527950286865, |
|
"learning_rate": 1.3338926174496644e-05, |
|
"loss": 0.8266, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 1.5079242365674528, |
|
"grad_norm": 0.6340357661247253, |
|
"learning_rate": 1.3296979865771814e-05, |
|
"loss": 0.7134, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.5094704290684189, |
|
"grad_norm": 0.6478318572044373, |
|
"learning_rate": 1.325503355704698e-05, |
|
"loss": 0.7638, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 1.5110166215693854, |
|
"grad_norm": 0.6769170761108398, |
|
"learning_rate": 1.3213087248322149e-05, |
|
"loss": 0.7754, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 1.5125628140703518, |
|
"grad_norm": 0.6835642457008362, |
|
"learning_rate": 1.3171140939597315e-05, |
|
"loss": 0.7856, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 1.514109006571318, |
|
"grad_norm": 0.700343668460846, |
|
"learning_rate": 1.3129194630872485e-05, |
|
"loss": 0.7428, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 1.5156551990722846, |
|
"grad_norm": 0.6960814595222473, |
|
"learning_rate": 1.3087248322147652e-05, |
|
"loss": 0.7649, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.5172013915732507, |
|
"grad_norm": 0.6891623735427856, |
|
"learning_rate": 1.304530201342282e-05, |
|
"loss": 0.8444, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 1.5187475840742173, |
|
"grad_norm": 0.6881600618362427, |
|
"learning_rate": 1.3003355704697987e-05, |
|
"loss": 0.7956, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 1.5202937765751836, |
|
"grad_norm": 0.6770979762077332, |
|
"learning_rate": 1.2961409395973153e-05, |
|
"loss": 0.8173, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 1.52183996907615, |
|
"grad_norm": 0.6920093894004822, |
|
"learning_rate": 1.2919463087248323e-05, |
|
"loss": 0.8522, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 1.5233861615771165, |
|
"grad_norm": 0.6803298592567444, |
|
"learning_rate": 1.2877516778523491e-05, |
|
"loss": 0.8345, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 1.5249323540780826, |
|
"grad_norm": 0.6997124552726746, |
|
"learning_rate": 1.2835570469798658e-05, |
|
"loss": 0.8141, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 1.5264785465790491, |
|
"grad_norm": 0.7112342715263367, |
|
"learning_rate": 1.2793624161073825e-05, |
|
"loss": 0.8196, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 1.5280247390800155, |
|
"grad_norm": 0.7443994879722595, |
|
"learning_rate": 1.2751677852348994e-05, |
|
"loss": 0.8575, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 1.5295709315809818, |
|
"grad_norm": 0.6785229444503784, |
|
"learning_rate": 1.2709731543624163e-05, |
|
"loss": 0.8843, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 1.5311171240819483, |
|
"grad_norm": 0.7394726276397705, |
|
"learning_rate": 1.266778523489933e-05, |
|
"loss": 0.9496, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.5326633165829144, |
|
"grad_norm": 0.738186776638031, |
|
"learning_rate": 1.2625838926174496e-05, |
|
"loss": 0.9172, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 1.534209509083881, |
|
"grad_norm": 0.7867004871368408, |
|
"learning_rate": 1.2583892617449666e-05, |
|
"loss": 0.8743, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 1.5357557015848473, |
|
"grad_norm": 0.8075705170631409, |
|
"learning_rate": 1.2541946308724834e-05, |
|
"loss": 0.9021, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 1.5373018940858136, |
|
"grad_norm": 0.8144130110740662, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.8258, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 1.5388480865867802, |
|
"grad_norm": 0.8460942506790161, |
|
"learning_rate": 1.2458053691275167e-05, |
|
"loss": 0.8051, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 1.5403942790877463, |
|
"grad_norm": 1.0733917951583862, |
|
"learning_rate": 1.2416107382550337e-05, |
|
"loss": 0.9469, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 1.5419404715887128, |
|
"grad_norm": 0.6984366178512573, |
|
"learning_rate": 1.2374161073825504e-05, |
|
"loss": 0.6271, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 1.5434866640896792, |
|
"grad_norm": 0.5974945425987244, |
|
"learning_rate": 1.2332214765100672e-05, |
|
"loss": 0.6767, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 1.5450328565906455, |
|
"grad_norm": 0.5917332172393799, |
|
"learning_rate": 1.2290268456375838e-05, |
|
"loss": 0.7027, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 1.546579049091612, |
|
"grad_norm": 0.5879162549972534, |
|
"learning_rate": 1.2248322147651008e-05, |
|
"loss": 0.6444, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.5481252415925781, |
|
"grad_norm": 0.5611408948898315, |
|
"learning_rate": 1.2206375838926175e-05, |
|
"loss": 0.6743, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 1.5496714340935447, |
|
"grad_norm": 0.6579827666282654, |
|
"learning_rate": 1.2164429530201343e-05, |
|
"loss": 0.6863, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 1.551217626594511, |
|
"grad_norm": 0.6543861031532288, |
|
"learning_rate": 1.212248322147651e-05, |
|
"loss": 0.6911, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 1.5527638190954773, |
|
"grad_norm": 0.6316918134689331, |
|
"learning_rate": 1.208053691275168e-05, |
|
"loss": 0.7126, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 1.5543100115964439, |
|
"grad_norm": 0.6070011258125305, |
|
"learning_rate": 1.2038590604026846e-05, |
|
"loss": 0.5802, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 1.55585620409741, |
|
"grad_norm": 0.6225163340568542, |
|
"learning_rate": 1.1996644295302013e-05, |
|
"loss": 0.7565, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 1.5574023965983765, |
|
"grad_norm": 0.6303391456604004, |
|
"learning_rate": 1.1954697986577181e-05, |
|
"loss": 0.7537, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 1.5589485890993429, |
|
"grad_norm": 0.6572604179382324, |
|
"learning_rate": 1.191275167785235e-05, |
|
"loss": 0.7332, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 1.5604947816003092, |
|
"grad_norm": 0.6323292255401611, |
|
"learning_rate": 1.1870805369127518e-05, |
|
"loss": 0.7537, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 1.5620409741012757, |
|
"grad_norm": 0.621051549911499, |
|
"learning_rate": 1.1828859060402684e-05, |
|
"loss": 0.7371, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.5635871666022418, |
|
"grad_norm": 0.6632164120674133, |
|
"learning_rate": 1.1786912751677852e-05, |
|
"loss": 0.7169, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 1.5651333591032084, |
|
"grad_norm": 0.6632513999938965, |
|
"learning_rate": 1.174496644295302e-05, |
|
"loss": 0.726, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 1.5666795516041747, |
|
"grad_norm": 0.6338618397712708, |
|
"learning_rate": 1.1703020134228189e-05, |
|
"loss": 0.7774, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 1.568225744105141, |
|
"grad_norm": 0.6426889896392822, |
|
"learning_rate": 1.1661073825503356e-05, |
|
"loss": 0.712, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 1.5697719366061076, |
|
"grad_norm": 0.6418123841285706, |
|
"learning_rate": 1.1619127516778524e-05, |
|
"loss": 0.692, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 1.5713181291070737, |
|
"grad_norm": 0.6338704228401184, |
|
"learning_rate": 1.1577181208053692e-05, |
|
"loss": 0.7498, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 1.5728643216080402, |
|
"grad_norm": 0.6129948496818542, |
|
"learning_rate": 1.1535234899328859e-05, |
|
"loss": 0.8721, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 1.5744105141090066, |
|
"grad_norm": 0.6413542032241821, |
|
"learning_rate": 1.1493288590604027e-05, |
|
"loss": 0.8381, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 1.575956706609973, |
|
"grad_norm": 0.6431681513786316, |
|
"learning_rate": 1.1451342281879195e-05, |
|
"loss": 0.7365, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 1.5775028991109394, |
|
"grad_norm": 0.6414735317230225, |
|
"learning_rate": 1.1409395973154363e-05, |
|
"loss": 0.8006, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.5790490916119055, |
|
"grad_norm": 0.6431716680526733, |
|
"learning_rate": 1.136744966442953e-05, |
|
"loss": 0.7759, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 1.580595284112872, |
|
"grad_norm": 0.6646420359611511, |
|
"learning_rate": 1.1325503355704698e-05, |
|
"loss": 0.7038, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 1.5821414766138384, |
|
"grad_norm": 0.7008923292160034, |
|
"learning_rate": 1.1283557046979866e-05, |
|
"loss": 0.719, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 1.5836876691148047, |
|
"grad_norm": 0.6393096446990967, |
|
"learning_rate": 1.1241610738255035e-05, |
|
"loss": 0.7841, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 1.5852338616157713, |
|
"grad_norm": 0.6879417300224304, |
|
"learning_rate": 1.1199664429530201e-05, |
|
"loss": 0.8218, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.5867800541167374, |
|
"grad_norm": 0.6500439047813416, |
|
"learning_rate": 1.115771812080537e-05, |
|
"loss": 0.8026, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 1.588326246617704, |
|
"grad_norm": 0.6967812180519104, |
|
"learning_rate": 1.1115771812080538e-05, |
|
"loss": 0.7494, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 1.5898724391186703, |
|
"grad_norm": 0.6527352929115295, |
|
"learning_rate": 1.1073825503355706e-05, |
|
"loss": 0.8472, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 1.5914186316196366, |
|
"grad_norm": 0.7033571004867554, |
|
"learning_rate": 1.1031879194630873e-05, |
|
"loss": 0.7553, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 1.5929648241206031, |
|
"grad_norm": 0.6790737509727478, |
|
"learning_rate": 1.098993288590604e-05, |
|
"loss": 0.8045, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.5945110166215692, |
|
"grad_norm": 0.7032893300056458, |
|
"learning_rate": 1.0947986577181209e-05, |
|
"loss": 0.8292, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 1.5960572091225358, |
|
"grad_norm": 0.6649371981620789, |
|
"learning_rate": 1.0906040268456376e-05, |
|
"loss": 0.8032, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 1.5976034016235021, |
|
"grad_norm": 0.7009012699127197, |
|
"learning_rate": 1.0864093959731544e-05, |
|
"loss": 0.9107, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 1.5991495941244684, |
|
"grad_norm": 0.7047061324119568, |
|
"learning_rate": 1.0822147651006712e-05, |
|
"loss": 0.904, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 1.600695786625435, |
|
"grad_norm": 0.7520581483840942, |
|
"learning_rate": 1.078020134228188e-05, |
|
"loss": 0.8972, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 1.602241979126401, |
|
"grad_norm": 0.710966944694519, |
|
"learning_rate": 1.0738255033557047e-05, |
|
"loss": 0.8133, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 1.6037881716273676, |
|
"grad_norm": 0.7045226693153381, |
|
"learning_rate": 1.0696308724832215e-05, |
|
"loss": 0.8474, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 1.605334364128334, |
|
"grad_norm": 0.7222912311553955, |
|
"learning_rate": 1.0654362416107383e-05, |
|
"loss": 0.8577, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 1.6068805566293003, |
|
"grad_norm": 0.7768924236297607, |
|
"learning_rate": 1.0612416107382552e-05, |
|
"loss": 0.8906, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 1.6084267491302668, |
|
"grad_norm": 0.7447935342788696, |
|
"learning_rate": 1.0570469798657718e-05, |
|
"loss": 0.8571, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.609972941631233, |
|
"grad_norm": 0.7860161662101746, |
|
"learning_rate": 1.0528523489932887e-05, |
|
"loss": 0.868, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 1.6115191341321995, |
|
"grad_norm": 0.772621750831604, |
|
"learning_rate": 1.0486577181208055e-05, |
|
"loss": 0.9064, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 1.6130653266331658, |
|
"grad_norm": 0.7927963733673096, |
|
"learning_rate": 1.0444630872483221e-05, |
|
"loss": 0.8628, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 1.6146115191341321, |
|
"grad_norm": 0.8811314105987549, |
|
"learning_rate": 1.040268456375839e-05, |
|
"loss": 0.9203, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 1.6161577116350987, |
|
"grad_norm": 1.0155308246612549, |
|
"learning_rate": 1.0360738255033558e-05, |
|
"loss": 0.9134, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 1.6177039041360648, |
|
"grad_norm": 1.3558322191238403, |
|
"learning_rate": 1.0318791946308726e-05, |
|
"loss": 1.007, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 1.6192500966370313, |
|
"grad_norm": 0.771702766418457, |
|
"learning_rate": 1.0276845637583893e-05, |
|
"loss": 0.5492, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 1.6207962891379977, |
|
"grad_norm": 0.5778831243515015, |
|
"learning_rate": 1.0234899328859061e-05, |
|
"loss": 0.5969, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 1.622342481638964, |
|
"grad_norm": 0.5768334865570068, |
|
"learning_rate": 1.019295302013423e-05, |
|
"loss": 0.7561, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 1.6238886741399305, |
|
"grad_norm": 0.6801527142524719, |
|
"learning_rate": 1.0151006711409397e-05, |
|
"loss": 0.6649, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.6254348666408966, |
|
"grad_norm": 0.6493854522705078, |
|
"learning_rate": 1.0109060402684564e-05, |
|
"loss": 0.6788, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 1.6269810591418632, |
|
"grad_norm": 0.6070857644081116, |
|
"learning_rate": 1.006711409395973e-05, |
|
"loss": 0.6509, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 1.6285272516428295, |
|
"grad_norm": 0.6425489783287048, |
|
"learning_rate": 1.00251677852349e-05, |
|
"loss": 0.7303, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 1.6300734441437958, |
|
"grad_norm": 0.6311664581298828, |
|
"learning_rate": 9.983221476510067e-06, |
|
"loss": 0.6492, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 1.6316196366447624, |
|
"grad_norm": 0.5983323454856873, |
|
"learning_rate": 9.941275167785235e-06, |
|
"loss": 0.7778, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 1.6331658291457285, |
|
"grad_norm": 0.5775598287582397, |
|
"learning_rate": 9.899328859060402e-06, |
|
"loss": 0.7594, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 1.634712021646695, |
|
"grad_norm": 0.6420764327049255, |
|
"learning_rate": 9.857382550335572e-06, |
|
"loss": 0.7812, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 1.6362582141476614, |
|
"grad_norm": 0.6438013911247253, |
|
"learning_rate": 9.815436241610738e-06, |
|
"loss": 0.7245, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 1.6378044066486277, |
|
"grad_norm": 0.6310963034629822, |
|
"learning_rate": 9.773489932885907e-06, |
|
"loss": 0.7321, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 1.6393505991495942, |
|
"grad_norm": 0.6203198432922363, |
|
"learning_rate": 9.731543624161075e-06, |
|
"loss": 0.6914, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.6408967916505603, |
|
"grad_norm": 0.638454258441925, |
|
"learning_rate": 9.689597315436243e-06, |
|
"loss": 0.7478, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 1.642442984151527, |
|
"grad_norm": 0.6612226963043213, |
|
"learning_rate": 9.64765100671141e-06, |
|
"loss": 0.7209, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 1.6439891766524932, |
|
"grad_norm": 0.6566651463508606, |
|
"learning_rate": 9.605704697986576e-06, |
|
"loss": 0.8045, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 1.6455353691534595, |
|
"grad_norm": 0.6543892621994019, |
|
"learning_rate": 9.563758389261746e-06, |
|
"loss": 0.7727, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 1.647081561654426, |
|
"grad_norm": 0.6707619428634644, |
|
"learning_rate": 9.521812080536913e-06, |
|
"loss": 0.7271, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 1.6486277541553922, |
|
"grad_norm": 0.6582595705986023, |
|
"learning_rate": 9.479865771812081e-06, |
|
"loss": 0.7332, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 1.6501739466563587, |
|
"grad_norm": 0.6757826209068298, |
|
"learning_rate": 9.437919463087248e-06, |
|
"loss": 0.7663, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 1.651720139157325, |
|
"grad_norm": 0.698391318321228, |
|
"learning_rate": 9.395973154362418e-06, |
|
"loss": 0.7277, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 1.6532663316582914, |
|
"grad_norm": 0.6885313987731934, |
|
"learning_rate": 9.354026845637584e-06, |
|
"loss": 0.7959, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 1.654812524159258, |
|
"grad_norm": 0.664113461971283, |
|
"learning_rate": 9.312080536912752e-06, |
|
"loss": 0.7571, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.656358716660224, |
|
"grad_norm": 0.6730982065200806, |
|
"learning_rate": 9.270134228187919e-06, |
|
"loss": 0.709, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 1.6579049091611906, |
|
"grad_norm": 0.684267520904541, |
|
"learning_rate": 9.228187919463089e-06, |
|
"loss": 0.6932, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 1.659451101662157, |
|
"grad_norm": 0.631272554397583, |
|
"learning_rate": 9.186241610738255e-06, |
|
"loss": 0.8569, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 1.6609972941631233, |
|
"grad_norm": 0.7173067927360535, |
|
"learning_rate": 9.144295302013424e-06, |
|
"loss": 0.777, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 1.6625434866640898, |
|
"grad_norm": 0.6663726568222046, |
|
"learning_rate": 9.10234899328859e-06, |
|
"loss": 0.7523, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.664089679165056, |
|
"grad_norm": 0.689304530620575, |
|
"learning_rate": 9.060402684563759e-06, |
|
"loss": 0.7922, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 1.6656358716660224, |
|
"grad_norm": 0.6620500683784485, |
|
"learning_rate": 9.018456375838927e-06, |
|
"loss": 0.8587, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 1.6671820641669888, |
|
"grad_norm": 0.6905922889709473, |
|
"learning_rate": 8.976510067114093e-06, |
|
"loss": 0.7897, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 1.668728256667955, |
|
"grad_norm": 0.6763781309127808, |
|
"learning_rate": 8.934563758389262e-06, |
|
"loss": 0.7886, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 1.6702744491689216, |
|
"grad_norm": 0.684766948223114, |
|
"learning_rate": 8.89261744966443e-06, |
|
"loss": 0.8122, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.6718206416698878, |
|
"grad_norm": 0.6934885382652283, |
|
"learning_rate": 8.850671140939598e-06, |
|
"loss": 0.7894, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 1.6733668341708543, |
|
"grad_norm": 0.7051049470901489, |
|
"learning_rate": 8.808724832214765e-06, |
|
"loss": 0.8047, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 1.6749130266718206, |
|
"grad_norm": 0.7332410216331482, |
|
"learning_rate": 8.766778523489935e-06, |
|
"loss": 0.7371, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 1.676459219172787, |
|
"grad_norm": 0.7231751084327698, |
|
"learning_rate": 8.724832214765101e-06, |
|
"loss": 0.7804, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 1.6780054116737535, |
|
"grad_norm": 0.7421995997428894, |
|
"learning_rate": 8.68288590604027e-06, |
|
"loss": 0.8364, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 1.6795516041747196, |
|
"grad_norm": 0.7182170152664185, |
|
"learning_rate": 8.640939597315436e-06, |
|
"loss": 0.848, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 1.6810977966756862, |
|
"grad_norm": 0.7546189427375793, |
|
"learning_rate": 8.598993288590604e-06, |
|
"loss": 0.8336, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 1.6826439891766525, |
|
"grad_norm": 0.7409399747848511, |
|
"learning_rate": 8.557046979865773e-06, |
|
"loss": 0.8151, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 1.6841901816776188, |
|
"grad_norm": 0.7315151691436768, |
|
"learning_rate": 8.515100671140939e-06, |
|
"loss": 0.8126, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 1.6857363741785854, |
|
"grad_norm": 0.7949945330619812, |
|
"learning_rate": 8.473154362416107e-06, |
|
"loss": 0.8533, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.6872825666795515, |
|
"grad_norm": 0.7690507769584656, |
|
"learning_rate": 8.431208053691276e-06, |
|
"loss": 0.846, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 1.688828759180518, |
|
"grad_norm": 0.7926616072654724, |
|
"learning_rate": 8.389261744966444e-06, |
|
"loss": 0.8402, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 1.6903749516814843, |
|
"grad_norm": 0.8056994676589966, |
|
"learning_rate": 8.34731543624161e-06, |
|
"loss": 0.8191, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 1.6919211441824507, |
|
"grad_norm": 0.8308284282684326, |
|
"learning_rate": 8.305369127516779e-06, |
|
"loss": 0.9042, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 1.6934673366834172, |
|
"grad_norm": 0.9019404053688049, |
|
"learning_rate": 8.263422818791947e-06, |
|
"loss": 0.8962, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 1.6950135291843833, |
|
"grad_norm": 1.144425630569458, |
|
"learning_rate": 8.221476510067115e-06, |
|
"loss": 1.0569, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 1.6965597216853499, |
|
"grad_norm": 0.7823913097381592, |
|
"learning_rate": 8.179530201342282e-06, |
|
"loss": 0.5956, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 1.6981059141863162, |
|
"grad_norm": 0.5778486132621765, |
|
"learning_rate": 8.13758389261745e-06, |
|
"loss": 0.6246, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 1.6996521066872825, |
|
"grad_norm": 0.6111435890197754, |
|
"learning_rate": 8.095637583892618e-06, |
|
"loss": 0.6681, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 1.701198299188249, |
|
"grad_norm": 0.6174157857894897, |
|
"learning_rate": 8.053691275167785e-06, |
|
"loss": 0.6635, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.7027444916892152, |
|
"grad_norm": 0.660089910030365, |
|
"learning_rate": 8.011744966442953e-06, |
|
"loss": 0.6994, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 1.7042906841901817, |
|
"grad_norm": 0.6478524208068848, |
|
"learning_rate": 7.969798657718121e-06, |
|
"loss": 0.6157, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 1.705836876691148, |
|
"grad_norm": 0.6385321617126465, |
|
"learning_rate": 7.92785234899329e-06, |
|
"loss": 0.6967, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 1.7073830691921144, |
|
"grad_norm": 0.6223974227905273, |
|
"learning_rate": 7.885906040268456e-06, |
|
"loss": 0.7572, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 1.708929261693081, |
|
"grad_norm": 0.6408361792564392, |
|
"learning_rate": 7.843959731543624e-06, |
|
"loss": 0.8266, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 1.710475454194047, |
|
"grad_norm": 0.6401566863059998, |
|
"learning_rate": 7.802013422818793e-06, |
|
"loss": 0.7073, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 1.7120216466950136, |
|
"grad_norm": 0.6204596757888794, |
|
"learning_rate": 7.760067114093961e-06, |
|
"loss": 0.6831, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 1.7135678391959799, |
|
"grad_norm": 0.6418159604072571, |
|
"learning_rate": 7.718120805369127e-06, |
|
"loss": 0.6925, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 1.7151140316969462, |
|
"grad_norm": 0.6294611692428589, |
|
"learning_rate": 7.676174496644296e-06, |
|
"loss": 0.7594, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 1.7166602241979128, |
|
"grad_norm": 0.6831735968589783, |
|
"learning_rate": 7.634228187919464e-06, |
|
"loss": 0.6813, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.7182064166988789, |
|
"grad_norm": 0.6496366262435913, |
|
"learning_rate": 7.5922818791946305e-06, |
|
"loss": 0.7343, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 1.7197526091998454, |
|
"grad_norm": 0.6550260782241821, |
|
"learning_rate": 7.5503355704698e-06, |
|
"loss": 0.7389, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 1.7212988017008117, |
|
"grad_norm": 0.6319887638092041, |
|
"learning_rate": 7.508389261744966e-06, |
|
"loss": 0.7011, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 1.722844994201778, |
|
"grad_norm": 0.6578625440597534, |
|
"learning_rate": 7.466442953020135e-06, |
|
"loss": 0.7221, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 1.7243911867027446, |
|
"grad_norm": 0.6553155779838562, |
|
"learning_rate": 7.424496644295302e-06, |
|
"loss": 0.7053, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 1.7259373792037107, |
|
"grad_norm": 0.6502689719200134, |
|
"learning_rate": 7.382550335570471e-06, |
|
"loss": 0.8073, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 1.7274835717046773, |
|
"grad_norm": 0.6422849893569946, |
|
"learning_rate": 7.3406040268456375e-06, |
|
"loss": 0.6863, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 1.7290297642056436, |
|
"grad_norm": 0.6866220831871033, |
|
"learning_rate": 7.298657718120807e-06, |
|
"loss": 0.7959, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 1.73057595670661, |
|
"grad_norm": 0.645169198513031, |
|
"learning_rate": 7.256711409395973e-06, |
|
"loss": 0.7729, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 1.7321221492075765, |
|
"grad_norm": 0.654244601726532, |
|
"learning_rate": 7.214765100671142e-06, |
|
"loss": 0.7707, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.7336683417085426, |
|
"grad_norm": 0.6695640087127686, |
|
"learning_rate": 7.172818791946309e-06, |
|
"loss": 0.714, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 1.735214534209509, |
|
"grad_norm": 0.6387913227081299, |
|
"learning_rate": 7.130872483221476e-06, |
|
"loss": 0.7654, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 1.7367607267104754, |
|
"grad_norm": 0.664368748664856, |
|
"learning_rate": 7.0889261744966445e-06, |
|
"loss": 0.7525, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 1.7383069192114418, |
|
"grad_norm": 0.7358962893486023, |
|
"learning_rate": 7.046979865771812e-06, |
|
"loss": 0.747, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 1.7398531117124083, |
|
"grad_norm": 0.6654751896858215, |
|
"learning_rate": 7.00503355704698e-06, |
|
"loss": 0.7669, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.7413993042133744, |
|
"grad_norm": 0.6763463020324707, |
|
"learning_rate": 6.963087248322148e-06, |
|
"loss": 0.8077, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 1.742945496714341, |
|
"grad_norm": 0.6764401197433472, |
|
"learning_rate": 6.921140939597316e-06, |
|
"loss": 0.7438, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 1.7444916892153073, |
|
"grad_norm": 0.7182475924491882, |
|
"learning_rate": 6.879194630872483e-06, |
|
"loss": 0.728, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 1.7460378817162736, |
|
"grad_norm": 0.6785516142845154, |
|
"learning_rate": 6.8372483221476515e-06, |
|
"loss": 0.7939, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 1.7475840742172402, |
|
"grad_norm": 0.7003461122512817, |
|
"learning_rate": 6.795302013422819e-06, |
|
"loss": 0.7729, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.7491302667182063, |
|
"grad_norm": 0.6882821917533875, |
|
"learning_rate": 6.753355704697987e-06, |
|
"loss": 0.9204, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 1.7506764592191728, |
|
"grad_norm": 0.7342216372489929, |
|
"learning_rate": 6.7114093959731546e-06, |
|
"loss": 0.7385, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 1.7522226517201391, |
|
"grad_norm": 0.7073139548301697, |
|
"learning_rate": 6.669463087248322e-06, |
|
"loss": 0.9268, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 1.7537688442211055, |
|
"grad_norm": 0.7231860756874084, |
|
"learning_rate": 6.62751677852349e-06, |
|
"loss": 0.8596, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 1.755315036722072, |
|
"grad_norm": 0.7145645618438721, |
|
"learning_rate": 6.585570469798658e-06, |
|
"loss": 0.8466, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 1.7568612292230381, |
|
"grad_norm": 0.7454160451889038, |
|
"learning_rate": 6.543624161073826e-06, |
|
"loss": 0.7629, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 1.7584074217240047, |
|
"grad_norm": 0.7323503494262695, |
|
"learning_rate": 6.501677852348993e-06, |
|
"loss": 0.8666, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 1.759953614224971, |
|
"grad_norm": 0.7377263307571411, |
|
"learning_rate": 6.4597315436241616e-06, |
|
"loss": 0.8515, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 1.7614998067259373, |
|
"grad_norm": 0.7750667929649353, |
|
"learning_rate": 6.417785234899329e-06, |
|
"loss": 0.8033, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 1.7630459992269039, |
|
"grad_norm": 0.7829360365867615, |
|
"learning_rate": 6.375838926174497e-06, |
|
"loss": 0.8903, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.76459219172787, |
|
"grad_norm": 0.7346689701080322, |
|
"learning_rate": 6.333892617449665e-06, |
|
"loss": 0.9772, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 1.7661383842288365, |
|
"grad_norm": 0.7309790253639221, |
|
"learning_rate": 6.291946308724833e-06, |
|
"loss": 0.8317, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 1.7676845767298028, |
|
"grad_norm": 0.8332825899124146, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.7692, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 1.7692307692307692, |
|
"grad_norm": 0.8708672523498535, |
|
"learning_rate": 6.2080536912751686e-06, |
|
"loss": 0.946, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 1.7707769617317357, |
|
"grad_norm": 0.9887493848800659, |
|
"learning_rate": 6.166107382550336e-06, |
|
"loss": 0.9102, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 1.7723231542327018, |
|
"grad_norm": 1.3964698314666748, |
|
"learning_rate": 6.124161073825504e-06, |
|
"loss": 0.8942, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 1.7738693467336684, |
|
"grad_norm": 0.9236196875572205, |
|
"learning_rate": 6.082214765100672e-06, |
|
"loss": 0.6239, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 1.7754155392346347, |
|
"grad_norm": 0.5753183960914612, |
|
"learning_rate": 6.04026845637584e-06, |
|
"loss": 0.6674, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 1.776961731735601, |
|
"grad_norm": 0.6132087111473083, |
|
"learning_rate": 5.9983221476510064e-06, |
|
"loss": 0.7186, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 1.7785079242365676, |
|
"grad_norm": 0.586824893951416, |
|
"learning_rate": 5.956375838926175e-06, |
|
"loss": 0.7077, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.7800541167375337, |
|
"grad_norm": 0.6105204224586487, |
|
"learning_rate": 5.914429530201342e-06, |
|
"loss": 0.7448, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 1.7816003092385002, |
|
"grad_norm": 0.6133350133895874, |
|
"learning_rate": 5.87248322147651e-06, |
|
"loss": 0.7761, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 1.7831465017394665, |
|
"grad_norm": 0.6162737011909485, |
|
"learning_rate": 5.830536912751678e-06, |
|
"loss": 0.7204, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 1.7846926942404329, |
|
"grad_norm": 0.6450937390327454, |
|
"learning_rate": 5.788590604026846e-06, |
|
"loss": 0.7163, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 1.7862388867413994, |
|
"grad_norm": 0.6387403011322021, |
|
"learning_rate": 5.7466442953020134e-06, |
|
"loss": 0.7454, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 1.7877850792423655, |
|
"grad_norm": 0.6444848775863647, |
|
"learning_rate": 5.704697986577182e-06, |
|
"loss": 0.7048, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 1.789331271743332, |
|
"grad_norm": 0.6471322774887085, |
|
"learning_rate": 5.662751677852349e-06, |
|
"loss": 0.75, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 1.7908774642442984, |
|
"grad_norm": 0.6798081398010254, |
|
"learning_rate": 5.620805369127517e-06, |
|
"loss": 0.7096, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 1.7924236567452647, |
|
"grad_norm": 0.6646496653556824, |
|
"learning_rate": 5.578859060402685e-06, |
|
"loss": 0.8017, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 1.7939698492462313, |
|
"grad_norm": 0.6455252170562744, |
|
"learning_rate": 5.536912751677853e-06, |
|
"loss": 0.7323, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.7955160417471974, |
|
"grad_norm": 0.6841678023338318, |
|
"learning_rate": 5.49496644295302e-06, |
|
"loss": 0.7373, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 1.797062234248164, |
|
"grad_norm": 0.6608906388282776, |
|
"learning_rate": 5.453020134228188e-06, |
|
"loss": 0.7039, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 1.7986084267491302, |
|
"grad_norm": 0.6611918807029724, |
|
"learning_rate": 5.411073825503356e-06, |
|
"loss": 0.768, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 1.8001546192500966, |
|
"grad_norm": 0.6479055881500244, |
|
"learning_rate": 5.3691275167785235e-06, |
|
"loss": 0.7672, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 1.8017008117510631, |
|
"grad_norm": 0.6645311117172241, |
|
"learning_rate": 5.327181208053692e-06, |
|
"loss": 0.8146, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 1.8032470042520292, |
|
"grad_norm": 0.6700178980827332, |
|
"learning_rate": 5.285234899328859e-06, |
|
"loss": 0.7019, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 1.8047931967529958, |
|
"grad_norm": 0.6892951130867004, |
|
"learning_rate": 5.243288590604027e-06, |
|
"loss": 0.7982, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 1.806339389253962, |
|
"grad_norm": 0.6578270196914673, |
|
"learning_rate": 5.201342281879195e-06, |
|
"loss": 0.7844, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 1.8078855817549284, |
|
"grad_norm": 0.6844699382781982, |
|
"learning_rate": 5.159395973154363e-06, |
|
"loss": 0.73, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 1.809431774255895, |
|
"grad_norm": 0.67879718542099, |
|
"learning_rate": 5.1174496644295305e-06, |
|
"loss": 0.8256, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.810977966756861, |
|
"grad_norm": 0.6748877167701721, |
|
"learning_rate": 5.075503355704699e-06, |
|
"loss": 0.7733, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 1.8125241592578276, |
|
"grad_norm": 0.6248130798339844, |
|
"learning_rate": 5.033557046979865e-06, |
|
"loss": 0.7489, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 1.814070351758794, |
|
"grad_norm": 0.6643280386924744, |
|
"learning_rate": 4.9916107382550336e-06, |
|
"loss": 0.726, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 1.8156165442597603, |
|
"grad_norm": 0.6891630291938782, |
|
"learning_rate": 4.949664429530201e-06, |
|
"loss": 0.7442, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 1.8171627367607268, |
|
"grad_norm": 0.6768823862075806, |
|
"learning_rate": 4.907718120805369e-06, |
|
"loss": 0.7376, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.818708929261693, |
|
"grad_norm": 0.669154167175293, |
|
"learning_rate": 4.8657718120805375e-06, |
|
"loss": 0.7775, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 1.8202551217626595, |
|
"grad_norm": 0.7224996089935303, |
|
"learning_rate": 4.823825503355705e-06, |
|
"loss": 0.7535, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 1.8218013142636258, |
|
"grad_norm": 0.6954566240310669, |
|
"learning_rate": 4.781879194630873e-06, |
|
"loss": 0.77, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 1.8233475067645921, |
|
"grad_norm": 0.6938985586166382, |
|
"learning_rate": 4.7399328859060405e-06, |
|
"loss": 0.7963, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 1.8248936992655587, |
|
"grad_norm": 0.6518607139587402, |
|
"learning_rate": 4.697986577181209e-06, |
|
"loss": 0.7652, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.8264398917665248, |
|
"grad_norm": 0.7072224617004395, |
|
"learning_rate": 4.656040268456376e-06, |
|
"loss": 0.7875, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 1.8279860842674913, |
|
"grad_norm": 0.689007580280304, |
|
"learning_rate": 4.6140939597315445e-06, |
|
"loss": 0.8659, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 1.8295322767684576, |
|
"grad_norm": 0.7117170095443726, |
|
"learning_rate": 4.572147651006712e-06, |
|
"loss": 0.7761, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 1.831078469269424, |
|
"grad_norm": 0.6940242052078247, |
|
"learning_rate": 4.530201342281879e-06, |
|
"loss": 0.8305, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 1.8326246617703905, |
|
"grad_norm": 0.7297004461288452, |
|
"learning_rate": 4.488255033557047e-06, |
|
"loss": 0.8451, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 1.8341708542713566, |
|
"grad_norm": 0.7414118647575378, |
|
"learning_rate": 4.446308724832215e-06, |
|
"loss": 0.7932, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 1.8357170467723232, |
|
"grad_norm": 0.7365830540657043, |
|
"learning_rate": 4.404362416107382e-06, |
|
"loss": 0.8153, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 1.8372632392732895, |
|
"grad_norm": 0.7378683686256409, |
|
"learning_rate": 4.362416107382551e-06, |
|
"loss": 0.8795, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 1.8388094317742558, |
|
"grad_norm": 0.7360830307006836, |
|
"learning_rate": 4.320469798657718e-06, |
|
"loss": 0.863, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 1.8403556242752224, |
|
"grad_norm": 0.7365934252738953, |
|
"learning_rate": 4.278523489932886e-06, |
|
"loss": 0.8836, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.8419018167761885, |
|
"grad_norm": 0.7860958576202393, |
|
"learning_rate": 4.236577181208054e-06, |
|
"loss": 0.8834, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 1.843448009277155, |
|
"grad_norm": 0.8424365520477295, |
|
"learning_rate": 4.194630872483222e-06, |
|
"loss": 0.8303, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 1.8449942017781213, |
|
"grad_norm": 0.8641267418861389, |
|
"learning_rate": 4.152684563758389e-06, |
|
"loss": 0.7928, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 1.8465403942790877, |
|
"grad_norm": 0.8995117545127869, |
|
"learning_rate": 4.110738255033558e-06, |
|
"loss": 0.8474, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 1.8480865867800542, |
|
"grad_norm": 0.9355427026748657, |
|
"learning_rate": 4.068791946308725e-06, |
|
"loss": 0.8516, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 1.8496327792810203, |
|
"grad_norm": 1.3238755464553833, |
|
"learning_rate": 4.026845637583892e-06, |
|
"loss": 1.0038, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 1.8511789717819869, |
|
"grad_norm": 0.8634078502655029, |
|
"learning_rate": 3.984899328859061e-06, |
|
"loss": 0.6067, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 1.8527251642829532, |
|
"grad_norm": 0.5876993536949158, |
|
"learning_rate": 3.942953020134228e-06, |
|
"loss": 0.6623, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 1.8542713567839195, |
|
"grad_norm": 0.6208466291427612, |
|
"learning_rate": 3.901006711409396e-06, |
|
"loss": 0.6686, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 1.855817549284886, |
|
"grad_norm": 0.5899839401245117, |
|
"learning_rate": 3.859060402684564e-06, |
|
"loss": 0.7108, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.8573637417858522, |
|
"grad_norm": 0.6139258742332458, |
|
"learning_rate": 3.817114093959732e-06, |
|
"loss": 0.6853, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 1.8589099342868187, |
|
"grad_norm": 0.6186558604240417, |
|
"learning_rate": 3.7751677852349e-06, |
|
"loss": 0.6619, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 1.860456126787785, |
|
"grad_norm": 0.6083664298057556, |
|
"learning_rate": 3.7332214765100677e-06, |
|
"loss": 0.7119, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 1.8620023192887514, |
|
"grad_norm": 0.6227363348007202, |
|
"learning_rate": 3.6912751677852355e-06, |
|
"loss": 0.7251, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 1.863548511789718, |
|
"grad_norm": 0.6329985857009888, |
|
"learning_rate": 3.6493288590604033e-06, |
|
"loss": 0.7796, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 1.865094704290684, |
|
"grad_norm": 0.644888162612915, |
|
"learning_rate": 3.607382550335571e-06, |
|
"loss": 0.7747, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 1.8666408967916506, |
|
"grad_norm": 0.6022448539733887, |
|
"learning_rate": 3.565436241610738e-06, |
|
"loss": 0.7027, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 1.868187089292617, |
|
"grad_norm": 0.6584210991859436, |
|
"learning_rate": 3.523489932885906e-06, |
|
"loss": 0.6816, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 1.8697332817935832, |
|
"grad_norm": 0.6227755546569824, |
|
"learning_rate": 3.481543624161074e-06, |
|
"loss": 0.7245, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 1.8712794742945498, |
|
"grad_norm": 0.6115254759788513, |
|
"learning_rate": 3.4395973154362416e-06, |
|
"loss": 0.7511, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.8728256667955159, |
|
"grad_norm": 0.638425886631012, |
|
"learning_rate": 3.3976510067114095e-06, |
|
"loss": 0.742, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 1.8743718592964824, |
|
"grad_norm": 0.654451310634613, |
|
"learning_rate": 3.3557046979865773e-06, |
|
"loss": 0.7323, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 1.8759180517974487, |
|
"grad_norm": 0.6516563892364502, |
|
"learning_rate": 3.313758389261745e-06, |
|
"loss": 0.6654, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 1.877464244298415, |
|
"grad_norm": 0.6713635325431824, |
|
"learning_rate": 3.271812080536913e-06, |
|
"loss": 0.7639, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 1.8790104367993816, |
|
"grad_norm": 0.6944072246551514, |
|
"learning_rate": 3.2298657718120808e-06, |
|
"loss": 0.7274, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 1.880556629300348, |
|
"grad_norm": 0.6565424799919128, |
|
"learning_rate": 3.1879194630872486e-06, |
|
"loss": 0.7757, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 1.8821028218013143, |
|
"grad_norm": 0.6998873949050903, |
|
"learning_rate": 3.1459731543624164e-06, |
|
"loss": 0.8059, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 1.8836490143022806, |
|
"grad_norm": 0.6804831624031067, |
|
"learning_rate": 3.1040268456375843e-06, |
|
"loss": 0.7777, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 1.885195206803247, |
|
"grad_norm": 0.6547567248344421, |
|
"learning_rate": 3.062080536912752e-06, |
|
"loss": 0.746, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 1.8867413993042135, |
|
"grad_norm": 0.6822516322135925, |
|
"learning_rate": 3.02013422818792e-06, |
|
"loss": 0.7518, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.8882875918051798, |
|
"grad_norm": 0.6750561594963074, |
|
"learning_rate": 2.9781879194630873e-06, |
|
"loss": 0.7261, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 1.8898337843061461, |
|
"grad_norm": 0.6937829256057739, |
|
"learning_rate": 2.936241610738255e-06, |
|
"loss": 0.7716, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 1.8913799768071125, |
|
"grad_norm": 0.6683257818222046, |
|
"learning_rate": 2.894295302013423e-06, |
|
"loss": 0.7657, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 1.8929261693080788, |
|
"grad_norm": 0.6856616735458374, |
|
"learning_rate": 2.852348993288591e-06, |
|
"loss": 0.7833, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 1.8944723618090453, |
|
"grad_norm": 0.719022274017334, |
|
"learning_rate": 2.8104026845637587e-06, |
|
"loss": 0.7462, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 1.8960185543100117, |
|
"grad_norm": 0.6999730467796326, |
|
"learning_rate": 2.7684563758389265e-06, |
|
"loss": 0.7777, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 1.897564746810978, |
|
"grad_norm": 0.710363507270813, |
|
"learning_rate": 2.726510067114094e-06, |
|
"loss": 0.7624, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 1.8991109393119443, |
|
"grad_norm": 0.6629136204719543, |
|
"learning_rate": 2.6845637583892617e-06, |
|
"loss": 0.7578, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 1.9006571318129106, |
|
"grad_norm": 0.6639029383659363, |
|
"learning_rate": 2.6426174496644296e-06, |
|
"loss": 0.8438, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 1.9022033243138772, |
|
"grad_norm": 0.6924868226051331, |
|
"learning_rate": 2.6006711409395974e-06, |
|
"loss": 0.7844, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.9037495168148435, |
|
"grad_norm": 0.7059327960014343, |
|
"learning_rate": 2.5587248322147652e-06, |
|
"loss": 0.7654, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 1.9052957093158098, |
|
"grad_norm": 0.6732707619667053, |
|
"learning_rate": 2.5167785234899326e-06, |
|
"loss": 0.7158, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 1.9068419018167762, |
|
"grad_norm": 0.712011456489563, |
|
"learning_rate": 2.4748322147651005e-06, |
|
"loss": 0.8323, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 1.9083880943177425, |
|
"grad_norm": 0.7283968329429626, |
|
"learning_rate": 2.4328859060402687e-06, |
|
"loss": 0.86, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 1.909934286818709, |
|
"grad_norm": 0.736113429069519, |
|
"learning_rate": 2.3909395973154366e-06, |
|
"loss": 0.8779, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 1.9114804793196754, |
|
"grad_norm": 0.7328662276268005, |
|
"learning_rate": 2.3489932885906044e-06, |
|
"loss": 0.8673, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 1.9130266718206417, |
|
"grad_norm": 0.7270045280456543, |
|
"learning_rate": 2.3070469798657722e-06, |
|
"loss": 0.8348, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 1.914572864321608, |
|
"grad_norm": 0.7681441903114319, |
|
"learning_rate": 2.2651006711409396e-06, |
|
"loss": 0.818, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 1.9161190568225743, |
|
"grad_norm": 0.7744415998458862, |
|
"learning_rate": 2.2231543624161075e-06, |
|
"loss": 0.8879, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 1.9176652493235409, |
|
"grad_norm": 0.7644962072372437, |
|
"learning_rate": 2.1812080536912753e-06, |
|
"loss": 0.8545, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.9192114418245072, |
|
"grad_norm": 0.7949373722076416, |
|
"learning_rate": 2.139261744966443e-06, |
|
"loss": 0.8603, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 1.9207576343254735, |
|
"grad_norm": 0.8180006146430969, |
|
"learning_rate": 2.097315436241611e-06, |
|
"loss": 0.8146, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 1.9223038268264399, |
|
"grad_norm": 0.8280307650566101, |
|
"learning_rate": 2.055369127516779e-06, |
|
"loss": 0.9628, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 1.9238500193274062, |
|
"grad_norm": 0.8613501787185669, |
|
"learning_rate": 2.013422818791946e-06, |
|
"loss": 0.8326, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 1.9253962118283727, |
|
"grad_norm": 0.9388262629508972, |
|
"learning_rate": 1.971476510067114e-06, |
|
"loss": 0.9428, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 1.926942404329339, |
|
"grad_norm": 1.25608491897583, |
|
"learning_rate": 1.929530201342282e-06, |
|
"loss": 0.8971, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 1.9284885968303054, |
|
"grad_norm": 0.8791813254356384, |
|
"learning_rate": 1.88758389261745e-06, |
|
"loss": 0.6034, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 1.9300347893312717, |
|
"grad_norm": 0.5444540977478027, |
|
"learning_rate": 1.8456375838926177e-06, |
|
"loss": 0.6993, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 1.931580981832238, |
|
"grad_norm": 0.6073552966117859, |
|
"learning_rate": 1.8036912751677856e-06, |
|
"loss": 0.7178, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 1.9331271743332046, |
|
"grad_norm": 0.6113264560699463, |
|
"learning_rate": 1.761744966442953e-06, |
|
"loss": 0.6966, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.934673366834171, |
|
"grad_norm": 0.5979108810424805, |
|
"learning_rate": 1.7197986577181208e-06, |
|
"loss": 0.7133, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 1.9362195593351372, |
|
"grad_norm": 0.6315338611602783, |
|
"learning_rate": 1.6778523489932886e-06, |
|
"loss": 0.7721, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 1.9377657518361036, |
|
"grad_norm": 0.6291902661323547, |
|
"learning_rate": 1.6359060402684565e-06, |
|
"loss": 0.7006, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 1.9393119443370699, |
|
"grad_norm": 0.6238860487937927, |
|
"learning_rate": 1.5939597315436243e-06, |
|
"loss": 0.7787, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 1.9408581368380364, |
|
"grad_norm": 0.6401642560958862, |
|
"learning_rate": 1.5520134228187921e-06, |
|
"loss": 0.7603, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 1.9424043293390028, |
|
"grad_norm": 0.6174636483192444, |
|
"learning_rate": 1.51006711409396e-06, |
|
"loss": 0.7259, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 1.943950521839969, |
|
"grad_norm": 0.6495472192764282, |
|
"learning_rate": 1.4681208053691276e-06, |
|
"loss": 0.6654, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 1.9454967143409354, |
|
"grad_norm": 0.667812168598175, |
|
"learning_rate": 1.4261744966442954e-06, |
|
"loss": 0.7147, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 1.9470429068419017, |
|
"grad_norm": 0.6613562703132629, |
|
"learning_rate": 1.3842281879194633e-06, |
|
"loss": 0.7437, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 1.9485890993428683, |
|
"grad_norm": 0.7061516046524048, |
|
"learning_rate": 1.3422818791946309e-06, |
|
"loss": 0.6831, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.9501352918438346, |
|
"grad_norm": 0.6739703416824341, |
|
"learning_rate": 1.3003355704697987e-06, |
|
"loss": 0.747, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 1.951681484344801, |
|
"grad_norm": 0.6422568559646606, |
|
"learning_rate": 1.2583892617449663e-06, |
|
"loss": 0.7569, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 1.9532276768457673, |
|
"grad_norm": 0.6601455211639404, |
|
"learning_rate": 1.2164429530201344e-06, |
|
"loss": 0.7461, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 1.9547738693467336, |
|
"grad_norm": 0.6846182942390442, |
|
"learning_rate": 1.1744966442953022e-06, |
|
"loss": 0.7185, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 1.9563200618477001, |
|
"grad_norm": 0.6746420860290527, |
|
"learning_rate": 1.1325503355704698e-06, |
|
"loss": 0.6806, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 1.9578662543486665, |
|
"grad_norm": 0.6510487198829651, |
|
"learning_rate": 1.0906040268456377e-06, |
|
"loss": 0.7693, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 1.9594124468496328, |
|
"grad_norm": 0.6752596497535706, |
|
"learning_rate": 1.0486577181208055e-06, |
|
"loss": 0.899, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 1.960958639350599, |
|
"grad_norm": 0.698874294757843, |
|
"learning_rate": 1.006711409395973e-06, |
|
"loss": 0.7521, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 1.9625048318515654, |
|
"grad_norm": 0.6726242899894714, |
|
"learning_rate": 9.64765100671141e-07, |
|
"loss": 0.7782, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 1.964051024352532, |
|
"grad_norm": 0.6857203245162964, |
|
"learning_rate": 9.228187919463089e-07, |
|
"loss": 0.7344, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.9655972168534983, |
|
"grad_norm": 0.7023079991340637, |
|
"learning_rate": 8.808724832214765e-07, |
|
"loss": 0.7533, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 1.9671434093544646, |
|
"grad_norm": 0.6352121233940125, |
|
"learning_rate": 8.389261744966443e-07, |
|
"loss": 0.7554, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 1.968689601855431, |
|
"grad_norm": 0.6913352608680725, |
|
"learning_rate": 7.969798657718122e-07, |
|
"loss": 0.7468, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 1.9702357943563973, |
|
"grad_norm": 0.6950103640556335, |
|
"learning_rate": 7.5503355704698e-07, |
|
"loss": 0.8532, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 1.9717819868573638, |
|
"grad_norm": 0.6684133410453796, |
|
"learning_rate": 7.130872483221477e-07, |
|
"loss": 0.8302, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 1.9733281793583302, |
|
"grad_norm": 0.6860172152519226, |
|
"learning_rate": 6.711409395973154e-07, |
|
"loss": 0.7342, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 1.9748743718592965, |
|
"grad_norm": 0.6672773957252502, |
|
"learning_rate": 6.291946308724832e-07, |
|
"loss": 0.7901, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 1.9764205643602628, |
|
"grad_norm": 0.7022169828414917, |
|
"learning_rate": 5.872483221476511e-07, |
|
"loss": 0.7796, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 1.9779667568612291, |
|
"grad_norm": 0.6778275370597839, |
|
"learning_rate": 5.453020134228188e-07, |
|
"loss": 0.8389, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 1.9795129493621957, |
|
"grad_norm": 0.721585750579834, |
|
"learning_rate": 5.033557046979866e-07, |
|
"loss": 0.848, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.981059141863162, |
|
"grad_norm": 0.6909308433532715, |
|
"learning_rate": 4.6140939597315444e-07, |
|
"loss": 0.8214, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 1.9826053343641283, |
|
"grad_norm": 0.7320754528045654, |
|
"learning_rate": 4.1946308724832216e-07, |
|
"loss": 0.8179, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 1.9841515268650947, |
|
"grad_norm": 0.7341198325157166, |
|
"learning_rate": 3.7751677852349e-07, |
|
"loss": 0.8212, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 1.985697719366061, |
|
"grad_norm": 0.7166808843612671, |
|
"learning_rate": 3.355704697986577e-07, |
|
"loss": 0.8472, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 1.9872439118670275, |
|
"grad_norm": 0.7207593321800232, |
|
"learning_rate": 2.9362416107382555e-07, |
|
"loss": 0.8408, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 1.9887901043679939, |
|
"grad_norm": 0.7933880090713501, |
|
"learning_rate": 2.516778523489933e-07, |
|
"loss": 0.8781, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 1.9903362968689602, |
|
"grad_norm": 0.7923394441604614, |
|
"learning_rate": 2.0973154362416108e-07, |
|
"loss": 0.9313, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 1.9918824893699265, |
|
"grad_norm": 0.7550710439682007, |
|
"learning_rate": 1.6778523489932886e-07, |
|
"loss": 0.8893, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 1.9934286818708928, |
|
"grad_norm": 0.8748595118522644, |
|
"learning_rate": 1.2583892617449664e-07, |
|
"loss": 0.9071, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 1.9949748743718594, |
|
"grad_norm": 0.8392748236656189, |
|
"learning_rate": 8.389261744966443e-08, |
|
"loss": 0.9107, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.9965210668728257, |
|
"grad_norm": 0.8647322058677673, |
|
"learning_rate": 4.1946308724832215e-08, |
|
"loss": 0.8547, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 1.998067259373792, |
|
"grad_norm": 1.0320862531661987, |
|
"learning_rate": 0.0, |
|
"loss": 0.8732, |
|
"step": 1292 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1292, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.697955930285742e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|