{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 21900,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0045662100456621, "grad_norm": 22.38092803955078, "learning_rate": 2.993150684931507e-05, "loss": 5.0082, "step": 50},
    {"epoch": 0.0091324200913242, "grad_norm": 19.525426864624023, "learning_rate": 2.9863013698630136e-05, "loss": 4.0853, "step": 100},
    {"epoch": 0.0136986301369863, "grad_norm": 29.21298599243164, "learning_rate": 2.9794520547945206e-05, "loss": 3.8879, "step": 150},
    {"epoch": 0.0182648401826484, "grad_norm": 29.60974884033203, "learning_rate": 2.9726027397260275e-05, "loss": 3.5946, "step": 200},
    {"epoch": 0.0228310502283105, "grad_norm": 20.75732421875, "learning_rate": 2.9657534246575345e-05, "loss": 3.1165, "step": 250},
    {"epoch": 0.0273972602739726, "grad_norm": 31.49699592590332, "learning_rate": 2.958904109589041e-05, "loss": 2.7346, "step": 300},
    {"epoch": 0.0319634703196347, "grad_norm": 43.215335845947266, "learning_rate": 2.952054794520548e-05, "loss": 2.3509, "step": 350},
    {"epoch": 0.0365296803652968, "grad_norm": 29.553388595581055, "learning_rate": 2.945205479452055e-05, "loss": 2.3374, "step": 400},
    {"epoch": 0.0410958904109589, "grad_norm": 30.272083282470703, "learning_rate": 2.938356164383562e-05, "loss": 2.0551, "step": 450},
    {"epoch": 0.045662100456621, "grad_norm": 23.157560348510742, "learning_rate": 2.9315068493150685e-05, "loss": 1.9659, "step": 500},
    {"epoch": 0.0502283105022831, "grad_norm": 39.72032165527344, "learning_rate": 2.9246575342465755e-05, "loss": 1.7552, "step": 550},
    {"epoch": 0.0547945205479452, "grad_norm": 24.441036224365234, "learning_rate": 2.9178082191780824e-05, "loss": 1.7992, "step": 600},
    {"epoch": 0.0593607305936073, "grad_norm": 32.07455825805664, "learning_rate": 2.910958904109589e-05, "loss": 1.6968, "step": 650},
    {"epoch": 0.0639269406392694, "grad_norm": 25.034423828125, "learning_rate": 2.9041095890410956e-05, "loss": 1.788, "step": 700},
    {"epoch": 0.0684931506849315, "grad_norm": 33.06129455566406, "learning_rate": 2.8972602739726026e-05, "loss": 1.8157, "step": 750},
    {"epoch": 0.0730593607305936, "grad_norm": 32.716251373291016, "learning_rate": 2.8904109589041095e-05, "loss": 1.7502, "step": 800},
    {"epoch": 0.0776255707762557, "grad_norm": 27.18515968322754, "learning_rate": 2.8835616438356165e-05, "loss": 1.5629, "step": 850},
    {"epoch": 0.0821917808219178, "grad_norm": 19.844303131103516, "learning_rate": 2.876712328767123e-05, "loss": 1.6421, "step": 900},
    {"epoch": 0.0867579908675799, "grad_norm": 160.48770141601562, "learning_rate": 2.86986301369863e-05, "loss": 1.6035, "step": 950},
    {"epoch": 0.091324200913242, "grad_norm": 35.13261032104492, "learning_rate": 2.863013698630137e-05, "loss": 1.4939, "step": 1000},
    {"epoch": 0.0958904109589041, "grad_norm": 24.28845977783203, "learning_rate": 2.856164383561644e-05, "loss": 1.5611, "step": 1050},
    {"epoch": 0.1004566210045662, "grad_norm": 30.07063865661621, "learning_rate": 2.8493150684931505e-05, "loss": 1.4715, "step": 1100},
    {"epoch": 0.1050228310502283, "grad_norm": 23.672300338745117, "learning_rate": 2.8424657534246575e-05, "loss": 1.534, "step": 1150},
    {"epoch": 0.1095890410958904, "grad_norm": 27.45283317565918, "learning_rate": 2.8356164383561644e-05, "loss": 1.5268, "step": 1200},
    {"epoch": 0.1141552511415525, "grad_norm": 26.83701515197754, "learning_rate": 2.8287671232876714e-05, "loss": 1.4089, "step": 1250},
    {"epoch": 0.1187214611872146, "grad_norm": 19.307844161987305, "learning_rate": 2.821917808219178e-05, "loss": 1.4929, "step": 1300},
    {"epoch": 0.1232876712328767, "grad_norm": 14.790838241577148, "learning_rate": 2.815068493150685e-05, "loss": 1.4687, "step": 1350},
    {"epoch": 0.1278538812785388, "grad_norm": 16.656993865966797, "learning_rate": 2.808219178082192e-05, "loss": 1.3456, "step": 1400},
    {"epoch": 0.1324200913242009, "grad_norm": 14.602429389953613, "learning_rate": 2.801369863013699e-05, "loss": 1.2725, "step": 1450},
    {"epoch": 0.136986301369863, "grad_norm": 23.853199005126953, "learning_rate": 2.7945205479452054e-05, "loss": 1.2684, "step": 1500},
    {"epoch": 0.1415525114155251, "grad_norm": 40.28663635253906, "learning_rate": 2.7876712328767124e-05, "loss": 1.2728, "step": 1550},
    {"epoch": 0.1461187214611872, "grad_norm": 11.038789749145508, "learning_rate": 2.7808219178082193e-05, "loss": 1.3578, "step": 1600},
    {"epoch": 0.1506849315068493, "grad_norm": 22.868593215942383, "learning_rate": 2.7739726027397263e-05, "loss": 1.3941, "step": 1650},
    {"epoch": 0.1552511415525114, "grad_norm": 23.89753532409668, "learning_rate": 2.767123287671233e-05, "loss": 1.3071, "step": 1700},
    {"epoch": 0.1598173515981735, "grad_norm": 40.02177429199219, "learning_rate": 2.76027397260274e-05, "loss": 1.3711, "step": 1750},
    {"epoch": 0.1643835616438356, "grad_norm": 40.53044128417969, "learning_rate": 2.7534246575342468e-05, "loss": 1.1694, "step": 1800},
    {"epoch": 0.1689497716894977, "grad_norm": 38.70663070678711, "learning_rate": 2.7465753424657537e-05, "loss": 1.2284, "step": 1850},
    {"epoch": 0.1735159817351598, "grad_norm": 114.71417236328125, "learning_rate": 2.7397260273972603e-05, "loss": 1.1634, "step": 1900},
    {"epoch": 0.1780821917808219, "grad_norm": 34.596923828125, "learning_rate": 2.732876712328767e-05, "loss": 1.2858, "step": 1950},
    {"epoch": 0.182648401826484, "grad_norm": 23.52661895751953, "learning_rate": 2.726027397260274e-05, "loss": 1.2273, "step": 2000},
    {"epoch": 0.1872146118721461, "grad_norm": 22.8360652923584, "learning_rate": 2.719178082191781e-05, "loss": 1.2622, "step": 2050},
    {"epoch": 0.1917808219178082, "grad_norm": 17.08322525024414, "learning_rate": 2.7123287671232875e-05, "loss": 1.3012, "step": 2100},
    {"epoch": 0.1963470319634703, "grad_norm": 12.923150062561035, "learning_rate": 2.7054794520547944e-05, "loss": 1.3847, "step": 2150},
    {"epoch": 0.2009132420091324, "grad_norm": 73.03246307373047, "learning_rate": 2.6986301369863014e-05, "loss": 1.3173, "step": 2200},
    {"epoch": 0.2054794520547945, "grad_norm": 16.656768798828125, "learning_rate": 2.6917808219178083e-05, "loss": 1.1779, "step": 2250},
    {"epoch": 0.2100456621004566, "grad_norm": 22.919973373413086, "learning_rate": 2.684931506849315e-05, "loss": 1.2497, "step": 2300},
    {"epoch": 0.2146118721461187, "grad_norm": 20.0987491607666, "learning_rate": 2.678082191780822e-05, "loss": 1.1787, "step": 2350},
    {"epoch": 0.2191780821917808, "grad_norm": 84.04356384277344, "learning_rate": 2.6712328767123288e-05, "loss": 1.1989, "step": 2400},
    {"epoch": 0.2237442922374429, "grad_norm": 12.585476875305176, "learning_rate": 2.6643835616438358e-05, "loss": 1.2324, "step": 2450},
    {"epoch": 0.228310502283105, "grad_norm": 18.206226348876953, "learning_rate": 2.6575342465753424e-05, "loss": 1.2541, "step": 2500},
    {"epoch": 0.2328767123287671, "grad_norm": 24.236295700073242, "learning_rate": 2.6506849315068493e-05, "loss": 1.1755, "step": 2550},
    {"epoch": 0.2374429223744292, "grad_norm": 21.91484260559082, "learning_rate": 2.6438356164383563e-05, "loss": 1.1436, "step": 2600},
    {"epoch": 0.2420091324200913, "grad_norm": 25.741945266723633, "learning_rate": 2.6369863013698632e-05, "loss": 1.146, "step": 2650},
    {"epoch": 0.2465753424657534, "grad_norm": 21.776227951049805, "learning_rate": 2.6301369863013698e-05, "loss": 1.1521, "step": 2700},
    {"epoch": 0.2511415525114155, "grad_norm": 30.281368255615234, "learning_rate": 2.6232876712328768e-05, "loss": 1.0747, "step": 2750},
    {"epoch": 0.2557077625570776, "grad_norm": 16.21628189086914, "learning_rate": 2.6164383561643837e-05, "loss": 1.2503, "step": 2800},
    {"epoch": 0.2602739726027397, "grad_norm": 17.022933959960938, "learning_rate": 2.6095890410958907e-05, "loss": 1.1719, "step": 2850},
    {"epoch": 0.2648401826484018, "grad_norm": 17.665477752685547, "learning_rate": 2.6027397260273973e-05, "loss": 1.091, "step": 2900},
    {"epoch": 0.2694063926940639, "grad_norm": 12.710230827331543, "learning_rate": 2.5958904109589042e-05, "loss": 1.1828, "step": 2950},
    {"epoch": 0.273972602739726, "grad_norm": 23.11914825439453, "learning_rate": 2.589041095890411e-05, "loss": 1.0025, "step": 3000},
    {"epoch": 0.2785388127853881, "grad_norm": 17.97935676574707, "learning_rate": 2.582191780821918e-05, "loss": 1.2145, "step": 3050},
    {"epoch": 0.2831050228310502, "grad_norm": 28.123554229736328, "learning_rate": 2.5753424657534247e-05, "loss": 1.2629, "step": 3100},
    {"epoch": 0.2876712328767123, "grad_norm": 17.924665451049805, "learning_rate": 2.5684931506849317e-05, "loss": 1.0439, "step": 3150},
    {"epoch": 0.2922374429223744, "grad_norm": 24.03557777404785, "learning_rate": 2.5616438356164386e-05, "loss": 1.127, "step": 3200},
    {"epoch": 0.2968036529680365, "grad_norm": 30.70620346069336, "learning_rate": 2.5547945205479452e-05, "loss": 1.1464, "step": 3250},
    {"epoch": 0.3013698630136986, "grad_norm": 21.80393409729004, "learning_rate": 2.5479452054794518e-05, "loss": 0.9603, "step": 3300},
    {"epoch": 0.3059360730593607, "grad_norm": 23.686079025268555, "learning_rate": 2.5410958904109588e-05, "loss": 1.0745, "step": 3350},
    {"epoch": 0.3105022831050228, "grad_norm": 10.258773803710938, "learning_rate": 2.5342465753424657e-05, "loss": 1.0776, "step": 3400},
    {"epoch": 0.3150684931506849, "grad_norm": 28.77837562561035, "learning_rate": 2.5273972602739727e-05, "loss": 1.1781, "step": 3450},
    {"epoch": 0.319634703196347, "grad_norm": 39.38608932495117, "learning_rate": 2.5205479452054793e-05, "loss": 0.9749, "step": 3500},
    {"epoch": 0.3242009132420091, "grad_norm": 29.596742630004883, "learning_rate": 2.5136986301369862e-05, "loss": 1.1099, "step": 3550},
    {"epoch": 0.3287671232876712, "grad_norm": 15.371848106384277, "learning_rate": 2.5068493150684932e-05, "loss": 1.1363, "step": 3600},
    {"epoch": 0.3333333333333333, "grad_norm": 27.008398056030273, "learning_rate": 2.5e-05, "loss": 1.2275, "step": 3650},
    {"epoch": 0.3378995433789954, "grad_norm": 54.85331726074219, "learning_rate": 2.4931506849315067e-05, "loss": 1.2297, "step": 3700},
    {"epoch": 0.3424657534246575, "grad_norm": 10.650010108947754, "learning_rate": 2.4863013698630137e-05, "loss": 1.1716, "step": 3750},
    {"epoch": 0.3470319634703196, "grad_norm": 21.537841796875, "learning_rate": 2.4794520547945206e-05, "loss": 1.2375, "step": 3800},
    {"epoch": 0.3515981735159817, "grad_norm": 31.040218353271484, "learning_rate": 2.4726027397260276e-05, "loss": 1.0511, "step": 3850},
    {"epoch": 0.3561643835616438, "grad_norm": 13.31843090057373, "learning_rate": 2.4657534246575342e-05, "loss": 1.1581, "step": 3900},
    {"epoch": 0.3607305936073059, "grad_norm": 22.53802490234375, "learning_rate": 2.458904109589041e-05, "loss": 1.0627, "step": 3950},
    {"epoch": 0.365296803652968, "grad_norm": 20.266468048095703, "learning_rate": 2.452054794520548e-05, "loss": 1.1103, "step": 4000},
    {"epoch": 0.3698630136986301, "grad_norm": 9.198668479919434, "learning_rate": 2.445205479452055e-05, "loss": 1.136, "step": 4050},
    {"epoch": 0.3744292237442922, "grad_norm": 13.770018577575684, "learning_rate": 2.4383561643835616e-05, "loss": 1.0984, "step": 4100},
    {"epoch": 0.3789954337899543, "grad_norm": 25.558441162109375, "learning_rate": 2.4315068493150686e-05, "loss": 1.062, "step": 4150},
    {"epoch": 0.3835616438356164, "grad_norm": 25.940107345581055, "learning_rate": 2.4246575342465755e-05, "loss": 1.2237, "step": 4200},
    {"epoch": 0.3881278538812785, "grad_norm": 11.530945777893066, "learning_rate": 2.4178082191780825e-05, "loss": 1.0717, "step": 4250},
    {"epoch": 0.3926940639269406, "grad_norm": 13.686588287353516, "learning_rate": 2.410958904109589e-05, "loss": 1.0011, "step": 4300},
    {"epoch": 0.3972602739726027, "grad_norm": 24.325557708740234, "learning_rate": 2.404109589041096e-05, "loss": 1.2033, "step": 4350},
    {"epoch": 0.4018264840182648, "grad_norm": 24.21858787536621, "learning_rate": 2.397260273972603e-05, "loss": 1.01, "step": 4400},
    {"epoch": 0.4063926940639269, "grad_norm": 14.875985145568848, "learning_rate": 2.39041095890411e-05, "loss": 1.0378, "step": 4450},
    {"epoch": 0.410958904109589, "grad_norm": 38.33986282348633, "learning_rate": 2.3835616438356165e-05, "loss": 1.0722, "step": 4500},
    {"epoch": 0.4155251141552511, "grad_norm": 10.655511856079102, "learning_rate": 2.376712328767123e-05, "loss": 1.1182, "step": 4550},
    {"epoch": 0.4200913242009132, "grad_norm": 26.336122512817383, "learning_rate": 2.36986301369863e-05, "loss": 1.0772, "step": 4600},
    {"epoch": 0.4246575342465753, "grad_norm": 26.254093170166016, "learning_rate": 2.363013698630137e-05, "loss": 0.9908, "step": 4650},
    {"epoch": 0.4292237442922374, "grad_norm": 7.081357002258301, "learning_rate": 2.3561643835616436e-05, "loss": 1.0079, "step": 4700},
    {"epoch": 0.4337899543378995, "grad_norm": 20.019088745117188, "learning_rate": 2.3493150684931506e-05, "loss": 1.1188, "step": 4750},
    {"epoch": 0.4383561643835616, "grad_norm": 30.188098907470703, "learning_rate": 2.3424657534246575e-05, "loss": 1.0616, "step": 4800},
    {"epoch": 0.4429223744292237, "grad_norm": 6.621674537658691, "learning_rate": 2.3356164383561645e-05, "loss": 1.0935, "step": 4850},
    {"epoch": 0.4474885844748858, "grad_norm": 21.145673751831055, "learning_rate": 2.328767123287671e-05, "loss": 1.0178, "step": 4900},
    {"epoch": 0.4520547945205479, "grad_norm": 26.22977066040039, "learning_rate": 2.321917808219178e-05, "loss": 1.1739, "step": 4950},
    {"epoch": 0.45662100456621, "grad_norm": 22.71933364868164, "learning_rate": 2.315068493150685e-05, "loss": 1.122, "step": 5000},
    {"epoch": 0.4611872146118721, "grad_norm": 16.020483016967773, "learning_rate": 2.308219178082192e-05, "loss": 1.0494, "step": 5050},
    {"epoch": 0.4657534246575342, "grad_norm": 10.217668533325195, "learning_rate": 2.3013698630136985e-05, "loss": 1.1577, "step": 5100},
    {"epoch": 0.4703196347031963, "grad_norm": 13.561128616333008, "learning_rate": 2.2945205479452055e-05, "loss": 1.1065, "step": 5150},
    {"epoch": 0.4748858447488584, "grad_norm": 24.07544708251953, "learning_rate": 2.2876712328767124e-05, "loss": 1.1721, "step": 5200},
    {"epoch": 0.4794520547945205, "grad_norm": 27.471532821655273, "learning_rate": 2.2808219178082194e-05, "loss": 1.0425, "step": 5250},
    {"epoch": 0.4840182648401826, "grad_norm": 39.17247772216797, "learning_rate": 2.273972602739726e-05, "loss": 1.0182, "step": 5300},
    {"epoch": 0.4885844748858447, "grad_norm": 11.253133773803711, "learning_rate": 2.267123287671233e-05, "loss": 1.0516, "step": 5350},
    {"epoch": 0.4931506849315068, "grad_norm": 19.816268920898438, "learning_rate": 2.26027397260274e-05, "loss": 0.9359, "step": 5400},
    {"epoch": 0.4977168949771689, "grad_norm": 21.89589500427246, "learning_rate": 2.253424657534247e-05, "loss": 1.0047, "step": 5450},
    {"epoch": 0.502283105022831, "grad_norm": 18.662717819213867, "learning_rate": 2.2465753424657534e-05, "loss": 1.1092, "step": 5500},
    {"epoch": 0.5068493150684932, "grad_norm": 13.65588092803955, "learning_rate": 2.2397260273972604e-05, "loss": 1.2777, "step": 5550},
    {"epoch": 0.5114155251141552, "grad_norm": 22.897354125976562, "learning_rate": 2.2328767123287673e-05, "loss": 1.0557, "step": 5600},
    {"epoch": 0.5159817351598174, "grad_norm": 22.30970573425293, "learning_rate": 2.2260273972602743e-05, "loss": 1.0701, "step": 5650},
    {"epoch": 0.5205479452054794, "grad_norm": 55.639892578125, "learning_rate": 2.219178082191781e-05, "loss": 1.1001, "step": 5700},
    {"epoch": 0.5251141552511416, "grad_norm": 11.327408790588379, "learning_rate": 2.212328767123288e-05, "loss": 1.1506, "step": 5750},
    {"epoch": 0.5296803652968036, "grad_norm": 31.288894653320312, "learning_rate": 2.2054794520547945e-05, "loss": 1.0774, "step": 5800},
    {"epoch": 0.5342465753424658, "grad_norm": 11.587857246398926, "learning_rate": 2.1986301369863014e-05, "loss": 1.0175, "step": 5850},
    {"epoch": 0.5388127853881278, "grad_norm": 27.174907684326172, "learning_rate": 2.191780821917808e-05, "loss": 1.1566, "step": 5900},
    {"epoch": 0.54337899543379, "grad_norm": 28.325786590576172, "learning_rate": 2.184931506849315e-05, "loss": 1.1089, "step": 5950},
    {"epoch": 0.547945205479452, "grad_norm": 21.70798683166504, "learning_rate": 2.178082191780822e-05, "loss": 1.0575, "step": 6000},
    {"epoch": 0.5525114155251142, "grad_norm": 18.79369354248047, "learning_rate": 2.171232876712329e-05, "loss": 1.1327, "step": 6050},
    {"epoch": 0.5570776255707762, "grad_norm": 31.291170120239258, "learning_rate": 2.1643835616438355e-05, "loss": 1.1412, "step": 6100},
    {"epoch": 0.5616438356164384, "grad_norm": 15.10459041595459, "learning_rate": 2.1575342465753424e-05, "loss": 1.1289, "step": 6150},
    {"epoch": 0.5662100456621004, "grad_norm": 23.595136642456055, "learning_rate": 2.1506849315068494e-05, "loss": 0.9419, "step": 6200},
    {"epoch": 0.5707762557077626, "grad_norm": 9.336952209472656, "learning_rate": 2.1438356164383563e-05, "loss": 0.9357, "step": 6250},
    {"epoch": 0.5753424657534246, "grad_norm": 34.959354400634766, "learning_rate": 2.136986301369863e-05, "loss": 1.1525, "step": 6300},
    {"epoch": 0.5799086757990868, "grad_norm": 25.974088668823242, "learning_rate": 2.13013698630137e-05, "loss": 1.0915, "step": 6350},
    {"epoch": 0.5844748858447488, "grad_norm": 26.807876586914062, "learning_rate": 2.1232876712328768e-05, "loss": 1.0783, "step": 6400},
    {"epoch": 0.589041095890411, "grad_norm": 11.836869239807129, "learning_rate": 2.1164383561643838e-05, "loss": 1.1106, "step": 6450},
    {"epoch": 0.593607305936073, "grad_norm": 20.5205078125, "learning_rate": 2.1095890410958904e-05, "loss": 1.0021, "step": 6500},
    {"epoch": 0.5981735159817352, "grad_norm": 63.918914794921875, "learning_rate": 2.1027397260273973e-05, "loss": 0.9942, "step": 6550},
    {"epoch": 0.6027397260273972, "grad_norm": 15.824874877929688, "learning_rate": 2.0958904109589043e-05, "loss": 1.0382, "step": 6600},
    {"epoch": 0.6073059360730594, "grad_norm": 23.539623260498047, "learning_rate": 2.0890410958904112e-05, "loss": 1.1323, "step": 6650},
    {"epoch": 0.6118721461187214, "grad_norm": 22.72905921936035, "learning_rate": 2.0821917808219178e-05, "loss": 1.096, "step": 6700},
    {"epoch": 0.6164383561643836, "grad_norm": 21.393850326538086, "learning_rate": 2.0753424657534248e-05, "loss": 1.0491, "step": 6750},
    {"epoch": 0.6210045662100456, "grad_norm": 10.425342559814453, "learning_rate": 2.0684931506849317e-05, "loss": 0.9894, "step": 6800},
    {"epoch": 0.6255707762557078, "grad_norm": 31.147842407226562, "learning_rate": 2.0616438356164387e-05, "loss": 1.0229, "step": 6850},
    {"epoch": 0.6301369863013698, "grad_norm": 13.265896797180176, "learning_rate": 2.0547945205479453e-05, "loss": 1.0372, "step": 6900},
    {"epoch": 0.634703196347032, "grad_norm": 19.299884796142578, "learning_rate": 2.0479452054794522e-05, "loss": 0.9861, "step": 6950},
    {"epoch": 0.639269406392694, "grad_norm": 15.215560913085938, "learning_rate": 2.041095890410959e-05, "loss": 1.0685, "step": 7000},
    {"epoch": 0.6438356164383562, "grad_norm": 12.113781929016113, "learning_rate": 2.034246575342466e-05, "loss": 1.0452, "step": 7050},
    {"epoch": 0.6484018264840182, "grad_norm": 53.0004768371582, "learning_rate": 2.0273972602739724e-05, "loss": 1.0177, "step": 7100},
    {"epoch": 0.6529680365296804, "grad_norm": 10.609742164611816, "learning_rate": 2.0205479452054793e-05, "loss": 0.9896, "step": 7150},
    {"epoch": 0.6575342465753424, "grad_norm": 13.45383071899414, "learning_rate": 2.0136986301369863e-05, "loss": 0.7549, "step": 7200},
    {"epoch": 0.6621004566210046, "grad_norm": 21.812686920166016, "learning_rate": 2.0068493150684932e-05, "loss": 1.0402, "step": 7250},
    {"epoch": 0.6666666666666666, "grad_norm": 7.622537612915039, "learning_rate": 1.9999999999999998e-05, "loss": 0.9468, "step": 7300},
    {"epoch": 0.6712328767123288, "grad_norm": 14.563289642333984, "learning_rate": 1.9931506849315068e-05, "loss": 1.0154, "step": 7350},
    {"epoch": 0.6757990867579908, "grad_norm": 29.9089298248291, "learning_rate": 1.9863013698630137e-05, "loss": 0.9872, "step": 7400},
    {"epoch": 0.680365296803653, "grad_norm": 7.758576393127441, "learning_rate": 1.9794520547945207e-05, "loss": 0.8618, "step": 7450},
    {"epoch": 0.684931506849315, "grad_norm": 15.849843978881836, "learning_rate": 1.9726027397260273e-05, "loss": 0.9981, "step": 7500},
    {"epoch": 0.6894977168949772, "grad_norm": 13.798691749572754, "learning_rate": 1.9657534246575342e-05, "loss": 1.0578, "step": 7550},
    {"epoch": 0.6940639269406392, "grad_norm": 12.977076530456543, "learning_rate": 1.9589041095890412e-05, "loss": 1.0081, "step": 7600},
    {"epoch": 0.6986301369863014, "grad_norm": 25.823368072509766, "learning_rate": 1.952054794520548e-05, "loss": 0.9483, "step": 7650},
    {"epoch": 0.7031963470319634, "grad_norm": 1.7260483503341675, "learning_rate": 1.9452054794520547e-05, "loss": 0.9396, "step": 7700},
    {"epoch": 0.7077625570776256, "grad_norm": 28.217559814453125, "learning_rate": 1.9383561643835617e-05, "loss": 1.0159, "step": 7750},
    {"epoch": 0.7123287671232876, "grad_norm": 15.566514015197754, "learning_rate": 1.9315068493150686e-05, "loss": 1.0673, "step": 7800},
    {"epoch": 0.7168949771689498, "grad_norm": 49.06792449951172, "learning_rate": 1.9246575342465756e-05, "loss": 0.9817, "step": 7850},
    {"epoch": 0.7214611872146118, "grad_norm": 35.709224700927734, "learning_rate": 1.9178082191780822e-05, "loss": 0.9818, "step": 7900},
    {"epoch": 0.726027397260274, "grad_norm": 20.94626808166504, "learning_rate": 1.910958904109589e-05, "loss": 0.9113, "step": 7950},
    {"epoch": 0.730593607305936, "grad_norm": 25.5698184967041, "learning_rate": 1.904109589041096e-05, "loss": 1.0525, "step": 8000},
    {"epoch": 0.7351598173515982, "grad_norm": 36.41669845581055, "learning_rate": 1.897260273972603e-05, "loss": 1.007, "step": 8050},
    {"epoch": 0.7397260273972602, "grad_norm": 17.15513038635254, "learning_rate": 1.8904109589041096e-05, "loss": 0.9568, "step": 8100},
    {"epoch": 0.7442922374429224, "grad_norm": 24.435543060302734, "learning_rate": 1.8835616438356166e-05, "loss": 1.0073, "step": 8150},
    {"epoch": 0.7488584474885844, "grad_norm": 26.5279483795166, "learning_rate": 1.8767123287671235e-05, "loss": 0.9768, "step": 8200},
    {"epoch": 0.7534246575342466, "grad_norm": 18.786693572998047, "learning_rate": 1.8698630136986305e-05, "loss": 1.0579, "step": 8250},
    {"epoch": 0.7579908675799086, "grad_norm": 22.201034545898438, "learning_rate": 1.863013698630137e-05, "loss": 1.003, "step": 8300},
    {"epoch": 0.7625570776255708, "grad_norm": 28.934587478637695, "learning_rate": 1.856164383561644e-05, "loss": 0.9753, "step": 8350},
    {"epoch": 0.7671232876712328, "grad_norm": 17.935937881469727, "learning_rate": 1.8493150684931506e-05, "loss": 0.9038, "step": 8400},
    {"epoch": 0.771689497716895, "grad_norm": 29.648881912231445, "learning_rate": 1.8424657534246576e-05, "loss": 1.0109, "step": 8450},
    {"epoch": 0.776255707762557, "grad_norm": 32.65835189819336, "learning_rate": 1.8356164383561642e-05, "loss": 1.0207, "step": 8500},
    {"epoch": 0.7808219178082192, "grad_norm": 21.674053192138672, "learning_rate": 1.828767123287671e-05, "loss": 1.0573, "step": 8550},
    {"epoch": 0.7853881278538812, "grad_norm": 19.038619995117188, "learning_rate": 1.821917808219178e-05, "loss": 1.042, "step": 8600},
    {"epoch": 0.7899543378995434, "grad_norm": 23.994884490966797, "learning_rate": 1.815068493150685e-05, "loss": 1.0316, "step": 8650},
    {"epoch": 0.7945205479452054, "grad_norm": 26.702028274536133, "learning_rate": 1.8082191780821916e-05, "loss": 0.9592, "step": 8700},
    {"epoch": 0.7990867579908676, "grad_norm": 7.485787391662598, "learning_rate": 1.8013698630136986e-05, "loss": 1.0175, "step": 8750},
    {"epoch": 0.8036529680365296, "grad_norm": 24.249893188476562, "learning_rate": 1.7945205479452055e-05, "loss": 1.0643, "step": 8800},
    {"epoch": 0.8082191780821918, "grad_norm": 31.136962890625, "learning_rate": 1.7876712328767125e-05, "loss": 0.8719, "step": 8850},
    {"epoch": 0.8127853881278538, "grad_norm": 18.02696418762207, "learning_rate": 1.780821917808219e-05, "loss": 0.9169, "step": 8900},
    {"epoch": 0.817351598173516, "grad_norm": 7.9297261238098145, "learning_rate": 1.773972602739726e-05, "loss": 0.9241, "step": 8950},
    {"epoch": 0.821917808219178, "grad_norm": 15.147187232971191, "learning_rate": 1.767123287671233e-05, "loss": 0.9451, "step": 9000},
    {"epoch": 0.8264840182648402, "grad_norm": 26.369773864746094, "learning_rate": 1.76027397260274e-05, "loss": 0.9198, "step": 9050},
    {"epoch": 0.8310502283105022, "grad_norm": 10.24289321899414, "learning_rate": 1.7534246575342465e-05, "loss": 0.9423, "step": 9100},
    {"epoch": 0.8356164383561644, "grad_norm": 17.285388946533203, "learning_rate": 1.7465753424657535e-05, "loss": 0.8636, "step": 9150},
    {"epoch": 0.8401826484018264, "grad_norm": 26.444520950317383, "learning_rate": 1.7397260273972604e-05, "loss": 1.0647, "step": 9200},
    {"epoch": 0.8447488584474886, "grad_norm": 37.353084564208984, "learning_rate": 1.7328767123287674e-05, "loss": 0.9484, "step": 9250},
    {"epoch": 0.8493150684931506, "grad_norm": 9.881476402282715, "learning_rate": 1.726027397260274e-05, "loss": 1.014, "step": 9300},
    {"epoch": 0.8538812785388128, "grad_norm": 27.257709503173828, "learning_rate": 1.719178082191781e-05, "loss": 0.9169, "step": 9350},
    {"epoch": 0.8584474885844748, "grad_norm": 20.704347610473633, "learning_rate": 1.712328767123288e-05, "loss": 0.9597, "step": 9400},
    {"epoch": 0.863013698630137, "grad_norm": 20.994293212890625, "learning_rate": 1.705479452054795e-05, "loss": 0.9328, "step": 9450},
    {"epoch": 0.867579908675799, "grad_norm": 31.617778778076172, "learning_rate": 1.6986301369863014e-05, "loss": 0.9988, "step": 9500},
    {"epoch": 0.8721461187214612, "grad_norm": 24.76189422607422, "learning_rate": 1.6917808219178084e-05, "loss": 0.9338, "step": 9550},
    {"epoch": 0.8767123287671232, "grad_norm": 27.70781135559082, "learning_rate": 1.6849315068493153e-05, "loss": 0.8994, "step": 9600},
    {"epoch": 0.8812785388127854, "grad_norm": 6.332718372344971, "learning_rate": 1.6780821917808223e-05, "loss": 0.8815, "step": 9650},
    {"epoch": 0.8858447488584474, "grad_norm": 25.22089195251465, "learning_rate": 1.6712328767123286e-05, "loss": 1.0243, "step": 9700},
    {"epoch": 0.8904109589041096, "grad_norm": 6.610196590423584, "learning_rate": 1.6643835616438355e-05, "loss": 0.856, "step": 9750},
    {"epoch": 0.8949771689497716, "grad_norm": 9.7604341506958, "learning_rate": 1.6575342465753425e-05, "loss": 0.9275, "step": 9800},
    {"epoch": 0.8995433789954338, "grad_norm": 13.222633361816406, "learning_rate": 1.6506849315068494e-05, "loss": 0.7912, "step": 9850},
    {"epoch": 0.9041095890410958, "grad_norm": 8.07390022277832, "learning_rate": 1.643835616438356e-05, "loss": 0.9296, "step": 9900},
    {"epoch": 0.908675799086758, "grad_norm": 8.25124740600586, "learning_rate": 1.636986301369863e-05, "loss": 0.9269, "step": 9950},
    {"epoch": 0.91324200913242, "grad_norm": 22.882007598876953, "learning_rate": 1.63013698630137e-05, "loss": 0.9694, "step": 10000},
    {"epoch": 0.9178082191780822, "grad_norm": 8.665299415588379, "learning_rate": 1.623287671232877e-05, "loss": 0.8643, "step": 10050},
    {"epoch": 0.9223744292237442, "grad_norm": 13.816045761108398, "learning_rate": 1.6164383561643835e-05, "loss": 0.9476, "step": 10100},
    {"epoch": 0.9269406392694064, "grad_norm": 11.227286338806152, "learning_rate": 1.6095890410958904e-05, "loss": 0.8805, "step": 10150},
    {"epoch": 0.9315068493150684, "grad_norm": 7.756823539733887, "learning_rate": 1.6027397260273974e-05, "loss": 0.9185, "step": 10200},
    {"epoch": 0.9360730593607306, "grad_norm": 3.170640230178833, "learning_rate": 1.5958904109589043e-05, "loss": 0.8761, "step": 10250},
    {"epoch": 0.9406392694063926, "grad_norm": 11.567336082458496, "learning_rate": 1.589041095890411e-05, "loss": 0.9298, "step": 10300},
    {"epoch": 0.9452054794520548, "grad_norm": 23.299612045288086, "learning_rate": 1.582191780821918e-05, "loss": 0.8361, "step": 10350},
    {"epoch": 0.9497716894977168, "grad_norm": 14.57529067993164, "learning_rate": 1.5753424657534248e-05, "loss": 0.9669, "step": 10400},
    {"epoch": 0.954337899543379, "grad_norm": 10.777979850769043, "learning_rate": 1.5684931506849318e-05, "loss": 0.9632, "step": 10450},
    {"epoch": 0.958904109589041, "grad_norm": 25.46247100830078, "learning_rate": 1.5616438356164384e-05, "loss": 0.8625, "step": 10500},
    {"epoch": 0.9634703196347032, "grad_norm": 8.925729751586914, "learning_rate": 1.5547945205479453e-05, "loss": 0.903, "step": 10550},
    {"epoch": 0.9680365296803652, "grad_norm": 11.709217071533203, "learning_rate": 1.5479452054794523e-05, "loss": 0.9446, "step": 10600},
    {"epoch": 0.9726027397260274, "grad_norm": 36.75910568237305, "learning_rate": 1.5410958904109592e-05, "loss": 0.9568, "step": 10650},
    {"epoch": 0.9771689497716894, "grad_norm": 30.059825897216797, "learning_rate": 1.5342465753424658e-05, "loss": 0.9603, "step": 10700},
    {"epoch": 0.9817351598173516, "grad_norm": 23.478477478027344, "learning_rate": 1.5273972602739728e-05, "loss": 0.9828, "step": 10750},
    {"epoch": 0.9863013698630136, "grad_norm": 34.233699798583984, "learning_rate": 1.5205479452054795e-05, "loss": 0.9867, "step": 10800},
    {"epoch": 0.9908675799086758, "grad_norm": 19.399288177490234, "learning_rate": 1.5136986301369865e-05, "loss": 0.8618, "step": 10850},
    {"epoch": 0.9954337899543378, "grad_norm": 9.319437026977539, "learning_rate": 1.5068493150684931e-05, "loss": 0.9352, "step": 10900},
    {"epoch": 1.0, "grad_norm": 8.767410278320312, "learning_rate": 1.5e-05, "loss": 0.8511, "step": 10950},
    {"epoch": 1.004566210045662, "grad_norm": 15.091998100280762, "learning_rate": 1.4931506849315068e-05, "loss": 0.6653, "step": 11000},
    {"epoch": 1.009132420091324, "grad_norm": 23.510337829589844, "learning_rate": 1.4863013698630138e-05, "loss": 0.6163, "step": 11050},
    {"epoch": 1.0136986301369864, "grad_norm": 8.980317115783691, "learning_rate": 1.4794520547945205e-05, "loss": 0.7783, "step": 11100},
    {"epoch": 1.0182648401826484, "grad_norm": 36.67283248901367, "learning_rate": 1.4726027397260275e-05, "loss": 0.7681, "step": 11150},
    {"epoch": 1.0228310502283104, "grad_norm": 4.734091758728027, "learning_rate": 1.4657534246575343e-05, "loss": 0.6181, "step": 11200},
    {"epoch": 1.0273972602739727, "grad_norm": 7.513982772827148, "learning_rate": 1.4589041095890412e-05, "loss": 0.7297, "step": 11250},
    {"epoch": 1.0319634703196348, "grad_norm": 16.1832275390625, "learning_rate": 1.4520547945205478e-05, "loss": 0.6487, "step": 11300},
    {"epoch": 1.0365296803652968, "grad_norm": 25.557220458984375, "learning_rate": 1.4452054794520548e-05, "loss": 0.7089, "step": 11350},
    {"epoch": 1.0410958904109588, "grad_norm": 11.07717227935791, "learning_rate": 1.4383561643835615e-05, "loss": 0.6266, "step": 11400},
    {"epoch": 1.045662100456621, "grad_norm": 11.381722450256348, "learning_rate": 1.4315068493150685e-05, "loss": 0.6758, "step": 11450},
    {"epoch": 1.0502283105022832, "grad_norm": 9.080512046813965, "learning_rate": 1.4246575342465753e-05, "loss": 0.6865, "step": 11500},
    {"epoch": 1.0547945205479452, "grad_norm": 10.727092742919922, "learning_rate": 1.4178082191780822e-05, "loss": 0.6494, "step": 11550},
    {"epoch": 1.0593607305936072, "grad_norm": 4.969435691833496, "learning_rate": 1.410958904109589e-05, "loss": 0.5874, "step": 11600},
    {"epoch": 1.0639269406392695, "grad_norm": 12.03378963470459, "learning_rate": 1.404109589041096e-05, "loss": 0.6301, "step": 11650},
    {"epoch": 1.0684931506849316, "grad_norm": 8.797894477844238, "learning_rate": 1.3972602739726027e-05, "loss": 0.6839, "step": 11700},
    {"epoch": 1.0730593607305936, "grad_norm": 9.395676612854004, "learning_rate": 1.3904109589041097e-05, "loss": 0.6672, "step": 11750},
    {"epoch": 1.0776255707762556, "grad_norm": 26.268247604370117, "learning_rate": 1.3835616438356164e-05, "loss": 0.6398, "step": 11800},
    {"epoch": 1.0821917808219177, "grad_norm": 19.63582992553711, "learning_rate": 1.3767123287671234e-05, "loss": 0.7825, "step": 11850},
    {"epoch": 1.08675799086758, "grad_norm": 23.42495346069336, "learning_rate": 1.3698630136986302e-05, "loss": 0.7078, "step": 11900},
    {"epoch": 1.091324200913242, "grad_norm": 29.21826934814453, "learning_rate": 1.363013698630137e-05, "loss": 0.6973, "step": 11950},
    {"epoch": 1.095890410958904, "grad_norm": 13.69019603729248, "learning_rate": 1.3561643835616437e-05, "loss": 0.6325, "step": 12000},
    {"epoch": 1.1004566210045663, "grad_norm": 22.293821334838867, "learning_rate": 1.3493150684931507e-05, "loss": 0.6833, "step": 12050},
    {"epoch": 1.1050228310502284, "grad_norm": 9.701814651489258, "learning_rate": 1.3424657534246575e-05, "loss": 0.6518, "step": 12100},
    {"epoch": 1.1095890410958904, "grad_norm": 25.30548667907715, "learning_rate": 1.3356164383561644e-05, "loss": 0.7458, "step": 12150},
    {"epoch": 1.1141552511415524, "grad_norm": 11.800670623779297, "learning_rate": 1.3287671232876712e-05, "loss": 0.755, "step": 12200},
    {"epoch": 1.1187214611872145, "grad_norm": 15.019549369812012, "learning_rate": 1.3219178082191781e-05, "loss": 0.6871, "step": 12250},
    {"epoch": 1.1232876712328768, "grad_norm": 8.90066909790039, "learning_rate": 1.3150684931506849e-05, "loss": 0.738, "step": 12300},
    {"epoch": 1.1278538812785388, "grad_norm": 6.738426685333252, "learning_rate": 1.3082191780821919e-05, "loss": 0.6828, "step": 12350},
    {"epoch": 1.1324200913242009, "grad_norm": 7.866949558258057, "learning_rate": 1.3013698630136986e-05, "loss": 0.708, "step": 12400},
    {"epoch": 1.1369863013698631, "grad_norm": 26.066892623901367, "learning_rate": 1.2945205479452056e-05, "loss": 0.7078, "step": 12450},
    {"epoch": 1.1415525114155252, "grad_norm": 7.540081024169922, "learning_rate": 1.2876712328767124e-05, "loss": 0.6958, "step": 12500},
    {"epoch": 1.1461187214611872, "grad_norm": 13.667712211608887, "learning_rate": 1.2808219178082193e-05, "loss": 0.5233, "step": 12550},
    {"epoch": 1.1506849315068493, "grad_norm": 17.8378849029541, "learning_rate": 1.2739726027397259e-05, "loss": 0.669, "step": 12600},
    {"epoch": 1.1552511415525113, "grad_norm": 25.038570404052734, "learning_rate": 1.2671232876712329e-05, "loss": 0.5989, "step": 12650},
    {"epoch": 1.1598173515981736, "grad_norm": 2.4400246143341064, "learning_rate": 1.2602739726027396e-05, "loss": 0.745, "step": 12700},
    {"epoch": 1.1643835616438356, "grad_norm": 3.209836006164551, "learning_rate": 1.2534246575342466e-05, "loss": 0.6943, "step": 12750},
    {"epoch": 1.1689497716894977, "grad_norm": 12.491949081420898, "learning_rate": 1.2465753424657534e-05, "loss": 0.6115, "step": 12800},
    {"epoch": 1.17351598173516, "grad_norm": 11.220638275146484, "learning_rate": 1.2397260273972603e-05, "loss": 0.5898, "step": 12850},
    {"epoch": 1.178082191780822, "grad_norm": 16.73185920715332, "learning_rate": 1.2328767123287671e-05, "loss": 0.6734, "step": 12900},
    {"epoch": 1.182648401826484, "grad_norm": 14.307918548583984, "learning_rate": 1.226027397260274e-05, "loss": 0.7266, "step": 12950},
    {"epoch": 1.187214611872146, "grad_norm": 14.836384773254395, "learning_rate": 1.2191780821917808e-05, "loss": 0.7586, "step": 13000},
    {"epoch": 1.191780821917808, "grad_norm": 29.282228469848633, "learning_rate": 1.2123287671232878e-05, "loss": 0.7917, "step": 13050},
    {"epoch": 1.1963470319634704, "grad_norm": 26.005468368530273, "learning_rate": 1.2054794520547945e-05, "loss": 0.6413, "step": 13100},
    {"epoch": 1.2009132420091324, "grad_norm": 8.961703300476074, "learning_rate": 1.1986301369863015e-05, "loss": 0.792, "step": 13150},
    {"epoch": 1.2054794520547945, "grad_norm": 10.702567100524902, "learning_rate": 1.1917808219178083e-05, "loss": 0.6594, "step": 13200},
    {"epoch": 1.2100456621004567, "grad_norm": 13.418671607971191, "learning_rate": 1.184931506849315e-05, "loss": 0.7547, "step": 13250},
    {"epoch": 1.2146118721461188, "grad_norm": 17.413429260253906, "learning_rate": 1.1780821917808218e-05, "loss": 0.7908, "step": 13300},
    {"epoch": 1.2191780821917808, "grad_norm": 18.375572204589844, "learning_rate": 1.1712328767123288e-05, "loss": 0.7298, "step": 13350},
    {"epoch": 1.2237442922374429, "grad_norm": 12.126824378967285, "learning_rate": 1.1643835616438355e-05, "loss": 0.7482, "step": 13400},
    {"epoch": 1.228310502283105, "grad_norm": 6.292917728424072, "learning_rate": 1.1575342465753425e-05, "loss": 0.7049, "step": 13450},
    {"epoch": 1.2328767123287672, "grad_norm": 48.449954986572266, "learning_rate": 1.1506849315068493e-05, "loss": 0.5808, "step": 13500},
    {"epoch": 1.2374429223744292, "grad_norm": 11.724007606506348, "learning_rate": 1.1438356164383562e-05, "loss": 0.6694, "step": 13550},
    {"epoch": 1.2420091324200913, "grad_norm": 11.694669723510742, "learning_rate": 1.136986301369863e-05, "loss": 0.7223, "step": 13600},
    {"epoch": 1.2465753424657535, "grad_norm": 18.402746200561523, "learning_rate": 1.13013698630137e-05, "loss": 0.6794, "step": 13650},
    {"epoch": 1.2511415525114156, "grad_norm": 10.988481521606445, "learning_rate": 1.1232876712328767e-05, "loss": 0.618, "step": 13700},
    {"epoch": 1.2557077625570776, "grad_norm": 8.175172805786133, "learning_rate": 1.1164383561643837e-05, "loss": 0.6552, "step": 13750},
    {"epoch": 1.2602739726027397, "grad_norm": 17.85883903503418, "learning_rate": 1.1095890410958904e-05, "loss": 0.6645, "step": 13800},
    {"epoch": 1.2648401826484017, "grad_norm": 6.0199198722839355, "learning_rate": 1.1027397260273972e-05, "loss": 0.7725, "step": 13850},
    {"epoch": 1.269406392694064, "grad_norm": 12.200461387634277, "learning_rate": 1.095890410958904e-05, "loss": 0.6685, "step": 13900},
    {"epoch": 1.273972602739726, "grad_norm": 11.29808235168457, "learning_rate": 1.089041095890411e-05, "loss": 0.5974, "step": 13950},
    {"epoch": 1.278538812785388, "grad_norm": 20.522750854492188, "learning_rate": 1.0821917808219177e-05, "loss": 0.6497, "step": 14000},
    {"epoch": 1.2831050228310503, "grad_norm": 4.903714656829834, "learning_rate": 1.0753424657534247e-05, "loss": 0.805, "step": 14050},
    {"epoch": 1.2876712328767124, "grad_norm": 14.09408950805664, "learning_rate": 1.0684931506849315e-05, "loss": 0.6975, "step": 14100},
    {"epoch": 1.2922374429223744, "grad_norm": 5.306293964385986, "learning_rate": 1.0616438356164384e-05, "loss": 0.6181, "step": 14150},
    {"epoch": 1.2968036529680365, "grad_norm": 15.217981338500977, "learning_rate": 1.0547945205479452e-05, "loss": 0.6363, "step": 14200},
    {"epoch": 1.3013698630136985, "grad_norm": 2.86833119392395, "learning_rate": 1.0479452054794521e-05, "loss": 0.632, "step": 14250},
    {"epoch": 1.3059360730593608, "grad_norm": 12.824779510498047, "learning_rate": 1.0410958904109589e-05, "loss": 0.7499, "step": 14300},
    {"epoch": 1.3105022831050228, "grad_norm": 17.47176170349121, "learning_rate": 1.0342465753424659e-05, "loss": 0.581, "step": 14350},
    {"epoch": 1.3150684931506849, "grad_norm": 7.620934009552002, "learning_rate": 1.0273972602739726e-05, "loss": 0.5913, "step": 14400},
    {"epoch": 1.3196347031963471, "grad_norm": 11.1875, "learning_rate": 1.0205479452054796e-05, "loss": 0.6465, "step": 14450},
    {"epoch": 1.3242009132420092, "grad_norm": 24.72510528564453, "learning_rate": 1.0136986301369862e-05, "loss": 0.6796, "step": 14500},
    {"epoch": 1.3287671232876712, "grad_norm": 6.102226734161377, "learning_rate": 1.0068493150684931e-05, "loss": 0.7074, "step": 14550},
    {"epoch": 1.3333333333333333, "grad_norm": 6.332953929901123, "learning_rate": 9.999999999999999e-06, "loss": 0.6338, "step": 14600},
    {"epoch": 1.3378995433789953, "grad_norm": 28.30670738220215, "learning_rate": 9.931506849315069e-06, "loss": 0.7109, "step": 14650},
    {"epoch": 1.3424657534246576, "grad_norm": 40.727230072021484, "learning_rate": 9.863013698630136e-06, "loss": 0.7342, "step": 14700},
    {"epoch": 1.3470319634703196, "grad_norm": 11.026389122009277, "learning_rate": 9.794520547945206e-06, "loss": 0.6972, "step": 14750},
    {"epoch": 1.3515981735159817, "grad_norm": 16.95206642150879, "learning_rate": 9.726027397260274e-06, "loss": 0.6509, "step": 14800},
    {"epoch": 1.356164383561644, "grad_norm": 24.887845993041992, "learning_rate": 9.657534246575343e-06, "loss": 0.6608, "step": 14850},
    {"epoch": 1.360730593607306, "grad_norm": 5.1824421882629395, "learning_rate": 9.589041095890411e-06, "loss": 0.664, "step": 14900},
    {"epoch": 1.365296803652968, "grad_norm": 35.08380889892578, "learning_rate": 9.52054794520548e-06, "loss": 0.7375, "step": 14950},
    {"epoch": 1.36986301369863, "grad_norm": 13.273919105529785, "learning_rate": 9.452054794520548e-06, "loss": 0.6581, "step": 15000},
    {"epoch": 1.374429223744292, "grad_norm": 20.243751525878906, "learning_rate": 9.383561643835618e-06, "loss": 0.7028, "step": 15050},
    {"epoch": 1.3789954337899544, "grad_norm": 6.9884934425354, "learning_rate": 9.315068493150685e-06, "loss": 0.6699, "step": 15100},
    {"epoch": 1.3835616438356164, "grad_norm": 10.110499382019043, "learning_rate": 9.246575342465753e-06, "loss": 0.5804, "step": 15150},
    {"epoch": 1.3881278538812785, "grad_norm": 5.272585868835449, "learning_rate": 9.178082191780821e-06, "loss": 0.7046, "step": 15200},
    {"epoch": 1.3926940639269407, "grad_norm": 3.3239293098449707, "learning_rate": 9.10958904109589e-06, "loss": 0.6621, "step": 15250},
    {"epoch": 1.3972602739726028, "grad_norm": 9.12402057647705, "learning_rate": 9.041095890410958e-06, "loss": 0.5613, "step": 15300},
    {"epoch": 1.4018264840182648, "grad_norm": 9.260445594787598, "learning_rate": 8.972602739726028e-06, "loss": 0.6179, "step": 15350},
    {"epoch": 1.4063926940639269, "grad_norm": 40.01394271850586, "learning_rate": 8.904109589041095e-06, "loss": 0.5897, "step": 15400},
    {"epoch": 1.410958904109589, "grad_norm": 43.27082443237305, "learning_rate": 8.835616438356165e-06, "loss": 0.5804, "step": 15450},
    {"epoch": 1.4155251141552512, "grad_norm": 8.149184226989746, "learning_rate": 8.767123287671233e-06, "loss": 0.7865, "step": 15500},
    {"epoch": 1.4200913242009132, "grad_norm": 16.708332061767578, "learning_rate": 8.698630136986302e-06, "loss": 0.6983, "step": 15550},
    {"epoch": 1.4246575342465753, "grad_norm": 2.826059103012085, "learning_rate": 8.63013698630137e-06, "loss": 0.6238, "step": 15600},
    {"epoch": 1.4292237442922375, "grad_norm": 13.411745071411133, "learning_rate": 8.56164383561644e-06, "loss": 0.6448, "step": 15650},
    {"epoch": 1.4337899543378996, "grad_norm": 12.094820976257324, "learning_rate": 8.493150684931507e-06, "loss": 0.6868, "step": 15700},
    {"epoch": 1.4383561643835616, "grad_norm": 13.272956848144531, "learning_rate": 8.424657534246577e-06, "loss": 0.6758, "step": 15750},
    {"epoch": 1.4429223744292237, "grad_norm": 7.710869312286377, "learning_rate": 8.356164383561643e-06, "loss": 0.6154, "step": 15800},
    {"epoch": 1.4474885844748857, "grad_norm": 24.845901489257812, "learning_rate": 8.287671232876712e-06, "loss": 0.6228, "step": 15850},
    {"epoch": 1.452054794520548, "grad_norm": 10.101279258728027, "learning_rate": 8.21917808219178e-06, "loss": 0.7418, "step": 15900},
    {"epoch": 1.45662100456621, "grad_norm": 71.19440460205078, "learning_rate": 8.15068493150685e-06, "loss": 0.7128, "step": 15950},
    {"epoch": 1.461187214611872, "grad_norm": 6.2137041091918945, "learning_rate": 8.082191780821917e-06, "loss": 0.7121, "step": 16000},
    {"epoch": 1.4657534246575343, "grad_norm": 20.703536987304688, "learning_rate": 8.013698630136987e-06, "loss": 0.7115, "step": 16050},
    {"epoch": 1.4703196347031964, "grad_norm": 26.53441619873047, "learning_rate": 7.945205479452055e-06, "loss": 0.5534, "step": 16100},
    {"epoch": 1.4748858447488584, "grad_norm": 20.233125686645508, "learning_rate": 7.876712328767124e-06, "loss": 0.6204, "step": 16150},
    {"epoch": 1.4794520547945205, "grad_norm": 23.36627769470215, "learning_rate": 7.808219178082192e-06, "loss": 0.6914, "step": 16200},
    {"epoch": 1.4840182648401825, "grad_norm": 11.94163703918457, "learning_rate": 7.739726027397261e-06, "loss": 0.6062, "step": 16250},
    {"epoch": 1.4885844748858448, "grad_norm": 15.757901191711426, "learning_rate": 7.671232876712329e-06, "loss": 0.6142, "step": 16300},
    {"epoch": 1.4931506849315068, "grad_norm": 12.007556915283203, "learning_rate": 7.602739726027398e-06, "loss": 0.6708, "step": 16350},
    {"epoch": 1.4977168949771689, "grad_norm": 9.127739906311035, "learning_rate": 7.5342465753424655e-06, "loss": 0.6319, "step": 16400},
    {"epoch": 1.5022831050228311, "grad_norm": 14.81264877319336, "learning_rate": 7.465753424657534e-06, "loss": 0.6143, "step": 16450},
    {"epoch": 1.5068493150684932, "grad_norm": 8.986160278320312, "learning_rate": 7.397260273972603e-06, "loss": 0.7508, "step": 16500},
    {"epoch": 1.5114155251141552, "grad_norm": 23.52107810974121, "learning_rate": 7.328767123287671e-06, "loss": 0.6377, "step": 16550},
    {"epoch": 1.5159817351598175, "grad_norm": 4.14312219619751, "learning_rate": 7.260273972602739e-06, "loss": 0.595, "step": 16600},
    {"epoch": 1.5205479452054793, "grad_norm": 32.40318298339844, "learning_rate": 7.191780821917808e-06, "loss": 0.6177, "step": 16650},
    {"epoch": 1.5251141552511416, "grad_norm": 13.583507537841797, "learning_rate": 7.123287671232876e-06, "loss": 0.6513, "step": 16700},
    {"epoch": 1.5296803652968036, "grad_norm": 27.02487564086914, "learning_rate": 7.054794520547945e-06, "loss": 0.6136, "step": 16750},
    {"epoch": 1.5342465753424657, "grad_norm": 46.82355880737305, "learning_rate": 6.986301369863014e-06, "loss": 0.6545, "step": 16800},
    {"epoch": 1.538812785388128, "grad_norm": 11.266030311584473, "learning_rate": 6.917808219178082e-06, "loss": 0.6438, "step": 16850},
    {"epoch": 1.54337899543379, "grad_norm": 21.652450561523438, "learning_rate": 6.849315068493151e-06, "loss": 0.6617, "step": 16900},
    {"epoch": 1.547945205479452, "grad_norm": 18.254072189331055, "learning_rate": 6.780821917808219e-06, "loss": 0.645, "step": 16950},
    {"epoch": 1.5525114155251143, "grad_norm": 7.959854602813721, "learning_rate": 6.712328767123287e-06, "loss": 0.7444, "step": 17000},
    {"epoch": 1.5570776255707761, "grad_norm": 15.169598579406738, "learning_rate": 6.643835616438356e-06, "loss": 0.6788, "step": 17050},
    {"epoch": 1.5616438356164384, "grad_norm": 14.872618675231934, "learning_rate": 6.5753424657534245e-06, "loss": 0.7433, "step": 17100},
    {"epoch": 1.5662100456621004, "grad_norm": 12.479876518249512, "learning_rate": 6.506849315068493e-06, "loss": 0.676, "step": 17150},
    {"epoch": 1.5707762557077625, "grad_norm": 4.30610990524292, "learning_rate": 6.438356164383562e-06, "loss": 0.597, "step": 17200},
    {"epoch": 1.5753424657534247, "grad_norm": 9.52687931060791, "learning_rate": 6.3698630136986296e-06, "loss": 0.6084, "step": 17250},
    {"epoch": 1.5799086757990868, "grad_norm": 6.067666053771973, "learning_rate": 6.301369863013698e-06, "loss": 0.61, "step": 17300},
    {"epoch": 1.5844748858447488, "grad_norm": 15.737329483032227, "learning_rate": 6.232876712328767e-06, "loss": 0.7011, "step": 17350},
    {"epoch": 1.589041095890411, "grad_norm": 4.7880730628967285, "learning_rate": 6.1643835616438354e-06, "loss": 0.694, "step": 17400},
    {"epoch": 1.593607305936073, "grad_norm": 19.6992130279541, "learning_rate": 6.095890410958904e-06, "loss": 0.706, "step": 17450},
    {"epoch": 1.5981735159817352, "grad_norm": 10.835814476013184, "learning_rate": 6.027397260273973e-06, "loss": 0.6298, "step": 17500},
    {"epoch": 1.6027397260273972, "grad_norm": 13.219555854797363, "learning_rate": 5.958904109589041e-06, "loss": 0.6496, "step": 17550},
    {"epoch": 1.6073059360730593, "grad_norm": 22.237091064453125, "learning_rate": 5.890410958904109e-06, "loss": 0.6585, "step": 17600},
    {"epoch": 1.6118721461187215, "grad_norm": 12.173138618469238, "learning_rate": 5.821917808219178e-06, "loss": 0.712, "step": 17650},
    {"epoch": 1.6164383561643836, "grad_norm": 6.5948896408081055, "learning_rate": 5.753424657534246e-06, "loss": 0.6253, "step": 17700},
    {"epoch": 1.6210045662100456, "grad_norm": 5.447400093078613, "learning_rate": 5.684931506849315e-06, "loss": 0.5762, "step": 17750},
    {"epoch": 1.625570776255708, "grad_norm": 12.413744926452637, "learning_rate": 5.616438356164384e-06, "loss": 0.6272, "step": 17800},
    {"epoch": 1.6301369863013697, "grad_norm": 48.877052307128906, "learning_rate": 5.547945205479452e-06, "loss": 0.589, "step": 17850},
    {"epoch": 1.634703196347032, "grad_norm": 21.571834564208984, "learning_rate": 5.47945205479452e-06, "loss": 0.6592, "step": 17900},
    {"epoch": 1.639269406392694, "grad_norm": 7.417336940765381, "learning_rate": 5.410958904109589e-06, "loss": 0.5248, "step": 17950},
    {"epoch": 1.643835616438356, "grad_norm": 48.4299430847168, "learning_rate": 5.342465753424657e-06, "loss": 0.6622, "step": 18000},
    {"epoch": 1.6484018264840183, "grad_norm": 17.760732650756836, "learning_rate": 5.273972602739726e-06, "loss": 0.626, "step": 18050},
    {"epoch": 1.6529680365296804, "grad_norm": 14.000800132751465, "learning_rate": 5.2054794520547945e-06, "loss": 0.6084, "step": 18100},
    {"epoch": 1.6575342465753424, "grad_norm": 8.742088317871094, "learning_rate": 5.136986301369863e-06, "loss": 0.6029, "step": 18150},
    {"epoch": 1.6621004566210047, "grad_norm": 26.16733169555664, "learning_rate": 5.068493150684931e-06, "loss": 0.6828, "step": 18200},
    {"epoch": 1.6666666666666665, "grad_norm": 29.90041160583496, "learning_rate": 4.9999999999999996e-06, "loss": 0.5702, "step": 18250},
    {"epoch": 1.6712328767123288, "grad_norm": 15.568696022033691, "learning_rate": 4.931506849315068e-06, "loss": 0.6376, "step": 18300},
    {"epoch": 1.6757990867579908, "grad_norm": 15.59715747833252, "learning_rate": 4.863013698630137e-06, "loss": 0.6776, "step": 18350},
    {"epoch": 1.6803652968036529,
|
"grad_norm": 11.07044506072998, |
|
"learning_rate": 4.7945205479452054e-06, |
|
"loss": 0.529, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 1.6849315068493151, |
|
"grad_norm": 5.349613666534424, |
|
"learning_rate": 4.726027397260274e-06, |
|
"loss": 0.692, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 1.6894977168949772, |
|
"grad_norm": 18.147794723510742, |
|
"learning_rate": 4.657534246575343e-06, |
|
"loss": 0.7057, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.6940639269406392, |
|
"grad_norm": 13.486499786376953, |
|
"learning_rate": 4.5890410958904105e-06, |
|
"loss": 0.6415, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 1.6986301369863015, |
|
"grad_norm": 10.16304874420166, |
|
"learning_rate": 4.520547945205479e-06, |
|
"loss": 0.6604, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 1.7031963470319633, |
|
"grad_norm": 29.04235076904297, |
|
"learning_rate": 4.452054794520548e-06, |
|
"loss": 0.6773, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 1.7077625570776256, |
|
"grad_norm": 9.932119369506836, |
|
"learning_rate": 4.383561643835616e-06, |
|
"loss": 0.617, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 1.7123287671232876, |
|
"grad_norm": 70.96830749511719, |
|
"learning_rate": 4.315068493150685e-06, |
|
"loss": 0.6157, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 1.7168949771689497, |
|
"grad_norm": 15.635278701782227, |
|
"learning_rate": 4.246575342465754e-06, |
|
"loss": 0.6127, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 1.721461187214612, |
|
"grad_norm": 2.3553667068481445, |
|
"learning_rate": 4.178082191780821e-06, |
|
"loss": 0.6847, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 1.726027397260274, |
|
"grad_norm": 8.880425453186035, |
|
"learning_rate": 4.10958904109589e-06, |
|
"loss": 0.5043, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 1.730593607305936, |
|
"grad_norm": 22.30252456665039, |
|
"learning_rate": 4.041095890410959e-06, |
|
"loss": 0.5546, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 1.7351598173515983, |
|
"grad_norm": 7.0828142166137695, |
|
"learning_rate": 3.972602739726027e-06, |
|
"loss": 0.5997, |
|
"step": 19000 |
|
}, |
|
{
"epoch": 1.7397260273972601,
"grad_norm": 25.635570526123047,
"learning_rate": 3.904109589041096e-06,
"loss": 0.6091,
"step": 19050
},
{
"epoch": 1.7442922374429224,
"grad_norm": 9.004444122314453,
"learning_rate": 3.8356164383561645e-06,
"loss": 0.6257,
"step": 19100
},
{
"epoch": 1.7488584474885844,
"grad_norm": 5.485713958740234,
"learning_rate": 3.7671232876712327e-06,
"loss": 0.6728,
"step": 19150
},
{
"epoch": 1.7534246575342465,
"grad_norm": 35.63444519042969,
"learning_rate": 3.6986301369863014e-06,
"loss": 0.7696,
"step": 19200
},
{
"epoch": 1.7579908675799087,
"grad_norm": 29.199115753173828,
"learning_rate": 3.6301369863013696e-06,
"loss": 0.59,
"step": 19250
},
{
"epoch": 1.7625570776255708,
"grad_norm": 18.041336059570312,
"learning_rate": 3.561643835616438e-06,
"loss": 0.5744,
"step": 19300
},
{
"epoch": 1.7671232876712328,
"grad_norm": 16.391035079956055,
"learning_rate": 3.493150684931507e-06,
"loss": 0.6831,
"step": 19350
},
{
"epoch": 1.771689497716895,
"grad_norm": 9.0728759765625,
"learning_rate": 3.4246575342465754e-06,
"loss": 0.5779,
"step": 19400
},
{
"epoch": 1.776255707762557,
"grad_norm": 18.102890014648438,
"learning_rate": 3.3561643835616436e-06,
"loss": 0.5507,
"step": 19450
},
{
"epoch": 1.7808219178082192,
"grad_norm": 17.248735427856445,
"learning_rate": 3.2876712328767123e-06,
"loss": 0.7226,
"step": 19500
},
{
"epoch": 1.7853881278538812,
"grad_norm": 27.942777633666992,
"learning_rate": 3.219178082191781e-06,
"loss": 0.7178,
"step": 19550
},
{
"epoch": 1.7899543378995433,
"grad_norm": 5.3349809646606445,
"learning_rate": 3.150684931506849e-06,
"loss": 0.5566,
"step": 19600
},
{
"epoch": 1.7945205479452055,
"grad_norm": 30.73387908935547,
"learning_rate": 3.0821917808219177e-06,
"loss": 0.7677,
"step": 19650
},
{
"epoch": 1.7990867579908676,
"grad_norm": 21.230749130249023,
"learning_rate": 3.0136986301369864e-06,
"loss": 0.61,
"step": 19700
},
{
"epoch": 1.8036529680365296,
"grad_norm": 10.363127708435059,
"learning_rate": 2.9452054794520546e-06,
"loss": 0.6385,
"step": 19750
},
{
"epoch": 1.808219178082192,
"grad_norm": 1.5289000272750854,
"learning_rate": 2.876712328767123e-06,
"loss": 0.6747,
"step": 19800
},
{
"epoch": 1.8127853881278537,
"grad_norm": 18.69597625732422,
"learning_rate": 2.808219178082192e-06,
"loss": 0.5934,
"step": 19850
},
{
"epoch": 1.817351598173516,
"grad_norm": 10.11032772064209,
"learning_rate": 2.73972602739726e-06,
"loss": 0.5918,
"step": 19900
},
{
"epoch": 1.821917808219178,
"grad_norm": 30.333818435668945,
"learning_rate": 2.6712328767123286e-06,
"loss": 0.6381,
"step": 19950
},
{
"epoch": 1.82648401826484,
"grad_norm": 15.229024887084961,
"learning_rate": 2.6027397260273973e-06,
"loss": 0.6151,
"step": 20000
},
{
"epoch": 1.8310502283105023,
"grad_norm": 28.18828773498535,
"learning_rate": 2.5342465753424655e-06,
"loss": 0.6968,
"step": 20050
},
{
"epoch": 1.8356164383561644,
"grad_norm": 48.18195724487305,
"learning_rate": 2.465753424657534e-06,
"loss": 0.6256,
"step": 20100
},
{
"epoch": 1.8401826484018264,
"grad_norm": 16.33424949645996,
"learning_rate": 2.3972602739726027e-06,
"loss": 0.6477,
"step": 20150
},
{
"epoch": 1.8447488584474887,
"grad_norm": 25.994909286499023,
"learning_rate": 2.3287671232876713e-06,
"loss": 0.692,
"step": 20200
},
{
"epoch": 1.8493150684931505,
"grad_norm": 8.117030143737793,
"learning_rate": 2.2602739726027396e-06,
"loss": 0.6933,
"step": 20250
},
{
"epoch": 1.8538812785388128,
"grad_norm": 8.02834415435791,
"learning_rate": 2.191780821917808e-06,
"loss": 0.6413,
"step": 20300
},
{
"epoch": 1.8584474885844748,
"grad_norm": 22.62827491760254,
"learning_rate": 2.123287671232877e-06,
"loss": 0.679,
"step": 20350
},
{
"epoch": 1.8630136986301369,
"grad_norm": 9.562274932861328,
"learning_rate": 2.054794520547945e-06,
"loss": 0.6615,
"step": 20400
},
{
"epoch": 1.8675799086757991,
"grad_norm": 12.407808303833008,
"learning_rate": 1.9863013698630136e-06,
"loss": 0.7476,
"step": 20450
},
{
"epoch": 1.8721461187214612,
"grad_norm": 41.344093322753906,
"learning_rate": 1.9178082191780823e-06,
"loss": 0.5827,
"step": 20500
},
{
"epoch": 1.8767123287671232,
"grad_norm": 10.044130325317383,
"learning_rate": 1.8493150684931507e-06,
"loss": 0.6566,
"step": 20550
},
{
"epoch": 1.8812785388127855,
"grad_norm": 9.382560729980469,
"learning_rate": 1.780821917808219e-06,
"loss": 0.6259,
"step": 20600
},
{
"epoch": 1.8858447488584473,
"grad_norm": 9.731813430786133,
"learning_rate": 1.7123287671232877e-06,
"loss": 0.5924,
"step": 20650
},
{
"epoch": 1.8904109589041096,
"grad_norm": 13.417922973632812,
"learning_rate": 1.6438356164383561e-06,
"loss": 0.6229,
"step": 20700
},
{
"epoch": 1.8949771689497716,
"grad_norm": 32.03701400756836,
"learning_rate": 1.5753424657534245e-06,
"loss": 0.5761,
"step": 20750
},
{
"epoch": 1.8995433789954337,
"grad_norm": 6.067290782928467,
"learning_rate": 1.5068493150684932e-06,
"loss": 0.6463,
"step": 20800
},
{
"epoch": 1.904109589041096,
"grad_norm": 19.67026710510254,
"learning_rate": 1.4383561643835616e-06,
"loss": 0.6671,
"step": 20850
},
{
"epoch": 1.908675799086758,
"grad_norm": 50.498802185058594,
"learning_rate": 1.36986301369863e-06,
"loss": 0.5811,
"step": 20900
},
{
"epoch": 1.91324200913242,
"grad_norm": 15.981374740600586,
"learning_rate": 1.3013698630136986e-06,
"loss": 0.6823,
"step": 20950
},
{
"epoch": 1.9178082191780823,
"grad_norm": 19.175485610961914,
"learning_rate": 1.232876712328767e-06,
"loss": 0.6112,
"step": 21000
},
{
"epoch": 1.9223744292237441,
"grad_norm": 8.795181274414062,
"learning_rate": 1.1643835616438357e-06,
"loss": 0.5474,
"step": 21050
},
{
"epoch": 1.9269406392694064,
"grad_norm": 3.590404748916626,
"learning_rate": 1.095890410958904e-06,
"loss": 0.7026,
"step": 21100
},
{
"epoch": 1.9315068493150684,
"grad_norm": 21.619140625,
"learning_rate": 1.0273972602739725e-06,
"loss": 0.701,
"step": 21150
},
{
"epoch": 1.9360730593607305,
"grad_norm": 22.84990692138672,
"learning_rate": 9.589041095890411e-07,
"loss": 0.5942,
"step": 21200
},
{
"epoch": 1.9406392694063928,
"grad_norm": 28.548683166503906,
"learning_rate": 8.904109589041095e-07,
"loss": 0.6343,
"step": 21250
},
{
"epoch": 1.9452054794520548,
"grad_norm": 20.216583251953125,
"learning_rate": 8.219178082191781e-07,
"loss": 0.6836,
"step": 21300
},
{
"epoch": 1.9497716894977168,
"grad_norm": 22.950048446655273,
"learning_rate": 7.534246575342466e-07,
"loss": 0.6815,
"step": 21350
},
{
"epoch": 1.954337899543379,
"grad_norm": 20.81789207458496,
"learning_rate": 6.84931506849315e-07,
"loss": 0.7679,
"step": 21400
},
{
"epoch": 1.958904109589041,
"grad_norm": 14.494473457336426,
"learning_rate": 6.164383561643835e-07,
"loss": 0.5561,
"step": 21450
},
{
"epoch": 1.9634703196347032,
"grad_norm": 17.119592666625977,
"learning_rate": 5.47945205479452e-07,
"loss": 0.6881,
"step": 21500
},
{
"epoch": 1.9680365296803652,
"grad_norm": 8.369721412658691,
"learning_rate": 4.794520547945206e-07,
"loss": 0.6692,
"step": 21550
},
{
"epoch": 1.9726027397260273,
"grad_norm": 11.264369010925293,
"learning_rate": 4.1095890410958903e-07,
"loss": 0.6405,
"step": 21600
},
{
"epoch": 1.9771689497716896,
"grad_norm": 3.320608615875244,
"learning_rate": 3.424657534246575e-07,
"loss": 0.671,
"step": 21650
},
{
"epoch": 1.9817351598173516,
"grad_norm": 19.23822784423828,
"learning_rate": 2.73972602739726e-07,
"loss": 0.5475,
"step": 21700
},
{
"epoch": 1.9863013698630136,
"grad_norm": 13.919721603393555,
"learning_rate": 2.0547945205479452e-07,
"loss": 0.7422,
"step": 21750
},
{
"epoch": 1.990867579908676,
"grad_norm": 14.141480445861816,
"learning_rate": 1.36986301369863e-07,
"loss": 0.5855,
"step": 21800
},
{
"epoch": 1.9954337899543377,
"grad_norm": 19.506189346313477,
"learning_rate": 6.84931506849315e-08,
"loss": 0.7103,
"step": 21850
},
{
"epoch": 2.0,
"grad_norm": 16.25570297241211,
"learning_rate": 0.0,
"loss": 0.7072,
"step": 21900
},
{
"epoch": 2.0,
"step": 21900,
"total_flos": 3.8619551920019866e+17,
"train_loss": 0.9270246051109,
"train_runtime": 51747.0424,
"train_samples_per_second": 3.386,
"train_steps_per_second": 0.423
}
],
"logging_steps": 50,
"max_steps": 21900,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.8619551920019866e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}