|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.09696264514095944, |
|
"eval_steps": 500, |
|
"global_step": 12000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 8.080220428413287e-05, |
|
"grad_norm": 120784.5234375, |
|
"learning_rate": 4.040404040404041e-08, |
|
"loss": 6344.0191, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00016160440856826573, |
|
"grad_norm": 246101.4375, |
|
"learning_rate": 8.080808080808082e-08, |
|
"loss": 7230.2391, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.00024240661285239863, |
|
"grad_norm": 220140.828125, |
|
"learning_rate": 1.2121212121212122e-07, |
|
"loss": 7248.1844, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.00032320881713653147, |
|
"grad_norm": 468224.40625, |
|
"learning_rate": 1.6161616161616163e-07, |
|
"loss": 8245.5844, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.00040401102142066436, |
|
"grad_norm": 129730.1171875, |
|
"learning_rate": 2.0202020202020202e-07, |
|
"loss": 5464.3164, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.00048481322570479725, |
|
"grad_norm": 37984.21484375, |
|
"learning_rate": 2.4242424242424244e-07, |
|
"loss": 7551.0562, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0005656154299889301, |
|
"grad_norm": 170187.078125, |
|
"learning_rate": 2.8282828282828283e-07, |
|
"loss": 6856.2141, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0006464176342730629, |
|
"grad_norm": 64738.05078125, |
|
"learning_rate": 3.2323232323232327e-07, |
|
"loss": 7045.2477, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0007272198385571959, |
|
"grad_norm": 114463.8515625, |
|
"learning_rate": 3.6363636363636366e-07, |
|
"loss": 5803.459, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0008080220428413287, |
|
"grad_norm": 102587.6796875, |
|
"learning_rate": 4.0404040404040405e-07, |
|
"loss": 3976.1211, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0008888242471254616, |
|
"grad_norm": 78112.53125, |
|
"learning_rate": 4.444444444444445e-07, |
|
"loss": 3535.432, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0009696264514095945, |
|
"grad_norm": 100944.5703125, |
|
"learning_rate": 4.848484848484849e-07, |
|
"loss": 4244.4645, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0010504286556937273, |
|
"grad_norm": 178465.359375, |
|
"learning_rate": 5.252525252525253e-07, |
|
"loss": 5655.2863, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0011312308599778602, |
|
"grad_norm": 70585.359375, |
|
"learning_rate": 5.656565656565657e-07, |
|
"loss": 3637.5656, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.001212033064261993, |
|
"grad_norm": 51223.41015625, |
|
"learning_rate": 6.060606060606061e-07, |
|
"loss": 2629.8635, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0012928352685461259, |
|
"grad_norm": 53755.01953125, |
|
"learning_rate": 6.464646464646465e-07, |
|
"loss": 4522.2559, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.0013736374728302587, |
|
"grad_norm": 72124.0625, |
|
"learning_rate": 6.868686868686869e-07, |
|
"loss": 3395.8777, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0014544396771143918, |
|
"grad_norm": 11780.193359375, |
|
"learning_rate": 7.272727272727273e-07, |
|
"loss": 1784.9486, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.0015352418813985246, |
|
"grad_norm": 62803.8828125, |
|
"learning_rate": 7.676767676767678e-07, |
|
"loss": 2220.277, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.0016160440856826574, |
|
"grad_norm": 118829.34375, |
|
"learning_rate": 8.080808080808081e-07, |
|
"loss": 2133.5133, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0016968462899667903, |
|
"grad_norm": 7391.849609375, |
|
"learning_rate": 8.484848484848486e-07, |
|
"loss": 1618.4662, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.0017776484942509231, |
|
"grad_norm": 7889.263671875, |
|
"learning_rate": 8.88888888888889e-07, |
|
"loss": 1578.3401, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.001858450698535056, |
|
"grad_norm": 10828.140625, |
|
"learning_rate": 9.292929292929294e-07, |
|
"loss": 1327.2748, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.001939252902819189, |
|
"grad_norm": 15991.2119140625, |
|
"learning_rate": 9.696969696969698e-07, |
|
"loss": 1342.3516, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.0020200551071033216, |
|
"grad_norm": 7872.46484375, |
|
"learning_rate": 1.0101010101010103e-06, |
|
"loss": 1077.598, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.0021008573113874547, |
|
"grad_norm": 3977.63623046875, |
|
"learning_rate": 1.0505050505050506e-06, |
|
"loss": 835.4583, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0021816595156715873, |
|
"grad_norm": 3596.71923828125, |
|
"learning_rate": 1.090909090909091e-06, |
|
"loss": 766.0804, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0022624617199557204, |
|
"grad_norm": 10194.791015625, |
|
"learning_rate": 1.1313131313131313e-06, |
|
"loss": 777.5695, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.0023432639242398534, |
|
"grad_norm": 7022.6103515625, |
|
"learning_rate": 1.1717171717171719e-06, |
|
"loss": 639.281, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.002424066128523986, |
|
"grad_norm": 2920.81787109375, |
|
"learning_rate": 1.2121212121212122e-06, |
|
"loss": 703.318, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.002504868332808119, |
|
"grad_norm": 4137.4970703125, |
|
"learning_rate": 1.2525252525252527e-06, |
|
"loss": 766.8718, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.0025856705370922517, |
|
"grad_norm": 8379.064453125, |
|
"learning_rate": 1.292929292929293e-06, |
|
"loss": 625.7259, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.002666472741376385, |
|
"grad_norm": 2632.939697265625, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"loss": 529.0049, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.0027472749456605174, |
|
"grad_norm": 3072.3486328125, |
|
"learning_rate": 1.3737373737373738e-06, |
|
"loss": 557.5195, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.0028280771499446505, |
|
"grad_norm": 2430.994384765625, |
|
"learning_rate": 1.4141414141414143e-06, |
|
"loss": 548.776, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.0029088793542287835, |
|
"grad_norm": 2531.90771484375, |
|
"learning_rate": 1.4545454545454546e-06, |
|
"loss": 602.2291, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.002989681558512916, |
|
"grad_norm": 5374.16552734375, |
|
"learning_rate": 1.4949494949494952e-06, |
|
"loss": 447.6404, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.003070483762797049, |
|
"grad_norm": 1357.3726806640625, |
|
"learning_rate": 1.5353535353535355e-06, |
|
"loss": 342.7574, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.003151285967081182, |
|
"grad_norm": 1249.0936279296875, |
|
"learning_rate": 1.5757575757575759e-06, |
|
"loss": 562.0835, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.003232088171365315, |
|
"grad_norm": 1270.3609619140625, |
|
"learning_rate": 1.6161616161616162e-06, |
|
"loss": 467.299, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0033128903756494475, |
|
"grad_norm": 2576.8291015625, |
|
"learning_rate": 1.6565656565656565e-06, |
|
"loss": 448.897, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.0033936925799335806, |
|
"grad_norm": 1376.2908935546875, |
|
"learning_rate": 1.6969696969696973e-06, |
|
"loss": 382.242, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.0034744947842177136, |
|
"grad_norm": 1212.802490234375, |
|
"learning_rate": 1.7373737373737376e-06, |
|
"loss": 486.5346, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.0035552969885018462, |
|
"grad_norm": 801.7883911132812, |
|
"learning_rate": 1.777777777777778e-06, |
|
"loss": 452.1537, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.0036360991927859793, |
|
"grad_norm": 1590.736572265625, |
|
"learning_rate": 1.818181818181818e-06, |
|
"loss": 449.5157, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.003716901397070112, |
|
"grad_norm": 3368.974853515625, |
|
"learning_rate": 1.8585858585858588e-06, |
|
"loss": 445.9489, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.003797703601354245, |
|
"grad_norm": 2164.530517578125, |
|
"learning_rate": 1.8989898989898992e-06, |
|
"loss": 449.8674, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.003878505805638378, |
|
"grad_norm": 1234.2860107421875, |
|
"learning_rate": 1.9393939393939395e-06, |
|
"loss": 375.5616, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.003959308009922511, |
|
"grad_norm": 1191.9912109375, |
|
"learning_rate": 1.9797979797979796e-06, |
|
"loss": 394.9626, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.004040110214206643, |
|
"grad_norm": 2829.901611328125, |
|
"learning_rate": 2.0202020202020206e-06, |
|
"loss": 398.194, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.004120912418490777, |
|
"grad_norm": 3573.80322265625, |
|
"learning_rate": 2.0606060606060607e-06, |
|
"loss": 440.5374, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.004201714622774909, |
|
"grad_norm": 991.9559326171875, |
|
"learning_rate": 2.1010101010101013e-06, |
|
"loss": 552.4083, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.004282516827059042, |
|
"grad_norm": 2654.274658203125, |
|
"learning_rate": 2.1414141414141414e-06, |
|
"loss": 414.1354, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.004363319031343175, |
|
"grad_norm": 1416.258056640625, |
|
"learning_rate": 2.181818181818182e-06, |
|
"loss": 443.3496, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.004444121235627308, |
|
"grad_norm": 1433.0880126953125, |
|
"learning_rate": 2.2222222222222225e-06, |
|
"loss": 476.5702, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.004524923439911441, |
|
"grad_norm": 852.6603393554688, |
|
"learning_rate": 2.2626262626262626e-06, |
|
"loss": 425.1744, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.004605725644195573, |
|
"grad_norm": 1662.8759765625, |
|
"learning_rate": 2.303030303030303e-06, |
|
"loss": 432.2859, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.004686527848479707, |
|
"grad_norm": 1177.7154541015625, |
|
"learning_rate": 2.3434343434343437e-06, |
|
"loss": 472.836, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.0047673300527638395, |
|
"grad_norm": 1203.2510986328125, |
|
"learning_rate": 2.383838383838384e-06, |
|
"loss": 400.3267, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.004848132257047972, |
|
"grad_norm": 806.4482421875, |
|
"learning_rate": 2.4242424242424244e-06, |
|
"loss": 381.2281, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.004928934461332105, |
|
"grad_norm": 1223.798095703125, |
|
"learning_rate": 2.4646464646464645e-06, |
|
"loss": 314.522, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.005009736665616238, |
|
"grad_norm": 1220.419189453125, |
|
"learning_rate": 2.5050505050505055e-06, |
|
"loss": 391.7079, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.005090538869900371, |
|
"grad_norm": 1846.539306640625, |
|
"learning_rate": 2.5454545454545456e-06, |
|
"loss": 415.6819, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.0051713410741845035, |
|
"grad_norm": 1243.7620849609375, |
|
"learning_rate": 2.585858585858586e-06, |
|
"loss": 437.1002, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.005252143278468637, |
|
"grad_norm": 1140.9453125, |
|
"learning_rate": 2.6262626262626263e-06, |
|
"loss": 415.1159, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.00533294548275277, |
|
"grad_norm": 4821.37060546875, |
|
"learning_rate": 2.666666666666667e-06, |
|
"loss": 464.5417, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.005413747687036902, |
|
"grad_norm": 1713.6087646484375, |
|
"learning_rate": 2.7070707070707074e-06, |
|
"loss": 375.5785, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.005494549891321035, |
|
"grad_norm": 2620.644287109375, |
|
"learning_rate": 2.7474747474747475e-06, |
|
"loss": 299.2374, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.005575352095605168, |
|
"grad_norm": 1759.0950927734375, |
|
"learning_rate": 2.787878787878788e-06, |
|
"loss": 415.4854, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.005656154299889301, |
|
"grad_norm": 1536.8719482421875, |
|
"learning_rate": 2.8282828282828286e-06, |
|
"loss": 449.5622, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.0057369565041734336, |
|
"grad_norm": 2095.785400390625, |
|
"learning_rate": 2.8686868686868687e-06, |
|
"loss": 380.9472, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.005817758708457567, |
|
"grad_norm": 1478.4825439453125, |
|
"learning_rate": 2.9090909090909093e-06, |
|
"loss": 390.7729, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.0058985609127417, |
|
"grad_norm": 1876.679443359375, |
|
"learning_rate": 2.9494949494949494e-06, |
|
"loss": 391.9601, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.005979363117025832, |
|
"grad_norm": 749.0634765625, |
|
"learning_rate": 2.9898989898989904e-06, |
|
"loss": 253.9756, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.006060165321309965, |
|
"grad_norm": 906.431396484375, |
|
"learning_rate": 3.0303030303030305e-06, |
|
"loss": 347.2193, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.006140967525594098, |
|
"grad_norm": 1034.1180419921875, |
|
"learning_rate": 3.070707070707071e-06, |
|
"loss": 311.8948, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.006221769729878231, |
|
"grad_norm": 2706.747802734375, |
|
"learning_rate": 3.111111111111111e-06, |
|
"loss": 329.9375, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.006302571934162364, |
|
"grad_norm": 1195.091064453125, |
|
"learning_rate": 3.1515151515151517e-06, |
|
"loss": 303.2004, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.006383374138446497, |
|
"grad_norm": 1622.099609375, |
|
"learning_rate": 3.191919191919192e-06, |
|
"loss": 359.3405, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.00646417634273063, |
|
"grad_norm": 1255.0582275390625, |
|
"learning_rate": 3.2323232323232324e-06, |
|
"loss": 362.8006, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.006544978547014762, |
|
"grad_norm": 1571.966552734375, |
|
"learning_rate": 3.2727272727272733e-06, |
|
"loss": 372.003, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.006625780751298895, |
|
"grad_norm": 1350.624755859375, |
|
"learning_rate": 3.313131313131313e-06, |
|
"loss": 391.2471, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.0067065829555830285, |
|
"grad_norm": 1288.0430908203125, |
|
"learning_rate": 3.3535353535353536e-06, |
|
"loss": 381.0373, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.006787385159867161, |
|
"grad_norm": 1756.347900390625, |
|
"learning_rate": 3.3939393939393946e-06, |
|
"loss": 366.8195, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.006868187364151294, |
|
"grad_norm": 1164.05224609375, |
|
"learning_rate": 3.4343434343434343e-06, |
|
"loss": 340.2402, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.006948989568435427, |
|
"grad_norm": 6004.3291015625, |
|
"learning_rate": 3.4747474747474752e-06, |
|
"loss": 397.7737, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.00702979177271956, |
|
"grad_norm": 1949.6370849609375, |
|
"learning_rate": 3.515151515151515e-06, |
|
"loss": 352.9498, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.0071105939770036925, |
|
"grad_norm": 823.8360595703125, |
|
"learning_rate": 3.555555555555556e-06, |
|
"loss": 432.1595, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.007191396181287825, |
|
"grad_norm": 3512.72607421875, |
|
"learning_rate": 3.5959595959595965e-06, |
|
"loss": 342.5901, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.007272198385571959, |
|
"grad_norm": 1352.2506103515625, |
|
"learning_rate": 3.636363636363636e-06, |
|
"loss": 306.887, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.007353000589856091, |
|
"grad_norm": 2983.867919921875, |
|
"learning_rate": 3.676767676767677e-06, |
|
"loss": 381.7238, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.007433802794140224, |
|
"grad_norm": 2423.1806640625, |
|
"learning_rate": 3.7171717171717177e-06, |
|
"loss": 303.3808, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.007514604998424357, |
|
"grad_norm": 953.1580810546875, |
|
"learning_rate": 3.757575757575758e-06, |
|
"loss": 380.6255, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.00759540720270849, |
|
"grad_norm": 5818.9609375, |
|
"learning_rate": 3.7979797979797984e-06, |
|
"loss": 372.4149, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.007676209406992623, |
|
"grad_norm": 1317.5467529296875, |
|
"learning_rate": 3.8383838383838385e-06, |
|
"loss": 369.7815, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.007757011611276756, |
|
"grad_norm": 929.1298217773438, |
|
"learning_rate": 3.878787878787879e-06, |
|
"loss": 395.9197, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.007837813815560889, |
|
"grad_norm": 1342.6861572265625, |
|
"learning_rate": 3.9191919191919196e-06, |
|
"loss": 384.7229, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.007918616019845021, |
|
"grad_norm": 1253.3720703125, |
|
"learning_rate": 3.959595959595959e-06, |
|
"loss": 302.0778, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.007999418224129154, |
|
"grad_norm": 966.1517333984375, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 407.2742, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.008080220428413287, |
|
"grad_norm": 949.992919921875, |
|
"learning_rate": 4.040404040404041e-06, |
|
"loss": 333.9852, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.00816102263269742, |
|
"grad_norm": 2160.320068359375, |
|
"learning_rate": 4.080808080808081e-06, |
|
"loss": 320.1485, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.008241824836981554, |
|
"grad_norm": 1784.5238037109375, |
|
"learning_rate": 4.1212121212121215e-06, |
|
"loss": 388.0309, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.008322627041265686, |
|
"grad_norm": 11426.04296875, |
|
"learning_rate": 4.161616161616161e-06, |
|
"loss": 342.7975, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.008403429245549819, |
|
"grad_norm": 1648.6806640625, |
|
"learning_rate": 4.2020202020202026e-06, |
|
"loss": 377.397, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.008484231449833951, |
|
"grad_norm": 929.9481811523438, |
|
"learning_rate": 4.242424242424243e-06, |
|
"loss": 316.3978, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.008565033654118084, |
|
"grad_norm": 1129.7996826171875, |
|
"learning_rate": 4.282828282828283e-06, |
|
"loss": 299.9879, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.008645835858402217, |
|
"grad_norm": 1400.36376953125, |
|
"learning_rate": 4.323232323232323e-06, |
|
"loss": 350.1171, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.00872663806268635, |
|
"grad_norm": 1833.9061279296875, |
|
"learning_rate": 4.363636363636364e-06, |
|
"loss": 325.1123, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.008807440266970484, |
|
"grad_norm": 1330.033203125, |
|
"learning_rate": 4.4040404040404044e-06, |
|
"loss": 325.3823, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.008888242471254616, |
|
"grad_norm": 910.3088989257812, |
|
"learning_rate": 4.444444444444445e-06, |
|
"loss": 301.4958, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.008969044675538749, |
|
"grad_norm": 1182.816162109375, |
|
"learning_rate": 4.484848484848485e-06, |
|
"loss": 277.6404, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.009049846879822881, |
|
"grad_norm": 4707.03857421875, |
|
"learning_rate": 4.525252525252525e-06, |
|
"loss": 424.5438, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.009130649084107014, |
|
"grad_norm": 2059.0185546875, |
|
"learning_rate": 4.565656565656566e-06, |
|
"loss": 324.0304, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.009211451288391147, |
|
"grad_norm": 3679.1044921875, |
|
"learning_rate": 4.606060606060606e-06, |
|
"loss": 303.365, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.00929225349267528, |
|
"grad_norm": 2518.44970703125, |
|
"learning_rate": 4.646464646464647e-06, |
|
"loss": 257.8196, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.009373055696959414, |
|
"grad_norm": 1017.4381713867188, |
|
"learning_rate": 4.6868686868686874e-06, |
|
"loss": 308.7409, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.009453857901243546, |
|
"grad_norm": 1016.4625244140625, |
|
"learning_rate": 4.727272727272727e-06, |
|
"loss": 309.7148, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.009534660105527679, |
|
"grad_norm": 2000.1339111328125, |
|
"learning_rate": 4.767676767676768e-06, |
|
"loss": 416.5233, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.009615462309811812, |
|
"grad_norm": 1349.24755859375, |
|
"learning_rate": 4.808080808080808e-06, |
|
"loss": 312.0022, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.009696264514095944, |
|
"grad_norm": 605.5498046875, |
|
"learning_rate": 4.848484848484849e-06, |
|
"loss": 373.616, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.009777066718380077, |
|
"grad_norm": 1045.7061767578125, |
|
"learning_rate": 4.888888888888889e-06, |
|
"loss": 263.9121, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.00985786892266421, |
|
"grad_norm": 997.4849243164062, |
|
"learning_rate": 4.929292929292929e-06, |
|
"loss": 299.2633, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.009938671126948344, |
|
"grad_norm": 690.6622314453125, |
|
"learning_rate": 4.96969696969697e-06, |
|
"loss": 216.8853, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.010019473331232476, |
|
"grad_norm": 1591.2972412109375, |
|
"learning_rate": 5.010101010101011e-06, |
|
"loss": 339.3444, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.010100275535516609, |
|
"grad_norm": 749.3450317382812, |
|
"learning_rate": 5.050505050505051e-06, |
|
"loss": 315.4824, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.010181077739800742, |
|
"grad_norm": 923.2822265625, |
|
"learning_rate": 5.090909090909091e-06, |
|
"loss": 265.8641, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.010261879944084874, |
|
"grad_norm": 1161.5048828125, |
|
"learning_rate": 5.131313131313131e-06, |
|
"loss": 255.6592, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.010342682148369007, |
|
"grad_norm": 949.8252563476562, |
|
"learning_rate": 5.171717171717172e-06, |
|
"loss": 350.1398, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.01042348435265314, |
|
"grad_norm": 1018.5875854492188, |
|
"learning_rate": 5.212121212121213e-06, |
|
"loss": 313.8918, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.010504286556937274, |
|
"grad_norm": 3170.640869140625, |
|
"learning_rate": 5.2525252525252526e-06, |
|
"loss": 305.4674, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.010585088761221407, |
|
"grad_norm": 1600.813720703125, |
|
"learning_rate": 5.292929292929293e-06, |
|
"loss": 331.4024, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.01066589096550554, |
|
"grad_norm": 1063.93408203125, |
|
"learning_rate": 5.333333333333334e-06, |
|
"loss": 373.562, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.010746693169789672, |
|
"grad_norm": 665.6146240234375, |
|
"learning_rate": 5.373737373737374e-06, |
|
"loss": 235.0867, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.010827495374073804, |
|
"grad_norm": 1380.6151123046875, |
|
"learning_rate": 5.414141414141415e-06, |
|
"loss": 324.2148, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.010908297578357937, |
|
"grad_norm": 3627.843994140625, |
|
"learning_rate": 5.4545454545454545e-06, |
|
"loss": 364.2515, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.01098909978264207, |
|
"grad_norm": 850.1503295898438, |
|
"learning_rate": 5.494949494949495e-06, |
|
"loss": 408.481, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.011069901986926204, |
|
"grad_norm": 2091.482421875, |
|
"learning_rate": 5.5353535353535355e-06, |
|
"loss": 267.4172, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.011150704191210337, |
|
"grad_norm": 1496.604248046875, |
|
"learning_rate": 5.575757575757576e-06, |
|
"loss": 295.7435, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.01123150639549447, |
|
"grad_norm": 2673.033203125, |
|
"learning_rate": 5.616161616161617e-06, |
|
"loss": 251.5225, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.011312308599778602, |
|
"grad_norm": 1121.48779296875, |
|
"learning_rate": 5.656565656565657e-06, |
|
"loss": 318.3079, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.011393110804062734, |
|
"grad_norm": 866.069091796875, |
|
"learning_rate": 5.696969696969697e-06, |
|
"loss": 283.861, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.011473913008346867, |
|
"grad_norm": 1132.9764404296875, |
|
"learning_rate": 5.7373737373737374e-06, |
|
"loss": 312.9168, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.011554715212631, |
|
"grad_norm": 1029.2037353515625, |
|
"learning_rate": 5.777777777777778e-06, |
|
"loss": 307.1583, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.011635517416915134, |
|
"grad_norm": 1138.4461669921875, |
|
"learning_rate": 5.8181818181818185e-06, |
|
"loss": 266.8528, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.011716319621199267, |
|
"grad_norm": 1278.30224609375, |
|
"learning_rate": 5.858585858585859e-06, |
|
"loss": 339.6735, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.0117971218254834, |
|
"grad_norm": 908.4046630859375, |
|
"learning_rate": 5.898989898989899e-06, |
|
"loss": 331.0705, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.011877924029767532, |
|
"grad_norm": 1165.220947265625, |
|
"learning_rate": 5.93939393939394e-06, |
|
"loss": 267.2519, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.011958726234051665, |
|
"grad_norm": 3219.732177734375, |
|
"learning_rate": 5.979797979797981e-06, |
|
"loss": 485.5872, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.012039528438335797, |
|
"grad_norm": 1050.6778564453125, |
|
"learning_rate": 6.0202020202020204e-06, |
|
"loss": 325.8213, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.01212033064261993, |
|
"grad_norm": 1031.156005859375, |
|
"learning_rate": 6.060606060606061e-06, |
|
"loss": 302.1713, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.012201132846904064, |
|
"grad_norm": 1457.485107421875, |
|
"learning_rate": 6.101010101010101e-06, |
|
"loss": 267.4604, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.012281935051188197, |
|
"grad_norm": 1509.94091796875, |
|
"learning_rate": 6.141414141414142e-06, |
|
"loss": 368.4667, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.01236273725547233, |
|
"grad_norm": 1406.15673828125, |
|
"learning_rate": 6.181818181818183e-06, |
|
"loss": 349.9011, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.012443539459756462, |
|
"grad_norm": 1459.2613525390625, |
|
"learning_rate": 6.222222222222222e-06, |
|
"loss": 325.6512, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.012524341664040595, |
|
"grad_norm": 3242.78271484375, |
|
"learning_rate": 6.262626262626263e-06, |
|
"loss": 375.0032, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.012605143868324727, |
|
"grad_norm": 2443.515625, |
|
"learning_rate": 6.303030303030303e-06, |
|
"loss": 379.789, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.01268594607260886, |
|
"grad_norm": 1770.516845703125, |
|
"learning_rate": 6.343434343434344e-06, |
|
"loss": 251.652, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.012766748276892994, |
|
"grad_norm": 1482.888671875, |
|
"learning_rate": 6.383838383838384e-06, |
|
"loss": 324.2996, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.012847550481177127, |
|
"grad_norm": 1663.3935546875, |
|
"learning_rate": 6.424242424242424e-06, |
|
"loss": 301.8472, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.01292835268546126, |
|
"grad_norm": 23524.42578125, |
|
"learning_rate": 6.464646464646465e-06, |
|
"loss": 335.0892, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.013009154889745392, |
|
"grad_norm": 862.7949829101562, |
|
"learning_rate": 6.505050505050505e-06, |
|
"loss": 306.2031, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.013089957094029525, |
|
"grad_norm": 2391.976806640625, |
|
"learning_rate": 6.545454545454547e-06, |
|
"loss": 310.1344, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.013170759298313657, |
|
"grad_norm": 1260.0029296875, |
|
"learning_rate": 6.5858585858585856e-06, |
|
"loss": 372.1161, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.01325156150259779, |
|
"grad_norm": 1587.1514892578125, |
|
"learning_rate": 6.626262626262626e-06, |
|
"loss": 351.7325, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.013332363706881924, |
|
"grad_norm": 1152.1556396484375, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 373.3706, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.013413165911166057, |
|
"grad_norm": 1005.15771484375, |
|
"learning_rate": 6.707070707070707e-06, |
|
"loss": 317.8097, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.01349396811545019, |
|
"grad_norm": 1090.3779296875, |
|
"learning_rate": 6.747474747474749e-06, |
|
"loss": 315.5722, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.013574770319734322, |
|
"grad_norm": 1011.2723388671875, |
|
"learning_rate": 6.787878787878789e-06, |
|
"loss": 231.6832, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.013655572524018455, |
|
"grad_norm": 798.0405883789062, |
|
"learning_rate": 6.828282828282828e-06, |
|
"loss": 306.8582, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.013736374728302587, |
|
"grad_norm": 855.2308959960938, |
|
"learning_rate": 6.8686868686868685e-06, |
|
"loss": 282.2944, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.01381717693258672, |
|
"grad_norm": 1325.1092529296875, |
|
"learning_rate": 6.909090909090909e-06, |
|
"loss": 427.1749, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.013897979136870854, |
|
"grad_norm": 1027.2860107421875, |
|
"learning_rate": 6.9494949494949505e-06, |
|
"loss": 232.57, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.013978781341154987, |
|
"grad_norm": 1047.0118408203125, |
|
"learning_rate": 6.989898989898991e-06, |
|
"loss": 272.4593, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.01405958354543912, |
|
"grad_norm": 1453.4931640625, |
|
"learning_rate": 7.03030303030303e-06, |
|
"loss": 287.978, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.014140385749723252, |
|
"grad_norm": 1631.84521484375, |
|
"learning_rate": 7.0707070707070704e-06, |
|
"loss": 222.9585, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.014221187954007385, |
|
"grad_norm": 1109.9012451171875, |
|
"learning_rate": 7.111111111111112e-06, |
|
"loss": 263.4153, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.014301990158291518, |
|
"grad_norm": 1374.5731201171875, |
|
"learning_rate": 7.151515151515152e-06, |
|
"loss": 224.7149, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.01438279236257565, |
|
"grad_norm": 700.2552490234375, |
|
"learning_rate": 7.191919191919193e-06, |
|
"loss": 243.2667, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.014463594566859785, |
|
"grad_norm": 689.7608032226562, |
|
"learning_rate": 7.232323232323232e-06, |
|
"loss": 209.3831, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.014544396771143917, |
|
"grad_norm": 1072.593994140625, |
|
"learning_rate": 7.272727272727272e-06, |
|
"loss": 233.4359, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.01462519897542805, |
|
"grad_norm": 783.2555541992188, |
|
"learning_rate": 7.313131313131314e-06, |
|
"loss": 259.1145, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.014706001179712182, |
|
"grad_norm": 1364.049560546875, |
|
"learning_rate": 7.353535353535354e-06, |
|
"loss": 244.7944, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.014786803383996315, |
|
"grad_norm": 1947.3690185546875, |
|
"learning_rate": 7.393939393939395e-06, |
|
"loss": 276.0544, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.014867605588280448, |
|
"grad_norm": 1572.007568359375, |
|
"learning_rate": 7.434343434343435e-06, |
|
"loss": 310.638, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.01494840779256458, |
|
"grad_norm": 1419.4361572265625, |
|
"learning_rate": 7.474747474747475e-06, |
|
"loss": 409.634, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.015029209996848715, |
|
"grad_norm": 1355.0137939453125, |
|
"learning_rate": 7.515151515151516e-06, |
|
"loss": 253.9533, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.015110012201132847, |
|
"grad_norm": 768.96923828125, |
|
"learning_rate": 7.555555555555556e-06, |
|
"loss": 304.4757, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.01519081440541698, |
|
"grad_norm": 959.4989624023438, |
|
"learning_rate": 7.595959595959597e-06, |
|
"loss": 345.032, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.015271616609701113, |
|
"grad_norm": 1339.84228515625, |
|
"learning_rate": 7.636363636363638e-06, |
|
"loss": 296.0798, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.015352418813985245, |
|
"grad_norm": 1036.2491455078125, |
|
"learning_rate": 7.676767676767677e-06, |
|
"loss": 300.197, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.015433221018269378, |
|
"grad_norm": 707.27587890625, |
|
"learning_rate": 7.717171717171717e-06, |
|
"loss": 277.1603, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.015514023222553512, |
|
"grad_norm": 1203.453857421875, |
|
"learning_rate": 7.757575757575758e-06, |
|
"loss": 303.6785, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.015594825426837645, |
|
"grad_norm": 1172.7025146484375, |
|
"learning_rate": 7.797979797979799e-06, |
|
"loss": 246.9346, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.015675627631121777, |
|
"grad_norm": 1082.4605712890625, |
|
"learning_rate": 7.838383838383839e-06, |
|
"loss": 296.1259, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.01575642983540591, |
|
"grad_norm": 1456.42529296875, |
|
"learning_rate": 7.878787878787878e-06, |
|
"loss": 269.7506, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.015837232039690043, |
|
"grad_norm": 1794.72119140625, |
|
"learning_rate": 7.919191919191919e-06, |
|
"loss": 246.411, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.015918034243974175, |
|
"grad_norm": 3157.114990234375, |
|
"learning_rate": 7.959595959595959e-06, |
|
"loss": 256.127, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.015998836448258308, |
|
"grad_norm": 1361.6929931640625, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 308.1984, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.01607963865254244, |
|
"grad_norm": 1006.965087890625, |
|
"learning_rate": 8.040404040404042e-06, |
|
"loss": 341.8693, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.016160440856826573, |
|
"grad_norm": 1541.38720703125, |
|
"learning_rate": 8.080808080808082e-06, |
|
"loss": 274.4633, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.016241243061110706, |
|
"grad_norm": 1735.104248046875, |
|
"learning_rate": 8.121212121212121e-06, |
|
"loss": 258.4104, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.01632204526539484, |
|
"grad_norm": 2176.154052734375, |
|
"learning_rate": 8.161616161616162e-06, |
|
"loss": 314.5508, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.016402847469678974, |
|
"grad_norm": 1052.533447265625, |
|
"learning_rate": 8.202020202020202e-06, |
|
"loss": 312.927, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.016483649673963107, |
|
"grad_norm": 1208.69189453125, |
|
"learning_rate": 8.242424242424243e-06, |
|
"loss": 254.359, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.01656445187824724, |
|
"grad_norm": 1978.5025634765625, |
|
"learning_rate": 8.282828282828283e-06, |
|
"loss": 299.8744, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.016645254082531372, |
|
"grad_norm": 3605.6494140625, |
|
"learning_rate": 8.323232323232322e-06, |
|
"loss": 298.715, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.016726056286815505, |
|
"grad_norm": 1599.973876953125, |
|
"learning_rate": 8.363636363636365e-06, |
|
"loss": 258.9758, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.016806858491099638, |
|
"grad_norm": 1183.451904296875, |
|
"learning_rate": 8.404040404040405e-06, |
|
"loss": 352.8176, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.01688766069538377, |
|
"grad_norm": 1582.7120361328125, |
|
"learning_rate": 8.444444444444446e-06, |
|
"loss": 309.3484, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.016968462899667903, |
|
"grad_norm": 932.9716796875, |
|
"learning_rate": 8.484848484848486e-06, |
|
"loss": 255.265, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.017049265103952035, |
|
"grad_norm": 922.5059814453125, |
|
"learning_rate": 8.525252525252525e-06, |
|
"loss": 253.6583, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.017130067308236168, |
|
"grad_norm": 1196.361083984375, |
|
"learning_rate": 8.565656565656566e-06, |
|
"loss": 323.4844, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.0172108695125203, |
|
"grad_norm": 1005.9546508789062, |
|
"learning_rate": 8.606060606060606e-06, |
|
"loss": 351.4896, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.017291671716804433, |
|
"grad_norm": 1585.8636474609375, |
|
"learning_rate": 8.646464646464647e-06, |
|
"loss": 325.0701, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.017372473921088566, |
|
"grad_norm": 3758.6982421875, |
|
"learning_rate": 8.686868686868687e-06, |
|
"loss": 203.9509, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.0174532761253727, |
|
"grad_norm": 1602.480224609375, |
|
"learning_rate": 8.727272727272728e-06, |
|
"loss": 294.8089, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.017534078329656835, |
|
"grad_norm": 1571.1812744140625, |
|
"learning_rate": 8.767676767676768e-06, |
|
"loss": 220.656, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.017614880533940967, |
|
"grad_norm": 1073.2261962890625, |
|
"learning_rate": 8.808080808080809e-06, |
|
"loss": 221.6837, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.0176956827382251, |
|
"grad_norm": 1125.1983642578125, |
|
"learning_rate": 8.84848484848485e-06, |
|
"loss": 286.0061, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.017776484942509233, |
|
"grad_norm": 1008.795654296875, |
|
"learning_rate": 8.88888888888889e-06, |
|
"loss": 282.5281, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.017857287146793365, |
|
"grad_norm": 1810.7894287109375, |
|
"learning_rate": 8.92929292929293e-06, |
|
"loss": 226.4816, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.017938089351077498, |
|
"grad_norm": 894.2589721679688, |
|
"learning_rate": 8.96969696969697e-06, |
|
"loss": 251.6401, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.01801889155536163, |
|
"grad_norm": 1232.9827880859375, |
|
"learning_rate": 9.01010101010101e-06, |
|
"loss": 248.7544, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.018099693759645763, |
|
"grad_norm": 1993.9267578125, |
|
"learning_rate": 9.05050505050505e-06, |
|
"loss": 256.8296, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.018180495963929896, |
|
"grad_norm": 967.433837890625, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 245.6321, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.018261298168214028, |
|
"grad_norm": 2560.1728515625, |
|
"learning_rate": 9.131313131313132e-06, |
|
"loss": 234.9478, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.01834210037249816, |
|
"grad_norm": 590.1747436523438, |
|
"learning_rate": 9.171717171717172e-06, |
|
"loss": 236.6796, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.018422902576782293, |
|
"grad_norm": 1504.942626953125, |
|
"learning_rate": 9.212121212121213e-06, |
|
"loss": 367.3362, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.018503704781066426, |
|
"grad_norm": 977.7069091796875, |
|
"learning_rate": 9.252525252525253e-06, |
|
"loss": 276.6067, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.01858450698535056, |
|
"grad_norm": 1384.83203125, |
|
"learning_rate": 9.292929292929294e-06, |
|
"loss": 340.7896, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.018665309189634695, |
|
"grad_norm": 1302.6343994140625, |
|
"learning_rate": 9.333333333333334e-06, |
|
"loss": 198.1527, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.018746111393918827, |
|
"grad_norm": 836.4732666015625, |
|
"learning_rate": 9.373737373737375e-06, |
|
"loss": 209.2565, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.01882691359820296, |
|
"grad_norm": 1400.0604248046875, |
|
"learning_rate": 9.414141414141414e-06, |
|
"loss": 305.2968, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.018907715802487093, |
|
"grad_norm": 1454.78125, |
|
"learning_rate": 9.454545454545454e-06, |
|
"loss": 260.4474, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.018988518006771225, |
|
"grad_norm": 4679.6923828125, |
|
"learning_rate": 9.494949494949495e-06, |
|
"loss": 247.9314, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.019069320211055358, |
|
"grad_norm": 6640.1201171875, |
|
"learning_rate": 9.535353535353535e-06, |
|
"loss": 259.6065, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.01915012241533949, |
|
"grad_norm": 1564.781982421875, |
|
"learning_rate": 9.575757575757578e-06, |
|
"loss": 245.3016, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.019230924619623623, |
|
"grad_norm": 2078.74267578125, |
|
"learning_rate": 9.616161616161616e-06, |
|
"loss": 284.7133, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.019311726823907756, |
|
"grad_norm": 1441.0360107421875, |
|
"learning_rate": 9.656565656565657e-06, |
|
"loss": 270.4787, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.01939252902819189, |
|
"grad_norm": 744.0514526367188, |
|
"learning_rate": 9.696969696969698e-06, |
|
"loss": 200.2359, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.01947333123247602, |
|
"grad_norm": 1168.913818359375, |
|
"learning_rate": 9.737373737373738e-06, |
|
"loss": 269.4149, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.019554133436760154, |
|
"grad_norm": 3848.146484375, |
|
"learning_rate": 9.777777777777779e-06, |
|
"loss": 250.6479, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.019634935641044286, |
|
"grad_norm": 798.60595703125, |
|
"learning_rate": 9.818181818181818e-06, |
|
"loss": 245.7341, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.01971573784532842, |
|
"grad_norm": 2139.72265625, |
|
"learning_rate": 9.858585858585858e-06, |
|
"loss": 275.653, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.019796540049612555, |
|
"grad_norm": 1223.6392822265625, |
|
"learning_rate": 9.898989898989899e-06, |
|
"loss": 271.2685, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.019877342253896688, |
|
"grad_norm": 1007.25439453125, |
|
"learning_rate": 9.93939393939394e-06, |
|
"loss": 235.147, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.01995814445818082, |
|
"grad_norm": 1195.012939453125, |
|
"learning_rate": 9.979797979797981e-06, |
|
"loss": 302.4475, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.020038946662464953, |
|
"grad_norm": 1530.9473876953125, |
|
"learning_rate": 1.0020202020202022e-05, |
|
"loss": 259.1597, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.020119748866749085, |
|
"grad_norm": 1403.546142578125, |
|
"learning_rate": 1.006060606060606e-05, |
|
"loss": 348.2433, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.020200551071033218, |
|
"grad_norm": 1328.4873046875, |
|
"learning_rate": 1.0101010101010101e-05, |
|
"loss": 289.0594, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.02028135327531735, |
|
"grad_norm": 1171.5048828125, |
|
"learning_rate": 1.0141414141414142e-05, |
|
"loss": 197.3946, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.020362155479601483, |
|
"grad_norm": 1274.6544189453125, |
|
"learning_rate": 1.0181818181818182e-05, |
|
"loss": 295.2945, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.020442957683885616, |
|
"grad_norm": 2023.71337890625, |
|
"learning_rate": 1.0222222222222223e-05, |
|
"loss": 271.9707, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.02052375988816975, |
|
"grad_norm": 1765.538818359375, |
|
"learning_rate": 1.0262626262626262e-05, |
|
"loss": 246.6141, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.02060456209245388, |
|
"grad_norm": 859.3914794921875, |
|
"learning_rate": 1.0303030303030304e-05, |
|
"loss": 276.1375, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.020685364296738014, |
|
"grad_norm": 1024.8955078125, |
|
"learning_rate": 1.0343434343434345e-05, |
|
"loss": 239.6529, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.020766166501022146, |
|
"grad_norm": 1012.91455078125, |
|
"learning_rate": 1.0383838383838385e-05, |
|
"loss": 199.6656, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.02084696870530628, |
|
"grad_norm": 1371.8551025390625, |
|
"learning_rate": 1.0424242424242426e-05, |
|
"loss": 247.0821, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.020927770909590415, |
|
"grad_norm": 1338.343017578125, |
|
"learning_rate": 1.0464646464646465e-05, |
|
"loss": 260.7634, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.021008573113874548, |
|
"grad_norm": 917.8023071289062, |
|
"learning_rate": 1.0505050505050505e-05, |
|
"loss": 255.2869, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.02108937531815868, |
|
"grad_norm": 1167.3427734375, |
|
"learning_rate": 1.0545454545454546e-05, |
|
"loss": 255.108, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.021170177522442813, |
|
"grad_norm": 1662.1556396484375, |
|
"learning_rate": 1.0585858585858586e-05, |
|
"loss": 304.7596, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.021250979726726946, |
|
"grad_norm": 1393.7713623046875, |
|
"learning_rate": 1.0626262626262627e-05, |
|
"loss": 298.9948, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.02133178193101108, |
|
"grad_norm": 4169.89306640625, |
|
"learning_rate": 1.0666666666666667e-05, |
|
"loss": 256.329, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.02141258413529521, |
|
"grad_norm": 961.8526000976562, |
|
"learning_rate": 1.0707070707070708e-05, |
|
"loss": 268.2124, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.021493386339579344, |
|
"grad_norm": 2141.90869140625, |
|
"learning_rate": 1.0747474747474748e-05, |
|
"loss": 279.6773, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.021574188543863476, |
|
"grad_norm": 1454.77392578125, |
|
"learning_rate": 1.0787878787878789e-05, |
|
"loss": 250.6374, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.02165499074814761, |
|
"grad_norm": 1119.782958984375, |
|
"learning_rate": 1.082828282828283e-05, |
|
"loss": 299.24, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.02173579295243174, |
|
"grad_norm": 1507.88916015625, |
|
"learning_rate": 1.086868686868687e-05, |
|
"loss": 304.945, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.021816595156715874, |
|
"grad_norm": 1235.4326171875, |
|
"learning_rate": 1.0909090909090909e-05, |
|
"loss": 329.6972, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.021897397361000007, |
|
"grad_norm": 1516.6436767578125, |
|
"learning_rate": 1.094949494949495e-05, |
|
"loss": 276.193, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.02197819956528414, |
|
"grad_norm": 1332.3309326171875, |
|
"learning_rate": 1.098989898989899e-05, |
|
"loss": 309.7016, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.022059001769568275, |
|
"grad_norm": 1349.0360107421875, |
|
"learning_rate": 1.103030303030303e-05, |
|
"loss": 423.0254, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.022139803973852408, |
|
"grad_norm": 2262.348876953125, |
|
"learning_rate": 1.1070707070707071e-05, |
|
"loss": 260.9447, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.02222060617813654, |
|
"grad_norm": 1374.3009033203125, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 283.2908, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.022301408382420673, |
|
"grad_norm": 768.2625732421875, |
|
"learning_rate": 1.1151515151515152e-05, |
|
"loss": 243.0171, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.022382210586704806, |
|
"grad_norm": 4175.06396484375, |
|
"learning_rate": 1.1191919191919193e-05, |
|
"loss": 236.5765, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.02246301279098894, |
|
"grad_norm": 1220.933837890625, |
|
"learning_rate": 1.1232323232323233e-05, |
|
"loss": 254.5181, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.02254381499527307, |
|
"grad_norm": 1589.7581787109375, |
|
"learning_rate": 1.1272727272727274e-05, |
|
"loss": 183.8809, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.022624617199557204, |
|
"grad_norm": 1031.3692626953125, |
|
"learning_rate": 1.1313131313131314e-05, |
|
"loss": 246.612, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.022705419403841336, |
|
"grad_norm": 1396.37744140625, |
|
"learning_rate": 1.1353535353535353e-05, |
|
"loss": 224.2803, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.02278622160812547, |
|
"grad_norm": 1879.5634765625, |
|
"learning_rate": 1.1393939393939394e-05, |
|
"loss": 287.104, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.0228670238124096, |
|
"grad_norm": 1376.0625, |
|
"learning_rate": 1.1434343434343434e-05, |
|
"loss": 284.0272, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.022947826016693734, |
|
"grad_norm": 682.961181640625, |
|
"learning_rate": 1.1474747474747475e-05, |
|
"loss": 210.3876, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.023028628220977867, |
|
"grad_norm": 1677.2169189453125, |
|
"learning_rate": 1.1515151515151517e-05, |
|
"loss": 326.8289, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.023109430425262, |
|
"grad_norm": 733.2987060546875, |
|
"learning_rate": 1.1555555555555556e-05, |
|
"loss": 230.43, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.023190232629546136, |
|
"grad_norm": 630.212890625, |
|
"learning_rate": 1.1595959595959597e-05, |
|
"loss": 205.1732, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.023271034833830268, |
|
"grad_norm": 1535.36572265625, |
|
"learning_rate": 1.1636363636363637e-05, |
|
"loss": 304.9371, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.0233518370381144, |
|
"grad_norm": 1065.3255615234375, |
|
"learning_rate": 1.1676767676767678e-05, |
|
"loss": 219.3441, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.023432639242398533, |
|
"grad_norm": 2319.004638671875, |
|
"learning_rate": 1.1717171717171718e-05, |
|
"loss": 274.9734, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.023513441446682666, |
|
"grad_norm": 1362.914794921875, |
|
"learning_rate": 1.1757575757575757e-05, |
|
"loss": 214.0071, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.0235942436509668, |
|
"grad_norm": 1748.42333984375, |
|
"learning_rate": 1.1797979797979798e-05, |
|
"loss": 247.8715, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.02367504585525093, |
|
"grad_norm": 1357.4864501953125, |
|
"learning_rate": 1.1838383838383838e-05, |
|
"loss": 367.5896, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.023755848059535064, |
|
"grad_norm": 1028.1129150390625, |
|
"learning_rate": 1.187878787878788e-05, |
|
"loss": 248.4116, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.023836650263819197, |
|
"grad_norm": 1497.2218017578125, |
|
"learning_rate": 1.1919191919191921e-05, |
|
"loss": 309.2206, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.02391745246810333, |
|
"grad_norm": 830.5894775390625, |
|
"learning_rate": 1.1959595959595961e-05, |
|
"loss": 203.645, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.023998254672387462, |
|
"grad_norm": 1067.825439453125, |
|
"learning_rate": 1.2e-05, |
|
"loss": 226.1753, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.024079056876671594, |
|
"grad_norm": 1088.949462890625, |
|
"learning_rate": 1.2040404040404041e-05, |
|
"loss": 235.4068, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.024159859080955727, |
|
"grad_norm": 1378.7232666015625, |
|
"learning_rate": 1.2080808080808081e-05, |
|
"loss": 258.9663, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.02424066128523986, |
|
"grad_norm": 1665.1182861328125, |
|
"learning_rate": 1.2121212121212122e-05, |
|
"loss": 178.7374, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.024321463489523996, |
|
"grad_norm": 1254.1650390625, |
|
"learning_rate": 1.2161616161616162e-05, |
|
"loss": 196.0946, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.02440226569380813, |
|
"grad_norm": 1680.0679931640625, |
|
"learning_rate": 1.2202020202020201e-05, |
|
"loss": 232.687, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.02448306789809226, |
|
"grad_norm": 1063.8585205078125, |
|
"learning_rate": 1.2242424242424242e-05, |
|
"loss": 247.0947, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.024563870102376394, |
|
"grad_norm": 3708.090087890625, |
|
"learning_rate": 1.2282828282828284e-05, |
|
"loss": 252.8591, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.024644672306660526, |
|
"grad_norm": 1465.7147216796875, |
|
"learning_rate": 1.2323232323232325e-05, |
|
"loss": 262.5838, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.02472547451094466, |
|
"grad_norm": 1327.937255859375, |
|
"learning_rate": 1.2363636363636365e-05, |
|
"loss": 363.5593, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.02480627671522879, |
|
"grad_norm": 1344.0130615234375, |
|
"learning_rate": 1.2404040404040404e-05, |
|
"loss": 257.7762, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.024887078919512924, |
|
"grad_norm": 1054.320068359375, |
|
"learning_rate": 1.2444444444444445e-05, |
|
"loss": 247.01, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.024967881123797057, |
|
"grad_norm": 1068.103515625, |
|
"learning_rate": 1.2484848484848485e-05, |
|
"loss": 248.3725, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.02504868332808119, |
|
"grad_norm": 1697.66259765625, |
|
"learning_rate": 1.2525252525252526e-05, |
|
"loss": 258.5855, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.025129485532365322, |
|
"grad_norm": 2154.507080078125, |
|
"learning_rate": 1.2565656565656566e-05, |
|
"loss": 324.4505, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.025210287736649455, |
|
"grad_norm": 1405.111083984375, |
|
"learning_rate": 1.2606060606060607e-05, |
|
"loss": 201.9885, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.025291089940933587, |
|
"grad_norm": 1943.2344970703125, |
|
"learning_rate": 1.2646464646464647e-05, |
|
"loss": 279.3126, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.02537189214521772, |
|
"grad_norm": 1883.3538818359375, |
|
"learning_rate": 1.2686868686868688e-05, |
|
"loss": 305.4725, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.025452694349501856, |
|
"grad_norm": 2943.758544921875, |
|
"learning_rate": 1.2727272727272727e-05, |
|
"loss": 282.1084, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.02553349655378599, |
|
"grad_norm": 1242.160400390625, |
|
"learning_rate": 1.2767676767676767e-05, |
|
"loss": 220.8162, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.02561429875807012, |
|
"grad_norm": 2627.211181640625, |
|
"learning_rate": 1.2808080808080808e-05, |
|
"loss": 303.4214, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.025695100962354254, |
|
"grad_norm": 1310.1988525390625, |
|
"learning_rate": 1.2848484848484848e-05, |
|
"loss": 259.6753, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.025775903166638386, |
|
"grad_norm": 1910.4666748046875, |
|
"learning_rate": 1.2888888888888889e-05, |
|
"loss": 273.7454, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.02585670537092252, |
|
"grad_norm": 740.5687255859375, |
|
"learning_rate": 1.292929292929293e-05, |
|
"loss": 292.6491, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.02593750757520665, |
|
"grad_norm": 1202.99462890625, |
|
"learning_rate": 1.296969696969697e-05, |
|
"loss": 243.2686, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.026018309779490784, |
|
"grad_norm": 2174.3525390625, |
|
"learning_rate": 1.301010101010101e-05, |
|
"loss": 276.3461, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.026099111983774917, |
|
"grad_norm": 1177.9141845703125, |
|
"learning_rate": 1.3050505050505051e-05, |
|
"loss": 246.7616, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.02617991418805905, |
|
"grad_norm": 1337.3155517578125, |
|
"learning_rate": 1.3090909090909093e-05, |
|
"loss": 279.5415, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.026260716392343182, |
|
"grad_norm": 970.5191040039062, |
|
"learning_rate": 1.3131313131313134e-05, |
|
"loss": 240.0207, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.026341518596627315, |
|
"grad_norm": 1119.1689453125, |
|
"learning_rate": 1.3171717171717171e-05, |
|
"loss": 242.7696, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.026422320800911447, |
|
"grad_norm": 4005.26318359375, |
|
"learning_rate": 1.3212121212121212e-05, |
|
"loss": 288.4158, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.02650312300519558, |
|
"grad_norm": 2148.187255859375, |
|
"learning_rate": 1.3252525252525252e-05, |
|
"loss": 224.5919, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.026583925209479716, |
|
"grad_norm": 1367.2222900390625, |
|
"learning_rate": 1.3292929292929293e-05, |
|
"loss": 212.8641, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.02666472741376385, |
|
"grad_norm": 1278.0506591796875, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 222.3518, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.02674552961804798, |
|
"grad_norm": 1361.965087890625, |
|
"learning_rate": 1.3373737373737374e-05, |
|
"loss": 257.0285, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.026826331822332114, |
|
"grad_norm": 1330.8619384765625, |
|
"learning_rate": 1.3414141414141414e-05, |
|
"loss": 293.7213, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.026907134026616247, |
|
"grad_norm": 1107.8526611328125, |
|
"learning_rate": 1.3454545454545457e-05, |
|
"loss": 198.6048, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.02698793623090038, |
|
"grad_norm": 1048.5009765625, |
|
"learning_rate": 1.3494949494949497e-05, |
|
"loss": 197.4301, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.027068738435184512, |
|
"grad_norm": 2446.47119140625, |
|
"learning_rate": 1.3535353535353538e-05, |
|
"loss": 287.2842, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.027149540639468644, |
|
"grad_norm": 1396.544921875, |
|
"learning_rate": 1.3575757575757578e-05, |
|
"loss": 258.2231, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.027230342843752777, |
|
"grad_norm": 904.5388793945312, |
|
"learning_rate": 1.3616161616161615e-05, |
|
"loss": 376.3808, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.02731114504803691, |
|
"grad_norm": 1247.5994873046875, |
|
"learning_rate": 1.3656565656565656e-05, |
|
"loss": 318.9619, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.027391947252321042, |
|
"grad_norm": 1123.76220703125, |
|
"learning_rate": 1.3696969696969697e-05, |
|
"loss": 235.3794, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.027472749456605175, |
|
"grad_norm": 2435.860107421875, |
|
"learning_rate": 1.3737373737373737e-05, |
|
"loss": 290.7648, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.027553551660889308, |
|
"grad_norm": 1895.08642578125, |
|
"learning_rate": 1.3777777777777778e-05, |
|
"loss": 228.9499, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.02763435386517344, |
|
"grad_norm": 1057.9578857421875, |
|
"learning_rate": 1.3818181818181818e-05, |
|
"loss": 199.0401, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.027715156069457576, |
|
"grad_norm": 1296.09130859375, |
|
"learning_rate": 1.385858585858586e-05, |
|
"loss": 238.6514, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.02779595827374171, |
|
"grad_norm": 3411.5341796875, |
|
"learning_rate": 1.3898989898989901e-05, |
|
"loss": 299.6798, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.02787676047802584, |
|
"grad_norm": 1198.8946533203125, |
|
"learning_rate": 1.3939393939393942e-05, |
|
"loss": 258.3361, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.027957562682309974, |
|
"grad_norm": 839.223388671875, |
|
"learning_rate": 1.3979797979797982e-05, |
|
"loss": 222.8733, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.028038364886594107, |
|
"grad_norm": 1462.1505126953125, |
|
"learning_rate": 1.402020202020202e-05, |
|
"loss": 206.9738, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.02811916709087824, |
|
"grad_norm": 1066.7890625, |
|
"learning_rate": 1.406060606060606e-05, |
|
"loss": 190.5506, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.028199969295162372, |
|
"grad_norm": 2753.951171875, |
|
"learning_rate": 1.41010101010101e-05, |
|
"loss": 210.7065, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.028280771499446505, |
|
"grad_norm": 1700.187744140625, |
|
"learning_rate": 1.4141414141414141e-05, |
|
"loss": 239.0851, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.028361573703730637, |
|
"grad_norm": 1913.965576171875, |
|
"learning_rate": 1.4181818181818181e-05, |
|
"loss": 244.5272, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.02844237590801477, |
|
"grad_norm": 1347.6934814453125, |
|
"learning_rate": 1.4222222222222224e-05, |
|
"loss": 219.5849, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.028523178112298903, |
|
"grad_norm": 894.3698120117188, |
|
"learning_rate": 1.4262626262626264e-05, |
|
"loss": 261.3107, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.028603980316583035, |
|
"grad_norm": 649.42236328125, |
|
"learning_rate": 1.4303030303030305e-05, |
|
"loss": 202.5557, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.028684782520867168, |
|
"grad_norm": 824.8812255859375, |
|
"learning_rate": 1.4343434343434345e-05, |
|
"loss": 245.8003, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.0287655847251513, |
|
"grad_norm": 828.0931396484375, |
|
"learning_rate": 1.4383838383838386e-05, |
|
"loss": 260.2875, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.028846386929435437, |
|
"grad_norm": 1334.4947509765625, |
|
"learning_rate": 1.4424242424242426e-05, |
|
"loss": 232.7898, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.02892718913371957, |
|
"grad_norm": 1371.1171875, |
|
"learning_rate": 1.4464646464646464e-05, |
|
"loss": 418.4771, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.029007991338003702, |
|
"grad_norm": 18497.5234375, |
|
"learning_rate": 1.4505050505050504e-05, |
|
"loss": 303.3979, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.029088793542287834, |
|
"grad_norm": 1640.417724609375, |
|
"learning_rate": 1.4545454545454545e-05, |
|
"loss": 246.1203, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.029169595746571967, |
|
"grad_norm": 866.4635620117188, |
|
"learning_rate": 1.4585858585858587e-05, |
|
"loss": 227.0032, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.0292503979508561, |
|
"grad_norm": 1206.3389892578125, |
|
"learning_rate": 1.4626262626262627e-05, |
|
"loss": 240.7797, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.029331200155140232, |
|
"grad_norm": 1930.5679931640625, |
|
"learning_rate": 1.4666666666666668e-05, |
|
"loss": 244.9207, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.029412002359424365, |
|
"grad_norm": 1362.0755615234375, |
|
"learning_rate": 1.4707070707070709e-05, |
|
"loss": 223.5896, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.029492804563708497, |
|
"grad_norm": 1778.240478515625, |
|
"learning_rate": 1.4747474747474749e-05, |
|
"loss": 233.0804, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.02957360676799263, |
|
"grad_norm": 1185.7432861328125, |
|
"learning_rate": 1.478787878787879e-05, |
|
"loss": 269.5211, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.029654408972276763, |
|
"grad_norm": 1272.7274169921875, |
|
"learning_rate": 1.482828282828283e-05, |
|
"loss": 256.0854, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.029735211176560895, |
|
"grad_norm": 3724.482421875, |
|
"learning_rate": 1.486868686868687e-05, |
|
"loss": 220.0564, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.029816013380845028, |
|
"grad_norm": 1362.2408447265625, |
|
"learning_rate": 1.4909090909090908e-05, |
|
"loss": 196.9579, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.02989681558512916, |
|
"grad_norm": 1142.985107421875, |
|
"learning_rate": 1.494949494949495e-05, |
|
"loss": 298.3712, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.029977617789413297, |
|
"grad_norm": 1711.4461669921875, |
|
"learning_rate": 1.498989898989899e-05, |
|
"loss": 248.673, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.03005841999369743, |
|
"grad_norm": 1854.973876953125, |
|
"learning_rate": 1.5030303030303031e-05, |
|
"loss": 178.4528, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.030139222197981562, |
|
"grad_norm": 2415.3564453125, |
|
"learning_rate": 1.5070707070707072e-05, |
|
"loss": 279.0313, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.030220024402265695, |
|
"grad_norm": 1113.0447998046875, |
|
"learning_rate": 1.5111111111111112e-05, |
|
"loss": 263.2642, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.030300826606549827, |
|
"grad_norm": 1523.1632080078125, |
|
"learning_rate": 1.5151515151515153e-05, |
|
"loss": 292.6833, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.03038162881083396, |
|
"grad_norm": 1810.5382080078125, |
|
"learning_rate": 1.5191919191919193e-05, |
|
"loss": 260.2465, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.030462431015118092, |
|
"grad_norm": 2051.318115234375, |
|
"learning_rate": 1.5232323232323234e-05, |
|
"loss": 249.5686, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.030543233219402225, |
|
"grad_norm": 1145.482421875, |
|
"learning_rate": 1.5272727272727276e-05, |
|
"loss": 217.0, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.030624035423686358, |
|
"grad_norm": 1456.9969482421875, |
|
"learning_rate": 1.531313131313131e-05, |
|
"loss": 247.1355, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.03070483762797049, |
|
"grad_norm": 2063.9072265625, |
|
"learning_rate": 1.5353535353535354e-05, |
|
"loss": 317.8373, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.030785639832254623, |
|
"grad_norm": 1188.59130859375, |
|
"learning_rate": 1.5393939393939393e-05, |
|
"loss": 251.0659, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.030866442036538756, |
|
"grad_norm": 542.1653442382812, |
|
"learning_rate": 1.5434343434343435e-05, |
|
"loss": 205.6288, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.030947244240822888, |
|
"grad_norm": 858.66552734375, |
|
"learning_rate": 1.5474747474747474e-05, |
|
"loss": 261.1724, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.031028046445107024, |
|
"grad_norm": 1392.4208984375, |
|
"learning_rate": 1.5515151515151516e-05, |
|
"loss": 263.898, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.031108848649391157, |
|
"grad_norm": 1089.10888671875, |
|
"learning_rate": 1.5555555555555555e-05, |
|
"loss": 263.4895, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.03118965085367529, |
|
"grad_norm": 1323.1083984375, |
|
"learning_rate": 1.5595959595959597e-05, |
|
"loss": 224.5914, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.03127045305795942, |
|
"grad_norm": 748.7206420898438, |
|
"learning_rate": 1.563636363636364e-05, |
|
"loss": 185.8181, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.031351255262243555, |
|
"grad_norm": 1530.072021484375, |
|
"learning_rate": 1.5676767676767678e-05, |
|
"loss": 295.1081, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.031432057466527684, |
|
"grad_norm": 1390.1978759765625, |
|
"learning_rate": 1.571717171717172e-05, |
|
"loss": 221.7333, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.03151285967081182, |
|
"grad_norm": 1188.6934814453125, |
|
"learning_rate": 1.5757575757575756e-05, |
|
"loss": 231.4922, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.031593661875095956, |
|
"grad_norm": 1810.8616943359375, |
|
"learning_rate": 1.5797979797979798e-05, |
|
"loss": 226.2008, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.031674464079380085, |
|
"grad_norm": 1351.2021484375, |
|
"learning_rate": 1.5838383838383837e-05, |
|
"loss": 213.8082, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.03175526628366422, |
|
"grad_norm": 1504.8511962890625, |
|
"learning_rate": 1.587878787878788e-05, |
|
"loss": 237.731, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.03183606848794835, |
|
"grad_norm": 3990.205810546875, |
|
"learning_rate": 1.5919191919191918e-05, |
|
"loss": 308.5875, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.03191687069223249, |
|
"grad_norm": 1052.4140625, |
|
"learning_rate": 1.595959595959596e-05, |
|
"loss": 173.6135, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.031997672896516616, |
|
"grad_norm": 818.5986328125, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 255.928, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.03207847510080075, |
|
"grad_norm": 3015.482666015625, |
|
"learning_rate": 1.604040404040404e-05, |
|
"loss": 246.6157, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.03215927730508488, |
|
"grad_norm": 1520.350341796875, |
|
"learning_rate": 1.6080808080808084e-05, |
|
"loss": 294.5478, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.03224007950936902, |
|
"grad_norm": 1362.8385009765625, |
|
"learning_rate": 1.6121212121212123e-05, |
|
"loss": 263.2382, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.032320881713653146, |
|
"grad_norm": 1330.2135009765625, |
|
"learning_rate": 1.6161616161616165e-05, |
|
"loss": 227.9952, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.03240168391793728, |
|
"grad_norm": 2001.479248046875, |
|
"learning_rate": 1.62020202020202e-05, |
|
"loss": 373.7298, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.03248248612222141, |
|
"grad_norm": 670.8789672851562, |
|
"learning_rate": 1.6242424242424243e-05, |
|
"loss": 252.2481, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.03256328832650555, |
|
"grad_norm": 1504.35205078125, |
|
"learning_rate": 1.628282828282828e-05, |
|
"loss": 259.7328, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.03264409053078968, |
|
"grad_norm": 1177.47509765625, |
|
"learning_rate": 1.6323232323232324e-05, |
|
"loss": 220.6592, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.03272489273507381, |
|
"grad_norm": 889.9537353515625, |
|
"learning_rate": 1.6363636363636366e-05, |
|
"loss": 210.0868, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.03280569493935795, |
|
"grad_norm": 1655.767333984375, |
|
"learning_rate": 1.6404040404040405e-05, |
|
"loss": 247.7082, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.03288649714364208, |
|
"grad_norm": 1741.26416015625, |
|
"learning_rate": 1.6444444444444447e-05, |
|
"loss": 213.1305, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.032967299347926214, |
|
"grad_norm": 1701.3470458984375, |
|
"learning_rate": 1.6484848484848486e-05, |
|
"loss": 197.4172, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.03304810155221034, |
|
"grad_norm": 1241.48876953125, |
|
"learning_rate": 1.6525252525252528e-05, |
|
"loss": 200.895, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.03312890375649448, |
|
"grad_norm": 4305.5234375, |
|
"learning_rate": 1.6565656565656567e-05, |
|
"loss": 270.1561, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.03320970596077861, |
|
"grad_norm": 1233.9559326171875, |
|
"learning_rate": 1.6606060606060606e-05, |
|
"loss": 234.697, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.033290508165062745, |
|
"grad_norm": 1864.9722900390625, |
|
"learning_rate": 1.6646464646464645e-05, |
|
"loss": 207.7519, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.033371310369346874, |
|
"grad_norm": 696.45654296875, |
|
"learning_rate": 1.6686868686868687e-05, |
|
"loss": 260.9977, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.03345211257363101, |
|
"grad_norm": 1083.8914794921875, |
|
"learning_rate": 1.672727272727273e-05, |
|
"loss": 296.5648, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.03353291477791514, |
|
"grad_norm": 787.8980102539062, |
|
"learning_rate": 1.6767676767676768e-05, |
|
"loss": 252.0068, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.033613716982199275, |
|
"grad_norm": 3963.899658203125, |
|
"learning_rate": 1.680808080808081e-05, |
|
"loss": 239.0976, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.033694519186483404, |
|
"grad_norm": 1345.8841552734375, |
|
"learning_rate": 1.684848484848485e-05, |
|
"loss": 200.5568, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.03377532139076754, |
|
"grad_norm": 1667.1441650390625, |
|
"learning_rate": 1.688888888888889e-05, |
|
"loss": 246.528, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.033856123595051676, |
|
"grad_norm": 941.3829956054688, |
|
"learning_rate": 1.692929292929293e-05, |
|
"loss": 188.0032, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.033936925799335806, |
|
"grad_norm": 2224.00048828125, |
|
"learning_rate": 1.6969696969696972e-05, |
|
"loss": 233.2688, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.03401772800361994, |
|
"grad_norm": 990.577880859375, |
|
"learning_rate": 1.701010101010101e-05, |
|
"loss": 229.0408, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.03409853020790407, |
|
"grad_norm": 1741.591064453125, |
|
"learning_rate": 1.705050505050505e-05, |
|
"loss": 210.8973, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.03417933241218821, |
|
"grad_norm": 1565.2149658203125, |
|
"learning_rate": 1.7090909090909092e-05, |
|
"loss": 172.9691, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.034260134616472336, |
|
"grad_norm": 1411.6668701171875, |
|
"learning_rate": 1.713131313131313e-05, |
|
"loss": 223.8018, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.03434093682075647, |
|
"grad_norm": 849.447998046875, |
|
"learning_rate": 1.7171717171717173e-05, |
|
"loss": 277.72, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.0344217390250406, |
|
"grad_norm": 1456.3353271484375, |
|
"learning_rate": 1.7212121212121212e-05, |
|
"loss": 269.1795, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.03450254122932474, |
|
"grad_norm": 2039.048583984375, |
|
"learning_rate": 1.7252525252525255e-05, |
|
"loss": 203.6644, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.03458334343360887, |
|
"grad_norm": 1037.1063232421875, |
|
"learning_rate": 1.7292929292929293e-05, |
|
"loss": 268.1442, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.034664145637893, |
|
"grad_norm": 1481.98095703125, |
|
"learning_rate": 1.7333333333333336e-05, |
|
"loss": 246.1609, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.03474494784217713, |
|
"grad_norm": 1042.147216796875, |
|
"learning_rate": 1.7373737373737375e-05, |
|
"loss": 360.0711, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.03482575004646127, |
|
"grad_norm": 1008.8258666992188, |
|
"learning_rate": 1.7414141414141417e-05, |
|
"loss": 254.1684, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.0349065522507454, |
|
"grad_norm": 1818.73681640625, |
|
"learning_rate": 1.7454545454545456e-05, |
|
"loss": 248.8469, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.03498735445502953, |
|
"grad_norm": 2598.832763671875, |
|
"learning_rate": 1.7494949494949494e-05, |
|
"loss": 215.4962, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.03506815665931367, |
|
"grad_norm": 5505.1572265625, |
|
"learning_rate": 1.7535353535353537e-05, |
|
"loss": 206.1825, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.0351489588635978, |
|
"grad_norm": 872.9111328125, |
|
"learning_rate": 1.7575757575757576e-05, |
|
"loss": 226.9096, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.035229761067881935, |
|
"grad_norm": 1309.483154296875, |
|
"learning_rate": 1.7616161616161618e-05, |
|
"loss": 347.4825, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.035310563272166064, |
|
"grad_norm": 1847.357666015625, |
|
"learning_rate": 1.7656565656565657e-05, |
|
"loss": 283.7126, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.0353913654764502, |
|
"grad_norm": 1132.7510986328125, |
|
"learning_rate": 1.76969696969697e-05, |
|
"loss": 238.7522, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.03547216768073433, |
|
"grad_norm": 1338.4906005859375, |
|
"learning_rate": 1.7737373737373738e-05, |
|
"loss": 206.4677, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.035552969885018465, |
|
"grad_norm": 889.9144897460938, |
|
"learning_rate": 1.777777777777778e-05, |
|
"loss": 218.258, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.035633772089302594, |
|
"grad_norm": 1081.747314453125, |
|
"learning_rate": 1.781818181818182e-05, |
|
"loss": 227.4267, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.03571457429358673, |
|
"grad_norm": 1337.2747802734375, |
|
"learning_rate": 1.785858585858586e-05, |
|
"loss": 216.5905, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.03579537649787086, |
|
"grad_norm": 1070.0733642578125, |
|
"learning_rate": 1.78989898989899e-05, |
|
"loss": 244.2413, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.035876178702154995, |
|
"grad_norm": 2713.52392578125, |
|
"learning_rate": 1.793939393939394e-05, |
|
"loss": 214.338, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.035956980906439125, |
|
"grad_norm": 1579.244873046875, |
|
"learning_rate": 1.797979797979798e-05, |
|
"loss": 236.9733, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.03603778311072326, |
|
"grad_norm": 1429.0421142578125, |
|
"learning_rate": 1.802020202020202e-05, |
|
"loss": 210.1912, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.0361185853150074, |
|
"grad_norm": 1236.484375, |
|
"learning_rate": 1.8060606060606062e-05, |
|
"loss": 209.4003, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.036199387519291526, |
|
"grad_norm": 2510.634521484375, |
|
"learning_rate": 1.81010101010101e-05, |
|
"loss": 261.3127, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.03628018972357566, |
|
"grad_norm": 697.04345703125, |
|
"learning_rate": 1.8141414141414143e-05, |
|
"loss": 202.3308, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.03636099192785979, |
|
"grad_norm": 1664.605712890625, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 230.7549, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.03644179413214393, |
|
"grad_norm": 1968.6279296875, |
|
"learning_rate": 1.8222222222222224e-05, |
|
"loss": 222.0125, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.036522596336428056, |
|
"grad_norm": 1813.247314453125, |
|
"learning_rate": 1.8262626262626263e-05, |
|
"loss": 206.1146, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.03660339854071219, |
|
"grad_norm": 1681.3162841796875, |
|
"learning_rate": 1.8303030303030305e-05, |
|
"loss": 281.5203, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.03668420074499632, |
|
"grad_norm": 813.0327758789062, |
|
"learning_rate": 1.8343434343434344e-05, |
|
"loss": 241.7396, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.03676500294928046, |
|
"grad_norm": 1714.4927978515625, |
|
"learning_rate": 1.8383838383838383e-05, |
|
"loss": 229.0337, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.03684580515356459, |
|
"grad_norm": 1173.26318359375, |
|
"learning_rate": 1.8424242424242425e-05, |
|
"loss": 167.8814, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.03692660735784872, |
|
"grad_norm": 1044.22509765625, |
|
"learning_rate": 1.8464646464646464e-05, |
|
"loss": 181.4134, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.03700740956213285, |
|
"grad_norm": 1544.4964599609375, |
|
"learning_rate": 1.8505050505050506e-05, |
|
"loss": 264.1711, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.03708821176641699, |
|
"grad_norm": 3204.8271484375, |
|
"learning_rate": 1.8545454545454545e-05, |
|
"loss": 209.0515, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.03716901397070112, |
|
"grad_norm": 1948.9998779296875, |
|
"learning_rate": 1.8585858585858588e-05, |
|
"loss": 204.1481, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.037249816174985254, |
|
"grad_norm": 985.3388671875, |
|
"learning_rate": 1.8626262626262626e-05, |
|
"loss": 251.0652, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.03733061837926939, |
|
"grad_norm": 4716.29833984375, |
|
"learning_rate": 1.866666666666667e-05, |
|
"loss": 234.005, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.03741142058355352, |
|
"grad_norm": 2745.129150390625, |
|
"learning_rate": 1.8707070707070707e-05, |
|
"loss": 222.8053, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.037492222787837655, |
|
"grad_norm": 852.2494506835938, |
|
"learning_rate": 1.874747474747475e-05, |
|
"loss": 244.6, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.037573024992121784, |
|
"grad_norm": 1276.906494140625, |
|
"learning_rate": 1.878787878787879e-05, |
|
"loss": 243.4739, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.03765382719640592, |
|
"grad_norm": 2488.490478515625, |
|
"learning_rate": 1.8828282828282827e-05, |
|
"loss": 241.5105, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.03773462940069005, |
|
"grad_norm": 1208.5731201171875, |
|
"learning_rate": 1.886868686868687e-05, |
|
"loss": 266.4298, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.037815431604974185, |
|
"grad_norm": 1110.9935302734375, |
|
"learning_rate": 1.890909090909091e-05, |
|
"loss": 220.2013, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.037896233809258315, |
|
"grad_norm": 966.4763793945312, |
|
"learning_rate": 1.894949494949495e-05, |
|
"loss": 213.4089, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.03797703601354245, |
|
"grad_norm": 888.4136352539062, |
|
"learning_rate": 1.898989898989899e-05, |
|
"loss": 192.6133, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.03805783821782658, |
|
"grad_norm": 1441.930419921875, |
|
"learning_rate": 1.9030303030303032e-05, |
|
"loss": 210.6855, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.038138640422110716, |
|
"grad_norm": 1268.2919921875, |
|
"learning_rate": 1.907070707070707e-05, |
|
"loss": 196.9399, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.038219442626394845, |
|
"grad_norm": 714.101318359375, |
|
"learning_rate": 1.9111111111111113e-05, |
|
"loss": 236.1493, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.03830024483067898, |
|
"grad_norm": 1360.3662109375, |
|
"learning_rate": 1.9151515151515155e-05, |
|
"loss": 277.1614, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.03838104703496312, |
|
"grad_norm": 857.1802368164062, |
|
"learning_rate": 1.919191919191919e-05, |
|
"loss": 233.6975, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.038461849239247246, |
|
"grad_norm": 1430.3370361328125, |
|
"learning_rate": 1.9232323232323233e-05, |
|
"loss": 206.9375, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.03854265144353138, |
|
"grad_norm": 999.745849609375, |
|
"learning_rate": 1.9272727272727272e-05, |
|
"loss": 177.6682, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.03862345364781551, |
|
"grad_norm": 1979.0234375, |
|
"learning_rate": 1.9313131313131314e-05, |
|
"loss": 237.5471, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.03870425585209965, |
|
"grad_norm": 1399.9544677734375, |
|
"learning_rate": 1.9353535353535353e-05, |
|
"loss": 209.8267, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.03878505805638378, |
|
"grad_norm": 1058.5128173828125, |
|
"learning_rate": 1.9393939393939395e-05, |
|
"loss": 206.1269, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.03886586026066791, |
|
"grad_norm": 1852.674072265625, |
|
"learning_rate": 1.9434343434343434e-05, |
|
"loss": 192.6013, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.03894666246495204, |
|
"grad_norm": 1104.2967529296875, |
|
"learning_rate": 1.9474747474747476e-05, |
|
"loss": 252.5522, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.03902746466923618, |
|
"grad_norm": 1426.0396728515625, |
|
"learning_rate": 1.951515151515152e-05, |
|
"loss": 250.6448, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.03910826687352031, |
|
"grad_norm": 1632.4510498046875, |
|
"learning_rate": 1.9555555555555557e-05, |
|
"loss": 163.3638, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.03918906907780444, |
|
"grad_norm": 700.0907592773438, |
|
"learning_rate": 1.95959595959596e-05, |
|
"loss": 236.7388, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.03926987128208857, |
|
"grad_norm": 1205.572265625, |
|
"learning_rate": 1.9636363636363635e-05, |
|
"loss": 272.2705, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.03935067348637271, |
|
"grad_norm": 799.412353515625, |
|
"learning_rate": 1.9676767676767677e-05, |
|
"loss": 171.4291, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.03943147569065684, |
|
"grad_norm": 1350.2025146484375, |
|
"learning_rate": 1.9717171717171716e-05, |
|
"loss": 233.9921, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.039512277894940974, |
|
"grad_norm": 976.219970703125, |
|
"learning_rate": 1.975757575757576e-05, |
|
"loss": 189.0711, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.03959308009922511, |
|
"grad_norm": 947.8401489257812, |
|
"learning_rate": 1.9797979797979797e-05, |
|
"loss": 207.2786, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.03967388230350924, |
|
"grad_norm": 1402.2440185546875, |
|
"learning_rate": 1.983838383838384e-05, |
|
"loss": 233.6717, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.039754684507793375, |
|
"grad_norm": 2319.2314453125, |
|
"learning_rate": 1.987878787878788e-05, |
|
"loss": 268.4254, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.039835486712077504, |
|
"grad_norm": 1344.019775390625, |
|
"learning_rate": 1.991919191919192e-05, |
|
"loss": 215.5304, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.03991628891636164, |
|
"grad_norm": 1209.1622314453125, |
|
"learning_rate": 1.9959595959595963e-05, |
|
"loss": 202.8059, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.03999709112064577, |
|
"grad_norm": 1872.3892822265625, |
|
"learning_rate": 2e-05, |
|
"loss": 193.5764, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.040077893324929906, |
|
"grad_norm": 1944.2449951171875, |
|
"learning_rate": 2.0040404040404044e-05, |
|
"loss": 273.8487, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.040158695529214035, |
|
"grad_norm": 988.1495361328125, |
|
"learning_rate": 2.008080808080808e-05, |
|
"loss": 202.3245, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.04023949773349817, |
|
"grad_norm": 1082.6280517578125, |
|
"learning_rate": 2.012121212121212e-05, |
|
"loss": 190.6009, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.0403202999377823, |
|
"grad_norm": 1510.5738525390625, |
|
"learning_rate": 2.016161616161616e-05, |
|
"loss": 262.4282, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.040401102142066436, |
|
"grad_norm": 1080.0328369140625, |
|
"learning_rate": 2.0202020202020203e-05, |
|
"loss": 179.5178, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.040481904346350565, |
|
"grad_norm": 1204.5341796875, |
|
"learning_rate": 2.0242424242424245e-05, |
|
"loss": 208.4234, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.0405627065506347, |
|
"grad_norm": 788.6203002929688, |
|
"learning_rate": 2.0282828282828284e-05, |
|
"loss": 222.1854, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.04064350875491884, |
|
"grad_norm": 2447.934326171875, |
|
"learning_rate": 2.0323232323232326e-05, |
|
"loss": 183.1969, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.04072431095920297, |
|
"grad_norm": 1879.5914306640625, |
|
"learning_rate": 2.0363636363636365e-05, |
|
"loss": 235.8428, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.0408051131634871, |
|
"grad_norm": 859.5083618164062, |
|
"learning_rate": 2.0404040404040407e-05, |
|
"loss": 223.1974, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.04088591536777123, |
|
"grad_norm": 591.982421875, |
|
"learning_rate": 2.0444444444444446e-05, |
|
"loss": 195.905, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.04096671757205537, |
|
"grad_norm": 2516.256103515625, |
|
"learning_rate": 2.0484848484848485e-05, |
|
"loss": 224.4586, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.0410475197763395, |
|
"grad_norm": 1155.78271484375, |
|
"learning_rate": 2.0525252525252524e-05, |
|
"loss": 237.8034, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.04112832198062363, |
|
"grad_norm": 760.8511962890625, |
|
"learning_rate": 2.0565656565656566e-05, |
|
"loss": 213.4372, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.04120912418490776, |
|
"grad_norm": 746.3182983398438, |
|
"learning_rate": 2.0606060606060608e-05, |
|
"loss": 246.9279, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.0412899263891919, |
|
"grad_norm": 1112.6119384765625, |
|
"learning_rate": 2.0646464646464647e-05, |
|
"loss": 215.4636, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.04137072859347603, |
|
"grad_norm": 1308.880126953125, |
|
"learning_rate": 2.068686868686869e-05, |
|
"loss": 184.3576, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.041451530797760164, |
|
"grad_norm": 1182.3695068359375, |
|
"learning_rate": 2.0727272727272728e-05, |
|
"loss": 251.3663, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.04153233300204429, |
|
"grad_norm": 3545.449951171875, |
|
"learning_rate": 2.076767676767677e-05, |
|
"loss": 221.7183, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.04161313520632843, |
|
"grad_norm": 1155.616455078125, |
|
"learning_rate": 2.080808080808081e-05, |
|
"loss": 181.7703, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.04169393741061256, |
|
"grad_norm": 927.0892333984375, |
|
"learning_rate": 2.084848484848485e-05, |
|
"loss": 242.7771, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.041774739614896694, |
|
"grad_norm": 1621.09326171875, |
|
"learning_rate": 2.088888888888889e-05, |
|
"loss": 168.8398, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.04185554181918083, |
|
"grad_norm": 1823.0281982421875, |
|
"learning_rate": 2.092929292929293e-05, |
|
"loss": 226.3993, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.04193634402346496, |
|
"grad_norm": 1904.581298828125, |
|
"learning_rate": 2.096969696969697e-05, |
|
"loss": 274.6904, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.042017146227749096, |
|
"grad_norm": 1195.8973388671875, |
|
"learning_rate": 2.101010101010101e-05, |
|
"loss": 193.929, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.042097948432033225, |
|
"grad_norm": 809.5712890625, |
|
"learning_rate": 2.1050505050505052e-05, |
|
"loss": 183.259, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.04217875063631736, |
|
"grad_norm": 1392.5491943359375, |
|
"learning_rate": 2.109090909090909e-05, |
|
"loss": 220.0326, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.04225955284060149, |
|
"grad_norm": 1818.6051025390625, |
|
"learning_rate": 2.1131313131313134e-05, |
|
"loss": 209.3423, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.042340355044885626, |
|
"grad_norm": 756.583740234375, |
|
"learning_rate": 2.1171717171717172e-05, |
|
"loss": 152.79, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.042421157249169755, |
|
"grad_norm": 1358.5194091796875, |
|
"learning_rate": 2.1212121212121215e-05, |
|
"loss": 223.6846, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.04250195945345389, |
|
"grad_norm": 2302.727783203125, |
|
"learning_rate": 2.1252525252525254e-05, |
|
"loss": 206.5412, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.04258276165773802, |
|
"grad_norm": 1090.666259765625, |
|
"learning_rate": 2.1292929292929296e-05, |
|
"loss": 197.9379, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.04266356386202216, |
|
"grad_norm": 1535.5264892578125, |
|
"learning_rate": 2.1333333333333335e-05, |
|
"loss": 172.5529, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.042744366066306286, |
|
"grad_norm": 1242.1055908203125, |
|
"learning_rate": 2.1373737373737373e-05, |
|
"loss": 182.2667, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.04282516827059042, |
|
"grad_norm": 1571.221923828125, |
|
"learning_rate": 2.1414141414141416e-05, |
|
"loss": 206.9152, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.04290597047487456, |
|
"grad_norm": 1733.92578125, |
|
"learning_rate": 2.1454545454545455e-05, |
|
"loss": 253.6228, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.04298677267915869, |
|
"grad_norm": 1736.4722900390625, |
|
"learning_rate": 2.1494949494949497e-05, |
|
"loss": 209.5105, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.04306757488344282, |
|
"grad_norm": 846.6854248046875, |
|
"learning_rate": 2.1535353535353536e-05, |
|
"loss": 227.6331, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.04314837708772695, |
|
"grad_norm": 793.491943359375, |
|
"learning_rate": 2.1575757575757578e-05, |
|
"loss": 190.9206, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.04322917929201109, |
|
"grad_norm": 1314.4940185546875, |
|
"learning_rate": 2.1616161616161617e-05, |
|
"loss": 278.1586, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.04330998149629522, |
|
"grad_norm": 1807.1669921875, |
|
"learning_rate": 2.165656565656566e-05, |
|
"loss": 246.6954, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.043390783700579354, |
|
"grad_norm": 1456.6739501953125, |
|
"learning_rate": 2.1696969696969698e-05, |
|
"loss": 198.1051, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.04347158590486348, |
|
"grad_norm": 2645.863037109375, |
|
"learning_rate": 2.173737373737374e-05, |
|
"loss": 263.7012, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.04355238810914762, |
|
"grad_norm": 890.2818603515625, |
|
"learning_rate": 2.177777777777778e-05, |
|
"loss": 206.08, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.04363319031343175, |
|
"grad_norm": 1066.948974609375, |
|
"learning_rate": 2.1818181818181818e-05, |
|
"loss": 203.3024, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.043713992517715884, |
|
"grad_norm": 1678.3651123046875, |
|
"learning_rate": 2.185858585858586e-05, |
|
"loss": 287.4994, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.04379479472200001, |
|
"grad_norm": 1427.133544921875, |
|
"learning_rate": 2.18989898989899e-05, |
|
"loss": 236.3808, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.04387559692628415, |
|
"grad_norm": 993.3723754882812, |
|
"learning_rate": 2.193939393939394e-05, |
|
"loss": 221.2247, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.04395639913056828, |
|
"grad_norm": 919.2279663085938, |
|
"learning_rate": 2.197979797979798e-05, |
|
"loss": 232.8961, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.044037201334852415, |
|
"grad_norm": 1196.51904296875, |
|
"learning_rate": 2.2020202020202022e-05, |
|
"loss": 208.7773, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.04411800353913655, |
|
"grad_norm": 937.6903076171875, |
|
"learning_rate": 2.206060606060606e-05, |
|
"loss": 159.7425, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.04419880574342068, |
|
"grad_norm": 2946.419921875, |
|
"learning_rate": 2.2101010101010103e-05, |
|
"loss": 201.0844, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.044279607947704816, |
|
"grad_norm": 1663.4422607421875, |
|
"learning_rate": 2.2141414141414142e-05, |
|
"loss": 140.8333, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.044360410151988945, |
|
"grad_norm": 1202.589599609375, |
|
"learning_rate": 2.2181818181818184e-05, |
|
"loss": 210.6169, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.04444121235627308, |
|
"grad_norm": 1676.0555419921875, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 300.5972, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.04452201456055721, |
|
"grad_norm": 1122.7333984375, |
|
"learning_rate": 2.2262626262626262e-05, |
|
"loss": 223.7688, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.044602816764841346, |
|
"grad_norm": 842.3754272460938, |
|
"learning_rate": 2.2303030303030304e-05, |
|
"loss": 231.1573, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.044683618969125476, |
|
"grad_norm": 912.3519897460938, |
|
"learning_rate": 2.2343434343434343e-05, |
|
"loss": 161.5479, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.04476442117340961, |
|
"grad_norm": 2117.377197265625, |
|
"learning_rate": 2.2383838383838385e-05, |
|
"loss": 178.251, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.04484522337769374, |
|
"grad_norm": 1402.2164306640625, |
|
"learning_rate": 2.2424242424242424e-05, |
|
"loss": 209.2086, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.04492602558197788, |
|
"grad_norm": 1458.323974609375, |
|
"learning_rate": 2.2464646464646467e-05, |
|
"loss": 243.9479, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.045006827786262006, |
|
"grad_norm": 2175.216796875, |
|
"learning_rate": 2.2505050505050505e-05, |
|
"loss": 189.8892, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.04508762999054614, |
|
"grad_norm": 1899.4354248046875, |
|
"learning_rate": 2.2545454545454548e-05, |
|
"loss": 335.0552, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.04516843219483028, |
|
"grad_norm": 1230.814697265625, |
|
"learning_rate": 2.2585858585858587e-05, |
|
"loss": 194.9335, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.04524923439911441, |
|
"grad_norm": 2101.527587890625, |
|
"learning_rate": 2.262626262626263e-05, |
|
"loss": 257.3806, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.045330036603398544, |
|
"grad_norm": 1695.30810546875, |
|
"learning_rate": 2.2666666666666668e-05, |
|
"loss": 219.7137, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.04541083880768267, |
|
"grad_norm": 1386.2855224609375, |
|
"learning_rate": 2.2707070707070706e-05, |
|
"loss": 236.2214, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.04549164101196681, |
|
"grad_norm": 1138.779052734375, |
|
"learning_rate": 2.274747474747475e-05, |
|
"loss": 192.9845, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.04557244321625094, |
|
"grad_norm": 2650.991943359375, |
|
"learning_rate": 2.2787878787878788e-05, |
|
"loss": 233.3904, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.045653245420535074, |
|
"grad_norm": 1309.0333251953125, |
|
"learning_rate": 2.282828282828283e-05, |
|
"loss": 225.3846, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.0457340476248192, |
|
"grad_norm": 930.385009765625, |
|
"learning_rate": 2.286868686868687e-05, |
|
"loss": 236.1336, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.04581484982910334, |
|
"grad_norm": 1646.2891845703125, |
|
"learning_rate": 2.290909090909091e-05, |
|
"loss": 227.3526, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.04589565203338747, |
|
"grad_norm": 2285.751708984375, |
|
"learning_rate": 2.294949494949495e-05, |
|
"loss": 236.6346, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.045976454237671605, |
|
"grad_norm": 3180.75537109375, |
|
"learning_rate": 2.2989898989898992e-05, |
|
"loss": 177.5457, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.046057256441955734, |
|
"grad_norm": 1423.35009765625, |
|
"learning_rate": 2.3030303030303034e-05, |
|
"loss": 194.2139, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.04613805864623987, |
|
"grad_norm": 1577.701171875, |
|
"learning_rate": 2.307070707070707e-05, |
|
"loss": 183.8717, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.046218860850524, |
|
"grad_norm": 1255.1485595703125, |
|
"learning_rate": 2.3111111111111112e-05, |
|
"loss": 213.8492, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.046299663054808135, |
|
"grad_norm": 1154.9453125, |
|
"learning_rate": 2.315151515151515e-05, |
|
"loss": 219.6154, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.04638046525909227, |
|
"grad_norm": 3208.9140625, |
|
"learning_rate": 2.3191919191919193e-05, |
|
"loss": 212.2527, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.0464612674633764, |
|
"grad_norm": 826.8831787109375, |
|
"learning_rate": 2.3232323232323232e-05, |
|
"loss": 193.0573, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.046542069667660536, |
|
"grad_norm": 953.578369140625, |
|
"learning_rate": 2.3272727272727274e-05, |
|
"loss": 200.1285, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.046622871871944666, |
|
"grad_norm": 948.6517944335938, |
|
"learning_rate": 2.3313131313131313e-05, |
|
"loss": 226.3946, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.0467036740762288, |
|
"grad_norm": 1502.9415283203125, |
|
"learning_rate": 2.3353535353535355e-05, |
|
"loss": 301.4247, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.04678447628051293, |
|
"grad_norm": 592.7190551757812, |
|
"learning_rate": 2.3393939393939397e-05, |
|
"loss": 171.6613, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.04686527848479707, |
|
"grad_norm": 774.3163452148438, |
|
"learning_rate": 2.3434343434343436e-05, |
|
"loss": 174.7567, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.046946080689081196, |
|
"grad_norm": 1000.3840942382812, |
|
"learning_rate": 2.347474747474748e-05, |
|
"loss": 140.1143, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.04702688289336533, |
|
"grad_norm": 1050.761474609375, |
|
"learning_rate": 2.3515151515151514e-05, |
|
"loss": 234.2542, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.04710768509764946, |
|
"grad_norm": 1076.979248046875, |
|
"learning_rate": 2.3555555555555556e-05, |
|
"loss": 170.2877, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.0471884873019336, |
|
"grad_norm": 1528.865478515625, |
|
"learning_rate": 2.3595959595959595e-05, |
|
"loss": 280.3715, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.047269289506217727, |
|
"grad_norm": 1554.0205078125, |
|
"learning_rate": 2.3636363636363637e-05, |
|
"loss": 258.9206, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.04735009171050186, |
|
"grad_norm": 969.7879028320312, |
|
"learning_rate": 2.3676767676767676e-05, |
|
"loss": 173.4592, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.047430893914786, |
|
"grad_norm": 1271.55322265625, |
|
"learning_rate": 2.371717171717172e-05, |
|
"loss": 187.7373, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.04751169611907013, |
|
"grad_norm": 757.3799438476562, |
|
"learning_rate": 2.375757575757576e-05, |
|
"loss": 206.0978, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.047592498323354264, |
|
"grad_norm": 1099.2119140625, |
|
"learning_rate": 2.37979797979798e-05, |
|
"loss": 191.4486, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.04767330052763839, |
|
"grad_norm": 895.0558471679688, |
|
"learning_rate": 2.3838383838383842e-05, |
|
"loss": 197.1677, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.04775410273192253, |
|
"grad_norm": 900.752685546875, |
|
"learning_rate": 2.387878787878788e-05, |
|
"loss": 209.3482, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.04783490493620666, |
|
"grad_norm": 865.3425903320312, |
|
"learning_rate": 2.3919191919191923e-05, |
|
"loss": 211.704, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.047915707140490794, |
|
"grad_norm": 1376.961181640625, |
|
"learning_rate": 2.395959595959596e-05, |
|
"loss": 197.2012, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.047996509344774924, |
|
"grad_norm": 2671.92236328125, |
|
"learning_rate": 2.4e-05, |
|
"loss": 262.8319, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.04807731154905906, |
|
"grad_norm": 4328.66552734375, |
|
"learning_rate": 2.404040404040404e-05, |
|
"loss": 263.4226, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.04815811375334319, |
|
"grad_norm": 1454.4398193359375, |
|
"learning_rate": 2.4080808080808082e-05, |
|
"loss": 173.7909, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.048238915957627325, |
|
"grad_norm": 1238.2913818359375, |
|
"learning_rate": 2.4121212121212124e-05, |
|
"loss": 190.8571, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.048319718161911454, |
|
"grad_norm": 1106.6146240234375, |
|
"learning_rate": 2.4161616161616163e-05, |
|
"loss": 252.6962, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.04840052036619559, |
|
"grad_norm": 1612.1171875, |
|
"learning_rate": 2.4202020202020205e-05, |
|
"loss": 176.1714, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.04848132257047972, |
|
"grad_norm": 684.4707641601562, |
|
"learning_rate": 2.4242424242424244e-05, |
|
"loss": 236.6299, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.048562124774763855, |
|
"grad_norm": 5278.638671875, |
|
"learning_rate": 2.4282828282828286e-05, |
|
"loss": 200.9588, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.04864292697904799, |
|
"grad_norm": 2136.859375, |
|
"learning_rate": 2.4323232323232325e-05, |
|
"loss": 249.8048, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.04872372918333212, |
|
"grad_norm": 704.8456420898438, |
|
"learning_rate": 2.4363636363636364e-05, |
|
"loss": 210.816, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.04880453138761626, |
|
"grad_norm": 2405.291259765625, |
|
"learning_rate": 2.4404040404040403e-05, |
|
"loss": 180.7068, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.048885333591900386, |
|
"grad_norm": 1121.5928955078125, |
|
"learning_rate": 2.4444444444444445e-05, |
|
"loss": 268.1764, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.04896613579618452, |
|
"grad_norm": 1185.4925537109375, |
|
"learning_rate": 2.4484848484848484e-05, |
|
"loss": 252.5901, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.04904693800046865, |
|
"grad_norm": 1037.7261962890625, |
|
"learning_rate": 2.4525252525252526e-05, |
|
"loss": 217.7089, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.04912774020475279, |
|
"grad_norm": 3574.91943359375, |
|
"learning_rate": 2.4565656565656568e-05, |
|
"loss": 248.0757, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.049208542409036916, |
|
"grad_norm": 1335.7510986328125, |
|
"learning_rate": 2.4606060606060607e-05, |
|
"loss": 243.5903, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.04928934461332105, |
|
"grad_norm": 1548.2281494140625, |
|
"learning_rate": 2.464646464646465e-05, |
|
"loss": 204.2808, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.04937014681760518, |
|
"grad_norm": 1327.641357421875, |
|
"learning_rate": 2.4686868686868688e-05, |
|
"loss": 175.3226, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.04945094902188932, |
|
"grad_norm": 1096.567626953125, |
|
"learning_rate": 2.472727272727273e-05, |
|
"loss": 251.3891, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.04953175122617345, |
|
"grad_norm": 916.0780639648438, |
|
"learning_rate": 2.476767676767677e-05, |
|
"loss": 265.5964, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.04961255343045758, |
|
"grad_norm": 3319.821533203125, |
|
"learning_rate": 2.4808080808080808e-05, |
|
"loss": 199.656, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.04969335563474172, |
|
"grad_norm": 804.5398559570312, |
|
"learning_rate": 2.4848484848484847e-05, |
|
"loss": 176.793, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.04977415783902585, |
|
"grad_norm": 1266.6590576171875, |
|
"learning_rate": 2.488888888888889e-05, |
|
"loss": 172.6065, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.049854960043309984, |
|
"grad_norm": 953.1856689453125, |
|
"learning_rate": 2.492929292929293e-05, |
|
"loss": 259.3056, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 0.049935762247594113, |
|
"grad_norm": 1643.679443359375, |
|
"learning_rate": 2.496969696969697e-05, |
|
"loss": 227.5671, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.05001656445187825, |
|
"grad_norm": 2092.3837890625, |
|
"learning_rate": 2.5010101010101013e-05, |
|
"loss": 231.7141, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 0.05009736665616238, |
|
"grad_norm": 5872.7822265625, |
|
"learning_rate": 2.505050505050505e-05, |
|
"loss": 307.3282, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.050178168860446515, |
|
"grad_norm": 1653.10888671875, |
|
"learning_rate": 2.5090909090909094e-05, |
|
"loss": 290.71, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 0.050258971064730644, |
|
"grad_norm": 5940.2861328125, |
|
"learning_rate": 2.5131313131313133e-05, |
|
"loss": 298.1718, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.05033977326901478, |
|
"grad_norm": 1056.6617431640625, |
|
"learning_rate": 2.5171717171717175e-05, |
|
"loss": 167.573, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 0.05042057547329891, |
|
"grad_norm": 1492.5479736328125, |
|
"learning_rate": 2.5212121212121214e-05, |
|
"loss": 209.9481, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.050501377677583045, |
|
"grad_norm": 764.5651245117188, |
|
"learning_rate": 2.5252525252525256e-05, |
|
"loss": 189.9945, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.050582179881867174, |
|
"grad_norm": 2933.18603515625, |
|
"learning_rate": 2.5292929292929295e-05, |
|
"loss": 228.2252, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.05066298208615131, |
|
"grad_norm": 2692.583740234375, |
|
"learning_rate": 2.5333333333333337e-05, |
|
"loss": 217.1123, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 0.05074378429043544, |
|
"grad_norm": 1611.5694580078125, |
|
"learning_rate": 2.5373737373737376e-05, |
|
"loss": 199.2745, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.050824586494719576, |
|
"grad_norm": 638.3251953125, |
|
"learning_rate": 2.5414141414141418e-05, |
|
"loss": 283.4336, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 0.05090538869900371, |
|
"grad_norm": 960.7551879882812, |
|
"learning_rate": 2.5454545454545454e-05, |
|
"loss": 199.2895, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.05098619090328784, |
|
"grad_norm": 1416.6865234375, |
|
"learning_rate": 2.5494949494949492e-05, |
|
"loss": 247.6437, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 0.05106699310757198, |
|
"grad_norm": 962.5587158203125, |
|
"learning_rate": 2.5535353535353535e-05, |
|
"loss": 222.514, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.051147795311856106, |
|
"grad_norm": 1019.0704956054688, |
|
"learning_rate": 2.5575757575757573e-05, |
|
"loss": 233.7968, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 0.05122859751614024, |
|
"grad_norm": 1380.1087646484375, |
|
"learning_rate": 2.5616161616161616e-05, |
|
"loss": 203.472, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.05130939972042437, |
|
"grad_norm": 765.1551513671875, |
|
"learning_rate": 2.5656565656565658e-05, |
|
"loss": 202.9591, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.05139020192470851, |
|
"grad_norm": 854.4512329101562, |
|
"learning_rate": 2.5696969696969697e-05, |
|
"loss": 152.8654, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.05147100412899264, |
|
"grad_norm": 1366.1529541015625, |
|
"learning_rate": 2.573737373737374e-05, |
|
"loss": 202.4912, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 0.05155180633327677, |
|
"grad_norm": 812.153564453125, |
|
"learning_rate": 2.5777777777777778e-05, |
|
"loss": 190.2283, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.0516326085375609, |
|
"grad_norm": 2072.30029296875, |
|
"learning_rate": 2.581818181818182e-05, |
|
"loss": 250.4601, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 0.05171341074184504, |
|
"grad_norm": 1064.25732421875, |
|
"learning_rate": 2.585858585858586e-05, |
|
"loss": 243.8253, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.05179421294612917, |
|
"grad_norm": 1004.585205078125, |
|
"learning_rate": 2.58989898989899e-05, |
|
"loss": 233.6981, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 0.0518750151504133, |
|
"grad_norm": 781.0443115234375, |
|
"learning_rate": 2.593939393939394e-05, |
|
"loss": 231.5708, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.05195581735469744, |
|
"grad_norm": 1038.6923828125, |
|
"learning_rate": 2.5979797979797982e-05, |
|
"loss": 166.9408, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 0.05203661955898157, |
|
"grad_norm": 1369.49560546875, |
|
"learning_rate": 2.602020202020202e-05, |
|
"loss": 212.7086, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.052117421763265705, |
|
"grad_norm": 1065.5115966796875, |
|
"learning_rate": 2.6060606060606063e-05, |
|
"loss": 197.232, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.052198223967549834, |
|
"grad_norm": 1192.5135498046875, |
|
"learning_rate": 2.6101010101010102e-05, |
|
"loss": 210.3559, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.05227902617183397, |
|
"grad_norm": 2817.4658203125, |
|
"learning_rate": 2.6141414141414145e-05, |
|
"loss": 218.5129, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 0.0523598283761181, |
|
"grad_norm": 1661.2547607421875, |
|
"learning_rate": 2.6181818181818187e-05, |
|
"loss": 251.3214, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.052440630580402235, |
|
"grad_norm": 1465.83251953125, |
|
"learning_rate": 2.6222222222222226e-05, |
|
"loss": 167.4487, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 0.052521432784686364, |
|
"grad_norm": 1172.0814208984375, |
|
"learning_rate": 2.6262626262626268e-05, |
|
"loss": 202.9199, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.0526022349889705, |
|
"grad_norm": 845.3886108398438, |
|
"learning_rate": 2.63030303030303e-05, |
|
"loss": 245.5335, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 0.05268303719325463, |
|
"grad_norm": 1505.1903076171875, |
|
"learning_rate": 2.6343434343434342e-05, |
|
"loss": 189.6107, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.052763839397538766, |
|
"grad_norm": 855.8611450195312, |
|
"learning_rate": 2.6383838383838384e-05, |
|
"loss": 168.8279, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 0.052844641601822895, |
|
"grad_norm": 1719.1915283203125, |
|
"learning_rate": 2.6424242424242423e-05, |
|
"loss": 266.6213, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.05292544380610703, |
|
"grad_norm": 1334.455322265625, |
|
"learning_rate": 2.6464646464646466e-05, |
|
"loss": 154.8023, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.05300624601039116, |
|
"grad_norm": 1549.58154296875, |
|
"learning_rate": 2.6505050505050504e-05, |
|
"loss": 188.1264, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.053087048214675296, |
|
"grad_norm": 740.02587890625, |
|
"learning_rate": 2.6545454545454547e-05, |
|
"loss": 241.7192, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 0.05316785041895943, |
|
"grad_norm": 1759.15869140625, |
|
"learning_rate": 2.6585858585858585e-05, |
|
"loss": 249.789, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.05324865262324356, |
|
"grad_norm": 1615.3770751953125, |
|
"learning_rate": 2.6626262626262628e-05, |
|
"loss": 256.1343, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 0.0533294548275277, |
|
"grad_norm": 1187.103515625, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 153.0139, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.05341025703181183, |
|
"grad_norm": 835.982177734375, |
|
"learning_rate": 2.670707070707071e-05, |
|
"loss": 240.5858, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 0.05349105923609596, |
|
"grad_norm": 658.9365234375, |
|
"learning_rate": 2.6747474747474748e-05, |
|
"loss": 212.2474, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.05357186144038009, |
|
"grad_norm": 836.15185546875, |
|
"learning_rate": 2.678787878787879e-05, |
|
"loss": 203.4595, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 0.05365266364466423, |
|
"grad_norm": 1312.960205078125, |
|
"learning_rate": 2.682828282828283e-05, |
|
"loss": 178.5055, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.05373346584894836, |
|
"grad_norm": 2402.58642578125, |
|
"learning_rate": 2.686868686868687e-05, |
|
"loss": 204.5425, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.05381426805323249, |
|
"grad_norm": 743.2178344726562, |
|
"learning_rate": 2.6909090909090913e-05, |
|
"loss": 151.0765, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.05389507025751662, |
|
"grad_norm": 2009.14599609375, |
|
"learning_rate": 2.6949494949494952e-05, |
|
"loss": 255.5723, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 0.05397587246180076, |
|
"grad_norm": 1129.924560546875, |
|
"learning_rate": 2.6989898989898994e-05, |
|
"loss": 249.5275, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.05405667466608489, |
|
"grad_norm": 1877.5682373046875, |
|
"learning_rate": 2.7030303030303033e-05, |
|
"loss": 201.4787, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 0.054137476870369024, |
|
"grad_norm": 1205.5860595703125, |
|
"learning_rate": 2.7070707070707075e-05, |
|
"loss": 165.1917, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.05421827907465316, |
|
"grad_norm": 833.5079956054688, |
|
"learning_rate": 2.7111111111111114e-05, |
|
"loss": 222.6354, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 0.05429908127893729, |
|
"grad_norm": 1644.57470703125, |
|
"learning_rate": 2.7151515151515157e-05, |
|
"loss": 183.0618, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.054379883483221425, |
|
"grad_norm": 1261.3482666015625, |
|
"learning_rate": 2.7191919191919192e-05, |
|
"loss": 204.8876, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 0.054460685687505554, |
|
"grad_norm": 1064.4910888671875, |
|
"learning_rate": 2.723232323232323e-05, |
|
"loss": 228.8735, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.05454148789178969, |
|
"grad_norm": 1227.28369140625, |
|
"learning_rate": 2.7272727272727273e-05, |
|
"loss": 244.5206, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.05462229009607382, |
|
"grad_norm": 747.6671142578125, |
|
"learning_rate": 2.7313131313131312e-05, |
|
"loss": 171.5991, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.054703092300357956, |
|
"grad_norm": 1191.174560546875, |
|
"learning_rate": 2.7353535353535354e-05, |
|
"loss": 183.062, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 0.054783894504642085, |
|
"grad_norm": 1179.271484375, |
|
"learning_rate": 2.7393939393939393e-05, |
|
"loss": 203.4914, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.05486469670892622, |
|
"grad_norm": 1980.94287109375, |
|
"learning_rate": 2.7434343434343435e-05, |
|
"loss": 190.7682, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 0.05494549891321035, |
|
"grad_norm": 1313.760498046875, |
|
"learning_rate": 2.7474747474747474e-05, |
|
"loss": 179.4395, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.055026301117494486, |
|
"grad_norm": 818.7135620117188, |
|
"learning_rate": 2.7515151515151516e-05, |
|
"loss": 240.7207, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 0.055107103321778615, |
|
"grad_norm": 1303.9735107421875, |
|
"learning_rate": 2.7555555555555555e-05, |
|
"loss": 197.5866, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.05518790552606275, |
|
"grad_norm": 4817.638671875, |
|
"learning_rate": 2.7595959595959597e-05, |
|
"loss": 236.2139, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 0.05526870773034688, |
|
"grad_norm": 1369.7080078125, |
|
"learning_rate": 2.7636363636363636e-05, |
|
"loss": 155.043, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.05534950993463102, |
|
"grad_norm": 1351.29150390625, |
|
"learning_rate": 2.767676767676768e-05, |
|
"loss": 193.5722, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.05543031213891515, |
|
"grad_norm": 1340.113525390625, |
|
"learning_rate": 2.771717171717172e-05, |
|
"loss": 196.4928, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.05551111434319928, |
|
"grad_norm": 1829.1298828125, |
|
"learning_rate": 2.775757575757576e-05, |
|
"loss": 207.2559, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 0.05559191654748342, |
|
"grad_norm": 1614.317138671875, |
|
"learning_rate": 2.7797979797979802e-05, |
|
"loss": 191.8481, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.05567271875176755, |
|
"grad_norm": 1409.754150390625, |
|
"learning_rate": 2.783838383838384e-05, |
|
"loss": 172.1698, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 0.05575352095605168, |
|
"grad_norm": 1008.7220458984375, |
|
"learning_rate": 2.7878787878787883e-05, |
|
"loss": 204.197, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.05583432316033581, |
|
"grad_norm": 1213.98291015625, |
|
"learning_rate": 2.7919191919191922e-05, |
|
"loss": 176.116, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 0.05591512536461995, |
|
"grad_norm": 1919.146484375, |
|
"learning_rate": 2.7959595959595964e-05, |
|
"loss": 205.5731, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.05599592756890408, |
|
"grad_norm": 1582.1240234375, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 204.0053, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 0.056076729773188214, |
|
"grad_norm": 1082.2257080078125, |
|
"learning_rate": 2.804040404040404e-05, |
|
"loss": 216.7668, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.05615753197747234, |
|
"grad_norm": 1451.9715576171875, |
|
"learning_rate": 2.808080808080808e-05, |
|
"loss": 128.3858, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.05623833418175648, |
|
"grad_norm": 2580.067138671875, |
|
"learning_rate": 2.812121212121212e-05, |
|
"loss": 187.2646, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.05631913638604061, |
|
"grad_norm": 1153.5308837890625, |
|
"learning_rate": 2.8161616161616162e-05, |
|
"loss": 170.1935, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 0.056399938590324744, |
|
"grad_norm": 842.653076171875, |
|
"learning_rate": 2.82020202020202e-05, |
|
"loss": 229.306, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.05648074079460888, |
|
"grad_norm": 1086.96337890625, |
|
"learning_rate": 2.8242424242424243e-05, |
|
"loss": 180.9517, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 0.05656154299889301, |
|
"grad_norm": 963.1438598632812, |
|
"learning_rate": 2.8282828282828282e-05, |
|
"loss": 186.2078, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.056642345203177145, |
|
"grad_norm": 1010.3299560546875, |
|
"learning_rate": 2.8323232323232324e-05, |
|
"loss": 223.6001, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 0.056723147407461275, |
|
"grad_norm": 1217.844482421875, |
|
"learning_rate": 2.8363636363636363e-05, |
|
"loss": 179.5198, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.05680394961174541, |
|
"grad_norm": 1364.8577880859375, |
|
"learning_rate": 2.8404040404040405e-05, |
|
"loss": 212.5286, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 0.05688475181602954, |
|
"grad_norm": 804.541748046875, |
|
"learning_rate": 2.8444444444444447e-05, |
|
"loss": 201.4965, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.056965554020313676, |
|
"grad_norm": 2093.808349609375, |
|
"learning_rate": 2.8484848484848486e-05, |
|
"loss": 202.5039, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.057046356224597805, |
|
"grad_norm": 1088.9471435546875, |
|
"learning_rate": 2.852525252525253e-05, |
|
"loss": 164.6322, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.05712715842888194, |
|
"grad_norm": 1510.014404296875, |
|
"learning_rate": 2.8565656565656567e-05, |
|
"loss": 246.0487, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 0.05720796063316607, |
|
"grad_norm": 617.3926391601562, |
|
"learning_rate": 2.860606060606061e-05, |
|
"loss": 166.5255, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.057288762837450206, |
|
"grad_norm": 1088.094482421875, |
|
"learning_rate": 2.864646464646465e-05, |
|
"loss": 180.0012, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 0.057369565041734336, |
|
"grad_norm": 754.35400390625, |
|
"learning_rate": 2.868686868686869e-05, |
|
"loss": 165.6718, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.05745036724601847, |
|
"grad_norm": 847.3502197265625, |
|
"learning_rate": 2.872727272727273e-05, |
|
"loss": 150.3254, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 0.0575311694503026, |
|
"grad_norm": 3462.79541015625, |
|
"learning_rate": 2.876767676767677e-05, |
|
"loss": 206.9913, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.05761197165458674, |
|
"grad_norm": 1302.846923828125, |
|
"learning_rate": 2.880808080808081e-05, |
|
"loss": 218.2749, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 0.05769277385887087, |
|
"grad_norm": 1508.3194580078125, |
|
"learning_rate": 2.8848484848484853e-05, |
|
"loss": 198.5009, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.057773576063155, |
|
"grad_norm": 1260.8990478515625, |
|
"learning_rate": 2.8888888888888888e-05, |
|
"loss": 287.7319, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.05785437826743914, |
|
"grad_norm": 2510.641357421875, |
|
"learning_rate": 2.8929292929292927e-05, |
|
"loss": 212.435, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.05793518047172327, |
|
"grad_norm": 1610.3782958984375, |
|
"learning_rate": 2.896969696969697e-05, |
|
"loss": 195.9904, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 0.058015982676007403, |
|
"grad_norm": 2051.1611328125, |
|
"learning_rate": 2.9010101010101008e-05, |
|
"loss": 230.746, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.05809678488029153, |
|
"grad_norm": 1708.345703125, |
|
"learning_rate": 2.905050505050505e-05, |
|
"loss": 202.1169, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 0.05817758708457567, |
|
"grad_norm": 991.0370483398438, |
|
"learning_rate": 2.909090909090909e-05, |
|
"loss": 182.8259, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.0582583892888598, |
|
"grad_norm": 1151.1380615234375, |
|
"learning_rate": 2.913131313131313e-05, |
|
"loss": 241.0473, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 0.058339191493143934, |
|
"grad_norm": 1103.3897705078125, |
|
"learning_rate": 2.9171717171717174e-05, |
|
"loss": 151.7667, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.05841999369742806, |
|
"grad_norm": 1151.0849609375, |
|
"learning_rate": 2.9212121212121213e-05, |
|
"loss": 197.9749, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 0.0585007959017122, |
|
"grad_norm": 983.3527221679688, |
|
"learning_rate": 2.9252525252525255e-05, |
|
"loss": 186.8989, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.05858159810599633, |
|
"grad_norm": 669.5452880859375, |
|
"learning_rate": 2.9292929292929294e-05, |
|
"loss": 179.281, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.058662400310280464, |
|
"grad_norm": 1186.9957275390625, |
|
"learning_rate": 2.9333333333333336e-05, |
|
"loss": 170.5622, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.0587432025145646, |
|
"grad_norm": 1314.4376220703125, |
|
"learning_rate": 2.9373737373737375e-05, |
|
"loss": 175.4961, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 0.05882400471884873, |
|
"grad_norm": 1278.834716796875, |
|
"learning_rate": 2.9414141414141417e-05, |
|
"loss": 183.9097, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.058904806923132866, |
|
"grad_norm": 1116.2734375, |
|
"learning_rate": 2.9454545454545456e-05, |
|
"loss": 143.516, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 0.058985609127416995, |
|
"grad_norm": 1352.628173828125, |
|
"learning_rate": 2.9494949494949498e-05, |
|
"loss": 204.3025, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.05906641133170113, |
|
"grad_norm": 1091.3201904296875, |
|
"learning_rate": 2.9535353535353537e-05, |
|
"loss": 181.4761, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 0.05914721353598526, |
|
"grad_norm": 1040.334716796875, |
|
"learning_rate": 2.957575757575758e-05, |
|
"loss": 170.6319, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.059228015740269396, |
|
"grad_norm": 1476.125732421875, |
|
"learning_rate": 2.9616161616161618e-05, |
|
"loss": 161.3749, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 0.059308817944553525, |
|
"grad_norm": 1488.0325927734375, |
|
"learning_rate": 2.965656565656566e-05, |
|
"loss": 183.3941, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.05938962014883766, |
|
"grad_norm": 481.60833740234375, |
|
"learning_rate": 2.96969696969697e-05, |
|
"loss": 199.2278, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.05947042235312179, |
|
"grad_norm": 1610.34521484375, |
|
"learning_rate": 2.973737373737374e-05, |
|
"loss": 201.723, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.05955122455740593, |
|
"grad_norm": 1576.0423583984375, |
|
"learning_rate": 2.9777777777777777e-05, |
|
"loss": 222.0852, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 0.059632026761690056, |
|
"grad_norm": 889.7515258789062, |
|
"learning_rate": 2.9818181818181816e-05, |
|
"loss": 193.5616, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.05971282896597419, |
|
"grad_norm": 746.6514282226562, |
|
"learning_rate": 2.9858585858585858e-05, |
|
"loss": 166.2696, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 0.05979363117025832, |
|
"grad_norm": 1730.69580078125, |
|
"learning_rate": 2.98989898989899e-05, |
|
"loss": 209.8799, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.05987443337454246, |
|
"grad_norm": 690.6642456054688, |
|
"learning_rate": 2.993939393939394e-05, |
|
"loss": 230.9101, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 0.05995523557882659, |
|
"grad_norm": 863.1697387695312, |
|
"learning_rate": 2.997979797979798e-05, |
|
"loss": 150.7177, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.06003603778311072, |
|
"grad_norm": 1267.2069091796875, |
|
"learning_rate": 3.002020202020202e-05, |
|
"loss": 210.8308, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 0.06011683998739486, |
|
"grad_norm": 1010.417724609375, |
|
"learning_rate": 3.0060606060606062e-05, |
|
"loss": 191.3645, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.06019764219167899, |
|
"grad_norm": 689.7382202148438, |
|
"learning_rate": 3.01010101010101e-05, |
|
"loss": 187.7134, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.060278444395963124, |
|
"grad_norm": 1864.760986328125, |
|
"learning_rate": 3.0141414141414144e-05, |
|
"loss": 214.7331, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.06035924660024725, |
|
"grad_norm": 1038.37353515625, |
|
"learning_rate": 3.0181818181818182e-05, |
|
"loss": 217.9106, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 0.06044004880453139, |
|
"grad_norm": 622.6604614257812, |
|
"learning_rate": 3.0222222222222225e-05, |
|
"loss": 155.4263, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.06052085100881552, |
|
"grad_norm": 878.7538452148438, |
|
"learning_rate": 3.0262626262626263e-05, |
|
"loss": 231.1667, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 0.060601653213099654, |
|
"grad_norm": 1581.2225341796875, |
|
"learning_rate": 3.0303030303030306e-05, |
|
"loss": 163.4888, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.060682455417383784, |
|
"grad_norm": 1152.7149658203125, |
|
"learning_rate": 3.0343434343434345e-05, |
|
"loss": 182.3645, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 0.06076325762166792, |
|
"grad_norm": 1109.6708984375, |
|
"learning_rate": 3.0383838383838387e-05, |
|
"loss": 175.0838, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.06084405982595205, |
|
"grad_norm": 1053.8270263671875, |
|
"learning_rate": 3.0424242424242426e-05, |
|
"loss": 181.691, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 0.060924862030236185, |
|
"grad_norm": 2113.046875, |
|
"learning_rate": 3.0464646464646468e-05, |
|
"loss": 224.7368, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.06100566423452032, |
|
"grad_norm": 1166.90478515625, |
|
"learning_rate": 3.050505050505051e-05, |
|
"loss": 206.4759, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.06108646643880445, |
|
"grad_norm": 1273.3836669921875, |
|
"learning_rate": 3.054545454545455e-05, |
|
"loss": 171.2801, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.061167268643088586, |
|
"grad_norm": 2534.885498046875, |
|
"learning_rate": 3.058585858585859e-05, |
|
"loss": 159.7586, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 0.061248070847372715, |
|
"grad_norm": 3763.103515625, |
|
"learning_rate": 3.062626262626262e-05, |
|
"loss": 323.4677, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.06132887305165685, |
|
"grad_norm": 1977.9522705078125, |
|
"learning_rate": 3.066666666666667e-05, |
|
"loss": 227.4736, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 0.06140967525594098, |
|
"grad_norm": 1690.8280029296875, |
|
"learning_rate": 3.070707070707071e-05, |
|
"loss": 192.4362, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.06149047746022512, |
|
"grad_norm": 1523.7828369140625, |
|
"learning_rate": 3.074747474747475e-05, |
|
"loss": 234.8535, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 0.061571279664509246, |
|
"grad_norm": 1146.36865234375, |
|
"learning_rate": 3.0787878787878786e-05, |
|
"loss": 142.9756, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.06165208186879338, |
|
"grad_norm": 895.3403930664062, |
|
"learning_rate": 3.082828282828283e-05, |
|
"loss": 201.5379, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 0.06173288407307751, |
|
"grad_norm": 1039.900634765625, |
|
"learning_rate": 3.086868686868687e-05, |
|
"loss": 230.2974, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.06181368627736165, |
|
"grad_norm": 1130.9986572265625, |
|
"learning_rate": 3.090909090909091e-05, |
|
"loss": 189.1531, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.061894488481645776, |
|
"grad_norm": 1224.142822265625, |
|
"learning_rate": 3.094949494949495e-05, |
|
"loss": 204.0206, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.06197529068592991, |
|
"grad_norm": 2115.472412109375, |
|
"learning_rate": 3.098989898989899e-05, |
|
"loss": 180.536, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 0.06205609289021405, |
|
"grad_norm": 779.9313354492188, |
|
"learning_rate": 3.103030303030303e-05, |
|
"loss": 158.623, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.06213689509449818, |
|
"grad_norm": 1337.7568359375, |
|
"learning_rate": 3.107070707070707e-05, |
|
"loss": 159.0383, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 0.062217697298782314, |
|
"grad_norm": 1851.648193359375, |
|
"learning_rate": 3.111111111111111e-05, |
|
"loss": 179.8161, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.06229849950306644, |
|
"grad_norm": 1469.6453857421875, |
|
"learning_rate": 3.1151515151515156e-05, |
|
"loss": 187.596, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 0.06237930170735058, |
|
"grad_norm": 1624.6527099609375, |
|
"learning_rate": 3.1191919191919194e-05, |
|
"loss": 214.8479, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.06246010391163471, |
|
"grad_norm": 1006.6346435546875, |
|
"learning_rate": 3.123232323232323e-05, |
|
"loss": 154.5748, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 0.06254090611591884, |
|
"grad_norm": 1002.5286254882812, |
|
"learning_rate": 3.127272727272728e-05, |
|
"loss": 184.4432, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.06262170832020297, |
|
"grad_norm": 1352.4193115234375, |
|
"learning_rate": 3.131313131313132e-05, |
|
"loss": 237.0036, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.06270251052448711, |
|
"grad_norm": 1084.147216796875, |
|
"learning_rate": 3.1353535353535357e-05, |
|
"loss": 164.8318, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.06278331272877125, |
|
"grad_norm": 1302.1048583984375, |
|
"learning_rate": 3.1393939393939395e-05, |
|
"loss": 164.3788, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 0.06286411493305537, |
|
"grad_norm": 1383.396484375, |
|
"learning_rate": 3.143434343434344e-05, |
|
"loss": 175.5805, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.0629449171373395, |
|
"grad_norm": 1246.53857421875, |
|
"learning_rate": 3.147474747474747e-05, |
|
"loss": 210.3966, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 0.06302571934162364, |
|
"grad_norm": 1285.145263671875, |
|
"learning_rate": 3.151515151515151e-05, |
|
"loss": 246.1903, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.06310652154590778, |
|
"grad_norm": 1620.3326416015625, |
|
"learning_rate": 3.155555555555556e-05, |
|
"loss": 196.0127, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 0.06318732375019191, |
|
"grad_norm": 1016.9979858398438, |
|
"learning_rate": 3.1595959595959596e-05, |
|
"loss": 210.5301, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.06326812595447603, |
|
"grad_norm": 1945.8780517578125, |
|
"learning_rate": 3.1636363636363635e-05, |
|
"loss": 239.3266, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 0.06334892815876017, |
|
"grad_norm": 1864.5794677734375, |
|
"learning_rate": 3.1676767676767674e-05, |
|
"loss": 193.7567, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.0634297303630443, |
|
"grad_norm": 1095.450927734375, |
|
"learning_rate": 3.171717171717172e-05, |
|
"loss": 191.1735, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.06351053256732844, |
|
"grad_norm": 1031.504150390625, |
|
"learning_rate": 3.175757575757576e-05, |
|
"loss": 185.8655, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.06359133477161256, |
|
"grad_norm": 1385.5076904296875, |
|
"learning_rate": 3.17979797979798e-05, |
|
"loss": 177.6908, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 0.0636721369758967, |
|
"grad_norm": 1074.5181884765625, |
|
"learning_rate": 3.1838383838383836e-05, |
|
"loss": 204.0031, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.06375293918018084, |
|
"grad_norm": 953.3314208984375, |
|
"learning_rate": 3.187878787878788e-05, |
|
"loss": 180.9185, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 0.06383374138446497, |
|
"grad_norm": 868.8043823242188, |
|
"learning_rate": 3.191919191919192e-05, |
|
"loss": 220.1422, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.0639145435887491, |
|
"grad_norm": 5921.494140625, |
|
"learning_rate": 3.195959595959596e-05, |
|
"loss": 167.039, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 0.06399534579303323, |
|
"grad_norm": 1500.1710205078125, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 138.7559, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.06407614799731737, |
|
"grad_norm": 1143.7266845703125, |
|
"learning_rate": 3.2040404040404044e-05, |
|
"loss": 195.0978, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 0.0641569502016015, |
|
"grad_norm": 523.0445556640625, |
|
"learning_rate": 3.208080808080808e-05, |
|
"loss": 151.1692, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.06423775240588563, |
|
"grad_norm": 2158.39013671875, |
|
"learning_rate": 3.212121212121212e-05, |
|
"loss": 236.7984, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.06431855461016976, |
|
"grad_norm": 659.3209228515625, |
|
"learning_rate": 3.216161616161617e-05, |
|
"loss": 181.1136, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.0643993568144539, |
|
"grad_norm": 608.638671875, |
|
"learning_rate": 3.2202020202020206e-05, |
|
"loss": 194.2183, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 0.06448015901873803, |
|
"grad_norm": 1122.7078857421875, |
|
"learning_rate": 3.2242424242424245e-05, |
|
"loss": 160.3627, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.06456096122302217, |
|
"grad_norm": 1686.80810546875, |
|
"learning_rate": 3.2282828282828284e-05, |
|
"loss": 223.456, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 0.06464176342730629, |
|
"grad_norm": 1573.1317138671875, |
|
"learning_rate": 3.232323232323233e-05, |
|
"loss": 224.0322, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.06472256563159043, |
|
"grad_norm": 1321.1458740234375, |
|
"learning_rate": 3.236363636363636e-05, |
|
"loss": 252.9104, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 0.06480336783587456, |
|
"grad_norm": 1179.701171875, |
|
"learning_rate": 3.24040404040404e-05, |
|
"loss": 223.4346, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.0648841700401587, |
|
"grad_norm": 977.9105224609375, |
|
"learning_rate": 3.2444444444444446e-05, |
|
"loss": 152.0468, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 0.06496497224444282, |
|
"grad_norm": 2066.90380859375, |
|
"learning_rate": 3.2484848484848485e-05, |
|
"loss": 190.8667, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.06504577444872696, |
|
"grad_norm": 3095.08935546875, |
|
"learning_rate": 3.2525252525252524e-05, |
|
"loss": 192.0303, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.0651265766530111, |
|
"grad_norm": 2343.95947265625, |
|
"learning_rate": 3.256565656565656e-05, |
|
"loss": 157.5384, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.06520737885729523, |
|
"grad_norm": 1510.8023681640625, |
|
"learning_rate": 3.260606060606061e-05, |
|
"loss": 239.6892, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 0.06528818106157935, |
|
"grad_norm": 1445.597900390625, |
|
"learning_rate": 3.264646464646465e-05, |
|
"loss": 177.4803, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.06536898326586349, |
|
"grad_norm": 1667.5521240234375, |
|
"learning_rate": 3.2686868686868686e-05, |
|
"loss": 190.0459, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 0.06544978547014763, |
|
"grad_norm": 925.2418212890625, |
|
"learning_rate": 3.272727272727273e-05, |
|
"loss": 190.8257, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.06553058767443176, |
|
"grad_norm": 1247.4376220703125, |
|
"learning_rate": 3.276767676767677e-05, |
|
"loss": 203.9773, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 0.0656113898787159, |
|
"grad_norm": 1212.892822265625, |
|
"learning_rate": 3.280808080808081e-05, |
|
"loss": 203.7381, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.06569219208300002, |
|
"grad_norm": 1091.890380859375, |
|
"learning_rate": 3.284848484848485e-05, |
|
"loss": 194.8187, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 0.06577299428728416, |
|
"grad_norm": 2029.2864990234375, |
|
"learning_rate": 3.2888888888888894e-05, |
|
"loss": 246.8937, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.06585379649156829, |
|
"grad_norm": 920.1378784179688, |
|
"learning_rate": 3.292929292929293e-05, |
|
"loss": 215.9934, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.06593459869585243, |
|
"grad_norm": 1521.0574951171875, |
|
"learning_rate": 3.296969696969697e-05, |
|
"loss": 167.3099, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.06601540090013655, |
|
"grad_norm": 1420.7525634765625, |
|
"learning_rate": 3.301010101010101e-05, |
|
"loss": 206.7512, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 0.06609620310442069, |
|
"grad_norm": 840.5839233398438, |
|
"learning_rate": 3.3050505050505056e-05, |
|
"loss": 202.7185, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.06617700530870482, |
|
"grad_norm": 1193.502197265625, |
|
"learning_rate": 3.3090909090909095e-05, |
|
"loss": 160.3612, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 0.06625780751298896, |
|
"grad_norm": 2222.778564453125, |
|
"learning_rate": 3.3131313131313134e-05, |
|
"loss": 150.92, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.06633860971727308, |
|
"grad_norm": 776.4454956054688, |
|
"learning_rate": 3.317171717171717e-05, |
|
"loss": 159.6749, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 0.06641941192155722, |
|
"grad_norm": 1179.86279296875, |
|
"learning_rate": 3.321212121212121e-05, |
|
"loss": 147.1537, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.06650021412584135, |
|
"grad_norm": 1168.2757568359375, |
|
"learning_rate": 3.325252525252525e-05, |
|
"loss": 163.5715, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 0.06658101633012549, |
|
"grad_norm": 996.3876953125, |
|
"learning_rate": 3.329292929292929e-05, |
|
"loss": 156.5557, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.06666181853440963, |
|
"grad_norm": 1006.9996337890625, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 176.6802, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.06674262073869375, |
|
"grad_norm": 877.4000854492188, |
|
"learning_rate": 3.3373737373737374e-05, |
|
"loss": 182.8363, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.06682342294297788, |
|
"grad_norm": 2153.091552734375, |
|
"learning_rate": 3.341414141414141e-05, |
|
"loss": 184.7595, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 0.06690422514726202, |
|
"grad_norm": 1884.7989501953125, |
|
"learning_rate": 3.345454545454546e-05, |
|
"loss": 197.672, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.06698502735154616, |
|
"grad_norm": 1494.185791015625, |
|
"learning_rate": 3.34949494949495e-05, |
|
"loss": 178.7865, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 0.06706582955583028, |
|
"grad_norm": 2600.398193359375, |
|
"learning_rate": 3.3535353535353536e-05, |
|
"loss": 222.7885, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.06714663176011441, |
|
"grad_norm": 1300.013671875, |
|
"learning_rate": 3.3575757575757575e-05, |
|
"loss": 182.5449, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 0.06722743396439855, |
|
"grad_norm": 2145.218505859375, |
|
"learning_rate": 3.361616161616162e-05, |
|
"loss": 281.7514, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.06730823616868269, |
|
"grad_norm": 1519.411865234375, |
|
"learning_rate": 3.365656565656566e-05, |
|
"loss": 167.7319, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 0.06738903837296681, |
|
"grad_norm": 750.0274047851562, |
|
"learning_rate": 3.36969696969697e-05, |
|
"loss": 173.5281, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.06746984057725094, |
|
"grad_norm": 1222.1435546875, |
|
"learning_rate": 3.373737373737374e-05, |
|
"loss": 162.324, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.06755064278153508, |
|
"grad_norm": 955.3302001953125, |
|
"learning_rate": 3.377777777777778e-05, |
|
"loss": 184.845, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.06763144498581922, |
|
"grad_norm": 1112.2943115234375, |
|
"learning_rate": 3.381818181818182e-05, |
|
"loss": 160.8671, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 0.06771224719010335, |
|
"grad_norm": 1163.462646484375, |
|
"learning_rate": 3.385858585858586e-05, |
|
"loss": 147.4451, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.06779304939438748, |
|
"grad_norm": 925.3172607421875, |
|
"learning_rate": 3.38989898989899e-05, |
|
"loss": 175.2849, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 0.06787385159867161, |
|
"grad_norm": 1990.568359375, |
|
"learning_rate": 3.3939393939393945e-05, |
|
"loss": 199.808, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.06795465380295575, |
|
"grad_norm": 2128.471923828125, |
|
"learning_rate": 3.3979797979797984e-05, |
|
"loss": 205.3155, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 0.06803545600723988, |
|
"grad_norm": 2691.37353515625, |
|
"learning_rate": 3.402020202020202e-05, |
|
"loss": 216.1895, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.068116258211524, |
|
"grad_norm": 3363.869140625, |
|
"learning_rate": 3.406060606060606e-05, |
|
"loss": 201.8374, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 0.06819706041580814, |
|
"grad_norm": 1438.0633544921875, |
|
"learning_rate": 3.41010101010101e-05, |
|
"loss": 222.5396, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.06827786262009228, |
|
"grad_norm": 1703.8653564453125, |
|
"learning_rate": 3.414141414141414e-05, |
|
"loss": 213.8756, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.06835866482437641, |
|
"grad_norm": 1938.7177734375, |
|
"learning_rate": 3.4181818181818185e-05, |
|
"loss": 215.6468, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.06843946702866054, |
|
"grad_norm": 851.2493896484375, |
|
"learning_rate": 3.4222222222222224e-05, |
|
"loss": 182.1204, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 0.06852026923294467, |
|
"grad_norm": 1202.3365478515625, |
|
"learning_rate": 3.426262626262626e-05, |
|
"loss": 183.2559, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.06860107143722881, |
|
"grad_norm": 1543.7257080078125, |
|
"learning_rate": 3.43030303030303e-05, |
|
"loss": 239.1145, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 0.06868187364151294, |
|
"grad_norm": 748.701171875, |
|
"learning_rate": 3.434343434343435e-05, |
|
"loss": 285.6954, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.06876267584579707, |
|
"grad_norm": 5747.30224609375, |
|
"learning_rate": 3.4383838383838386e-05, |
|
"loss": 194.7742, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 0.0688434780500812, |
|
"grad_norm": 1587.478271484375, |
|
"learning_rate": 3.4424242424242425e-05, |
|
"loss": 213.0056, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.06892428025436534, |
|
"grad_norm": 907.9869995117188, |
|
"learning_rate": 3.4464646464646463e-05, |
|
"loss": 183.2854, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 0.06900508245864947, |
|
"grad_norm": 1065.7462158203125, |
|
"learning_rate": 3.450505050505051e-05, |
|
"loss": 147.3901, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.06908588466293361, |
|
"grad_norm": 1654.4375, |
|
"learning_rate": 3.454545454545455e-05, |
|
"loss": 152.4811, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.06916668686721773, |
|
"grad_norm": 1075.144775390625, |
|
"learning_rate": 3.458585858585859e-05, |
|
"loss": 217.3591, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.06924748907150187, |
|
"grad_norm": 1216.4287109375, |
|
"learning_rate": 3.4626262626262626e-05, |
|
"loss": 184.7878, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 0.069328291275786, |
|
"grad_norm": 1143.3253173828125, |
|
"learning_rate": 3.466666666666667e-05, |
|
"loss": 185.8244, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.06940909348007014, |
|
"grad_norm": 2943.891357421875, |
|
"learning_rate": 3.470707070707071e-05, |
|
"loss": 203.6546, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 0.06948989568435426, |
|
"grad_norm": 562.7566528320312, |
|
"learning_rate": 3.474747474747475e-05, |
|
"loss": 224.7354, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.0695706978886384, |
|
"grad_norm": 1995.734130859375, |
|
"learning_rate": 3.4787878787878795e-05, |
|
"loss": 178.8969, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 0.06965150009292254, |
|
"grad_norm": 1593.8944091796875, |
|
"learning_rate": 3.4828282828282834e-05, |
|
"loss": 164.9677, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.06973230229720667, |
|
"grad_norm": 715.6300048828125, |
|
"learning_rate": 3.486868686868687e-05, |
|
"loss": 147.6377, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 0.0698131045014908, |
|
"grad_norm": 3263.21044921875, |
|
"learning_rate": 3.490909090909091e-05, |
|
"loss": 235.9829, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.06989390670577493, |
|
"grad_norm": 3928.5576171875, |
|
"learning_rate": 3.494949494949495e-05, |
|
"loss": 220.889, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.06997470891005907, |
|
"grad_norm": 1176.6265869140625, |
|
"learning_rate": 3.498989898989899e-05, |
|
"loss": 173.9785, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.0700555111143432, |
|
"grad_norm": 1472.33349609375, |
|
"learning_rate": 3.503030303030303e-05, |
|
"loss": 166.185, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 0.07013631331862734, |
|
"grad_norm": 943.4843139648438, |
|
"learning_rate": 3.5070707070707073e-05, |
|
"loss": 196.9754, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.07021711552291146, |
|
"grad_norm": 1376.169189453125, |
|
"learning_rate": 3.511111111111111e-05, |
|
"loss": 249.9044, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 0.0702979177271956, |
|
"grad_norm": 862.705078125, |
|
"learning_rate": 3.515151515151515e-05, |
|
"loss": 134.6473, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.07037871993147973, |
|
"grad_norm": 1661.3258056640625, |
|
"learning_rate": 3.519191919191919e-05, |
|
"loss": 260.4335, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 0.07045952213576387, |
|
"grad_norm": 858.2864379882812, |
|
"learning_rate": 3.5232323232323236e-05, |
|
"loss": 156.1466, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.07054032434004799, |
|
"grad_norm": 1033.8033447265625, |
|
"learning_rate": 3.5272727272727274e-05, |
|
"loss": 158.8132, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 0.07062112654433213, |
|
"grad_norm": 2244.4833984375, |
|
"learning_rate": 3.531313131313131e-05, |
|
"loss": 185.2664, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.07070192874861626, |
|
"grad_norm": 828.0194091796875, |
|
"learning_rate": 3.535353535353535e-05, |
|
"loss": 189.655, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.0707827309529004, |
|
"grad_norm": 764.8339233398438, |
|
"learning_rate": 3.53939393939394e-05, |
|
"loss": 169.7833, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.07086353315718452, |
|
"grad_norm": 1434.6533203125, |
|
"learning_rate": 3.543434343434344e-05, |
|
"loss": 169.9032, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 0.07094433536146866, |
|
"grad_norm": 1811.5740966796875, |
|
"learning_rate": 3.5474747474747475e-05, |
|
"loss": 256.6501, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.0710251375657528, |
|
"grad_norm": 923.5958251953125, |
|
"learning_rate": 3.551515151515152e-05, |
|
"loss": 157.0084, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 0.07110593977003693, |
|
"grad_norm": 1671.8385009765625, |
|
"learning_rate": 3.555555555555556e-05, |
|
"loss": 206.8505, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.07118674197432107, |
|
"grad_norm": 2508.17626953125, |
|
"learning_rate": 3.55959595959596e-05, |
|
"loss": 204.866, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 0.07126754417860519, |
|
"grad_norm": 852.1519775390625, |
|
"learning_rate": 3.563636363636364e-05, |
|
"loss": 147.0554, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.07134834638288932, |
|
"grad_norm": 925.072021484375, |
|
"learning_rate": 3.567676767676768e-05, |
|
"loss": 205.7363, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 0.07142914858717346, |
|
"grad_norm": 1310.513916015625, |
|
"learning_rate": 3.571717171717172e-05, |
|
"loss": 252.9427, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.0715099507914576, |
|
"grad_norm": 1795.476806640625, |
|
"learning_rate": 3.575757575757576e-05, |
|
"loss": 184.422, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.07159075299574172, |
|
"grad_norm": 1071.3101806640625, |
|
"learning_rate": 3.57979797979798e-05, |
|
"loss": 165.8845, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 0.07167155520002585, |
|
"grad_norm": 724.8527221679688, |
|
"learning_rate": 3.583838383838384e-05, |
|
"loss": 180.9637, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 0.07175235740430999, |
|
"grad_norm": 999.9872436523438, |
|
"learning_rate": 3.587878787878788e-05, |
|
"loss": 186.5519, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 0.07183315960859413, |
|
"grad_norm": 1380.1075439453125, |
|
"learning_rate": 3.5919191919191916e-05, |
|
"loss": 224.0065, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 0.07191396181287825, |
|
"grad_norm": 1093.9498291015625, |
|
"learning_rate": 3.595959595959596e-05, |
|
"loss": 165.8288, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.07199476401716239, |
|
"grad_norm": 2711.7353515625, |
|
"learning_rate": 3.6e-05, |
|
"loss": 163.5886, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 0.07207556622144652, |
|
"grad_norm": 1537.253662109375, |
|
"learning_rate": 3.604040404040404e-05, |
|
"loss": 165.1541, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 0.07215636842573066, |
|
"grad_norm": 1100.548095703125, |
|
"learning_rate": 3.608080808080808e-05, |
|
"loss": 169.8857, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 0.0722371706300148, |
|
"grad_norm": 1088.0587158203125, |
|
"learning_rate": 3.6121212121212124e-05, |
|
"loss": 186.5488, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 0.07231797283429892, |
|
"grad_norm": 1160.9769287109375, |
|
"learning_rate": 3.616161616161616e-05, |
|
"loss": 190.4372, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.07239877503858305, |
|
"grad_norm": 1117.179443359375, |
|
"learning_rate": 3.62020202020202e-05, |
|
"loss": 134.2678, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.07247957724286719, |
|
"grad_norm": 1956.5089111328125, |
|
"learning_rate": 3.624242424242425e-05, |
|
"loss": 172.5972, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 0.07256037944715132, |
|
"grad_norm": 2694.300537109375, |
|
"learning_rate": 3.6282828282828286e-05, |
|
"loss": 200.7171, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 0.07264118165143545, |
|
"grad_norm": 1112.63720703125, |
|
"learning_rate": 3.6323232323232325e-05, |
|
"loss": 151.1802, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 0.07272198385571958, |
|
"grad_norm": 2176.663330078125, |
|
"learning_rate": 3.6363636363636364e-05, |
|
"loss": 162.1838, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.07280278606000372, |
|
"grad_norm": 1126.674072265625, |
|
"learning_rate": 3.640404040404041e-05, |
|
"loss": 180.7603, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 0.07288358826428785, |
|
"grad_norm": 1244.1241455078125, |
|
"learning_rate": 3.644444444444445e-05, |
|
"loss": 114.9891, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 0.07296439046857198, |
|
"grad_norm": 2782.807373046875, |
|
"learning_rate": 3.648484848484849e-05, |
|
"loss": 199.3326, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 0.07304519267285611, |
|
"grad_norm": 1218.721435546875, |
|
"learning_rate": 3.6525252525252526e-05, |
|
"loss": 143.516, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 0.07312599487714025, |
|
"grad_norm": 799.7451782226562, |
|
"learning_rate": 3.656565656565657e-05, |
|
"loss": 131.4932, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.07320679708142439, |
|
"grad_norm": 547.2342529296875, |
|
"learning_rate": 3.660606060606061e-05, |
|
"loss": 151.0718, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 0.07328759928570851, |
|
"grad_norm": 1480.14501953125, |
|
"learning_rate": 3.664646464646464e-05, |
|
"loss": 201.2714, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 0.07336840148999264, |
|
"grad_norm": 516.29931640625, |
|
"learning_rate": 3.668686868686869e-05, |
|
"loss": 174.2223, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 0.07344920369427678, |
|
"grad_norm": 912.0347900390625, |
|
"learning_rate": 3.672727272727273e-05, |
|
"loss": 180.7495, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 0.07353000589856092, |
|
"grad_norm": 1243.236083984375, |
|
"learning_rate": 3.6767676767676766e-05, |
|
"loss": 189.5693, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.07361080810284505, |
|
"grad_norm": 742.5632934570312, |
|
"learning_rate": 3.6808080808080805e-05, |
|
"loss": 194.6184, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 0.07369161030712917, |
|
"grad_norm": 1145.2069091796875, |
|
"learning_rate": 3.684848484848485e-05, |
|
"loss": 185.7565, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 0.07377241251141331, |
|
"grad_norm": 1086.181640625, |
|
"learning_rate": 3.688888888888889e-05, |
|
"loss": 167.5214, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 0.07385321471569745, |
|
"grad_norm": 1321.85400390625, |
|
"learning_rate": 3.692929292929293e-05, |
|
"loss": 221.075, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 0.07393401691998158, |
|
"grad_norm": 2168.907958984375, |
|
"learning_rate": 3.6969696969696974e-05, |
|
"loss": 219.8519, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.0740148191242657, |
|
"grad_norm": 1007.1217041015625, |
|
"learning_rate": 3.701010101010101e-05, |
|
"loss": 194.1429, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 0.07409562132854984, |
|
"grad_norm": 1099.997802734375, |
|
"learning_rate": 3.705050505050505e-05, |
|
"loss": 220.093, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 0.07417642353283398, |
|
"grad_norm": 745.4526977539062, |
|
"learning_rate": 3.709090909090909e-05, |
|
"loss": 143.6606, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 0.07425722573711811, |
|
"grad_norm": 2050.18212890625, |
|
"learning_rate": 3.7131313131313136e-05, |
|
"loss": 222.1462, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 0.07433802794140223, |
|
"grad_norm": 2366.43896484375, |
|
"learning_rate": 3.7171717171717175e-05, |
|
"loss": 191.5783, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.07441883014568637, |
|
"grad_norm": 905.9826049804688, |
|
"learning_rate": 3.7212121212121214e-05, |
|
"loss": 168.3594, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 0.07449963234997051, |
|
"grad_norm": 1320.197509765625, |
|
"learning_rate": 3.725252525252525e-05, |
|
"loss": 140.7596, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 0.07458043455425464, |
|
"grad_norm": 3290.6748046875, |
|
"learning_rate": 3.72929292929293e-05, |
|
"loss": 161.625, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 0.07466123675853878, |
|
"grad_norm": 1262.2984619140625, |
|
"learning_rate": 3.733333333333334e-05, |
|
"loss": 163.4713, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 0.0747420389628229, |
|
"grad_norm": 1434.2923583984375, |
|
"learning_rate": 3.7373737373737376e-05, |
|
"loss": 168.6003, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.07482284116710704, |
|
"grad_norm": 716.86083984375, |
|
"learning_rate": 3.7414141414141415e-05, |
|
"loss": 165.9515, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 0.07490364337139117, |
|
"grad_norm": 4190.2119140625, |
|
"learning_rate": 3.745454545454546e-05, |
|
"loss": 201.2678, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 0.07498444557567531, |
|
"grad_norm": 1093.3636474609375, |
|
"learning_rate": 3.74949494949495e-05, |
|
"loss": 145.2475, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.07506524777995943, |
|
"grad_norm": 1217.2969970703125, |
|
"learning_rate": 3.753535353535353e-05, |
|
"loss": 219.3316, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 0.07514604998424357, |
|
"grad_norm": 1122.0589599609375, |
|
"learning_rate": 3.757575757575758e-05, |
|
"loss": 201.7304, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.0752268521885277, |
|
"grad_norm": 779.33447265625, |
|
"learning_rate": 3.7616161616161616e-05, |
|
"loss": 199.0558, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 0.07530765439281184, |
|
"grad_norm": 1110.2554931640625, |
|
"learning_rate": 3.7656565656565655e-05, |
|
"loss": 190.4773, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 0.07538845659709596, |
|
"grad_norm": 1762.0330810546875, |
|
"learning_rate": 3.76969696969697e-05, |
|
"loss": 151.1484, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 0.0754692588013801, |
|
"grad_norm": 1020.6593627929688, |
|
"learning_rate": 3.773737373737374e-05, |
|
"loss": 169.2854, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 0.07555006100566423, |
|
"grad_norm": 1202.7464599609375, |
|
"learning_rate": 3.777777777777778e-05, |
|
"loss": 164.9156, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.07563086320994837, |
|
"grad_norm": 1249.380126953125, |
|
"learning_rate": 3.781818181818182e-05, |
|
"loss": 227.8868, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 0.0757116654142325, |
|
"grad_norm": 1847.7193603515625, |
|
"learning_rate": 3.785858585858586e-05, |
|
"loss": 176.2305, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 0.07579246761851663, |
|
"grad_norm": 1446.559814453125, |
|
"learning_rate": 3.78989898989899e-05, |
|
"loss": 147.3983, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 0.07587326982280077, |
|
"grad_norm": 1216.9862060546875, |
|
"learning_rate": 3.793939393939394e-05, |
|
"loss": 165.7989, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 0.0759540720270849, |
|
"grad_norm": 2848.1337890625, |
|
"learning_rate": 3.797979797979798e-05, |
|
"loss": 157.2326, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.07603487423136904, |
|
"grad_norm": 940.07177734375, |
|
"learning_rate": 3.8020202020202025e-05, |
|
"loss": 221.2202, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 0.07611567643565316, |
|
"grad_norm": 1160.6881103515625, |
|
"learning_rate": 3.8060606060606064e-05, |
|
"loss": 158.6488, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 0.0761964786399373, |
|
"grad_norm": 2879.822998046875, |
|
"learning_rate": 3.81010101010101e-05, |
|
"loss": 223.0843, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 0.07627728084422143, |
|
"grad_norm": 1498.8753662109375, |
|
"learning_rate": 3.814141414141414e-05, |
|
"loss": 161.5878, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 0.07635808304850557, |
|
"grad_norm": 1229.455078125, |
|
"learning_rate": 3.818181818181819e-05, |
|
"loss": 198.7026, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.07643888525278969, |
|
"grad_norm": 929.4089965820312, |
|
"learning_rate": 3.8222222222222226e-05, |
|
"loss": 251.8946, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 0.07651968745707383, |
|
"grad_norm": 934.8760375976562, |
|
"learning_rate": 3.8262626262626265e-05, |
|
"loss": 162.8998, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 0.07660048966135796, |
|
"grad_norm": 915.323974609375, |
|
"learning_rate": 3.830303030303031e-05, |
|
"loss": 171.6096, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 0.0766812918656421, |
|
"grad_norm": 1115.416748046875, |
|
"learning_rate": 3.834343434343435e-05, |
|
"loss": 163.5847, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 0.07676209406992623, |
|
"grad_norm": 2073.390869140625, |
|
"learning_rate": 3.838383838383838e-05, |
|
"loss": 261.5975, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.07684289627421036, |
|
"grad_norm": 1260.9718017578125, |
|
"learning_rate": 3.842424242424243e-05, |
|
"loss": 131.5614, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 0.07692369847849449, |
|
"grad_norm": 1239.3568115234375, |
|
"learning_rate": 3.8464646464646466e-05, |
|
"loss": 164.9494, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 0.07700450068277863, |
|
"grad_norm": 1221.8023681640625, |
|
"learning_rate": 3.8505050505050505e-05, |
|
"loss": 245.5353, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 0.07708530288706276, |
|
"grad_norm": 533.4956665039062, |
|
"learning_rate": 3.8545454545454544e-05, |
|
"loss": 149.1819, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 0.07716610509134689, |
|
"grad_norm": 1410.545166015625, |
|
"learning_rate": 3.858585858585859e-05, |
|
"loss": 172.351, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.07724690729563102, |
|
"grad_norm": 927.8252563476562, |
|
"learning_rate": 3.862626262626263e-05, |
|
"loss": 152.7654, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 0.07732770949991516, |
|
"grad_norm": 1244.0257568359375, |
|
"learning_rate": 3.866666666666667e-05, |
|
"loss": 171.8292, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 0.0774085117041993, |
|
"grad_norm": 664.2005615234375, |
|
"learning_rate": 3.8707070707070706e-05, |
|
"loss": 109.5876, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 0.07748931390848342, |
|
"grad_norm": 1334.42626953125, |
|
"learning_rate": 3.874747474747475e-05, |
|
"loss": 129.1926, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 0.07757011611276755, |
|
"grad_norm": 1236.3963623046875, |
|
"learning_rate": 3.878787878787879e-05, |
|
"loss": 195.8564, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.07765091831705169, |
|
"grad_norm": 517.2808227539062, |
|
"learning_rate": 3.882828282828283e-05, |
|
"loss": 213.7913, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 0.07773172052133583, |
|
"grad_norm": 1618.37890625, |
|
"learning_rate": 3.886868686868687e-05, |
|
"loss": 187.1561, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 0.07781252272561995, |
|
"grad_norm": 1312.72705078125, |
|
"learning_rate": 3.8909090909090914e-05, |
|
"loss": 192.0711, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 0.07789332492990408, |
|
"grad_norm": 970.0208129882812, |
|
"learning_rate": 3.894949494949495e-05, |
|
"loss": 150.2422, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 0.07797412713418822, |
|
"grad_norm": 1302.1982421875, |
|
"learning_rate": 3.898989898989899e-05, |
|
"loss": 149.0684, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.07805492933847236, |
|
"grad_norm": 3663.638427734375, |
|
"learning_rate": 3.903030303030304e-05, |
|
"loss": 197.8055, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 0.07813573154275649, |
|
"grad_norm": 1510.8233642578125, |
|
"learning_rate": 3.9070707070707076e-05, |
|
"loss": 217.5386, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 0.07821653374704061, |
|
"grad_norm": 4103.01904296875, |
|
"learning_rate": 3.9111111111111115e-05, |
|
"loss": 198.1855, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 0.07829733595132475, |
|
"grad_norm": 1483.061279296875, |
|
"learning_rate": 3.9151515151515153e-05, |
|
"loss": 200.777, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 0.07837813815560889, |
|
"grad_norm": 1666.9429931640625, |
|
"learning_rate": 3.91919191919192e-05, |
|
"loss": 233.7256, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.07845894035989302, |
|
"grad_norm": 1422.605224609375, |
|
"learning_rate": 3.923232323232323e-05, |
|
"loss": 198.8265, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 0.07853974256417715, |
|
"grad_norm": 955.90869140625, |
|
"learning_rate": 3.927272727272727e-05, |
|
"loss": 178.4951, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 0.07862054476846128, |
|
"grad_norm": 768.09228515625, |
|
"learning_rate": 3.9313131313131316e-05, |
|
"loss": 169.9865, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 0.07870134697274542, |
|
"grad_norm": 2241.572998046875, |
|
"learning_rate": 3.9353535353535355e-05, |
|
"loss": 171.3657, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 0.07878214917702955, |
|
"grad_norm": 828.8177490234375, |
|
"learning_rate": 3.939393939393939e-05, |
|
"loss": 212.0979, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.07886295138131368, |
|
"grad_norm": 1248.2691650390625, |
|
"learning_rate": 3.943434343434343e-05, |
|
"loss": 144.412, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 0.07894375358559781, |
|
"grad_norm": 1106.5013427734375, |
|
"learning_rate": 3.947474747474748e-05, |
|
"loss": 170.0778, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 0.07902455578988195, |
|
"grad_norm": 1183.6558837890625, |
|
"learning_rate": 3.951515151515152e-05, |
|
"loss": 183.0114, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 0.07910535799416608, |
|
"grad_norm": 790.3275146484375, |
|
"learning_rate": 3.9555555555555556e-05, |
|
"loss": 148.8441, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 0.07918616019845022, |
|
"grad_norm": 1040.2529296875, |
|
"learning_rate": 3.9595959595959594e-05, |
|
"loss": 181.9307, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.07926696240273434, |
|
"grad_norm": 1023.8417358398438, |
|
"learning_rate": 3.963636363636364e-05, |
|
"loss": 160.768, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 0.07934776460701848, |
|
"grad_norm": 1530.9327392578125, |
|
"learning_rate": 3.967676767676768e-05, |
|
"loss": 189.5177, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 0.07942856681130261, |
|
"grad_norm": 1020.0157470703125, |
|
"learning_rate": 3.971717171717172e-05, |
|
"loss": 175.2649, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 0.07950936901558675, |
|
"grad_norm": 1115.394287109375, |
|
"learning_rate": 3.975757575757576e-05, |
|
"loss": 180.9754, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 0.07959017121987087, |
|
"grad_norm": 596.3128051757812, |
|
"learning_rate": 3.97979797979798e-05, |
|
"loss": 190.0906, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.07967097342415501, |
|
"grad_norm": 1229.2056884765625, |
|
"learning_rate": 3.983838383838384e-05, |
|
"loss": 153.1082, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 0.07975177562843914, |
|
"grad_norm": 1460.8936767578125, |
|
"learning_rate": 3.987878787878788e-05, |
|
"loss": 176.2776, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 0.07983257783272328, |
|
"grad_norm": 4583.7373046875, |
|
"learning_rate": 3.9919191919191926e-05, |
|
"loss": 163.6398, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 0.0799133800370074, |
|
"grad_norm": 1115.4329833984375, |
|
"learning_rate": 3.9959595959595964e-05, |
|
"loss": 174.1616, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 0.07999418224129154, |
|
"grad_norm": 2389.8232421875, |
|
"learning_rate": 4e-05, |
|
"loss": 163.833, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.08007498444557568, |
|
"grad_norm": 2180.552978515625, |
|
"learning_rate": 4.004040404040404e-05, |
|
"loss": 189.1826, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 0.08015578664985981, |
|
"grad_norm": 2478.71533203125, |
|
"learning_rate": 4.008080808080809e-05, |
|
"loss": 262.7207, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.08023658885414395, |
|
"grad_norm": 952.7739868164062, |
|
"learning_rate": 4.012121212121212e-05, |
|
"loss": 152.5153, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 0.08031739105842807, |
|
"grad_norm": 1591.5555419921875, |
|
"learning_rate": 4.016161616161616e-05, |
|
"loss": 161.1086, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 0.0803981932627122, |
|
"grad_norm": 2502.85400390625, |
|
"learning_rate": 4.0202020202020204e-05, |
|
"loss": 180.6969, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.08047899546699634, |
|
"grad_norm": 1107.453857421875, |
|
"learning_rate": 4.024242424242424e-05, |
|
"loss": 245.7, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 0.08055979767128048, |
|
"grad_norm": 732.3422241210938, |
|
"learning_rate": 4.028282828282828e-05, |
|
"loss": 128.3301, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 0.0806405998755646, |
|
"grad_norm": 852.4077758789062, |
|
"learning_rate": 4.032323232323232e-05, |
|
"loss": 171.862, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 0.08072140207984874, |
|
"grad_norm": 974.1900634765625, |
|
"learning_rate": 4.0363636363636367e-05, |
|
"loss": 245.3339, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 0.08080220428413287, |
|
"grad_norm": 917.6868286132812, |
|
"learning_rate": 4.0404040404040405e-05, |
|
"loss": 151.893, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.08088300648841701, |
|
"grad_norm": 1009.7120971679688, |
|
"learning_rate": 4.0444444444444444e-05, |
|
"loss": 196.4933, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 0.08096380869270113, |
|
"grad_norm": 2075.980224609375, |
|
"learning_rate": 4.048484848484849e-05, |
|
"loss": 190.6988, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 0.08104461089698527, |
|
"grad_norm": 2236.189697265625, |
|
"learning_rate": 4.052525252525253e-05, |
|
"loss": 173.7965, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 0.0811254131012694, |
|
"grad_norm": 1387.155517578125, |
|
"learning_rate": 4.056565656565657e-05, |
|
"loss": 287.3005, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 0.08120621530555354, |
|
"grad_norm": 1775.0162353515625, |
|
"learning_rate": 4.0606060606060606e-05, |
|
"loss": 161.304, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.08128701750983768, |
|
"grad_norm": 2991.034423828125, |
|
"learning_rate": 4.064646464646465e-05, |
|
"loss": 183.5822, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 0.0813678197141218, |
|
"grad_norm": 2393.831298828125, |
|
"learning_rate": 4.068686868686869e-05, |
|
"loss": 216.6813, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 0.08144862191840593, |
|
"grad_norm": 753.2874755859375, |
|
"learning_rate": 4.072727272727273e-05, |
|
"loss": 176.5726, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 0.08152942412269007, |
|
"grad_norm": 917.6944580078125, |
|
"learning_rate": 4.076767676767677e-05, |
|
"loss": 182.8443, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 0.0816102263269742, |
|
"grad_norm": 1497.6976318359375, |
|
"learning_rate": 4.0808080808080814e-05, |
|
"loss": 179.7216, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.08169102853125833, |
|
"grad_norm": 896.7678833007812, |
|
"learning_rate": 4.084848484848485e-05, |
|
"loss": 171.1982, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 0.08177183073554246, |
|
"grad_norm": 2560.443115234375, |
|
"learning_rate": 4.088888888888889e-05, |
|
"loss": 242.9725, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 0.0818526329398266, |
|
"grad_norm": 914.347900390625, |
|
"learning_rate": 4.092929292929293e-05, |
|
"loss": 166.4831, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 0.08193343514411074, |
|
"grad_norm": 1105.142822265625, |
|
"learning_rate": 4.096969696969697e-05, |
|
"loss": 182.9737, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 0.08201423734839486, |
|
"grad_norm": 1321.1329345703125, |
|
"learning_rate": 4.101010101010101e-05, |
|
"loss": 152.9314, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.082095039552679, |
|
"grad_norm": 924.438720703125, |
|
"learning_rate": 4.105050505050505e-05, |
|
"loss": 187.9318, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 0.08217584175696313, |
|
"grad_norm": 834.27685546875, |
|
"learning_rate": 4.109090909090909e-05, |
|
"loss": 177.3875, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 0.08225664396124727, |
|
"grad_norm": 996.1378173828125, |
|
"learning_rate": 4.113131313131313e-05, |
|
"loss": 163.1871, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 0.08233744616553139, |
|
"grad_norm": 1657.6314697265625, |
|
"learning_rate": 4.117171717171717e-05, |
|
"loss": 195.5306, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 0.08241824836981552, |
|
"grad_norm": 1040.5526123046875, |
|
"learning_rate": 4.1212121212121216e-05, |
|
"loss": 179.7722, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.08249905057409966, |
|
"grad_norm": 1405.0408935546875, |
|
"learning_rate": 4.1252525252525255e-05, |
|
"loss": 177.9881, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 0.0825798527783838, |
|
"grad_norm": 1484.392333984375, |
|
"learning_rate": 4.1292929292929294e-05, |
|
"loss": 170.4384, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 0.08266065498266793, |
|
"grad_norm": 533.9537963867188, |
|
"learning_rate": 4.133333333333333e-05, |
|
"loss": 166.7928, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 0.08274145718695206, |
|
"grad_norm": 1133.5531005859375, |
|
"learning_rate": 4.137373737373738e-05, |
|
"loss": 185.8696, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.08282225939123619, |
|
"grad_norm": 1964.5546875, |
|
"learning_rate": 4.141414141414142e-05, |
|
"loss": 247.6705, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.08290306159552033, |
|
"grad_norm": 1816.8203125, |
|
"learning_rate": 4.1454545454545456e-05, |
|
"loss": 188.6205, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 0.08298386379980446, |
|
"grad_norm": 809.494873046875, |
|
"learning_rate": 4.1494949494949495e-05, |
|
"loss": 186.0191, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 0.08306466600408859, |
|
"grad_norm": 1444.1771240234375, |
|
"learning_rate": 4.153535353535354e-05, |
|
"loss": 171.1266, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 0.08314546820837272, |
|
"grad_norm": 1594.9212646484375, |
|
"learning_rate": 4.157575757575758e-05, |
|
"loss": 152.8558, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 0.08322627041265686, |
|
"grad_norm": 1367.26318359375, |
|
"learning_rate": 4.161616161616162e-05, |
|
"loss": 215.3903, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.083307072616941, |
|
"grad_norm": 1395.453857421875, |
|
"learning_rate": 4.165656565656566e-05, |
|
"loss": 219.8608, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 0.08338787482122512, |
|
"grad_norm": 823.7109375, |
|
"learning_rate": 4.16969696969697e-05, |
|
"loss": 198.1472, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 0.08346867702550925, |
|
"grad_norm": 4030.11083984375, |
|
"learning_rate": 4.173737373737374e-05, |
|
"loss": 191.6869, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 0.08354947922979339, |
|
"grad_norm": 1047.6395263671875, |
|
"learning_rate": 4.177777777777778e-05, |
|
"loss": 198.535, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 0.08363028143407752, |
|
"grad_norm": 1213.222412109375, |
|
"learning_rate": 4.181818181818182e-05, |
|
"loss": 209.4711, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.08371108363836166, |
|
"grad_norm": 862.3009643554688, |
|
"learning_rate": 4.185858585858586e-05, |
|
"loss": 197.0447, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 0.08379188584264578, |
|
"grad_norm": 7555.03271484375, |
|
"learning_rate": 4.18989898989899e-05, |
|
"loss": 257.4577, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 0.08387268804692992, |
|
"grad_norm": 1448.51806640625, |
|
"learning_rate": 4.193939393939394e-05, |
|
"loss": 145.9192, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 0.08395349025121406, |
|
"grad_norm": 563.4491577148438, |
|
"learning_rate": 4.197979797979798e-05, |
|
"loss": 185.379, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 0.08403429245549819, |
|
"grad_norm": 2999.943603515625, |
|
"learning_rate": 4.202020202020202e-05, |
|
"loss": 207.6808, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.08411509465978231, |
|
"grad_norm": 1272.3822021484375, |
|
"learning_rate": 4.206060606060606e-05, |
|
"loss": 203.2208, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 0.08419589686406645, |
|
"grad_norm": 1877.1287841796875, |
|
"learning_rate": 4.2101010101010105e-05, |
|
"loss": 208.5024, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 0.08427669906835059, |
|
"grad_norm": 880.4778442382812, |
|
"learning_rate": 4.2141414141414144e-05, |
|
"loss": 114.9268, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 0.08435750127263472, |
|
"grad_norm": 2282.030517578125, |
|
"learning_rate": 4.218181818181818e-05, |
|
"loss": 185.7249, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 0.08443830347691884, |
|
"grad_norm": 1241.139892578125, |
|
"learning_rate": 4.222222222222222e-05, |
|
"loss": 171.4185, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.08451910568120298, |
|
"grad_norm": 1106.116943359375, |
|
"learning_rate": 4.226262626262627e-05, |
|
"loss": 233.517, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 0.08459990788548712, |
|
"grad_norm": 1352.7723388671875, |
|
"learning_rate": 4.2303030303030306e-05, |
|
"loss": 164.0137, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 0.08468071008977125, |
|
"grad_norm": 824.0071411132812, |
|
"learning_rate": 4.2343434343434345e-05, |
|
"loss": 149.2571, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 0.08476151229405539, |
|
"grad_norm": 1494.287841796875, |
|
"learning_rate": 4.2383838383838384e-05, |
|
"loss": 158.6048, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 0.08484231449833951, |
|
"grad_norm": 928.8883056640625, |
|
"learning_rate": 4.242424242424243e-05, |
|
"loss": 160.3929, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.08492311670262365, |
|
"grad_norm": 3102.35791015625, |
|
"learning_rate": 4.246464646464647e-05, |
|
"loss": 262.7529, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 0.08500391890690778, |
|
"grad_norm": 753.5589599609375, |
|
"learning_rate": 4.250505050505051e-05, |
|
"loss": 147.7691, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 0.08508472111119192, |
|
"grad_norm": 1001.4116821289062, |
|
"learning_rate": 4.254545454545455e-05, |
|
"loss": 137.3231, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 0.08516552331547604, |
|
"grad_norm": 813.2144775390625, |
|
"learning_rate": 4.258585858585859e-05, |
|
"loss": 201.1259, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 0.08524632551976018, |
|
"grad_norm": 1098.597900390625, |
|
"learning_rate": 4.262626262626263e-05, |
|
"loss": 165.8937, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.08532712772404431, |
|
"grad_norm": 1298.8853759765625, |
|
"learning_rate": 4.266666666666667e-05, |
|
"loss": 198.1985, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 0.08540792992832845, |
|
"grad_norm": 1016.0570678710938, |
|
"learning_rate": 4.270707070707071e-05, |
|
"loss": 206.1271, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 0.08548873213261257, |
|
"grad_norm": 2057.573974609375, |
|
"learning_rate": 4.274747474747475e-05, |
|
"loss": 172.7861, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 0.08556953433689671, |
|
"grad_norm": 1261.774169921875, |
|
"learning_rate": 4.2787878787878786e-05, |
|
"loss": 149.1268, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 0.08565033654118084, |
|
"grad_norm": 1344.2037353515625, |
|
"learning_rate": 4.282828282828283e-05, |
|
"loss": 195.2029, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.08573113874546498, |
|
"grad_norm": 3034.935546875, |
|
"learning_rate": 4.286868686868687e-05, |
|
"loss": 222.7399, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 0.08581194094974912, |
|
"grad_norm": 970.0159912109375, |
|
"learning_rate": 4.290909090909091e-05, |
|
"loss": 168.5838, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 0.08589274315403324, |
|
"grad_norm": 953.5883178710938, |
|
"learning_rate": 4.294949494949495e-05, |
|
"loss": 162.8468, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 0.08597354535831737, |
|
"grad_norm": 2465.924072265625, |
|
"learning_rate": 4.2989898989898994e-05, |
|
"loss": 204.2057, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 0.08605434756260151, |
|
"grad_norm": 1194.1285400390625, |
|
"learning_rate": 4.303030303030303e-05, |
|
"loss": 197.6671, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.08613514976688565, |
|
"grad_norm": 881.1695556640625, |
|
"learning_rate": 4.307070707070707e-05, |
|
"loss": 163.8398, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 0.08621595197116977, |
|
"grad_norm": 3846.036376953125, |
|
"learning_rate": 4.311111111111111e-05, |
|
"loss": 178.4458, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 0.0862967541754539, |
|
"grad_norm": 1206.0906982421875, |
|
"learning_rate": 4.3151515151515156e-05, |
|
"loss": 153.2348, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 0.08637755637973804, |
|
"grad_norm": 898.4664306640625, |
|
"learning_rate": 4.3191919191919195e-05, |
|
"loss": 143.3541, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 0.08645835858402218, |
|
"grad_norm": 2398.2255859375, |
|
"learning_rate": 4.3232323232323234e-05, |
|
"loss": 212.5595, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.0865391607883063, |
|
"grad_norm": 1220.5733642578125, |
|
"learning_rate": 4.327272727272728e-05, |
|
"loss": 179.1572, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 0.08661996299259044, |
|
"grad_norm": 1974.5457763671875, |
|
"learning_rate": 4.331313131313132e-05, |
|
"loss": 190.0654, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 0.08670076519687457, |
|
"grad_norm": 2125.98583984375, |
|
"learning_rate": 4.335353535353536e-05, |
|
"loss": 168.3283, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 0.08678156740115871, |
|
"grad_norm": 1389.6546630859375, |
|
"learning_rate": 4.3393939393939396e-05, |
|
"loss": 159.5211, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 0.08686236960544283, |
|
"grad_norm": 807.5780029296875, |
|
"learning_rate": 4.343434343434344e-05, |
|
"loss": 192.4806, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.08694317180972697, |
|
"grad_norm": 1139.113037109375, |
|
"learning_rate": 4.347474747474748e-05, |
|
"loss": 155.3915, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 0.0870239740140111, |
|
"grad_norm": 1003.0131225585938, |
|
"learning_rate": 4.351515151515152e-05, |
|
"loss": 150.0795, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 0.08710477621829524, |
|
"grad_norm": 962.0811767578125, |
|
"learning_rate": 4.355555555555556e-05, |
|
"loss": 194.0887, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 0.08718557842257937, |
|
"grad_norm": 626.3510131835938, |
|
"learning_rate": 4.35959595959596e-05, |
|
"loss": 148.0051, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 0.0872663806268635, |
|
"grad_norm": 2664.0517578125, |
|
"learning_rate": 4.3636363636363636e-05, |
|
"loss": 199.106, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.08734718283114763, |
|
"grad_norm": 895.5698852539062, |
|
"learning_rate": 4.3676767676767674e-05, |
|
"loss": 172.3583, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 0.08742798503543177, |
|
"grad_norm": 1103.621826171875, |
|
"learning_rate": 4.371717171717172e-05, |
|
"loss": 203.0746, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 0.0875087872397159, |
|
"grad_norm": 1323.6517333984375, |
|
"learning_rate": 4.375757575757576e-05, |
|
"loss": 158.2676, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 0.08758958944400003, |
|
"grad_norm": 1170.48779296875, |
|
"learning_rate": 4.37979797979798e-05, |
|
"loss": 151.4179, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 0.08767039164828416, |
|
"grad_norm": 1171.2379150390625, |
|
"learning_rate": 4.383838383838384e-05, |
|
"loss": 181.4644, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.0877511938525683, |
|
"grad_norm": 1495.01025390625, |
|
"learning_rate": 4.387878787878788e-05, |
|
"loss": 167.6518, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 0.08783199605685243, |
|
"grad_norm": 1283.5498046875, |
|
"learning_rate": 4.391919191919192e-05, |
|
"loss": 138.4191, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 0.08791279826113656, |
|
"grad_norm": 1028.198974609375, |
|
"learning_rate": 4.395959595959596e-05, |
|
"loss": 167.4624, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 0.0879936004654207, |
|
"grad_norm": 958.3167114257812, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 187.0603, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 0.08807440266970483, |
|
"grad_norm": 1248.95556640625, |
|
"learning_rate": 4.4040404040404044e-05, |
|
"loss": 194.5838, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.08815520487398897, |
|
"grad_norm": 1088.8775634765625, |
|
"learning_rate": 4.408080808080808e-05, |
|
"loss": 137.5682, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 0.0882360070782731, |
|
"grad_norm": 1130.275146484375, |
|
"learning_rate": 4.412121212121212e-05, |
|
"loss": 180.1215, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 0.08831680928255722, |
|
"grad_norm": 1201.9453125, |
|
"learning_rate": 4.416161616161617e-05, |
|
"loss": 157.9624, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 0.08839761148684136, |
|
"grad_norm": 1291.0989990234375, |
|
"learning_rate": 4.420202020202021e-05, |
|
"loss": 159.1208, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 0.0884784136911255, |
|
"grad_norm": 946.185546875, |
|
"learning_rate": 4.4242424242424246e-05, |
|
"loss": 200.6266, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.08855921589540963, |
|
"grad_norm": 2330.45361328125, |
|
"learning_rate": 4.4282828282828284e-05, |
|
"loss": 163.2997, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 0.08864001809969375, |
|
"grad_norm": 959.3818359375, |
|
"learning_rate": 4.432323232323233e-05, |
|
"loss": 200.18, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 0.08872082030397789, |
|
"grad_norm": 1215.0078125, |
|
"learning_rate": 4.436363636363637e-05, |
|
"loss": 194.7184, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 0.08880162250826203, |
|
"grad_norm": 731.89501953125, |
|
"learning_rate": 4.44040404040404e-05, |
|
"loss": 183.9504, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 0.08888242471254616, |
|
"grad_norm": 751.2623291015625, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 164.3441, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.08896322691683028, |
|
"grad_norm": 1424.0635986328125, |
|
"learning_rate": 4.4484848484848485e-05, |
|
"loss": 202.2657, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 0.08904402912111442, |
|
"grad_norm": 1039.608642578125, |
|
"learning_rate": 4.4525252525252524e-05, |
|
"loss": 177.8795, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 0.08912483132539856, |
|
"grad_norm": 1338.993408203125, |
|
"learning_rate": 4.456565656565656e-05, |
|
"loss": 167.3348, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 0.08920563352968269, |
|
"grad_norm": 2591.8984375, |
|
"learning_rate": 4.460606060606061e-05, |
|
"loss": 165.2155, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 0.08928643573396683, |
|
"grad_norm": 931.5535888671875, |
|
"learning_rate": 4.464646464646465e-05, |
|
"loss": 198.5493, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.08936723793825095, |
|
"grad_norm": 1028.25927734375, |
|
"learning_rate": 4.4686868686868686e-05, |
|
"loss": 169.1925, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 0.08944804014253509, |
|
"grad_norm": 1156.0810546875, |
|
"learning_rate": 4.472727272727273e-05, |
|
"loss": 172.742, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 0.08952884234681922, |
|
"grad_norm": 1409.868408203125, |
|
"learning_rate": 4.476767676767677e-05, |
|
"loss": 152.6085, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 0.08960964455110336, |
|
"grad_norm": 1091.4266357421875, |
|
"learning_rate": 4.480808080808081e-05, |
|
"loss": 149.4833, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 0.08969044675538748, |
|
"grad_norm": 1274.9849853515625, |
|
"learning_rate": 4.484848484848485e-05, |
|
"loss": 239.724, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.08977124895967162, |
|
"grad_norm": 2974.341552734375, |
|
"learning_rate": 4.4888888888888894e-05, |
|
"loss": 184.0551, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 0.08985205116395575, |
|
"grad_norm": 893.275390625, |
|
"learning_rate": 4.492929292929293e-05, |
|
"loss": 163.8494, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 0.08993285336823989, |
|
"grad_norm": 1236.2047119140625, |
|
"learning_rate": 4.496969696969697e-05, |
|
"loss": 223.2573, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 0.09001365557252401, |
|
"grad_norm": 560.5221557617188, |
|
"learning_rate": 4.501010101010101e-05, |
|
"loss": 143.0442, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 0.09009445777680815, |
|
"grad_norm": 1671.4537353515625, |
|
"learning_rate": 4.5050505050505056e-05, |
|
"loss": 230.4551, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.09017525998109228, |
|
"grad_norm": 2295.0419921875, |
|
"learning_rate": 4.5090909090909095e-05, |
|
"loss": 191.5067, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 0.09025606218537642, |
|
"grad_norm": 1291.7230224609375, |
|
"learning_rate": 4.5131313131313134e-05, |
|
"loss": 165.677, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 0.09033686438966056, |
|
"grad_norm": 817.165771484375, |
|
"learning_rate": 4.517171717171717e-05, |
|
"loss": 145.0678, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 0.09041766659394468, |
|
"grad_norm": 938.4746704101562, |
|
"learning_rate": 4.521212121212122e-05, |
|
"loss": 168.7315, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 0.09049846879822881, |
|
"grad_norm": 820.7261352539062, |
|
"learning_rate": 4.525252525252526e-05, |
|
"loss": 174.4509, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.09057927100251295, |
|
"grad_norm": 899.671875, |
|
"learning_rate": 4.529292929292929e-05, |
|
"loss": 153.883, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 0.09066007320679709, |
|
"grad_norm": 2744.694091796875, |
|
"learning_rate": 4.5333333333333335e-05, |
|
"loss": 152.8033, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 0.09074087541108121, |
|
"grad_norm": 1821.427734375, |
|
"learning_rate": 4.5373737373737374e-05, |
|
"loss": 203.8027, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 0.09082167761536535, |
|
"grad_norm": 937.9207763671875, |
|
"learning_rate": 4.541414141414141e-05, |
|
"loss": 183.0782, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 0.09090247981964948, |
|
"grad_norm": 1872.185546875, |
|
"learning_rate": 4.545454545454546e-05, |
|
"loss": 153.9125, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.09098328202393362, |
|
"grad_norm": 772.832275390625, |
|
"learning_rate": 4.54949494949495e-05, |
|
"loss": 143.9607, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 0.09106408422821774, |
|
"grad_norm": 2842.652587890625, |
|
"learning_rate": 4.5535353535353536e-05, |
|
"loss": 189.6786, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 0.09114488643250188, |
|
"grad_norm": 1738.2589111328125, |
|
"learning_rate": 4.5575757575757575e-05, |
|
"loss": 163.1939, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 0.09122568863678601, |
|
"grad_norm": 1521.9814453125, |
|
"learning_rate": 4.561616161616162e-05, |
|
"loss": 208.5482, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 0.09130649084107015, |
|
"grad_norm": 1132.692138671875, |
|
"learning_rate": 4.565656565656566e-05, |
|
"loss": 161.8789, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.09138729304535427, |
|
"grad_norm": 1395.38671875, |
|
"learning_rate": 4.56969696969697e-05, |
|
"loss": 164.5156, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 0.0914680952496384, |
|
"grad_norm": 661.7669067382812, |
|
"learning_rate": 4.573737373737374e-05, |
|
"loss": 163.1058, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 0.09154889745392254, |
|
"grad_norm": 1353.42578125, |
|
"learning_rate": 4.577777777777778e-05, |
|
"loss": 178.0265, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 0.09162969965820668, |
|
"grad_norm": 758.9215087890625, |
|
"learning_rate": 4.581818181818182e-05, |
|
"loss": 186.8539, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 0.09171050186249081, |
|
"grad_norm": 926.5440673828125, |
|
"learning_rate": 4.585858585858586e-05, |
|
"loss": 174.9289, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.09179130406677494, |
|
"grad_norm": 1032.7493896484375, |
|
"learning_rate": 4.58989898989899e-05, |
|
"loss": 157.8875, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 0.09187210627105907, |
|
"grad_norm": 1104.685302734375, |
|
"learning_rate": 4.5939393939393945e-05, |
|
"loss": 180.8059, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 0.09195290847534321, |
|
"grad_norm": 807.3258056640625, |
|
"learning_rate": 4.5979797979797984e-05, |
|
"loss": 133.7691, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 0.09203371067962735, |
|
"grad_norm": 1296.3505859375, |
|
"learning_rate": 4.602020202020202e-05, |
|
"loss": 168.8188, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 0.09211451288391147, |
|
"grad_norm": 717.891357421875, |
|
"learning_rate": 4.606060606060607e-05, |
|
"loss": 143.9607, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.0921953150881956, |
|
"grad_norm": 841.2793579101562, |
|
"learning_rate": 4.610101010101011e-05, |
|
"loss": 185.6871, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 0.09227611729247974, |
|
"grad_norm": 1319.2064208984375, |
|
"learning_rate": 4.614141414141414e-05, |
|
"loss": 163.8516, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 0.09235691949676388, |
|
"grad_norm": 1566.4168701171875, |
|
"learning_rate": 4.618181818181818e-05, |
|
"loss": 165.3434, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 0.092437721701048, |
|
"grad_norm": 919.2448120117188, |
|
"learning_rate": 4.6222222222222224e-05, |
|
"loss": 133.6576, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 0.09251852390533213, |
|
"grad_norm": 976.5399780273438, |
|
"learning_rate": 4.626262626262626e-05, |
|
"loss": 172.4401, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.09259932610961627, |
|
"grad_norm": 1343.291015625, |
|
"learning_rate": 4.63030303030303e-05, |
|
"loss": 203.6782, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 0.0926801283139004, |
|
"grad_norm": 839.6242065429688, |
|
"learning_rate": 4.634343434343435e-05, |
|
"loss": 175.6062, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 0.09276093051818454, |
|
"grad_norm": 909.182861328125, |
|
"learning_rate": 4.6383838383838386e-05, |
|
"loss": 159.7895, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 0.09284173272246866, |
|
"grad_norm": 2260.60107421875, |
|
"learning_rate": 4.6424242424242425e-05, |
|
"loss": 168.3035, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 0.0929225349267528, |
|
"grad_norm": 1016.5995483398438, |
|
"learning_rate": 4.6464646464646464e-05, |
|
"loss": 149.6134, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.09300333713103694, |
|
"grad_norm": 1180.2608642578125, |
|
"learning_rate": 4.650505050505051e-05, |
|
"loss": 123.0571, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 0.09308413933532107, |
|
"grad_norm": 1054.5244140625, |
|
"learning_rate": 4.654545454545455e-05, |
|
"loss": 180.0858, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 0.0931649415396052, |
|
"grad_norm": 1064.1981201171875, |
|
"learning_rate": 4.658585858585859e-05, |
|
"loss": 305.0233, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 0.09324574374388933, |
|
"grad_norm": 1455.9857177734375, |
|
"learning_rate": 4.6626262626262626e-05, |
|
"loss": 174.1713, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 0.09332654594817347, |
|
"grad_norm": 801.254150390625, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 175.7632, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.0934073481524576, |
|
"grad_norm": 811.2909545898438, |
|
"learning_rate": 4.670707070707071e-05, |
|
"loss": 152.5526, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 0.09348815035674173, |
|
"grad_norm": 860.8350830078125, |
|
"learning_rate": 4.674747474747475e-05, |
|
"loss": 159.9338, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 0.09356895256102586, |
|
"grad_norm": 1820.26318359375, |
|
"learning_rate": 4.6787878787878795e-05, |
|
"loss": 205.222, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 0.09364975476531, |
|
"grad_norm": 1027.9521484375, |
|
"learning_rate": 4.6828282828282834e-05, |
|
"loss": 167.9507, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 0.09373055696959413, |
|
"grad_norm": 3551.64599609375, |
|
"learning_rate": 4.686868686868687e-05, |
|
"loss": 194.2529, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.09381135917387827, |
|
"grad_norm": 957.3357543945312, |
|
"learning_rate": 4.690909090909091e-05, |
|
"loss": 185.5651, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 0.09389216137816239, |
|
"grad_norm": 587.98828125, |
|
"learning_rate": 4.694949494949496e-05, |
|
"loss": 208.2654, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 0.09397296358244653, |
|
"grad_norm": 824.1953735351562, |
|
"learning_rate": 4.698989898989899e-05, |
|
"loss": 118.9412, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 0.09405376578673066, |
|
"grad_norm": 997.8128051757812, |
|
"learning_rate": 4.703030303030303e-05, |
|
"loss": 144.5133, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 0.0941345679910148, |
|
"grad_norm": 825.6588745117188, |
|
"learning_rate": 4.7070707070707074e-05, |
|
"loss": 130.3822, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.09421537019529892, |
|
"grad_norm": 1590.2271728515625, |
|
"learning_rate": 4.711111111111111e-05, |
|
"loss": 147.783, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 0.09429617239958306, |
|
"grad_norm": 831.8695068359375, |
|
"learning_rate": 4.715151515151515e-05, |
|
"loss": 150.0897, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 0.0943769746038672, |
|
"grad_norm": 879.4678955078125, |
|
"learning_rate": 4.719191919191919e-05, |
|
"loss": 134.1156, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 0.09445777680815133, |
|
"grad_norm": 1955.2484130859375, |
|
"learning_rate": 4.7232323232323236e-05, |
|
"loss": 155.8984, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 0.09453857901243545, |
|
"grad_norm": 1074.5509033203125, |
|
"learning_rate": 4.7272727272727275e-05, |
|
"loss": 186.7514, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.09461938121671959, |
|
"grad_norm": 992.748046875, |
|
"learning_rate": 4.7313131313131314e-05, |
|
"loss": 155.1787, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 0.09470018342100373, |
|
"grad_norm": 990.5753784179688, |
|
"learning_rate": 4.735353535353535e-05, |
|
"loss": 140.0309, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 0.09478098562528786, |
|
"grad_norm": 792.1434936523438, |
|
"learning_rate": 4.73939393939394e-05, |
|
"loss": 138.4607, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 0.094861787829572, |
|
"grad_norm": 1121.582763671875, |
|
"learning_rate": 4.743434343434344e-05, |
|
"loss": 121.2844, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 0.09494259003385612, |
|
"grad_norm": 1351.6878662109375, |
|
"learning_rate": 4.7474747474747476e-05, |
|
"loss": 198.7702, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.09502339223814026, |
|
"grad_norm": 2031.31494140625, |
|
"learning_rate": 4.751515151515152e-05, |
|
"loss": 164.1413, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 0.09510419444242439, |
|
"grad_norm": 761.006103515625, |
|
"learning_rate": 4.755555555555556e-05, |
|
"loss": 143.7779, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 0.09518499664670853, |
|
"grad_norm": 1228.6676025390625, |
|
"learning_rate": 4.75959595959596e-05, |
|
"loss": 177.4899, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 0.09526579885099265, |
|
"grad_norm": 817.8377075195312, |
|
"learning_rate": 4.763636363636364e-05, |
|
"loss": 158.3827, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 0.09534660105527679, |
|
"grad_norm": 1481.2501220703125, |
|
"learning_rate": 4.7676767676767684e-05, |
|
"loss": 144.1627, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.09542740325956092, |
|
"grad_norm": 899.5111083984375, |
|
"learning_rate": 4.771717171717172e-05, |
|
"loss": 188.1043, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 0.09550820546384506, |
|
"grad_norm": 766.15869140625, |
|
"learning_rate": 4.775757575757576e-05, |
|
"loss": 190.8986, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 0.09558900766812918, |
|
"grad_norm": 818.2703857421875, |
|
"learning_rate": 4.77979797979798e-05, |
|
"loss": 226.6272, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 0.09566980987241332, |
|
"grad_norm": 1306.8607177734375, |
|
"learning_rate": 4.7838383838383846e-05, |
|
"loss": 179.7575, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 0.09575061207669745, |
|
"grad_norm": 1371.1048583984375, |
|
"learning_rate": 4.787878787878788e-05, |
|
"loss": 184.5432, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.09583141428098159, |
|
"grad_norm": 1219.8428955078125, |
|
"learning_rate": 4.791919191919192e-05, |
|
"loss": 256.0484, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 0.09591221648526572, |
|
"grad_norm": 1769.408935546875, |
|
"learning_rate": 4.795959595959596e-05, |
|
"loss": 216.8512, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 0.09599301868954985, |
|
"grad_norm": 1021.0985107421875, |
|
"learning_rate": 4.8e-05, |
|
"loss": 150.8418, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 0.09607382089383398, |
|
"grad_norm": 789.3172607421875, |
|
"learning_rate": 4.804040404040404e-05, |
|
"loss": 168.713, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 0.09615462309811812, |
|
"grad_norm": 1145.4168701171875, |
|
"learning_rate": 4.808080808080808e-05, |
|
"loss": 248.8971, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.09623542530240226, |
|
"grad_norm": 1330.3175048828125, |
|
"learning_rate": 4.8121212121212125e-05, |
|
"loss": 143.9111, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 0.09631622750668638, |
|
"grad_norm": 1145.8402099609375, |
|
"learning_rate": 4.8161616161616163e-05, |
|
"loss": 196.2298, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 0.09639702971097051, |
|
"grad_norm": 3719.409423828125, |
|
"learning_rate": 4.82020202020202e-05, |
|
"loss": 141.981, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 0.09647783191525465, |
|
"grad_norm": 783.9396362304688, |
|
"learning_rate": 4.824242424242425e-05, |
|
"loss": 139.5111, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 0.09655863411953879, |
|
"grad_norm": 817.3587036132812, |
|
"learning_rate": 4.828282828282829e-05, |
|
"loss": 193.1281, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 0.09663943632382291, |
|
"grad_norm": 2150.12939453125, |
|
"learning_rate": 4.8323232323232326e-05, |
|
"loss": 173.1304, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 0.09672023852810704, |
|
"grad_norm": 1299.8162841796875, |
|
"learning_rate": 4.8363636363636364e-05, |
|
"loss": 181.7705, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 0.09680104073239118, |
|
"grad_norm": 936.7069091796875, |
|
"learning_rate": 4.840404040404041e-05, |
|
"loss": 173.4242, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 0.09688184293667532, |
|
"grad_norm": 613.501708984375, |
|
"learning_rate": 4.844444444444445e-05, |
|
"loss": 153.117, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 0.09696264514095944, |
|
"grad_norm": 1406.6314697265625, |
|
"learning_rate": 4.848484848484849e-05, |
|
"loss": 185.4404, |
|
"step": 12000 |
|
} |
  ],
  "logging_steps": 10,
  "max_steps": 123750,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}