|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.387568555758683, |
|
"eval_steps": 500, |
|
"global_step": 12000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003656307129798903, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9978062157221207e-05, |
|
"loss": 0.0015, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.007312614259597806, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9956124314442413e-05, |
|
"loss": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.010968921389396709, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9934186471663623e-05, |
|
"loss": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.014625228519195612, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.991224862888483e-05, |
|
"loss": 0.0001, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.018281535648994516, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9890310786106035e-05, |
|
"loss": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.021937842778793418, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9868372943327238e-05, |
|
"loss": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.025594149908592323, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9846435100548447e-05, |
|
"loss": 0.0004, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.029250457038391225, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9824497257769653e-05, |
|
"loss": 0.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03290676416819013, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.980255941499086e-05, |
|
"loss": 0.0007, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03656307129798903, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9780621572212066e-05, |
|
"loss": 0.0042, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04021937842778794, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9758683729433275e-05, |
|
"loss": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.043875685557586835, |
|
"grad_norm": 1.104537010192871, |
|
"learning_rate": 2.973674588665448e-05, |
|
"loss": 0.0002, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04753199268738574, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9714808043875687e-05, |
|
"loss": 0.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.051188299817184646, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9692870201096894e-05, |
|
"loss": 0.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.054844606946983544, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.96709323583181e-05, |
|
"loss": 0.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05850091407678245, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9648994515539306e-05, |
|
"loss": 0.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.062157221206581355, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9627056672760512e-05, |
|
"loss": 0.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.06581352833638025, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9605118829981718e-05, |
|
"loss": 0.0, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.06946983546617916, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9583180987202924e-05, |
|
"loss": 0.0, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.07312614259597806, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9561243144424134e-05, |
|
"loss": 0.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07678244972577697, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.953930530164534e-05, |
|
"loss": 0.0, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.08043875685557587, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9517367458866546e-05, |
|
"loss": 0.0, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.08409506398537477, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9495429616087752e-05, |
|
"loss": 0.0, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.08775137111517367, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.947349177330896e-05, |
|
"loss": 0.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.09140767824497258, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9451553930530168e-05, |
|
"loss": 0.0099, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.09506398537477148, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.942961608775137e-05, |
|
"loss": 0.0, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.09872029250457039, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9407678244972577e-05, |
|
"loss": 0.0, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.10237659963436929, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9385740402193783e-05, |
|
"loss": 0.0, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.10603290676416818, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9363802559414992e-05, |
|
"loss": 0.0, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.10968921389396709, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.93418647166362e-05, |
|
"loss": 0.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.113345521023766, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9319926873857405e-05, |
|
"loss": 0.0, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.1170018281535649, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.929798903107861e-05, |
|
"loss": 0.0001, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1206581352833638, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.927605118829982e-05, |
|
"loss": 0.0, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.12431444241316271, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9254113345521026e-05, |
|
"loss": 0.0, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.12797074954296161, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9232175502742233e-05, |
|
"loss": 0.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1316270566727605, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9210237659963435e-05, |
|
"loss": 0.0, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.13528336380255943, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.918829981718464e-05, |
|
"loss": 0.0, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.13893967093235832, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.916636197440585e-05, |
|
"loss": 0.0, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1425959780621572, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9144424131627057e-05, |
|
"loss": 0.0, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.14625228519195613, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9122486288848263e-05, |
|
"loss": 0.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.14990859232175502, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.910054844606947e-05, |
|
"loss": 0.0001, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.15356489945155394, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.907861060329068e-05, |
|
"loss": 0.0, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.15722120658135283, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9056672760511885e-05, |
|
"loss": 0.0, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.16087751371115175, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.903473491773309e-05, |
|
"loss": 0.0, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.16453382084095064, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9012797074954297e-05, |
|
"loss": 0.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.16819012797074953, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8990859232175504e-05, |
|
"loss": 0.0, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.17184643510054845, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.896892138939671e-05, |
|
"loss": 0.0, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.17550274223034734, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8946983546617916e-05, |
|
"loss": 0.0, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.17915904936014626, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8925045703839122e-05, |
|
"loss": 0.0, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.18281535648994515, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8903107861060328e-05, |
|
"loss": 0.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.18647166361974407, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8881170018281538e-05, |
|
"loss": 0.0, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.19012797074954296, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8859232175502744e-05, |
|
"loss": 0.0, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.19378427787934185, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.883729433272395e-05, |
|
"loss": 0.0, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.19744058500914077, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8815356489945156e-05, |
|
"loss": 0.0, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.20109689213893966, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8793418647166366e-05, |
|
"loss": 0.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.20475319926873858, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.877148080438757e-05, |
|
"loss": 0.0, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.20840950639853748, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8749542961608775e-05, |
|
"loss": 0.0, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.21206581352833637, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.872760511882998e-05, |
|
"loss": 0.0, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.21572212065813529, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.870566727605119e-05, |
|
"loss": 0.0, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.21937842778793418, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8683729433272396e-05, |
|
"loss": 0.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2230347349177331, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8661791590493602e-05, |
|
"loss": 0.0, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.226691042047532, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.863985374771481e-05, |
|
"loss": 0.0, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.2303473491773309, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8617915904936015e-05, |
|
"loss": 0.0, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.2340036563071298, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8595978062157224e-05, |
|
"loss": 0.0, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.2376599634369287, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.857404021937843e-05, |
|
"loss": 0.0002, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.2413162705667276, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8552102376599633e-05, |
|
"loss": 0.0, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.2449725776965265, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.853016453382084e-05, |
|
"loss": 0.0, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.24862888482632542, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.850822669104205e-05, |
|
"loss": 0.0, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.2522851919561243, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8486288848263255e-05, |
|
"loss": 0.0, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.25594149908592323, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.846435100548446e-05, |
|
"loss": 0.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2595978062157221, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8442413162705667e-05, |
|
"loss": 0.0005, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.263254113345521, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8420475319926873e-05, |
|
"loss": 0.0, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.26691042047531993, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8398537477148083e-05, |
|
"loss": 0.0, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.27056672760511885, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.837659963436929e-05, |
|
"loss": 0.0073, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2742230347349177, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8354661791590495e-05, |
|
"loss": 0.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.27787934186471663, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8332723948811698e-05, |
|
"loss": 0.0, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.28153564899451555, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8310786106032907e-05, |
|
"loss": 0.0, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.2851919561243144, |
|
"grad_norm": 0.01952667348086834, |
|
"learning_rate": 2.8288848263254114e-05, |
|
"loss": 0.0094, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.28884826325411334, |
|
"grad_norm": 1.364105224609375, |
|
"learning_rate": 2.826691042047532e-05, |
|
"loss": 0.0086, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.29250457038391225, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8244972577696526e-05, |
|
"loss": 0.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2961608775137112, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8223034734917735e-05, |
|
"loss": 0.0, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.29981718464351004, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.820109689213894e-05, |
|
"loss": 0.0, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.30347349177330896, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8179159049360148e-05, |
|
"loss": 0.0, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.3071297989031079, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8157221206581354e-05, |
|
"loss": 0.0002, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.31078610603290674, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.813528336380256e-05, |
|
"loss": 0.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.31444241316270566, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8113345521023766e-05, |
|
"loss": 0.0, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.3180987202925046, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8091407678244972e-05, |
|
"loss": 0.0, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.3217550274223035, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.806946983546618e-05, |
|
"loss": 0.0, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.32541133455210236, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8047531992687385e-05, |
|
"loss": 0.0, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.3290676416819013, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8025594149908594e-05, |
|
"loss": 0.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.3327239488117002, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.80036563071298e-05, |
|
"loss": 0.0, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.33638025594149906, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7981718464351006e-05, |
|
"loss": 0.0, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.340036563071298, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7959780621572213e-05, |
|
"loss": 0.0, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.3436928702010969, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7937842778793422e-05, |
|
"loss": 0.0, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.3473491773308958, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7915904936014628e-05, |
|
"loss": 0.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.3510054844606947, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.789396709323583e-05, |
|
"loss": 0.0, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.3546617915904936, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7872029250457037e-05, |
|
"loss": 0.0, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.3583180987202925, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7850091407678243e-05, |
|
"loss": 0.0, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.3619744058500914, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7828153564899453e-05, |
|
"loss": 0.0, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.3656307129798903, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.780621572212066e-05, |
|
"loss": 0.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3692870201096892, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7784277879341865e-05, |
|
"loss": 0.0, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.37294332723948814, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.776234003656307e-05, |
|
"loss": 0.0, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.376599634369287, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.774040219378428e-05, |
|
"loss": 0.0, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.3802559414990859, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7718464351005487e-05, |
|
"loss": 0.0, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.38391224862888484, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7696526508226693e-05, |
|
"loss": 0.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.3875685557586837, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.76745886654479e-05, |
|
"loss": 0.0, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.3912248628884826, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7652650822669102e-05, |
|
"loss": 0.0, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.39488117001828155, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.763071297989031e-05, |
|
"loss": 0.0, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.39853747714808047, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7608775137111518e-05, |
|
"loss": 0.0, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.40219378427787933, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7586837294332724e-05, |
|
"loss": 0.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.40585009140767825, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.756489945155393e-05, |
|
"loss": 0.0, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.40950639853747717, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.754296160877514e-05, |
|
"loss": 0.0038, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.41316270566727603, |
|
"grad_norm": 2.969595432281494, |
|
"learning_rate": 2.7521023765996346e-05, |
|
"loss": 0.1412, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.41681901279707495, |
|
"grad_norm": 0.8748220205307007, |
|
"learning_rate": 2.749908592321755e-05, |
|
"loss": 0.0621, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.42047531992687387, |
|
"grad_norm": 0.1589801162481308, |
|
"learning_rate": 2.7477148080438758e-05, |
|
"loss": 0.0083, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.42413162705667273, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7455210237659964e-05, |
|
"loss": 0.0007, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.42778793418647165, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.743327239488117e-05, |
|
"loss": 0.0, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.43144424131627057, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7411334552102376e-05, |
|
"loss": 0.0001, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.4351005484460695, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7389396709323582e-05, |
|
"loss": 0.0001, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.43875685557586835, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.736745886654479e-05, |
|
"loss": 0.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.4424131627056673, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7345521023765998e-05, |
|
"loss": 0.0, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.4460694698354662, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7323583180987204e-05, |
|
"loss": 0.0, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.44972577696526506, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.730164533820841e-05, |
|
"loss": 0.0, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.453382084095064, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7279707495429616e-05, |
|
"loss": 0.0, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.4570383912248629, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7257769652650826e-05, |
|
"loss": 0.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.4606946983546618, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7235831809872032e-05, |
|
"loss": 0.0, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.4643510054844607, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7213893967093235e-05, |
|
"loss": 0.0, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.4680073126142596, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.719195612431444e-05, |
|
"loss": 0.0, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.4716636197440585, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.717001828153565e-05, |
|
"loss": 0.0, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.4753199268738574, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7148080438756857e-05, |
|
"loss": 0.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.4789762340036563, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7126142595978063e-05, |
|
"loss": 0.0, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.4826325411334552, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.710420475319927e-05, |
|
"loss": 0.0, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.48628884826325414, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7082266910420475e-05, |
|
"loss": 0.0, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.489945155393053, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7060329067641685e-05, |
|
"loss": 0.0, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.4936014625228519, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.703839122486289e-05, |
|
"loss": 0.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.49725776965265084, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7016453382084097e-05, |
|
"loss": 0.0, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.5009140767824497, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.69945155393053e-05, |
|
"loss": 0.0, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.5045703839122486, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.697257769652651e-05, |
|
"loss": 0.0, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.5082266910420475, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6950639853747715e-05, |
|
"loss": 0.0, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.5118829981718465, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.692870201096892e-05, |
|
"loss": 0.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.5155393053016454, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6906764168190128e-05, |
|
"loss": 0.0, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.5191956124314442, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6884826325411337e-05, |
|
"loss": 0.0, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.5228519195612431, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6862888482632543e-05, |
|
"loss": 0.0, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.526508226691042, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.684095063985375e-05, |
|
"loss": 0.0, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.5301645338208409, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6819012797074956e-05, |
|
"loss": 0.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.5338208409506399, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6797074954296162e-05, |
|
"loss": 0.0, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.5374771480804388, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6775137111517368e-05, |
|
"loss": 0.0, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.5411334552102377, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6753199268738574e-05, |
|
"loss": 0.0, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.5447897623400365, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.673126142595978e-05, |
|
"loss": 0.0, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.5484460694698354, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6709323583180986e-05, |
|
"loss": 0.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.5521023765996343, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6687385740402196e-05, |
|
"loss": 0.0, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.5557586837294333, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6665447897623402e-05, |
|
"loss": 0.0, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.5594149908592322, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6643510054844608e-05, |
|
"loss": 0.0, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.5630712979890311, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6621572212065814e-05, |
|
"loss": 0.0, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.56672760511883, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.659963436928702e-05, |
|
"loss": 0.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.5703839122486288, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.657769652650823e-05, |
|
"loss": 0.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.5740402193784278, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6555758683729433e-05, |
|
"loss": 0.0, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.5776965265082267, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.653382084095064e-05, |
|
"loss": 0.0, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.5813528336380256, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6511882998171845e-05, |
|
"loss": 0.0, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.5850091407678245, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6489945155393054e-05, |
|
"loss": 0.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5886654478976234, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.646800731261426e-05, |
|
"loss": 0.0, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.5923217550274223, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6446069469835467e-05, |
|
"loss": 0.0, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.5959780621572212, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6424131627056673e-05, |
|
"loss": 0.0, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.5996343692870201, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6402193784277882e-05, |
|
"loss": 0.0, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.603290676416819, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.638025594149909e-05, |
|
"loss": 0.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.6069469835466179, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6358318098720295e-05, |
|
"loss": 0.0, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.6106032906764168, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6336380255941497e-05, |
|
"loss": 0.0, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.6142595978062158, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6314442413162704e-05, |
|
"loss": 0.0, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.6179159049360147, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6292504570383913e-05, |
|
"loss": 0.0, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.6215722120658135, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.627056672760512e-05, |
|
"loss": 0.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.6252285191956124, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6248628884826325e-05, |
|
"loss": 0.0, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.6288848263254113, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.622669104204753e-05, |
|
"loss": 0.0, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.6325411334552102, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.620475319926874e-05, |
|
"loss": 0.0, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.6361974405850092, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6182815356489947e-05, |
|
"loss": 0.0, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.6398537477148081, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6160877513711153e-05, |
|
"loss": 0.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.643510054844607, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.613893967093236e-05, |
|
"loss": 0.0, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.6471663619744058, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6117001828153566e-05, |
|
"loss": 0.0, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.6508226691042047, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6095063985374772e-05, |
|
"loss": 0.0, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.6544789762340036, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6073126142595978e-05, |
|
"loss": 0.0, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.6581352833638026, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6051188299817184e-05, |
|
"loss": 0.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.6617915904936015, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.602925045703839e-05, |
|
"loss": 0.0, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.6654478976234004, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.60073126142596e-05, |
|
"loss": 0.0, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.6691042047531993, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5985374771480806e-05, |
|
"loss": 0.0, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.6727605118829981, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5963436928702012e-05, |
|
"loss": 0.0, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.676416819012797, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5941499085923218e-05, |
|
"loss": 0.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.680073126142596, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5919561243144428e-05, |
|
"loss": 0.0, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.6837294332723949, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.589762340036563e-05, |
|
"loss": 0.0, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.6873857404021938, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5875685557586837e-05, |
|
"loss": 0.0, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.6910420475319927, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5853747714808043e-05, |
|
"loss": 0.0, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.6946983546617916, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.583180987202925e-05, |
|
"loss": 0.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.6983546617915904, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.580987202925046e-05, |
|
"loss": 0.0, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.7020109689213894, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5787934186471665e-05, |
|
"loss": 0.0, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.7056672760511883, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.576599634369287e-05, |
|
"loss": 0.0, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.7093235831809872, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5744058500914077e-05, |
|
"loss": 0.0, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.7129798903107861, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5722120658135286e-05, |
|
"loss": 0.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.716636197440585, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5700182815356492e-05, |
|
"loss": 0.0, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.720292504570384, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5678244972577695e-05, |
|
"loss": 0.0, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.7239488117001828, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.56563071297989e-05, |
|
"loss": 0.0, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.7276051188299817, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.563436928702011e-05, |
|
"loss": 0.0, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.7312614259597806, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5612431444241317e-05, |
|
"loss": 0.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.7349177330895795, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5590493601462523e-05, |
|
"loss": 0.0, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.7385740402193784, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.556855575868373e-05, |
|
"loss": 0.0, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.7422303473491774, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5546617915904935e-05, |
|
"loss": 0.0, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.7458866544789763, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5524680073126145e-05, |
|
"loss": 0.0, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.7495429616087751, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.550274223034735e-05, |
|
"loss": 0.0, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.753199268738574, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5480804387568557e-05, |
|
"loss": 0.0, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.7568555758683729, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.545886654478976e-05, |
|
"loss": 0.0, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.7605118829981719, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.543692870201097e-05, |
|
"loss": 0.0, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.7641681901279708, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5414990859232176e-05, |
|
"loss": 0.0, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.7678244972577697, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5393053016453382e-05, |
|
"loss": 0.0, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.7714808043875686, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5371115173674588e-05, |
|
"loss": 0.0, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.7751371115173674, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5349177330895798e-05, |
|
"loss": 0.0, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.7787934186471663, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5327239488117004e-05, |
|
"loss": 0.0, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.7824497257769653, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.530530164533821e-05, |
|
"loss": 0.0, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.7861060329067642, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5283363802559416e-05, |
|
"loss": 0.0, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.7897623400365631, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5261425959780622e-05, |
|
"loss": 0.0, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.793418647166362, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5239488117001828e-05, |
|
"loss": 0.0, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.7970749542961609, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5217550274223034e-05, |
|
"loss": 0.0, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.8007312614259597, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.519561243144424e-05, |
|
"loss": 0.0, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.8043875685557587, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5173674588665447e-05, |
|
"loss": 0.0, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.8080438756855576, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5151736745886656e-05, |
|
"loss": 0.0, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.8117001828153565, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5129798903107862e-05, |
|
"loss": 0.0, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.8153564899451554, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.510786106032907e-05, |
|
"loss": 0.0, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.8190127970749543, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5085923217550275e-05, |
|
"loss": 0.0, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.8226691042047533, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.506398537477148e-05, |
|
"loss": 0.0, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.8263254113345521, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.504204753199269e-05, |
|
"loss": 0.0, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.829981718464351, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.5020109689213896e-05, |
|
"loss": 0.0, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.8336380255941499, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.49981718464351e-05, |
|
"loss": 0.0, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.8372943327239488, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4976234003656305e-05, |
|
"loss": 0.0, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.8409506398537477, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4954296160877515e-05, |
|
"loss": 0.0, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.8446069469835467, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.493235831809872e-05, |
|
"loss": 0.0, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.8482632541133455, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4910420475319927e-05, |
|
"loss": 0.0, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.8519195612431444, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4888482632541133e-05, |
|
"loss": 0.0, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.8555758683729433, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4866544789762343e-05, |
|
"loss": 0.0, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.8592321755027422, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.484460694698355e-05, |
|
"loss": 0.0, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.8628884826325411, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4822669104204755e-05, |
|
"loss": 0.0, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.8665447897623401, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.480073126142596e-05, |
|
"loss": 0.0, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.870201096892139, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4778793418647164e-05, |
|
"loss": 0.0, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.8738574040219378, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4756855575868373e-05, |
|
"loss": 0.0, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.8775137111517367, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.473491773308958e-05, |
|
"loss": 0.0, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.8811700182815356, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4712979890310786e-05, |
|
"loss": 0.0, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.8848263254113345, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4691042047531992e-05, |
|
"loss": 0.0, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.8884826325411335, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.46691042047532e-05, |
|
"loss": 0.0, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.8921389396709324, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4647166361974408e-05, |
|
"loss": 0.0, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.8957952468007313, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4625228519195614e-05, |
|
"loss": 0.0, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.8994515539305301, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.460329067641682e-05, |
|
"loss": 0.0, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.903107861060329, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.458135283363803e-05, |
|
"loss": 0.0, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.906764168190128, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4559414990859232e-05, |
|
"loss": 0.0, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.9104204753199269, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4537477148080438e-05, |
|
"loss": 0.0, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.9140767824497258, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4515539305301644e-05, |
|
"loss": 0.0, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.9177330895795247, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.449360146252285e-05, |
|
"loss": 0.0, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.9213893967093236, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.447166361974406e-05, |
|
"loss": 0.0, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.9250457038391224, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4449725776965266e-05, |
|
"loss": 0.0, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.9287020109689214, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4427787934186472e-05, |
|
"loss": 0.0, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.9323583180987203, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.440585009140768e-05, |
|
"loss": 0.0, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.9360146252285192, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4383912248628888e-05, |
|
"loss": 0.0, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.9396709323583181, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4361974405850094e-05, |
|
"loss": 0.0, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.943327239488117, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4340036563071297e-05, |
|
"loss": 0.0, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.946983546617916, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4318098720292503e-05, |
|
"loss": 0.0, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.9506398537477148, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4296160877513713e-05, |
|
"loss": 0.0, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.9542961608775137, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.427422303473492e-05, |
|
"loss": 0.0, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.9579524680073126, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4252285191956125e-05, |
|
"loss": 0.0, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.9616087751371115, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.423034734917733e-05, |
|
"loss": 0.0, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.9652650822669104, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4208409506398537e-05, |
|
"loss": 0.0, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.9689213893967094, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4186471663619747e-05, |
|
"loss": 0.0, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.9725776965265083, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4164533820840953e-05, |
|
"loss": 0.0, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.9762340036563071, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.414259597806216e-05, |
|
"loss": 0.0, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.979890310786106, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4120658135283362e-05, |
|
"loss": 0.0, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.9835466179159049, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.409872029250457e-05, |
|
"loss": 0.0, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.9872029250457038, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4076782449725777e-05, |
|
"loss": 0.0, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.9908592321755028, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4054844606946984e-05, |
|
"loss": 0.0, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.9945155393053017, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.403290676416819e-05, |
|
"loss": 0.0, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.9981718464351006, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4010968921389396e-05, |
|
"loss": 0.0, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.0018281535648994, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3989031078610605e-05, |
|
"loss": 0.0, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.0054844606946984, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.396709323583181e-05, |
|
"loss": 0.0, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.0091407678244972, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3945155393053018e-05, |
|
"loss": 0.0, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.012797074954296, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3923217550274224e-05, |
|
"loss": 0.0, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.016453382084095, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.390127970749543e-05, |
|
"loss": 0.0, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.0201096892138939, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3879341864716636e-05, |
|
"loss": 0.0, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.023765996343693, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3857404021937842e-05, |
|
"loss": 0.0, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.0274223034734917, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.383546617915905e-05, |
|
"loss": 0.0, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.0310786106032908, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3813528336380258e-05, |
|
"loss": 0.0, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.0347349177330896, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3791590493601464e-05, |
|
"loss": 0.0, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.0383912248628886, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.376965265082267e-05, |
|
"loss": 0.0, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.0420475319926874, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3747714808043876e-05, |
|
"loss": 0.0, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.0457038391224862, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3725776965265082e-05, |
|
"loss": 0.0, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.0493601462522852, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3703839122486292e-05, |
|
"loss": 0.0, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.053016453382084, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3681901279707495e-05, |
|
"loss": 0.0, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.056672760511883, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.36599634369287e-05, |
|
"loss": 0.0, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.0603290676416819, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3638025594149907e-05, |
|
"loss": 0.0, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.0639853747714807, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3616087751371117e-05, |
|
"loss": 0.0, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.0676416819012797, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3594149908592323e-05, |
|
"loss": 0.0, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.0712979890310785, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.357221206581353e-05, |
|
"loss": 0.0, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.0749542961608776, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3550274223034735e-05, |
|
"loss": 0.0, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.0786106032906764, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3528336380255944e-05, |
|
"loss": 0.0, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.0822669104204754, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.350639853747715e-05, |
|
"loss": 0.0, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.0859232175502742, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3484460694698357e-05, |
|
"loss": 0.0, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.0895795246800732, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.346252285191956e-05, |
|
"loss": 0.0, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.093235831809872, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3440585009140766e-05, |
|
"loss": 0.0, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.0968921389396709, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3418647166361975e-05, |
|
"loss": 0.0, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.1005484460694699, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.339670932358318e-05, |
|
"loss": 0.0, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.1042047531992687, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3374771480804387e-05, |
|
"loss": 0.0, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.1078610603290677, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3352833638025594e-05, |
|
"loss": 0.0, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.1115173674588665, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3330895795246803e-05, |
|
"loss": 0.0, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.1151736745886653, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.330895795246801e-05, |
|
"loss": 0.0, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.1188299817184644, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3287020109689215e-05, |
|
"loss": 0.0, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.1224862888482632, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.326508226691042e-05, |
|
"loss": 0.0, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.1261425959780622, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3243144424131624e-05, |
|
"loss": 0.0, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.129798903107861, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3221206581352834e-05, |
|
"loss": 0.0, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.13345521023766, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.319926873857404e-05, |
|
"loss": 0.0, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.1371115173674589, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3177330895795246e-05, |
|
"loss": 0.0, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.1407678244972579, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3155393053016452e-05, |
|
"loss": 0.0, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.1444241316270567, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3133455210237662e-05, |
|
"loss": 0.0, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.1480804387568555, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3111517367458868e-05, |
|
"loss": 0.0, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.1517367458866545, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3089579524680074e-05, |
|
"loss": 0.0, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.1553930530164533, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.306764168190128e-05, |
|
"loss": 0.0, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.1590493601462524, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.304570383912249e-05, |
|
"loss": 0.0, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.1627056672760512, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3023765996343693e-05, |
|
"loss": 0.0, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.16636197440585, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.30018281535649e-05, |
|
"loss": 0.0, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.170018281535649, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2979890310786105e-05, |
|
"loss": 0.0, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.1736745886654478, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.295795246800731e-05, |
|
"loss": 0.0, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.1773308957952469, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.293601462522852e-05, |
|
"loss": 0.0, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.1809872029250457, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2914076782449727e-05, |
|
"loss": 0.0, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.1846435100548447, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2892138939670933e-05, |
|
"loss": 0.0, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.1882998171846435, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.287020109689214e-05, |
|
"loss": 0.0, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.1919561243144425, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.284826325411335e-05, |
|
"loss": 0.0, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.1956124314442413, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2826325411334555e-05, |
|
"loss": 0.0, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.1992687385740401, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.280438756855576e-05, |
|
"loss": 0.0, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.2029250457038392, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2782449725776963e-05, |
|
"loss": 0.0, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.206581352833638, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2760511882998173e-05, |
|
"loss": 0.0, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.210237659963437, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.273857404021938e-05, |
|
"loss": 0.0, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.2138939670932358, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2716636197440585e-05, |
|
"loss": 0.0, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.2175502742230346, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.269469835466179e-05, |
|
"loss": 0.0, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.2212065813528337, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2672760511882998e-05, |
|
"loss": 0.0, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.2248628884826325, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2650822669104207e-05, |
|
"loss": 0.0, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.2285191956124315, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2628884826325413e-05, |
|
"loss": 0.0, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.2321755027422303, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.260694698354662e-05, |
|
"loss": 0.0, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.2358318098720293, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2585009140767826e-05, |
|
"loss": 0.0, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.2394881170018281, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.256307129798903e-05, |
|
"loss": 0.0, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.2431444241316272, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2541133455210238e-05, |
|
"loss": 0.0, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.246800731261426, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2519195612431444e-05, |
|
"loss": 0.0, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.2504570383912248, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.249725776965265e-05, |
|
"loss": 0.0, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.2541133455210238, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.247531992687386e-05, |
|
"loss": 0.0, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.2577696526508226, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2453382084095066e-05, |
|
"loss": 0.0, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.2614259597806217, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2431444241316272e-05, |
|
"loss": 0.0, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.2650822669104205, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2409506398537478e-05, |
|
"loss": 0.0, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.2687385740402193, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2387568555758684e-05, |
|
"loss": 0.0, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.2723948811700183, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2365630712979894e-05, |
|
"loss": 0.0, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.2760511882998171, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2343692870201096e-05, |
|
"loss": 0.0, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.2797074954296161, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2321755027422303e-05, |
|
"loss": 0.0, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.283363802559415, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.229981718464351e-05, |
|
"loss": 0.0, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.2870201096892138, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2277879341864718e-05, |
|
"loss": 0.0, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.2906764168190128, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2255941499085924e-05, |
|
"loss": 0.0, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.2943327239488118, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.223400365630713e-05, |
|
"loss": 0.0, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.2979890310786106, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2212065813528337e-05, |
|
"loss": 0.0, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.3016453382084094, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2190127970749543e-05, |
|
"loss": 0.0, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.3053016453382085, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2168190127970752e-05, |
|
"loss": 0.0, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.3089579524680073, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.214625228519196e-05, |
|
"loss": 0.0, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.3126142595978063, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.212431444241316e-05, |
|
"loss": 0.0, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.3162705667276051, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2102376599634367e-05, |
|
"loss": 0.0, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.319926873857404, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2080438756855577e-05, |
|
"loss": 0.0, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.323583180987203, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2058500914076783e-05, |
|
"loss": 0.0, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.3272394881170018, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.203656307129799e-05, |
|
"loss": 0.0, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.3308957952468008, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2014625228519195e-05, |
|
"loss": 0.0, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.3345521023765996, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1992687385740405e-05, |
|
"loss": 0.0, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.3382084095063984, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.197074954296161e-05, |
|
"loss": 0.0, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.3418647166361974, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1948811700182817e-05, |
|
"loss": 0.0, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.3455210237659965, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1926873857404023e-05, |
|
"loss": 0.0, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.3491773308957953, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1904936014625226e-05, |
|
"loss": 0.0, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.352833638025594, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1882998171846436e-05, |
|
"loss": 0.0, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.3564899451553931, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1861060329067642e-05, |
|
"loss": 0.0, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.360146252285192, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1839122486288848e-05, |
|
"loss": 0.0, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.363802559414991, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1817184643510054e-05, |
|
"loss": 0.0, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.3674588665447898, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1795246800731264e-05, |
|
"loss": 0.0, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.3711151736745886, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.177330895795247e-05, |
|
"loss": 0.0, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.3747714808043876, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1751371115173676e-05, |
|
"loss": 0.0, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.3784277879341864, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1729433272394882e-05, |
|
"loss": 0.0, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.3820840950639854, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.170749542961609e-05, |
|
"loss": 0.0, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.3857404021937842, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1685557586837294e-05, |
|
"loss": 0.0, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.389396709323583, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.16636197440585e-05, |
|
"loss": 0.0, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.393053016453382, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1641681901279707e-05, |
|
"loss": 0.0, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.3967093235831811, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1619744058500913e-05, |
|
"loss": 0.0, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.40036563071298, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1597806215722122e-05, |
|
"loss": 0.0, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.4040219378427787, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.157586837294333e-05, |
|
"loss": 0.0, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.4076782449725778, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1553930530164534e-05, |
|
"loss": 0.0, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.4113345521023766, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.153199268738574e-05, |
|
"loss": 0.0, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.4149908592321756, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.151005484460695e-05, |
|
"loss": 0.0, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.4186471663619744, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1488117001828156e-05, |
|
"loss": 0.0, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.4223034734917732, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.146617915904936e-05, |
|
"loss": 0.0, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.4259597806215722, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1444241316270565e-05, |
|
"loss": 0.0, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.429616087751371, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.142230347349177e-05, |
|
"loss": 0.0, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.43327239488117, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.140036563071298e-05, |
|
"loss": 0.0, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.436928702010969, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1378427787934187e-05, |
|
"loss": 0.0, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.4405850091407677, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1356489945155393e-05, |
|
"loss": 0.0, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.4442413162705667, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.13345521023766e-05, |
|
"loss": 0.0, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.4478976234003658, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.131261425959781e-05, |
|
"loss": 0.0, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.4515539305301646, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1290676416819015e-05, |
|
"loss": 0.0, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.4552102376599634, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.126873857404022e-05, |
|
"loss": 0.0, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.4588665447897624, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1246800731261424e-05, |
|
"loss": 0.0, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.4625228519195612, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1224862888482633e-05, |
|
"loss": 0.0, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.4661791590493602, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.120292504570384e-05, |
|
"loss": 0.0, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.469835466179159, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1180987202925046e-05, |
|
"loss": 0.0, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.4734917733089579, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1159049360146252e-05, |
|
"loss": 0.0, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.477148080438757, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1137111517367458e-05, |
|
"loss": 0.0, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.4808043875685557, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1115173674588667e-05, |
|
"loss": 0.0, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.4844606946983547, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1093235831809874e-05, |
|
"loss": 0.0, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.4881170018281535, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.107129798903108e-05, |
|
"loss": 0.0, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 1.4917733089579523, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1049360146252286e-05, |
|
"loss": 0.0, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.4954296160877514, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1027422303473492e-05, |
|
"loss": 0.0, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 1.4990859232175504, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1005484460694698e-05, |
|
"loss": 0.0, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.5027422303473492, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0983546617915904e-05, |
|
"loss": 0.0, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 1.506398537477148, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.096160877513711e-05, |
|
"loss": 0.0, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.5100548446069468, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.093967093235832e-05, |
|
"loss": 0.0, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 1.5137111517367459, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0917733089579526e-05, |
|
"loss": 0.0, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.517367458866545, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0895795246800732e-05, |
|
"loss": 0.0, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.5210237659963437, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.087385740402194e-05, |
|
"loss": 0.0, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.5246800731261425, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0851919561243145e-05, |
|
"loss": 0.0, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 1.5283363802559415, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0829981718464354e-05, |
|
"loss": 0.0, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.5319926873857403, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0808043875685557e-05, |
|
"loss": 0.0, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 1.5356489945155394, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0786106032906763e-05, |
|
"loss": 0.0, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.5393053016453382, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.076416819012797e-05, |
|
"loss": 0.0, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 1.542961608775137, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.074223034734918e-05, |
|
"loss": 0.0, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.546617915904936, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0720292504570385e-05, |
|
"loss": 0.0, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 1.550274223034735, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.069835466179159e-05, |
|
"loss": 0.0, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.5539305301645339, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0676416819012797e-05, |
|
"loss": 0.0, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.5575868372943327, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0654478976234007e-05, |
|
"loss": 0.0, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.5612431444241315, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0632541133455213e-05, |
|
"loss": 0.0, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.5648994515539305, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.061060329067642e-05, |
|
"loss": 0.0, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.5685557586837295, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.058866544789762e-05, |
|
"loss": 0.0, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.5722120658135283, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0566727605118828e-05, |
|
"loss": 0.0, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.5758683729433272, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0544789762340037e-05, |
|
"loss": 0.0, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 1.5795246800731262, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0522851919561243e-05, |
|
"loss": 0.0, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.583180987202925, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.050091407678245e-05, |
|
"loss": 0.0, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.586837294332724, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0478976234003656e-05, |
|
"loss": 0.0, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.5904936014625228, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0457038391224865e-05, |
|
"loss": 0.0, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.5941499085923216, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.043510054844607e-05, |
|
"loss": 0.0, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.5978062157221207, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0413162705667278e-05, |
|
"loss": 0.0, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.6014625228519197, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0391224862888484e-05, |
|
"loss": 0.0, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.6051188299817185, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.036928702010969e-05, |
|
"loss": 0.0, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 1.6087751371115173, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0347349177330896e-05, |
|
"loss": 0.0, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.6124314442413161, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0325411334552102e-05, |
|
"loss": 0.0, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 1.6160877513711152, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0303473491773308e-05, |
|
"loss": 0.0, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.6197440585009142, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0281535648994514e-05, |
|
"loss": 0.0, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 1.623400365630713, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0259597806215724e-05, |
|
"loss": 0.0, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.6270566727605118, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.023765996343693e-05, |
|
"loss": 0.0, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.6307129798903108, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0215722120658136e-05, |
|
"loss": 0.0, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.6343692870201096, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0193784277879342e-05, |
|
"loss": 0.0, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 1.6380255941499087, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0171846435100552e-05, |
|
"loss": 0.0, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.6416819012797075, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0149908592321758e-05, |
|
"loss": 0.0, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 1.6453382084095063, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.012797074954296e-05, |
|
"loss": 0.0, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.6489945155393053, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0106032906764167e-05, |
|
"loss": 0.0, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 1.6526508226691043, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0084095063985373e-05, |
|
"loss": 0.0, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 1.6563071297989032, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0062157221206583e-05, |
|
"loss": 0.0, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 1.659963436928702, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.004021937842779e-05, |
|
"loss": 0.0, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 1.6636197440585008, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0018281535648995e-05, |
|
"loss": 0.0, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.6672760511882998, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.99963436928702e-05, |
|
"loss": 0.0, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 1.6709323583180988, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.997440585009141e-05, |
|
"loss": 0.0, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 1.6745886654478976, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9952468007312617e-05, |
|
"loss": 0.0, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 1.6782449725776964, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9930530164533823e-05, |
|
"loss": 0.0, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 1.6819012797074955, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9908592321755026e-05, |
|
"loss": 0.0, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.6855575868372943, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9886654478976235e-05, |
|
"loss": 0.0, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 1.6892138939670933, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.986471663619744e-05, |
|
"loss": 0.0, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 1.6928702010968921, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9842778793418647e-05, |
|
"loss": 0.0, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 1.696526508226691, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9820840950639853e-05, |
|
"loss": 0.0, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 1.70018281535649, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.979890310786106e-05, |
|
"loss": 0.0, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.703839122486289, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.977696526508227e-05, |
|
"loss": 0.0, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 1.7074954296160878, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9755027422303475e-05, |
|
"loss": 0.0, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 1.7111517367458866, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.973308957952468e-05, |
|
"loss": 0.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 1.7148080438756854, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9711151736745888e-05, |
|
"loss": 0.0, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 1.7184643510054844, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9689213893967094e-05, |
|
"loss": 0.0, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.7221206581352835, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.96672760511883e-05, |
|
"loss": 0.0, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 1.7257769652650823, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9645338208409506e-05, |
|
"loss": 0.0, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 1.729433272394881, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9623400365630712e-05, |
|
"loss": 0.0, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 1.7330895795246801, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9601462522851918e-05, |
|
"loss": 0.0, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 1.736745886654479, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9579524680073128e-05, |
|
"loss": 0.0, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.740402193784278, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9557586837294334e-05, |
|
"loss": 0.0, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 1.7440585009140768, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.953564899451554e-05, |
|
"loss": 0.0, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 1.7477148080438756, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9513711151736746e-05, |
|
"loss": 0.0, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 1.7513711151736746, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9491773308957956e-05, |
|
"loss": 0.0, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 1.7550274223034736, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.946983546617916e-05, |
|
"loss": 0.0, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.7586837294332724, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9447897623400365e-05, |
|
"loss": 0.0, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 1.7623400365630713, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.942595978062157e-05, |
|
"loss": 0.0, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 1.76599634369287, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.940402193784278e-05, |
|
"loss": 0.0, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 1.769652650822669, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9382084095063986e-05, |
|
"loss": 0.0, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 1.7733089579524681, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9360146252285193e-05, |
|
"loss": 0.0, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.776965265082267, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.93382084095064e-05, |
|
"loss": 0.0, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 1.7806215722120657, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9316270566727605e-05, |
|
"loss": 0.0, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 1.7842778793418648, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9294332723948814e-05, |
|
"loss": 0.0, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 1.7879341864716636, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.927239488117002e-05, |
|
"loss": 0.0, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 1.7915904936014626, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9250457038391223e-05, |
|
"loss": 0.0, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.7952468007312614, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.922851919561243e-05, |
|
"loss": 0.0, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 1.7989031078610602, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.920658135283364e-05, |
|
"loss": 0.0, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 1.8025594149908593, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9184643510054845e-05, |
|
"loss": 0.0, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 1.8062157221206583, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.916270566727605e-05, |
|
"loss": 0.0, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 1.809872029250457, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9140767824497257e-05, |
|
"loss": 0.0, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.813528336380256, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9118829981718467e-05, |
|
"loss": 0.0, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 1.8171846435100547, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9096892138939673e-05, |
|
"loss": 0.0, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 1.8208409506398537, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.907495429616088e-05, |
|
"loss": 0.0, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 1.8244972577696528, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9053016453382085e-05, |
|
"loss": 0.0, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 1.8281535648994516, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9031078610603288e-05, |
|
"loss": 0.0, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.8318098720292504, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9009140767824498e-05, |
|
"loss": 0.0, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 1.8354661791590492, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8987202925045704e-05, |
|
"loss": 0.0, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 1.8391224862888482, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.896526508226691e-05, |
|
"loss": 0.0, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 1.8427787934186473, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8943327239488116e-05, |
|
"loss": 0.0, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 1.846435100548446, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8921389396709326e-05, |
|
"loss": 0.0, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.8500914076782449, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8899451553930532e-05, |
|
"loss": 0.0, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 1.853747714808044, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8877513711151738e-05, |
|
"loss": 0.0, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 1.857404021937843, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8855575868372944e-05, |
|
"loss": 0.0, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 1.8610603290676417, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8833638025594154e-05, |
|
"loss": 0.0, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 1.8647166361974405, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8811700182815356e-05, |
|
"loss": 0.0, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.8683729433272394, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8789762340036562e-05, |
|
"loss": 0.0, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 1.8720292504570384, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.876782449725777e-05, |
|
"loss": 0.0, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 1.8756855575868374, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8745886654478975e-05, |
|
"loss": 0.0, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 1.8793418647166362, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8723948811700184e-05, |
|
"loss": 0.0, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 1.882998171846435, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.870201096892139e-05, |
|
"loss": 0.0, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.8866544789762338, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8680073126142597e-05, |
|
"loss": 0.0, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 1.8903107861060329, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8658135283363803e-05, |
|
"loss": 0.0, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 1.893967093235832, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8636197440585012e-05, |
|
"loss": 0.0, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 1.8976234003656307, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.861425959780622e-05, |
|
"loss": 0.0, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 1.9012797074954295, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.859232175502742e-05, |
|
"loss": 0.0, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.9049360146252285, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8570383912248627e-05, |
|
"loss": 0.0, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 1.9085923217550276, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8548446069469833e-05, |
|
"loss": 0.0, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 1.9122486288848264, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8526508226691043e-05, |
|
"loss": 0.0, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 1.9159049360146252, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.850457038391225e-05, |
|
"loss": 0.0, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 1.919561243144424, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8482632541133455e-05, |
|
"loss": 0.0, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.923217550274223, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.846069469835466e-05, |
|
"loss": 0.0, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 1.926873857404022, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.843875685557587e-05, |
|
"loss": 0.0, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 1.9305301645338209, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8416819012797077e-05, |
|
"loss": 0.0, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 1.9341864716636197, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8394881170018283e-05, |
|
"loss": 0.0, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 1.9378427787934185, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8372943327239486e-05, |
|
"loss": 0.0, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.9414990859232175, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8351005484460695e-05, |
|
"loss": 0.0, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 1.9451553930530165, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.83290676416819e-05, |
|
"loss": 0.0, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 1.9488117001828154, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8307129798903108e-05, |
|
"loss": 0.0, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 1.9524680073126142, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8285191956124314e-05, |
|
"loss": 0.0, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 1.9561243144424132, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.826325411334552e-05, |
|
"loss": 0.0, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.9597806215722122, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.824131627056673e-05, |
|
"loss": 0.0, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 1.963436928702011, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8219378427787936e-05, |
|
"loss": 0.0, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 1.9670932358318098, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8197440585009142e-05, |
|
"loss": 0.0, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 1.9707495429616086, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8175502742230348e-05, |
|
"loss": 0.0, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 1.9744058500914077, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8153564899451554e-05, |
|
"loss": 0.0, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.9780621572212067, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.813162705667276e-05, |
|
"loss": 0.0, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 1.9817184643510055, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8109689213893966e-05, |
|
"loss": 0.0, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.9853747714808043, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8087751371115173e-05, |
|
"loss": 0.0, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 1.9890310786106031, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8065813528336382e-05, |
|
"loss": 0.0, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.9926873857404022, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8043875685557588e-05, |
|
"loss": 0.0, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.9963436928702012, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8021937842778794e-05, |
|
"loss": 0.0, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.0, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 2.003656307129799, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7978062157221207e-05, |
|
"loss": 0.0, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 2.0073126142595976, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7956124314442416e-05, |
|
"loss": 0.0, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 2.010968921389397, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7934186471663622e-05, |
|
"loss": 0.0, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.0146252285191957, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7912248628884825e-05, |
|
"loss": 0.0, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 2.0182815356489945, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.789031078610603e-05, |
|
"loss": 0.0, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 2.0219378427787933, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.786837294332724e-05, |
|
"loss": 0.0, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 2.025594149908592, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7846435100548447e-05, |
|
"loss": 0.0, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 2.0292504570383914, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7824497257769653e-05, |
|
"loss": 0.0, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.03290676416819, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.780255941499086e-05, |
|
"loss": 0.0, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 2.036563071297989, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7780621572212065e-05, |
|
"loss": 0.0, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 2.0402193784277878, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7758683729433275e-05, |
|
"loss": 0.0, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 2.043875685557587, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.773674588665448e-05, |
|
"loss": 0.0, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 2.047531992687386, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7714808043875687e-05, |
|
"loss": 0.0, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.0511882998171846, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.769287020109689e-05, |
|
"loss": 0.0, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 2.0548446069469835, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.76709323583181e-05, |
|
"loss": 0.0, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 2.0585009140767823, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7648994515539305e-05, |
|
"loss": 0.0, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 2.0621572212065815, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.762705667276051e-05, |
|
"loss": 0.0, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 2.0658135283363803, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7605118829981718e-05, |
|
"loss": 0.0, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.069469835466179, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7583180987202927e-05, |
|
"loss": 0.0, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 2.073126142595978, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7561243144424133e-05, |
|
"loss": 0.0, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 2.076782449725777, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.753930530164534e-05, |
|
"loss": 0.0, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 2.080438756855576, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7517367458866546e-05, |
|
"loss": 0.0, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 2.084095063985375, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7495429616087752e-05, |
|
"loss": 0.0, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.0877513711151736, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7473491773308958e-05, |
|
"loss": 0.0, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 2.0914076782449724, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7451553930530164e-05, |
|
"loss": 0.0, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 2.0950639853747717, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.742961608775137e-05, |
|
"loss": 0.0001, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 2.0987202925045705, |
|
"grad_norm": 0.24371479451656342, |
|
"learning_rate": 1.7407678244972576e-05, |
|
"loss": 0.0011, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 2.1023765996343693, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7385740402193786e-05, |
|
"loss": 0.0006, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.106032906764168, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7363802559414992e-05, |
|
"loss": 0.0003, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 2.109689213893967, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7341864716636198e-05, |
|
"loss": 0.0005, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 2.113345521023766, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7319926873857404e-05, |
|
"loss": 0.0, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 2.117001828153565, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7297989031078614e-05, |
|
"loss": 0.0002, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 2.1206581352833638, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.727605118829982e-05, |
|
"loss": 0.0, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.1243144424131626, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7254113345521023e-05, |
|
"loss": 0.0, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 2.1279707495429614, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.723217550274223e-05, |
|
"loss": 0.0, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 2.1316270566727606, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7210237659963435e-05, |
|
"loss": 0.0, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 2.1352833638025595, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7188299817184645e-05, |
|
"loss": 0.0, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 2.1389396709323583, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.716636197440585e-05, |
|
"loss": 0.0, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.142595978062157, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7144424131627057e-05, |
|
"loss": 0.0, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 2.1462522851919563, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7122486288848263e-05, |
|
"loss": 0.0, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 2.149908592321755, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7100548446069473e-05, |
|
"loss": 0.0, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 2.153564899451554, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.707861060329068e-05, |
|
"loss": 0.0, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 2.1572212065813527, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7056672760511885e-05, |
|
"loss": 0.0, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.1608775137111516, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7034734917733088e-05, |
|
"loss": 0.0, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 2.164533820840951, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7012797074954294e-05, |
|
"loss": 0.0, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 2.1681901279707496, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6990859232175503e-05, |
|
"loss": 0.0, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 2.1718464351005484, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.696892138939671e-05, |
|
"loss": 0.0, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 2.1755027422303472, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6946983546617916e-05, |
|
"loss": 0.0, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 2.1791590493601465, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6925045703839122e-05, |
|
"loss": 0.0, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 2.1828153564899453, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.690310786106033e-05, |
|
"loss": 0.0, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 2.186471663619744, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6881170018281537e-05, |
|
"loss": 0.0, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 2.190127970749543, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6859232175502744e-05, |
|
"loss": 0.0, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 2.1937842778793417, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.683729433272395e-05, |
|
"loss": 0.0, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.197440585009141, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6815356489945156e-05, |
|
"loss": 0.0, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 2.2010968921389398, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6793418647166362e-05, |
|
"loss": 0.0, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 2.2047531992687386, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6771480804387568e-05, |
|
"loss": 0.0, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 2.2084095063985374, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6749542961608774e-05, |
|
"loss": 0.0, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 2.212065813528336, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.672760511882998e-05, |
|
"loss": 0.0, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 2.2157221206581355, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.670566727605119e-05, |
|
"loss": 0.0, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 2.2193784277879343, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6683729433272396e-05, |
|
"loss": 0.0, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 2.223034734917733, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6661791590493602e-05, |
|
"loss": 0.0, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 2.226691042047532, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6639853747714808e-05, |
|
"loss": 0.0, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 2.2303473491773307, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6617915904936018e-05, |
|
"loss": 0.0, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.23400365630713, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.659597806215722e-05, |
|
"loss": 0.0, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 2.2376599634369287, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6574040219378427e-05, |
|
"loss": 0.0, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 2.2413162705667276, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6552102376599633e-05, |
|
"loss": 0.0, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 2.2449725776965264, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6530164533820842e-05, |
|
"loss": 0.0, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 2.2486288848263256, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.650822669104205e-05, |
|
"loss": 0.0, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 2.2522851919561244, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6486288848263255e-05, |
|
"loss": 0.0, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 2.2559414990859232, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.646435100548446e-05, |
|
"loss": 0.0, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 2.259597806215722, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6442413162705667e-05, |
|
"loss": 0.0, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 2.263254113345521, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6420475319926876e-05, |
|
"loss": 0.0, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 2.26691042047532, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6398537477148083e-05, |
|
"loss": 0.0, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.270566727605119, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6376599634369285e-05, |
|
"loss": 0.0, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 2.2742230347349177, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.635466179159049e-05, |
|
"loss": 0.0, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 2.2778793418647165, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.63327239488117e-05, |
|
"loss": 0.0, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 2.2815356489945158, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6310786106032907e-05, |
|
"loss": 0.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 2.2851919561243146, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6288848263254113e-05, |
|
"loss": 0.0, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.2888482632541134, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.626691042047532e-05, |
|
"loss": 0.0, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 2.292504570383912, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.624497257769653e-05, |
|
"loss": 0.0, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 2.296160877513711, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6223034734917735e-05, |
|
"loss": 0.0, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 2.2998171846435103, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.620109689213894e-05, |
|
"loss": 0.0, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 2.303473491773309, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6179159049360147e-05, |
|
"loss": 0.0, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.307129798903108, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.615722120658135e-05, |
|
"loss": 0.0, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 2.3107861060329067, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.613528336380256e-05, |
|
"loss": 0.0, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 2.3144424131627055, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6113345521023766e-05, |
|
"loss": 0.0, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 2.3180987202925047, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6091407678244972e-05, |
|
"loss": 0.0, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 2.3217550274223036, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6069469835466178e-05, |
|
"loss": 0.0, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 2.3254113345521024, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6047531992687388e-05, |
|
"loss": 0.0, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 2.329067641681901, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6025594149908594e-05, |
|
"loss": 0.0, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 2.3327239488117, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.60036563071298e-05, |
|
"loss": 0.0, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 2.3363802559414992, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5981718464351006e-05, |
|
"loss": 0.0, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 2.340036563071298, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5959780621572212e-05, |
|
"loss": 0.0, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.343692870201097, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.593784277879342e-05, |
|
"loss": 0.0, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 2.3473491773308957, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5915904936014625e-05, |
|
"loss": 0.0, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 2.3510054844606945, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.589396709323583e-05, |
|
"loss": 0.0, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 2.3546617915904937, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5872029250457037e-05, |
|
"loss": 0.0, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 2.3583180987202925, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5850091407678246e-05, |
|
"loss": 0.0, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 2.3619744058500913, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5828153564899452e-05, |
|
"loss": 0.0, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 2.36563071297989, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.580621572212066e-05, |
|
"loss": 0.0, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 2.3692870201096894, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5784277879341865e-05, |
|
"loss": 0.0, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 2.372943327239488, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5762340036563074e-05, |
|
"loss": 0.0, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 2.376599634369287, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.574040219378428e-05, |
|
"loss": 0.0, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.380255941499086, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5718464351005483e-05, |
|
"loss": 0.0, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 2.383912248628885, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.569652650822669e-05, |
|
"loss": 0.0, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 2.387568555758684, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5674588665447895e-05, |
|
"loss": 0.0, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 2.3912248628884827, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5652650822669105e-05, |
|
"loss": 0.0, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 2.3948811700182815, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.563071297989031e-05, |
|
"loss": 0.0, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 2.3985374771480803, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5608775137111517e-05, |
|
"loss": 0.0, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 2.4021937842778796, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5586837294332723e-05, |
|
"loss": 0.0, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 2.4058500914076784, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5564899451553933e-05, |
|
"loss": 0.0, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 2.409506398537477, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.554296160877514e-05, |
|
"loss": 0.0, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 2.413162705667276, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5521023765996345e-05, |
|
"loss": 0.0, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.416819012797075, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.549908592321755e-05, |
|
"loss": 0.0, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 2.420475319926874, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5477148080438757e-05, |
|
"loss": 0.0, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 2.424131627056673, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5455210237659964e-05, |
|
"loss": 0.0, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 2.4277879341864717, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.543327239488117e-05, |
|
"loss": 0.0, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 2.4314442413162705, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5411334552102376e-05, |
|
"loss": 0.0, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 2.4351005484460693, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5389396709323582e-05, |
|
"loss": 0.0, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 2.4387568555758685, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.536745886654479e-05, |
|
"loss": 0.0, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 2.4424131627056673, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5345521023765998e-05, |
|
"loss": 0.0, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 2.446069469835466, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5323583180987204e-05, |
|
"loss": 0.0, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 2.449725776965265, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.530164533820841e-05, |
|
"loss": 0.0, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.4533820840950638, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.527970749542962e-05, |
|
"loss": 0.0, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 2.457038391224863, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5257769652650824e-05, |
|
"loss": 0.0, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 2.460694698354662, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.523583180987203e-05, |
|
"loss": 0.0, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 2.4643510054844606, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5213893967093235e-05, |
|
"loss": 0.0, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 2.4680073126142594, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.519195612431444e-05, |
|
"loss": 0.0, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 2.4716636197440587, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.517001828153565e-05, |
|
"loss": 0.0, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 2.4753199268738575, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5148080438756856e-05, |
|
"loss": 0.0, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 2.4789762340036563, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5126142595978063e-05, |
|
"loss": 0.0, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 2.482632541133455, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5104204753199267e-05, |
|
"loss": 0.0, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 2.4862888482632544, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5082266910420477e-05, |
|
"loss": 0.0, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.489945155393053, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5060329067641683e-05, |
|
"loss": 0.0, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 2.493601462522852, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5038391224862889e-05, |
|
"loss": 0.0, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 2.497257769652651, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5016453382084095e-05, |
|
"loss": 0.0, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 2.5009140767824496, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4994515539305301e-05, |
|
"loss": 0.0, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 2.504570383912249, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4972577696526509e-05, |
|
"loss": 0.0, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 2.5082266910420477, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4950639853747715e-05, |
|
"loss": 0.0, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 2.5118829981718465, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4928702010968921e-05, |
|
"loss": 0.0, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 2.5155393053016453, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4906764168190129e-05, |
|
"loss": 0.0, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 2.519195612431444, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4884826325411333e-05, |
|
"loss": 0.0, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 2.5228519195612433, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4862888482632541e-05, |
|
"loss": 0.0, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.526508226691042, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4840950639853747e-05, |
|
"loss": 0.0, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 2.530164533820841, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4819012797074955e-05, |
|
"loss": 0.0, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 2.5338208409506398, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4797074954296161e-05, |
|
"loss": 0.0, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 2.5374771480804386, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4775137111517368e-05, |
|
"loss": 0.0, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 2.541133455210238, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4753199268738574e-05, |
|
"loss": 0.0, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 2.5447897623400366, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4731261425959782e-05, |
|
"loss": 0.0, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 2.5484460694698354, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4709323583180988e-05, |
|
"loss": 0.0, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 2.5521023765996342, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4687385740402196e-05, |
|
"loss": 0.0, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 2.555758683729433, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.46654478976234e-05, |
|
"loss": 0.0, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 2.5594149908592323, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4643510054844606e-05, |
|
"loss": 0.0, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.563071297989031, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4621572212065814e-05, |
|
"loss": 0.0, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 2.56672760511883, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.459963436928702e-05, |
|
"loss": 0.0, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 2.5703839122486287, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4577696526508228e-05, |
|
"loss": 0.0, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 2.5740402193784275, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4555758683729432e-05, |
|
"loss": 0.0, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 2.577696526508227, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.453382084095064e-05, |
|
"loss": 0.0, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 2.5813528336380256, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4511882998171846e-05, |
|
"loss": 0.0, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 2.5850091407678244, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4489945155393054e-05, |
|
"loss": 0.0, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 2.5886654478976237, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.446800731261426e-05, |
|
"loss": 0.0, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 2.5923217550274225, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4446069469835466e-05, |
|
"loss": 0.0, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 2.5959780621572213, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4424131627056673e-05, |
|
"loss": 0.0, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 2.59963436928702, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4402193784277879e-05, |
|
"loss": 0.0, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 2.603290676416819, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4380255941499087e-05, |
|
"loss": 0.0, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 2.606946983546618, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4358318098720293e-05, |
|
"loss": 0.0, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 2.610603290676417, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.43363802559415e-05, |
|
"loss": 0.0, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 2.6142595978062158, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4314442413162705e-05, |
|
"loss": 0.0, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 2.6179159049360146, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4292504570383913e-05, |
|
"loss": 0.0, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 2.6215722120658134, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4270566727605119e-05, |
|
"loss": 0.0, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 2.6252285191956126, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4248628884826327e-05, |
|
"loss": 0.0, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 2.6288848263254114, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4226691042047533e-05, |
|
"loss": 0.0, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 2.6325411334552102, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4204753199268739e-05, |
|
"loss": 0.0, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.636197440585009, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4182815356489945e-05, |
|
"loss": 0.0, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 2.639853747714808, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4160877513711153e-05, |
|
"loss": 0.0, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 2.643510054844607, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.413893967093236e-05, |
|
"loss": 0.0, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 2.647166361974406, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4117001828153565e-05, |
|
"loss": 0.0, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 2.6508226691042047, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4095063985374771e-05, |
|
"loss": 0.0, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 2.6544789762340035, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4073126142595978e-05, |
|
"loss": 0.0, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 2.6581352833638023, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4051188299817185e-05, |
|
"loss": 0.0, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 2.6617915904936016, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4029250457038392e-05, |
|
"loss": 0.0, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 2.6654478976234004, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.40073126142596e-05, |
|
"loss": 0.0, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 2.669104204753199, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3985374771480804e-05, |
|
"loss": 0.0, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 2.672760511882998, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3963436928702012e-05, |
|
"loss": 0.0, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 2.676416819012797, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3941499085923218e-05, |
|
"loss": 0.0, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 2.680073126142596, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3919561243144426e-05, |
|
"loss": 0.0, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 2.683729433272395, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3897623400365632e-05, |
|
"loss": 0.0, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 2.6873857404021937, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3875685557586836e-05, |
|
"loss": 0.0, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 2.691042047531993, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3853747714808044e-05, |
|
"loss": 0.0, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 2.6946983546617918, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.383180987202925e-05, |
|
"loss": 0.0, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 2.6983546617915906, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3809872029250458e-05, |
|
"loss": 0.0, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 2.7020109689213894, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3787934186471664e-05, |
|
"loss": 0.0, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 2.705667276051188, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.376599634369287e-05, |
|
"loss": 0.0, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 2.7093235831809874, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3744058500914077e-05, |
|
"loss": 0.0, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 2.7129798903107862, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3722120658135284e-05, |
|
"loss": 0.0, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 2.716636197440585, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.370018281535649e-05, |
|
"loss": 0.0, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 2.720292504570384, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3678244972577698e-05, |
|
"loss": 0.0, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 2.7239488117001827, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3656307129798903e-05, |
|
"loss": 0.0, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 2.727605118829982, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3634369287020109e-05, |
|
"loss": 0.0, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 2.7312614259597807, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3612431444241317e-05, |
|
"loss": 0.0, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 2.7349177330895795, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3590493601462523e-05, |
|
"loss": 0.0, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 2.7385740402193783, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.356855575868373e-05, |
|
"loss": 0.0, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 2.742230347349177, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3546617915904935e-05, |
|
"loss": 0.0, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.7458866544789764, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3524680073126143e-05, |
|
"loss": 0.0, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 2.749542961608775, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3502742230347349e-05, |
|
"loss": 0.0, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 2.753199268738574, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3480804387568557e-05, |
|
"loss": 0.0, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 2.756855575868373, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3458866544789763e-05, |
|
"loss": 0.0, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 2.7605118829981716, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.343692870201097e-05, |
|
"loss": 0.0, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 2.764168190127971, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3414990859232175e-05, |
|
"loss": 0.0, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 2.7678244972577697, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3393053016453383e-05, |
|
"loss": 0.0, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 2.7714808043875685, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.337111517367459e-05, |
|
"loss": 0.0, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 2.7751371115173673, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3349177330895796e-05, |
|
"loss": 0.0, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 2.778793418647166, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3327239488117002e-05, |
|
"loss": 0.0, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 2.7824497257769654, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3305301645338208e-05, |
|
"loss": 0.0, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 2.786106032906764, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3283363802559416e-05, |
|
"loss": 0.0, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 2.789762340036563, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3261425959780622e-05, |
|
"loss": 0.0, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 2.7934186471663622, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.323948811700183e-05, |
|
"loss": 0.0, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 2.797074954296161, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3217550274223034e-05, |
|
"loss": 0.0, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 2.80073126142596, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3195612431444242e-05, |
|
"loss": 0.0, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 2.8043875685557587, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3173674588665448e-05, |
|
"loss": 0.0, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 2.8080438756855575, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3151736745886656e-05, |
|
"loss": 0.0, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 2.8117001828153567, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3129798903107862e-05, |
|
"loss": 0.0, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 2.8153564899451555, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3107861060329066e-05, |
|
"loss": 0.0, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 2.8190127970749543, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3085923217550274e-05, |
|
"loss": 0.0, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 2.822669104204753, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.306398537477148e-05, |
|
"loss": 0.0, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 2.826325411334552, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3042047531992688e-05, |
|
"loss": 0.0, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 2.829981718464351, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3020109689213894e-05, |
|
"loss": 0.0, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 2.83363802559415, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.29981718464351e-05, |
|
"loss": 0.0, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 2.837294332723949, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2976234003656307e-05, |
|
"loss": 0.0, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 2.8409506398537476, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2954296160877515e-05, |
|
"loss": 0.0, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 2.8446069469835464, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.293235831809872e-05, |
|
"loss": 0.0, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 2.8482632541133457, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2910420475319929e-05, |
|
"loss": 0.0, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 2.8519195612431445, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2888482632541133e-05, |
|
"loss": 0.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 2.8555758683729433, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.286654478976234e-05, |
|
"loss": 0.0, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 2.859232175502742, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2844606946983547e-05, |
|
"loss": 0.0, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 2.862888482632541, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2822669104204753e-05, |
|
"loss": 0.0, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 2.86654478976234, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2800731261425961e-05, |
|
"loss": 0.0, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 2.870201096892139, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2778793418647165e-05, |
|
"loss": 0.0, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 2.873857404021938, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2756855575868373e-05, |
|
"loss": 0.0, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 2.8775137111517366, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.273491773308958e-05, |
|
"loss": 0.0, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 2.8811700182815354, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2712979890310787e-05, |
|
"loss": 0.0, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 2.8848263254113347, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2691042047531993e-05, |
|
"loss": 0.0, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 2.8884826325411335, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.26691042047532e-05, |
|
"loss": 0.0, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 2.8921389396709323, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2647166361974406e-05, |
|
"loss": 0.0, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 2.8957952468007315, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2625228519195613e-05, |
|
"loss": 0.0, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 2.89945155393053, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.260329067641682e-05, |
|
"loss": 0.0, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 2.903107861060329, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2581352833638026e-05, |
|
"loss": 0.0, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 2.906764168190128, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2559414990859232e-05, |
|
"loss": 0.0, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 2.9104204753199268, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2537477148080438e-05, |
|
"loss": 0.0, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 2.914076782449726, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2515539305301646e-05, |
|
"loss": 0.0, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 2.917733089579525, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2493601462522852e-05, |
|
"loss": 0.0, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 2.9213893967093236, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.247166361974406e-05, |
|
"loss": 0.0, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 2.9250457038391224, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2449725776965264e-05, |
|
"loss": 0.0, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.9287020109689212, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2427787934186472e-05, |
|
"loss": 0.0, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 2.9323583180987205, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2405850091407678e-05, |
|
"loss": 0.0, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 2.9360146252285193, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2383912248628886e-05, |
|
"loss": 0.0, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 2.939670932358318, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2361974405850092e-05, |
|
"loss": 0.0, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 2.943327239488117, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2340036563071298e-05, |
|
"loss": 0.0, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 2.9469835466179157, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2318098720292504e-05, |
|
"loss": 0.0, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 2.950639853747715, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.229616087751371e-05, |
|
"loss": 0.0, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 2.954296160877514, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2274223034734918e-05, |
|
"loss": 0.0, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 2.9579524680073126, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2252285191956125e-05, |
|
"loss": 0.0, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 2.9616087751371114, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.223034734917733e-05, |
|
"loss": 0.0, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 2.96526508226691, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2208409506398537e-05, |
|
"loss": 0.0, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 2.9689213893967095, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2186471663619745e-05, |
|
"loss": 0.0, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 2.9725776965265083, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2164533820840951e-05, |
|
"loss": 0.0, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 2.976234003656307, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2142595978062159e-05, |
|
"loss": 0.0, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 2.979890310786106, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2120658135283363e-05, |
|
"loss": 0.0, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 2.9835466179159047, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2098720292504571e-05, |
|
"loss": 0.0, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 2.987202925045704, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2076782449725777e-05, |
|
"loss": 0.0, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 2.9908592321755028, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2054844606946983e-05, |
|
"loss": 0.0, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 2.9945155393053016, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2032906764168191e-05, |
|
"loss": 0.0, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 2.998171846435101, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2010968921389397e-05, |
|
"loss": 0.0, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 3.0018281535648996, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1989031078610603e-05, |
|
"loss": 0.0, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 3.0054844606946984, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.196709323583181e-05, |
|
"loss": 0.0, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 3.0091407678244972, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1945155393053017e-05, |
|
"loss": 0.0, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 3.012797074954296, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1923217550274223e-05, |
|
"loss": 0.0, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 3.016453382084095, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1901279707495431e-05, |
|
"loss": 0.0, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 3.020109689213894, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1879341864716636e-05, |
|
"loss": 0.0, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 3.023765996343693, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1857404021937844e-05, |
|
"loss": 0.0, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 3.0274223034734917, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.183546617915905e-05, |
|
"loss": 0.0, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 3.0310786106032905, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1813528336380256e-05, |
|
"loss": 0.0, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 3.03473491773309, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1791590493601464e-05, |
|
"loss": 0.0, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 3.0383912248628886, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1769652650822668e-05, |
|
"loss": 0.0, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 3.0420475319926874, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1747714808043876e-05, |
|
"loss": 0.0, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 3.045703839122486, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1725776965265082e-05, |
|
"loss": 0.0, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 3.049360146252285, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.170383912248629e-05, |
|
"loss": 0.0, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 3.0530164533820843, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1681901279707496e-05, |
|
"loss": 0.0, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 3.056672760511883, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1659963436928702e-05, |
|
"loss": 0.0, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 3.060329067641682, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1638025594149908e-05, |
|
"loss": 0.0, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 3.0639853747714807, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1616087751371116e-05, |
|
"loss": 0.0, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 3.0676416819012795, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1594149908592322e-05, |
|
"loss": 0.0, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 3.0712979890310788, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.157221206581353e-05, |
|
"loss": 0.0, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 3.0749542961608776, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1550274223034735e-05, |
|
"loss": 0.0, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 3.0786106032906764, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.152833638025594e-05, |
|
"loss": 0.0, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 3.082266910420475, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1506398537477149e-05, |
|
"loss": 0.0, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 3.0859232175502744, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1484460694698355e-05, |
|
"loss": 0.0, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 3.0895795246800732, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1462522851919563e-05, |
|
"loss": 0.0, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 3.093235831809872, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1440585009140767e-05, |
|
"loss": 0.0, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 3.096892138939671, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1418647166361975e-05, |
|
"loss": 0.0, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 3.1005484460694697, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1396709323583181e-05, |
|
"loss": 0.0, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 3.104204753199269, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1374771480804389e-05, |
|
"loss": 0.0, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 3.1078610603290677, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1352833638025595e-05, |
|
"loss": 0.0, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.1115173674588665, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1330895795246801e-05, |
|
"loss": 0.0, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 3.1151736745886653, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1308957952468007e-05, |
|
"loss": 0.0, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 3.118829981718464, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1287020109689213e-05, |
|
"loss": 0.0, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 3.1224862888482634, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1265082266910421e-05, |
|
"loss": 0.0, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 3.126142595978062, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1243144424131627e-05, |
|
"loss": 0.0, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 3.129798903107861, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1221206581352834e-05, |
|
"loss": 0.0, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 3.13345521023766, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.119926873857404e-05, |
|
"loss": 0.0, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 3.137111517367459, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1177330895795248e-05, |
|
"loss": 0.0, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 3.140767824497258, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1155393053016454e-05, |
|
"loss": 0.0, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 3.1444241316270567, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1133455210237662e-05, |
|
"loss": 0.0, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 3.1480804387568555, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1111517367458866e-05, |
|
"loss": 0.0, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 3.1517367458866543, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1089579524680074e-05, |
|
"loss": 0.0, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 3.1553930530164536, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.106764168190128e-05, |
|
"loss": 0.0, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 3.1590493601462524, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1045703839122488e-05, |
|
"loss": 0.0, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 3.162705667276051, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1023765996343694e-05, |
|
"loss": 0.0, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 3.16636197440585, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1001828153564898e-05, |
|
"loss": 0.0, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 3.170018281535649, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0979890310786106e-05, |
|
"loss": 0.0, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 3.173674588665448, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0957952468007312e-05, |
|
"loss": 0.0, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 3.177330895795247, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.093601462522852e-05, |
|
"loss": 0.0, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 3.1809872029250457, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0914076782449726e-05, |
|
"loss": 0.0, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 3.1846435100548445, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0892138939670932e-05, |
|
"loss": 0.0, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 3.1882998171846433, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0870201096892139e-05, |
|
"loss": 0.0, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 3.1919561243144425, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0848263254113346e-05, |
|
"loss": 0.0, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 3.1956124314442413, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0826325411334553e-05, |
|
"loss": 0.0, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 3.19926873857404, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.080438756855576e-05, |
|
"loss": 0.0, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 3.202925045703839, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0782449725776965e-05, |
|
"loss": 0.0, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 3.206581352833638, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0760511882998171e-05, |
|
"loss": 0.0, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 3.210237659963437, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0738574040219379e-05, |
|
"loss": 0.0, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 3.213893967093236, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0716636197440585e-05, |
|
"loss": 0.0, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 3.2175502742230346, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0694698354661793e-05, |
|
"loss": 0.0, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 3.2212065813528334, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0672760511882997e-05, |
|
"loss": 0.0, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 3.2248628884826327, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0650822669104205e-05, |
|
"loss": 0.0, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 3.2285191956124315, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0628884826325411e-05, |
|
"loss": 0.0, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 3.2321755027422303, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0606946983546619e-05, |
|
"loss": 0.0, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 3.235831809872029, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0585009140767825e-05, |
|
"loss": 0.0, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 3.2394881170018284, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0563071297989031e-05, |
|
"loss": 0.0, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 3.243144424131627, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0541133455210237e-05, |
|
"loss": 0.0, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 3.246800731261426, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0519195612431444e-05, |
|
"loss": 0.0, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 3.250457038391225, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0497257769652651e-05, |
|
"loss": 0.0, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 3.2541133455210236, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0475319926873858e-05, |
|
"loss": 0.0, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 3.257769652650823, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0453382084095064e-05, |
|
"loss": 0.0, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 3.2614259597806217, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.043144424131627e-05, |
|
"loss": 0.0, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 3.2650822669104205, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0409506398537478e-05, |
|
"loss": 0.0, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 3.2687385740402193, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0387568555758684e-05, |
|
"loss": 0.0, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 3.272394881170018, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0365630712979892e-05, |
|
"loss": 0.0, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 3.2760511882998173, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0343692870201096e-05, |
|
"loss": 0.0, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 3.279707495429616, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0321755027422304e-05, |
|
"loss": 0.0, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 3.283363802559415, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.029981718464351e-05, |
|
"loss": 0.0, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 3.2870201096892138, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0277879341864718e-05, |
|
"loss": 0.0, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 3.2906764168190126, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0255941499085924e-05, |
|
"loss": 0.0, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.294332723948812, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0234003656307129e-05, |
|
"loss": 0.0, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 3.2979890310786106, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0212065813528336e-05, |
|
"loss": 0.0, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 3.3016453382084094, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0190127970749543e-05, |
|
"loss": 0.0, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 3.3053016453382082, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.016819012797075e-05, |
|
"loss": 0.0, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 3.3089579524680075, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0146252285191956e-05, |
|
"loss": 0.0, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 3.3126142595978063, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0124314442413163e-05, |
|
"loss": 0.0, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 3.316270566727605, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0102376599634369e-05, |
|
"loss": 0.0, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 3.319926873857404, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0080438756855577e-05, |
|
"loss": 0.0, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 3.3235831809872027, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0058500914076783e-05, |
|
"loss": 0.0, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 3.327239488117002, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.003656307129799e-05, |
|
"loss": 0.0, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 3.330895795246801, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0014625228519195e-05, |
|
"loss": 0.0, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 3.3345521023765996, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.992687385740401e-06, |
|
"loss": 0.0, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 3.3382084095063984, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.970749542961609e-06, |
|
"loss": 0.0, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 3.3418647166361977, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.948811700182815e-06, |
|
"loss": 0.0, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 3.3455210237659965, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.926873857404023e-06, |
|
"loss": 0.0, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 3.3491773308957953, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.904936014625227e-06, |
|
"loss": 0.0, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 3.352833638025594, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.882998171846435e-06, |
|
"loss": 0.0, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 3.356489945155393, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.861060329067641e-06, |
|
"loss": 0.0, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 3.360146252285192, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.83912248628885e-06, |
|
"loss": 0.0, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 3.363802559414991, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.817184643510055e-06, |
|
"loss": 0.0, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 3.3674588665447898, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.795246800731262e-06, |
|
"loss": 0.0, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 3.3711151736745886, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.773308957952468e-06, |
|
"loss": 0.0, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 3.3747714808043874, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.751371115173675e-06, |
|
"loss": 0.0, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 3.3784277879341866, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.729433272394882e-06, |
|
"loss": 0.0, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 3.3820840950639854, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.707495429616088e-06, |
|
"loss": 0.0, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 3.3857404021937842, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.685557586837294e-06, |
|
"loss": 0.0, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 3.389396709323583, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.6636197440585e-06, |
|
"loss": 0.0, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 3.393053016453382, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.641681901279708e-06, |
|
"loss": 0.0, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 3.396709323583181, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.619744058500914e-06, |
|
"loss": 0.0, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 3.40036563071298, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.597806215722122e-06, |
|
"loss": 0.0, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 3.4040219378427787, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.575868372943328e-06, |
|
"loss": 0.0, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 3.4076782449725775, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.553930530164534e-06, |
|
"loss": 0.0, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 3.411334552102377, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.53199268738574e-06, |
|
"loss": 0.0, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 3.4149908592321756, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.510054844606948e-06, |
|
"loss": 0.0, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 3.4186471663619744, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.488117001828154e-06, |
|
"loss": 0.0, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 3.422303473491773, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.46617915904936e-06, |
|
"loss": 0.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 3.425959780621572, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.444241316270567e-06, |
|
"loss": 0.0, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 3.4296160877513713, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.422303473491773e-06, |
|
"loss": 0.0, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 3.43327239488117, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.40036563071298e-06, |
|
"loss": 0.0, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 3.436928702010969, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.378427787934187e-06, |
|
"loss": 0.0, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 3.4405850091407677, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.356489945155395e-06, |
|
"loss": 0.0, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 3.444241316270567, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.334552102376599e-06, |
|
"loss": 0.0, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 3.4478976234003658, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.312614259597807e-06, |
|
"loss": 0.0, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 3.4515539305301646, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.290676416819013e-06, |
|
"loss": 0.0, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 3.4552102376599634, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.26873857404022e-06, |
|
"loss": 0.0, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 3.458866544789762, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.246800731261427e-06, |
|
"loss": 0.0, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 3.4625228519195614, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.224862888482633e-06, |
|
"loss": 0.0, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 3.4661791590493602, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.20292504570384e-06, |
|
"loss": 0.0, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 3.469835466179159, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.180987202925045e-06, |
|
"loss": 0.0, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 3.473491773308958, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.159049360146253e-06, |
|
"loss": 0.0, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.4771480804387567, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.13711151736746e-06, |
|
"loss": 0.0, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 3.480804387568556, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.115173674588665e-06, |
|
"loss": 0.0, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 3.4844606946983547, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.093235831809872e-06, |
|
"loss": 0.0, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 3.4881170018281535, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.07129798903108e-06, |
|
"loss": 0.0, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 3.4917733089579523, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.049360146252286e-06, |
|
"loss": 0.0, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 3.495429616087751, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.027422303473493e-06, |
|
"loss": 0.0, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 3.4990859232175504, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.005484460694698e-06, |
|
"loss": 0.0, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 3.502742230347349, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.983546617915906e-06, |
|
"loss": 0.0, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 3.506398537477148, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.961608775137112e-06, |
|
"loss": 0.0, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 3.510054844606947, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.939670932358318e-06, |
|
"loss": 0.0, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 3.5137111517367456, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.917733089579526e-06, |
|
"loss": 0.0, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 3.517367458866545, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.89579524680073e-06, |
|
"loss": 0.0, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 3.5210237659963437, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.873857404021938e-06, |
|
"loss": 0.0, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 3.5246800731261425, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.851919561243144e-06, |
|
"loss": 0.0, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 3.5283363802559418, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.829981718464352e-06, |
|
"loss": 0.0, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 3.53199268738574, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.808043875685558e-06, |
|
"loss": 0.0, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 3.5356489945155394, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.786106032906764e-06, |
|
"loss": 0.0, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 3.539305301645338, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.76416819012797e-06, |
|
"loss": 0.0, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 3.542961608775137, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.742230347349178e-06, |
|
"loss": 0.0, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 3.5466179159049362, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.720292504570384e-06, |
|
"loss": 0.0, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 3.550274223034735, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.69835466179159e-06, |
|
"loss": 0.0, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 3.553930530164534, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.676416819012797e-06, |
|
"loss": 0.0, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 3.5575868372943327, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.654478976234003e-06, |
|
"loss": 0.0, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 3.5612431444241315, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.63254113345521e-06, |
|
"loss": 0.0, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 3.5648994515539307, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.610603290676417e-06, |
|
"loss": 0.0, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 3.5685557586837295, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.588665447897625e-06, |
|
"loss": 0.0, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 3.5722120658135283, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.566727605118829e-06, |
|
"loss": 0.0, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 3.575868372943327, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.544789762340037e-06, |
|
"loss": 0.0, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 3.579524680073126, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.522851919561243e-06, |
|
"loss": 0.0, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 3.583180987202925, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.500914076782451e-06, |
|
"loss": 0.0, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 3.586837294332724, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.478976234003657e-06, |
|
"loss": 0.0, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 3.590493601462523, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.457038391224863e-06, |
|
"loss": 0.0, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 3.5941499085923216, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.43510054844607e-06, |
|
"loss": 0.0, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 3.5978062157221204, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.413162705667276e-06, |
|
"loss": 0.0, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 3.6014625228519197, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.391224862888483e-06, |
|
"loss": 0.0, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 3.6051188299817185, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.36928702010969e-06, |
|
"loss": 0.0, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 3.6087751371115173, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.347349177330896e-06, |
|
"loss": 0.0, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 3.612431444241316, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.325411334552102e-06, |
|
"loss": 0.0, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 3.616087751371115, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.30347349177331e-06, |
|
"loss": 0.0, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 3.619744058500914, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.281535648994516e-06, |
|
"loss": 0.0, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 3.623400365630713, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.259597806215724e-06, |
|
"loss": 0.0, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 3.627056672760512, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.237659963436928e-06, |
|
"loss": 0.0, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 3.630712979890311, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.215722120658136e-06, |
|
"loss": 0.0, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 3.6343692870201094, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.193784277879342e-06, |
|
"loss": 0.0, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 3.6380255941499087, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.171846435100548e-06, |
|
"loss": 0.0, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 3.6416819012797075, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.149908592321756e-06, |
|
"loss": 0.0, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 3.6453382084095063, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.12797074954296e-06, |
|
"loss": 0.0, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 3.6489945155393055, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.106032906764168e-06, |
|
"loss": 0.0, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 3.6526508226691043, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.084095063985374e-06, |
|
"loss": 0.0, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 3.656307129798903, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.062157221206582e-06, |
|
"loss": 0.0, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.659963436928702, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.040219378427788e-06, |
|
"loss": 0.0, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 3.6636197440585008, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.018281535648995e-06, |
|
"loss": 0.0, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 3.6672760511883, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.9963436928702e-06, |
|
"loss": 0.0, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 3.670932358318099, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.974405850091408e-06, |
|
"loss": 0.0, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 3.6745886654478976, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.952468007312615e-06, |
|
"loss": 0.0, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 3.6782449725776964, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.930530164533822e-06, |
|
"loss": 0.0, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 3.6819012797074953, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.908592321755027e-06, |
|
"loss": 0.0, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 3.6855575868372945, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.886654478976233e-06, |
|
"loss": 0.0, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 3.6892138939670933, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.864716636197441e-06, |
|
"loss": 0.0, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 3.692870201096892, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.842778793418647e-06, |
|
"loss": 0.0, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 3.696526508226691, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.820840950639855e-06, |
|
"loss": 0.0, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 3.7001828153564897, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.79890310786106e-06, |
|
"loss": 0.0, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 3.703839122486289, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.776965265082267e-06, |
|
"loss": 0.0, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 3.707495429616088, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.755027422303473e-06, |
|
"loss": 0.0, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 3.7111517367458866, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.733089579524681e-06, |
|
"loss": 0.0, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 3.7148080438756854, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.711151736745887e-06, |
|
"loss": 0.0, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 3.7184643510054842, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.689213893967093e-06, |
|
"loss": 0.0, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 3.7221206581352835, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.6672760511883e-06, |
|
"loss": 0.0, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 3.7257769652650823, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.645338208409506e-06, |
|
"loss": 0.0, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 3.729433272394881, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.6234003656307135e-06, |
|
"loss": 0.0, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 3.7330895795246803, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.601462522851919e-06, |
|
"loss": 0.0, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 3.7367458866544787, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.579524680073127e-06, |
|
"loss": 0.0, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 3.740402193784278, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.557586837294333e-06, |
|
"loss": 0.0, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 3.7440585009140768, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.53564899451554e-06, |
|
"loss": 0.0, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 3.7477148080438756, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.513711151736746e-06, |
|
"loss": 0.0, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 3.751371115173675, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.491773308957952e-06, |
|
"loss": 0.0, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 3.7550274223034736, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.469835466179159e-06, |
|
"loss": 0.0, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 3.7586837294332724, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.447897623400366e-06, |
|
"loss": 0.0, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 3.7623400365630713, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.425959780621572e-06, |
|
"loss": 0.0, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 3.76599634369287, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.404021937842779e-06, |
|
"loss": 0.0, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 3.7696526508226693, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.382084095063985e-06, |
|
"loss": 0.0, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 3.773308957952468, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.360146252285192e-06, |
|
"loss": 0.0, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 3.776965265082267, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.338208409506399e-06, |
|
"loss": 0.0, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 3.7806215722120657, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.3162705667276054e-06, |
|
"loss": 0.0, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 3.7842778793418645, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.294332723948812e-06, |
|
"loss": 0.0, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 3.787934186471664, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.272394881170018e-06, |
|
"loss": 0.0, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 3.7915904936014626, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.250457038391225e-06, |
|
"loss": 0.0, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 3.7952468007312614, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.228519195612432e-06, |
|
"loss": 0.0, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 3.7989031078610602, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.206581352833638e-06, |
|
"loss": 0.0, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 3.802559414990859, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.184643510054845e-06, |
|
"loss": 0.0, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 3.8062157221206583, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.162705667276051e-06, |
|
"loss": 0.0, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 3.809872029250457, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.140767824497258e-06, |
|
"loss": 0.0, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 3.813528336380256, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.118829981718465e-06, |
|
"loss": 0.0, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 3.8171846435100547, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.096892138939671e-06, |
|
"loss": 0.0, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 3.8208409506398535, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.074954296160878e-06, |
|
"loss": 0.0, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 3.8244972577696528, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.053016453382084e-06, |
|
"loss": 0.0, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 3.8281535648994516, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.031078610603291e-06, |
|
"loss": 0.0, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 3.8318098720292504, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.009140767824497e-06, |
|
"loss": 0.0, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 3.835466179159049, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.9872029250457035e-06, |
|
"loss": 0.0, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 3.839122486288848, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.9652650822669105e-06, |
|
"loss": 0.0, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.8427787934186473, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.943327239488117e-06, |
|
"loss": 0.0, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 3.846435100548446, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.921389396709324e-06, |
|
"loss": 0.0, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 3.850091407678245, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.899451553930531e-06, |
|
"loss": 0.0, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 3.853747714808044, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.877513711151737e-06, |
|
"loss": 0.0, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 3.857404021937843, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.855575868372944e-06, |
|
"loss": 0.0, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 3.8610603290676417, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.83363802559415e-06, |
|
"loss": 0.0, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 3.8647166361974405, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.811700182815357e-06, |
|
"loss": 0.0, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 3.8683729433272394, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.789762340036564e-06, |
|
"loss": 0.0, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 3.8720292504570386, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.76782449725777e-06, |
|
"loss": 0.0, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 3.8756855575868374, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.745886654478976e-06, |
|
"loss": 0.0, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 3.8793418647166362, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.723948811700182e-06, |
|
"loss": 0.0, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 3.882998171846435, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.702010968921389e-06, |
|
"loss": 0.0, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 3.886654478976234, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.680073126142596e-06, |
|
"loss": 0.0, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 3.890310786106033, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.658135283363802e-06, |
|
"loss": 0.0, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 3.893967093235832, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.636197440585009e-06, |
|
"loss": 0.0, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 3.8976234003656307, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.6142595978062155e-06, |
|
"loss": 0.0, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 3.9012797074954295, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.5923217550274225e-06, |
|
"loss": 0.0, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 3.9049360146252283, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.5703839122486295e-06, |
|
"loss": 0.0, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 3.9085923217550276, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.548446069469836e-06, |
|
"loss": 0.0, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 3.9122486288848264, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.526508226691043e-06, |
|
"loss": 0.0, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 3.915904936014625, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.50457038391225e-06, |
|
"loss": 0.0, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 3.919561243144424, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.482632541133455e-06, |
|
"loss": 0.0, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 3.923217550274223, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.460694698354662e-06, |
|
"loss": 0.0, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 3.926873857404022, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.438756855575868e-06, |
|
"loss": 0.0, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 3.930530164533821, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.416819012797075e-06, |
|
"loss": 0.0, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 3.9341864716636197, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.394881170018282e-06, |
|
"loss": 0.0, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 3.9378427787934185, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.372943327239488e-06, |
|
"loss": 0.0, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 3.9414990859232173, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.351005484460695e-06, |
|
"loss": 0.0, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 3.9451553930530165, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.329067641681901e-06, |
|
"loss": 0.0, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 3.9488117001828154, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.307129798903108e-06, |
|
"loss": 0.0, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 3.952468007312614, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.285191956124315e-06, |
|
"loss": 0.0, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 3.9561243144424134, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.263254113345521e-06, |
|
"loss": 0.0, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 3.9597806215722122, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.241316270566728e-06, |
|
"loss": 0.0, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 3.963436928702011, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.219378427787934e-06, |
|
"loss": 0.0, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 3.96709323583181, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.197440585009141e-06, |
|
"loss": 0.0, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 3.9707495429616086, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.175502742230348e-06, |
|
"loss": 0.0, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 3.974405850091408, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.153564899451554e-06, |
|
"loss": 0.0, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 3.9780621572212067, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.131627056672761e-06, |
|
"loss": 0.0, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 3.9817184643510055, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.109689213893967e-06, |
|
"loss": 0.0, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 3.9853747714808043, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.087751371115174e-06, |
|
"loss": 0.0, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 3.989031078610603, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.065813528336381e-06, |
|
"loss": 0.0, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 3.9926873857404024, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.043875685557587e-06, |
|
"loss": 0.0, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 3.996343692870201, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.021937842778794e-06, |
|
"loss": 0.0, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6e-06, |
|
"loss": 0.0, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 4.003656307129799, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.978062157221207e-06, |
|
"loss": 0.0, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 4.007312614259598, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.956124314442413e-06, |
|
"loss": 0.0, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 4.010968921389397, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.934186471663619e-06, |
|
"loss": 0.0, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 4.014625228519195, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.912248628884826e-06, |
|
"loss": 0.0, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 4.0182815356489945, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.8903107861060326e-06, |
|
"loss": 0.0, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 4.021937842778794, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.8683729433272395e-06, |
|
"loss": 0.0, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 4.025594149908592, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.8464351005484465e-06, |
|
"loss": 0.0, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 4.029250457038391, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.824497257769653e-06, |
|
"loss": 0.0, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 4.03290676416819, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.80255941499086e-06, |
|
"loss": 0.0, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 4.036563071297989, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.780621572212066e-06, |
|
"loss": 0.0, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 4.040219378427788, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.758683729433273e-06, |
|
"loss": 0.0, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 4.043875685557587, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.73674588665448e-06, |
|
"loss": 0.0, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 4.047531992687386, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.714808043875686e-06, |
|
"loss": 0.0, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 4.051188299817184, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.692870201096892e-06, |
|
"loss": 0.0, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 4.0548446069469835, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.670932358318098e-06, |
|
"loss": 0.0, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 4.058500914076783, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.648994515539305e-06, |
|
"loss": 0.0, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 4.062157221206581, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.627056672760512e-06, |
|
"loss": 0.0, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 4.06581352833638, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.605118829981718e-06, |
|
"loss": 0.0, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 4.06946983546618, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.583180987202925e-06, |
|
"loss": 0.0, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 4.073126142595978, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.5612431444241314e-06, |
|
"loss": 0.0, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 4.076782449725777, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.5393053016453384e-06, |
|
"loss": 0.0, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 4.0804387568555756, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.517367458866545e-06, |
|
"loss": 0.0, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 4.084095063985375, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.4954296160877516e-06, |
|
"loss": 0.0, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 4.087751371115174, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.4734917733089585e-06, |
|
"loss": 0.0, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 4.091407678244972, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.451553930530164e-06, |
|
"loss": 0.0, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 4.095063985374772, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.429616087751371e-06, |
|
"loss": 0.0, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 4.09872029250457, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.407678244972578e-06, |
|
"loss": 0.0, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 4.102376599634369, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.385740402193784e-06, |
|
"loss": 0.0, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 4.1060329067641685, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.363802559414991e-06, |
|
"loss": 0.0, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 4.109689213893967, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.341864716636198e-06, |
|
"loss": 0.0, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 4.113345521023766, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.319926873857404e-06, |
|
"loss": 0.0, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 4.1170018281535645, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.297989031078611e-06, |
|
"loss": 0.0, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 4.120658135283364, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.276051188299817e-06, |
|
"loss": 0.0, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 4.124314442413163, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.254113345521024e-06, |
|
"loss": 0.0, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 4.127970749542961, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.232175502742231e-06, |
|
"loss": 0.0, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 4.131627056672761, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.210237659963437e-06, |
|
"loss": 0.0, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 4.135283363802559, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.1882998171846435e-06, |
|
"loss": 0.0, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 4.138939670932358, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.16636197440585e-06, |
|
"loss": 0.0, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 4.1425959780621575, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.144424131627057e-06, |
|
"loss": 0.0, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 4.146252285191956, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.122486288848264e-06, |
|
"loss": 0.0, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 4.149908592321755, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.10054844606947e-06, |
|
"loss": 0.0, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 4.153564899451554, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.078610603290677e-06, |
|
"loss": 0.0, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 4.157221206581353, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.056672760511883e-06, |
|
"loss": 0.0, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 4.160877513711152, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.03473491773309e-06, |
|
"loss": 0.0, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 4.16453382084095, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.012797074954297e-06, |
|
"loss": 0.0, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 4.16819012797075, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.990859232175503e-06, |
|
"loss": 0.0, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 4.171846435100549, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.96892138939671e-06, |
|
"loss": 0.0, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 4.175502742230347, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.946983546617916e-06, |
|
"loss": 0.0, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 4.1791590493601465, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.925045703839122e-06, |
|
"loss": 0.0, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 4.182815356489945, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.903107861060329e-06, |
|
"loss": 0.0, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 4.186471663619744, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.881170018281535e-06, |
|
"loss": 0.0, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 4.190127970749543, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.859232175502742e-06, |
|
"loss": 0.0, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 4.193784277879342, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.8372943327239485e-06, |
|
"loss": 0.0, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 4.197440585009141, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.8153564899451555e-06, |
|
"loss": 0.0, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 4.201096892138939, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.7934186471663625e-06, |
|
"loss": 0.0, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 4.204753199268739, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.771480804387569e-06, |
|
"loss": 0.0, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.208409506398538, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.749542961608776e-06, |
|
"loss": 0.0, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 4.212065813528336, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.727605118829982e-06, |
|
"loss": 0.0, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 4.2157221206581355, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.705667276051189e-06, |
|
"loss": 0.0, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 4.219378427787934, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.683729433272396e-06, |
|
"loss": 0.0, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 4.223034734917733, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.661791590493601e-06, |
|
"loss": 0.0, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 4.226691042047532, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.639853747714808e-06, |
|
"loss": 0.0, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 4.230347349177331, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.617915904936014e-06, |
|
"loss": 0.0, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 4.23400365630713, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.595978062157221e-06, |
|
"loss": 0.0, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 4.237659963436928, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.574040219378428e-06, |
|
"loss": 0.0, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 4.2413162705667276, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.552102376599634e-06, |
|
"loss": 0.0, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 4.244972577696527, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.530164533820841e-06, |
|
"loss": 0.0, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 4.248628884826325, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.508226691042047e-06, |
|
"loss": 0.0, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 4.252285191956124, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.486288848263254e-06, |
|
"loss": 0.0, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 4.255941499085923, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.464351005484461e-06, |
|
"loss": 0.0, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 4.259597806215722, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.4424131627056675e-06, |
|
"loss": 0.0, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 4.263254113345521, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.4204753199268745e-06, |
|
"loss": 0.0, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 4.26691042047532, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.39853747714808e-06, |
|
"loss": 0.0, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 4.270566727605119, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.376599634369287e-06, |
|
"loss": 0.0, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 4.274223034734918, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.354661791590494e-06, |
|
"loss": 0.0, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 4.2778793418647165, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.3327239488117e-06, |
|
"loss": 0.0, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 4.281535648994516, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.310786106032907e-06, |
|
"loss": 0.0, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 4.285191956124314, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.288848263254113e-06, |
|
"loss": 0.0, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 4.288848263254113, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.26691042047532e-06, |
|
"loss": 0.0, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 4.292504570383913, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.244972577696527e-06, |
|
"loss": 0.0, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 4.296160877513711, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.223034734917733e-06, |
|
"loss": 0.0, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 4.29981718464351, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.20109689213894e-06, |
|
"loss": 0.0, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 4.303473491773309, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.179159049360146e-06, |
|
"loss": 0.0, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 4.307129798903108, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.157221206581353e-06, |
|
"loss": 0.0, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 4.310786106032907, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.135283363802559e-06, |
|
"loss": 0.0, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 4.3144424131627055, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.1133455210237655e-06, |
|
"loss": 0.0, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 4.318098720292505, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.0914076782449725e-06, |
|
"loss": 0.0, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 4.321755027422303, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.0694698354661795e-06, |
|
"loss": 0.0, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 4.325411334552102, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.047531992687386e-06, |
|
"loss": 0.0, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 4.329067641681902, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.025594149908593e-06, |
|
"loss": 0.0, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 4.3327239488117, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.003656307129799e-06, |
|
"loss": 0.0, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 4.336380255941499, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.981718464351006e-06, |
|
"loss": 0.0, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 4.340036563071298, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.959780621572213e-06, |
|
"loss": 0.0, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 4.343692870201097, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.937842778793419e-06, |
|
"loss": 0.0, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 4.347349177330896, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.915904936014626e-06, |
|
"loss": 0.0, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 4.3510054844606945, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.893967093235831e-06, |
|
"loss": 0.0, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 4.354661791590494, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.872029250457038e-06, |
|
"loss": 0.0, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 4.358318098720293, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.850091407678245e-06, |
|
"loss": 0.0, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 4.361974405850091, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.828153564899451e-06, |
|
"loss": 0.0, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 4.365630712979891, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.8062157221206583e-06, |
|
"loss": 0.0, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 4.369287020109689, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.784277879341865e-06, |
|
"loss": 0.0, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 4.372943327239488, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.7623400365630714e-06, |
|
"loss": 0.0, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 4.376599634369287, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.740402193784278e-06, |
|
"loss": 0.0, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 4.380255941499086, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.7184643510054846e-06, |
|
"loss": 0.0, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 4.383912248628885, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.696526508226691e-06, |
|
"loss": 0.0, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 4.387568555758683, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.6745886654478977e-06, |
|
"loss": 0.0, |
|
"step": 12000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 13675, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 2000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|