|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.999336957963135, |
|
"eval_steps": 500, |
|
"global_step": 9425, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005304336294921098, |
|
"grad_norm": 3.020922899246216, |
|
"learning_rate": 2.1208907741251328e-07, |
|
"loss": 0.3096, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010608672589842195, |
|
"grad_norm": 2.9058778285980225, |
|
"learning_rate": 4.2417815482502656e-07, |
|
"loss": 0.2989, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.015913008884763293, |
|
"grad_norm": 3.3067986965179443, |
|
"learning_rate": 6.362672322375398e-07, |
|
"loss": 0.2989, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02121734517968439, |
|
"grad_norm": 3.3193933963775635, |
|
"learning_rate": 8.483563096500531e-07, |
|
"loss": 0.3077, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02652168147460549, |
|
"grad_norm": 3.428807497024536, |
|
"learning_rate": 1.0604453870625663e-06, |
|
"loss": 0.3117, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.031826017769526586, |
|
"grad_norm": 3.1738169193267822, |
|
"learning_rate": 1.2725344644750796e-06, |
|
"loss": 0.307, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03713035406444769, |
|
"grad_norm": 2.9309284687042236, |
|
"learning_rate": 1.4846235418875928e-06, |
|
"loss": 0.3017, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04243469035936878, |
|
"grad_norm": 3.575754165649414, |
|
"learning_rate": 1.6967126193001062e-06, |
|
"loss": 0.2868, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04773902665428988, |
|
"grad_norm": 2.9849185943603516, |
|
"learning_rate": 1.9088016967126195e-06, |
|
"loss": 0.3072, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05304336294921098, |
|
"grad_norm": 3.3160886764526367, |
|
"learning_rate": 2.1208907741251327e-06, |
|
"loss": 0.3127, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05834769924413208, |
|
"grad_norm": 3.4454386234283447, |
|
"learning_rate": 2.332979851537646e-06, |
|
"loss": 0.3105, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06365203553905317, |
|
"grad_norm": 3.1443614959716797, |
|
"learning_rate": 2.545068928950159e-06, |
|
"loss": 0.3011, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06895637183397427, |
|
"grad_norm": 3.260246515274048, |
|
"learning_rate": 2.7571580063626724e-06, |
|
"loss": 0.2922, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07426070812889538, |
|
"grad_norm": 3.3770997524261475, |
|
"learning_rate": 2.9692470837751856e-06, |
|
"loss": 0.2962, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07956504442381647, |
|
"grad_norm": 3.2429087162017822, |
|
"learning_rate": 3.1813361611876992e-06, |
|
"loss": 0.3195, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08486938071873756, |
|
"grad_norm": 2.878188371658325, |
|
"learning_rate": 3.3934252386002125e-06, |
|
"loss": 0.3045, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09017371701365867, |
|
"grad_norm": 3.4501426219940186, |
|
"learning_rate": 3.6055143160127253e-06, |
|
"loss": 0.2964, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09547805330857977, |
|
"grad_norm": 3.384909152984619, |
|
"learning_rate": 3.817603393425239e-06, |
|
"loss": 0.2967, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.10078238960350086, |
|
"grad_norm": 3.2439138889312744, |
|
"learning_rate": 4.029692470837753e-06, |
|
"loss": 0.3126, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.10608672589842195, |
|
"grad_norm": 3.6719398498535156, |
|
"learning_rate": 4.241781548250265e-06, |
|
"loss": 0.3142, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11139106219334306, |
|
"grad_norm": 3.7998712062835693, |
|
"learning_rate": 4.453870625662779e-06, |
|
"loss": 0.3003, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.11669539848826416, |
|
"grad_norm": 3.800631523132324, |
|
"learning_rate": 4.665959703075292e-06, |
|
"loss": 0.3009, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12199973478318525, |
|
"grad_norm": 3.281419038772583, |
|
"learning_rate": 4.8780487804878055e-06, |
|
"loss": 0.3126, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.12730407107810635, |
|
"grad_norm": 3.1733639240264893, |
|
"learning_rate": 5.090137857900318e-06, |
|
"loss": 0.313, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13260840737302745, |
|
"grad_norm": 3.4202017784118652, |
|
"learning_rate": 5.302226935312832e-06, |
|
"loss": 0.317, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.13791274366794853, |
|
"grad_norm": 3.541494369506836, |
|
"learning_rate": 5.514316012725345e-06, |
|
"loss": 0.3093, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14321707996286964, |
|
"grad_norm": 3.3184351921081543, |
|
"learning_rate": 5.726405090137858e-06, |
|
"loss": 0.3197, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.14852141625779075, |
|
"grad_norm": 3.494412660598755, |
|
"learning_rate": 5.938494167550371e-06, |
|
"loss": 0.3238, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15382575255271183, |
|
"grad_norm": 3.590353488922119, |
|
"learning_rate": 6.150583244962884e-06, |
|
"loss": 0.3147, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.15913008884763294, |
|
"grad_norm": 4.020939350128174, |
|
"learning_rate": 6.3626723223753985e-06, |
|
"loss": 0.3161, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16443442514255405, |
|
"grad_norm": 3.1689860820770264, |
|
"learning_rate": 6.574761399787911e-06, |
|
"loss": 0.3161, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.16973876143747513, |
|
"grad_norm": 3.292384386062622, |
|
"learning_rate": 6.786850477200425e-06, |
|
"loss": 0.3076, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17504309773239624, |
|
"grad_norm": 3.379220962524414, |
|
"learning_rate": 6.998939554612938e-06, |
|
"loss": 0.3119, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.18034743402731734, |
|
"grad_norm": 3.823171377182007, |
|
"learning_rate": 7.2110286320254506e-06, |
|
"loss": 0.3157, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18565177032223842, |
|
"grad_norm": 3.715949058532715, |
|
"learning_rate": 7.423117709437965e-06, |
|
"loss": 0.3354, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19095610661715953, |
|
"grad_norm": 3.638728141784668, |
|
"learning_rate": 7.635206786850478e-06, |
|
"loss": 0.3333, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1962604429120806, |
|
"grad_norm": 3.572036027908325, |
|
"learning_rate": 7.847295864262992e-06, |
|
"loss": 0.336, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.20156477920700172, |
|
"grad_norm": 3.472360610961914, |
|
"learning_rate": 8.059384941675505e-06, |
|
"loss": 0.333, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.20686911550192283, |
|
"grad_norm": 3.5541131496429443, |
|
"learning_rate": 8.271474019088017e-06, |
|
"loss": 0.3476, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2121734517968439, |
|
"grad_norm": 3.7231061458587646, |
|
"learning_rate": 8.48356309650053e-06, |
|
"loss": 0.3391, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21747778809176502, |
|
"grad_norm": 3.7742016315460205, |
|
"learning_rate": 8.695652173913044e-06, |
|
"loss": 0.3374, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.22278212438668613, |
|
"grad_norm": 4.2606892585754395, |
|
"learning_rate": 8.907741251325558e-06, |
|
"loss": 0.3653, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.2280864606816072, |
|
"grad_norm": 4.846815586090088, |
|
"learning_rate": 9.11983032873807e-06, |
|
"loss": 0.3342, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23339079697652831, |
|
"grad_norm": 3.6985626220703125, |
|
"learning_rate": 9.331919406150584e-06, |
|
"loss": 0.3438, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.23869513327144942, |
|
"grad_norm": 3.6264142990112305, |
|
"learning_rate": 9.544008483563097e-06, |
|
"loss": 0.3692, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2439994695663705, |
|
"grad_norm": 3.990809440612793, |
|
"learning_rate": 9.756097560975611e-06, |
|
"loss": 0.3545, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.2493038058612916, |
|
"grad_norm": 3.816340208053589, |
|
"learning_rate": 9.968186638388125e-06, |
|
"loss": 0.3347, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2546081421562127, |
|
"grad_norm": 4.031066417694092, |
|
"learning_rate": 1.0180275715800637e-05, |
|
"loss": 0.3554, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2599124784511338, |
|
"grad_norm": 3.9148292541503906, |
|
"learning_rate": 1.039236479321315e-05, |
|
"loss": 0.3644, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2652168147460549, |
|
"grad_norm": 3.6338350772857666, |
|
"learning_rate": 1.0604453870625664e-05, |
|
"loss": 0.3755, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.270521151040976, |
|
"grad_norm": 3.52591872215271, |
|
"learning_rate": 1.0816542948038178e-05, |
|
"loss": 0.3513, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.27582548733589707, |
|
"grad_norm": 4.284359455108643, |
|
"learning_rate": 1.102863202545069e-05, |
|
"loss": 0.3726, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.2811298236308182, |
|
"grad_norm": 3.469064712524414, |
|
"learning_rate": 1.1240721102863203e-05, |
|
"loss": 0.3705, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.2864341599257393, |
|
"grad_norm": 3.983943223953247, |
|
"learning_rate": 1.1452810180275717e-05, |
|
"loss": 0.3772, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.2917384962206604, |
|
"grad_norm": 4.600942134857178, |
|
"learning_rate": 1.1664899257688229e-05, |
|
"loss": 0.3685, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.2970428325155815, |
|
"grad_norm": 3.9568793773651123, |
|
"learning_rate": 1.1876988335100742e-05, |
|
"loss": 0.3545, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3023471688105026, |
|
"grad_norm": 4.284022808074951, |
|
"learning_rate": 1.2089077412513258e-05, |
|
"loss": 0.3806, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.30765150510542366, |
|
"grad_norm": 3.7012698650360107, |
|
"learning_rate": 1.2301166489925768e-05, |
|
"loss": 0.3717, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.31295584140034477, |
|
"grad_norm": 3.5977935791015625, |
|
"learning_rate": 1.2513255567338283e-05, |
|
"loss": 0.3741, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.3182601776952659, |
|
"grad_norm": 4.306045055389404, |
|
"learning_rate": 1.2725344644750797e-05, |
|
"loss": 0.3774, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.323564513990187, |
|
"grad_norm": 4.045034408569336, |
|
"learning_rate": 1.293743372216331e-05, |
|
"loss": 0.3828, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.3288688502851081, |
|
"grad_norm": 3.6002280712127686, |
|
"learning_rate": 1.3149522799575823e-05, |
|
"loss": 0.3613, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.33417318658002915, |
|
"grad_norm": 3.792759418487549, |
|
"learning_rate": 1.3361611876988336e-05, |
|
"loss": 0.3916, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33947752287495025, |
|
"grad_norm": 4.016223907470703, |
|
"learning_rate": 1.357370095440085e-05, |
|
"loss": 0.3832, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.34478185916987136, |
|
"grad_norm": 3.6611733436584473, |
|
"learning_rate": 1.3785790031813362e-05, |
|
"loss": 0.3726, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.35008619546479247, |
|
"grad_norm": 4.31847620010376, |
|
"learning_rate": 1.3997879109225876e-05, |
|
"loss": 0.4027, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.3553905317597136, |
|
"grad_norm": 3.5145909786224365, |
|
"learning_rate": 1.4209968186638389e-05, |
|
"loss": 0.4077, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.3606948680546347, |
|
"grad_norm": 4.157687664031982, |
|
"learning_rate": 1.4422057264050901e-05, |
|
"loss": 0.3887, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.36599920434955574, |
|
"grad_norm": 3.945263624191284, |
|
"learning_rate": 1.4634146341463415e-05, |
|
"loss": 0.4042, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.37130354064447685, |
|
"grad_norm": 4.426787853240967, |
|
"learning_rate": 1.484623541887593e-05, |
|
"loss": 0.4188, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.37660787693939796, |
|
"grad_norm": 3.5950045585632324, |
|
"learning_rate": 1.5058324496288444e-05, |
|
"loss": 0.4235, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.38191221323431906, |
|
"grad_norm": 3.7992334365844727, |
|
"learning_rate": 1.5270413573700956e-05, |
|
"loss": 0.3891, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.3872165495292402, |
|
"grad_norm": 4.29195499420166, |
|
"learning_rate": 1.548250265111347e-05, |
|
"loss": 0.4039, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.3925208858241612, |
|
"grad_norm": 4.078744411468506, |
|
"learning_rate": 1.5694591728525983e-05, |
|
"loss": 0.4045, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.39782522211908233, |
|
"grad_norm": 4.213825702667236, |
|
"learning_rate": 1.5906680805938493e-05, |
|
"loss": 0.428, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.40312955841400344, |
|
"grad_norm": 3.6432559490203857, |
|
"learning_rate": 1.611876988335101e-05, |
|
"loss": 0.3985, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.40843389470892455, |
|
"grad_norm": 3.458439826965332, |
|
"learning_rate": 1.6330858960763524e-05, |
|
"loss": 0.4257, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.41373823100384566, |
|
"grad_norm": 3.8959126472473145, |
|
"learning_rate": 1.6542948038176034e-05, |
|
"loss": 0.4293, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.41904256729876677, |
|
"grad_norm": 4.051570415496826, |
|
"learning_rate": 1.6755037115588548e-05, |
|
"loss": 0.4293, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.4243469035936878, |
|
"grad_norm": 3.807042121887207, |
|
"learning_rate": 1.696712619300106e-05, |
|
"loss": 0.4263, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4296512398886089, |
|
"grad_norm": 3.8195431232452393, |
|
"learning_rate": 1.7179215270413575e-05, |
|
"loss": 0.4341, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.43495557618353003, |
|
"grad_norm": 3.7309257984161377, |
|
"learning_rate": 1.739130434782609e-05, |
|
"loss": 0.4238, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.44025991247845114, |
|
"grad_norm": 3.7905941009521484, |
|
"learning_rate": 1.7603393425238602e-05, |
|
"loss": 0.423, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.44556424877337225, |
|
"grad_norm": 3.9321465492248535, |
|
"learning_rate": 1.7815482502651116e-05, |
|
"loss": 0.4333, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.4508685850682933, |
|
"grad_norm": 4.208277702331543, |
|
"learning_rate": 1.8027571580063626e-05, |
|
"loss": 0.4355, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.4561729213632144, |
|
"grad_norm": 4.083523273468018, |
|
"learning_rate": 1.823966065747614e-05, |
|
"loss": 0.4345, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.4614772576581355, |
|
"grad_norm": 4.052921772003174, |
|
"learning_rate": 1.8451749734888657e-05, |
|
"loss": 0.4345, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.46678159395305663, |
|
"grad_norm": 3.9257407188415527, |
|
"learning_rate": 1.8663838812301167e-05, |
|
"loss": 0.438, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.47208593024797774, |
|
"grad_norm": 3.8625378608703613, |
|
"learning_rate": 1.887592788971368e-05, |
|
"loss": 0.4479, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.47739026654289884, |
|
"grad_norm": 3.6812710762023926, |
|
"learning_rate": 1.9088016967126195e-05, |
|
"loss": 0.4728, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.4826946028378199, |
|
"grad_norm": 3.700044631958008, |
|
"learning_rate": 1.9300106044538708e-05, |
|
"loss": 0.4685, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.487998939132741, |
|
"grad_norm": 3.957547187805176, |
|
"learning_rate": 1.9512195121951222e-05, |
|
"loss": 0.4518, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.4933032754276621, |
|
"grad_norm": 4.383725643157959, |
|
"learning_rate": 1.9724284199363736e-05, |
|
"loss": 0.4822, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.4986076117225832, |
|
"grad_norm": 3.6373391151428223, |
|
"learning_rate": 1.993637327677625e-05, |
|
"loss": 0.4518, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5039119480175043, |
|
"grad_norm": 3.9900004863739014, |
|
"learning_rate": 1.9999966389958385e-05, |
|
"loss": 0.4512, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5092162843124254, |
|
"grad_norm": 3.9511730670928955, |
|
"learning_rate": 1.9999801769890262e-05, |
|
"loss": 0.4611, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5145206206073465, |
|
"grad_norm": 4.0564751625061035, |
|
"learning_rate": 1.9999499968778183e-05, |
|
"loss": 0.4532, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5198249569022676, |
|
"grad_norm": 4.041463375091553, |
|
"learning_rate": 1.999906099076237e-05, |
|
"loss": 0.4699, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5251292931971887, |
|
"grad_norm": 4.434980869293213, |
|
"learning_rate": 1.9998484841864885e-05, |
|
"loss": 0.4849, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5304336294921098, |
|
"grad_norm": 3.809227228164673, |
|
"learning_rate": 1.999777152998956e-05, |
|
"loss": 0.4654, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5357379657870309, |
|
"grad_norm": 3.6580355167388916, |
|
"learning_rate": 1.999692106492187e-05, |
|
"loss": 0.4694, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.541042302081952, |
|
"grad_norm": 4.149374485015869, |
|
"learning_rate": 1.9995933458328816e-05, |
|
"loss": 0.4854, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5463466383768731, |
|
"grad_norm": 4.361349105834961, |
|
"learning_rate": 1.999480872375876e-05, |
|
"loss": 0.4966, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.5516509746717941, |
|
"grad_norm": 4.03767728805542, |
|
"learning_rate": 1.999354687664123e-05, |
|
"loss": 0.4779, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5569553109667152, |
|
"grad_norm": 3.866338014602661, |
|
"learning_rate": 1.9992147934286726e-05, |
|
"loss": 0.4843, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5622596472616364, |
|
"grad_norm": 3.858006238937378, |
|
"learning_rate": 1.999061191588646e-05, |
|
"loss": 0.4805, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5675639835565575, |
|
"grad_norm": 4.386377811431885, |
|
"learning_rate": 1.9988938842512117e-05, |
|
"loss": 0.4837, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.5728683198514786, |
|
"grad_norm": 3.8602404594421387, |
|
"learning_rate": 1.998712873711554e-05, |
|
"loss": 0.4861, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5781726561463997, |
|
"grad_norm": 3.921699047088623, |
|
"learning_rate": 1.9985181624528435e-05, |
|
"loss": 0.4724, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.5834769924413208, |
|
"grad_norm": 4.1873884201049805, |
|
"learning_rate": 1.998309753146203e-05, |
|
"loss": 0.4928, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5887813287362419, |
|
"grad_norm": 4.134608745574951, |
|
"learning_rate": 1.99808764865067e-05, |
|
"loss": 0.4961, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.594085665031163, |
|
"grad_norm": 4.108495712280273, |
|
"learning_rate": 1.9978518520131574e-05, |
|
"loss": 0.5054, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5993900013260841, |
|
"grad_norm": 4.025395393371582, |
|
"learning_rate": 1.9976023664684114e-05, |
|
"loss": 0.4999, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6046943376210052, |
|
"grad_norm": 4.111999034881592, |
|
"learning_rate": 1.9973391954389697e-05, |
|
"loss": 0.4848, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6099986739159262, |
|
"grad_norm": 3.806658983230591, |
|
"learning_rate": 1.997062342535111e-05, |
|
"loss": 0.4896, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6153030102108473, |
|
"grad_norm": 3.9219367504119873, |
|
"learning_rate": 1.996771811554808e-05, |
|
"loss": 0.4868, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6206073465057684, |
|
"grad_norm": 4.265909194946289, |
|
"learning_rate": 1.9964676064836733e-05, |
|
"loss": 0.4984, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6259116828006895, |
|
"grad_norm": 3.8776512145996094, |
|
"learning_rate": 1.996149731494907e-05, |
|
"loss": 0.4891, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.6312160190956106, |
|
"grad_norm": 4.08100700378418, |
|
"learning_rate": 1.995818190949238e-05, |
|
"loss": 0.4922, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6365203553905318, |
|
"grad_norm": 3.563994884490967, |
|
"learning_rate": 1.995472989394864e-05, |
|
"loss": 0.4939, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6418246916854529, |
|
"grad_norm": 4.029845714569092, |
|
"learning_rate": 1.9951141315673897e-05, |
|
"loss": 0.4856, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.647129027980374, |
|
"grad_norm": 3.762803554534912, |
|
"learning_rate": 1.9947416223897624e-05, |
|
"loss": 0.4912, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6524333642752951, |
|
"grad_norm": 4.067233562469482, |
|
"learning_rate": 1.9943554669722027e-05, |
|
"loss": 0.4884, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.6577377005702162, |
|
"grad_norm": 3.9958083629608154, |
|
"learning_rate": 1.993955670612136e-05, |
|
"loss": 0.4926, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.6630420368651373, |
|
"grad_norm": 4.066989421844482, |
|
"learning_rate": 1.9935422387941194e-05, |
|
"loss": 0.5063, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6683463731600583, |
|
"grad_norm": 4.203117370605469, |
|
"learning_rate": 1.9931151771897658e-05, |
|
"loss": 0.4957, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6736507094549794, |
|
"grad_norm": 3.6308250427246094, |
|
"learning_rate": 1.9926744916576674e-05, |
|
"loss": 0.4939, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.6789550457499005, |
|
"grad_norm": 4.011183738708496, |
|
"learning_rate": 1.992220188243314e-05, |
|
"loss": 0.5002, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6842593820448216, |
|
"grad_norm": 4.1181840896606445, |
|
"learning_rate": 1.991752273179011e-05, |
|
"loss": 0.5035, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.6895637183397427, |
|
"grad_norm": 3.892821788787842, |
|
"learning_rate": 1.9912707528837935e-05, |
|
"loss": 0.5061, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6948680546346638, |
|
"grad_norm": 3.6360907554626465, |
|
"learning_rate": 1.990775633963337e-05, |
|
"loss": 0.5024, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.7001723909295849, |
|
"grad_norm": 3.6487557888031006, |
|
"learning_rate": 1.9902669232098707e-05, |
|
"loss": 0.511, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.705476727224506, |
|
"grad_norm": 3.6783857345581055, |
|
"learning_rate": 1.989744627602079e-05, |
|
"loss": 0.5203, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7107810635194272, |
|
"grad_norm": 4.257678508758545, |
|
"learning_rate": 1.9892087543050102e-05, |
|
"loss": 0.4958, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.7160853998143483, |
|
"grad_norm": 4.080876350402832, |
|
"learning_rate": 1.988659310669976e-05, |
|
"loss": 0.5152, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.7213897361092694, |
|
"grad_norm": 3.8536195755004883, |
|
"learning_rate": 1.9880963042344502e-05, |
|
"loss": 0.4987, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.7266940724041904, |
|
"grad_norm": 3.5171945095062256, |
|
"learning_rate": 1.987519742721968e-05, |
|
"loss": 0.5109, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.7319984086991115, |
|
"grad_norm": 3.6282453536987305, |
|
"learning_rate": 1.9869296340420162e-05, |
|
"loss": 0.5077, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.7373027449940326, |
|
"grad_norm": 3.358875274658203, |
|
"learning_rate": 1.9863259862899285e-05, |
|
"loss": 0.5084, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.7426070812889537, |
|
"grad_norm": 3.6957879066467285, |
|
"learning_rate": 1.9857088077467713e-05, |
|
"loss": 0.5272, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.7479114175838748, |
|
"grad_norm": 3.5185537338256836, |
|
"learning_rate": 1.9850781068792327e-05, |
|
"loss": 0.5016, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.7532157538787959, |
|
"grad_norm": 3.4131875038146973, |
|
"learning_rate": 1.9844338923395044e-05, |
|
"loss": 0.491, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.758520090173717, |
|
"grad_norm": 4.015808582305908, |
|
"learning_rate": 1.9837761729651635e-05, |
|
"loss": 0.5011, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7638244264686381, |
|
"grad_norm": 4.074829578399658, |
|
"learning_rate": 1.9831049577790526e-05, |
|
"loss": 0.5272, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.7691287627635592, |
|
"grad_norm": 3.5524649620056152, |
|
"learning_rate": 1.9824202559891534e-05, |
|
"loss": 0.5363, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7744330990584803, |
|
"grad_norm": 3.5700480937957764, |
|
"learning_rate": 1.9817220769884636e-05, |
|
"loss": 0.5078, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.7797374353534015, |
|
"grad_norm": 3.697791337966919, |
|
"learning_rate": 1.981010430354865e-05, |
|
"loss": 0.5136, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.7850417716483225, |
|
"grad_norm": 3.734912633895874, |
|
"learning_rate": 1.9802853258509937e-05, |
|
"loss": 0.5108, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.7903461079432436, |
|
"grad_norm": 3.951476812362671, |
|
"learning_rate": 1.9795467734241068e-05, |
|
"loss": 0.5172, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.7956504442381647, |
|
"grad_norm": 3.73142671585083, |
|
"learning_rate": 1.9787947832059437e-05, |
|
"loss": 0.5052, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8009547805330858, |
|
"grad_norm": 3.8046658039093018, |
|
"learning_rate": 1.97802936551259e-05, |
|
"loss": 0.4869, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.8062591168280069, |
|
"grad_norm": 3.8006317615509033, |
|
"learning_rate": 1.9772505308443332e-05, |
|
"loss": 0.5227, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.811563453122928, |
|
"grad_norm": 4.094967842102051, |
|
"learning_rate": 1.9764582898855203e-05, |
|
"loss": 0.5027, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.8168677894178491, |
|
"grad_norm": 3.843932628631592, |
|
"learning_rate": 1.975652653504411e-05, |
|
"loss": 0.5099, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.8221721257127702, |
|
"grad_norm": 3.7942137718200684, |
|
"learning_rate": 1.9748336327530287e-05, |
|
"loss": 0.5271, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.8274764620076913, |
|
"grad_norm": 3.671407699584961, |
|
"learning_rate": 1.9740012388670077e-05, |
|
"loss": 0.515, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.8327807983026124, |
|
"grad_norm": 3.8499674797058105, |
|
"learning_rate": 1.9731554832654414e-05, |
|
"loss": 0.5197, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.8380851345975335, |
|
"grad_norm": 3.9028103351593018, |
|
"learning_rate": 1.9722963775507225e-05, |
|
"loss": 0.5098, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.8433894708924545, |
|
"grad_norm": 3.6346793174743652, |
|
"learning_rate": 1.971423933508387e-05, |
|
"loss": 0.5, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.8486938071873756, |
|
"grad_norm": 3.3686752319335938, |
|
"learning_rate": 1.9705381631069508e-05, |
|
"loss": 0.5078, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8539981434822967, |
|
"grad_norm": 3.9609599113464355, |
|
"learning_rate": 1.9696390784977453e-05, |
|
"loss": 0.4996, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.8593024797772179, |
|
"grad_norm": 3.7242376804351807, |
|
"learning_rate": 1.9687266920147517e-05, |
|
"loss": 0.5064, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.864606816072139, |
|
"grad_norm": 3.655386209487915, |
|
"learning_rate": 1.967801016174431e-05, |
|
"loss": 0.5239, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.8699111523670601, |
|
"grad_norm": 3.86031436920166, |
|
"learning_rate": 1.9668620636755525e-05, |
|
"loss": 0.5372, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.8752154886619812, |
|
"grad_norm": 3.772238254547119, |
|
"learning_rate": 1.965909847399021e-05, |
|
"loss": 0.5121, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.8805198249569023, |
|
"grad_norm": 4.217292785644531, |
|
"learning_rate": 1.9649443804076962e-05, |
|
"loss": 0.5204, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.8858241612518234, |
|
"grad_norm": 3.409078359603882, |
|
"learning_rate": 1.9639656759462186e-05, |
|
"loss": 0.5083, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.8911284975467445, |
|
"grad_norm": 3.6432178020477295, |
|
"learning_rate": 1.962973747440824e-05, |
|
"loss": 0.5179, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.8964328338416656, |
|
"grad_norm": 4.103431701660156, |
|
"learning_rate": 1.961968608499161e-05, |
|
"loss": 0.5162, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.9017371701365866, |
|
"grad_norm": 4.007205009460449, |
|
"learning_rate": 1.9609502729101043e-05, |
|
"loss": 0.5192, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.9070415064315077, |
|
"grad_norm": 3.750261068344116, |
|
"learning_rate": 1.959918754643564e-05, |
|
"loss": 0.5089, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9123458427264288, |
|
"grad_norm": 3.946521043777466, |
|
"learning_rate": 1.9588740678502963e-05, |
|
"loss": 0.5104, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.9176501790213499, |
|
"grad_norm": 3.7120089530944824, |
|
"learning_rate": 1.957816226861708e-05, |
|
"loss": 0.5117, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.922954515316271, |
|
"grad_norm": 3.9578349590301514, |
|
"learning_rate": 1.956745246189659e-05, |
|
"loss": 0.5057, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.9282588516111921, |
|
"grad_norm": 3.6276957988739014, |
|
"learning_rate": 1.9556611405262665e-05, |
|
"loss": 0.5168, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.9335631879061133, |
|
"grad_norm": 3.7194759845733643, |
|
"learning_rate": 1.954563924743699e-05, |
|
"loss": 0.527, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.9388675242010344, |
|
"grad_norm": 3.7220964431762695, |
|
"learning_rate": 1.953453613893976e-05, |
|
"loss": 0.516, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.9441718604959555, |
|
"grad_norm": 3.6736011505126953, |
|
"learning_rate": 1.9523302232087592e-05, |
|
"loss": 0.5347, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.9494761967908766, |
|
"grad_norm": 3.3162753582000732, |
|
"learning_rate": 1.951193768099145e-05, |
|
"loss": 0.5142, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.9547805330857977, |
|
"grad_norm": 4.169299602508545, |
|
"learning_rate": 1.9500442641554523e-05, |
|
"loss": 0.5339, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9600848693807187, |
|
"grad_norm": 3.8769702911376953, |
|
"learning_rate": 1.9488817271470087e-05, |
|
"loss": 0.5086, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.9653892056756398, |
|
"grad_norm": 3.482774496078491, |
|
"learning_rate": 1.9477061730219345e-05, |
|
"loss": 0.5216, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.9706935419705609, |
|
"grad_norm": 4.0699028968811035, |
|
"learning_rate": 1.9465176179069235e-05, |
|
"loss": 0.5136, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.975997878265482, |
|
"grad_norm": 3.416879177093506, |
|
"learning_rate": 1.9453160781070222e-05, |
|
"loss": 0.5161, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.9813022145604031, |
|
"grad_norm": 3.698963165283203, |
|
"learning_rate": 1.9441015701054056e-05, |
|
"loss": 0.5084, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.9866065508553242, |
|
"grad_norm": 3.572234630584717, |
|
"learning_rate": 1.9428741105631515e-05, |
|
"loss": 0.5304, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.9919108871502453, |
|
"grad_norm": 3.557875394821167, |
|
"learning_rate": 1.9416337163190123e-05, |
|
"loss": 0.5258, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.9972152234451664, |
|
"grad_norm": 3.5830304622650146, |
|
"learning_rate": 1.9403804043891824e-05, |
|
"loss": 0.5101, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.0025195597400876, |
|
"grad_norm": 3.1445114612579346, |
|
"learning_rate": 1.9391141919670668e-05, |
|
"loss": 0.4397, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.0078238960350085, |
|
"grad_norm": 4.033489227294922, |
|
"learning_rate": 1.9378350964230442e-05, |
|
"loss": 0.2934, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.0131282323299298, |
|
"grad_norm": 3.4734363555908203, |
|
"learning_rate": 1.9365431353042283e-05, |
|
"loss": 0.299, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.0184325686248508, |
|
"grad_norm": 3.221336841583252, |
|
"learning_rate": 1.9352383263342284e-05, |
|
"loss": 0.285, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.023736904919772, |
|
"grad_norm": 3.4902710914611816, |
|
"learning_rate": 1.9339206874129043e-05, |
|
"loss": 0.304, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.029041241214693, |
|
"grad_norm": 3.831965684890747, |
|
"learning_rate": 1.932590236616123e-05, |
|
"loss": 0.2964, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.0343455775096142, |
|
"grad_norm": 3.539476156234741, |
|
"learning_rate": 1.9312469921955092e-05, |
|
"loss": 0.3061, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.0396499138045352, |
|
"grad_norm": 3.1916446685791016, |
|
"learning_rate": 1.9298909725781957e-05, |
|
"loss": 0.2949, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.0449542500994564, |
|
"grad_norm": 3.2898848056793213, |
|
"learning_rate": 1.9285221963665695e-05, |
|
"loss": 0.2918, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.0502585863943774, |
|
"grad_norm": 3.646435260772705, |
|
"learning_rate": 1.927140682338018e-05, |
|
"loss": 0.3051, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.0555629226892984, |
|
"grad_norm": 3.4552927017211914, |
|
"learning_rate": 1.9257464494446702e-05, |
|
"loss": 0.31, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.0608672589842196, |
|
"grad_norm": 3.9061553478240967, |
|
"learning_rate": 1.924339516813138e-05, |
|
"loss": 0.299, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0661715952791406, |
|
"grad_norm": 3.764522075653076, |
|
"learning_rate": 1.922919903744253e-05, |
|
"loss": 0.304, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.0714759315740618, |
|
"grad_norm": 3.577147960662842, |
|
"learning_rate": 1.9214876297128007e-05, |
|
"loss": 0.3022, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.0767802678689828, |
|
"grad_norm": 3.5394883155822754, |
|
"learning_rate": 1.9200427143672557e-05, |
|
"loss": 0.3077, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.082084604163904, |
|
"grad_norm": 3.72452712059021, |
|
"learning_rate": 1.918585177529511e-05, |
|
"loss": 0.3006, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.087388940458825, |
|
"grad_norm": 3.8295013904571533, |
|
"learning_rate": 1.9171150391946045e-05, |
|
"loss": 0.3118, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.0926932767537463, |
|
"grad_norm": 3.4648563861846924, |
|
"learning_rate": 1.9156323195304477e-05, |
|
"loss": 0.3059, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.0979976130486673, |
|
"grad_norm": 3.20479416847229, |
|
"learning_rate": 1.914137038877547e-05, |
|
"loss": 0.2892, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.1033019493435883, |
|
"grad_norm": 3.4433279037475586, |
|
"learning_rate": 1.9126292177487248e-05, |
|
"loss": 0.287, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.1086062856385095, |
|
"grad_norm": 3.7677085399627686, |
|
"learning_rate": 1.911108876828839e-05, |
|
"loss": 0.3007, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.1139106219334305, |
|
"grad_norm": 3.7692863941192627, |
|
"learning_rate": 1.9095760369744987e-05, |
|
"loss": 0.2983, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.1192149582283517, |
|
"grad_norm": 3.512312650680542, |
|
"learning_rate": 1.9080307192137776e-05, |
|
"loss": 0.3138, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.1245192945232727, |
|
"grad_norm": 3.7949514389038086, |
|
"learning_rate": 1.906472944745926e-05, |
|
"loss": 0.3279, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.129823630818194, |
|
"grad_norm": 3.4511559009552, |
|
"learning_rate": 1.9049027349410812e-05, |
|
"loss": 0.2923, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.135127967113115, |
|
"grad_norm": 3.5923140048980713, |
|
"learning_rate": 1.9033201113399713e-05, |
|
"loss": 0.3115, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.1404323034080361, |
|
"grad_norm": 3.7241883277893066, |
|
"learning_rate": 1.901725095653623e-05, |
|
"loss": 0.3077, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.1457366397029571, |
|
"grad_norm": 3.86002779006958, |
|
"learning_rate": 1.9001177097630617e-05, |
|
"loss": 0.3038, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.1510409759978784, |
|
"grad_norm": 3.63974666595459, |
|
"learning_rate": 1.8984979757190115e-05, |
|
"loss": 0.2945, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.1563453122927994, |
|
"grad_norm": 3.8336093425750732, |
|
"learning_rate": 1.896865915741594e-05, |
|
"loss": 0.3103, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.1616496485877206, |
|
"grad_norm": 3.468128204345703, |
|
"learning_rate": 1.8952215522200226e-05, |
|
"loss": 0.3029, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.1669539848826416, |
|
"grad_norm": 3.8509416580200195, |
|
"learning_rate": 1.893564907712294e-05, |
|
"loss": 0.3047, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.1722583211775626, |
|
"grad_norm": 3.3073980808258057, |
|
"learning_rate": 1.8918960049448815e-05, |
|
"loss": 0.3121, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.1775626574724838, |
|
"grad_norm": 3.4550652503967285, |
|
"learning_rate": 1.890214866812421e-05, |
|
"loss": 0.3049, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.1828669937674048, |
|
"grad_norm": 3.4764418601989746, |
|
"learning_rate": 1.8885215163773987e-05, |
|
"loss": 0.3146, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.188171330062326, |
|
"grad_norm": 3.708789110183716, |
|
"learning_rate": 1.8868159768698325e-05, |
|
"loss": 0.3119, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.193475666357247, |
|
"grad_norm": 3.81087064743042, |
|
"learning_rate": 1.885098271686956e-05, |
|
"loss": 0.311, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.1987800026521682, |
|
"grad_norm": 3.6135127544403076, |
|
"learning_rate": 1.8833684243928943e-05, |
|
"loss": 0.3046, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.2040843389470892, |
|
"grad_norm": 3.372121810913086, |
|
"learning_rate": 1.8816264587183442e-05, |
|
"loss": 0.306, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.2093886752420104, |
|
"grad_norm": 3.7073960304260254, |
|
"learning_rate": 1.8798723985602465e-05, |
|
"loss": 0.3059, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.2146930115369314, |
|
"grad_norm": 3.6234822273254395, |
|
"learning_rate": 1.878106267981458e-05, |
|
"loss": 0.3151, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.2199973478318524, |
|
"grad_norm": 3.8061039447784424, |
|
"learning_rate": 1.8763280912104233e-05, |
|
"loss": 0.3116, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.2253016841267736, |
|
"grad_norm": 3.3875911235809326, |
|
"learning_rate": 1.8745378926408403e-05, |
|
"loss": 0.3252, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.2306060204216946, |
|
"grad_norm": 3.5641937255859375, |
|
"learning_rate": 1.8727356968313265e-05, |
|
"loss": 0.3094, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.2359103567166159, |
|
"grad_norm": 3.8727028369903564, |
|
"learning_rate": 1.870921528505082e-05, |
|
"loss": 0.3199, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.2412146930115369, |
|
"grad_norm": 3.7284903526306152, |
|
"learning_rate": 1.8690954125495516e-05, |
|
"loss": 0.3129, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.246519029306458, |
|
"grad_norm": 3.500553607940674, |
|
"learning_rate": 1.8672573740160802e-05, |
|
"loss": 0.3133, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.251823365601379, |
|
"grad_norm": 3.956127643585205, |
|
"learning_rate": 1.8654074381195726e-05, |
|
"loss": 0.3166, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.2571277018963003, |
|
"grad_norm": 3.6364858150482178, |
|
"learning_rate": 1.8635456302381456e-05, |
|
"loss": 0.3075, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.2624320381912213, |
|
"grad_norm": 3.55739164352417, |
|
"learning_rate": 1.8616719759127803e-05, |
|
"loss": 0.3061, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.2677363744861423, |
|
"grad_norm": 3.479860305786133, |
|
"learning_rate": 1.859786500846972e-05, |
|
"loss": 0.3171, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.2730407107810635, |
|
"grad_norm": 3.628953218460083, |
|
"learning_rate": 1.857889230906377e-05, |
|
"loss": 0.305, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.2783450470759847, |
|
"grad_norm": 3.855865478515625, |
|
"learning_rate": 1.8559801921184587e-05, |
|
"loss": 0.296, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.2836493833709057, |
|
"grad_norm": 3.509706497192383, |
|
"learning_rate": 1.8540594106721293e-05, |
|
"loss": 0.3224, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.2889537196658267, |
|
"grad_norm": 3.4830563068389893, |
|
"learning_rate": 1.8521269129173914e-05, |
|
"loss": 0.3124, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.294258055960748, |
|
"grad_norm": 3.4599504470825195, |
|
"learning_rate": 1.850182725364977e-05, |
|
"loss": 0.3223, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.299562392255669, |
|
"grad_norm": 3.6792361736297607, |
|
"learning_rate": 1.848226874685982e-05, |
|
"loss": 0.325, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.3048667285505902, |
|
"grad_norm": 3.6066126823425293, |
|
"learning_rate": 1.8462593877115027e-05, |
|
"loss": 0.3135, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.3101710648455112, |
|
"grad_norm": 4.144036769866943, |
|
"learning_rate": 1.8442802914322655e-05, |
|
"loss": 0.3293, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.3154754011404324, |
|
"grad_norm": 4.001558303833008, |
|
"learning_rate": 1.8422896129982578e-05, |
|
"loss": 0.3391, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.3207797374353534, |
|
"grad_norm": 3.577042818069458, |
|
"learning_rate": 1.840287379718356e-05, |
|
"loss": 0.3207, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.3260840737302746, |
|
"grad_norm": 3.525238037109375, |
|
"learning_rate": 1.8382736190599494e-05, |
|
"loss": 0.3254, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.3313884100251956, |
|
"grad_norm": 3.2063822746276855, |
|
"learning_rate": 1.8362483586485642e-05, |
|
"loss": 0.314, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.3366927463201166, |
|
"grad_norm": 3.878560781478882, |
|
"learning_rate": 1.834211626267486e-05, |
|
"loss": 0.3223, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.3419970826150378, |
|
"grad_norm": 3.4493205547332764, |
|
"learning_rate": 1.8321634498573748e-05, |
|
"loss": 0.3296, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.347301418909959, |
|
"grad_norm": 3.4929230213165283, |
|
"learning_rate": 1.830103857515886e-05, |
|
"loss": 0.3135, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.35260575520488, |
|
"grad_norm": 3.288297653198242, |
|
"learning_rate": 1.828032877497283e-05, |
|
"loss": 0.317, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.357910091499801, |
|
"grad_norm": 4.501832485198975, |
|
"learning_rate": 1.8259505382120483e-05, |
|
"loss": 0.3067, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.3632144277947222, |
|
"grad_norm": 3.346526622772217, |
|
"learning_rate": 1.8238568682264962e-05, |
|
"loss": 0.3127, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.3685187640896432, |
|
"grad_norm": 3.7969963550567627, |
|
"learning_rate": 1.8217518962623792e-05, |
|
"loss": 0.3217, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.3738231003845645, |
|
"grad_norm": 3.661844253540039, |
|
"learning_rate": 1.8196356511964955e-05, |
|
"loss": 0.321, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.3791274366794855, |
|
"grad_norm": 3.460169553756714, |
|
"learning_rate": 1.8175081620602903e-05, |
|
"loss": 0.3245, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.3844317729744064, |
|
"grad_norm": 3.679044008255005, |
|
"learning_rate": 1.815369458039461e-05, |
|
"loss": 0.3167, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.3897361092693277, |
|
"grad_norm": 3.7198102474212646, |
|
"learning_rate": 1.8132195684735545e-05, |
|
"loss": 0.3249, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.3950404455642489, |
|
"grad_norm": 3.785710334777832, |
|
"learning_rate": 1.811058522855564e-05, |
|
"loss": 0.3279, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.4003447818591699, |
|
"grad_norm": 3.7823047637939453, |
|
"learning_rate": 1.808886350831527e-05, |
|
"loss": 0.3297, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.4056491181540909, |
|
"grad_norm": 3.903444766998291, |
|
"learning_rate": 1.806703082200117e-05, |
|
"loss": 0.3324, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.410953454449012, |
|
"grad_norm": 3.5661463737487793, |
|
"learning_rate": 1.8045087469122346e-05, |
|
"loss": 0.3207, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.416257790743933, |
|
"grad_norm": 4.140559673309326, |
|
"learning_rate": 1.8023033750705972e-05, |
|
"loss": 0.3151, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.4215621270388543, |
|
"grad_norm": 3.873009443283081, |
|
"learning_rate": 1.8000869969293254e-05, |
|
"loss": 0.3327, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.4268664633337753, |
|
"grad_norm": 3.7390902042388916, |
|
"learning_rate": 1.7978596428935286e-05, |
|
"loss": 0.3073, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.4321707996286965, |
|
"grad_norm": 3.8170132637023926, |
|
"learning_rate": 1.7956213435188884e-05, |
|
"loss": 0.3235, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.4374751359236175, |
|
"grad_norm": 4.089386940002441, |
|
"learning_rate": 1.793372129511237e-05, |
|
"loss": 0.325, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.4427794722185387, |
|
"grad_norm": 3.624798059463501, |
|
"learning_rate": 1.791112031726139e-05, |
|
"loss": 0.3221, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.4480838085134597, |
|
"grad_norm": 3.935511350631714, |
|
"learning_rate": 1.788841081168467e-05, |
|
"loss": 0.3301, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.4533881448083807, |
|
"grad_norm": 3.4397850036621094, |
|
"learning_rate": 1.7865593089919745e-05, |
|
"loss": 0.3137, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.458692481103302, |
|
"grad_norm": 3.8265187740325928, |
|
"learning_rate": 1.784266746498871e-05, |
|
"loss": 0.3299, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.4639968173982232, |
|
"grad_norm": 3.81622052192688, |
|
"learning_rate": 1.781963425139392e-05, |
|
"loss": 0.3155, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.4693011536931442, |
|
"grad_norm": 3.5892412662506104, |
|
"learning_rate": 1.7796493765113666e-05, |
|
"loss": 0.3248, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.4746054899880652, |
|
"grad_norm": 3.772759437561035, |
|
"learning_rate": 1.7773246323597845e-05, |
|
"loss": 0.313, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.4799098262829864, |
|
"grad_norm": 3.4618637561798096, |
|
"learning_rate": 1.7749892245763614e-05, |
|
"loss": 0.3127, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.4852141625779074, |
|
"grad_norm": 3.3471107482910156, |
|
"learning_rate": 1.7726431851990992e-05, |
|
"loss": 0.3208, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.4905184988728286, |
|
"grad_norm": 3.3492703437805176, |
|
"learning_rate": 1.77028654641185e-05, |
|
"loss": 0.3252, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.4958228351677496, |
|
"grad_norm": 3.785266160964966, |
|
"learning_rate": 1.7679193405438713e-05, |
|
"loss": 0.3229, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.5011271714626706, |
|
"grad_norm": 3.609468460083008, |
|
"learning_rate": 1.7655416000693836e-05, |
|
"loss": 0.3193, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.5064315077575918, |
|
"grad_norm": 3.516772508621216, |
|
"learning_rate": 1.763153357607126e-05, |
|
"loss": 0.3318, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.511735844052513, |
|
"grad_norm": 3.5849831104278564, |
|
"learning_rate": 1.760754645919907e-05, |
|
"loss": 0.3284, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.517040180347434, |
|
"grad_norm": 4.03626012802124, |
|
"learning_rate": 1.758345497914157e-05, |
|
"loss": 0.32, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.522344516642355, |
|
"grad_norm": 4.129549980163574, |
|
"learning_rate": 1.755925946639474e-05, |
|
"loss": 0.3312, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.5276488529372763, |
|
"grad_norm": 3.846771717071533, |
|
"learning_rate": 1.7534960252881735e-05, |
|
"loss": 0.3263, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.5329531892321975, |
|
"grad_norm": 3.8618805408477783, |
|
"learning_rate": 1.7510557671948314e-05, |
|
"loss": 0.3203, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.5382575255271185, |
|
"grad_norm": 3.563652992248535, |
|
"learning_rate": 1.748605205835826e-05, |
|
"loss": 0.3393, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.5435618618220395, |
|
"grad_norm": 3.45072865486145, |
|
"learning_rate": 1.7461443748288797e-05, |
|
"loss": 0.3234, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.5488661981169605, |
|
"grad_norm": 3.2662858963012695, |
|
"learning_rate": 1.7436733079326e-05, |
|
"loss": 0.3229, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.5541705344118817, |
|
"grad_norm": 3.587158679962158, |
|
"learning_rate": 1.741192039046011e-05, |
|
"loss": 0.3394, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.559474870706803, |
|
"grad_norm": 4.137202262878418, |
|
"learning_rate": 1.738700602208094e-05, |
|
"loss": 0.3364, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.564779207001724, |
|
"grad_norm": 3.8183786869049072, |
|
"learning_rate": 1.7361990315973166e-05, |
|
"loss": 0.3245, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.570083543296645, |
|
"grad_norm": 3.716970205307007, |
|
"learning_rate": 1.733687361531166e-05, |
|
"loss": 0.3379, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.5753878795915661, |
|
"grad_norm": 3.4474103450775146, |
|
"learning_rate": 1.731165626465678e-05, |
|
"loss": 0.3321, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.5806922158864873, |
|
"grad_norm": 3.9459433555603027, |
|
"learning_rate": 1.7286338609949623e-05, |
|
"loss": 0.3319, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.5859965521814083, |
|
"grad_norm": 3.303617477416992, |
|
"learning_rate": 1.7260920998507315e-05, |
|
"loss": 0.3383, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.5913008884763293, |
|
"grad_norm": 3.5322844982147217, |
|
"learning_rate": 1.72354037790182e-05, |
|
"loss": 0.3154, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.5966052247712503, |
|
"grad_norm": 3.635408401489258, |
|
"learning_rate": 1.7209787301537116e-05, |
|
"loss": 0.3277, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.6019095610661716, |
|
"grad_norm": 3.167227268218994, |
|
"learning_rate": 1.7184071917480526e-05, |
|
"loss": 0.3179, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.6072138973610928, |
|
"grad_norm": 3.5568039417266846, |
|
"learning_rate": 1.7158257979621756e-05, |
|
"loss": 0.3293, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.6125182336560138, |
|
"grad_norm": 3.3058979511260986, |
|
"learning_rate": 1.7132345842086114e-05, |
|
"loss": 0.3355, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.6178225699509348, |
|
"grad_norm": 3.9339213371276855, |
|
"learning_rate": 1.710633586034606e-05, |
|
"loss": 0.3188, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.623126906245856, |
|
"grad_norm": 3.5868442058563232, |
|
"learning_rate": 1.7080228391216305e-05, |
|
"loss": 0.3212, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.6284312425407772, |
|
"grad_norm": 3.4200713634490967, |
|
"learning_rate": 1.705402379284894e-05, |
|
"loss": 0.3173, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.6337355788356982, |
|
"grad_norm": 3.5358498096466064, |
|
"learning_rate": 1.7027722424728513e-05, |
|
"loss": 0.3298, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.6390399151306192, |
|
"grad_norm": 3.7147319316864014, |
|
"learning_rate": 1.700132464766708e-05, |
|
"loss": 0.3235, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.6443442514255404, |
|
"grad_norm": 3.8844540119171143, |
|
"learning_rate": 1.6974830823799285e-05, |
|
"loss": 0.3201, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.6496485877204616, |
|
"grad_norm": 3.6052422523498535, |
|
"learning_rate": 1.6948241316577375e-05, |
|
"loss": 0.3228, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.6549529240153826, |
|
"grad_norm": 3.4337236881256104, |
|
"learning_rate": 1.692155649076621e-05, |
|
"loss": 0.3332, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.6602572603103036, |
|
"grad_norm": 3.9126977920532227, |
|
"learning_rate": 1.6894776712438288e-05, |
|
"loss": 0.3291, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.6655615966052246, |
|
"grad_norm": 3.7995638847351074, |
|
"learning_rate": 1.686790234896867e-05, |
|
"loss": 0.3222, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.6708659329001458, |
|
"grad_norm": 3.664808988571167, |
|
"learning_rate": 1.6840933769030002e-05, |
|
"loss": 0.3301, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.676170269195067, |
|
"grad_norm": 3.8624629974365234, |
|
"learning_rate": 1.6813871342587404e-05, |
|
"loss": 0.3265, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.681474605489988, |
|
"grad_norm": 3.776545763015747, |
|
"learning_rate": 1.678671544089343e-05, |
|
"loss": 0.3266, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.686778941784909, |
|
"grad_norm": 4.29400634765625, |
|
"learning_rate": 1.6759466436482954e-05, |
|
"loss": 0.3385, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.6920832780798303, |
|
"grad_norm": 4.078743934631348, |
|
"learning_rate": 1.6732124703168075e-05, |
|
"loss": 0.3357, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.6973876143747515, |
|
"grad_norm": 3.473541498184204, |
|
"learning_rate": 1.6704690616032987e-05, |
|
"loss": 0.3294, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.7026919506696725, |
|
"grad_norm": 3.7348885536193848, |
|
"learning_rate": 1.667716455142881e-05, |
|
"loss": 0.3351, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.7079962869645935, |
|
"grad_norm": 3.998124361038208, |
|
"learning_rate": 1.6649546886968473e-05, |
|
"loss": 0.3324, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.7133006232595145, |
|
"grad_norm": 3.9629366397857666, |
|
"learning_rate": 1.662183800152148e-05, |
|
"loss": 0.335, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.7186049595544357, |
|
"grad_norm": 3.7607057094573975, |
|
"learning_rate": 1.6594038275208748e-05, |
|
"loss": 0.3442, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.723909295849357, |
|
"grad_norm": 4.3320112228393555, |
|
"learning_rate": 1.6566148089397387e-05, |
|
"loss": 0.3319, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.729213632144278, |
|
"grad_norm": 3.361454963684082, |
|
"learning_rate": 1.6538167826695466e-05, |
|
"loss": 0.3291, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.734517968439199, |
|
"grad_norm": 3.9114990234375, |
|
"learning_rate": 1.6510097870946752e-05, |
|
"loss": 0.3157, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.7398223047341201, |
|
"grad_norm": 4.4820237159729, |
|
"learning_rate": 1.6481938607225468e-05, |
|
"loss": 0.3245, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.7451266410290414, |
|
"grad_norm": 3.609239101409912, |
|
"learning_rate": 1.6453690421830987e-05, |
|
"loss": 0.3129, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.7504309773239624, |
|
"grad_norm": 3.86490797996521, |
|
"learning_rate": 1.6425353702282543e-05, |
|
"loss": 0.3103, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.7557353136188834, |
|
"grad_norm": 3.5796539783477783, |
|
"learning_rate": 1.639692883731393e-05, |
|
"loss": 0.3202, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.7610396499138046, |
|
"grad_norm": 3.493624687194824, |
|
"learning_rate": 1.6368416216868137e-05, |
|
"loss": 0.3331, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.7663439862087258, |
|
"grad_norm": 3.596022129058838, |
|
"learning_rate": 1.633981623209202e-05, |
|
"loss": 0.3175, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.7716483225036468, |
|
"grad_norm": 3.6291396617889404, |
|
"learning_rate": 1.6311129275330936e-05, |
|
"loss": 0.3256, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.7769526587985678, |
|
"grad_norm": 3.8184401988983154, |
|
"learning_rate": 1.628235574012335e-05, |
|
"loss": 0.3112, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.7822569950934888, |
|
"grad_norm": 3.4983391761779785, |
|
"learning_rate": 1.6253496021195453e-05, |
|
"loss": 0.3198, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.78756133138841, |
|
"grad_norm": 3.6440587043762207, |
|
"learning_rate": 1.6224550514455724e-05, |
|
"loss": 0.3341, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.7928656676833312, |
|
"grad_norm": 3.429333448410034, |
|
"learning_rate": 1.619551961698952e-05, |
|
"loss": 0.3255, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.7981700039782522, |
|
"grad_norm": 3.5707192420959473, |
|
"learning_rate": 1.6166403727053617e-05, |
|
"loss": 0.3268, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.8034743402731732, |
|
"grad_norm": 3.5965964794158936, |
|
"learning_rate": 1.6137203244070755e-05, |
|
"loss": 0.3194, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.8087786765680944, |
|
"grad_norm": 3.7592883110046387, |
|
"learning_rate": 1.610791856862414e-05, |
|
"loss": 0.3398, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.8140830128630157, |
|
"grad_norm": 4.1979265213012695, |
|
"learning_rate": 1.6078550102451974e-05, |
|
"loss": 0.3345, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.8193873491579367, |
|
"grad_norm": 3.106452465057373, |
|
"learning_rate": 1.6049098248441936e-05, |
|
"loss": 0.321, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.8246916854528576, |
|
"grad_norm": 3.2761764526367188, |
|
"learning_rate": 1.6019563410625635e-05, |
|
"loss": 0.327, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.8299960217477786, |
|
"grad_norm": 3.375182867050171, |
|
"learning_rate": 1.5989945994173094e-05, |
|
"loss": 0.3178, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.8353003580426999, |
|
"grad_norm": 3.563046932220459, |
|
"learning_rate": 1.5960246405387173e-05, |
|
"loss": 0.3322, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.840604694337621, |
|
"grad_norm": 4.122814178466797, |
|
"learning_rate": 1.5930465051698016e-05, |
|
"loss": 0.3335, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.845909030632542, |
|
"grad_norm": 3.5397937297821045, |
|
"learning_rate": 1.5900602341657435e-05, |
|
"loss": 0.3326, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.851213366927463, |
|
"grad_norm": 3.227036952972412, |
|
"learning_rate": 1.5870658684933327e-05, |
|
"loss": 0.3265, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.8565177032223843, |
|
"grad_norm": 3.8446407318115234, |
|
"learning_rate": 1.5840634492304045e-05, |
|
"loss": 0.334, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.8618220395173055, |
|
"grad_norm": 3.7455456256866455, |
|
"learning_rate": 1.581053017565276e-05, |
|
"loss": 0.3173, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.8671263758122265, |
|
"grad_norm": 3.5950396060943604, |
|
"learning_rate": 1.5780346147961814e-05, |
|
"loss": 0.3386, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.8724307121071475, |
|
"grad_norm": 3.6555497646331787, |
|
"learning_rate": 1.5750082823307067e-05, |
|
"loss": 0.3404, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.8777350484020687, |
|
"grad_norm": 3.5170364379882812, |
|
"learning_rate": 1.5719740616852192e-05, |
|
"loss": 0.3267, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.88303938469699, |
|
"grad_norm": 3.70723295211792, |
|
"learning_rate": 1.568931994484299e-05, |
|
"loss": 0.3214, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.888343720991911, |
|
"grad_norm": 3.543966770172119, |
|
"learning_rate": 1.5658821224601693e-05, |
|
"loss": 0.3206, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.893648057286832, |
|
"grad_norm": 3.4180748462677, |
|
"learning_rate": 1.562824487452123e-05, |
|
"loss": 0.3221, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.898952393581753, |
|
"grad_norm": 4.066466808319092, |
|
"learning_rate": 1.5597591314059464e-05, |
|
"loss": 0.3219, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.9042567298766742, |
|
"grad_norm": 3.359677791595459, |
|
"learning_rate": 1.5566860963733486e-05, |
|
"loss": 0.3372, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.9095610661715954, |
|
"grad_norm": 4.057319164276123, |
|
"learning_rate": 1.55360542451138e-05, |
|
"loss": 0.3306, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.9148654024665164, |
|
"grad_norm": 3.5447351932525635, |
|
"learning_rate": 1.550517158081857e-05, |
|
"loss": 0.3054, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.9201697387614374, |
|
"grad_norm": 3.775535821914673, |
|
"learning_rate": 1.5474213394507798e-05, |
|
"loss": 0.3222, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.9254740750563586, |
|
"grad_norm": 3.208907127380371, |
|
"learning_rate": 1.544318011087754e-05, |
|
"loss": 0.3168, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.9307784113512798, |
|
"grad_norm": 3.393601179122925, |
|
"learning_rate": 1.541207215565407e-05, |
|
"loss": 0.3234, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.9360827476462008, |
|
"grad_norm": 3.797776222229004, |
|
"learning_rate": 1.5380889955588006e-05, |
|
"loss": 0.3022, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.9413870839411218, |
|
"grad_norm": 3.817760705947876, |
|
"learning_rate": 1.5349633938448517e-05, |
|
"loss": 0.3118, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.9466914202360428, |
|
"grad_norm": 3.5809881687164307, |
|
"learning_rate": 1.5318304533017403e-05, |
|
"loss": 0.327, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.951995756530964, |
|
"grad_norm": 3.3459973335266113, |
|
"learning_rate": 1.528690216908324e-05, |
|
"loss": 0.3222, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.9573000928258852, |
|
"grad_norm": 3.520768165588379, |
|
"learning_rate": 1.5255427277435474e-05, |
|
"loss": 0.3303, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.9626044291208062, |
|
"grad_norm": 3.473872423171997, |
|
"learning_rate": 1.5223880289858515e-05, |
|
"loss": 0.3337, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.9679087654157272, |
|
"grad_norm": 4.005605220794678, |
|
"learning_rate": 1.5192261639125807e-05, |
|
"loss": 0.3206, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.9732131017106485, |
|
"grad_norm": 3.648740530014038, |
|
"learning_rate": 1.5160571758993902e-05, |
|
"loss": 0.3143, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.9785174380055697, |
|
"grad_norm": 4.329815864562988, |
|
"learning_rate": 1.5128811084196505e-05, |
|
"loss": 0.3402, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.9838217743004907, |
|
"grad_norm": 3.976907253265381, |
|
"learning_rate": 1.5096980050438501e-05, |
|
"loss": 0.3137, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.9891261105954117, |
|
"grad_norm": 3.991213321685791, |
|
"learning_rate": 1.5065079094389994e-05, |
|
"loss": 0.3039, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.9944304468903329, |
|
"grad_norm": 3.3972666263580322, |
|
"learning_rate": 1.5033108653680298e-05, |
|
"loss": 0.3288, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.999734783185254, |
|
"grad_norm": 3.2909555435180664, |
|
"learning_rate": 1.5001069166891957e-05, |
|
"loss": 0.3225, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 2.005039119480175, |
|
"grad_norm": 2.6935155391693115, |
|
"learning_rate": 1.4968961073554708e-05, |
|
"loss": 0.1467, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 2.010343455775096, |
|
"grad_norm": 2.5926148891448975, |
|
"learning_rate": 1.4936784814139453e-05, |
|
"loss": 0.1284, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 2.015647792070017, |
|
"grad_norm": 2.8939812183380127, |
|
"learning_rate": 1.4904540830052234e-05, |
|
"loss": 0.1273, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.0209521283649385, |
|
"grad_norm": 3.236732244491577, |
|
"learning_rate": 1.4872229563628158e-05, |
|
"loss": 0.1306, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 2.0262564646598595, |
|
"grad_norm": 2.838834524154663, |
|
"learning_rate": 1.4839851458125331e-05, |
|
"loss": 0.134, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 2.0315608009547805, |
|
"grad_norm": 2.6801376342773438, |
|
"learning_rate": 1.48074069577188e-05, |
|
"loss": 0.1286, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 2.0368651372497015, |
|
"grad_norm": 3.151033878326416, |
|
"learning_rate": 1.4774896507494426e-05, |
|
"loss": 0.1371, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 2.0421694735446225, |
|
"grad_norm": 2.7879486083984375, |
|
"learning_rate": 1.4742320553442797e-05, |
|
"loss": 0.1313, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.047473809839544, |
|
"grad_norm": 2.801165819168091, |
|
"learning_rate": 1.4709679542453115e-05, |
|
"loss": 0.1343, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 2.052778146134465, |
|
"grad_norm": 3.195171594619751, |
|
"learning_rate": 1.4676973922307052e-05, |
|
"loss": 0.1304, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 2.058082482429386, |
|
"grad_norm": 3.2752623558044434, |
|
"learning_rate": 1.4644204141672614e-05, |
|
"loss": 0.1325, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 2.063386818724307, |
|
"grad_norm": 2.8927152156829834, |
|
"learning_rate": 1.461137065009798e-05, |
|
"loss": 0.1298, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 2.0686911550192284, |
|
"grad_norm": 2.9264607429504395, |
|
"learning_rate": 1.4578473898005346e-05, |
|
"loss": 0.1326, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.0739954913141494, |
|
"grad_norm": 2.765760660171509, |
|
"learning_rate": 1.454551433668474e-05, |
|
"loss": 0.136, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 2.0792998276090704, |
|
"grad_norm": 2.7184066772460938, |
|
"learning_rate": 1.4512492418287828e-05, |
|
"loss": 0.1267, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.0846041639039914, |
|
"grad_norm": 3.297851800918579, |
|
"learning_rate": 1.4479408595821707e-05, |
|
"loss": 0.138, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 2.089908500198913, |
|
"grad_norm": 2.9943556785583496, |
|
"learning_rate": 1.4446263323142713e-05, |
|
"loss": 0.1289, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.095212836493834, |
|
"grad_norm": 2.7158663272857666, |
|
"learning_rate": 1.4413057054950166e-05, |
|
"loss": 0.1328, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.100517172788755, |
|
"grad_norm": 2.943380832672119, |
|
"learning_rate": 1.4379790246780152e-05, |
|
"loss": 0.1399, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.105821509083676, |
|
"grad_norm": 2.653359889984131, |
|
"learning_rate": 1.434646335499926e-05, |
|
"loss": 0.1339, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.111125845378597, |
|
"grad_norm": 2.7849793434143066, |
|
"learning_rate": 1.431307683679834e-05, |
|
"loss": 0.1315, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.1164301816735183, |
|
"grad_norm": 2.9013118743896484, |
|
"learning_rate": 1.4279631150186207e-05, |
|
"loss": 0.132, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.1217345179684393, |
|
"grad_norm": 3.1449782848358154, |
|
"learning_rate": 1.4246126753983378e-05, |
|
"loss": 0.1368, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.1270388542633603, |
|
"grad_norm": 2.9267892837524414, |
|
"learning_rate": 1.4212564107815774e-05, |
|
"loss": 0.1361, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.1323431905582813, |
|
"grad_norm": 3.1600418090820312, |
|
"learning_rate": 1.4178943672108402e-05, |
|
"loss": 0.1302, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.1376475268532027, |
|
"grad_norm": 2.9146716594696045, |
|
"learning_rate": 1.4145265908079051e-05, |
|
"loss": 0.1346, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.1429518631481237, |
|
"grad_norm": 2.721554756164551, |
|
"learning_rate": 1.4111531277731965e-05, |
|
"loss": 0.141, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.1482561994430447, |
|
"grad_norm": 2.9390203952789307, |
|
"learning_rate": 1.4077740243851497e-05, |
|
"loss": 0.1367, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.1535605357379657, |
|
"grad_norm": 2.7774572372436523, |
|
"learning_rate": 1.4043893269995766e-05, |
|
"loss": 0.1393, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.1588648720328867, |
|
"grad_norm": 3.0352425575256348, |
|
"learning_rate": 1.4009990820490296e-05, |
|
"loss": 0.1333, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.164169208327808, |
|
"grad_norm": 2.9310920238494873, |
|
"learning_rate": 1.3976033360421652e-05, |
|
"loss": 0.1385, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.169473544622729, |
|
"grad_norm": 2.895400047302246, |
|
"learning_rate": 1.3942021355631047e-05, |
|
"loss": 0.1364, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.17477788091765, |
|
"grad_norm": 2.785146474838257, |
|
"learning_rate": 1.3907955272707963e-05, |
|
"loss": 0.1345, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.180082217212571, |
|
"grad_norm": 2.920459032058716, |
|
"learning_rate": 1.3873835578983747e-05, |
|
"loss": 0.1379, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.1853865535074926, |
|
"grad_norm": 2.7607204914093018, |
|
"learning_rate": 1.3839662742525199e-05, |
|
"loss": 0.1348, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.1906908898024136, |
|
"grad_norm": 2.9964351654052734, |
|
"learning_rate": 1.3805437232128149e-05, |
|
"loss": 0.1308, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.1959952260973346, |
|
"grad_norm": 2.8842570781707764, |
|
"learning_rate": 1.3771159517311026e-05, |
|
"loss": 0.1329, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.2012995623922555, |
|
"grad_norm": 2.794616222381592, |
|
"learning_rate": 1.3736830068308429e-05, |
|
"loss": 0.1359, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.2066038986871765, |
|
"grad_norm": 3.242060661315918, |
|
"learning_rate": 1.3702449356064648e-05, |
|
"loss": 0.1376, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 2.211908234982098, |
|
"grad_norm": 2.9713857173919678, |
|
"learning_rate": 1.366801785222724e-05, |
|
"loss": 0.1388, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 2.217212571277019, |
|
"grad_norm": 2.71294903755188, |
|
"learning_rate": 1.3633536029140535e-05, |
|
"loss": 0.1348, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.22251690757194, |
|
"grad_norm": 3.0826351642608643, |
|
"learning_rate": 1.359900435983915e-05, |
|
"loss": 0.1376, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 2.227821243866861, |
|
"grad_norm": 2.7096450328826904, |
|
"learning_rate": 1.3564423318041527e-05, |
|
"loss": 0.1364, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.2331255801617824, |
|
"grad_norm": 2.883322238922119, |
|
"learning_rate": 1.3529793378143407e-05, |
|
"loss": 0.1364, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 2.2384299164567034, |
|
"grad_norm": 3.0577938556671143, |
|
"learning_rate": 1.3495115015211341e-05, |
|
"loss": 0.1353, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 2.2437342527516244, |
|
"grad_norm": 2.8565526008605957, |
|
"learning_rate": 1.3460388704976162e-05, |
|
"loss": 0.1355, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 2.2490385890465454, |
|
"grad_norm": 2.8537418842315674, |
|
"learning_rate": 1.3425614923826463e-05, |
|
"loss": 0.1387, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 2.254342925341467, |
|
"grad_norm": 2.9709837436676025, |
|
"learning_rate": 1.3390794148802055e-05, |
|
"loss": 0.1392, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.259647261636388, |
|
"grad_norm": 3.2056241035461426, |
|
"learning_rate": 1.3355926857587442e-05, |
|
"loss": 0.132, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 2.264951597931309, |
|
"grad_norm": 2.8401198387145996, |
|
"learning_rate": 1.3321013528505242e-05, |
|
"loss": 0.139, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 2.27025593422623, |
|
"grad_norm": 3.095860481262207, |
|
"learning_rate": 1.3286054640509642e-05, |
|
"loss": 0.1421, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 2.275560270521151, |
|
"grad_norm": 2.8098833560943604, |
|
"learning_rate": 1.325105067317983e-05, |
|
"loss": 0.1374, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 2.2808646068160723, |
|
"grad_norm": 2.9357638359069824, |
|
"learning_rate": 1.3216002106713394e-05, |
|
"loss": 0.1357, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.2861689431109933, |
|
"grad_norm": 2.8763411045074463, |
|
"learning_rate": 1.3180909421919763e-05, |
|
"loss": 0.134, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 2.2914732794059143, |
|
"grad_norm": 3.224968194961548, |
|
"learning_rate": 1.3145773100213596e-05, |
|
"loss": 0.1379, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 2.2967776157008353, |
|
"grad_norm": 2.9192330837249756, |
|
"learning_rate": 1.3110593623608174e-05, |
|
"loss": 0.1336, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 2.3020819519957567, |
|
"grad_norm": 3.039752721786499, |
|
"learning_rate": 1.3075371474708798e-05, |
|
"loss": 0.1378, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 2.3073862882906777, |
|
"grad_norm": 3.0629944801330566, |
|
"learning_rate": 1.3040107136706162e-05, |
|
"loss": 0.1362, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.3126906245855987, |
|
"grad_norm": 2.8435914516448975, |
|
"learning_rate": 1.3004801093369723e-05, |
|
"loss": 0.1313, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.3179949608805197, |
|
"grad_norm": 2.789707660675049, |
|
"learning_rate": 1.2969453829041073e-05, |
|
"loss": 0.1389, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 2.323299297175441, |
|
"grad_norm": 3.2722177505493164, |
|
"learning_rate": 1.2934065828627285e-05, |
|
"loss": 0.1369, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 2.328603633470362, |
|
"grad_norm": 2.8057737350463867, |
|
"learning_rate": 1.289863757759427e-05, |
|
"loss": 0.1396, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 2.333907969765283, |
|
"grad_norm": 2.924314022064209, |
|
"learning_rate": 1.2863169561960105e-05, |
|
"loss": 0.1414, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.339212306060204, |
|
"grad_norm": 2.850343942642212, |
|
"learning_rate": 1.2827662268288377e-05, |
|
"loss": 0.1359, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 2.344516642355125, |
|
"grad_norm": 2.6075124740600586, |
|
"learning_rate": 1.2792116183681506e-05, |
|
"loss": 0.1347, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 2.3498209786500466, |
|
"grad_norm": 3.160735845565796, |
|
"learning_rate": 1.2756531795774053e-05, |
|
"loss": 0.1304, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 2.3551253149449676, |
|
"grad_norm": 3.01257586479187, |
|
"learning_rate": 1.2720909592726045e-05, |
|
"loss": 0.1412, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 2.3604296512398886, |
|
"grad_norm": 2.9084932804107666, |
|
"learning_rate": 1.268525006321627e-05, |
|
"loss": 0.1402, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.3657339875348096, |
|
"grad_norm": 2.940924644470215, |
|
"learning_rate": 1.2649553696435576e-05, |
|
"loss": 0.1357, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 2.3710383238297306, |
|
"grad_norm": 3.0558974742889404, |
|
"learning_rate": 1.261382098208015e-05, |
|
"loss": 0.1414, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 2.376342660124652, |
|
"grad_norm": 2.9939308166503906, |
|
"learning_rate": 1.2578052410344823e-05, |
|
"loss": 0.1388, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 2.381646996419573, |
|
"grad_norm": 3.261167287826538, |
|
"learning_rate": 1.2542248471916319e-05, |
|
"loss": 0.1338, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 2.386951332714494, |
|
"grad_norm": 2.484503746032715, |
|
"learning_rate": 1.2506409657966536e-05, |
|
"loss": 0.1409, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.3922556690094154, |
|
"grad_norm": 2.55649733543396, |
|
"learning_rate": 1.2470536460145818e-05, |
|
"loss": 0.1354, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 2.3975600053043364, |
|
"grad_norm": 2.5738770961761475, |
|
"learning_rate": 1.2434629370576188e-05, |
|
"loss": 0.1365, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 2.4028643415992574, |
|
"grad_norm": 2.8223392963409424, |
|
"learning_rate": 1.2398688881844613e-05, |
|
"loss": 0.1331, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 2.4081686778941784, |
|
"grad_norm": 2.891040325164795, |
|
"learning_rate": 1.236271548699625e-05, |
|
"loss": 0.1336, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 2.4134730141890994, |
|
"grad_norm": 2.738555431365967, |
|
"learning_rate": 1.2326709679527662e-05, |
|
"loss": 0.1382, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.418777350484021, |
|
"grad_norm": 2.6195318698883057, |
|
"learning_rate": 1.229067195338007e-05, |
|
"loss": 0.1376, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 2.424081686778942, |
|
"grad_norm": 3.151268243789673, |
|
"learning_rate": 1.2254602802932556e-05, |
|
"loss": 0.1474, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 2.429386023073863, |
|
"grad_norm": 2.687887668609619, |
|
"learning_rate": 1.2218502722995306e-05, |
|
"loss": 0.1337, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 2.434690359368784, |
|
"grad_norm": 2.977978229522705, |
|
"learning_rate": 1.2182372208802804e-05, |
|
"loss": 0.1308, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 2.439994695663705, |
|
"grad_norm": 2.9313466548919678, |
|
"learning_rate": 1.2146211756007035e-05, |
|
"loss": 0.1352, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.4452990319586263, |
|
"grad_norm": 3.2692997455596924, |
|
"learning_rate": 1.2110021860670703e-05, |
|
"loss": 0.1345, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 2.4506033682535473, |
|
"grad_norm": 2.7192065715789795, |
|
"learning_rate": 1.207380301926041e-05, |
|
"loss": 0.1386, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 2.4559077045484683, |
|
"grad_norm": 3.0179250240325928, |
|
"learning_rate": 1.2037555728639856e-05, |
|
"loss": 0.1375, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 2.4612120408433893, |
|
"grad_norm": 3.132338523864746, |
|
"learning_rate": 1.200128048606301e-05, |
|
"loss": 0.139, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 2.4665163771383107, |
|
"grad_norm": 2.6613612174987793, |
|
"learning_rate": 1.1964977789167304e-05, |
|
"loss": 0.1374, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.4718207134332317, |
|
"grad_norm": 2.995262384414673, |
|
"learning_rate": 1.1928648135966799e-05, |
|
"loss": 0.1402, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 2.4771250497281527, |
|
"grad_norm": 2.70011305809021, |
|
"learning_rate": 1.1892292024845343e-05, |
|
"loss": 0.1353, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 2.4824293860230737, |
|
"grad_norm": 2.9024317264556885, |
|
"learning_rate": 1.1855909954549754e-05, |
|
"loss": 0.1415, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 2.487733722317995, |
|
"grad_norm": 3.167525291442871, |
|
"learning_rate": 1.1819502424182965e-05, |
|
"loss": 0.1358, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 2.493038058612916, |
|
"grad_norm": 2.754595994949341, |
|
"learning_rate": 1.178306993319718e-05, |
|
"loss": 0.1383, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.498342394907837, |
|
"grad_norm": 3.2000560760498047, |
|
"learning_rate": 1.1746612981387016e-05, |
|
"loss": 0.1368, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 2.503646731202758, |
|
"grad_norm": 2.9707882404327393, |
|
"learning_rate": 1.1710132068882663e-05, |
|
"loss": 0.1298, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 2.508951067497679, |
|
"grad_norm": 3.044996500015259, |
|
"learning_rate": 1.1673627696143006e-05, |
|
"loss": 0.134, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 2.5142554037926006, |
|
"grad_norm": 2.9082412719726562, |
|
"learning_rate": 1.1637100363948767e-05, |
|
"loss": 0.139, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 2.5195597400875216, |
|
"grad_norm": 3.294870138168335, |
|
"learning_rate": 1.1600550573395639e-05, |
|
"loss": 0.1407, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.5248640763824426, |
|
"grad_norm": 2.9033164978027344, |
|
"learning_rate": 1.1563978825887403e-05, |
|
"loss": 0.1336, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 2.5301684126773636, |
|
"grad_norm": 3.089531183242798, |
|
"learning_rate": 1.152738562312905e-05, |
|
"loss": 0.1344, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 2.5354727489722846, |
|
"grad_norm": 3.015316963195801, |
|
"learning_rate": 1.149077146711991e-05, |
|
"loss": 0.1361, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 2.540777085267206, |
|
"grad_norm": 2.8561995029449463, |
|
"learning_rate": 1.1454136860146757e-05, |
|
"loss": 0.1305, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 2.546081421562127, |
|
"grad_norm": 3.1055350303649902, |
|
"learning_rate": 1.141748230477691e-05, |
|
"loss": 0.1332, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.551385757857048, |
|
"grad_norm": 3.5380849838256836, |
|
"learning_rate": 1.138080830385136e-05, |
|
"loss": 0.1342, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 2.5566900941519695, |
|
"grad_norm": 2.7498412132263184, |
|
"learning_rate": 1.134411536047785e-05, |
|
"loss": 0.1383, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 2.5619944304468905, |
|
"grad_norm": 3.1483683586120605, |
|
"learning_rate": 1.1307403978023985e-05, |
|
"loss": 0.1364, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 2.5672987667418115, |
|
"grad_norm": 2.886155128479004, |
|
"learning_rate": 1.127067466011033e-05, |
|
"loss": 0.1355, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 2.5726031030367325, |
|
"grad_norm": 3.173999786376953, |
|
"learning_rate": 1.1233927910603486e-05, |
|
"loss": 0.132, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.5779074393316534, |
|
"grad_norm": 3.0697178840637207, |
|
"learning_rate": 1.1197164233609195e-05, |
|
"loss": 0.1379, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 2.583211775626575, |
|
"grad_norm": 2.831016778945923, |
|
"learning_rate": 1.1160384133465413e-05, |
|
"loss": 0.1311, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 2.588516111921496, |
|
"grad_norm": 3.2016053199768066, |
|
"learning_rate": 1.1123588114735394e-05, |
|
"loss": 0.1378, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 2.593820448216417, |
|
"grad_norm": 3.0537898540496826, |
|
"learning_rate": 1.108677668220077e-05, |
|
"loss": 0.1367, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 2.599124784511338, |
|
"grad_norm": 2.7495782375335693, |
|
"learning_rate": 1.1049950340854629e-05, |
|
"loss": 0.1347, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.604429120806259, |
|
"grad_norm": 3.102475881576538, |
|
"learning_rate": 1.1013109595894578e-05, |
|
"loss": 0.1307, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 2.6097334571011803, |
|
"grad_norm": 2.821542978286743, |
|
"learning_rate": 1.0976254952715821e-05, |
|
"loss": 0.1356, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 2.6150377933961013, |
|
"grad_norm": 2.982942819595337, |
|
"learning_rate": 1.0939386916904227e-05, |
|
"loss": 0.138, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 2.6203421296910223, |
|
"grad_norm": 2.680748701095581, |
|
"learning_rate": 1.0902505994229377e-05, |
|
"loss": 0.1361, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 2.6256464659859438, |
|
"grad_norm": 2.7981534004211426, |
|
"learning_rate": 1.0865612690637657e-05, |
|
"loss": 0.1361, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.6309508022808648, |
|
"grad_norm": 2.9631705284118652, |
|
"learning_rate": 1.0828707512245285e-05, |
|
"loss": 0.1325, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 2.6362551385757858, |
|
"grad_norm": 3.211549997329712, |
|
"learning_rate": 1.0791790965331388e-05, |
|
"loss": 0.1308, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 2.6415594748707067, |
|
"grad_norm": 3.033447027206421, |
|
"learning_rate": 1.075486355633105e-05, |
|
"loss": 0.1334, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 2.6468638111656277, |
|
"grad_norm": 3.050119638442993, |
|
"learning_rate": 1.0717925791828362e-05, |
|
"loss": 0.1373, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 2.652168147460549, |
|
"grad_norm": 2.9277257919311523, |
|
"learning_rate": 1.0680978178549488e-05, |
|
"loss": 0.1349, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.65747248375547, |
|
"grad_norm": 3.1963348388671875, |
|
"learning_rate": 1.0644021223355679e-05, |
|
"loss": 0.134, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 2.662776820050391, |
|
"grad_norm": 2.882453441619873, |
|
"learning_rate": 1.060705543323636e-05, |
|
"loss": 0.1337, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 2.668081156345312, |
|
"grad_norm": 2.7132582664489746, |
|
"learning_rate": 1.0570081315302152e-05, |
|
"loss": 0.1386, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 2.673385492640233, |
|
"grad_norm": 2.729783535003662, |
|
"learning_rate": 1.0533099376777922e-05, |
|
"loss": 0.1344, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 2.6786898289351546, |
|
"grad_norm": 3.0825912952423096, |
|
"learning_rate": 1.0496110124995814e-05, |
|
"loss": 0.1321, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 2.6839941652300756, |
|
"grad_norm": 2.804955005645752, |
|
"learning_rate": 1.0459114067388308e-05, |
|
"loss": 0.1308, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 2.6892985015249966, |
|
"grad_norm": 2.9028618335723877, |
|
"learning_rate": 1.0422111711481246e-05, |
|
"loss": 0.1356, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 2.694602837819918, |
|
"grad_norm": 3.0055623054504395, |
|
"learning_rate": 1.0385103564886869e-05, |
|
"loss": 0.1338, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 2.6999071741148386, |
|
"grad_norm": 2.8479676246643066, |
|
"learning_rate": 1.0348090135296865e-05, |
|
"loss": 0.1324, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 2.70521151040976, |
|
"grad_norm": 3.0957729816436768, |
|
"learning_rate": 1.0311071930475382e-05, |
|
"loss": 0.1368, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.710515846704681, |
|
"grad_norm": 2.8696446418762207, |
|
"learning_rate": 1.0274049458252091e-05, |
|
"loss": 0.1292, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 2.715820182999602, |
|
"grad_norm": 2.482884168624878, |
|
"learning_rate": 1.0237023226515197e-05, |
|
"loss": 0.1345, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 2.7211245192945235, |
|
"grad_norm": 2.8822643756866455, |
|
"learning_rate": 1.019999374320448e-05, |
|
"loss": 0.1282, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 2.7264288555894445, |
|
"grad_norm": 2.7656660079956055, |
|
"learning_rate": 1.0162961516304333e-05, |
|
"loss": 0.1256, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 2.7317331918843655, |
|
"grad_norm": 2.6746714115142822, |
|
"learning_rate": 1.0125927053836773e-05, |
|
"loss": 0.1309, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 2.7370375281792865, |
|
"grad_norm": 2.917820930480957, |
|
"learning_rate": 1.0088890863854497e-05, |
|
"loss": 0.134, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 2.7423418644742075, |
|
"grad_norm": 2.9841034412384033, |
|
"learning_rate": 1.0051853454433902e-05, |
|
"loss": 0.1326, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 2.747646200769129, |
|
"grad_norm": 3.027301549911499, |
|
"learning_rate": 1.0014815333668101e-05, |
|
"loss": 0.1286, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 2.75295053706405, |
|
"grad_norm": 3.0368714332580566, |
|
"learning_rate": 9.97777700965998e-06, |
|
"loss": 0.1368, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 2.758254873358971, |
|
"grad_norm": 2.925471782684326, |
|
"learning_rate": 9.940738990515202e-06, |
|
"loss": 0.1326, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.763559209653892, |
|
"grad_norm": 2.8439114093780518, |
|
"learning_rate": 9.903701784335256e-06, |
|
"loss": 0.1254, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 2.768863545948813, |
|
"grad_norm": 2.9670259952545166, |
|
"learning_rate": 9.866665899210472e-06, |
|
"loss": 0.1309, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 2.7741678822437343, |
|
"grad_norm": 3.0596694946289062, |
|
"learning_rate": 9.829631843213059e-06, |
|
"loss": 0.1317, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 2.7794722185386553, |
|
"grad_norm": 2.936415910720825, |
|
"learning_rate": 9.79260012439014e-06, |
|
"loss": 0.128, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 2.7847765548335763, |
|
"grad_norm": 2.7913734912872314, |
|
"learning_rate": 9.755571250756761e-06, |
|
"loss": 0.1293, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.7900808911284978, |
|
"grad_norm": 2.662478446960449, |
|
"learning_rate": 9.718545730288956e-06, |
|
"loss": 0.1309, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 2.7953852274234188, |
|
"grad_norm": 2.7307651042938232, |
|
"learning_rate": 9.681524070916745e-06, |
|
"loss": 0.1293, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 2.8006895637183398, |
|
"grad_norm": 2.8188600540161133, |
|
"learning_rate": 9.644506780517178e-06, |
|
"loss": 0.1251, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 2.8059939000132608, |
|
"grad_norm": 2.798912286758423, |
|
"learning_rate": 9.607494366907384e-06, |
|
"loss": 0.1342, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 2.8112982363081818, |
|
"grad_norm": 2.9201605319976807, |
|
"learning_rate": 9.57048733783758e-06, |
|
"loss": 0.129, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.816602572603103, |
|
"grad_norm": 2.914685010910034, |
|
"learning_rate": 9.53348620098411e-06, |
|
"loss": 0.1334, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 2.821906908898024, |
|
"grad_norm": 2.8835930824279785, |
|
"learning_rate": 9.496491463942507e-06, |
|
"loss": 0.1331, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 2.827211245192945, |
|
"grad_norm": 3.123211145401001, |
|
"learning_rate": 9.459503634220488e-06, |
|
"loss": 0.1247, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 2.832515581487866, |
|
"grad_norm": 3.094163656234741, |
|
"learning_rate": 9.422523219231019e-06, |
|
"loss": 0.1308, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 2.837819917782787, |
|
"grad_norm": 3.100656747817993, |
|
"learning_rate": 9.385550726285357e-06, |
|
"loss": 0.1288, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 2.8431242540777086, |
|
"grad_norm": 2.8668949604034424, |
|
"learning_rate": 9.348586662586072e-06, |
|
"loss": 0.1294, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 2.8484285903726296, |
|
"grad_norm": 2.8769168853759766, |
|
"learning_rate": 9.311631535220096e-06, |
|
"loss": 0.1197, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 2.8537329266675506, |
|
"grad_norm": 3.1265363693237305, |
|
"learning_rate": 9.274685851151777e-06, |
|
"loss": 0.1264, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 2.859037262962472, |
|
"grad_norm": 2.7183499336242676, |
|
"learning_rate": 9.237750117215917e-06, |
|
"loss": 0.1277, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 2.864341599257393, |
|
"grad_norm": 2.801440954208374, |
|
"learning_rate": 9.200824840110808e-06, |
|
"loss": 0.1289, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.869645935552314, |
|
"grad_norm": 2.798792600631714, |
|
"learning_rate": 9.163910526391301e-06, |
|
"loss": 0.1305, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 2.874950271847235, |
|
"grad_norm": 2.9811506271362305, |
|
"learning_rate": 9.12700768246184e-06, |
|
"loss": 0.1265, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 2.880254608142156, |
|
"grad_norm": 2.8695929050445557, |
|
"learning_rate": 9.090116814569532e-06, |
|
"loss": 0.1311, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 2.8855589444370775, |
|
"grad_norm": 3.0875537395477295, |
|
"learning_rate": 9.053238428797184e-06, |
|
"loss": 0.1239, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 2.8908632807319985, |
|
"grad_norm": 3.061387062072754, |
|
"learning_rate": 9.016373031056365e-06, |
|
"loss": 0.1288, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 2.8961676170269195, |
|
"grad_norm": 2.614323139190674, |
|
"learning_rate": 8.979521127080482e-06, |
|
"loss": 0.1315, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 2.9014719533218405, |
|
"grad_norm": 2.8680355548858643, |
|
"learning_rate": 8.942683222417823e-06, |
|
"loss": 0.125, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 2.9067762896167615, |
|
"grad_norm": 2.898186206817627, |
|
"learning_rate": 8.905859822424617e-06, |
|
"loss": 0.1294, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 2.912080625911683, |
|
"grad_norm": 2.7704014778137207, |
|
"learning_rate": 8.869051432258137e-06, |
|
"loss": 0.121, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 2.917384962206604, |
|
"grad_norm": 3.1199190616607666, |
|
"learning_rate": 8.832258556869724e-06, |
|
"loss": 0.1312, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.922689298501525, |
|
"grad_norm": 3.096050500869751, |
|
"learning_rate": 8.795481700997886e-06, |
|
"loss": 0.1288, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 2.9279936347964464, |
|
"grad_norm": 2.9244625568389893, |
|
"learning_rate": 8.75872136916138e-06, |
|
"loss": 0.1241, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 2.933297971091367, |
|
"grad_norm": 2.6919119358062744, |
|
"learning_rate": 8.72197806565227e-06, |
|
"loss": 0.1238, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 2.9386023073862884, |
|
"grad_norm": 3.3755581378936768, |
|
"learning_rate": 8.685252294529016e-06, |
|
"loss": 0.1244, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 2.9439066436812094, |
|
"grad_norm": 2.898566246032715, |
|
"learning_rate": 8.648544559609575e-06, |
|
"loss": 0.1223, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.9492109799761304, |
|
"grad_norm": 2.690538167953491, |
|
"learning_rate": 8.611855364464465e-06, |
|
"loss": 0.124, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 2.954515316271052, |
|
"grad_norm": 2.957213878631592, |
|
"learning_rate": 8.57518521240987e-06, |
|
"loss": 0.1265, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 2.959819652565973, |
|
"grad_norm": 3.0998001098632812, |
|
"learning_rate": 8.538534606500743e-06, |
|
"loss": 0.1282, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 2.965123988860894, |
|
"grad_norm": 2.7344188690185547, |
|
"learning_rate": 8.50190404952388e-06, |
|
"loss": 0.1244, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 2.970428325155815, |
|
"grad_norm": 2.824951171875, |
|
"learning_rate": 8.465294043991056e-06, |
|
"loss": 0.1251, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.975732661450736, |
|
"grad_norm": 2.772991418838501, |
|
"learning_rate": 8.428705092132102e-06, |
|
"loss": 0.1219, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 2.9810369977456572, |
|
"grad_norm": 2.6752243041992188, |
|
"learning_rate": 8.392137695888022e-06, |
|
"loss": 0.1251, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 2.986341334040578, |
|
"grad_norm": 3.0803351402282715, |
|
"learning_rate": 8.355592356904132e-06, |
|
"loss": 0.1318, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 2.991645670335499, |
|
"grad_norm": 2.781517744064331, |
|
"learning_rate": 8.319069576523136e-06, |
|
"loss": 0.1195, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 2.99695000663042, |
|
"grad_norm": 2.9005682468414307, |
|
"learning_rate": 8.282569855778282e-06, |
|
"loss": 0.1278, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 3.0022543429253417, |
|
"grad_norm": 1.316965103149414, |
|
"learning_rate": 8.246093695386475e-06, |
|
"loss": 0.0921, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 3.0075586792202627, |
|
"grad_norm": 1.6865043640136719, |
|
"learning_rate": 8.209641595741413e-06, |
|
"loss": 0.0469, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 3.0128630155151837, |
|
"grad_norm": 1.917323350906372, |
|
"learning_rate": 8.173214056906716e-06, |
|
"loss": 0.0447, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 3.0181673518101046, |
|
"grad_norm": 1.8996626138687134, |
|
"learning_rate": 8.13681157860907e-06, |
|
"loss": 0.0455, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 3.0234716881050256, |
|
"grad_norm": 1.5856680870056152, |
|
"learning_rate": 8.10043466023137e-06, |
|
"loss": 0.0436, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 3.028776024399947, |
|
"grad_norm": 1.8763635158538818, |
|
"learning_rate": 8.064083800805875e-06, |
|
"loss": 0.044, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 3.034080360694868, |
|
"grad_norm": 1.6315194368362427, |
|
"learning_rate": 8.027759499007356e-06, |
|
"loss": 0.0485, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 3.039384696989789, |
|
"grad_norm": 1.9602937698364258, |
|
"learning_rate": 7.991462253146251e-06, |
|
"loss": 0.0432, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 3.04468903328471, |
|
"grad_norm": 1.6057361364364624, |
|
"learning_rate": 7.955192561161841e-06, |
|
"loss": 0.0407, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 3.0499933695796315, |
|
"grad_norm": 1.7018084526062012, |
|
"learning_rate": 7.918950920615412e-06, |
|
"loss": 0.043, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 3.0552977058745525, |
|
"grad_norm": 2.058243989944458, |
|
"learning_rate": 7.882737828683423e-06, |
|
"loss": 0.0433, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 3.0606020421694735, |
|
"grad_norm": 1.7645084857940674, |
|
"learning_rate": 7.846553782150703e-06, |
|
"loss": 0.04, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 3.0659063784643945, |
|
"grad_norm": 2.0199408531188965, |
|
"learning_rate": 7.810399277403618e-06, |
|
"loss": 0.0428, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 3.071210714759316, |
|
"grad_norm": 2.021491050720215, |
|
"learning_rate": 7.774274810423265e-06, |
|
"loss": 0.0441, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 3.076515051054237, |
|
"grad_norm": 1.8167482614517212, |
|
"learning_rate": 7.738180876778686e-06, |
|
"loss": 0.0421, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 3.081819387349158, |
|
"grad_norm": 2.0647082328796387, |
|
"learning_rate": 7.702117971620042e-06, |
|
"loss": 0.0437, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 3.087123723644079, |
|
"grad_norm": 1.6139365434646606, |
|
"learning_rate": 7.666086589671833e-06, |
|
"loss": 0.0427, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 3.092428059939, |
|
"grad_norm": 1.4240131378173828, |
|
"learning_rate": 7.630087225226126e-06, |
|
"loss": 0.0432, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 3.0977323962339214, |
|
"grad_norm": 1.8977570533752441, |
|
"learning_rate": 7.594120372135743e-06, |
|
"loss": 0.0402, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 3.1030367325288424, |
|
"grad_norm": 1.6448214054107666, |
|
"learning_rate": 7.558186523807509e-06, |
|
"loss": 0.041, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 3.1083410688237634, |
|
"grad_norm": 1.8806428909301758, |
|
"learning_rate": 7.5222861731954856e-06, |
|
"loss": 0.0411, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 3.1136454051186844, |
|
"grad_norm": 1.971258521080017, |
|
"learning_rate": 7.48641981279419e-06, |
|
"loss": 0.042, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 3.118949741413606, |
|
"grad_norm": 1.4597933292388916, |
|
"learning_rate": 7.4505879346318475e-06, |
|
"loss": 0.0396, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 3.124254077708527, |
|
"grad_norm": 1.6617002487182617, |
|
"learning_rate": 7.414791030263655e-06, |
|
"loss": 0.0391, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 3.129558414003448, |
|
"grad_norm": 1.851900577545166, |
|
"learning_rate": 7.379029590765015e-06, |
|
"loss": 0.0411, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 3.134862750298369, |
|
"grad_norm": 1.9996873140335083, |
|
"learning_rate": 7.343304106724807e-06, |
|
"loss": 0.0398, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 3.14016708659329, |
|
"grad_norm": 1.655671238899231, |
|
"learning_rate": 7.307615068238676e-06, |
|
"loss": 0.0405, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 3.1454714228882112, |
|
"grad_norm": 1.929533839225769, |
|
"learning_rate": 7.271962964902277e-06, |
|
"loss": 0.041, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 3.1507757591831322, |
|
"grad_norm": 1.8329622745513916, |
|
"learning_rate": 7.236348285804581e-06, |
|
"loss": 0.0408, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 3.1560800954780532, |
|
"grad_norm": 1.4492971897125244, |
|
"learning_rate": 7.200771519521161e-06, |
|
"loss": 0.0417, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 3.1613844317729742, |
|
"grad_norm": 1.6154602766036987, |
|
"learning_rate": 7.1652331541074845e-06, |
|
"loss": 0.0428, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 3.1666887680678957, |
|
"grad_norm": 1.6101605892181396, |
|
"learning_rate": 7.129733677092225e-06, |
|
"loss": 0.0409, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 3.1719931043628167, |
|
"grad_norm": 1.7562830448150635, |
|
"learning_rate": 7.094273575470562e-06, |
|
"loss": 0.0406, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 3.1772974406577377, |
|
"grad_norm": 1.5611249208450317, |
|
"learning_rate": 7.058853335697517e-06, |
|
"loss": 0.0429, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 3.1826017769526587, |
|
"grad_norm": 1.839369535446167, |
|
"learning_rate": 7.023473443681275e-06, |
|
"loss": 0.0396, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.18790611324758, |
|
"grad_norm": 1.6211901903152466, |
|
"learning_rate": 6.9881343847765025e-06, |
|
"loss": 0.037, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 3.193210449542501, |
|
"grad_norm": 1.5419272184371948, |
|
"learning_rate": 6.952836643777707e-06, |
|
"loss": 0.0405, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 3.198514785837422, |
|
"grad_norm": 1.750074863433838, |
|
"learning_rate": 6.917580704912592e-06, |
|
"loss": 0.0412, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 3.203819122132343, |
|
"grad_norm": 1.8624082803726196, |
|
"learning_rate": 6.882367051835389e-06, |
|
"loss": 0.0429, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 3.209123458427264, |
|
"grad_norm": 1.7120383977890015, |
|
"learning_rate": 6.84719616762024e-06, |
|
"loss": 0.0408, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 3.2144277947221855, |
|
"grad_norm": 1.9718371629714966, |
|
"learning_rate": 6.812068534754579e-06, |
|
"loss": 0.0404, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 3.2197321310171065, |
|
"grad_norm": 1.9837217330932617, |
|
"learning_rate": 6.776984635132491e-06, |
|
"loss": 0.0425, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 3.2250364673120275, |
|
"grad_norm": 1.57326078414917, |
|
"learning_rate": 6.741944950048106e-06, |
|
"loss": 0.0413, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 3.2303408036069485, |
|
"grad_norm": 1.6847975254058838, |
|
"learning_rate": 6.706949960189022e-06, |
|
"loss": 0.0408, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 3.23564513990187, |
|
"grad_norm": 1.8581093549728394, |
|
"learning_rate": 6.672000145629671e-06, |
|
"loss": 0.0427, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 3.240949476196791, |
|
"grad_norm": 1.6762524843215942, |
|
"learning_rate": 6.637095985824771e-06, |
|
"loss": 0.0389, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 3.246253812491712, |
|
"grad_norm": 1.8649601936340332, |
|
"learning_rate": 6.602237959602715e-06, |
|
"loss": 0.0404, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 3.251558148786633, |
|
"grad_norm": 1.706531047821045, |
|
"learning_rate": 6.567426545159024e-06, |
|
"loss": 0.0414, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 3.2568624850815544, |
|
"grad_norm": 1.5406407117843628, |
|
"learning_rate": 6.532662220049788e-06, |
|
"loss": 0.039, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 3.2621668213764754, |
|
"grad_norm": 1.4194878339767456, |
|
"learning_rate": 6.4979454611851e-06, |
|
"loss": 0.0385, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 3.2674711576713964, |
|
"grad_norm": 1.7470570802688599, |
|
"learning_rate": 6.463276744822517e-06, |
|
"loss": 0.0409, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 3.2727754939663174, |
|
"grad_norm": 1.7166268825531006, |
|
"learning_rate": 6.428656546560547e-06, |
|
"loss": 0.0402, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 3.2780798302612384, |
|
"grad_norm": 1.7307934761047363, |
|
"learning_rate": 6.394085341332092e-06, |
|
"loss": 0.0411, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 3.28338416655616, |
|
"grad_norm": 1.5930451154708862, |
|
"learning_rate": 6.359563603397956e-06, |
|
"loss": 0.0368, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 3.288688502851081, |
|
"grad_norm": 1.637946605682373, |
|
"learning_rate": 6.325091806340335e-06, |
|
"loss": 0.0409, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 3.293992839146002, |
|
"grad_norm": 1.4354368448257446, |
|
"learning_rate": 6.290670423056313e-06, |
|
"loss": 0.0382, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 3.299297175440923, |
|
"grad_norm": 1.6184732913970947, |
|
"learning_rate": 6.256299925751374e-06, |
|
"loss": 0.0394, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 3.304601511735844, |
|
"grad_norm": 1.510335087776184, |
|
"learning_rate": 6.221980785932945e-06, |
|
"loss": 0.037, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 3.3099058480307653, |
|
"grad_norm": 1.6335468292236328, |
|
"learning_rate": 6.187713474403895e-06, |
|
"loss": 0.0391, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 3.3152101843256863, |
|
"grad_norm": 1.9890940189361572, |
|
"learning_rate": 6.1534984612561e-06, |
|
"loss": 0.0424, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 3.3205145206206073, |
|
"grad_norm": 1.5295640230178833, |
|
"learning_rate": 6.119336215863988e-06, |
|
"loss": 0.038, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 3.3258188569155283, |
|
"grad_norm": 1.9646100997924805, |
|
"learning_rate": 6.0852272068780975e-06, |
|
"loss": 0.0402, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 3.3311231932104497, |
|
"grad_norm": 1.5410913228988647, |
|
"learning_rate": 6.051171902218651e-06, |
|
"loss": 0.0411, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 3.3364275295053707, |
|
"grad_norm": 1.6982203722000122, |
|
"learning_rate": 6.017170769069134e-06, |
|
"loss": 0.04, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 3.3417318658002917, |
|
"grad_norm": 1.836172103881836, |
|
"learning_rate": 5.983224273869881e-06, |
|
"loss": 0.0376, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 3.3470362020952127, |
|
"grad_norm": 1.5369129180908203, |
|
"learning_rate": 5.949332882311697e-06, |
|
"loss": 0.0408, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 3.352340538390134, |
|
"grad_norm": 1.420976996421814, |
|
"learning_rate": 5.915497059329442e-06, |
|
"loss": 0.0422, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 3.357644874685055, |
|
"grad_norm": 1.4260170459747314, |
|
"learning_rate": 5.881717269095668e-06, |
|
"loss": 0.0359, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 3.362949210979976, |
|
"grad_norm": 1.5417782068252563, |
|
"learning_rate": 5.8479939750142535e-06, |
|
"loss": 0.0379, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 3.368253547274897, |
|
"grad_norm": 2.017428159713745, |
|
"learning_rate": 5.814327639714037e-06, |
|
"loss": 0.0378, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 3.373557883569818, |
|
"grad_norm": 1.7676209211349487, |
|
"learning_rate": 5.7807187250424665e-06, |
|
"loss": 0.0403, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 3.3788622198647396, |
|
"grad_norm": 1.666673183441162, |
|
"learning_rate": 5.7471676920593015e-06, |
|
"loss": 0.04, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 3.3841665561596606, |
|
"grad_norm": 1.7691739797592163, |
|
"learning_rate": 5.713675001030221e-06, |
|
"loss": 0.0368, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 3.3894708924545816, |
|
"grad_norm": 1.5981837511062622, |
|
"learning_rate": 5.680241111420572e-06, |
|
"loss": 0.0366, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 3.3947752287495025, |
|
"grad_norm": 1.4606654644012451, |
|
"learning_rate": 5.646866481889035e-06, |
|
"loss": 0.0399, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 3.400079565044424, |
|
"grad_norm": 1.8489834070205688, |
|
"learning_rate": 5.613551570281337e-06, |
|
"loss": 0.0363, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 3.405383901339345, |
|
"grad_norm": 1.9334096908569336, |
|
"learning_rate": 5.580296833623977e-06, |
|
"loss": 0.0405, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 3.410688237634266, |
|
"grad_norm": 1.5512571334838867, |
|
"learning_rate": 5.547102728117939e-06, |
|
"loss": 0.0389, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 3.415992573929187, |
|
"grad_norm": 1.4512279033660889, |
|
"learning_rate": 5.513969709132458e-06, |
|
"loss": 0.0383, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 3.4212969102241084, |
|
"grad_norm": 1.7268502712249756, |
|
"learning_rate": 5.48089823119876e-06, |
|
"loss": 0.0382, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 3.4266012465190294, |
|
"grad_norm": 2.3311150074005127, |
|
"learning_rate": 5.447888748003827e-06, |
|
"loss": 0.0398, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 3.4319055828139504, |
|
"grad_norm": 1.7974883317947388, |
|
"learning_rate": 5.414941712384161e-06, |
|
"loss": 0.0373, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 3.4372099191088714, |
|
"grad_norm": 1.693633794784546, |
|
"learning_rate": 5.382057576319613e-06, |
|
"loss": 0.04, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 3.4425142554037924, |
|
"grad_norm": 2.0825114250183105, |
|
"learning_rate": 5.349236790927122e-06, |
|
"loss": 0.0394, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 3.447818591698714, |
|
"grad_norm": 1.58704674243927, |
|
"learning_rate": 5.316479806454578e-06, |
|
"loss": 0.0375, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.453122927993635, |
|
"grad_norm": 1.7522614002227783, |
|
"learning_rate": 5.283787072274624e-06, |
|
"loss": 0.039, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 3.458427264288556, |
|
"grad_norm": 1.915915846824646, |
|
"learning_rate": 5.251159036878493e-06, |
|
"loss": 0.0416, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 3.463731600583477, |
|
"grad_norm": 1.9502066373825073, |
|
"learning_rate": 5.2185961478698435e-06, |
|
"loss": 0.0356, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 3.469035936878398, |
|
"grad_norm": 1.562122106552124, |
|
"learning_rate": 5.186098851958656e-06, |
|
"loss": 0.0394, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 3.4743402731733193, |
|
"grad_norm": 1.9908523559570312, |
|
"learning_rate": 5.1536675949550545e-06, |
|
"loss": 0.0404, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 3.4796446094682403, |
|
"grad_norm": 1.6014606952667236, |
|
"learning_rate": 5.12130282176323e-06, |
|
"loss": 0.039, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 3.4849489457631613, |
|
"grad_norm": 1.838715672492981, |
|
"learning_rate": 5.089004976375322e-06, |
|
"loss": 0.0374, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 3.4902532820580827, |
|
"grad_norm": 1.7421026229858398, |
|
"learning_rate": 5.056774501865329e-06, |
|
"loss": 0.0371, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 3.4955576183530037, |
|
"grad_norm": 1.673134684562683, |
|
"learning_rate": 5.02461184038303e-06, |
|
"loss": 0.0383, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 3.5008619546479247, |
|
"grad_norm": 1.484175443649292, |
|
"learning_rate": 4.992517433147922e-06, |
|
"loss": 0.0357, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 3.5061662909428457, |
|
"grad_norm": 1.5539370775222778, |
|
"learning_rate": 4.960491720443151e-06, |
|
"loss": 0.0395, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 3.5114706272377667, |
|
"grad_norm": 1.7498281002044678, |
|
"learning_rate": 4.92853514160951e-06, |
|
"loss": 0.0403, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 3.516774963532688, |
|
"grad_norm": 1.8837767839431763, |
|
"learning_rate": 4.8966481350393655e-06, |
|
"loss": 0.0409, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 3.522079299827609, |
|
"grad_norm": 1.9168699979782104, |
|
"learning_rate": 4.864831138170675e-06, |
|
"loss": 0.0405, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 3.52738363612253, |
|
"grad_norm": 1.812839388847351, |
|
"learning_rate": 4.833084587480975e-06, |
|
"loss": 0.0371, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 3.532687972417451, |
|
"grad_norm": 1.71259605884552, |
|
"learning_rate": 4.801408918481402e-06, |
|
"loss": 0.0388, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 3.537992308712372, |
|
"grad_norm": 1.6360466480255127, |
|
"learning_rate": 4.769804565710693e-06, |
|
"loss": 0.0371, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 3.5432966450072936, |
|
"grad_norm": 1.8788199424743652, |
|
"learning_rate": 4.7382719627292595e-06, |
|
"loss": 0.0388, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 3.5486009813022146, |
|
"grad_norm": 1.5442296266555786, |
|
"learning_rate": 4.7068115421132146e-06, |
|
"loss": 0.0336, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 3.5539053175971356, |
|
"grad_norm": 1.5917887687683105, |
|
"learning_rate": 4.675423735448448e-06, |
|
"loss": 0.0393, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 3.559209653892057, |
|
"grad_norm": 2.0295755863189697, |
|
"learning_rate": 4.644108973324708e-06, |
|
"loss": 0.036, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 3.564513990186978, |
|
"grad_norm": 2.050459861755371, |
|
"learning_rate": 4.612867685329679e-06, |
|
"loss": 0.0406, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 3.569818326481899, |
|
"grad_norm": 1.806357502937317, |
|
"learning_rate": 4.58170030004311e-06, |
|
"loss": 0.036, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 3.57512266277682, |
|
"grad_norm": 1.5718046426773071, |
|
"learning_rate": 4.550607245030923e-06, |
|
"loss": 0.0336, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 3.580426999071741, |
|
"grad_norm": 1.753868818283081, |
|
"learning_rate": 4.519588946839346e-06, |
|
"loss": 0.0396, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 3.5857313353666624, |
|
"grad_norm": 1.49803626537323, |
|
"learning_rate": 4.488645830989069e-06, |
|
"loss": 0.0371, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 3.5910356716615834, |
|
"grad_norm": 1.698613166809082, |
|
"learning_rate": 4.457778321969404e-06, |
|
"loss": 0.0356, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 3.5963400079565044, |
|
"grad_norm": 1.8640644550323486, |
|
"learning_rate": 4.426986843232443e-06, |
|
"loss": 0.0393, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 3.6016443442514254, |
|
"grad_norm": 1.7568309307098389, |
|
"learning_rate": 4.3962718171872945e-06, |
|
"loss": 0.0375, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 3.6069486805463464, |
|
"grad_norm": 1.7229827642440796, |
|
"learning_rate": 4.3656336651942355e-06, |
|
"loss": 0.0417, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 3.612253016841268, |
|
"grad_norm": 1.8499397039413452, |
|
"learning_rate": 4.3350728075589676e-06, |
|
"loss": 0.0408, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 3.617557353136189, |
|
"grad_norm": 1.4821689128875732, |
|
"learning_rate": 4.304589663526838e-06, |
|
"loss": 0.0382, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 3.62286168943111, |
|
"grad_norm": 1.4930757284164429, |
|
"learning_rate": 4.274184651277092e-06, |
|
"loss": 0.0378, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 3.6281660257260313, |
|
"grad_norm": 1.834444522857666, |
|
"learning_rate": 4.243858187917117e-06, |
|
"loss": 0.0357, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 3.633470362020952, |
|
"grad_norm": 1.4575175046920776, |
|
"learning_rate": 4.213610689476767e-06, |
|
"loss": 0.0392, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 3.6387746983158733, |
|
"grad_norm": 1.5352399349212646, |
|
"learning_rate": 4.183442570902597e-06, |
|
"loss": 0.0358, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 3.6440790346107943, |
|
"grad_norm": 1.5042182207107544, |
|
"learning_rate": 4.1533542460522155e-06, |
|
"loss": 0.0366, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 3.6493833709057153, |
|
"grad_norm": 1.5749527215957642, |
|
"learning_rate": 4.123346127688587e-06, |
|
"loss": 0.0385, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 3.6546877072006367, |
|
"grad_norm": 1.647090196609497, |
|
"learning_rate": 4.093418627474373e-06, |
|
"loss": 0.037, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 3.6599920434955577, |
|
"grad_norm": 1.7862918376922607, |
|
"learning_rate": 4.063572155966274e-06, |
|
"loss": 0.0366, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 3.6652963797904787, |
|
"grad_norm": 1.605738639831543, |
|
"learning_rate": 4.033807122609435e-06, |
|
"loss": 0.0373, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 3.6706007160853997, |
|
"grad_norm": 1.787914514541626, |
|
"learning_rate": 4.0041239357317725e-06, |
|
"loss": 0.0348, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 3.6759050523803207, |
|
"grad_norm": 1.8057564496994019, |
|
"learning_rate": 3.97452300253842e-06, |
|
"loss": 0.0349, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 3.681209388675242, |
|
"grad_norm": 1.519795536994934, |
|
"learning_rate": 3.945004729106116e-06, |
|
"loss": 0.0354, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 3.686513724970163, |
|
"grad_norm": 1.7589900493621826, |
|
"learning_rate": 3.915569520377648e-06, |
|
"loss": 0.0385, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 3.691818061265084, |
|
"grad_norm": 1.6296403408050537, |
|
"learning_rate": 3.886217780156285e-06, |
|
"loss": 0.0359, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 3.697122397560005, |
|
"grad_norm": 1.6236915588378906, |
|
"learning_rate": 3.856949911100249e-06, |
|
"loss": 0.0365, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 3.702426733854926, |
|
"grad_norm": 1.5271315574645996, |
|
"learning_rate": 3.827766314717175e-06, |
|
"loss": 0.0355, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 3.7077310701498476, |
|
"grad_norm": 1.6985539197921753, |
|
"learning_rate": 3.7986673913586246e-06, |
|
"loss": 0.0376, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 3.7130354064447686, |
|
"grad_norm": 1.4878424406051636, |
|
"learning_rate": 3.7696535402145807e-06, |
|
"loss": 0.0363, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.7183397427396896, |
|
"grad_norm": 1.5377414226531982, |
|
"learning_rate": 3.7407251593079697e-06, |
|
"loss": 0.0355, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 3.723644079034611, |
|
"grad_norm": 1.6395467519760132, |
|
"learning_rate": 3.7118826454892132e-06, |
|
"loss": 0.0353, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 3.728948415329532, |
|
"grad_norm": 1.8777011632919312, |
|
"learning_rate": 3.6831263944307626e-06, |
|
"loss": 0.0339, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 3.734252751624453, |
|
"grad_norm": 1.419209361076355, |
|
"learning_rate": 3.654456800621695e-06, |
|
"loss": 0.036, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 3.739557087919374, |
|
"grad_norm": 1.9455891847610474, |
|
"learning_rate": 3.6258742573622887e-06, |
|
"loss": 0.0374, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 3.744861424214295, |
|
"grad_norm": 1.8757051229476929, |
|
"learning_rate": 3.5973791567586313e-06, |
|
"loss": 0.0369, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 3.7501657605092165, |
|
"grad_norm": 1.5636701583862305, |
|
"learning_rate": 3.5689718897172265e-06, |
|
"loss": 0.0362, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 3.7554700968041375, |
|
"grad_norm": 1.814666986465454, |
|
"learning_rate": 3.540652845939667e-06, |
|
"loss": 0.0348, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 3.7607744330990585, |
|
"grad_norm": 1.4877405166625977, |
|
"learning_rate": 3.5124224139172413e-06, |
|
"loss": 0.0364, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 3.7660787693939795, |
|
"grad_norm": 1.3752930164337158, |
|
"learning_rate": 3.4842809809256527e-06, |
|
"loss": 0.036, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 3.7713831056889005, |
|
"grad_norm": 1.8896254301071167, |
|
"learning_rate": 3.4562289330196586e-06, |
|
"loss": 0.036, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 3.776687441983822, |
|
"grad_norm": 1.7569184303283691, |
|
"learning_rate": 3.428266655027812e-06, |
|
"loss": 0.0383, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 3.781991778278743, |
|
"grad_norm": 1.7225362062454224, |
|
"learning_rate": 3.4003945305471676e-06, |
|
"loss": 0.039, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 3.787296114573664, |
|
"grad_norm": 1.4813677072525024, |
|
"learning_rate": 3.3726129419380203e-06, |
|
"loss": 0.0354, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 3.7926004508685853, |
|
"grad_norm": 1.5851385593414307, |
|
"learning_rate": 3.344922270318649e-06, |
|
"loss": 0.0368, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 3.7979047871635063, |
|
"grad_norm": 1.8603835105895996, |
|
"learning_rate": 3.31732289556012e-06, |
|
"loss": 0.0375, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 3.8032091234584273, |
|
"grad_norm": 1.8360573053359985, |
|
"learning_rate": 3.289815196281033e-06, |
|
"loss": 0.0382, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 3.8085134597533483, |
|
"grad_norm": 1.8814833164215088, |
|
"learning_rate": 3.2623995498423622e-06, |
|
"loss": 0.0367, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 3.8138177960482693, |
|
"grad_norm": 1.66216242313385, |
|
"learning_rate": 3.235076332342264e-06, |
|
"loss": 0.0353, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 3.8191221323431908, |
|
"grad_norm": 1.5455678701400757, |
|
"learning_rate": 3.207845918610921e-06, |
|
"loss": 0.0376, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 3.8244264686381118, |
|
"grad_norm": 1.4232027530670166, |
|
"learning_rate": 3.1807086822053867e-06, |
|
"loss": 0.034, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 3.8297308049330328, |
|
"grad_norm": 1.4929195642471313, |
|
"learning_rate": 3.153664995404496e-06, |
|
"loss": 0.0328, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 3.8350351412279537, |
|
"grad_norm": 1.6196081638336182, |
|
"learning_rate": 3.126715229203713e-06, |
|
"loss": 0.0346, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 3.8403394775228747, |
|
"grad_norm": 1.8542481660842896, |
|
"learning_rate": 3.099859753310075e-06, |
|
"loss": 0.0329, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 3.845643813817796, |
|
"grad_norm": 1.6762479543685913, |
|
"learning_rate": 3.0730989361371056e-06, |
|
"loss": 0.0335, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 3.850948150112717, |
|
"grad_norm": 1.3557182550430298, |
|
"learning_rate": 3.0464331447997686e-06, |
|
"loss": 0.0345, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 3.856252486407638, |
|
"grad_norm": 1.3587796688079834, |
|
"learning_rate": 3.0198627451094264e-06, |
|
"loss": 0.0341, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 3.861556822702559, |
|
"grad_norm": 1.8085464239120483, |
|
"learning_rate": 2.993388101568816e-06, |
|
"loss": 0.0339, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 3.86686115899748, |
|
"grad_norm": 1.6431082487106323, |
|
"learning_rate": 2.9670095773670626e-06, |
|
"loss": 0.0328, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 3.8721654952924016, |
|
"grad_norm": 1.445833683013916, |
|
"learning_rate": 2.9407275343746886e-06, |
|
"loss": 0.0364, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 3.8774698315873226, |
|
"grad_norm": 1.570131778717041, |
|
"learning_rate": 2.9145423331386546e-06, |
|
"loss": 0.0384, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 3.8827741678822436, |
|
"grad_norm": 1.5940959453582764, |
|
"learning_rate": 2.888454332877396e-06, |
|
"loss": 0.0324, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 3.888078504177165, |
|
"grad_norm": 1.5815328359603882, |
|
"learning_rate": 2.8624638914759306e-06, |
|
"loss": 0.0375, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 3.893382840472086, |
|
"grad_norm": 2.0120179653167725, |
|
"learning_rate": 2.8365713654809058e-06, |
|
"loss": 0.0356, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 3.898687176767007, |
|
"grad_norm": 1.5655955076217651, |
|
"learning_rate": 2.8107771100957393e-06, |
|
"loss": 0.0339, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 3.903991513061928, |
|
"grad_norm": 1.696489930152893, |
|
"learning_rate": 2.785081479175734e-06, |
|
"loss": 0.037, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 3.909295849356849, |
|
"grad_norm": 1.6997973918914795, |
|
"learning_rate": 2.75948482522323e-06, |
|
"loss": 0.0362, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 3.9146001856517705, |
|
"grad_norm": 1.550915241241455, |
|
"learning_rate": 2.7339874993827476e-06, |
|
"loss": 0.0323, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 3.9199045219466915, |
|
"grad_norm": 1.8627867698669434, |
|
"learning_rate": 2.708589851436211e-06, |
|
"loss": 0.0336, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 3.9252088582416125, |
|
"grad_norm": 1.4691085815429688, |
|
"learning_rate": 2.6832922297981044e-06, |
|
"loss": 0.0336, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 3.9305131945365335, |
|
"grad_norm": 1.572690725326538, |
|
"learning_rate": 2.6580949815107248e-06, |
|
"loss": 0.036, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 3.9358175308314545, |
|
"grad_norm": 1.718151569366455, |
|
"learning_rate": 2.6329984522394057e-06, |
|
"loss": 0.0348, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 3.941121867126376, |
|
"grad_norm": 1.8015989065170288, |
|
"learning_rate": 2.6080029862677813e-06, |
|
"loss": 0.0331, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 3.946426203421297, |
|
"grad_norm": 2.0196962356567383, |
|
"learning_rate": 2.5831089264930607e-06, |
|
"loss": 0.0365, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 3.951730539716218, |
|
"grad_norm": 1.51595938205719, |
|
"learning_rate": 2.5583166144213265e-06, |
|
"loss": 0.0342, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 3.9570348760111393, |
|
"grad_norm": 1.6583274602890015, |
|
"learning_rate": 2.5336263901628387e-06, |
|
"loss": 0.0345, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 3.9623392123060603, |
|
"grad_norm": 1.7668447494506836, |
|
"learning_rate": 2.5090385924273953e-06, |
|
"loss": 0.0332, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 3.9676435486009813, |
|
"grad_norm": 1.3587608337402344, |
|
"learning_rate": 2.4845535585196503e-06, |
|
"loss": 0.0314, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 3.9729478848959023, |
|
"grad_norm": 1.6988006830215454, |
|
"learning_rate": 2.4601716243345176e-06, |
|
"loss": 0.0343, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 3.9782522211908233, |
|
"grad_norm": 1.744411826133728, |
|
"learning_rate": 2.435893124352545e-06, |
|
"loss": 0.0347, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.9835565574857448, |
|
"grad_norm": 1.4949431419372559, |
|
"learning_rate": 2.4117183916353357e-06, |
|
"loss": 0.0341, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 3.9888608937806658, |
|
"grad_norm": 1.8944114446640015, |
|
"learning_rate": 2.3876477578209657e-06, |
|
"loss": 0.0351, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 3.9941652300755868, |
|
"grad_norm": 1.938253402709961, |
|
"learning_rate": 2.363681553119449e-06, |
|
"loss": 0.034, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 3.9994695663705078, |
|
"grad_norm": 1.3915281295776367, |
|
"learning_rate": 2.339820106308204e-06, |
|
"loss": 0.0325, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 4.004773902665429, |
|
"grad_norm": 0.8860509395599365, |
|
"learning_rate": 2.3160637447275347e-06, |
|
"loss": 0.0159, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 4.01007823896035, |
|
"grad_norm": 0.8337376117706299, |
|
"learning_rate": 2.292412794276152e-06, |
|
"loss": 0.0137, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 4.015382575255271, |
|
"grad_norm": 0.7365363836288452, |
|
"learning_rate": 2.268867579406697e-06, |
|
"loss": 0.0127, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 4.020686911550192, |
|
"grad_norm": 0.7730606198310852, |
|
"learning_rate": 2.245428423121282e-06, |
|
"loss": 0.013, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 4.025991247845114, |
|
"grad_norm": 0.6678166389465332, |
|
"learning_rate": 2.2220956469670774e-06, |
|
"loss": 0.0131, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 4.031295584140034, |
|
"grad_norm": 0.9078973531723022, |
|
"learning_rate": 2.1988695710318875e-06, |
|
"loss": 0.0125, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 4.036599920434956, |
|
"grad_norm": 0.8652137517929077, |
|
"learning_rate": 2.1757505139397627e-06, |
|
"loss": 0.0129, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 4.041904256729877, |
|
"grad_norm": 0.6685816645622253, |
|
"learning_rate": 2.1527387928466313e-06, |
|
"loss": 0.0111, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 4.047208593024798, |
|
"grad_norm": 0.7991968393325806, |
|
"learning_rate": 2.129834723435935e-06, |
|
"loss": 0.0122, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 4.052512929319719, |
|
"grad_norm": 0.8681981563568115, |
|
"learning_rate": 2.1070386199143288e-06, |
|
"loss": 0.0126, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 4.05781726561464, |
|
"grad_norm": 0.6502617001533508, |
|
"learning_rate": 2.0843507950073317e-06, |
|
"loss": 0.0122, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 4.063121601909561, |
|
"grad_norm": 0.7759711742401123, |
|
"learning_rate": 2.061771559955066e-06, |
|
"loss": 0.0113, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 4.0684259382044825, |
|
"grad_norm": 0.9321727752685547, |
|
"learning_rate": 2.0393012245079757e-06, |
|
"loss": 0.0124, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 4.073730274499403, |
|
"grad_norm": 0.715567946434021, |
|
"learning_rate": 2.016940096922582e-06, |
|
"loss": 0.0133, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 4.0790346107943245, |
|
"grad_norm": 0.8862237930297852, |
|
"learning_rate": 1.99468848395724e-06, |
|
"loss": 0.0132, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 4.084338947089245, |
|
"grad_norm": 0.6084906458854675, |
|
"learning_rate": 1.9725466908679626e-06, |
|
"loss": 0.0129, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 4.0896432833841665, |
|
"grad_norm": 0.9061374068260193, |
|
"learning_rate": 1.950515021404189e-06, |
|
"loss": 0.0122, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 4.094947619679088, |
|
"grad_norm": 0.8219320178031921, |
|
"learning_rate": 1.9285937778046582e-06, |
|
"loss": 0.0124, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 4.1002519559740085, |
|
"grad_norm": 0.9768364429473877, |
|
"learning_rate": 1.906783260793238e-06, |
|
"loss": 0.0118, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 4.10555629226893, |
|
"grad_norm": 0.9576259851455688, |
|
"learning_rate": 1.8850837695748104e-06, |
|
"loss": 0.0127, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 4.110860628563851, |
|
"grad_norm": 0.5971377491950989, |
|
"learning_rate": 1.8634956018311566e-06, |
|
"loss": 0.0113, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 4.116164964858772, |
|
"grad_norm": 0.8423818349838257, |
|
"learning_rate": 1.8420190537168947e-06, |
|
"loss": 0.0125, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 4.121469301153693, |
|
"grad_norm": 0.8644461631774902, |
|
"learning_rate": 1.8206544198553855e-06, |
|
"loss": 0.0114, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 4.126773637448614, |
|
"grad_norm": 0.6005859375, |
|
"learning_rate": 1.7994019933347252e-06, |
|
"loss": 0.0121, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 4.132077973743535, |
|
"grad_norm": 0.7392746806144714, |
|
"learning_rate": 1.778262065703692e-06, |
|
"loss": 0.0112, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 4.137382310038457, |
|
"grad_norm": 0.646115779876709, |
|
"learning_rate": 1.7572349269677713e-06, |
|
"loss": 0.0114, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 4.142686646333377, |
|
"grad_norm": 0.8041878342628479, |
|
"learning_rate": 1.7363208655851649e-06, |
|
"loss": 0.0121, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 4.147990982628299, |
|
"grad_norm": 0.6283585429191589, |
|
"learning_rate": 1.715520168462842e-06, |
|
"loss": 0.0118, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 4.153295318923219, |
|
"grad_norm": 0.8467313051223755, |
|
"learning_rate": 1.6948331209525859e-06, |
|
"loss": 0.011, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 4.158599655218141, |
|
"grad_norm": 0.7284769415855408, |
|
"learning_rate": 1.674260006847105e-06, |
|
"loss": 0.0125, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 4.163903991513062, |
|
"grad_norm": 0.808991551399231, |
|
"learning_rate": 1.6538011083761186e-06, |
|
"loss": 0.012, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 4.169208327807983, |
|
"grad_norm": 0.5860360264778137, |
|
"learning_rate": 1.6334567062024963e-06, |
|
"loss": 0.0112, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 4.174512664102904, |
|
"grad_norm": 0.8675763010978699, |
|
"learning_rate": 1.613227079418407e-06, |
|
"loss": 0.0116, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 4.179817000397826, |
|
"grad_norm": 0.9475387930870056, |
|
"learning_rate": 1.5931125055414764e-06, |
|
"loss": 0.0108, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 4.185121336692746, |
|
"grad_norm": 0.7731668949127197, |
|
"learning_rate": 1.5731132605110034e-06, |
|
"loss": 0.0108, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 4.190425672987668, |
|
"grad_norm": 0.8824770450592041, |
|
"learning_rate": 1.5532296186841577e-06, |
|
"loss": 0.0118, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 4.195730009282588, |
|
"grad_norm": 0.7245500087738037, |
|
"learning_rate": 1.5334618528322231e-06, |
|
"loss": 0.0115, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 4.20103434557751, |
|
"grad_norm": 0.8868821263313293, |
|
"learning_rate": 1.513810234136842e-06, |
|
"loss": 0.0118, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 4.206338681872431, |
|
"grad_norm": 0.7213976979255676, |
|
"learning_rate": 1.4942750321863274e-06, |
|
"loss": 0.0108, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 4.211643018167352, |
|
"grad_norm": 0.6786092519760132, |
|
"learning_rate": 1.4748565149719196e-06, |
|
"loss": 0.012, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 4.216947354462273, |
|
"grad_norm": 0.9421653747558594, |
|
"learning_rate": 1.4555549488841568e-06, |
|
"loss": 0.0108, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 4.222251690757194, |
|
"grad_norm": 0.8586103320121765, |
|
"learning_rate": 1.4363705987091781e-06, |
|
"loss": 0.012, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 4.227556027052115, |
|
"grad_norm": 0.8113248944282532, |
|
"learning_rate": 1.4173037276251222e-06, |
|
"loss": 0.0113, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 4.2328603633470365, |
|
"grad_norm": 0.7525209188461304, |
|
"learning_rate": 1.3983545971985024e-06, |
|
"loss": 0.0112, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 4.238164699641957, |
|
"grad_norm": 0.773442804813385, |
|
"learning_rate": 1.3795234673806223e-06, |
|
"loss": 0.0109, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 4.2434690359368785, |
|
"grad_norm": 0.9594974517822266, |
|
"learning_rate": 1.3608105965040008e-06, |
|
"loss": 0.0111, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.248773372231799, |
|
"grad_norm": 1.0915862321853638, |
|
"learning_rate": 1.3422162412788532e-06, |
|
"loss": 0.0123, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 4.2540777085267205, |
|
"grad_norm": 0.7315853238105774, |
|
"learning_rate": 1.323740656789535e-06, |
|
"loss": 0.0122, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 4.259382044821642, |
|
"grad_norm": 0.7983627319335938, |
|
"learning_rate": 1.305384096491068e-06, |
|
"loss": 0.0113, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 4.2646863811165625, |
|
"grad_norm": 0.9778603911399841, |
|
"learning_rate": 1.2871468122056574e-06, |
|
"loss": 0.0121, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 4.269990717411484, |
|
"grad_norm": 0.9073354005813599, |
|
"learning_rate": 1.2690290541192317e-06, |
|
"loss": 0.0118, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 4.275295053706405, |
|
"grad_norm": 0.9715719819068909, |
|
"learning_rate": 1.2510310707780093e-06, |
|
"loss": 0.011, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 4.280599390001326, |
|
"grad_norm": 1.0042502880096436, |
|
"learning_rate": 1.233153109085108e-06, |
|
"loss": 0.0122, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 4.285903726296247, |
|
"grad_norm": 0.7477414608001709, |
|
"learning_rate": 1.215395414297127e-06, |
|
"loss": 0.0109, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 4.291208062591168, |
|
"grad_norm": 0.7065203785896301, |
|
"learning_rate": 1.1977582300208102e-06, |
|
"loss": 0.0112, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 4.296512398886089, |
|
"grad_norm": 0.9468834400177002, |
|
"learning_rate": 1.180241798209687e-06, |
|
"loss": 0.0107, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 4.301816735181011, |
|
"grad_norm": 0.8056593537330627, |
|
"learning_rate": 1.162846359160762e-06, |
|
"loss": 0.0115, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 4.307121071475931, |
|
"grad_norm": 0.6542430520057678, |
|
"learning_rate": 1.1455721515112161e-06, |
|
"loss": 0.0107, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 4.312425407770853, |
|
"grad_norm": 0.6646844744682312, |
|
"learning_rate": 1.1284194122351276e-06, |
|
"loss": 0.0133, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 4.317729744065773, |
|
"grad_norm": 0.9997266530990601, |
|
"learning_rate": 1.11138837664023e-06, |
|
"loss": 0.0108, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 4.323034080360695, |
|
"grad_norm": 0.8001927137374878, |
|
"learning_rate": 1.0944792783646808e-06, |
|
"loss": 0.0122, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 4.328338416655616, |
|
"grad_norm": 0.635581374168396, |
|
"learning_rate": 1.077692349373851e-06, |
|
"loss": 0.0119, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 4.333642752950537, |
|
"grad_norm": 0.7629982233047485, |
|
"learning_rate": 1.0610278199571522e-06, |
|
"loss": 0.012, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 4.338947089245458, |
|
"grad_norm": 0.9843791127204895, |
|
"learning_rate": 1.0444859187248701e-06, |
|
"loss": 0.0116, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 4.34425142554038, |
|
"grad_norm": 0.7684705853462219, |
|
"learning_rate": 1.0280668726050302e-06, |
|
"loss": 0.0128, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 4.3495557618353, |
|
"grad_norm": 1.196272373199463, |
|
"learning_rate": 1.0117709068402858e-06, |
|
"loss": 0.0132, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 4.354860098130222, |
|
"grad_norm": 0.6995633244514465, |
|
"learning_rate": 9.95598244984829e-07, |
|
"loss": 0.0113, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 4.360164434425142, |
|
"grad_norm": 0.9181510806083679, |
|
"learning_rate": 9.795491089013233e-07, |
|
"loss": 0.012, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 4.365468770720064, |
|
"grad_norm": 0.9330178499221802, |
|
"learning_rate": 9.636237187578502e-07, |
|
"loss": 0.0119, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 4.370773107014985, |
|
"grad_norm": 0.7912993431091309, |
|
"learning_rate": 9.478222930249148e-07, |
|
"loss": 0.0108, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 4.376077443309906, |
|
"grad_norm": 0.8073052167892456, |
|
"learning_rate": 9.32145048472416e-07, |
|
"loss": 0.0123, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 4.381381779604827, |
|
"grad_norm": 0.778070330619812, |
|
"learning_rate": 9.165922001666949e-07, |
|
"loss": 0.0117, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 4.386686115899748, |
|
"grad_norm": 0.8094714283943176, |
|
"learning_rate": 9.011639614675783e-07, |
|
"loss": 0.0111, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 4.391990452194669, |
|
"grad_norm": 0.6484878659248352, |
|
"learning_rate": 8.858605440254519e-07, |
|
"loss": 0.0112, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 4.3972947884895905, |
|
"grad_norm": 0.6638253927230835, |
|
"learning_rate": 8.706821577783542e-07, |
|
"loss": 0.0113, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 4.402599124784511, |
|
"grad_norm": 0.948729395866394, |
|
"learning_rate": 8.556290109491017e-07, |
|
"loss": 0.0118, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 4.4079034610794325, |
|
"grad_norm": 1.0264664888381958, |
|
"learning_rate": 8.407013100424222e-07, |
|
"loss": 0.0116, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 4.413207797374353, |
|
"grad_norm": 1.1023824214935303, |
|
"learning_rate": 8.258992598421422e-07, |
|
"loss": 0.0116, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 4.4185121336692745, |
|
"grad_norm": 0.6815558671951294, |
|
"learning_rate": 8.112230634083518e-07, |
|
"loss": 0.0111, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 4.423816469964196, |
|
"grad_norm": 0.6698101758956909, |
|
"learning_rate": 7.966729220746372e-07, |
|
"loss": 0.0118, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 4.4291208062591165, |
|
"grad_norm": 0.6504749059677124, |
|
"learning_rate": 7.82249035445315e-07, |
|
"loss": 0.0104, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 4.434425142554038, |
|
"grad_norm": 0.7233061194419861, |
|
"learning_rate": 7.679516013926902e-07, |
|
"loss": 0.0113, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 4.439729478848959, |
|
"grad_norm": 0.8585641384124756, |
|
"learning_rate": 7.537808160543403e-07, |
|
"loss": 0.0115, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 4.44503381514388, |
|
"grad_norm": 0.7206557989120483, |
|
"learning_rate": 7.397368738304367e-07, |
|
"loss": 0.0106, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 4.450338151438801, |
|
"grad_norm": 0.8581918478012085, |
|
"learning_rate": 7.258199673810595e-07, |
|
"loss": 0.0113, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 4.455642487733722, |
|
"grad_norm": 0.7734334468841553, |
|
"learning_rate": 7.120302876235707e-07, |
|
"loss": 0.011, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 4.460946824028643, |
|
"grad_norm": 0.7396286725997925, |
|
"learning_rate": 6.983680237299861e-07, |
|
"loss": 0.0106, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 4.466251160323565, |
|
"grad_norm": 1.0284420251846313, |
|
"learning_rate": 6.848333631243853e-07, |
|
"loss": 0.0114, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 4.471555496618485, |
|
"grad_norm": 0.8151918053627014, |
|
"learning_rate": 6.714264914803348e-07, |
|
"loss": 0.0113, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 4.476859832913407, |
|
"grad_norm": 0.8683885335922241, |
|
"learning_rate": 6.581475927183444e-07, |
|
"loss": 0.0108, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 4.482164169208328, |
|
"grad_norm": 1.0174139738082886, |
|
"learning_rate": 6.449968490033453e-07, |
|
"loss": 0.0122, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 4.487468505503249, |
|
"grad_norm": 0.7874058485031128, |
|
"learning_rate": 6.319744407421891e-07, |
|
"loss": 0.0117, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 4.49277284179817, |
|
"grad_norm": 0.9520915746688843, |
|
"learning_rate": 6.190805465811745e-07, |
|
"loss": 0.0103, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 4.498077178093091, |
|
"grad_norm": 0.8549929261207581, |
|
"learning_rate": 6.063153434035896e-07, |
|
"loss": 0.0121, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 4.503381514388012, |
|
"grad_norm": 0.8030751943588257, |
|
"learning_rate": 5.936790063273013e-07, |
|
"loss": 0.0104, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 4.508685850682934, |
|
"grad_norm": 0.6169995069503784, |
|
"learning_rate": 5.811717087023327e-07, |
|
"loss": 0.0104, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.513990186977854, |
|
"grad_norm": 0.9810777306556702, |
|
"learning_rate": 5.687936221085022e-07, |
|
"loss": 0.0125, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 4.519294523272776, |
|
"grad_norm": 0.7712530493736267, |
|
"learning_rate": 5.565449163530578e-07, |
|
"loss": 0.0103, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 4.524598859567696, |
|
"grad_norm": 0.6955736875534058, |
|
"learning_rate": 5.444257594683577e-07, |
|
"loss": 0.0115, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 4.529903195862618, |
|
"grad_norm": 1.2472916841506958, |
|
"learning_rate": 5.324363177095526e-07, |
|
"loss": 0.0111, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 4.535207532157539, |
|
"grad_norm": 0.8412001729011536, |
|
"learning_rate": 5.205767555523211e-07, |
|
"loss": 0.0113, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 4.54051186845246, |
|
"grad_norm": 0.7206942439079285, |
|
"learning_rate": 5.088472356905971e-07, |
|
"loss": 0.0109, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 4.545816204747381, |
|
"grad_norm": 0.8209089040756226, |
|
"learning_rate": 4.972479190343494e-07, |
|
"loss": 0.0107, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 4.551120541042302, |
|
"grad_norm": 1.1593587398529053, |
|
"learning_rate": 4.857789647073685e-07, |
|
"loss": 0.0115, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 4.556424877337223, |
|
"grad_norm": 0.6742283701896667, |
|
"learning_rate": 4.7444053004508716e-07, |
|
"loss": 0.0113, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 4.561729213632145, |
|
"grad_norm": 1.103501558303833, |
|
"learning_rate": 4.632327705924178e-07, |
|
"loss": 0.0121, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 4.567033549927065, |
|
"grad_norm": 0.7072399258613586, |
|
"learning_rate": 4.521558401016246e-07, |
|
"loss": 0.0132, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 4.572337886221987, |
|
"grad_norm": 0.7149022221565247, |
|
"learning_rate": 4.4120989053020423e-07, |
|
"loss": 0.01, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 4.577642222516907, |
|
"grad_norm": 0.7452453970909119, |
|
"learning_rate": 4.3039507203881836e-07, |
|
"loss": 0.0122, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 4.5829465588118286, |
|
"grad_norm": 0.5006600022315979, |
|
"learning_rate": 4.197115329892121e-07, |
|
"loss": 0.0105, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 4.58825089510675, |
|
"grad_norm": 0.8474171757698059, |
|
"learning_rate": 4.091594199421967e-07, |
|
"loss": 0.0102, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 4.5935552314016705, |
|
"grad_norm": 1.0133711099624634, |
|
"learning_rate": 3.9873887765563e-07, |
|
"loss": 0.0111, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 4.598859567696592, |
|
"grad_norm": 0.6478588581085205, |
|
"learning_rate": 3.884500490824339e-07, |
|
"loss": 0.0104, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 4.604163903991513, |
|
"grad_norm": 0.7794708609580994, |
|
"learning_rate": 3.782930753686287e-07, |
|
"loss": 0.0109, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 4.609468240286434, |
|
"grad_norm": 0.9100542068481445, |
|
"learning_rate": 3.6826809585140287e-07, |
|
"loss": 0.0121, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 4.614772576581355, |
|
"grad_norm": 0.7963326573371887, |
|
"learning_rate": 3.5837524805719784e-07, |
|
"loss": 0.0106, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 4.620076912876276, |
|
"grad_norm": 0.7512088418006897, |
|
"learning_rate": 3.4861466769982364e-07, |
|
"loss": 0.0105, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 4.625381249171197, |
|
"grad_norm": 0.9739687442779541, |
|
"learning_rate": 3.389864886785943e-07, |
|
"loss": 0.012, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 4.630685585466119, |
|
"grad_norm": 0.7893660068511963, |
|
"learning_rate": 3.2949084307649317e-07, |
|
"loss": 0.0117, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 4.635989921761039, |
|
"grad_norm": 0.738175630569458, |
|
"learning_rate": 3.2012786115836024e-07, |
|
"loss": 0.0115, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 4.641294258055961, |
|
"grad_norm": 0.6872140765190125, |
|
"learning_rate": 3.1089767136910475e-07, |
|
"loss": 0.0102, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 4.646598594350882, |
|
"grad_norm": 0.6499819755554199, |
|
"learning_rate": 3.0180040033194415e-07, |
|
"loss": 0.0102, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 4.651902930645803, |
|
"grad_norm": 0.8301778435707092, |
|
"learning_rate": 2.9283617284666666e-07, |
|
"loss": 0.0125, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 4.657207266940724, |
|
"grad_norm": 0.8570474982261658, |
|
"learning_rate": 2.8400511188791834e-07, |
|
"loss": 0.0109, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 4.662511603235645, |
|
"grad_norm": 0.6350365877151489, |
|
"learning_rate": 2.7530733860351434e-07, |
|
"loss": 0.0103, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 4.667815939530566, |
|
"grad_norm": 0.9536558985710144, |
|
"learning_rate": 2.6674297231278677e-07, |
|
"loss": 0.0118, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 4.673120275825488, |
|
"grad_norm": 0.6549102663993835, |
|
"learning_rate": 2.583121305049308e-07, |
|
"loss": 0.0102, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 4.678424612120408, |
|
"grad_norm": 0.7842022776603699, |
|
"learning_rate": 2.5001492883740984e-07, |
|
"loss": 0.0116, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 4.68372894841533, |
|
"grad_norm": 0.8783378005027771, |
|
"learning_rate": 2.418514811343575e-07, |
|
"loss": 0.0115, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 4.68903328471025, |
|
"grad_norm": 0.8600866198539734, |
|
"learning_rate": 2.3382189938502387e-07, |
|
"loss": 0.012, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 4.694337621005172, |
|
"grad_norm": 1.0350086688995361, |
|
"learning_rate": 2.2592629374222907e-07, |
|
"loss": 0.0105, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 4.699641957300093, |
|
"grad_norm": 0.8720340132713318, |
|
"learning_rate": 2.1816477252086689e-07, |
|
"loss": 0.0115, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 4.704946293595014, |
|
"grad_norm": 0.8609745502471924, |
|
"learning_rate": 2.105374421964046e-07, |
|
"loss": 0.0124, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 4.710250629889935, |
|
"grad_norm": 0.7636396288871765, |
|
"learning_rate": 2.030444074034288e-07, |
|
"loss": 0.0095, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 4.715554966184856, |
|
"grad_norm": 0.6747872233390808, |
|
"learning_rate": 1.9568577093421303e-07, |
|
"loss": 0.0098, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 4.720859302479777, |
|
"grad_norm": 0.6326771974563599, |
|
"learning_rate": 1.884616337373002e-07, |
|
"loss": 0.0118, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 4.726163638774699, |
|
"grad_norm": 0.7632216215133667, |
|
"learning_rate": 1.813720949161235e-07, |
|
"loss": 0.0105, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 4.731467975069619, |
|
"grad_norm": 0.9641050100326538, |
|
"learning_rate": 1.7441725172764434e-07, |
|
"loss": 0.0111, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 4.736772311364541, |
|
"grad_norm": 0.7419525384902954, |
|
"learning_rate": 1.6759719958101883e-07, |
|
"loss": 0.0125, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 4.742076647659461, |
|
"grad_norm": 0.8092560768127441, |
|
"learning_rate": 1.6091203203629003e-07, |
|
"loss": 0.0111, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 4.747380983954383, |
|
"grad_norm": 0.710726797580719, |
|
"learning_rate": 1.5436184080310112e-07, |
|
"loss": 0.0107, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 4.752685320249304, |
|
"grad_norm": 1.1181225776672363, |
|
"learning_rate": 1.4794671573944096e-07, |
|
"loss": 0.0114, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 4.757989656544225, |
|
"grad_norm": 0.8513716459274292, |
|
"learning_rate": 1.416667448504083e-07, |
|
"loss": 0.0116, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 4.763293992839146, |
|
"grad_norm": 0.6909376382827759, |
|
"learning_rate": 1.355220142870095e-07, |
|
"loss": 0.0109, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 4.7685983291340674, |
|
"grad_norm": 0.6143341660499573, |
|
"learning_rate": 1.2951260834496826e-07, |
|
"loss": 0.0105, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 4.773902665428988, |
|
"grad_norm": 0.8020744323730469, |
|
"learning_rate": 1.2363860946357885e-07, |
|
"loss": 0.0109, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.779207001723909, |
|
"grad_norm": 0.9505652785301208, |
|
"learning_rate": 1.1790009822456704e-07, |
|
"loss": 0.0106, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 4.784511338018831, |
|
"grad_norm": 0.5784342885017395, |
|
"learning_rate": 1.1229715335098978e-07, |
|
"loss": 0.0106, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 4.789815674313751, |
|
"grad_norm": 0.9723225831985474, |
|
"learning_rate": 1.0682985170615612e-07, |
|
"loss": 0.0115, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 4.795120010608673, |
|
"grad_norm": 0.7735673189163208, |
|
"learning_rate": 1.014982682925636e-07, |
|
"loss": 0.0116, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 4.800424346903593, |
|
"grad_norm": 0.7800854444503784, |
|
"learning_rate": 9.630247625088129e-08, |
|
"loss": 0.0114, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 4.805728683198515, |
|
"grad_norm": 0.8556525111198425, |
|
"learning_rate": 9.124254685894174e-08, |
|
"loss": 0.0107, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 4.811033019493436, |
|
"grad_norm": 0.8377553820610046, |
|
"learning_rate": 8.631854953075836e-08, |
|
"loss": 0.0104, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 4.816337355788357, |
|
"grad_norm": 0.8369781374931335, |
|
"learning_rate": 8.153055181557956e-08, |
|
"loss": 0.0119, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 4.821641692083278, |
|
"grad_norm": 0.49618351459503174, |
|
"learning_rate": 7.687861939696173e-08, |
|
"loss": 0.0103, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 4.826946028378199, |
|
"grad_norm": 0.8221543431282043, |
|
"learning_rate": 7.236281609186213e-08, |
|
"loss": 0.0113, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 4.83225036467312, |
|
"grad_norm": 0.7929049134254456, |
|
"learning_rate": 6.798320384977297e-08, |
|
"loss": 0.0109, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 4.837554700968042, |
|
"grad_norm": 0.9620218873023987, |
|
"learning_rate": 6.373984275185985e-08, |
|
"loss": 0.0122, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 4.842859037262962, |
|
"grad_norm": 0.7376503348350525, |
|
"learning_rate": 5.963279101014907e-08, |
|
"loss": 0.0104, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 4.848163373557884, |
|
"grad_norm": 0.6144804358482361, |
|
"learning_rate": 5.566210496672164e-08, |
|
"loss": 0.0114, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 4.853467709852804, |
|
"grad_norm": 0.5833884477615356, |
|
"learning_rate": 5.1827839092943864e-08, |
|
"loss": 0.0104, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 4.858772046147726, |
|
"grad_norm": 1.0123804807662964, |
|
"learning_rate": 4.813004598871684e-08, |
|
"loss": 0.0118, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 4.864076382442647, |
|
"grad_norm": 0.6741945743560791, |
|
"learning_rate": 4.456877638175927e-08, |
|
"loss": 0.0105, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 4.869380718737568, |
|
"grad_norm": 1.017996907234192, |
|
"learning_rate": 4.114407912690577e-08, |
|
"loss": 0.0116, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 4.874685055032489, |
|
"grad_norm": 0.6289529800415039, |
|
"learning_rate": 3.785600120544297e-08, |
|
"loss": 0.0111, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 4.87998939132741, |
|
"grad_norm": 0.6166532635688782, |
|
"learning_rate": 3.470458772446228e-08, |
|
"loss": 0.0103, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 4.885293727622331, |
|
"grad_norm": 0.7388268113136292, |
|
"learning_rate": 3.168988191623923e-08, |
|
"loss": 0.012, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 4.890598063917253, |
|
"grad_norm": 0.883564293384552, |
|
"learning_rate": 2.8811925137641748e-08, |
|
"loss": 0.0116, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 4.895902400212173, |
|
"grad_norm": 0.649075448513031, |
|
"learning_rate": 2.607075686956617e-08, |
|
"loss": 0.0106, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 4.901206736507095, |
|
"grad_norm": 0.5050731897354126, |
|
"learning_rate": 2.3466414716387664e-08, |
|
"loss": 0.011, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 4.906511072802015, |
|
"grad_norm": 0.7908238172531128, |
|
"learning_rate": 2.0998934405453973e-08, |
|
"loss": 0.0113, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 4.911815409096937, |
|
"grad_norm": 0.8571054339408875, |
|
"learning_rate": 1.866834978658805e-08, |
|
"loss": 0.0106, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 4.917119745391858, |
|
"grad_norm": 1.0134270191192627, |
|
"learning_rate": 1.647469283162617e-08, |
|
"loss": 0.0102, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 4.922424081686779, |
|
"grad_norm": 0.7379550933837891, |
|
"learning_rate": 1.4417993633980553e-08, |
|
"loss": 0.011, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 4.9277284179817, |
|
"grad_norm": 0.6573840379714966, |
|
"learning_rate": 1.2498280408225205e-08, |
|
"loss": 0.0109, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 4.9330327542766215, |
|
"grad_norm": 1.1111972332000732, |
|
"learning_rate": 1.0715579489707362e-08, |
|
"loss": 0.0117, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 4.938337090571542, |
|
"grad_norm": 1.0421433448791504, |
|
"learning_rate": 9.069915334189994e-09, |
|
"loss": 0.0115, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 4.9436414268664635, |
|
"grad_norm": 0.7943109273910522, |
|
"learning_rate": 7.561310517514298e-09, |
|
"loss": 0.0121, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 4.948945763161385, |
|
"grad_norm": 1.0451573133468628, |
|
"learning_rate": 6.189785735286613e-09, |
|
"loss": 0.0117, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 4.9542500994563055, |
|
"grad_norm": 0.661553144454956, |
|
"learning_rate": 4.955359802601978e-09, |
|
"loss": 0.011, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 4.959554435751227, |
|
"grad_norm": 0.6280125975608826, |
|
"learning_rate": 3.858049653778783e-09, |
|
"loss": 0.0116, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 4.9648587720461475, |
|
"grad_norm": 0.9882097244262695, |
|
"learning_rate": 2.8978703421311815e-09, |
|
"loss": 0.0118, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 4.970163108341069, |
|
"grad_norm": 0.9897807240486145, |
|
"learning_rate": 2.0748350397592487e-09, |
|
"loss": 0.0108, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 4.97546744463599, |
|
"grad_norm": 0.885931670665741, |
|
"learning_rate": 1.388955037373574e-09, |
|
"loss": 0.0112, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 4.980771780930911, |
|
"grad_norm": 1.1094691753387451, |
|
"learning_rate": 8.40239744130944e-10, |
|
"loss": 0.0111, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 4.986076117225832, |
|
"grad_norm": 0.7109507322311401, |
|
"learning_rate": 4.286966875166609e-10, |
|
"loss": 0.0119, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 4.991380453520753, |
|
"grad_norm": 0.9953659772872925, |
|
"learning_rate": 1.5433151323129835e-10, |
|
"loss": 0.0097, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 4.996684789815674, |
|
"grad_norm": 0.77718186378479, |
|
"learning_rate": 1.7147985121868106e-11, |
|
"loss": 0.011, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 4.999336957963135, |
|
"step": 9425, |
|
"total_flos": 9.928428136927396e+17, |
|
"train_loss": 0.1870748889659697, |
|
"train_runtime": 13802.6902, |
|
"train_samples_per_second": 21.853, |
|
"train_steps_per_second": 0.683 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 9425, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 40000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.928428136927396e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|