|
{ |
|
"best_metric": 0.9669172932330827, |
|
"best_model_checkpoint": "YAHIA/vivit-b-16x2-collected-dataset\\checkpoint-8418", |
|
"epoch": 9.099358059914408, |
|
"eval_steps": 500, |
|
"global_step": 14020, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 16.260046005249023, |
|
"learning_rate": 3.566333808844508e-07, |
|
"loss": 1.7843, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 24.135656356811523, |
|
"learning_rate": 7.132667617689016e-07, |
|
"loss": 1.8164, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 22.565906524658203, |
|
"learning_rate": 1.0699001426533523e-06, |
|
"loss": 1.9396, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 20.68889045715332, |
|
"learning_rate": 1.4265335235378032e-06, |
|
"loss": 1.9576, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 22.999574661254883, |
|
"learning_rate": 1.7831669044222541e-06, |
|
"loss": 1.9828, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 27.69795036315918, |
|
"learning_rate": 2.1398002853067046e-06, |
|
"loss": 1.9381, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 25.143293380737305, |
|
"learning_rate": 2.4964336661911553e-06, |
|
"loss": 1.8222, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 20.670278549194336, |
|
"learning_rate": 2.8530670470756064e-06, |
|
"loss": 1.7593, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 20.401081085205078, |
|
"learning_rate": 3.209700427960057e-06, |
|
"loss": 1.7611, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 21.053512573242188, |
|
"learning_rate": 3.5663338088445082e-06, |
|
"loss": 1.7861, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 21.759618759155273, |
|
"learning_rate": 3.922967189728959e-06, |
|
"loss": 1.7698, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 22.194372177124023, |
|
"learning_rate": 4.279600570613409e-06, |
|
"loss": 1.7558, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 20.19968605041504, |
|
"learning_rate": 4.63623395149786e-06, |
|
"loss": 1.8624, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 19.7205753326416, |
|
"learning_rate": 4.992867332382311e-06, |
|
"loss": 1.6795, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 19.424144744873047, |
|
"learning_rate": 5.349500713266762e-06, |
|
"loss": 1.7443, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 19.568191528320312, |
|
"learning_rate": 5.706134094151213e-06, |
|
"loss": 1.7468, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 22.05777931213379, |
|
"learning_rate": 6.062767475035663e-06, |
|
"loss": 1.7362, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 17.77819061279297, |
|
"learning_rate": 6.419400855920114e-06, |
|
"loss": 1.527, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 19.465238571166992, |
|
"learning_rate": 6.776034236804565e-06, |
|
"loss": 1.5923, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 20.62281036376953, |
|
"learning_rate": 7.1326676176890165e-06, |
|
"loss": 1.697, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 16.7163028717041, |
|
"learning_rate": 7.489300998573468e-06, |
|
"loss": 1.4694, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 20.071901321411133, |
|
"learning_rate": 7.845934379457918e-06, |
|
"loss": 1.4549, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 20.55426597595215, |
|
"learning_rate": 8.202567760342367e-06, |
|
"loss": 1.3167, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 26.36579704284668, |
|
"learning_rate": 8.559201141226818e-06, |
|
"loss": 1.6743, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 17.331533432006836, |
|
"learning_rate": 8.91583452211127e-06, |
|
"loss": 1.4754, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 19.567764282226562, |
|
"learning_rate": 9.27246790299572e-06, |
|
"loss": 1.45, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 16.322946548461914, |
|
"learning_rate": 9.629101283880172e-06, |
|
"loss": 1.3971, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 18.62678337097168, |
|
"learning_rate": 9.985734664764621e-06, |
|
"loss": 1.4368, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 20.327966690063477, |
|
"learning_rate": 1.0342368045649072e-05, |
|
"loss": 1.5098, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 18.31135368347168, |
|
"learning_rate": 1.0699001426533523e-05, |
|
"loss": 1.1699, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 28.94702911376953, |
|
"learning_rate": 1.1055634807417975e-05, |
|
"loss": 1.5048, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 21.377225875854492, |
|
"learning_rate": 1.1412268188302426e-05, |
|
"loss": 1.4112, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 15.965813636779785, |
|
"learning_rate": 1.1768901569186877e-05, |
|
"loss": 1.5097, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 19.742080688476562, |
|
"learning_rate": 1.2125534950071326e-05, |
|
"loss": 1.2703, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 18.924072265625, |
|
"learning_rate": 1.2482168330955777e-05, |
|
"loss": 1.2194, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 18.15528106689453, |
|
"learning_rate": 1.2838801711840228e-05, |
|
"loss": 1.0668, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 21.82122802734375, |
|
"learning_rate": 1.3195435092724678e-05, |
|
"loss": 0.9053, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 20.609405517578125, |
|
"learning_rate": 1.355206847360913e-05, |
|
"loss": 1.2574, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 25.153718948364258, |
|
"learning_rate": 1.390870185449358e-05, |
|
"loss": 1.1619, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 12.118425369262695, |
|
"learning_rate": 1.4265335235378033e-05, |
|
"loss": 1.1514, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 25.673738479614258, |
|
"learning_rate": 1.4621968616262482e-05, |
|
"loss": 1.2191, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 23.109697341918945, |
|
"learning_rate": 1.4978601997146935e-05, |
|
"loss": 0.8535, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 15.181422233581543, |
|
"learning_rate": 1.5335235378031385e-05, |
|
"loss": 0.7464, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 31.820419311523438, |
|
"learning_rate": 1.5691868758915836e-05, |
|
"loss": 1.2202, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 24.667930603027344, |
|
"learning_rate": 1.6048502139800287e-05, |
|
"loss": 1.0542, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 16.041976928710938, |
|
"learning_rate": 1.6405135520684735e-05, |
|
"loss": 0.9541, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 9.076415061950684, |
|
"learning_rate": 1.676176890156919e-05, |
|
"loss": 1.0625, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 9.516477584838867, |
|
"learning_rate": 1.7118402282453637e-05, |
|
"loss": 1.1725, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 29.433717727661133, |
|
"learning_rate": 1.7475035663338088e-05, |
|
"loss": 0.9226, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 14.57030200958252, |
|
"learning_rate": 1.783166904422254e-05, |
|
"loss": 0.7345, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 19.724756240844727, |
|
"learning_rate": 1.818830242510699e-05, |
|
"loss": 1.1076, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 17.7041072845459, |
|
"learning_rate": 1.854493580599144e-05, |
|
"loss": 1.1412, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.248980522155762, |
|
"learning_rate": 1.8901569186875892e-05, |
|
"loss": 0.675, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 6.876579284667969, |
|
"learning_rate": 1.9258202567760344e-05, |
|
"loss": 0.703, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 15.930359840393066, |
|
"learning_rate": 1.9614835948644795e-05, |
|
"loss": 0.671, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 7.9089226722717285, |
|
"learning_rate": 1.9971469329529242e-05, |
|
"loss": 0.7656, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 20.674118041992188, |
|
"learning_rate": 2.0328102710413697e-05, |
|
"loss": 1.4598, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 33.44108581542969, |
|
"learning_rate": 2.0684736091298145e-05, |
|
"loss": 0.9271, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 9.660829544067383, |
|
"learning_rate": 2.10413694721826e-05, |
|
"loss": 0.9135, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 3.2455947399139404, |
|
"learning_rate": 2.1398002853067047e-05, |
|
"loss": 0.9244, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 16.9035587310791, |
|
"learning_rate": 2.1754636233951498e-05, |
|
"loss": 1.2397, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 12.139324188232422, |
|
"learning_rate": 2.211126961483595e-05, |
|
"loss": 0.9137, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 13.0936861038208, |
|
"learning_rate": 2.24679029957204e-05, |
|
"loss": 0.891, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 8.328577995300293, |
|
"learning_rate": 2.282453637660485e-05, |
|
"loss": 0.8631, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 24.814929962158203, |
|
"learning_rate": 2.3181169757489303e-05, |
|
"loss": 0.5948, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 15.284310340881348, |
|
"learning_rate": 2.3537803138373754e-05, |
|
"loss": 1.1153, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.1705708503723145, |
|
"learning_rate": 2.3894436519258205e-05, |
|
"loss": 0.6161, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 15.621281623840332, |
|
"learning_rate": 2.4251069900142652e-05, |
|
"loss": 0.8466, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 37.767173767089844, |
|
"learning_rate": 2.4607703281027107e-05, |
|
"loss": 0.7471, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.153799533843994, |
|
"learning_rate": 2.4964336661911555e-05, |
|
"loss": 0.5421, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.665609359741211, |
|
"learning_rate": 2.5320970042796006e-05, |
|
"loss": 0.4251, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 21.673925399780273, |
|
"learning_rate": 2.5677603423680457e-05, |
|
"loss": 0.8117, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 23.484006881713867, |
|
"learning_rate": 2.603423680456491e-05, |
|
"loss": 0.4761, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 24.750452041625977, |
|
"learning_rate": 2.6390870185449356e-05, |
|
"loss": 0.95, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.027065277099609, |
|
"learning_rate": 2.674750356633381e-05, |
|
"loss": 0.9197, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 33.312313079833984, |
|
"learning_rate": 2.710413694721826e-05, |
|
"loss": 1.033, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 15.621706008911133, |
|
"learning_rate": 2.7460770328102713e-05, |
|
"loss": 0.2779, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 10.880739212036133, |
|
"learning_rate": 2.781740370898716e-05, |
|
"loss": 0.5387, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 11.6985445022583, |
|
"learning_rate": 2.8174037089871615e-05, |
|
"loss": 0.7687, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 25.108810424804688, |
|
"learning_rate": 2.8530670470756066e-05, |
|
"loss": 0.4862, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 23.200624465942383, |
|
"learning_rate": 2.8887303851640514e-05, |
|
"loss": 0.9553, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 30.682540893554688, |
|
"learning_rate": 2.9243937232524965e-05, |
|
"loss": 0.8558, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 12.823701858520508, |
|
"learning_rate": 2.9600570613409416e-05, |
|
"loss": 0.6195, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 11.762367248535156, |
|
"learning_rate": 2.995720399429387e-05, |
|
"loss": 0.758, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.1662691831588745, |
|
"learning_rate": 3.0313837375178318e-05, |
|
"loss": 0.354, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 23.4963436126709, |
|
"learning_rate": 3.067047075606277e-05, |
|
"loss": 0.8267, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.13900019228458405, |
|
"learning_rate": 3.102710413694722e-05, |
|
"loss": 0.6618, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 7.843920707702637, |
|
"learning_rate": 3.138373751783167e-05, |
|
"loss": 0.7689, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 31.13179588317871, |
|
"learning_rate": 3.174037089871612e-05, |
|
"loss": 0.7772, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 44.58312225341797, |
|
"learning_rate": 3.2097004279600574e-05, |
|
"loss": 1.1062, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 18.089794158935547, |
|
"learning_rate": 3.2453637660485025e-05, |
|
"loss": 0.7678, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 33.472625732421875, |
|
"learning_rate": 3.281027104136947e-05, |
|
"loss": 1.6911, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 33.618831634521484, |
|
"learning_rate": 3.316690442225393e-05, |
|
"loss": 0.8881, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.6782217025756836, |
|
"learning_rate": 3.352353780313838e-05, |
|
"loss": 0.7327, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.1791036128997803, |
|
"learning_rate": 3.388017118402282e-05, |
|
"loss": 0.8054, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 6.972609043121338, |
|
"learning_rate": 3.4236804564907274e-05, |
|
"loss": 0.2614, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 24.085866928100586, |
|
"learning_rate": 3.459343794579173e-05, |
|
"loss": 0.4054, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.9996914863586426, |
|
"learning_rate": 3.4950071326676176e-05, |
|
"loss": 0.5344, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.13388022780418396, |
|
"learning_rate": 3.530670470756063e-05, |
|
"loss": 0.7224, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 30.018585205078125, |
|
"learning_rate": 3.566333808844508e-05, |
|
"loss": 0.6226, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 14.195096015930176, |
|
"learning_rate": 3.6019971469329536e-05, |
|
"loss": 0.7356, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 25.853748321533203, |
|
"learning_rate": 3.637660485021398e-05, |
|
"loss": 0.7235, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 29.89474868774414, |
|
"learning_rate": 3.673323823109843e-05, |
|
"loss": 1.1801, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 12.760407447814941, |
|
"learning_rate": 3.708987161198288e-05, |
|
"loss": 0.4307, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 29.496700286865234, |
|
"learning_rate": 3.7446504992867334e-05, |
|
"loss": 0.9473, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.0010541677474976, |
|
"learning_rate": 3.7803138373751785e-05, |
|
"loss": 0.5983, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.141446352005005, |
|
"learning_rate": 3.8159771754636236e-05, |
|
"loss": 0.4888, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.1996098756790161, |
|
"learning_rate": 3.851640513552069e-05, |
|
"loss": 0.5292, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 20.964256286621094, |
|
"learning_rate": 3.887303851640514e-05, |
|
"loss": 0.6905, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.3161448538303375, |
|
"learning_rate": 3.922967189728959e-05, |
|
"loss": 0.7078, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 13.272440910339355, |
|
"learning_rate": 3.958630527817404e-05, |
|
"loss": 0.9984, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.3290501832962036, |
|
"learning_rate": 3.9942938659058485e-05, |
|
"loss": 0.4982, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.7225183248519897, |
|
"learning_rate": 4.029957203994294e-05, |
|
"loss": 0.4532, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 24.277801513671875, |
|
"learning_rate": 4.0656205420827394e-05, |
|
"loss": 0.5147, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 9.140922546386719, |
|
"learning_rate": 4.1012838801711845e-05, |
|
"loss": 0.7049, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.139643669128418, |
|
"learning_rate": 4.136947218259629e-05, |
|
"loss": 1.3454, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 24.44458770751953, |
|
"learning_rate": 4.172610556348075e-05, |
|
"loss": 0.6409, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 53.11198425292969, |
|
"learning_rate": 4.20827389443652e-05, |
|
"loss": 0.7063, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 15.888784408569336, |
|
"learning_rate": 4.243937232524964e-05, |
|
"loss": 0.686, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 29.689838409423828, |
|
"learning_rate": 4.2796005706134094e-05, |
|
"loss": 0.6301, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 37.24555206298828, |
|
"learning_rate": 4.3152639087018545e-05, |
|
"loss": 0.939, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 39.6224479675293, |
|
"learning_rate": 4.3509272467902996e-05, |
|
"loss": 0.9322, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 28.799930572509766, |
|
"learning_rate": 4.386590584878745e-05, |
|
"loss": 1.1431, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.9416821002960205, |
|
"learning_rate": 4.42225392296719e-05, |
|
"loss": 0.7622, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 11.397088050842285, |
|
"learning_rate": 4.457917261055635e-05, |
|
"loss": 0.5302, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.13693714141845703, |
|
"learning_rate": 4.49358059914408e-05, |
|
"loss": 0.9497, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 22.64994239807129, |
|
"learning_rate": 4.529243937232525e-05, |
|
"loss": 1.4811, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 63.26667404174805, |
|
"learning_rate": 4.56490727532097e-05, |
|
"loss": 0.7023, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 24.035776138305664, |
|
"learning_rate": 4.6005706134094154e-05, |
|
"loss": 0.3478, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.0889860987663269, |
|
"learning_rate": 4.6362339514978605e-05, |
|
"loss": 0.3317, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 14.644208908081055, |
|
"learning_rate": 4.6718972895863056e-05, |
|
"loss": 0.6627, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 16.509044647216797, |
|
"learning_rate": 4.707560627674751e-05, |
|
"loss": 1.3097, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 23.583152770996094, |
|
"learning_rate": 4.743223965763195e-05, |
|
"loss": 0.9481, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 50.59526443481445, |
|
"learning_rate": 4.778887303851641e-05, |
|
"loss": 0.7222, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 18.746498107910156, |
|
"learning_rate": 4.814550641940086e-05, |
|
"loss": 0.7993, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 14.619526863098145, |
|
"learning_rate": 4.8502139800285305e-05, |
|
"loss": 0.8045, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.3897199332714081, |
|
"learning_rate": 4.8858773181169756e-05, |
|
"loss": 0.7668, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.14925141632556915, |
|
"learning_rate": 4.9215406562054214e-05, |
|
"loss": 0.2882, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 38.2923469543457, |
|
"learning_rate": 4.9572039942938665e-05, |
|
"loss": 0.8372, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.04119894281029701, |
|
"learning_rate": 4.992867332382311e-05, |
|
"loss": 0.1001, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.7789473684210526, |
|
"eval_loss": 0.898942768573761, |
|
"eval_runtime": 2157.0352, |
|
"eval_samples_per_second": 0.308, |
|
"eval_steps_per_second": 0.154, |
|
"step": 1403 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.562422752380371, |
|
"learning_rate": 4.99682992550325e-05, |
|
"loss": 0.6604, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.05182512477040291, |
|
"learning_rate": 4.992867332382311e-05, |
|
"loss": 0.7648, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 10.46511459350586, |
|
"learning_rate": 4.988904739261373e-05, |
|
"loss": 0.7306, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 26.981674194335938, |
|
"learning_rate": 4.984942146140435e-05, |
|
"loss": 0.3427, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 38.77156066894531, |
|
"learning_rate": 4.9809795530194966e-05, |
|
"loss": 0.4313, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.08875282108783722, |
|
"learning_rate": 4.977016959898558e-05, |
|
"loss": 0.1047, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 7.9550042152404785, |
|
"learning_rate": 4.97305436677762e-05, |
|
"loss": 0.8428, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 3.2877941131591797, |
|
"learning_rate": 4.969091773656681e-05, |
|
"loss": 0.5087, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 27.133525848388672, |
|
"learning_rate": 4.965129180535743e-05, |
|
"loss": 0.4621, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 24.116609573364258, |
|
"learning_rate": 4.9611665874148046e-05, |
|
"loss": 0.8207, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 48.552242279052734, |
|
"learning_rate": 4.9572039942938665e-05, |
|
"loss": 1.3676, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.1313333660364151, |
|
"learning_rate": 4.953241401172928e-05, |
|
"loss": 0.4705, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 14.919997215270996, |
|
"learning_rate": 4.9492788080519896e-05, |
|
"loss": 1.6541, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 2.8064146041870117, |
|
"learning_rate": 4.945316214931051e-05, |
|
"loss": 0.6358, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 33.633766174316406, |
|
"learning_rate": 4.941353621810113e-05, |
|
"loss": 0.3344, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 25.58049774169922, |
|
"learning_rate": 4.9373910286891746e-05, |
|
"loss": 0.3765, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.5938677191734314, |
|
"learning_rate": 4.933428435568236e-05, |
|
"loss": 0.7599, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.17297320067882538, |
|
"learning_rate": 4.9294658424472976e-05, |
|
"loss": 0.1381, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 1.1137043237686157, |
|
"learning_rate": 4.9255032493263595e-05, |
|
"loss": 0.5216, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 11.281981468200684, |
|
"learning_rate": 4.9215406562054214e-05, |
|
"loss": 0.9504, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 35.159671783447266, |
|
"learning_rate": 4.9175780630844826e-05, |
|
"loss": 0.6236, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 15.732198715209961, |
|
"learning_rate": 4.9136154699635445e-05, |
|
"loss": 0.3037, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 4.352818965911865, |
|
"learning_rate": 4.909652876842606e-05, |
|
"loss": 0.5294, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 27.470956802368164, |
|
"learning_rate": 4.9056902837216676e-05, |
|
"loss": 0.5626, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 33.91129684448242, |
|
"learning_rate": 4.9017276906007294e-05, |
|
"loss": 0.2882, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.26898714900016785, |
|
"learning_rate": 4.897765097479791e-05, |
|
"loss": 0.4265, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.8199774622917175, |
|
"learning_rate": 4.8938025043588525e-05, |
|
"loss": 0.5277, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.020548412576317787, |
|
"learning_rate": 4.8898399112379144e-05, |
|
"loss": 0.889, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.048078641295433044, |
|
"learning_rate": 4.8858773181169756e-05, |
|
"loss": 0.8639, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 20.957611083984375, |
|
"learning_rate": 4.881914724996038e-05, |
|
"loss": 0.3244, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.15246962010860443, |
|
"learning_rate": 4.8779521318750994e-05, |
|
"loss": 0.258, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.09393693506717682, |
|
"learning_rate": 4.873989538754161e-05, |
|
"loss": 0.9169, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.9115855097770691, |
|
"learning_rate": 4.8700269456332225e-05, |
|
"loss": 0.5618, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 6.85861873626709, |
|
"learning_rate": 4.866064352512284e-05, |
|
"loss": 0.6588, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 33.108909606933594, |
|
"learning_rate": 4.862101759391346e-05, |
|
"loss": 0.3317, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 2.785113573074341, |
|
"learning_rate": 4.8581391662704074e-05, |
|
"loss": 0.1886, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.07260994613170624, |
|
"learning_rate": 4.854176573149469e-05, |
|
"loss": 0.4813, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 1.9168213605880737, |
|
"learning_rate": 4.8502139800285305e-05, |
|
"loss": 0.6163, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 32.30327224731445, |
|
"learning_rate": 4.8462513869075924e-05, |
|
"loss": 0.5272, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.09055186808109283, |
|
"learning_rate": 4.842288793786654e-05, |
|
"loss": 0.58, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.15506546199321747, |
|
"learning_rate": 4.838326200665716e-05, |
|
"loss": 0.1171, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 32.89198303222656, |
|
"learning_rate": 4.8343636075447773e-05, |
|
"loss": 0.4713, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.009175814688205719, |
|
"learning_rate": 4.830401014423839e-05, |
|
"loss": 0.6139, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 32.81629943847656, |
|
"learning_rate": 4.8264384213029004e-05, |
|
"loss": 0.6131, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 62.49550247192383, |
|
"learning_rate": 4.822475828181962e-05, |
|
"loss": 0.5677, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 2.451925754547119, |
|
"learning_rate": 4.818513235061024e-05, |
|
"loss": 1.4171, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.2953392267227173, |
|
"learning_rate": 4.814550641940086e-05, |
|
"loss": 0.3838, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 53.240325927734375, |
|
"learning_rate": 4.810588048819147e-05, |
|
"loss": 0.1432, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.1574406921863556, |
|
"learning_rate": 4.806625455698209e-05, |
|
"loss": 0.318, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 35.6072998046875, |
|
"learning_rate": 4.802662862577271e-05, |
|
"loss": 0.6746, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.012536413036286831, |
|
"learning_rate": 4.798700269456333e-05, |
|
"loss": 0.7181, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.05652592331171036, |
|
"learning_rate": 4.794737676335394e-05, |
|
"loss": 0.0939, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 2.5210182666778564, |
|
"learning_rate": 4.790775083214456e-05, |
|
"loss": 1.1188, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.16478443145751953, |
|
"learning_rate": 4.786812490093517e-05, |
|
"loss": 0.4504, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 26.002525329589844, |
|
"learning_rate": 4.782849896972579e-05, |
|
"loss": 0.8049, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 37.67827606201172, |
|
"learning_rate": 4.778887303851641e-05, |
|
"loss": 0.4756, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 36.84476852416992, |
|
"learning_rate": 4.774924710730702e-05, |
|
"loss": 0.9565, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.18539421260356903, |
|
"learning_rate": 4.770962117609764e-05, |
|
"loss": 0.3279, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 3.1605958938598633, |
|
"learning_rate": 4.766999524488825e-05, |
|
"loss": 0.1506, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.08869732171297073, |
|
"learning_rate": 4.763036931367887e-05, |
|
"loss": 0.5895, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 1.4320793151855469, |
|
"learning_rate": 4.759074338246949e-05, |
|
"loss": 0.3709, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.013628893531858921, |
|
"learning_rate": 4.755111745126011e-05, |
|
"loss": 0.4708, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.008771849796175957, |
|
"learning_rate": 4.751149152005072e-05, |
|
"loss": 0.2291, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.034172721207141876, |
|
"learning_rate": 4.747186558884134e-05, |
|
"loss": 0.3005, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 2.04589581489563, |
|
"learning_rate": 4.743223965763195e-05, |
|
"loss": 0.7397, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 7.416354656219482, |
|
"learning_rate": 4.739261372642258e-05, |
|
"loss": 0.5153, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 59.661014556884766, |
|
"learning_rate": 4.735298779521319e-05, |
|
"loss": 0.7417, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 38.264408111572266, |
|
"learning_rate": 4.731336186400381e-05, |
|
"loss": 0.4673, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.010330034419894218, |
|
"learning_rate": 4.727373593279442e-05, |
|
"loss": 0.7802, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.009081050753593445, |
|
"learning_rate": 4.723411000158504e-05, |
|
"loss": 0.5108, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.9804019331932068, |
|
"learning_rate": 4.719448407037566e-05, |
|
"loss": 0.1208, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 17.980236053466797, |
|
"learning_rate": 4.7154858139166276e-05, |
|
"loss": 0.3317, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.03598076477646828, |
|
"learning_rate": 4.711523220795689e-05, |
|
"loss": 0.4675, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 28.66923713684082, |
|
"learning_rate": 4.707560627674751e-05, |
|
"loss": 0.7232, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.05090579390525818, |
|
"learning_rate": 4.703598034553812e-05, |
|
"loss": 0.2404, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 39.81483840942383, |
|
"learning_rate": 4.6996354414328745e-05, |
|
"loss": 0.382, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.010164987295866013, |
|
"learning_rate": 4.695672848311936e-05, |
|
"loss": 0.4326, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.007712522987276316, |
|
"learning_rate": 4.691710255190997e-05, |
|
"loss": 0.1419, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.03637077286839485, |
|
"learning_rate": 4.687747662070059e-05, |
|
"loss": 0.0087, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.2942463755607605, |
|
"learning_rate": 4.68378506894912e-05, |
|
"loss": 0.5895, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 11.139754295349121, |
|
"learning_rate": 4.6798224758281825e-05, |
|
"loss": 0.2236, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.1554485708475113, |
|
"learning_rate": 4.675859882707244e-05, |
|
"loss": 0.3646, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.10144095867872238, |
|
"learning_rate": 4.6718972895863056e-05, |
|
"loss": 0.2301, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 34.84081268310547, |
|
"learning_rate": 4.667934696465367e-05, |
|
"loss": 0.7788, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.31722915172576904, |
|
"learning_rate": 4.663972103344429e-05, |
|
"loss": 0.1405, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 32.6774787902832, |
|
"learning_rate": 4.6600095102234906e-05, |
|
"loss": 0.4079, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.04206797853112221, |
|
"learning_rate": 4.6560469171025525e-05, |
|
"loss": 0.8566, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.007674456108361483, |
|
"learning_rate": 4.6520843239816137e-05, |
|
"loss": 0.6745, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 4.521772384643555, |
|
"learning_rate": 4.6481217308606755e-05, |
|
"loss": 0.2311, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.43009287118911743, |
|
"learning_rate": 4.644159137739737e-05, |
|
"loss": 0.9246, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.108181893825531, |
|
"learning_rate": 4.6401965446187986e-05, |
|
"loss": 1.0581, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.0881645604968071, |
|
"learning_rate": 4.6362339514978605e-05, |
|
"loss": 0.431, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.03945665806531906, |
|
"learning_rate": 4.6322713583769224e-05, |
|
"loss": 0.3784, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.01008934061974287, |
|
"learning_rate": 4.6283087652559836e-05, |
|
"loss": 0.4657, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 1.8987274169921875, |
|
"learning_rate": 4.6243461721350455e-05, |
|
"loss": 0.3025, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 52.36662292480469, |
|
"learning_rate": 4.6203835790141073e-05, |
|
"loss": 0.1047, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 30.82796287536621, |
|
"learning_rate": 4.616420985893169e-05, |
|
"loss": 0.3864, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 28.43499755859375, |
|
"learning_rate": 4.6124583927722304e-05, |
|
"loss": 0.5029, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 36.37118911743164, |
|
"learning_rate": 4.608495799651292e-05, |
|
"loss": 0.3412, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.03059449978172779, |
|
"learning_rate": 4.6045332065303535e-05, |
|
"loss": 1.6759, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.028554683551192284, |
|
"learning_rate": 4.6005706134094154e-05, |
|
"loss": 0.4232, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.3136725425720215, |
|
"learning_rate": 4.596608020288477e-05, |
|
"loss": 0.4531, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 9.164505004882812, |
|
"learning_rate": 4.5926454271675385e-05, |
|
"loss": 1.4434, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 26.755535125732422, |
|
"learning_rate": 4.5886828340466004e-05, |
|
"loss": 0.5678, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.027405157685279846, |
|
"learning_rate": 4.5847202409256616e-05, |
|
"loss": 0.1547, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 7.302061080932617, |
|
"learning_rate": 4.5807576478047234e-05, |
|
"loss": 0.1004, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 36.72040557861328, |
|
"learning_rate": 4.576795054683785e-05, |
|
"loss": 0.4913, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.10275045782327652, |
|
"learning_rate": 4.572832461562847e-05, |
|
"loss": 0.0076, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.008994188159704208, |
|
"learning_rate": 4.5688698684419084e-05, |
|
"loss": 0.0018, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.02831762284040451, |
|
"learning_rate": 4.56490727532097e-05, |
|
"loss": 0.086, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.6608620285987854, |
|
"learning_rate": 4.5609446822000315e-05, |
|
"loss": 0.0874, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 1.8176270723342896, |
|
"learning_rate": 4.556982089079094e-05, |
|
"loss": 0.459, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 28.674335479736328, |
|
"learning_rate": 4.553019495958155e-05, |
|
"loss": 0.4304, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.06465455144643784, |
|
"learning_rate": 4.549056902837217e-05, |
|
"loss": 0.6094, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.011676542460918427, |
|
"learning_rate": 4.545094309716278e-05, |
|
"loss": 0.333, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 4.420731544494629, |
|
"learning_rate": 4.54113171659534e-05, |
|
"loss": 0.1013, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.1971130520105362, |
|
"learning_rate": 4.537169123474402e-05, |
|
"loss": 0.0122, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 2.309307813644409, |
|
"learning_rate": 4.533206530353464e-05, |
|
"loss": 0.0247, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.010364987887442112, |
|
"learning_rate": 4.529243937232525e-05, |
|
"loss": 0.0751, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 30.956512451171875, |
|
"learning_rate": 4.525281344111587e-05, |
|
"loss": 0.7706, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 21.555742263793945, |
|
"learning_rate": 4.521318750990648e-05, |
|
"loss": 0.0501, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.02271176129579544, |
|
"learning_rate": 4.51735615786971e-05, |
|
"loss": 0.219, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.10638172179460526, |
|
"learning_rate": 4.513393564748772e-05, |
|
"loss": 0.4009, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.0012674570316448808, |
|
"learning_rate": 4.509430971627833e-05, |
|
"loss": 0.0365, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.01860959082841873, |
|
"learning_rate": 4.505468378506895e-05, |
|
"loss": 0.064, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.029620472341775894, |
|
"learning_rate": 4.501505785385956e-05, |
|
"loss": 0.5788, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 69.32429504394531, |
|
"learning_rate": 4.497543192265019e-05, |
|
"loss": 0.5635, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.004012781195342541, |
|
"learning_rate": 4.49358059914408e-05, |
|
"loss": 0.8204, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.007074211724102497, |
|
"learning_rate": 4.489618006023142e-05, |
|
"loss": 0.2872, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 2.3659746646881104, |
|
"learning_rate": 4.485655412902203e-05, |
|
"loss": 0.6915, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.2181590050458908, |
|
"learning_rate": 4.481692819781265e-05, |
|
"loss": 0.0489, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.00418996112421155, |
|
"learning_rate": 4.477730226660327e-05, |
|
"loss": 0.4435, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.011325598694384098, |
|
"learning_rate": 4.473767633539389e-05, |
|
"loss": 0.0595, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 6.933524131774902, |
|
"learning_rate": 4.46980504041845e-05, |
|
"loss": 0.6284, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.04216031730175018, |
|
"learning_rate": 4.465842447297512e-05, |
|
"loss": 0.1847, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 49.92095184326172, |
|
"learning_rate": 4.461879854176573e-05, |
|
"loss": 0.1075, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.0068773203529417515, |
|
"learning_rate": 4.457917261055635e-05, |
|
"loss": 0.7253, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 49.53322219848633, |
|
"learning_rate": 4.453954667934697e-05, |
|
"loss": 0.3937, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.0059694708324968815, |
|
"learning_rate": 4.449992074813759e-05, |
|
"loss": 0.3752, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.006113003473728895, |
|
"learning_rate": 4.44602948169282e-05, |
|
"loss": 0.2646, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_accuracy": 0.8857142857142857, |
|
"eval_loss": 0.5655186772346497, |
|
"eval_runtime": 2204.4149, |
|
"eval_samples_per_second": 0.302, |
|
"eval_steps_per_second": 0.151, |
|
"step": 2806 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.030710767954587936, |
|
"learning_rate": 4.442066888571882e-05, |
|
"loss": 0.2584, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.961925208568573, |
|
"learning_rate": 4.438104295450943e-05, |
|
"loss": 0.0208, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.034155942499637604, |
|
"learning_rate": 4.434141702330005e-05, |
|
"loss": 0.0855, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.023880697786808014, |
|
"learning_rate": 4.430179109209067e-05, |
|
"loss": 0.0589, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.4049380421638489, |
|
"learning_rate": 4.426216516088128e-05, |
|
"loss": 0.0529, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.3483090102672577, |
|
"learning_rate": 4.42225392296719e-05, |
|
"loss": 0.6943, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 24.336814880371094, |
|
"learning_rate": 4.418291329846252e-05, |
|
"loss": 0.5243, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.0032091333996504545, |
|
"learning_rate": 4.4143287367253136e-05, |
|
"loss": 0.1202, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.047493454068899155, |
|
"learning_rate": 4.410366143604375e-05, |
|
"loss": 0.1007, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.017678333446383476, |
|
"learning_rate": 4.406403550483437e-05, |
|
"loss": 0.0008, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.9050219058990479, |
|
"learning_rate": 4.402440957362498e-05, |
|
"loss": 0.3121, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.03626730665564537, |
|
"learning_rate": 4.39847836424156e-05, |
|
"loss": 0.1179, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.011950280517339706, |
|
"learning_rate": 4.3945157711206216e-05, |
|
"loss": 0.4693, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.02412373013794422, |
|
"learning_rate": 4.3905531779996835e-05, |
|
"loss": 0.8616, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 30.13632583618164, |
|
"learning_rate": 4.386590584878745e-05, |
|
"loss": 0.3553, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.0061193606816232204, |
|
"learning_rate": 4.3826279917578066e-05, |
|
"loss": 0.0383, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.2041795253753662, |
|
"learning_rate": 4.378665398636868e-05, |
|
"loss": 0.6114, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.26872244477272034, |
|
"learning_rate": 4.3747028055159304e-05, |
|
"loss": 0.5494, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.06702332943677902, |
|
"learning_rate": 4.3707402123949916e-05, |
|
"loss": 0.0144, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 0.0052034310065209866, |
|
"learning_rate": 4.3667776192740534e-05, |
|
"loss": 0.0128, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 2.874134063720703, |
|
"learning_rate": 4.3628150261531146e-05, |
|
"loss": 0.6844, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 1.1829482316970825, |
|
"learning_rate": 4.3588524330321765e-05, |
|
"loss": 0.1866, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 41.475399017333984, |
|
"learning_rate": 4.3548898399112384e-05, |
|
"loss": 0.156, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.07054935395717621, |
|
"learning_rate": 4.3509272467902996e-05, |
|
"loss": 0.3379, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.44977086782455444, |
|
"learning_rate": 4.3469646536693615e-05, |
|
"loss": 0.5629, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.06215721368789673, |
|
"learning_rate": 4.343002060548423e-05, |
|
"loss": 0.0019, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 43.73810958862305, |
|
"learning_rate": 4.3390394674274846e-05, |
|
"loss": 0.3984, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.9523270130157471, |
|
"learning_rate": 4.3350768743065464e-05, |
|
"loss": 0.0888, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.005942572373896837, |
|
"learning_rate": 4.331114281185608e-05, |
|
"loss": 0.1961, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.022418642416596413, |
|
"learning_rate": 4.3271516880646695e-05, |
|
"loss": 0.0597, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.04196101427078247, |
|
"learning_rate": 4.3231890949437314e-05, |
|
"loss": 0.1147, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.003765852889046073, |
|
"learning_rate": 4.3192265018227926e-05, |
|
"loss": 0.0416, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.053471703082323074, |
|
"learning_rate": 4.3152639087018545e-05, |
|
"loss": 0.3866, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 51.969120025634766, |
|
"learning_rate": 4.3113013155809164e-05, |
|
"loss": 0.0616, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 0.005074084736406803, |
|
"learning_rate": 4.307338722459978e-05, |
|
"loss": 0.1907, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.0045975870452821255, |
|
"learning_rate": 4.3033761293390395e-05, |
|
"loss": 0.0072, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.0040842327289283276, |
|
"learning_rate": 4.299413536218101e-05, |
|
"loss": 0.2306, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.008049121126532555, |
|
"learning_rate": 4.295450943097163e-05, |
|
"loss": 0.0058, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 21.22314453125, |
|
"learning_rate": 4.291488349976225e-05, |
|
"loss": 0.5502, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.036627013236284256, |
|
"learning_rate": 4.287525756855286e-05, |
|
"loss": 0.1419, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 2.3564202785491943, |
|
"learning_rate": 4.283563163734348e-05, |
|
"loss": 0.0279, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.0108193913474679, |
|
"learning_rate": 4.2796005706134094e-05, |
|
"loss": 0.0004, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.0201814454048872, |
|
"learning_rate": 4.275637977492471e-05, |
|
"loss": 0.3249, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.03389296308159828, |
|
"learning_rate": 4.271675384371533e-05, |
|
"loss": 0.3386, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.01544855535030365, |
|
"learning_rate": 4.267712791250595e-05, |
|
"loss": 0.3866, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.025016358122229576, |
|
"learning_rate": 4.263750198129656e-05, |
|
"loss": 0.0013, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.0923624038696289, |
|
"learning_rate": 4.2597876050087174e-05, |
|
"loss": 0.2532, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 23.150659561157227, |
|
"learning_rate": 4.255825011887779e-05, |
|
"loss": 0.3962, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.015515293926000595, |
|
"learning_rate": 4.251862418766841e-05, |
|
"loss": 0.0004, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.003917529247701168, |
|
"learning_rate": 4.247899825645903e-05, |
|
"loss": 0.2484, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 28.370773315429688, |
|
"learning_rate": 4.243937232524964e-05, |
|
"loss": 0.0367, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.026205556467175484, |
|
"learning_rate": 4.239974639404026e-05, |
|
"loss": 0.3111, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.01336923148483038, |
|
"learning_rate": 4.2360120462830874e-05, |
|
"loss": 0.0047, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.011190090328454971, |
|
"learning_rate": 4.23204945316215e-05, |
|
"loss": 0.3007, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.003655917476862669, |
|
"learning_rate": 4.228086860041211e-05, |
|
"loss": 0.5659, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.017216026782989502, |
|
"learning_rate": 4.224124266920273e-05, |
|
"loss": 0.0933, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.007373865228146315, |
|
"learning_rate": 4.220161673799334e-05, |
|
"loss": 0.0298, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.035991325974464417, |
|
"learning_rate": 4.216199080678396e-05, |
|
"loss": 0.0916, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.007277372293174267, |
|
"learning_rate": 4.212236487557458e-05, |
|
"loss": 0.0008, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.0012711473973467946, |
|
"learning_rate": 4.20827389443652e-05, |
|
"loss": 0.049, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.004262813366949558, |
|
"learning_rate": 4.204311301315581e-05, |
|
"loss": 0.3255, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.6016376614570618, |
|
"learning_rate": 4.200348708194643e-05, |
|
"loss": 0.0016, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.027856985107064247, |
|
"learning_rate": 4.196386115073704e-05, |
|
"loss": 0.1706, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 36.658660888671875, |
|
"learning_rate": 4.192423521952766e-05, |
|
"loss": 0.393, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 4.459847927093506, |
|
"learning_rate": 4.188460928831828e-05, |
|
"loss": 0.2113, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.003763306187465787, |
|
"learning_rate": 4.18449833571089e-05, |
|
"loss": 0.0946, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.9358043670654297, |
|
"learning_rate": 4.180535742589951e-05, |
|
"loss": 0.1248, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 5.325794219970703, |
|
"learning_rate": 4.176573149469012e-05, |
|
"loss": 0.1882, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.01291597355157137, |
|
"learning_rate": 4.172610556348075e-05, |
|
"loss": 0.5989, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.05552150309085846, |
|
"learning_rate": 4.168647963227136e-05, |
|
"loss": 0.1317, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.0046797278337180614, |
|
"learning_rate": 4.164685370106198e-05, |
|
"loss": 0.9324, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.19884918630123138, |
|
"learning_rate": 4.160722776985259e-05, |
|
"loss": 0.0027, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.06361120939254761, |
|
"learning_rate": 4.156760183864321e-05, |
|
"loss": 0.1294, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.025872783735394478, |
|
"learning_rate": 4.152797590743383e-05, |
|
"loss": 0.3453, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.44598618149757385, |
|
"learning_rate": 4.1488349976224446e-05, |
|
"loss": 0.0445, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.00139313330873847, |
|
"learning_rate": 4.144872404501506e-05, |
|
"loss": 0.4126, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.004861112684011459, |
|
"learning_rate": 4.140909811380568e-05, |
|
"loss": 0.3162, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 37.97075653076172, |
|
"learning_rate": 4.136947218259629e-05, |
|
"loss": 0.0275, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.006260779220610857, |
|
"learning_rate": 4.132984625138691e-05, |
|
"loss": 0.5518, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 10.439234733581543, |
|
"learning_rate": 4.129022032017753e-05, |
|
"loss": 0.0489, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.009267416782677174, |
|
"learning_rate": 4.1250594388968146e-05, |
|
"loss": 0.2203, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.003436572849750519, |
|
"learning_rate": 4.121096845775876e-05, |
|
"loss": 0.0489, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.02378927730023861, |
|
"learning_rate": 4.1171342526549377e-05, |
|
"loss": 0.3647, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.04053608328104019, |
|
"learning_rate": 4.113171659533999e-05, |
|
"loss": 0.4505, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.8833039402961731, |
|
"learning_rate": 4.1092090664130614e-05, |
|
"loss": 0.0626, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 11.919655799865723, |
|
"learning_rate": 4.1052464732921226e-05, |
|
"loss": 0.0989, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.03586142137646675, |
|
"learning_rate": 4.1012838801711845e-05, |
|
"loss": 0.2583, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.1854490488767624, |
|
"learning_rate": 4.097321287050246e-05, |
|
"loss": 0.1368, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.057375673204660416, |
|
"learning_rate": 4.0933586939293076e-05, |
|
"loss": 0.345, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.015717756003141403, |
|
"learning_rate": 4.0893961008083695e-05, |
|
"loss": 0.0065, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.02194334752857685, |
|
"learning_rate": 4.085433507687431e-05, |
|
"loss": 0.0021, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 16.584745407104492, |
|
"learning_rate": 4.0814709145664925e-05, |
|
"loss": 0.0147, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.0053609260357916355, |
|
"learning_rate": 4.077508321445554e-05, |
|
"loss": 0.3077, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.002716219983994961, |
|
"learning_rate": 4.0735457283246156e-05, |
|
"loss": 0.4135, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.8324286937713623, |
|
"learning_rate": 4.0695831352036775e-05, |
|
"loss": 0.3889, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.004210811574012041, |
|
"learning_rate": 4.0656205420827394e-05, |
|
"loss": 0.0015, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.0026693022809922695, |
|
"learning_rate": 4.0616579489618006e-05, |
|
"loss": 0.566, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.02392963133752346, |
|
"learning_rate": 4.0576953558408625e-05, |
|
"loss": 0.0124, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.020079661160707474, |
|
"learning_rate": 4.053732762719924e-05, |
|
"loss": 0.0152, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.0014172615483403206, |
|
"learning_rate": 4.049770169598986e-05, |
|
"loss": 0.0932, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 5.266864776611328, |
|
"learning_rate": 4.0458075764780474e-05, |
|
"loss": 0.5559, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.009865287691354752, |
|
"learning_rate": 4.041844983357109e-05, |
|
"loss": 0.2131, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.0027454691007733345, |
|
"learning_rate": 4.0378823902361705e-05, |
|
"loss": 0.0006, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.15348024666309357, |
|
"learning_rate": 4.0339197971152324e-05, |
|
"loss": 1.0736, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 0.002026822417974472, |
|
"learning_rate": 4.029957203994294e-05, |
|
"loss": 0.4398, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.023032035678625107, |
|
"learning_rate": 4.025994610873356e-05, |
|
"loss": 0.3724, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 110.459716796875, |
|
"learning_rate": 4.0220320177524174e-05, |
|
"loss": 0.4695, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.026824643835425377, |
|
"learning_rate": 4.018069424631479e-05, |
|
"loss": 0.4355, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.007477205712348223, |
|
"learning_rate": 4.0141068315105404e-05, |
|
"loss": 0.3392, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.0020925672724843025, |
|
"learning_rate": 4.010144238389602e-05, |
|
"loss": 0.228, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.003810058580711484, |
|
"learning_rate": 4.006181645268664e-05, |
|
"loss": 0.2477, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.0076815299689769745, |
|
"learning_rate": 4.0022190521477254e-05, |
|
"loss": 0.4539, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.005379770882427692, |
|
"learning_rate": 3.998256459026787e-05, |
|
"loss": 0.0341, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.003831785172224045, |
|
"learning_rate": 3.9942938659058485e-05, |
|
"loss": 0.5954, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.7539482116699219, |
|
"learning_rate": 3.9903312727849104e-05, |
|
"loss": 0.3885, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.005533235147595406, |
|
"learning_rate": 3.986368679663972e-05, |
|
"loss": 0.0053, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.008866420015692711, |
|
"learning_rate": 3.982406086543034e-05, |
|
"loss": 0.0453, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.014108781702816486, |
|
"learning_rate": 3.978443493422095e-05, |
|
"loss": 0.2954, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.016585228964686394, |
|
"learning_rate": 3.974480900301157e-05, |
|
"loss": 0.0076, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 3.2773778438568115, |
|
"learning_rate": 3.970518307180219e-05, |
|
"loss": 0.1556, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 7.254385948181152, |
|
"learning_rate": 3.966555714059281e-05, |
|
"loss": 0.1696, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.0035600659903138876, |
|
"learning_rate": 3.962593120938342e-05, |
|
"loss": 0.0074, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 8.71975040435791, |
|
"learning_rate": 3.958630527817404e-05, |
|
"loss": 0.3048, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.0020627696067094803, |
|
"learning_rate": 3.954667934696465e-05, |
|
"loss": 0.7165, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.007494701538234949, |
|
"learning_rate": 3.950705341575527e-05, |
|
"loss": 0.5529, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.016065679490566254, |
|
"learning_rate": 3.946742748454589e-05, |
|
"loss": 0.0128, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.322768896818161, |
|
"learning_rate": 3.942780155333651e-05, |
|
"loss": 0.0628, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.023394137620925903, |
|
"learning_rate": 3.938817562212712e-05, |
|
"loss": 0.4743, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.0229184590280056, |
|
"learning_rate": 3.934854969091774e-05, |
|
"loss": 0.2818, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.006755081005394459, |
|
"learning_rate": 3.930892375970835e-05, |
|
"loss": 0.3635, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.004403649363666773, |
|
"learning_rate": 3.926929782849898e-05, |
|
"loss": 0.0568, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.0034377514384686947, |
|
"learning_rate": 3.922967189728959e-05, |
|
"loss": 0.1624, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 0.005851461086422205, |
|
"learning_rate": 3.91900459660802e-05, |
|
"loss": 0.6674, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 0.004862835630774498, |
|
"learning_rate": 3.915042003487082e-05, |
|
"loss": 0.5013, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 42.758365631103516, |
|
"learning_rate": 3.911079410366143e-05, |
|
"loss": 0.4173, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 0.004606719594448805, |
|
"learning_rate": 3.907116817245206e-05, |
|
"loss": 0.5328, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 78.40693664550781, |
|
"learning_rate": 3.903154224124267e-05, |
|
"loss": 0.4105, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 23.919864654541016, |
|
"learning_rate": 3.899191631003329e-05, |
|
"loss": 1.4324, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 0.00816379301249981, |
|
"learning_rate": 3.89522903788239e-05, |
|
"loss": 0.3002, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 0.022155677899718285, |
|
"learning_rate": 3.891266444761452e-05, |
|
"loss": 0.0785, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_accuracy": 0.9052631578947369, |
|
"eval_loss": 0.4806475341320038, |
|
"eval_runtime": 2299.8623, |
|
"eval_samples_per_second": 0.289, |
|
"eval_steps_per_second": 0.145, |
|
"step": 4209 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.0499810166656971, |
|
"learning_rate": 3.887303851640514e-05, |
|
"loss": 0.1101, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.00219921232201159, |
|
"learning_rate": 3.883341258519576e-05, |
|
"loss": 0.0019, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.05688053369522095, |
|
"learning_rate": 3.879378665398637e-05, |
|
"loss": 0.4249, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.004060626961290836, |
|
"learning_rate": 3.875416072277699e-05, |
|
"loss": 0.329, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.4057186245918274, |
|
"learning_rate": 3.87145347915676e-05, |
|
"loss": 0.0089, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.0447358600795269, |
|
"learning_rate": 3.8674908860358226e-05, |
|
"loss": 0.0034, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.003750765696167946, |
|
"learning_rate": 3.863528292914884e-05, |
|
"loss": 0.0953, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 6.533902168273926, |
|
"learning_rate": 3.8595656997939456e-05, |
|
"loss": 0.0106, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 0.001664067734964192, |
|
"learning_rate": 3.855603106673007e-05, |
|
"loss": 0.0162, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 0.8516010046005249, |
|
"learning_rate": 3.851640513552069e-05, |
|
"loss": 0.4751, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 0.03567550331354141, |
|
"learning_rate": 3.8476779204311306e-05, |
|
"loss": 0.161, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 0.0029626258183270693, |
|
"learning_rate": 3.8437153273101925e-05, |
|
"loss": 0.1448, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 0.017234837636351585, |
|
"learning_rate": 3.839752734189254e-05, |
|
"loss": 0.0052, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 0.14725999534130096, |
|
"learning_rate": 3.835790141068315e-05, |
|
"loss": 0.0645, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 0.002782195108011365, |
|
"learning_rate": 3.831827547947377e-05, |
|
"loss": 0.0004, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 34.547061920166016, |
|
"learning_rate": 3.8278649548264386e-05, |
|
"loss": 0.4315, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 0.0030270384158939123, |
|
"learning_rate": 3.8239023617055005e-05, |
|
"loss": 0.3151, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 0.008927990682423115, |
|
"learning_rate": 3.819939768584562e-05, |
|
"loss": 0.0002, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 0.11368348449468613, |
|
"learning_rate": 3.8159771754636236e-05, |
|
"loss": 0.0077, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 0.10815131664276123, |
|
"learning_rate": 3.812014582342685e-05, |
|
"loss": 0.0182, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 0.020075034350156784, |
|
"learning_rate": 3.808051989221747e-05, |
|
"loss": 0.0007, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 0.001029517618007958, |
|
"learning_rate": 3.8040893961008086e-05, |
|
"loss": 0.0397, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 0.003121725283563137, |
|
"learning_rate": 3.8001268029798704e-05, |
|
"loss": 0.0001, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 5.35357141494751, |
|
"learning_rate": 3.7961642098589316e-05, |
|
"loss": 0.0069, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 0.013706800527870655, |
|
"learning_rate": 3.7922016167379935e-05, |
|
"loss": 0.0007, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 0.09196832776069641, |
|
"learning_rate": 3.7882390236170554e-05, |
|
"loss": 0.0003, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 0.003602321958169341, |
|
"learning_rate": 3.784276430496117e-05, |
|
"loss": 0.2969, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 20.944992065429688, |
|
"learning_rate": 3.7803138373751785e-05, |
|
"loss": 0.0272, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 0.002105366438627243, |
|
"learning_rate": 3.7763512442542404e-05, |
|
"loss": 0.0002, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 0.004411764442920685, |
|
"learning_rate": 3.7723886511333016e-05, |
|
"loss": 0.0076, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 0.005865162704139948, |
|
"learning_rate": 3.7684260580123635e-05, |
|
"loss": 0.0059, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 0.011046779341995716, |
|
"learning_rate": 3.764463464891425e-05, |
|
"loss": 0.0043, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 0.023666031658649445, |
|
"learning_rate": 3.760500871770487e-05, |
|
"loss": 0.0009, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 53.04268264770508, |
|
"learning_rate": 3.7565382786495484e-05, |
|
"loss": 0.3129, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 16.536462783813477, |
|
"learning_rate": 3.7525756855286096e-05, |
|
"loss": 0.0079, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 0.004224766045808792, |
|
"learning_rate": 3.7486130924076715e-05, |
|
"loss": 0.073, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 0.005598429590463638, |
|
"learning_rate": 3.7446504992867334e-05, |
|
"loss": 0.2742, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 0.0030881077982485294, |
|
"learning_rate": 3.740687906165795e-05, |
|
"loss": 0.2948, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 0.019152648746967316, |
|
"learning_rate": 3.7367253130448565e-05, |
|
"loss": 0.0047, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 0.001949524856172502, |
|
"learning_rate": 3.7327627199239183e-05, |
|
"loss": 0.0003, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 0.001139726140536368, |
|
"learning_rate": 3.7288001268029795e-05, |
|
"loss": 0.0033, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 0.0009636884205974638, |
|
"learning_rate": 3.724837533682042e-05, |
|
"loss": 0.5236, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 0.0024904939346015453, |
|
"learning_rate": 3.720874940561103e-05, |
|
"loss": 0.0005, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 0.004561484791338444, |
|
"learning_rate": 3.716912347440165e-05, |
|
"loss": 0.4394, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 21.228055953979492, |
|
"learning_rate": 3.7129497543192264e-05, |
|
"loss": 0.5905, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 38.67287063598633, |
|
"learning_rate": 3.708987161198288e-05, |
|
"loss": 0.0304, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 0.002863664412871003, |
|
"learning_rate": 3.70502456807735e-05, |
|
"loss": 0.4688, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 0.0070022111758589745, |
|
"learning_rate": 3.701061974956412e-05, |
|
"loss": 0.0044, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 98.50983428955078, |
|
"learning_rate": 3.697099381835473e-05, |
|
"loss": 0.2539, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 0.044561292976140976, |
|
"learning_rate": 3.693136788714535e-05, |
|
"loss": 0.0002, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 2.370043992996216, |
|
"learning_rate": 3.689174195593596e-05, |
|
"loss": 0.0055, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 12.61652660369873, |
|
"learning_rate": 3.685211602472658e-05, |
|
"loss": 0.456, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 0.020174263045191765, |
|
"learning_rate": 3.68124900935172e-05, |
|
"loss": 0.0023, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 0.032532501965761185, |
|
"learning_rate": 3.677286416230782e-05, |
|
"loss": 0.0004, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 39.96610641479492, |
|
"learning_rate": 3.673323823109843e-05, |
|
"loss": 0.5033, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 0.006895292084664106, |
|
"learning_rate": 3.669361229988905e-05, |
|
"loss": 0.2369, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 0.0018528720829635859, |
|
"learning_rate": 3.665398636867967e-05, |
|
"loss": 0.0005, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 57.440799713134766, |
|
"learning_rate": 3.661436043747028e-05, |
|
"loss": 0.7416, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 0.15606503188610077, |
|
"learning_rate": 3.65747345062609e-05, |
|
"loss": 0.0005, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 0.06342484056949615, |
|
"learning_rate": 3.653510857505151e-05, |
|
"loss": 0.0008, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 0.0007686218596063554, |
|
"learning_rate": 3.649548264384213e-05, |
|
"loss": 0.0083, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 0.007868933491408825, |
|
"learning_rate": 3.645585671263275e-05, |
|
"loss": 0.0002, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 0.0038664869498461485, |
|
"learning_rate": 3.641623078142337e-05, |
|
"loss": 0.5171, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 139.34559631347656, |
|
"learning_rate": 3.637660485021398e-05, |
|
"loss": 0.1279, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 0.01549526583403349, |
|
"learning_rate": 3.63369789190046e-05, |
|
"loss": 0.0024, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 0.009506451897323132, |
|
"learning_rate": 3.629735298779521e-05, |
|
"loss": 0.0322, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 0.01199623104184866, |
|
"learning_rate": 3.625772705658583e-05, |
|
"loss": 0.5853, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 0.023425359278917313, |
|
"learning_rate": 3.621810112537645e-05, |
|
"loss": 0.0074, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 0.0029667699709534645, |
|
"learning_rate": 3.617847519416707e-05, |
|
"loss": 0.0003, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 0.0053655593656003475, |
|
"learning_rate": 3.613884926295768e-05, |
|
"loss": 0.0022, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 0.002650972455739975, |
|
"learning_rate": 3.60992233317483e-05, |
|
"loss": 0.0286, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 0.029404861852526665, |
|
"learning_rate": 3.605959740053891e-05, |
|
"loss": 0.355, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 0.0018920317525044084, |
|
"learning_rate": 3.6019971469329536e-05, |
|
"loss": 0.0105, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 0.2511395812034607, |
|
"learning_rate": 3.598034553812015e-05, |
|
"loss": 0.0721, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 0.0022750215139240026, |
|
"learning_rate": 3.594071960691077e-05, |
|
"loss": 0.0011, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 0.0004970223526470363, |
|
"learning_rate": 3.590109367570138e-05, |
|
"loss": 0.0493, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 0.0018257640767842531, |
|
"learning_rate": 3.5861467744492e-05, |
|
"loss": 0.0041, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 0.002615696983411908, |
|
"learning_rate": 3.5821841813282617e-05, |
|
"loss": 0.0002, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 0.0015312007162719965, |
|
"learning_rate": 3.578221588207323e-05, |
|
"loss": 0.0048, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 0.003842801321297884, |
|
"learning_rate": 3.574258995086385e-05, |
|
"loss": 0.0001, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 0.003976788371801376, |
|
"learning_rate": 3.570296401965446e-05, |
|
"loss": 0.0002, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 0.0027057684492319822, |
|
"learning_rate": 3.566333808844508e-05, |
|
"loss": 0.0001, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 0.0013581090606749058, |
|
"learning_rate": 3.56237121572357e-05, |
|
"loss": 0.0643, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 73.48147583007812, |
|
"learning_rate": 3.5584086226026316e-05, |
|
"loss": 0.1412, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 0.05521896854043007, |
|
"learning_rate": 3.554446029481693e-05, |
|
"loss": 0.8526, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 0.01980687491595745, |
|
"learning_rate": 3.550483436360755e-05, |
|
"loss": 0.3712, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 0.0016443756176158786, |
|
"learning_rate": 3.546520843239816e-05, |
|
"loss": 0.0004, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 2.7786030769348145, |
|
"learning_rate": 3.5425582501188784e-05, |
|
"loss": 0.0024, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 0.002752843778580427, |
|
"learning_rate": 3.5385956569979396e-05, |
|
"loss": 0.0989, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 0.07084832340478897, |
|
"learning_rate": 3.5346330638770015e-05, |
|
"loss": 0.006, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 0.0023438192438334227, |
|
"learning_rate": 3.530670470756063e-05, |
|
"loss": 0.0631, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 0.023146087303757668, |
|
"learning_rate": 3.5267078776351246e-05, |
|
"loss": 0.3281, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 0.0026536276564002037, |
|
"learning_rate": 3.5227452845141865e-05, |
|
"loss": 0.2622, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 25.2746639251709, |
|
"learning_rate": 3.5187826913932483e-05, |
|
"loss": 0.9971, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 2.3518447875976562, |
|
"learning_rate": 3.5148200982723095e-05, |
|
"loss": 0.0101, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 0.004120847675949335, |
|
"learning_rate": 3.5108575051513714e-05, |
|
"loss": 0.2435, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 0.0018114675767719746, |
|
"learning_rate": 3.5068949120304326e-05, |
|
"loss": 0.0002, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 0.003768153488636017, |
|
"learning_rate": 3.5029323189094945e-05, |
|
"loss": 0.0041, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 0.07695072889328003, |
|
"learning_rate": 3.4989697257885564e-05, |
|
"loss": 0.0025, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 57.52178955078125, |
|
"learning_rate": 3.4950071326676176e-05, |
|
"loss": 0.0468, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 0.001811747089959681, |
|
"learning_rate": 3.4910445395466795e-05, |
|
"loss": 0.0007, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 0.0032631447538733482, |
|
"learning_rate": 3.487081946425741e-05, |
|
"loss": 0.0962, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 0.0040063695050776005, |
|
"learning_rate": 3.4831193533048026e-05, |
|
"loss": 0.0718, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 0.042804840952157974, |
|
"learning_rate": 3.4791567601838644e-05, |
|
"loss": 0.0038, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 0.0023616242688149214, |
|
"learning_rate": 3.475194167062926e-05, |
|
"loss": 0.0002, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 0.002275130245834589, |
|
"learning_rate": 3.4712315739419875e-05, |
|
"loss": 0.6309, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 0.011256784200668335, |
|
"learning_rate": 3.4672689808210494e-05, |
|
"loss": 0.0002, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 0.0045999023132026196, |
|
"learning_rate": 3.463306387700111e-05, |
|
"loss": 0.1079, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 0.001873884117230773, |
|
"learning_rate": 3.459343794579173e-05, |
|
"loss": 0.0004, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 0.003349520266056061, |
|
"learning_rate": 3.4553812014582344e-05, |
|
"loss": 0.0392, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 0.07745254039764404, |
|
"learning_rate": 3.451418608337296e-05, |
|
"loss": 0.1259, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 0.001707065268419683, |
|
"learning_rate": 3.4474560152163574e-05, |
|
"loss": 0.2405, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 0.00230118609033525, |
|
"learning_rate": 3.443493422095419e-05, |
|
"loss": 0.0026, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 0.03412836417555809, |
|
"learning_rate": 3.439530828974481e-05, |
|
"loss": 0.1592, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 0.006646712776273489, |
|
"learning_rate": 3.435568235853543e-05, |
|
"loss": 0.0253, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 0.10694713890552521, |
|
"learning_rate": 3.431605642732604e-05, |
|
"loss": 0.3725, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 0.14875371754169464, |
|
"learning_rate": 3.427643049611666e-05, |
|
"loss": 0.7354, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 0.06602335721254349, |
|
"learning_rate": 3.4236804564907274e-05, |
|
"loss": 0.3752, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 0.002122233621776104, |
|
"learning_rate": 3.41971786336979e-05, |
|
"loss": 0.0014, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 0.020870821550488472, |
|
"learning_rate": 3.415755270248851e-05, |
|
"loss": 0.001, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 51.36176681518555, |
|
"learning_rate": 3.411792677127912e-05, |
|
"loss": 0.0521, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 0.002612057374790311, |
|
"learning_rate": 3.407830084006974e-05, |
|
"loss": 0.3004, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 0.006323930341750383, |
|
"learning_rate": 3.4038674908860354e-05, |
|
"loss": 0.0007, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 0.010717890225350857, |
|
"learning_rate": 3.399904897765098e-05, |
|
"loss": 0.0003, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 0.20171226561069489, |
|
"learning_rate": 3.395942304644159e-05, |
|
"loss": 0.0007, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 0.0015600892947986722, |
|
"learning_rate": 3.391979711523221e-05, |
|
"loss": 0.0011, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 1.7858773469924927, |
|
"learning_rate": 3.388017118402282e-05, |
|
"loss": 0.0032, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 0.0012454432435333729, |
|
"learning_rate": 3.384054525281344e-05, |
|
"loss": 0.0002, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 0.0015952313551679254, |
|
"learning_rate": 3.380091932160406e-05, |
|
"loss": 0.001, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 0.002770837862044573, |
|
"learning_rate": 3.376129339039468e-05, |
|
"loss": 0.0008, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 1.3940935134887695, |
|
"learning_rate": 3.372166745918529e-05, |
|
"loss": 0.0037, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 0.1070881336927414, |
|
"learning_rate": 3.368204152797591e-05, |
|
"loss": 0.0003, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 60.142276763916016, |
|
"learning_rate": 3.364241559676652e-05, |
|
"loss": 0.4676, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 0.0014883485855534673, |
|
"learning_rate": 3.360278966555714e-05, |
|
"loss": 0.0003, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 0.002981774276122451, |
|
"learning_rate": 3.356316373434776e-05, |
|
"loss": 0.0002, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 0.000889226037543267, |
|
"learning_rate": 3.352353780313838e-05, |
|
"loss": 0.0001, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 0.006324201822280884, |
|
"learning_rate": 3.348391187192899e-05, |
|
"loss": 0.0002, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 0.013741032220423222, |
|
"learning_rate": 3.344428594071961e-05, |
|
"loss": 0.0038, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 0.0982193648815155, |
|
"learning_rate": 3.340466000951023e-05, |
|
"loss": 0.0002, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 0.0023921611718833447, |
|
"learning_rate": 3.336503407830085e-05, |
|
"loss": 0.0001, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 0.004157126881182194, |
|
"learning_rate": 3.332540814709146e-05, |
|
"loss": 0.0001, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_accuracy": 0.9398496240601504, |
|
"eval_loss": 0.3705739974975586, |
|
"eval_runtime": 2358.2538, |
|
"eval_samples_per_second": 0.282, |
|
"eval_steps_per_second": 0.141, |
|
"step": 5612 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.0009104391792789102, |
|
"learning_rate": 3.328578221588208e-05, |
|
"loss": 0.3095, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.002629748312756419, |
|
"learning_rate": 3.324615628467269e-05, |
|
"loss": 0.1543, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.005159564781934023, |
|
"learning_rate": 3.320653035346331e-05, |
|
"loss": 0.0003, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.000841008557472378, |
|
"learning_rate": 3.316690442225393e-05, |
|
"loss": 0.0004, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.004792694002389908, |
|
"learning_rate": 3.312727849104454e-05, |
|
"loss": 0.0017, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.0014269945677369833, |
|
"learning_rate": 3.308765255983516e-05, |
|
"loss": 0.0002, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.0021025442983955145, |
|
"learning_rate": 3.304802662862577e-05, |
|
"loss": 0.115, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 0.0010108908172696829, |
|
"learning_rate": 3.300840069741639e-05, |
|
"loss": 0.0034, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 0.012116851285099983, |
|
"learning_rate": 3.296877476620701e-05, |
|
"loss": 0.001, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 28.641616821289062, |
|
"learning_rate": 3.2929148834997626e-05, |
|
"loss": 0.5096, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 21.132633209228516, |
|
"learning_rate": 3.288952290378824e-05, |
|
"loss": 0.0347, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 0.0044413842260837555, |
|
"learning_rate": 3.284989697257886e-05, |
|
"loss": 0.0145, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 48.15180206298828, |
|
"learning_rate": 3.281027104136947e-05, |
|
"loss": 0.0224, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 0.003202601335942745, |
|
"learning_rate": 3.2770645110160095e-05, |
|
"loss": 0.1978, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 20.207809448242188, |
|
"learning_rate": 3.273101917895071e-05, |
|
"loss": 0.0856, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 0.0013485507806763053, |
|
"learning_rate": 3.2691393247741326e-05, |
|
"loss": 0.0013, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 0.0005685106734745204, |
|
"learning_rate": 3.265176731653194e-05, |
|
"loss": 0.4924, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 0.0010126458946615458, |
|
"learning_rate": 3.2612141385322556e-05, |
|
"loss": 0.0096, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 95.05339813232422, |
|
"learning_rate": 3.2572515454113175e-05, |
|
"loss": 0.1921, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 0.002471966203302145, |
|
"learning_rate": 3.2532889522903794e-05, |
|
"loss": 0.0008, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 0.001297266804613173, |
|
"learning_rate": 3.2493263591694406e-05, |
|
"loss": 0.0002, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 19.474943161010742, |
|
"learning_rate": 3.2453637660485025e-05, |
|
"loss": 0.3726, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 0.015567510388791561, |
|
"learning_rate": 3.241401172927564e-05, |
|
"loss": 0.0003, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 0.002833213657140732, |
|
"learning_rate": 3.2374385798066256e-05, |
|
"loss": 0.6715, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 1.4238035678863525, |
|
"learning_rate": 3.2334759866856875e-05, |
|
"loss": 0.0008, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 0.0025125148240476847, |
|
"learning_rate": 3.2295133935647487e-05, |
|
"loss": 0.055, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 0.001612589810974896, |
|
"learning_rate": 3.2255508004438105e-05, |
|
"loss": 0.1114, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 0.0115219596773386, |
|
"learning_rate": 3.221588207322872e-05, |
|
"loss": 0.0002, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 0.002122466452419758, |
|
"learning_rate": 3.217625614201934e-05, |
|
"loss": 0.0463, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 0.012210741639137268, |
|
"learning_rate": 3.2136630210809955e-05, |
|
"loss": 0.0038, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 0.006696117110550404, |
|
"learning_rate": 3.2097004279600574e-05, |
|
"loss": 0.0006, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 0.003299353178590536, |
|
"learning_rate": 3.2057378348391186e-05, |
|
"loss": 0.4329, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 0.017031671479344368, |
|
"learning_rate": 3.2017752417181805e-05, |
|
"loss": 0.0014, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 0.008915259502828121, |
|
"learning_rate": 3.197812648597242e-05, |
|
"loss": 0.0003, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 0.0033541598822921515, |
|
"learning_rate": 3.193850055476304e-05, |
|
"loss": 0.0261, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 0.0039758519269526005, |
|
"learning_rate": 3.1898874623553654e-05, |
|
"loss": 0.0701, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 0.0859561562538147, |
|
"learning_rate": 3.185924869234427e-05, |
|
"loss": 0.0012, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 0.0011740931076928973, |
|
"learning_rate": 3.1819622761134885e-05, |
|
"loss": 0.0009, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 0.0011881589889526367, |
|
"learning_rate": 3.1779996829925504e-05, |
|
"loss": 0.1377, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 0.3393727242946625, |
|
"learning_rate": 3.174037089871612e-05, |
|
"loss": 0.0085, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 0.26653632521629333, |
|
"learning_rate": 3.170074496750674e-05, |
|
"loss": 0.5985, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 71.80652618408203, |
|
"learning_rate": 3.1661119036297353e-05, |
|
"loss": 0.1081, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 0.3816182017326355, |
|
"learning_rate": 3.162149310508797e-05, |
|
"loss": 0.0015, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 3.339017629623413, |
|
"learning_rate": 3.1581867173878584e-05, |
|
"loss": 0.0737, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 0.0013679158873856068, |
|
"learning_rate": 3.15422412426692e-05, |
|
"loss": 0.2465, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 0.003948609344661236, |
|
"learning_rate": 3.150261531145982e-05, |
|
"loss": 0.2111, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 0.005721264984458685, |
|
"learning_rate": 3.1462989380250434e-05, |
|
"loss": 0.0024, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 0.0018883657176047564, |
|
"learning_rate": 3.142336344904105e-05, |
|
"loss": 0.2687, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 0.008159175515174866, |
|
"learning_rate": 3.138373751783167e-05, |
|
"loss": 0.1406, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 0.009790794923901558, |
|
"learning_rate": 3.134411158662229e-05, |
|
"loss": 0.2122, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 0.645839273929596, |
|
"learning_rate": 3.13044856554129e-05, |
|
"loss": 0.0223, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 0.0012109485687687993, |
|
"learning_rate": 3.126485972420352e-05, |
|
"loss": 0.2131, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 0.005074062384665012, |
|
"learning_rate": 3.122523379299413e-05, |
|
"loss": 0.4669, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 0.04010836407542229, |
|
"learning_rate": 3.118560786178475e-05, |
|
"loss": 0.012, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 0.018426967784762383, |
|
"learning_rate": 3.114598193057537e-05, |
|
"loss": 0.0008, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 0.0035447929985821247, |
|
"learning_rate": 3.110635599936599e-05, |
|
"loss": 0.1271, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 0.012344791553914547, |
|
"learning_rate": 3.10667300681566e-05, |
|
"loss": 0.0002, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 0.0015085155609995127, |
|
"learning_rate": 3.102710413694722e-05, |
|
"loss": 0.0064, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 0.0013396035647019744, |
|
"learning_rate": 3.098747820573783e-05, |
|
"loss": 0.0003, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 0.007324972189962864, |
|
"learning_rate": 3.094785227452846e-05, |
|
"loss": 0.0001, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 0.029165761545300484, |
|
"learning_rate": 3.090822634331907e-05, |
|
"loss": 0.0002, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 0.006251147948205471, |
|
"learning_rate": 3.086860041210969e-05, |
|
"loss": 0.0001, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 0.0033136485144495964, |
|
"learning_rate": 3.08289744809003e-05, |
|
"loss": 0.1959, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 15.712539672851562, |
|
"learning_rate": 3.078934854969092e-05, |
|
"loss": 0.0053, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 0.004770079627633095, |
|
"learning_rate": 3.074972261848154e-05, |
|
"loss": 0.2429, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 0.001170918345451355, |
|
"learning_rate": 3.071009668727215e-05, |
|
"loss": 0.4537, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 0.003140375716611743, |
|
"learning_rate": 3.067047075606277e-05, |
|
"loss": 0.0003, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 0.005154268350452185, |
|
"learning_rate": 3.063084482485338e-05, |
|
"loss": 0.0002, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 0.718346357345581, |
|
"learning_rate": 3.0591218893644e-05, |
|
"loss": 0.0039, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 0.29760679602622986, |
|
"learning_rate": 3.055159296243462e-05, |
|
"loss": 0.0325, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 0.0015770556638017297, |
|
"learning_rate": 3.0511967031225234e-05, |
|
"loss": 0.1031, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 14.039325714111328, |
|
"learning_rate": 3.047234110001585e-05, |
|
"loss": 0.0254, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 12.89113998413086, |
|
"learning_rate": 3.043271516880647e-05, |
|
"loss": 0.0182, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 0.0020349326077848673, |
|
"learning_rate": 3.0393089237597084e-05, |
|
"loss": 0.0047, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 0.0006648111157119274, |
|
"learning_rate": 3.0353463306387703e-05, |
|
"loss": 0.0111, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 0.00324794533662498, |
|
"learning_rate": 3.0313837375178318e-05, |
|
"loss": 0.001, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 0.002352567156776786, |
|
"learning_rate": 3.0274211443968937e-05, |
|
"loss": 0.5155, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 0.0007183744455687702, |
|
"learning_rate": 3.023458551275955e-05, |
|
"loss": 0.1828, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 0.0010205942671746016, |
|
"learning_rate": 3.019495958155017e-05, |
|
"loss": 0.0004, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 0.0007507322006858885, |
|
"learning_rate": 3.0155333650340783e-05, |
|
"loss": 0.0078, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 0.0010719618294388056, |
|
"learning_rate": 3.0115707719131402e-05, |
|
"loss": 0.0024, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 0.004630456678569317, |
|
"learning_rate": 3.0076081787922017e-05, |
|
"loss": 0.0001, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 110.9379653930664, |
|
"learning_rate": 3.0036455856712636e-05, |
|
"loss": 0.2711, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 0.0028752069920301437, |
|
"learning_rate": 2.999682992550325e-05, |
|
"loss": 0.1684, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 0.00176974234636873, |
|
"learning_rate": 2.995720399429387e-05, |
|
"loss": 0.1832, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 0.0004082988016307354, |
|
"learning_rate": 2.9917578063084482e-05, |
|
"loss": 0.1391, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 91.996337890625, |
|
"learning_rate": 2.9877952131875105e-05, |
|
"loss": 0.0717, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 0.3914591372013092, |
|
"learning_rate": 2.9838326200665717e-05, |
|
"loss": 0.0006, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 0.0014606121694669127, |
|
"learning_rate": 2.9798700269456332e-05, |
|
"loss": 0.0002, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 0.002047004410997033, |
|
"learning_rate": 2.975907433824695e-05, |
|
"loss": 0.243, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 0.0009985043434426188, |
|
"learning_rate": 2.9719448407037563e-05, |
|
"loss": 0.0339, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"grad_norm": 0.0007074224413372576, |
|
"learning_rate": 2.9679822475828185e-05, |
|
"loss": 0.0015, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"grad_norm": 0.004130239132791758, |
|
"learning_rate": 2.9640196544618797e-05, |
|
"loss": 0.0001, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"grad_norm": 0.004487643018364906, |
|
"learning_rate": 2.9600570613409416e-05, |
|
"loss": 0.0002, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"grad_norm": 0.001936771790497005, |
|
"learning_rate": 2.956094468220003e-05, |
|
"loss": 0.0001, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"grad_norm": 0.004075042437762022, |
|
"learning_rate": 2.952131875099065e-05, |
|
"loss": 0.3415, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"grad_norm": 0.05164702981710434, |
|
"learning_rate": 2.9481692819781266e-05, |
|
"loss": 0.0001, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"grad_norm": 0.0014617941342294216, |
|
"learning_rate": 2.9442066888571884e-05, |
|
"loss": 0.0001, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"grad_norm": 0.0017148368060588837, |
|
"learning_rate": 2.94024409573625e-05, |
|
"loss": 0.0753, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"grad_norm": 0.003370764898136258, |
|
"learning_rate": 2.936281502615312e-05, |
|
"loss": 0.0072, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"grad_norm": 0.003846656298264861, |
|
"learning_rate": 2.932318909494373e-05, |
|
"loss": 0.0001, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"grad_norm": 0.002365513239055872, |
|
"learning_rate": 2.928356316373435e-05, |
|
"loss": 0.0008, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"grad_norm": 0.0008402117528021336, |
|
"learning_rate": 2.9243937232524965e-05, |
|
"loss": 0.0001, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"grad_norm": 0.004054752178490162, |
|
"learning_rate": 2.9204311301315584e-05, |
|
"loss": 0.0548, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"grad_norm": 0.0017859063809737563, |
|
"learning_rate": 2.91646853701062e-05, |
|
"loss": 0.0001, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 3.045167922973633, |
|
"learning_rate": 2.9125059438896818e-05, |
|
"loss": 0.0106, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.034478865563869476, |
|
"learning_rate": 2.9085433507687433e-05, |
|
"loss": 0.001, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.0020598298870027065, |
|
"learning_rate": 2.9045807576478052e-05, |
|
"loss": 0.9616, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.009513617493212223, |
|
"learning_rate": 2.9006181645268664e-05, |
|
"loss": 0.0601, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.10755365341901779, |
|
"learning_rate": 2.896655571405928e-05, |
|
"loss": 0.4313, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 89.41072845458984, |
|
"learning_rate": 2.8926929782849898e-05, |
|
"loss": 0.7753, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.005557952914386988, |
|
"learning_rate": 2.8887303851640514e-05, |
|
"loss": 0.022, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.07544991374015808, |
|
"learning_rate": 2.8847677920431132e-05, |
|
"loss": 0.1043, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.004230760037899017, |
|
"learning_rate": 2.8808051989221744e-05, |
|
"loss": 0.2305, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.0005384175456129014, |
|
"learning_rate": 2.8768426058012367e-05, |
|
"loss": 0.0003, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.0020217718556523323, |
|
"learning_rate": 2.872880012680298e-05, |
|
"loss": 0.0008, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.001009553438052535, |
|
"learning_rate": 2.8689174195593598e-05, |
|
"loss": 0.3165, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.002491355175152421, |
|
"learning_rate": 2.8649548264384213e-05, |
|
"loss": 0.5646, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.002977263880893588, |
|
"learning_rate": 2.8609922333174832e-05, |
|
"loss": 0.0001, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"grad_norm": 0.0012742202961817384, |
|
"learning_rate": 2.8570296401965447e-05, |
|
"loss": 0.2827, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"grad_norm": 0.0030132278334349394, |
|
"learning_rate": 2.8530670470756066e-05, |
|
"loss": 0.0006, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"grad_norm": 0.06876442581415176, |
|
"learning_rate": 2.8491044539546678e-05, |
|
"loss": 0.0062, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"grad_norm": 0.008195163682103157, |
|
"learning_rate": 2.84514186083373e-05, |
|
"loss": 0.0145, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"grad_norm": 0.0023913795594125986, |
|
"learning_rate": 2.8411792677127912e-05, |
|
"loss": 0.022, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"grad_norm": 0.0004799796442966908, |
|
"learning_rate": 2.837216674591853e-05, |
|
"loss": 0.2252, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"grad_norm": 0.11730991303920746, |
|
"learning_rate": 2.8332540814709146e-05, |
|
"loss": 0.0033, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"grad_norm": 0.01119227148592472, |
|
"learning_rate": 2.8292914883499765e-05, |
|
"loss": 0.0764, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"grad_norm": 1.4117075204849243, |
|
"learning_rate": 2.825328895229038e-05, |
|
"loss": 0.0313, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"grad_norm": 0.9569471478462219, |
|
"learning_rate": 2.8213663021081e-05, |
|
"loss": 0.0037, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"grad_norm": 0.001442433800548315, |
|
"learning_rate": 2.8174037089871615e-05, |
|
"loss": 0.0001, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"grad_norm": 0.0015686535043641925, |
|
"learning_rate": 2.8134411158662227e-05, |
|
"loss": 0.2835, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"grad_norm": 0.00151319510769099, |
|
"learning_rate": 2.8094785227452846e-05, |
|
"loss": 0.0004, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"grad_norm": 0.00654405914247036, |
|
"learning_rate": 2.805515929624346e-05, |
|
"loss": 0.0011, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"grad_norm": 0.0008709866087883711, |
|
"learning_rate": 2.801553336503408e-05, |
|
"loss": 0.0128, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"grad_norm": 0.00043904109043069184, |
|
"learning_rate": 2.7975907433824695e-05, |
|
"loss": 0.8107, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"grad_norm": 0.00742549542337656, |
|
"learning_rate": 2.7936281502615314e-05, |
|
"loss": 0.3098, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"grad_norm": 0.0007969782454892993, |
|
"learning_rate": 2.7896655571405926e-05, |
|
"loss": 0.0001, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"grad_norm": 52.47648620605469, |
|
"learning_rate": 2.7857029640196548e-05, |
|
"loss": 0.4694, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"grad_norm": 0.0088576078414917, |
|
"learning_rate": 2.781740370898716e-05, |
|
"loss": 0.0016, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"grad_norm": 3.6093878746032715, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.054, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"eval_accuracy": 0.9368421052631579, |
|
"eval_loss": 0.4006503224372864, |
|
"eval_runtime": 2328.3346, |
|
"eval_samples_per_second": 0.286, |
|
"eval_steps_per_second": 0.143, |
|
"step": 7015 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.0007481848588213325, |
|
"learning_rate": 2.7738151846568395e-05, |
|
"loss": 0.0007, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 162.9553680419922, |
|
"learning_rate": 2.7698525915359013e-05, |
|
"loss": 0.4298, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.0006780726835131645, |
|
"learning_rate": 2.765889998414963e-05, |
|
"loss": 0.4696, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.0014015401247888803, |
|
"learning_rate": 2.7619274052940248e-05, |
|
"loss": 0.028, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.00443660095334053, |
|
"learning_rate": 2.757964812173086e-05, |
|
"loss": 0.0078, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 4.740740776062012, |
|
"learning_rate": 2.7540022190521482e-05, |
|
"loss": 0.1712, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.0052452534437179565, |
|
"learning_rate": 2.7500396259312094e-05, |
|
"loss": 0.0001, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"grad_norm": 0.0006377240642905235, |
|
"learning_rate": 2.7460770328102713e-05, |
|
"loss": 0.045, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"grad_norm": 0.0011151348007842898, |
|
"learning_rate": 2.7421144396893328e-05, |
|
"loss": 0.0001, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"grad_norm": 0.0006300232489593327, |
|
"learning_rate": 2.7381518465683947e-05, |
|
"loss": 0.0004, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"grad_norm": 0.008152210153639317, |
|
"learning_rate": 2.7341892534474562e-05, |
|
"loss": 0.0002, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"grad_norm": 0.0016102171503007412, |
|
"learning_rate": 2.7302266603265174e-05, |
|
"loss": 0.0302, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"grad_norm": 0.0014644188340753317, |
|
"learning_rate": 2.7262640672055796e-05, |
|
"loss": 0.0, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"grad_norm": 0.0012343927519395947, |
|
"learning_rate": 2.722301474084641e-05, |
|
"loss": 0.001, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"grad_norm": 0.002109797904267907, |
|
"learning_rate": 2.7183388809637027e-05, |
|
"loss": 0.0003, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"grad_norm": 0.0012583807110786438, |
|
"learning_rate": 2.7143762878427643e-05, |
|
"loss": 0.0001, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"grad_norm": 0.0009702452807687223, |
|
"learning_rate": 2.710413694721826e-05, |
|
"loss": 0.1802, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"grad_norm": 0.004518999718129635, |
|
"learning_rate": 2.7064511016008877e-05, |
|
"loss": 0.0001, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"grad_norm": 0.0008531950297765434, |
|
"learning_rate": 2.7024885084799496e-05, |
|
"loss": 0.0001, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"grad_norm": 0.003954921383410692, |
|
"learning_rate": 2.6985259153590108e-05, |
|
"loss": 0.0001, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"grad_norm": 0.0006554504507221282, |
|
"learning_rate": 2.694563322238073e-05, |
|
"loss": 0.0002, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 0.0011577644618228078, |
|
"learning_rate": 2.6906007291171342e-05, |
|
"loss": 0.006, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 0.0004994067130610347, |
|
"learning_rate": 2.686638135996196e-05, |
|
"loss": 0.0001, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 0.006224981974810362, |
|
"learning_rate": 2.6826755428752576e-05, |
|
"loss": 0.4425, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 0.00843863096088171, |
|
"learning_rate": 2.6787129497543195e-05, |
|
"loss": 0.1975, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 0.0011182260932400823, |
|
"learning_rate": 2.674750356633381e-05, |
|
"loss": 0.0002, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 0.0012028939090669155, |
|
"learning_rate": 2.670787763512443e-05, |
|
"loss": 0.0001, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 0.0064741140231490135, |
|
"learning_rate": 2.666825170391504e-05, |
|
"loss": 0.0066, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 0.0013653126079589128, |
|
"learning_rate": 2.6628625772705663e-05, |
|
"loss": 0.0802, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 0.0032840375788509846, |
|
"learning_rate": 2.6588999841496275e-05, |
|
"loss": 0.0495, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 0.006207801401615143, |
|
"learning_rate": 2.6549373910286894e-05, |
|
"loss": 0.0001, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 0.0015818944666534662, |
|
"learning_rate": 2.650974797907751e-05, |
|
"loss": 0.0935, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 0.0013846838846802711, |
|
"learning_rate": 2.647012204786813e-05, |
|
"loss": 0.0101, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 0.0015213302103802562, |
|
"learning_rate": 2.6430496116658744e-05, |
|
"loss": 0.0001, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 0.0016765915788710117, |
|
"learning_rate": 2.6390870185449356e-05, |
|
"loss": 0.0008, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"grad_norm": 0.0029850786086171865, |
|
"learning_rate": 2.6351244254239975e-05, |
|
"loss": 0.5417, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"grad_norm": 0.0028296930249780416, |
|
"learning_rate": 2.631161832303059e-05, |
|
"loss": 0.0029, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"grad_norm": 0.19774562120437622, |
|
"learning_rate": 2.627199239182121e-05, |
|
"loss": 0.0424, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"grad_norm": 0.20521485805511475, |
|
"learning_rate": 2.6232366460611824e-05, |
|
"loss": 0.0003, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"grad_norm": 3.243302822113037, |
|
"learning_rate": 2.6192740529402443e-05, |
|
"loss": 0.0033, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"grad_norm": 0.002176284557208419, |
|
"learning_rate": 2.615311459819306e-05, |
|
"loss": 0.0905, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"grad_norm": 0.00346784177236259, |
|
"learning_rate": 2.6113488666983677e-05, |
|
"loss": 0.0058, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"grad_norm": 0.0022136277984827757, |
|
"learning_rate": 2.607386273577429e-05, |
|
"loss": 0.0145, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"grad_norm": 0.0054547772742807865, |
|
"learning_rate": 2.603423680456491e-05, |
|
"loss": 0.0001, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"grad_norm": 0.0017041038954630494, |
|
"learning_rate": 2.5994610873355524e-05, |
|
"loss": 0.0043, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"grad_norm": 0.00526059465482831, |
|
"learning_rate": 2.5954984942146142e-05, |
|
"loss": 0.0001, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"grad_norm": 0.0015646722167730331, |
|
"learning_rate": 2.5915359010936758e-05, |
|
"loss": 0.0001, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"grad_norm": 0.0014299266040325165, |
|
"learning_rate": 2.5875733079727377e-05, |
|
"loss": 0.0001, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"grad_norm": 0.857555627822876, |
|
"learning_rate": 2.5836107148517992e-05, |
|
"loss": 0.0494, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"grad_norm": 0.0019163636025041342, |
|
"learning_rate": 2.579648121730861e-05, |
|
"loss": 0.0001, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"grad_norm": 0.001081604859791696, |
|
"learning_rate": 2.5756855286099223e-05, |
|
"loss": 0.0001, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"grad_norm": 0.002402815269306302, |
|
"learning_rate": 2.5717229354889845e-05, |
|
"loss": 0.0001, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"grad_norm": 0.0032065189443528652, |
|
"learning_rate": 2.5677603423680457e-05, |
|
"loss": 0.5271, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"grad_norm": 0.0037377572152763605, |
|
"learning_rate": 2.5637977492471076e-05, |
|
"loss": 0.0001, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"grad_norm": 0.0010730663780122995, |
|
"learning_rate": 2.559835156126169e-05, |
|
"loss": 0.0001, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"grad_norm": 0.018039198592305183, |
|
"learning_rate": 2.5558725630052303e-05, |
|
"loss": 0.1574, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"grad_norm": 0.0008627079077996314, |
|
"learning_rate": 2.5519099698842925e-05, |
|
"loss": 0.0004, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"grad_norm": 0.00304847932420671, |
|
"learning_rate": 2.5479473767633537e-05, |
|
"loss": 0.0002, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"grad_norm": 56.73731231689453, |
|
"learning_rate": 2.5439847836424156e-05, |
|
"loss": 0.2908, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"grad_norm": 0.0014052072074264288, |
|
"learning_rate": 2.540022190521477e-05, |
|
"loss": 0.0001, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"grad_norm": 0.0024271756410598755, |
|
"learning_rate": 2.536059597400539e-05, |
|
"loss": 0.0363, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"grad_norm": 0.0011607712367549539, |
|
"learning_rate": 2.5320970042796006e-05, |
|
"loss": 0.0703, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"grad_norm": 0.0010089229326695204, |
|
"learning_rate": 2.5281344111586625e-05, |
|
"loss": 0.0001, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"grad_norm": 0.0012477770214900374, |
|
"learning_rate": 2.524171818037724e-05, |
|
"loss": 0.471, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"grad_norm": 0.0015396666713058949, |
|
"learning_rate": 2.520209224916786e-05, |
|
"loss": 0.2129, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"grad_norm": 0.000801810878328979, |
|
"learning_rate": 2.516246631795847e-05, |
|
"loss": 0.0314, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"grad_norm": 0.0009846306638792157, |
|
"learning_rate": 2.512284038674909e-05, |
|
"loss": 0.0003, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"grad_norm": 0.03625110909342766, |
|
"learning_rate": 2.5083214455539705e-05, |
|
"loss": 0.0016, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"grad_norm": 0.14931851625442505, |
|
"learning_rate": 2.5043588524330324e-05, |
|
"loss": 0.4488, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"grad_norm": 0.007826775312423706, |
|
"learning_rate": 2.500396259312094e-05, |
|
"loss": 0.0002, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"grad_norm": 0.00988730974495411, |
|
"learning_rate": 2.4964336661911555e-05, |
|
"loss": 0.0001, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"grad_norm": 0.0005387517157942057, |
|
"learning_rate": 2.4924710730702174e-05, |
|
"loss": 0.7611, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"grad_norm": 0.0011877217330038548, |
|
"learning_rate": 2.488508479949279e-05, |
|
"loss": 0.0001, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"grad_norm": 0.019128194078803062, |
|
"learning_rate": 2.4845458868283404e-05, |
|
"loss": 0.517, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"grad_norm": 1.5278313159942627, |
|
"learning_rate": 2.4805832937074023e-05, |
|
"loss": 0.0012, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"grad_norm": 0.0027036985848098993, |
|
"learning_rate": 2.476620700586464e-05, |
|
"loss": 0.5882, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"grad_norm": 0.002757065463811159, |
|
"learning_rate": 2.4726581074655254e-05, |
|
"loss": 0.0155, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"grad_norm": 0.004905847366899252, |
|
"learning_rate": 2.4686955143445873e-05, |
|
"loss": 0.0102, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"grad_norm": 0.0014356361934915185, |
|
"learning_rate": 2.4647329212236488e-05, |
|
"loss": 0.0001, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"grad_norm": 3.6968801021575928, |
|
"learning_rate": 2.4607703281027107e-05, |
|
"loss": 0.2234, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"grad_norm": 66.777099609375, |
|
"learning_rate": 2.4568077349817722e-05, |
|
"loss": 0.3216, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"grad_norm": 0.001242569531314075, |
|
"learning_rate": 2.4528451418608338e-05, |
|
"loss": 0.0003, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"grad_norm": 0.0016161628300324082, |
|
"learning_rate": 2.4488825487398957e-05, |
|
"loss": 0.024, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"grad_norm": 0.06756754219532013, |
|
"learning_rate": 2.4449199556189572e-05, |
|
"loss": 0.0614, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"grad_norm": 0.0006389593472704291, |
|
"learning_rate": 2.440957362498019e-05, |
|
"loss": 0.1259, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"grad_norm": 0.004206878133118153, |
|
"learning_rate": 2.4369947693770806e-05, |
|
"loss": 0.2293, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"grad_norm": 0.0025491828564554453, |
|
"learning_rate": 2.433032176256142e-05, |
|
"loss": 0.0004, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"grad_norm": 0.0015132069820538163, |
|
"learning_rate": 2.4290695831352037e-05, |
|
"loss": 0.0453, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"grad_norm": 0.0013023455394431949, |
|
"learning_rate": 2.4251069900142652e-05, |
|
"loss": 0.515, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"grad_norm": 0.0006147758103907108, |
|
"learning_rate": 2.421144396893327e-05, |
|
"loss": 0.0004, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"grad_norm": 0.0013257160317152739, |
|
"learning_rate": 2.4171818037723887e-05, |
|
"loss": 0.0004, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"grad_norm": 0.0010351515375077724, |
|
"learning_rate": 2.4132192106514502e-05, |
|
"loss": 0.2861, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"grad_norm": 0.004010920412838459, |
|
"learning_rate": 2.409256617530512e-05, |
|
"loss": 0.144, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"grad_norm": 0.002655152464285493, |
|
"learning_rate": 2.4052940244095736e-05, |
|
"loss": 0.5804, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"grad_norm": 0.009208135306835175, |
|
"learning_rate": 2.4013314312886355e-05, |
|
"loss": 0.0008, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"grad_norm": 124.37940979003906, |
|
"learning_rate": 2.397368838167697e-05, |
|
"loss": 0.1359, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"grad_norm": 0.0007841124897822738, |
|
"learning_rate": 2.3934062450467586e-05, |
|
"loss": 0.0073, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"grad_norm": 14.345431327819824, |
|
"learning_rate": 2.3894436519258205e-05, |
|
"loss": 0.009, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"grad_norm": 0.0012639712076634169, |
|
"learning_rate": 2.385481058804882e-05, |
|
"loss": 0.0001, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"grad_norm": 0.004882665816694498, |
|
"learning_rate": 2.3815184656839436e-05, |
|
"loss": 0.765, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"grad_norm": 1.992924690246582, |
|
"learning_rate": 2.3775558725630054e-05, |
|
"loss": 0.0199, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"grad_norm": 0.008574814535677433, |
|
"learning_rate": 2.373593279442067e-05, |
|
"loss": 0.0121, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"grad_norm": 0.0031569607090204954, |
|
"learning_rate": 2.369630686321129e-05, |
|
"loss": 0.0105, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"grad_norm": 0.005381352733820677, |
|
"learning_rate": 2.3656680932001904e-05, |
|
"loss": 0.0002, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"grad_norm": 0.0014025687705725431, |
|
"learning_rate": 2.361705500079252e-05, |
|
"loss": 0.1309, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"grad_norm": 0.00232652947306633, |
|
"learning_rate": 2.3577429069583138e-05, |
|
"loss": 0.0253, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"grad_norm": 0.004494811408221722, |
|
"learning_rate": 2.3537803138373754e-05, |
|
"loss": 0.0004, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"grad_norm": 0.007132168859243393, |
|
"learning_rate": 2.3498177207164372e-05, |
|
"loss": 0.0002, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"grad_norm": 0.002315562916919589, |
|
"learning_rate": 2.3458551275954984e-05, |
|
"loss": 0.0048, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"grad_norm": 0.0011102244025096297, |
|
"learning_rate": 2.34189253447456e-05, |
|
"loss": 0.1166, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"grad_norm": 0.0011376317124813795, |
|
"learning_rate": 2.337929941353622e-05, |
|
"loss": 0.0001, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"grad_norm": 0.009772238321602345, |
|
"learning_rate": 2.3339673482326834e-05, |
|
"loss": 0.1212, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"grad_norm": 0.0009250590810552239, |
|
"learning_rate": 2.3300047551117453e-05, |
|
"loss": 0.0077, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"grad_norm": 0.0008343447698280215, |
|
"learning_rate": 2.3260421619908068e-05, |
|
"loss": 0.0001, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"grad_norm": 0.005889697000384331, |
|
"learning_rate": 2.3220795688698684e-05, |
|
"loss": 0.2522, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"grad_norm": 0.004577580373734236, |
|
"learning_rate": 2.3181169757489303e-05, |
|
"loss": 0.0055, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"grad_norm": 0.0006038689170964062, |
|
"learning_rate": 2.3141543826279918e-05, |
|
"loss": 0.22, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"grad_norm": 119.69172668457031, |
|
"learning_rate": 2.3101917895070537e-05, |
|
"loss": 0.2874, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"grad_norm": 0.01207007933408022, |
|
"learning_rate": 2.3062291963861152e-05, |
|
"loss": 0.0003, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 0.005133229307830334, |
|
"learning_rate": 2.3022666032651768e-05, |
|
"loss": 0.0002, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 0.0014045186107978225, |
|
"learning_rate": 2.2983040101442386e-05, |
|
"loss": 0.0003, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 0.005631518550217152, |
|
"learning_rate": 2.2943414170233002e-05, |
|
"loss": 0.0002, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 0.0011396125191822648, |
|
"learning_rate": 2.2903788239023617e-05, |
|
"loss": 0.0004, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 0.16508010029792786, |
|
"learning_rate": 2.2864162307814236e-05, |
|
"loss": 0.0002, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 0.005040541756898165, |
|
"learning_rate": 2.282453637660485e-05, |
|
"loss": 0.016, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 0.0026673241518437862, |
|
"learning_rate": 2.278491044539547e-05, |
|
"loss": 0.0024, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 0.0025323168374598026, |
|
"learning_rate": 2.2745284514186086e-05, |
|
"loss": 0.0001, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 0.002470273757353425, |
|
"learning_rate": 2.27056585829767e-05, |
|
"loss": 0.0001, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 0.0011150416685268283, |
|
"learning_rate": 2.266603265176732e-05, |
|
"loss": 0.0027, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 0.0062728519551455975, |
|
"learning_rate": 2.2626406720557935e-05, |
|
"loss": 0.0006, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 0.001863997895270586, |
|
"learning_rate": 2.258678078934855e-05, |
|
"loss": 0.0001, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 0.0009478493593633175, |
|
"learning_rate": 2.2547154858139166e-05, |
|
"loss": 0.0179, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 0.0012072144309058785, |
|
"learning_rate": 2.250752892692978e-05, |
|
"loss": 0.2482, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"grad_norm": 0.0013612033799290657, |
|
"learning_rate": 2.24679029957204e-05, |
|
"loss": 0.0001, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"grad_norm": 0.001653852523304522, |
|
"learning_rate": 2.2428277064511016e-05, |
|
"loss": 0.0001, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"grad_norm": 0.004468216095119715, |
|
"learning_rate": 2.2388651133301634e-05, |
|
"loss": 0.0003, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"grad_norm": 0.21759329736232758, |
|
"learning_rate": 2.234902520209225e-05, |
|
"loss": 0.0004, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"grad_norm": 0.002769963815808296, |
|
"learning_rate": 2.2309399270882865e-05, |
|
"loss": 0.0002, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"grad_norm": 0.0010608519660308957, |
|
"learning_rate": 2.2269773339673484e-05, |
|
"loss": 0.1718, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"grad_norm": 0.0008747797110117972, |
|
"learning_rate": 2.22301474084641e-05, |
|
"loss": 0.0003, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"eval_accuracy": 0.9669172932330827, |
|
"eval_loss": 0.23544873297214508, |
|
"eval_runtime": 2342.8874, |
|
"eval_samples_per_second": 0.284, |
|
"eval_steps_per_second": 0.142, |
|
"step": 8418 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.0007297981064766645, |
|
"learning_rate": 2.2190521477254715e-05, |
|
"loss": 0.0001, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.007375821936875582, |
|
"learning_rate": 2.2150895546045334e-05, |
|
"loss": 0.0001, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.0019510581623762846, |
|
"learning_rate": 2.211126961483595e-05, |
|
"loss": 0.0001, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.009307813830673695, |
|
"learning_rate": 2.2071643683626568e-05, |
|
"loss": 0.0002, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.07272663712501526, |
|
"learning_rate": 2.2032017752417183e-05, |
|
"loss": 0.0002, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.004176029469817877, |
|
"learning_rate": 2.19923918212078e-05, |
|
"loss": 0.058, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.0019298582337796688, |
|
"learning_rate": 2.1952765889998418e-05, |
|
"loss": 0.0001, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"grad_norm": 137.64112854003906, |
|
"learning_rate": 2.1913139958789033e-05, |
|
"loss": 0.144, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"grad_norm": 0.0035788225941359997, |
|
"learning_rate": 2.1873514027579652e-05, |
|
"loss": 0.1903, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"grad_norm": 0.00900218915194273, |
|
"learning_rate": 2.1833888096370267e-05, |
|
"loss": 0.3162, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"grad_norm": 0.006812531501054764, |
|
"learning_rate": 2.1794262165160883e-05, |
|
"loss": 0.0001, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"grad_norm": 0.011043643578886986, |
|
"learning_rate": 2.1754636233951498e-05, |
|
"loss": 0.0001, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"grad_norm": 0.0009386019664816558, |
|
"learning_rate": 2.1715010302742113e-05, |
|
"loss": 0.0132, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"grad_norm": 0.0009653670713305473, |
|
"learning_rate": 2.1675384371532732e-05, |
|
"loss": 0.0001, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"grad_norm": 0.000631912553217262, |
|
"learning_rate": 2.1635758440323348e-05, |
|
"loss": 0.0046, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"grad_norm": 0.005377355497330427, |
|
"learning_rate": 2.1596132509113963e-05, |
|
"loss": 0.0001, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"grad_norm": 0.0015233962330967188, |
|
"learning_rate": 2.1556506577904582e-05, |
|
"loss": 0.26, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"grad_norm": 0.003712683217599988, |
|
"learning_rate": 2.1516880646695197e-05, |
|
"loss": 0.0103, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"grad_norm": 0.002746036509051919, |
|
"learning_rate": 2.1477254715485816e-05, |
|
"loss": 0.0001, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"grad_norm": 0.001353266416117549, |
|
"learning_rate": 2.143762878427643e-05, |
|
"loss": 0.0001, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"grad_norm": 0.05317896232008934, |
|
"learning_rate": 2.1398002853067047e-05, |
|
"loss": 0.0002, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"grad_norm": 0.002108694287016988, |
|
"learning_rate": 2.1358376921857666e-05, |
|
"loss": 0.0001, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"grad_norm": 0.0015535557176917791, |
|
"learning_rate": 2.131875099064828e-05, |
|
"loss": 0.2856, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"grad_norm": 0.0007479583146050572, |
|
"learning_rate": 2.1279125059438897e-05, |
|
"loss": 0.0001, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"grad_norm": 0.0013678164687007666, |
|
"learning_rate": 2.1239499128229515e-05, |
|
"loss": 0.0001, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"grad_norm": 0.0011460609966889024, |
|
"learning_rate": 2.119987319702013e-05, |
|
"loss": 0.1748, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"grad_norm": 0.005598797462880611, |
|
"learning_rate": 2.116024726581075e-05, |
|
"loss": 0.0005, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"grad_norm": 0.0058416505344212055, |
|
"learning_rate": 2.1120621334601365e-05, |
|
"loss": 0.003, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"grad_norm": 0.0018327133730053902, |
|
"learning_rate": 2.108099540339198e-05, |
|
"loss": 0.0001, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"grad_norm": 0.0008349318522959948, |
|
"learning_rate": 2.10413694721826e-05, |
|
"loss": 0.0014, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"grad_norm": 0.0007587561849504709, |
|
"learning_rate": 2.1001743540973215e-05, |
|
"loss": 0.0012, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"grad_norm": 0.003939191345125437, |
|
"learning_rate": 2.096211760976383e-05, |
|
"loss": 0.0001, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"grad_norm": 0.007147368974983692, |
|
"learning_rate": 2.092249167855445e-05, |
|
"loss": 0.0003, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"grad_norm": 0.0007460744236595929, |
|
"learning_rate": 2.088286574734506e-05, |
|
"loss": 0.0, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"grad_norm": 0.005187608767300844, |
|
"learning_rate": 2.084323981613568e-05, |
|
"loss": 0.0001, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"grad_norm": 0.0012044048635289073, |
|
"learning_rate": 2.0803613884926295e-05, |
|
"loss": 0.0003, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"grad_norm": 0.005269182845950127, |
|
"learning_rate": 2.0763987953716914e-05, |
|
"loss": 0.1424, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"grad_norm": 0.0014458984369412065, |
|
"learning_rate": 2.072436202250753e-05, |
|
"loss": 0.1836, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"grad_norm": 0.003018228802829981, |
|
"learning_rate": 2.0684736091298145e-05, |
|
"loss": 0.0002, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"grad_norm": 0.0005208718357607722, |
|
"learning_rate": 2.0645110160088763e-05, |
|
"loss": 0.276, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"grad_norm": 0.0005419257213361561, |
|
"learning_rate": 2.060548422887938e-05, |
|
"loss": 0.0, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"grad_norm": 0.0056818630546331406, |
|
"learning_rate": 2.0565858297669994e-05, |
|
"loss": 0.0003, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"grad_norm": 0.0021387594752013683, |
|
"learning_rate": 2.0526232366460613e-05, |
|
"loss": 0.0001, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"grad_norm": 0.0017361573409289122, |
|
"learning_rate": 2.048660643525123e-05, |
|
"loss": 0.0235, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"grad_norm": 0.0031765319872647524, |
|
"learning_rate": 2.0446980504041847e-05, |
|
"loss": 0.0002, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"grad_norm": 0.0006492682150565088, |
|
"learning_rate": 2.0407354572832463e-05, |
|
"loss": 0.0, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"grad_norm": 0.009603900834918022, |
|
"learning_rate": 2.0367728641623078e-05, |
|
"loss": 0.0116, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"grad_norm": 0.0014260296011343598, |
|
"learning_rate": 2.0328102710413697e-05, |
|
"loss": 0.0272, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"grad_norm": 0.001238304190337658, |
|
"learning_rate": 2.0288476779204312e-05, |
|
"loss": 0.0001, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"grad_norm": 0.004389143083244562, |
|
"learning_rate": 2.024885084799493e-05, |
|
"loss": 0.0001, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"grad_norm": 0.0006919855368323624, |
|
"learning_rate": 2.0209224916785547e-05, |
|
"loss": 0.0015, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"grad_norm": 0.0013250050833448768, |
|
"learning_rate": 2.0169598985576162e-05, |
|
"loss": 0.0, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"grad_norm": 0.0006862548179924488, |
|
"learning_rate": 2.012997305436678e-05, |
|
"loss": 0.005, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"grad_norm": 0.0006481676246039569, |
|
"learning_rate": 2.0090347123157396e-05, |
|
"loss": 0.0002, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"grad_norm": 0.0009765150607563555, |
|
"learning_rate": 2.005072119194801e-05, |
|
"loss": 0.3095, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"grad_norm": 0.0008786149555817246, |
|
"learning_rate": 2.0011095260738627e-05, |
|
"loss": 0.1903, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"grad_norm": 0.00043602605001069605, |
|
"learning_rate": 1.9971469329529242e-05, |
|
"loss": 0.0002, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"grad_norm": 0.0006052827229723334, |
|
"learning_rate": 1.993184339831986e-05, |
|
"loss": 0.0028, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"grad_norm": 0.0027263278607279062, |
|
"learning_rate": 1.9892217467110477e-05, |
|
"loss": 0.0988, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"grad_norm": 0.0004901738138869405, |
|
"learning_rate": 1.9852591535901095e-05, |
|
"loss": 0.0008, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"grad_norm": 0.004134719260036945, |
|
"learning_rate": 1.981296560469171e-05, |
|
"loss": 0.0001, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"grad_norm": 6.425068378448486, |
|
"learning_rate": 1.9773339673482326e-05, |
|
"loss": 0.0009, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"grad_norm": 0.0021010099444538355, |
|
"learning_rate": 1.9733713742272945e-05, |
|
"loss": 0.0139, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"grad_norm": 0.0003429889620747417, |
|
"learning_rate": 1.969408781106356e-05, |
|
"loss": 0.0001, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"grad_norm": 0.00465469341725111, |
|
"learning_rate": 1.9654461879854176e-05, |
|
"loss": 0.0048, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"grad_norm": 0.0007626991719007492, |
|
"learning_rate": 1.9614835948644795e-05, |
|
"loss": 0.0694, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"grad_norm": 0.0005379422218538821, |
|
"learning_rate": 1.957521001743541e-05, |
|
"loss": 0.0001, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"grad_norm": 0.0018008677288889885, |
|
"learning_rate": 1.953558408622603e-05, |
|
"loss": 0.1537, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"grad_norm": 0.005486232694238424, |
|
"learning_rate": 1.9495958155016644e-05, |
|
"loss": 0.0001, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"grad_norm": 0.0016153625911101699, |
|
"learning_rate": 1.945633222380726e-05, |
|
"loss": 0.1517, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"grad_norm": 0.00048393840552307665, |
|
"learning_rate": 1.941670629259788e-05, |
|
"loss": 0.0515, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"grad_norm": 0.00044351426186040044, |
|
"learning_rate": 1.9377080361388494e-05, |
|
"loss": 0.0, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"grad_norm": 0.003928069956600666, |
|
"learning_rate": 1.9337454430179113e-05, |
|
"loss": 0.0001, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"grad_norm": 0.0009555955766700208, |
|
"learning_rate": 1.9297828498969728e-05, |
|
"loss": 0.0001, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"grad_norm": 0.003042226191610098, |
|
"learning_rate": 1.9258202567760344e-05, |
|
"loss": 0.0, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"grad_norm": 0.0003893129760399461, |
|
"learning_rate": 1.9218576636550962e-05, |
|
"loss": 0.0, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"grad_norm": 0.0008289095130749047, |
|
"learning_rate": 1.9178950705341574e-05, |
|
"loss": 0.0, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"grad_norm": 0.0010318297427147627, |
|
"learning_rate": 1.9139324774132193e-05, |
|
"loss": 0.0, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"grad_norm": 0.0007037441828288138, |
|
"learning_rate": 1.909969884292281e-05, |
|
"loss": 0.0837, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"grad_norm": 44.11083221435547, |
|
"learning_rate": 1.9060072911713424e-05, |
|
"loss": 0.0226, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"grad_norm": 0.0010193975176662207, |
|
"learning_rate": 1.9020446980504043e-05, |
|
"loss": 0.0018, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"grad_norm": 0.0026808753609657288, |
|
"learning_rate": 1.8980821049294658e-05, |
|
"loss": 0.0001, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"grad_norm": 0.0013365477789193392, |
|
"learning_rate": 1.8941195118085277e-05, |
|
"loss": 0.0, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"grad_norm": 33.180870056152344, |
|
"learning_rate": 1.8901569186875892e-05, |
|
"loss": 0.3046, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"grad_norm": 0.001624317723326385, |
|
"learning_rate": 1.8861943255666508e-05, |
|
"loss": 0.2476, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"grad_norm": 0.002660261234268546, |
|
"learning_rate": 1.8822317324457127e-05, |
|
"loss": 0.329, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"grad_norm": 0.001928847748786211, |
|
"learning_rate": 1.8782691393247742e-05, |
|
"loss": 0.0116, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"grad_norm": 0.0004771009262185544, |
|
"learning_rate": 1.8743065462038357e-05, |
|
"loss": 0.0, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"grad_norm": 0.0006694819312542677, |
|
"learning_rate": 1.8703439530828976e-05, |
|
"loss": 0.0743, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"grad_norm": 0.010220357216894627, |
|
"learning_rate": 1.8663813599619592e-05, |
|
"loss": 0.0001, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"grad_norm": 0.0014199281577020884, |
|
"learning_rate": 1.862418766841021e-05, |
|
"loss": 0.0103, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"grad_norm": 0.001806290470995009, |
|
"learning_rate": 1.8584561737200826e-05, |
|
"loss": 0.0006, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"grad_norm": 0.0005750704440288246, |
|
"learning_rate": 1.854493580599144e-05, |
|
"loss": 0.0003, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"grad_norm": 0.0009846296161413193, |
|
"learning_rate": 1.850530987478206e-05, |
|
"loss": 0.0013, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"grad_norm": 0.0016641179099678993, |
|
"learning_rate": 1.8465683943572676e-05, |
|
"loss": 0.06, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"grad_norm": 0.0014823460951447487, |
|
"learning_rate": 1.842605801236329e-05, |
|
"loss": 0.0008, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"grad_norm": 0.0026860409416258335, |
|
"learning_rate": 1.838643208115391e-05, |
|
"loss": 0.0005, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"grad_norm": 0.0014451199676841497, |
|
"learning_rate": 1.8346806149944525e-05, |
|
"loss": 0.0001, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"grad_norm": 0.004795750603079796, |
|
"learning_rate": 1.830718021873514e-05, |
|
"loss": 0.0003, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"grad_norm": 0.0025767534971237183, |
|
"learning_rate": 1.8267554287525756e-05, |
|
"loss": 0.0002, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"grad_norm": 0.0006194358575157821, |
|
"learning_rate": 1.8227928356316375e-05, |
|
"loss": 0.0738, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"grad_norm": 0.007454677484929562, |
|
"learning_rate": 1.818830242510699e-05, |
|
"loss": 0.0048, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"grad_norm": 0.0012314959894865751, |
|
"learning_rate": 1.8148676493897606e-05, |
|
"loss": 0.0045, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"grad_norm": 0.0007009029504843056, |
|
"learning_rate": 1.8109050562688224e-05, |
|
"loss": 0.2806, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"grad_norm": 0.0005554054514504969, |
|
"learning_rate": 1.806942463147884e-05, |
|
"loss": 0.0001, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"grad_norm": 0.00048346296534873545, |
|
"learning_rate": 1.8029798700269455e-05, |
|
"loss": 0.0458, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"grad_norm": 0.0011084218276664615, |
|
"learning_rate": 1.7990172769060074e-05, |
|
"loss": 0.0, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"grad_norm": 0.0003880435542669147, |
|
"learning_rate": 1.795054683785069e-05, |
|
"loss": 0.0142, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"grad_norm": 0.0006134477443993092, |
|
"learning_rate": 1.7910920906641308e-05, |
|
"loss": 0.0, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"grad_norm": 0.0005665639764629304, |
|
"learning_rate": 1.7871294975431924e-05, |
|
"loss": 0.0, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"grad_norm": 0.0003921152965631336, |
|
"learning_rate": 1.783166904422254e-05, |
|
"loss": 0.0, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"grad_norm": 0.001750220195390284, |
|
"learning_rate": 1.7792043113013158e-05, |
|
"loss": 0.0437, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"grad_norm": 0.0012650929857045412, |
|
"learning_rate": 1.7752417181803773e-05, |
|
"loss": 0.3903, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"grad_norm": 57.58509063720703, |
|
"learning_rate": 1.7712791250594392e-05, |
|
"loss": 0.0546, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"grad_norm": 0.00026887169224210083, |
|
"learning_rate": 1.7673165319385008e-05, |
|
"loss": 0.0001, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"grad_norm": 0.0019770157523453236, |
|
"learning_rate": 1.7633539388175623e-05, |
|
"loss": 0.1362, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"grad_norm": 0.0007267651380971074, |
|
"learning_rate": 1.7593913456966242e-05, |
|
"loss": 0.0006, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"grad_norm": 0.001434961101040244, |
|
"learning_rate": 1.7554287525756857e-05, |
|
"loss": 0.0263, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"grad_norm": 0.00044755820999853313, |
|
"learning_rate": 1.7514661594547473e-05, |
|
"loss": 0.0, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"grad_norm": 0.000376471463823691, |
|
"learning_rate": 1.7475035663338088e-05, |
|
"loss": 0.0207, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"grad_norm": 0.014877337031066418, |
|
"learning_rate": 1.7435409732128703e-05, |
|
"loss": 0.0, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"grad_norm": 0.0012328572338446975, |
|
"learning_rate": 1.7395783800919322e-05, |
|
"loss": 0.0259, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"grad_norm": 0.0011149095371365547, |
|
"learning_rate": 1.7356157869709938e-05, |
|
"loss": 0.0, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"grad_norm": 0.000868526753038168, |
|
"learning_rate": 1.7316531938500556e-05, |
|
"loss": 0.0107, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"grad_norm": 0.0003520081809256226, |
|
"learning_rate": 1.7276906007291172e-05, |
|
"loss": 0.0, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"grad_norm": 0.00045317449257709086, |
|
"learning_rate": 1.7237280076081787e-05, |
|
"loss": 0.1845, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"grad_norm": 0.00035488023422658443, |
|
"learning_rate": 1.7197654144872406e-05, |
|
"loss": 0.0, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"grad_norm": 0.0007327714120037854, |
|
"learning_rate": 1.715802821366302e-05, |
|
"loss": 0.0001, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"grad_norm": 0.0025048046372830868, |
|
"learning_rate": 1.7118402282453637e-05, |
|
"loss": 0.0001, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"grad_norm": 0.00043331715278327465, |
|
"learning_rate": 1.7078776351244256e-05, |
|
"loss": 0.0167, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"grad_norm": 0.0004680192796513438, |
|
"learning_rate": 1.703915042003487e-05, |
|
"loss": 0.0, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"grad_norm": 0.0005406651180237532, |
|
"learning_rate": 1.699952448882549e-05, |
|
"loss": 0.0, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"grad_norm": 1.6138055324554443, |
|
"learning_rate": 1.6959898557616105e-05, |
|
"loss": 0.0005, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"grad_norm": 0.0005159827414900064, |
|
"learning_rate": 1.692027262640672e-05, |
|
"loss": 0.0001, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"grad_norm": 0.001156438491307199, |
|
"learning_rate": 1.688064669519734e-05, |
|
"loss": 0.0, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"grad_norm": 0.00034518956090323627, |
|
"learning_rate": 1.6841020763987955e-05, |
|
"loss": 0.0316, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"grad_norm": 0.0007839056779630482, |
|
"learning_rate": 1.680139483277857e-05, |
|
"loss": 0.0016, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"grad_norm": 0.000456125068012625, |
|
"learning_rate": 1.676176890156919e-05, |
|
"loss": 0.0, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"grad_norm": 0.0007673576474189758, |
|
"learning_rate": 1.6722142970359805e-05, |
|
"loss": 0.0, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"grad_norm": 0.000683379708789289, |
|
"learning_rate": 1.6682517039150423e-05, |
|
"loss": 0.0, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"grad_norm": 0.0009253775351680815, |
|
"learning_rate": 1.664289110794104e-05, |
|
"loss": 0.0001, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"eval_accuracy": 0.9473684210526315, |
|
"eval_loss": 0.3900492191314697, |
|
"eval_runtime": 2421.9145, |
|
"eval_samples_per_second": 0.275, |
|
"eval_steps_per_second": 0.137, |
|
"step": 9821 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.0004204540455248207, |
|
"learning_rate": 1.6603265176731654e-05, |
|
"loss": 0.5604, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.00036831918987445533, |
|
"learning_rate": 1.656363924552227e-05, |
|
"loss": 0.0001, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.00044371382682584226, |
|
"learning_rate": 1.6524013314312885e-05, |
|
"loss": 0.0, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.0005366410478018224, |
|
"learning_rate": 1.6484387383103504e-05, |
|
"loss": 0.0, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.0006946607609279454, |
|
"learning_rate": 1.644476145189412e-05, |
|
"loss": 0.0, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.00034042325569316745, |
|
"learning_rate": 1.6405135520684735e-05, |
|
"loss": 0.0001, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.00025543957599438727, |
|
"learning_rate": 1.6365509589475353e-05, |
|
"loss": 0.0001, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"grad_norm": 0.0005577169358730316, |
|
"learning_rate": 1.632588365826597e-05, |
|
"loss": 0.0001, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"grad_norm": 0.0007238492253236473, |
|
"learning_rate": 1.6286257727056588e-05, |
|
"loss": 0.0001, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"grad_norm": 0.00047818326856940985, |
|
"learning_rate": 1.6246631795847203e-05, |
|
"loss": 0.2815, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"grad_norm": 0.004355975892394781, |
|
"learning_rate": 1.620700586463782e-05, |
|
"loss": 0.0, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"grad_norm": 0.0002552367513999343, |
|
"learning_rate": 1.6167379933428437e-05, |
|
"loss": 0.0, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"grad_norm": 0.0011531308991834521, |
|
"learning_rate": 1.6127754002219053e-05, |
|
"loss": 0.0, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"grad_norm": 0.0009820818668231368, |
|
"learning_rate": 1.608812807100967e-05, |
|
"loss": 0.0, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"grad_norm": 0.0006331288604997098, |
|
"learning_rate": 1.6048502139800287e-05, |
|
"loss": 0.0001, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"grad_norm": 23.247167587280273, |
|
"learning_rate": 1.6008876208590902e-05, |
|
"loss": 0.4045, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"grad_norm": 0.0005796991754323244, |
|
"learning_rate": 1.596925027738152e-05, |
|
"loss": 0.2264, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"grad_norm": 0.0002432822366245091, |
|
"learning_rate": 1.5929624346172137e-05, |
|
"loss": 0.0, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"grad_norm": 0.00044970333692617714, |
|
"learning_rate": 1.5889998414962752e-05, |
|
"loss": 0.5798, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"grad_norm": 0.00043129053665325046, |
|
"learning_rate": 1.585037248375337e-05, |
|
"loss": 0.0, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"grad_norm": 0.0009400318958796561, |
|
"learning_rate": 1.5810746552543986e-05, |
|
"loss": 0.0001, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 0.001612617983482778, |
|
"learning_rate": 1.57711206213346e-05, |
|
"loss": 0.184, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 0.003397996537387371, |
|
"learning_rate": 1.5731494690125217e-05, |
|
"loss": 0.0001, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 0.003118938999250531, |
|
"learning_rate": 1.5691868758915836e-05, |
|
"loss": 0.0118, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 0.0016245280858129263, |
|
"learning_rate": 1.565224282770645e-05, |
|
"loss": 0.0002, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 0.003330792533233762, |
|
"learning_rate": 1.5612616896497067e-05, |
|
"loss": 0.0001, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 0.01675890013575554, |
|
"learning_rate": 1.5572990965287685e-05, |
|
"loss": 0.1361, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 0.0016782371094450355, |
|
"learning_rate": 1.55333650340783e-05, |
|
"loss": 0.0001, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 0.0006982790655456483, |
|
"learning_rate": 1.5493739102868916e-05, |
|
"loss": 0.0, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 0.004016962368041277, |
|
"learning_rate": 1.5454113171659535e-05, |
|
"loss": 0.0002, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 0.0016343995230272412, |
|
"learning_rate": 1.541448724045015e-05, |
|
"loss": 0.0001, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 0.0003891861706506461, |
|
"learning_rate": 1.537486130924077e-05, |
|
"loss": 0.0002, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 0.0005568304331973195, |
|
"learning_rate": 1.5335235378031385e-05, |
|
"loss": 0.0, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 0.0009192074066959321, |
|
"learning_rate": 1.5295609446822e-05, |
|
"loss": 0.0001, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 0.00041831223643384874, |
|
"learning_rate": 1.5255983515612617e-05, |
|
"loss": 0.0, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"grad_norm": 0.002276873914524913, |
|
"learning_rate": 1.5216357584403234e-05, |
|
"loss": 0.0001, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"grad_norm": 0.0021974798291921616, |
|
"learning_rate": 1.5176731653193851e-05, |
|
"loss": 0.0, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"grad_norm": 0.03672347217798233, |
|
"learning_rate": 1.5137105721984468e-05, |
|
"loss": 0.0001, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"grad_norm": 0.0004960880614817142, |
|
"learning_rate": 1.5097479790775086e-05, |
|
"loss": 0.0001, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"grad_norm": 0.0003698187356349081, |
|
"learning_rate": 1.5057853859565701e-05, |
|
"loss": 0.0, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"grad_norm": 30.79059600830078, |
|
"learning_rate": 1.5018227928356318e-05, |
|
"loss": 0.013, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"grad_norm": 0.00040281921974383295, |
|
"learning_rate": 1.4978601997146935e-05, |
|
"loss": 0.0001, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"grad_norm": 0.001930213999003172, |
|
"learning_rate": 1.4938976065937552e-05, |
|
"loss": 0.0005, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"grad_norm": 0.0016294418601319194, |
|
"learning_rate": 1.4899350134728166e-05, |
|
"loss": 0.0001, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"grad_norm": 0.0029418901540338993, |
|
"learning_rate": 1.4859724203518781e-05, |
|
"loss": 0.0001, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"grad_norm": 0.0005179463187232614, |
|
"learning_rate": 1.4820098272309399e-05, |
|
"loss": 0.0, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"grad_norm": 117.77789306640625, |
|
"learning_rate": 1.4780472341100016e-05, |
|
"loss": 0.0485, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"grad_norm": 0.0006345610017888248, |
|
"learning_rate": 1.4740846409890633e-05, |
|
"loss": 0.0072, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"grad_norm": 0.004750640131533146, |
|
"learning_rate": 1.470122047868125e-05, |
|
"loss": 0.0001, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 0.0016635819338262081, |
|
"learning_rate": 1.4661594547471865e-05, |
|
"loss": 0.5727, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 0.0009257107740268111, |
|
"learning_rate": 1.4621968616262482e-05, |
|
"loss": 0.019, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 0.0004995065974071622, |
|
"learning_rate": 1.45823426850531e-05, |
|
"loss": 0.0, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 0.003641214920207858, |
|
"learning_rate": 1.4542716753843717e-05, |
|
"loss": 0.3737, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 0.0005538859404623508, |
|
"learning_rate": 1.4503090822634332e-05, |
|
"loss": 0.1959, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 0.0024865760933607817, |
|
"learning_rate": 1.4463464891424949e-05, |
|
"loss": 0.0, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 0.0012498443247750401, |
|
"learning_rate": 1.4423838960215566e-05, |
|
"loss": 0.0003, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 0.003093864070251584, |
|
"learning_rate": 1.4384213029006183e-05, |
|
"loss": 0.0001, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 0.0016697756946086884, |
|
"learning_rate": 1.4344587097796799e-05, |
|
"loss": 0.0001, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 0.0024545500054955482, |
|
"learning_rate": 1.4304961166587416e-05, |
|
"loss": 0.0, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 0.0010031814454123378, |
|
"learning_rate": 1.4265335235378033e-05, |
|
"loss": 0.0, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 0.000834242207929492, |
|
"learning_rate": 1.422570930416865e-05, |
|
"loss": 0.214, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 0.0008862247341312468, |
|
"learning_rate": 1.4186083372959265e-05, |
|
"loss": 0.0001, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 0.0010633807396516204, |
|
"learning_rate": 1.4146457441749883e-05, |
|
"loss": 0.0104, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"grad_norm": 24.041336059570312, |
|
"learning_rate": 1.41068315105405e-05, |
|
"loss": 0.0049, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"grad_norm": 0.0011859643273055553, |
|
"learning_rate": 1.4067205579331113e-05, |
|
"loss": 0.0001, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"grad_norm": 0.0006510709063149989, |
|
"learning_rate": 1.402757964812173e-05, |
|
"loss": 0.001, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"grad_norm": 0.000353335402905941, |
|
"learning_rate": 1.3987953716912348e-05, |
|
"loss": 0.0002, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"grad_norm": 0.0005472557386383414, |
|
"learning_rate": 1.3948327785702963e-05, |
|
"loss": 0.0001, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"grad_norm": 0.0006235065520741045, |
|
"learning_rate": 1.390870185449358e-05, |
|
"loss": 0.0004, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"grad_norm": 0.00039498330443166196, |
|
"learning_rate": 1.3869075923284197e-05, |
|
"loss": 0.0, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"grad_norm": 0.0009459428838454187, |
|
"learning_rate": 1.3829449992074814e-05, |
|
"loss": 0.125, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"grad_norm": 0.000288288458250463, |
|
"learning_rate": 1.378982406086543e-05, |
|
"loss": 0.0005, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"grad_norm": 0.0010236125672236085, |
|
"learning_rate": 1.3750198129656047e-05, |
|
"loss": 0.0001, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"grad_norm": 0.0005923935095779598, |
|
"learning_rate": 1.3710572198446664e-05, |
|
"loss": 0.0, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"grad_norm": 0.001925037824548781, |
|
"learning_rate": 1.3670946267237281e-05, |
|
"loss": 0.0, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"grad_norm": 0.0010172594338655472, |
|
"learning_rate": 1.3631320336027898e-05, |
|
"loss": 0.0, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"grad_norm": 0.00022477912716567516, |
|
"learning_rate": 1.3591694404818514e-05, |
|
"loss": 0.4117, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 0.001964542781934142, |
|
"learning_rate": 1.355206847360913e-05, |
|
"loss": 0.0, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 0.0008729117107577622, |
|
"learning_rate": 1.3512442542399748e-05, |
|
"loss": 0.005, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 0.0013933833688497543, |
|
"learning_rate": 1.3472816611190365e-05, |
|
"loss": 0.0, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 0.720376193523407, |
|
"learning_rate": 1.343319067998098e-05, |
|
"loss": 0.0005, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 0.0024294324684888124, |
|
"learning_rate": 1.3393564748771597e-05, |
|
"loss": 0.5075, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 0.00034565231180749834, |
|
"learning_rate": 1.3353938817562215e-05, |
|
"loss": 0.0017, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 0.0005883481935597956, |
|
"learning_rate": 1.3314312886352832e-05, |
|
"loss": 0.0, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 0.001018638489767909, |
|
"learning_rate": 1.3274686955143447e-05, |
|
"loss": 0.0005, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 0.000567563867662102, |
|
"learning_rate": 1.3235061023934064e-05, |
|
"loss": 0.0071, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 0.0006969044334255159, |
|
"learning_rate": 1.3195435092724678e-05, |
|
"loss": 0.2151, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 0.000248556025326252, |
|
"learning_rate": 1.3155809161515295e-05, |
|
"loss": 0.0, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 0.0008631858509033918, |
|
"learning_rate": 1.3116183230305912e-05, |
|
"loss": 0.0001, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 0.001508180401287973, |
|
"learning_rate": 1.307655729909653e-05, |
|
"loss": 0.0001, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 0.0005554750678129494, |
|
"learning_rate": 1.3036931367887145e-05, |
|
"loss": 0.0, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"grad_norm": 0.0003934628330171108, |
|
"learning_rate": 1.2997305436677762e-05, |
|
"loss": 0.0, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"grad_norm": 0.001727793482132256, |
|
"learning_rate": 1.2957679505468379e-05, |
|
"loss": 0.0, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"grad_norm": 0.002404275583103299, |
|
"learning_rate": 1.2918053574258996e-05, |
|
"loss": 0.0, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"grad_norm": 0.0008175792172551155, |
|
"learning_rate": 1.2878427643049611e-05, |
|
"loss": 0.0, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"grad_norm": 0.0022247559390962124, |
|
"learning_rate": 1.2838801711840228e-05, |
|
"loss": 0.0001, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"grad_norm": 0.0014646403724327683, |
|
"learning_rate": 1.2799175780630846e-05, |
|
"loss": 0.001, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"grad_norm": 0.0020718346349895, |
|
"learning_rate": 1.2759549849421463e-05, |
|
"loss": 0.0001, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"grad_norm": 0.0008824109099805355, |
|
"learning_rate": 1.2719923918212078e-05, |
|
"loss": 0.5802, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"grad_norm": 0.0007525623659603298, |
|
"learning_rate": 1.2680297987002695e-05, |
|
"loss": 0.0, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"grad_norm": 64.19054412841797, |
|
"learning_rate": 1.2640672055793312e-05, |
|
"loss": 0.4105, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"grad_norm": 0.0019008672097697854, |
|
"learning_rate": 1.260104612458393e-05, |
|
"loss": 0.0005, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"grad_norm": 0.0010036254534497857, |
|
"learning_rate": 1.2561420193374545e-05, |
|
"loss": 0.0006, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"grad_norm": 0.0006118649616837502, |
|
"learning_rate": 1.2521794262165162e-05, |
|
"loss": 0.0, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"grad_norm": 0.003166553797200322, |
|
"learning_rate": 1.2482168330955777e-05, |
|
"loss": 0.0001, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"grad_norm": 0.001118882093578577, |
|
"learning_rate": 1.2442542399746394e-05, |
|
"loss": 0.0, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"grad_norm": 0.0010632872581481934, |
|
"learning_rate": 1.2402916468537012e-05, |
|
"loss": 0.0001, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"grad_norm": 0.001252860063686967, |
|
"learning_rate": 1.2363290537327627e-05, |
|
"loss": 0.0001, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"grad_norm": 0.003005104372277856, |
|
"learning_rate": 1.2323664606118244e-05, |
|
"loss": 0.0001, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"grad_norm": 0.004219905007630587, |
|
"learning_rate": 1.2284038674908861e-05, |
|
"loss": 0.0006, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"grad_norm": 0.0003512962721288204, |
|
"learning_rate": 1.2244412743699478e-05, |
|
"loss": 0.0, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"grad_norm": 0.0026769828982651234, |
|
"learning_rate": 1.2204786812490095e-05, |
|
"loss": 0.0001, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"grad_norm": 0.0003416830440983176, |
|
"learning_rate": 1.216516088128071e-05, |
|
"loss": 0.0, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"grad_norm": 0.0010573529871180654, |
|
"learning_rate": 1.2125534950071326e-05, |
|
"loss": 0.0, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"grad_norm": 0.0013822006294503808, |
|
"learning_rate": 1.2085909018861943e-05, |
|
"loss": 0.2014, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"grad_norm": 0.0003184191882610321, |
|
"learning_rate": 1.204628308765256e-05, |
|
"loss": 0.0, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"grad_norm": 0.004402919672429562, |
|
"learning_rate": 1.2006657156443178e-05, |
|
"loss": 0.006, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"grad_norm": 4.32048225402832, |
|
"learning_rate": 1.1967031225233793e-05, |
|
"loss": 0.0019, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"grad_norm": 0.0006507772486656904, |
|
"learning_rate": 1.192740529402441e-05, |
|
"loss": 0.0, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"grad_norm": 0.0004223829018883407, |
|
"learning_rate": 1.1887779362815027e-05, |
|
"loss": 0.0001, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"grad_norm": 0.0006153634749352932, |
|
"learning_rate": 1.1848153431605644e-05, |
|
"loss": 0.0001, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"grad_norm": 0.0007072246517054737, |
|
"learning_rate": 1.180852750039626e-05, |
|
"loss": 0.0, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"grad_norm": 0.0007087733829393983, |
|
"learning_rate": 1.1768901569186877e-05, |
|
"loss": 0.0591, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"grad_norm": 0.00040967803215608, |
|
"learning_rate": 1.1729275637977492e-05, |
|
"loss": 0.0, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"grad_norm": 0.0018612256972119212, |
|
"learning_rate": 1.168964970676811e-05, |
|
"loss": 0.0001, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"grad_norm": 0.0016640513204038143, |
|
"learning_rate": 1.1650023775558726e-05, |
|
"loss": 0.0001, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"grad_norm": 0.004190579988062382, |
|
"learning_rate": 1.1610397844349342e-05, |
|
"loss": 0.0, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"grad_norm": 0.001836647279560566, |
|
"learning_rate": 1.1570771913139959e-05, |
|
"loss": 0.0, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"grad_norm": 0.0005556776304729283, |
|
"learning_rate": 1.1531145981930576e-05, |
|
"loss": 0.0001, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"grad_norm": 0.0008808193379081786, |
|
"learning_rate": 1.1491520050721193e-05, |
|
"loss": 0.0001, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"grad_norm": 0.0001681848953012377, |
|
"learning_rate": 1.1451894119511809e-05, |
|
"loss": 0.2524, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"grad_norm": 0.0008354588062502444, |
|
"learning_rate": 1.1412268188302426e-05, |
|
"loss": 0.0, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"grad_norm": 0.00051628437358886, |
|
"learning_rate": 1.1372642257093043e-05, |
|
"loss": 0.0, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"grad_norm": 0.0008145067258737981, |
|
"learning_rate": 1.133301632588366e-05, |
|
"loss": 0.0001, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"grad_norm": 0.0004701870202552527, |
|
"learning_rate": 1.1293390394674275e-05, |
|
"loss": 0.1907, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"grad_norm": 0.0011952131753787398, |
|
"learning_rate": 1.125376446346489e-05, |
|
"loss": 0.0, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"grad_norm": 0.00032050846493802965, |
|
"learning_rate": 1.1214138532255508e-05, |
|
"loss": 0.0, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"grad_norm": 0.0006612459546886384, |
|
"learning_rate": 1.1174512601046125e-05, |
|
"loss": 0.0001, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"grad_norm": 0.0030058922711759806, |
|
"learning_rate": 1.1134886669836742e-05, |
|
"loss": 0.0, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"grad_norm": 0.0034754828084260225, |
|
"learning_rate": 1.1095260738627357e-05, |
|
"loss": 0.0003, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"eval_accuracy": 0.9578947368421052, |
|
"eval_loss": 0.2666740119457245, |
|
"eval_runtime": 2322.4119, |
|
"eval_samples_per_second": 0.286, |
|
"eval_steps_per_second": 0.143, |
|
"step": 11224 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.004194905515760183, |
|
"learning_rate": 1.1055634807417975e-05, |
|
"loss": 0.0001, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.0024937952402979136, |
|
"learning_rate": 1.1016008876208592e-05, |
|
"loss": 0.0, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.00039031429332681, |
|
"learning_rate": 1.0976382944999209e-05, |
|
"loss": 0.0, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.005691041238605976, |
|
"learning_rate": 1.0936757013789826e-05, |
|
"loss": 0.0001, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.00017179737915284932, |
|
"learning_rate": 1.0897131082580441e-05, |
|
"loss": 0.0001, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.000949267705436796, |
|
"learning_rate": 1.0857505151371057e-05, |
|
"loss": 0.0001, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.0003036385169252753, |
|
"learning_rate": 1.0817879220161674e-05, |
|
"loss": 0.0001, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"grad_norm": 0.004243906121701002, |
|
"learning_rate": 1.0778253288952291e-05, |
|
"loss": 0.0005, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"grad_norm": 0.0010142240207642317, |
|
"learning_rate": 1.0738627357742908e-05, |
|
"loss": 0.0, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"grad_norm": 0.0010380720486864448, |
|
"learning_rate": 1.0699001426533523e-05, |
|
"loss": 0.0001, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"grad_norm": 0.0005737761966884136, |
|
"learning_rate": 1.065937549532414e-05, |
|
"loss": 0.0001, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"grad_norm": 0.001465731067582965, |
|
"learning_rate": 1.0619749564114758e-05, |
|
"loss": 0.0, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"grad_norm": 0.002500841859728098, |
|
"learning_rate": 1.0580123632905375e-05, |
|
"loss": 0.0, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"grad_norm": 0.00024287942505907267, |
|
"learning_rate": 1.054049770169599e-05, |
|
"loss": 0.0, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"grad_norm": 0.0006320082466118038, |
|
"learning_rate": 1.0500871770486607e-05, |
|
"loss": 0.0001, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"grad_norm": 0.00030024844454601407, |
|
"learning_rate": 1.0461245839277224e-05, |
|
"loss": 0.0097, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"grad_norm": 0.00043432554230093956, |
|
"learning_rate": 1.042161990806784e-05, |
|
"loss": 0.0421, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"grad_norm": 0.002737953094765544, |
|
"learning_rate": 1.0381993976858457e-05, |
|
"loss": 0.0, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"grad_norm": 0.000816858431790024, |
|
"learning_rate": 1.0342368045649072e-05, |
|
"loss": 0.0, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"grad_norm": 0.00036986047052778304, |
|
"learning_rate": 1.030274211443969e-05, |
|
"loss": 0.0, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"grad_norm": 0.0004323932225815952, |
|
"learning_rate": 1.0263116183230307e-05, |
|
"loss": 0.0002, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"grad_norm": 0.0004024511144962162, |
|
"learning_rate": 1.0223490252020924e-05, |
|
"loss": 0.421, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"grad_norm": 0.0024430027697235346, |
|
"learning_rate": 1.0183864320811539e-05, |
|
"loss": 0.0, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"grad_norm": 0.001345345051959157, |
|
"learning_rate": 1.0144238389602156e-05, |
|
"loss": 0.0, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"grad_norm": 0.0006153620779514313, |
|
"learning_rate": 1.0104612458392773e-05, |
|
"loss": 0.0001, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"grad_norm": 0.0015972702531144023, |
|
"learning_rate": 1.006498652718339e-05, |
|
"loss": 0.0, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"grad_norm": 0.0008706132066436112, |
|
"learning_rate": 1.0025360595974006e-05, |
|
"loss": 0.0001, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"grad_norm": 0.001384895178489387, |
|
"learning_rate": 9.985734664764621e-06, |
|
"loss": 0.0001, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"grad_norm": 0.0010631100740283728, |
|
"learning_rate": 9.946108733555238e-06, |
|
"loss": 0.0002, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"grad_norm": 0.0007243629661388695, |
|
"learning_rate": 9.906482802345855e-06, |
|
"loss": 0.0001, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"grad_norm": 74.74536895751953, |
|
"learning_rate": 9.866856871136473e-06, |
|
"loss": 0.0797, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"grad_norm": 0.005114846862852573, |
|
"learning_rate": 9.827230939927088e-06, |
|
"loss": 0.0001, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"grad_norm": 0.0024818070232868195, |
|
"learning_rate": 9.787605008717705e-06, |
|
"loss": 0.0001, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"grad_norm": 0.00041646783938631415, |
|
"learning_rate": 9.747979077508322e-06, |
|
"loss": 0.0, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"grad_norm": 0.0007332797977142036, |
|
"learning_rate": 9.70835314629894e-06, |
|
"loss": 0.0116, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"grad_norm": 0.0007879806798882782, |
|
"learning_rate": 9.668727215089556e-06, |
|
"loss": 0.0, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"grad_norm": 0.0009714935440570116, |
|
"learning_rate": 9.629101283880172e-06, |
|
"loss": 0.0001, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"grad_norm": 0.0009343309211544693, |
|
"learning_rate": 9.589475352670787e-06, |
|
"loss": 0.5311, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"grad_norm": 0.00037891563260927796, |
|
"learning_rate": 9.549849421461404e-06, |
|
"loss": 0.002, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"grad_norm": 0.001986218150705099, |
|
"learning_rate": 9.510223490252021e-06, |
|
"loss": 0.0111, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"grad_norm": 0.0015318701043725014, |
|
"learning_rate": 9.470597559042639e-06, |
|
"loss": 0.0001, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"grad_norm": 0.0006765589932911098, |
|
"learning_rate": 9.430971627833254e-06, |
|
"loss": 0.0, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"grad_norm": 0.0005000099190510809, |
|
"learning_rate": 9.391345696623871e-06, |
|
"loss": 0.0, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"grad_norm": 0.0017080691177397966, |
|
"learning_rate": 9.351719765414488e-06, |
|
"loss": 0.0002, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"grad_norm": 0.0017356324242427945, |
|
"learning_rate": 9.312093834205105e-06, |
|
"loss": 0.0001, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"grad_norm": 0.0010568661382421851, |
|
"learning_rate": 9.27246790299572e-06, |
|
"loss": 0.0, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"grad_norm": 0.0014095234218984842, |
|
"learning_rate": 9.232841971786338e-06, |
|
"loss": 0.0021, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"grad_norm": 0.00167833489831537, |
|
"learning_rate": 9.193216040576955e-06, |
|
"loss": 0.0001, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"grad_norm": 0.0016795884585008025, |
|
"learning_rate": 9.15359010936757e-06, |
|
"loss": 0.0002, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 0.0003502909676171839, |
|
"learning_rate": 9.113964178158187e-06, |
|
"loss": 0.0, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 0.10941363871097565, |
|
"learning_rate": 9.074338246948803e-06, |
|
"loss": 0.0001, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 0.0014083647402003407, |
|
"learning_rate": 9.03471231573942e-06, |
|
"loss": 0.3081, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 0.0014537267852574587, |
|
"learning_rate": 8.995086384530037e-06, |
|
"loss": 0.0, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 0.0005781695363111794, |
|
"learning_rate": 8.955460453320654e-06, |
|
"loss": 0.0, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 0.0007176825893111527, |
|
"learning_rate": 8.91583452211127e-06, |
|
"loss": 0.0, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 0.000545515853445977, |
|
"learning_rate": 8.876208590901887e-06, |
|
"loss": 0.0, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 0.0025596795603632927, |
|
"learning_rate": 8.836582659692504e-06, |
|
"loss": 0.0002, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 0.030005350708961487, |
|
"learning_rate": 8.796956728483121e-06, |
|
"loss": 0.0001, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 0.00035480278893373907, |
|
"learning_rate": 8.757330797273736e-06, |
|
"loss": 0.0018, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 0.004515402484685183, |
|
"learning_rate": 8.717704866064352e-06, |
|
"loss": 0.0, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 0.0032044288236647844, |
|
"learning_rate": 8.678078934854969e-06, |
|
"loss": 0.0036, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 0.0009629257838241756, |
|
"learning_rate": 8.638453003645586e-06, |
|
"loss": 0.149, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 0.0024080132134258747, |
|
"learning_rate": 8.598827072436203e-06, |
|
"loss": 0.0003, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"grad_norm": 0.0015089749358594418, |
|
"learning_rate": 8.559201141226818e-06, |
|
"loss": 0.0, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"grad_norm": 0.0019321951549500227, |
|
"learning_rate": 8.519575210017436e-06, |
|
"loss": 0.0, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"grad_norm": 0.005924368277192116, |
|
"learning_rate": 8.479949278808053e-06, |
|
"loss": 0.0, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"grad_norm": 0.0007942487136460841, |
|
"learning_rate": 8.44032334759867e-06, |
|
"loss": 0.0, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"grad_norm": 0.0022497123572975397, |
|
"learning_rate": 8.400697416389285e-06, |
|
"loss": 0.1055, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"grad_norm": 0.0006818937254138291, |
|
"learning_rate": 8.361071485179902e-06, |
|
"loss": 0.0001, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"grad_norm": 0.0004379069432616234, |
|
"learning_rate": 8.32144555397052e-06, |
|
"loss": 0.0, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"grad_norm": 0.00047276023542508483, |
|
"learning_rate": 8.281819622761135e-06, |
|
"loss": 0.0, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"grad_norm": 0.0004771367821376771, |
|
"learning_rate": 8.242193691551752e-06, |
|
"loss": 0.0, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"grad_norm": 0.0005501986015588045, |
|
"learning_rate": 8.202567760342367e-06, |
|
"loss": 0.0001, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"grad_norm": 0.0011177220148965716, |
|
"learning_rate": 8.162941829132984e-06, |
|
"loss": 0.0703, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"grad_norm": 0.0004951581358909607, |
|
"learning_rate": 8.123315897923602e-06, |
|
"loss": 0.0, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"grad_norm": 0.0008309069671668112, |
|
"learning_rate": 8.083689966714219e-06, |
|
"loss": 0.0, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"grad_norm": 0.000472767511382699, |
|
"learning_rate": 8.044064035504836e-06, |
|
"loss": 0.5261, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"grad_norm": 0.00044904148671776056, |
|
"learning_rate": 8.004438104295451e-06, |
|
"loss": 0.0, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"grad_norm": 0.0004107660206500441, |
|
"learning_rate": 7.964812173086068e-06, |
|
"loss": 0.0, |
|
"step": 12010 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"grad_norm": 0.00042746157851070166, |
|
"learning_rate": 7.925186241876685e-06, |
|
"loss": 0.0, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"grad_norm": 0.0007110532023943961, |
|
"learning_rate": 7.8855603106673e-06, |
|
"loss": 0.0, |
|
"step": 12030 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"grad_norm": 0.0007705994066782296, |
|
"learning_rate": 7.845934379457918e-06, |
|
"loss": 0.0, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"grad_norm": 0.0006966418586671352, |
|
"learning_rate": 7.806308448248533e-06, |
|
"loss": 0.0, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"grad_norm": 0.020446307957172394, |
|
"learning_rate": 7.76668251703915e-06, |
|
"loss": 0.0001, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"grad_norm": 0.0004377638688310981, |
|
"learning_rate": 7.727056585829768e-06, |
|
"loss": 0.0, |
|
"step": 12070 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"grad_norm": 0.00036184967029839754, |
|
"learning_rate": 7.687430654620385e-06, |
|
"loss": 0.0002, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"grad_norm": 0.00029569625621661544, |
|
"learning_rate": 7.647804723411e-06, |
|
"loss": 0.0001, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"grad_norm": 0.0003205812827218324, |
|
"learning_rate": 7.608178792201617e-06, |
|
"loss": 0.0236, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"grad_norm": 0.00043995011947117746, |
|
"learning_rate": 7.568552860992234e-06, |
|
"loss": 0.0001, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"grad_norm": 0.0021792801562696695, |
|
"learning_rate": 7.5289269297828505e-06, |
|
"loss": 0.0, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"grad_norm": 0.003733986523002386, |
|
"learning_rate": 7.489300998573468e-06, |
|
"loss": 0.0, |
|
"step": 12130 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"grad_norm": 0.001138021470978856, |
|
"learning_rate": 7.449675067364083e-06, |
|
"loss": 0.0001, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"grad_norm": 0.0003544053470250219, |
|
"learning_rate": 7.410049136154699e-06, |
|
"loss": 0.0, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"grad_norm": 0.0007718518027104437, |
|
"learning_rate": 7.370423204945316e-06, |
|
"loss": 0.0, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"grad_norm": 0.000794577703345567, |
|
"learning_rate": 7.330797273735933e-06, |
|
"loss": 0.0, |
|
"step": 12170 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"grad_norm": 0.0007835258147679269, |
|
"learning_rate": 7.29117134252655e-06, |
|
"loss": 0.0, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"grad_norm": 0.0008351559517905116, |
|
"learning_rate": 7.251545411317166e-06, |
|
"loss": 0.0, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"grad_norm": 0.001067393459379673, |
|
"learning_rate": 7.211919480107783e-06, |
|
"loss": 0.0, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"grad_norm": 0.0005535806412808597, |
|
"learning_rate": 7.172293548898399e-06, |
|
"loss": 0.0, |
|
"step": 12210 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"grad_norm": 0.0013392162509262562, |
|
"learning_rate": 7.1326676176890165e-06, |
|
"loss": 0.0, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"grad_norm": 0.011801800690591335, |
|
"learning_rate": 7.093041686479633e-06, |
|
"loss": 0.0001, |
|
"step": 12230 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"grad_norm": 0.0003349117760080844, |
|
"learning_rate": 7.05341575527025e-06, |
|
"loss": 0.0, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"grad_norm": 0.0009791525080800056, |
|
"learning_rate": 7.013789824060865e-06, |
|
"loss": 0.0, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"grad_norm": 0.0003134564030915499, |
|
"learning_rate": 6.9741638928514815e-06, |
|
"loss": 0.0, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"grad_norm": 0.0011281302431598306, |
|
"learning_rate": 6.934537961642099e-06, |
|
"loss": 0.0003, |
|
"step": 12270 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 0.0004596656945068389, |
|
"learning_rate": 6.894912030432715e-06, |
|
"loss": 0.0, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 0.017007848247885704, |
|
"learning_rate": 6.855286099223332e-06, |
|
"loss": 0.1894, |
|
"step": 12290 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 0.0009561624028719962, |
|
"learning_rate": 6.815660168013949e-06, |
|
"loss": 0.0001, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 0.0006208363920450211, |
|
"learning_rate": 6.776034236804565e-06, |
|
"loss": 0.0, |
|
"step": 12310 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 0.00040551909478381276, |
|
"learning_rate": 6.7364083055951825e-06, |
|
"loss": 0.0, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 0.0010045063681900501, |
|
"learning_rate": 6.696782374385799e-06, |
|
"loss": 0.0001, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 0.001559635391458869, |
|
"learning_rate": 6.657156443176416e-06, |
|
"loss": 0.1317, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 0.00036661443300545216, |
|
"learning_rate": 6.617530511967032e-06, |
|
"loss": 0.0, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 0.0022761470172554255, |
|
"learning_rate": 6.5779045807576475e-06, |
|
"loss": 0.0, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 0.0002771701547317207, |
|
"learning_rate": 6.538278649548265e-06, |
|
"loss": 0.0, |
|
"step": 12370 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 0.0009405760793015361, |
|
"learning_rate": 6.498652718338881e-06, |
|
"loss": 0.0, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 0.0011777085019275546, |
|
"learning_rate": 6.459026787129498e-06, |
|
"loss": 0.0, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 0.0007950080907903612, |
|
"learning_rate": 6.419400855920114e-06, |
|
"loss": 0.0023, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 0.000328573863953352, |
|
"learning_rate": 6.379774924710731e-06, |
|
"loss": 0.0001, |
|
"step": 12410 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"grad_norm": 0.000489677709992975, |
|
"learning_rate": 6.340148993501348e-06, |
|
"loss": 0.0, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"grad_norm": 19.678516387939453, |
|
"learning_rate": 6.300523062291965e-06, |
|
"loss": 0.2121, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"grad_norm": 0.001576061244122684, |
|
"learning_rate": 6.260897131082581e-06, |
|
"loss": 0.2006, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"grad_norm": 0.0010969837894663215, |
|
"learning_rate": 6.221271199873197e-06, |
|
"loss": 0.0089, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"grad_norm": 0.0006820796988904476, |
|
"learning_rate": 6.1816452686638135e-06, |
|
"loss": 0.0001, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"grad_norm": 0.0039375657215714455, |
|
"learning_rate": 6.142019337454431e-06, |
|
"loss": 0.0, |
|
"step": 12470 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"grad_norm": 0.00018676265608519316, |
|
"learning_rate": 6.102393406245048e-06, |
|
"loss": 0.0002, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"grad_norm": 0.0015864548040553927, |
|
"learning_rate": 6.062767475035663e-06, |
|
"loss": 0.0, |
|
"step": 12490 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"grad_norm": 0.0005812132731080055, |
|
"learning_rate": 6.02314154382628e-06, |
|
"loss": 0.0001, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"grad_norm": 0.0015394919319078326, |
|
"learning_rate": 5.9835156126168965e-06, |
|
"loss": 0.0, |
|
"step": 12510 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"grad_norm": 0.5876509547233582, |
|
"learning_rate": 5.943889681407514e-06, |
|
"loss": 0.0002, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"grad_norm": 0.001257477910257876, |
|
"learning_rate": 5.90426375019813e-06, |
|
"loss": 0.0, |
|
"step": 12530 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"grad_norm": 0.007748996838927269, |
|
"learning_rate": 5.864637818988746e-06, |
|
"loss": 0.0002, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"grad_norm": 0.0004220679693389684, |
|
"learning_rate": 5.825011887779363e-06, |
|
"loss": 0.0001, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"grad_norm": 0.0003514468262437731, |
|
"learning_rate": 5.7853859565699795e-06, |
|
"loss": 0.0062, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"grad_norm": 0.0004685299936681986, |
|
"learning_rate": 5.745760025360597e-06, |
|
"loss": 0.0016, |
|
"step": 12570 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"grad_norm": 0.0002851441968232393, |
|
"learning_rate": 5.706134094151213e-06, |
|
"loss": 0.0004, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"grad_norm": 0.0006324647110886872, |
|
"learning_rate": 5.66650816294183e-06, |
|
"loss": 0.0, |
|
"step": 12590 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"grad_norm": 0.000717841787263751, |
|
"learning_rate": 5.626882231732445e-06, |
|
"loss": 0.0, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"grad_norm": 0.001114896615035832, |
|
"learning_rate": 5.5872563005230625e-06, |
|
"loss": 0.0, |
|
"step": 12610 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"grad_norm": 0.0011514411307871342, |
|
"learning_rate": 5.547630369313679e-06, |
|
"loss": 0.0001, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"eval_accuracy": 0.9654135338345865, |
|
"eval_loss": 0.2435862421989441, |
|
"eval_runtime": 2357.1776, |
|
"eval_samples_per_second": 0.282, |
|
"eval_steps_per_second": 0.141, |
|
"step": 12627 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.00044704281026497483, |
|
"learning_rate": 5.508004438104296e-06, |
|
"loss": 0.0, |
|
"step": 12630 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.00041269470239058137, |
|
"learning_rate": 5.468378506894913e-06, |
|
"loss": 0.0, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.0003670216246973723, |
|
"learning_rate": 5.428752575685528e-06, |
|
"loss": 0.0, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.003106119344010949, |
|
"learning_rate": 5.3891266444761455e-06, |
|
"loss": 0.0, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.00040537622408010066, |
|
"learning_rate": 5.349500713266762e-06, |
|
"loss": 0.0, |
|
"step": 12670 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.00037262984551489353, |
|
"learning_rate": 5.309874782057379e-06, |
|
"loss": 0.0, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.000418797048041597, |
|
"learning_rate": 5.270248850847995e-06, |
|
"loss": 0.0, |
|
"step": 12690 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"grad_norm": 0.0015914670657366514, |
|
"learning_rate": 5.230622919638612e-06, |
|
"loss": 0.0, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"grad_norm": 0.005690779071301222, |
|
"learning_rate": 5.1909969884292285e-06, |
|
"loss": 0.0976, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"grad_norm": 0.001181070227175951, |
|
"learning_rate": 5.151371057219845e-06, |
|
"loss": 0.0, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"grad_norm": 0.0007823907653801143, |
|
"learning_rate": 5.111745126010462e-06, |
|
"loss": 0.0, |
|
"step": 12730 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"grad_norm": 0.0010620895773172379, |
|
"learning_rate": 5.072119194801078e-06, |
|
"loss": 0.0, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"grad_norm": 0.00028126072720624506, |
|
"learning_rate": 5.032493263591695e-06, |
|
"loss": 0.0052, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"grad_norm": 0.0005754511221311986, |
|
"learning_rate": 4.992867332382311e-06, |
|
"loss": 0.0, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"grad_norm": 0.000247256743023172, |
|
"learning_rate": 4.953241401172928e-06, |
|
"loss": 0.0, |
|
"step": 12770 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"grad_norm": 0.0017203809693455696, |
|
"learning_rate": 4.913615469963544e-06, |
|
"loss": 0.0001, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"grad_norm": 0.0005222621257416904, |
|
"learning_rate": 4.873989538754161e-06, |
|
"loss": 0.0, |
|
"step": 12790 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"grad_norm": 0.00047639888362027705, |
|
"learning_rate": 4.834363607544778e-06, |
|
"loss": 0.0001, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"grad_norm": 0.0015658453339710832, |
|
"learning_rate": 4.794737676335394e-06, |
|
"loss": 0.0, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"grad_norm": 0.0002700120967347175, |
|
"learning_rate": 4.755111745126011e-06, |
|
"loss": 0.0, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"grad_norm": 0.00036174681736156344, |
|
"learning_rate": 4.715485813916627e-06, |
|
"loss": 0.0, |
|
"step": 12830 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"grad_norm": 0.00048193742986768484, |
|
"learning_rate": 4.675859882707244e-06, |
|
"loss": 0.0001, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"grad_norm": 0.00021181856573093683, |
|
"learning_rate": 4.63623395149786e-06, |
|
"loss": 0.0, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"grad_norm": 0.0007221151608973742, |
|
"learning_rate": 4.5966080202884774e-06, |
|
"loss": 0.0001, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"grad_norm": 0.0008499003597535193, |
|
"learning_rate": 4.556982089079094e-06, |
|
"loss": 0.0, |
|
"step": 12870 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"grad_norm": 0.00024478594423271716, |
|
"learning_rate": 4.51735615786971e-06, |
|
"loss": 0.0, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"grad_norm": 0.000799850036855787, |
|
"learning_rate": 4.477730226660327e-06, |
|
"loss": 0.0, |
|
"step": 12890 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"grad_norm": 0.0012479170691221952, |
|
"learning_rate": 4.438104295450943e-06, |
|
"loss": 0.0007, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"grad_norm": 0.0008572249207645655, |
|
"learning_rate": 4.3984783642415604e-06, |
|
"loss": 0.0, |
|
"step": 12910 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"grad_norm": 0.00028230881434865296, |
|
"learning_rate": 4.358852433032176e-06, |
|
"loss": 0.0773, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"grad_norm": 0.0003641119983512908, |
|
"learning_rate": 4.319226501822793e-06, |
|
"loss": 0.0, |
|
"step": 12930 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"grad_norm": 0.0009531981777399778, |
|
"learning_rate": 4.279600570613409e-06, |
|
"loss": 0.0, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"grad_norm": 0.00067020149435848, |
|
"learning_rate": 4.239974639404026e-06, |
|
"loss": 0.0, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"grad_norm": 0.0001659138360992074, |
|
"learning_rate": 4.200348708194643e-06, |
|
"loss": 0.0, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"grad_norm": 0.0005148449563421309, |
|
"learning_rate": 4.16072277698526e-06, |
|
"loss": 0.0107, |
|
"step": 12970 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"grad_norm": 0.000638917728792876, |
|
"learning_rate": 4.121096845775876e-06, |
|
"loss": 0.0, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"grad_norm": 0.00047383896890096366, |
|
"learning_rate": 4.081470914566492e-06, |
|
"loss": 0.0, |
|
"step": 12990 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"grad_norm": 0.0007675238302908838, |
|
"learning_rate": 4.041844983357109e-06, |
|
"loss": 0.0, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"grad_norm": 0.001697351224720478, |
|
"learning_rate": 4.0022190521477256e-06, |
|
"loss": 0.0, |
|
"step": 13010 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"grad_norm": 0.00020665867486968637, |
|
"learning_rate": 3.962593120938343e-06, |
|
"loss": 0.0, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"grad_norm": 0.001027750549837947, |
|
"learning_rate": 3.922967189728959e-06, |
|
"loss": 0.2632, |
|
"step": 13030 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"grad_norm": 0.003146632807329297, |
|
"learning_rate": 3.883341258519575e-06, |
|
"loss": 0.0, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"grad_norm": 0.0007864089566282928, |
|
"learning_rate": 3.843715327310192e-06, |
|
"loss": 0.0044, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"grad_norm": 0.00022077991161495447, |
|
"learning_rate": 3.8040893961008086e-06, |
|
"loss": 0.0, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"grad_norm": 0.0005595972179435194, |
|
"learning_rate": 3.7644634648914252e-06, |
|
"loss": 0.0, |
|
"step": 13070 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"grad_norm": 0.0005725977243855596, |
|
"learning_rate": 3.7248375336820415e-06, |
|
"loss": 0.0, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"grad_norm": 0.0011127095203846693, |
|
"learning_rate": 3.685211602472658e-06, |
|
"loss": 0.0, |
|
"step": 13090 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"grad_norm": 0.001887647551484406, |
|
"learning_rate": 3.645585671263275e-06, |
|
"loss": 0.0, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"grad_norm": 0.0005976618267595768, |
|
"learning_rate": 3.6059597400538916e-06, |
|
"loss": 0.0003, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.0006656855694018304, |
|
"learning_rate": 3.5663338088445082e-06, |
|
"loss": 0.0, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.003439901163801551, |
|
"learning_rate": 3.526707877635125e-06, |
|
"loss": 0.0, |
|
"step": 13130 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.00043997442116960883, |
|
"learning_rate": 3.4870819464257408e-06, |
|
"loss": 0.0, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.0005484743160195649, |
|
"learning_rate": 3.4474560152163574e-06, |
|
"loss": 0.0, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.00040827819611877203, |
|
"learning_rate": 3.4078300840069746e-06, |
|
"loss": 0.0, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.005499335937201977, |
|
"learning_rate": 3.3682041527975912e-06, |
|
"loss": 0.0001, |
|
"step": 13170 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.001736334292218089, |
|
"learning_rate": 3.328578221588208e-06, |
|
"loss": 0.0, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.0006113905692473054, |
|
"learning_rate": 3.2889522903788238e-06, |
|
"loss": 0.0, |
|
"step": 13190 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.001001613331027329, |
|
"learning_rate": 3.2493263591694404e-06, |
|
"loss": 0.1631, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.0003023295139428228, |
|
"learning_rate": 3.209700427960057e-06, |
|
"loss": 0.0, |
|
"step": 13210 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.0009469907963648438, |
|
"learning_rate": 3.170074496750674e-06, |
|
"loss": 0.0, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.0007909215637482703, |
|
"learning_rate": 3.1304485655412905e-06, |
|
"loss": 0.0, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.001787104643881321, |
|
"learning_rate": 3.0908226343319067e-06, |
|
"loss": 0.0, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.0008837388013489544, |
|
"learning_rate": 3.051196703122524e-06, |
|
"loss": 0.0, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"grad_norm": 0.0007934242021292448, |
|
"learning_rate": 3.01157077191314e-06, |
|
"loss": 0.0004, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"grad_norm": 0.0011570610804483294, |
|
"learning_rate": 2.971944840703757e-06, |
|
"loss": 0.0001, |
|
"step": 13270 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"grad_norm": 0.00029090800671838224, |
|
"learning_rate": 2.932318909494373e-06, |
|
"loss": 0.0, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"grad_norm": 0.0010709144407883286, |
|
"learning_rate": 2.8926929782849897e-06, |
|
"loss": 0.0, |
|
"step": 13290 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"grad_norm": 0.001289168605580926, |
|
"learning_rate": 2.8530670470756064e-06, |
|
"loss": 0.0, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"grad_norm": 0.002187453443184495, |
|
"learning_rate": 2.8134411158662227e-06, |
|
"loss": 0.0, |
|
"step": 13310 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"grad_norm": 0.0007116499473340809, |
|
"learning_rate": 2.7738151846568394e-06, |
|
"loss": 0.0, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"grad_norm": 0.000514859682880342, |
|
"learning_rate": 2.7341892534474565e-06, |
|
"loss": 0.0001, |
|
"step": 13330 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"grad_norm": 0.0007328620995394886, |
|
"learning_rate": 2.6945633222380727e-06, |
|
"loss": 0.0691, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"grad_norm": 0.0007036814349703491, |
|
"learning_rate": 2.6549373910286894e-06, |
|
"loss": 0.0, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"grad_norm": 0.001070524798706174, |
|
"learning_rate": 2.615311459819306e-06, |
|
"loss": 0.0, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"grad_norm": 0.0008939993567764759, |
|
"learning_rate": 2.5756855286099224e-06, |
|
"loss": 0.0001, |
|
"step": 13370 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"grad_norm": 0.0004034818266518414, |
|
"learning_rate": 2.536059597400539e-06, |
|
"loss": 0.0206, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"grad_norm": 4.411261081695557, |
|
"learning_rate": 2.4964336661911553e-06, |
|
"loss": 0.0124, |
|
"step": 13390 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"grad_norm": 0.0006528793601319194, |
|
"learning_rate": 2.456807734981772e-06, |
|
"loss": 0.0, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"grad_norm": 0.0003673941537272185, |
|
"learning_rate": 2.417181803772389e-06, |
|
"loss": 0.0, |
|
"step": 13410 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"grad_norm": 0.001056182780303061, |
|
"learning_rate": 2.3775558725630054e-06, |
|
"loss": 0.0, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"grad_norm": 0.0012370526092126966, |
|
"learning_rate": 2.337929941353622e-06, |
|
"loss": 0.0001, |
|
"step": 13430 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"grad_norm": 0.0015783560229465365, |
|
"learning_rate": 2.2983040101442387e-06, |
|
"loss": 0.0, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"grad_norm": 0.0001985041017178446, |
|
"learning_rate": 2.258678078934855e-06, |
|
"loss": 0.0016, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"grad_norm": 0.0010269788326695561, |
|
"learning_rate": 2.2190521477254717e-06, |
|
"loss": 0.1057, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"grad_norm": 0.04036625847220421, |
|
"learning_rate": 2.179426216516088e-06, |
|
"loss": 0.2287, |
|
"step": 13470 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"grad_norm": 0.000473200052510947, |
|
"learning_rate": 2.1398002853067046e-06, |
|
"loss": 0.0001, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"grad_norm": 0.0003723807749338448, |
|
"learning_rate": 2.1001743540973213e-06, |
|
"loss": 0.0, |
|
"step": 13490 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"grad_norm": 0.0007169354357756674, |
|
"learning_rate": 2.060548422887938e-06, |
|
"loss": 0.0008, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"grad_norm": 0.00031334979576058686, |
|
"learning_rate": 2.0209224916785547e-06, |
|
"loss": 0.0, |
|
"step": 13510 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"grad_norm": 0.000616435194388032, |
|
"learning_rate": 1.9812965604691713e-06, |
|
"loss": 0.0, |
|
"step": 13520 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"grad_norm": 0.0008787320111878216, |
|
"learning_rate": 1.9416706292597876e-06, |
|
"loss": 0.0, |
|
"step": 13530 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"grad_norm": 0.0002825538394972682, |
|
"learning_rate": 1.9020446980504043e-06, |
|
"loss": 0.0021, |
|
"step": 13540 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"grad_norm": 0.002063804306089878, |
|
"learning_rate": 1.8624187668410208e-06, |
|
"loss": 0.0004, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"grad_norm": 0.000512151513248682, |
|
"learning_rate": 1.8227928356316374e-06, |
|
"loss": 0.0, |
|
"step": 13560 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"grad_norm": 0.0006224968819878995, |
|
"learning_rate": 1.7831669044222541e-06, |
|
"loss": 0.0, |
|
"step": 13570 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"grad_norm": 0.00019008757953997701, |
|
"learning_rate": 1.7435409732128704e-06, |
|
"loss": 0.0, |
|
"step": 13580 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"grad_norm": 0.0002794242464005947, |
|
"learning_rate": 1.7039150420034873e-06, |
|
"loss": 0.0, |
|
"step": 13590 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"grad_norm": 0.0009566029766574502, |
|
"learning_rate": 1.664289110794104e-06, |
|
"loss": 0.0001, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"grad_norm": 0.0003199617494828999, |
|
"learning_rate": 1.6246631795847202e-06, |
|
"loss": 0.0001, |
|
"step": 13610 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"grad_norm": 0.00032697312417440116, |
|
"learning_rate": 1.585037248375337e-06, |
|
"loss": 0.0, |
|
"step": 13620 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"grad_norm": 0.002565112430602312, |
|
"learning_rate": 1.5454113171659534e-06, |
|
"loss": 0.0, |
|
"step": 13630 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"grad_norm": 237.59519958496094, |
|
"learning_rate": 1.50578538595657e-06, |
|
"loss": 0.1313, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"grad_norm": 0.0006662964588031173, |
|
"learning_rate": 1.4661594547471865e-06, |
|
"loss": 0.0, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"grad_norm": 0.0011941486736759543, |
|
"learning_rate": 1.4265335235378032e-06, |
|
"loss": 0.0, |
|
"step": 13660 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"grad_norm": 0.0028123382944613695, |
|
"learning_rate": 1.3869075923284197e-06, |
|
"loss": 0.0, |
|
"step": 13670 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"grad_norm": 0.0008815588662400842, |
|
"learning_rate": 1.3472816611190364e-06, |
|
"loss": 0.0277, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"grad_norm": 0.00045147593482397497, |
|
"learning_rate": 1.307655729909653e-06, |
|
"loss": 0.0, |
|
"step": 13690 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"grad_norm": 0.00011046286817872897, |
|
"learning_rate": 1.2680297987002695e-06, |
|
"loss": 0.1484, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"grad_norm": 0.0018034332897514105, |
|
"learning_rate": 1.228403867490886e-06, |
|
"loss": 0.0027, |
|
"step": 13710 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"grad_norm": 0.000713842804543674, |
|
"learning_rate": 1.1887779362815027e-06, |
|
"loss": 0.0, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"grad_norm": 0.0010389587841928005, |
|
"learning_rate": 1.1491520050721194e-06, |
|
"loss": 0.0, |
|
"step": 13730 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"grad_norm": 0.0003368295438122004, |
|
"learning_rate": 1.1095260738627358e-06, |
|
"loss": 0.0, |
|
"step": 13740 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"grad_norm": 0.000346412300132215, |
|
"learning_rate": 1.0699001426533523e-06, |
|
"loss": 0.0, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"grad_norm": 0.0004677934921346605, |
|
"learning_rate": 1.030274211443969e-06, |
|
"loss": 0.0, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"grad_norm": 0.0008401199011132121, |
|
"learning_rate": 9.906482802345857e-07, |
|
"loss": 0.0, |
|
"step": 13770 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"grad_norm": 0.0003339408722240478, |
|
"learning_rate": 9.510223490252021e-07, |
|
"loss": 0.0002, |
|
"step": 13780 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"grad_norm": 0.0004967558197677135, |
|
"learning_rate": 9.113964178158187e-07, |
|
"loss": 0.0, |
|
"step": 13790 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"grad_norm": 0.002963978098705411, |
|
"learning_rate": 8.717704866064352e-07, |
|
"loss": 0.0, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"grad_norm": 0.001155543839558959, |
|
"learning_rate": 8.32144555397052e-07, |
|
"loss": 0.0, |
|
"step": 13810 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 0.000786484801210463, |
|
"learning_rate": 7.925186241876685e-07, |
|
"loss": 0.1625, |
|
"step": 13820 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 0.0002841146197170019, |
|
"learning_rate": 7.52892692978285e-07, |
|
"loss": 0.0, |
|
"step": 13830 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 0.00030605948995798826, |
|
"learning_rate": 7.132667617689016e-07, |
|
"loss": 0.0, |
|
"step": 13840 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 0.001265210215933621, |
|
"learning_rate": 6.736408305595182e-07, |
|
"loss": 0.0, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 0.00038683577440679073, |
|
"learning_rate": 6.340148993501348e-07, |
|
"loss": 0.0, |
|
"step": 13860 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 0.0005034942296333611, |
|
"learning_rate": 5.943889681407513e-07, |
|
"loss": 0.0, |
|
"step": 13870 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 0.0011582579463720322, |
|
"learning_rate": 5.547630369313679e-07, |
|
"loss": 0.0, |
|
"step": 13880 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 0.0016904632793739438, |
|
"learning_rate": 5.151371057219845e-07, |
|
"loss": 0.0, |
|
"step": 13890 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 0.00032329061650671065, |
|
"learning_rate": 4.7551117451260107e-07, |
|
"loss": 0.0, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 0.0003388900659047067, |
|
"learning_rate": 4.358852433032176e-07, |
|
"loss": 0.0, |
|
"step": 13910 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 0.0003800652630161494, |
|
"learning_rate": 3.962593120938342e-07, |
|
"loss": 0.0, |
|
"step": 13920 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 0.0009641946526244283, |
|
"learning_rate": 3.566333808844508e-07, |
|
"loss": 0.0, |
|
"step": 13930 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 0.0005723941139876842, |
|
"learning_rate": 3.170074496750674e-07, |
|
"loss": 0.0001, |
|
"step": 13940 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 0.0005183956818655133, |
|
"learning_rate": 2.7738151846568396e-07, |
|
"loss": 0.0, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"grad_norm": 0.009076601825654507, |
|
"learning_rate": 2.3775558725630054e-07, |
|
"loss": 0.0, |
|
"step": 13960 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"grad_norm": 0.0007901808712631464, |
|
"learning_rate": 1.981296560469171e-07, |
|
"loss": 0.0, |
|
"step": 13970 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"grad_norm": 0.0005284142098389566, |
|
"learning_rate": 1.585037248375337e-07, |
|
"loss": 0.0, |
|
"step": 13980 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"grad_norm": 0.0006428571650758386, |
|
"learning_rate": 1.1887779362815027e-07, |
|
"loss": 0.0, |
|
"step": 13990 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"grad_norm": 0.0012319569941610098, |
|
"learning_rate": 7.925186241876685e-08, |
|
"loss": 0.0001, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"grad_norm": 0.000267757655819878, |
|
"learning_rate": 3.962593120938342e-08, |
|
"loss": 0.0, |
|
"step": 14010 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"grad_norm": 0.0010718012927100062, |
|
"learning_rate": 0.0, |
|
"loss": 0.0, |
|
"step": 14020 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"eval_accuracy": 0.9654135338345865, |
|
"eval_loss": 0.24323464930057526, |
|
"eval_runtime": 2339.7693, |
|
"eval_samples_per_second": 0.284, |
|
"eval_steps_per_second": 0.142, |
|
"step": 14020 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"step": 14020, |
|
"total_flos": 7.1819242300007645e+19, |
|
"train_loss": 0.21922695452951727, |
|
"train_runtime": 145352.8053, |
|
"train_samples_per_second": 0.193, |
|
"train_steps_per_second": 0.096 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"eval_accuracy": 0.960960960960961, |
|
"eval_loss": 0.25779759883880615, |
|
"eval_runtime": 1196.8626, |
|
"eval_samples_per_second": 0.278, |
|
"eval_steps_per_second": 0.14, |
|
"step": 14020 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"eval_accuracy": 0.960960960960961, |
|
"eval_loss": 0.25779759883880615, |
|
"eval_runtime": 1193.7233, |
|
"eval_samples_per_second": 0.279, |
|
"eval_steps_per_second": 0.14, |
|
"step": 14020 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 14020, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"total_flos": 7.1819242300007645e+19, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|