|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.1752854264607118, |
|
"eval_steps": 500, |
|
"global_step": 14000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.30050432682037354, |
|
"learning_rate": 9.995802552048355e-06, |
|
"loss": 0.0015, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.02444104291498661, |
|
"learning_rate": 9.99160510409671e-06, |
|
"loss": 0.0012, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.30079442262649536, |
|
"learning_rate": 9.987407656145065e-06, |
|
"loss": 0.0022, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.11910504847764969, |
|
"learning_rate": 9.98321020819342e-06, |
|
"loss": 0.0044, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.025635888800024986, |
|
"learning_rate": 9.979012760241774e-06, |
|
"loss": 0.004, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.10340698063373566, |
|
"learning_rate": 9.974815312290129e-06, |
|
"loss": 0.0034, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.04860584810376167, |
|
"learning_rate": 9.970617864338483e-06, |
|
"loss": 0.0007, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.029318638145923615, |
|
"learning_rate": 9.966420416386838e-06, |
|
"loss": 0.0051, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.023802747949957848, |
|
"learning_rate": 9.962222968435192e-06, |
|
"loss": 0.0008, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.06850480288267136, |
|
"learning_rate": 9.958025520483547e-06, |
|
"loss": 0.0037, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.021825846284627914, |
|
"learning_rate": 9.953828072531902e-06, |
|
"loss": 0.0017, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.03794211894273758, |
|
"learning_rate": 9.949630624580256e-06, |
|
"loss": 0.0013, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.10325329005718231, |
|
"learning_rate": 9.945433176628611e-06, |
|
"loss": 0.0026, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.03597491234540939, |
|
"learning_rate": 9.941235728676965e-06, |
|
"loss": 0.0034, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.23858389258384705, |
|
"learning_rate": 9.93703828072532e-06, |
|
"loss": 0.0024, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.15489879250526428, |
|
"learning_rate": 9.932840832773675e-06, |
|
"loss": 0.0092, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.07030558586120605, |
|
"learning_rate": 9.92864338482203e-06, |
|
"loss": 0.0022, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.5954773426055908, |
|
"learning_rate": 9.924445936870384e-06, |
|
"loss": 0.0032, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.19402949512004852, |
|
"learning_rate": 9.920248488918739e-06, |
|
"loss": 0.0018, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.0862368643283844, |
|
"learning_rate": 9.916051040967093e-06, |
|
"loss": 0.0041, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.02818392589688301, |
|
"learning_rate": 9.911853593015446e-06, |
|
"loss": 0.0035, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.35951200127601624, |
|
"learning_rate": 9.907656145063802e-06, |
|
"loss": 0.0027, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.033146314322948456, |
|
"learning_rate": 9.903458697112157e-06, |
|
"loss": 0.0023, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.183016836643219, |
|
"learning_rate": 9.899261249160512e-06, |
|
"loss": 0.0029, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.010143009014427662, |
|
"learning_rate": 9.895063801208866e-06, |
|
"loss": 0.0038, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.01728486269712448, |
|
"learning_rate": 9.890866353257221e-06, |
|
"loss": 0.0016, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.016598215326666832, |
|
"learning_rate": 9.886668905305575e-06, |
|
"loss": 0.0021, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.010375693440437317, |
|
"learning_rate": 9.88247145735393e-06, |
|
"loss": 0.0028, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.0275142602622509, |
|
"learning_rate": 9.878274009402285e-06, |
|
"loss": 0.0058, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.3856583833694458, |
|
"learning_rate": 9.874076561450638e-06, |
|
"loss": 0.0029, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.01894170604646206, |
|
"learning_rate": 9.869879113498994e-06, |
|
"loss": 0.0016, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.33644765615463257, |
|
"learning_rate": 9.865681665547347e-06, |
|
"loss": 0.0016, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.015369775705039501, |
|
"learning_rate": 9.861484217595703e-06, |
|
"loss": 0.0018, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.027933355420827866, |
|
"learning_rate": 9.857286769644058e-06, |
|
"loss": 0.0024, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.006843829993158579, |
|
"learning_rate": 9.853089321692412e-06, |
|
"loss": 0.0096, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.008493180386722088, |
|
"learning_rate": 9.848891873740767e-06, |
|
"loss": 0.0031, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.6798232197761536, |
|
"learning_rate": 9.84469442578912e-06, |
|
"loss": 0.0049, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.0329626239836216, |
|
"learning_rate": 9.840496977837476e-06, |
|
"loss": 0.0016, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.015625745058059692, |
|
"learning_rate": 9.83629952988583e-06, |
|
"loss": 0.0008, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.024704214185476303, |
|
"learning_rate": 9.832102081934186e-06, |
|
"loss": 0.0054, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.6314619183540344, |
|
"learning_rate": 9.827904633982538e-06, |
|
"loss": 0.0021, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.46060019731521606, |
|
"learning_rate": 9.823707186030895e-06, |
|
"loss": 0.0057, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.05328000336885452, |
|
"learning_rate": 9.819509738079248e-06, |
|
"loss": 0.0009, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.22446390986442566, |
|
"learning_rate": 9.815312290127604e-06, |
|
"loss": 0.0011, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.035243839025497437, |
|
"learning_rate": 9.811114842175959e-06, |
|
"loss": 0.0009, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.06827638298273087, |
|
"learning_rate": 9.806917394224312e-06, |
|
"loss": 0.0004, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.08792345225811005, |
|
"learning_rate": 9.802719946272668e-06, |
|
"loss": 0.0031, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.00334025826305151, |
|
"learning_rate": 9.79852249832102e-06, |
|
"loss": 0.0043, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.03577791526913643, |
|
"learning_rate": 9.794325050369377e-06, |
|
"loss": 0.0009, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.10479724407196045, |
|
"learning_rate": 9.79012760241773e-06, |
|
"loss": 0.0009, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.006824735552072525, |
|
"learning_rate": 9.785930154466086e-06, |
|
"loss": 0.0044, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.003342829644680023, |
|
"learning_rate": 9.78173270651444e-06, |
|
"loss": 0.0029, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.010192741639912128, |
|
"learning_rate": 9.777535258562796e-06, |
|
"loss": 0.0022, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.08739089965820312, |
|
"learning_rate": 9.773337810611148e-06, |
|
"loss": 0.0032, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.3144104480743408, |
|
"learning_rate": 9.769140362659503e-06, |
|
"loss": 0.0015, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.0013409241801127791, |
|
"learning_rate": 9.76494291470786e-06, |
|
"loss": 0.0043, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.004873868077993393, |
|
"learning_rate": 9.760745466756212e-06, |
|
"loss": 0.0023, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.13740317523479462, |
|
"learning_rate": 9.756548018804569e-06, |
|
"loss": 0.0028, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.1480674296617508, |
|
"learning_rate": 9.752350570852922e-06, |
|
"loss": 0.0034, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.01920839212834835, |
|
"learning_rate": 9.748153122901278e-06, |
|
"loss": 0.0009, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.06275931745767593, |
|
"learning_rate": 9.74395567494963e-06, |
|
"loss": 0.0011, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.4199993312358856, |
|
"learning_rate": 9.739758226997985e-06, |
|
"loss": 0.0048, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.022090768441557884, |
|
"learning_rate": 9.73556077904634e-06, |
|
"loss": 0.0048, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.1497245728969574, |
|
"learning_rate": 9.731363331094695e-06, |
|
"loss": 0.0142, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.011996796354651451, |
|
"learning_rate": 9.72716588314305e-06, |
|
"loss": 0.0034, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.004964240826666355, |
|
"learning_rate": 9.722968435191404e-06, |
|
"loss": 0.0024, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.1952650249004364, |
|
"learning_rate": 9.71877098723976e-06, |
|
"loss": 0.0091, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.2123202085494995, |
|
"learning_rate": 9.714573539288113e-06, |
|
"loss": 0.0064, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.030770471319556236, |
|
"learning_rate": 9.71037609133647e-06, |
|
"loss": 0.0012, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.06245679035782814, |
|
"learning_rate": 9.706178643384822e-06, |
|
"loss": 0.0022, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.09754525870084763, |
|
"learning_rate": 9.701981195433177e-06, |
|
"loss": 0.0016, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.2677933871746063, |
|
"learning_rate": 9.697783747481532e-06, |
|
"loss": 0.0012, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.01193680427968502, |
|
"learning_rate": 9.693586299529886e-06, |
|
"loss": 0.0021, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.00777805782854557, |
|
"learning_rate": 9.68938885157824e-06, |
|
"loss": 0.0005, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.0012337628286331892, |
|
"learning_rate": 9.685191403626595e-06, |
|
"loss": 0.0026, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.009749867022037506, |
|
"learning_rate": 9.68099395567495e-06, |
|
"loss": 0.0089, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.05441896617412567, |
|
"learning_rate": 9.676796507723305e-06, |
|
"loss": 0.0051, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.2501550614833832, |
|
"learning_rate": 9.67259905977166e-06, |
|
"loss": 0.003, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.030330829322338104, |
|
"learning_rate": 9.668401611820014e-06, |
|
"loss": 0.0026, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.02893720008432865, |
|
"learning_rate": 9.664204163868369e-06, |
|
"loss": 0.0012, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.016928590834140778, |
|
"learning_rate": 9.660006715916723e-06, |
|
"loss": 0.0095, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.3214503824710846, |
|
"learning_rate": 9.655809267965078e-06, |
|
"loss": 0.0014, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.005326243583112955, |
|
"learning_rate": 9.651611820013432e-06, |
|
"loss": 0.0026, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.006249762140214443, |
|
"learning_rate": 9.647414372061787e-06, |
|
"loss": 0.0012, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.005905636586248875, |
|
"learning_rate": 9.643216924110142e-06, |
|
"loss": 0.0039, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4915952682495117, |
|
"learning_rate": 9.639019476158496e-06, |
|
"loss": 0.0029, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.039309654384851456, |
|
"learning_rate": 9.634822028206851e-06, |
|
"loss": 0.0016, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.25026625394821167, |
|
"learning_rate": 9.630624580255205e-06, |
|
"loss": 0.0027, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.059997640550136566, |
|
"learning_rate": 9.62642713230356e-06, |
|
"loss": 0.0031, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.03755790740251541, |
|
"learning_rate": 9.622229684351915e-06, |
|
"loss": 0.0023, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.04567611590027809, |
|
"learning_rate": 9.61803223640027e-06, |
|
"loss": 0.0027, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.13617774844169617, |
|
"learning_rate": 9.613834788448624e-06, |
|
"loss": 0.0023, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.007796242367476225, |
|
"learning_rate": 9.609637340496979e-06, |
|
"loss": 0.0008, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.09431927651166916, |
|
"learning_rate": 9.605439892545333e-06, |
|
"loss": 0.0043, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.00464077852666378, |
|
"learning_rate": 9.601242444593688e-06, |
|
"loss": 0.0032, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.07417366653680801, |
|
"learning_rate": 9.597044996642042e-06, |
|
"loss": 0.004, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.07863020896911621, |
|
"learning_rate": 9.592847548690397e-06, |
|
"loss": 0.0038, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.06590215861797333, |
|
"learning_rate": 9.588650100738752e-06, |
|
"loss": 0.0051, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.5270329117774963, |
|
"learning_rate": 9.584452652787106e-06, |
|
"loss": 0.0081, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.005750787910073996, |
|
"learning_rate": 9.580255204835461e-06, |
|
"loss": 0.002, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4699457883834839, |
|
"learning_rate": 9.576057756883816e-06, |
|
"loss": 0.0025, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.010430064052343369, |
|
"learning_rate": 9.57186030893217e-06, |
|
"loss": 0.0013, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.006583033595234156, |
|
"learning_rate": 9.567662860980525e-06, |
|
"loss": 0.0027, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.01882992312312126, |
|
"learning_rate": 9.56346541302888e-06, |
|
"loss": 0.0046, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.06240439414978027, |
|
"learning_rate": 9.559267965077234e-06, |
|
"loss": 0.0054, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.872185468673706, |
|
"learning_rate": 9.555070517125589e-06, |
|
"loss": 0.0046, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.18814022839069366, |
|
"learning_rate": 9.550873069173943e-06, |
|
"loss": 0.003, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.18875594437122345, |
|
"learning_rate": 9.546675621222298e-06, |
|
"loss": 0.0031, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.03390906751155853, |
|
"learning_rate": 9.542478173270652e-06, |
|
"loss": 0.0027, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.1369648426771164, |
|
"learning_rate": 9.538280725319007e-06, |
|
"loss": 0.0016, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.02575293742120266, |
|
"learning_rate": 9.534083277367362e-06, |
|
"loss": 0.0011, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.043303512036800385, |
|
"learning_rate": 9.529885829415716e-06, |
|
"loss": 0.0015, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.0034562412183731794, |
|
"learning_rate": 9.525688381464071e-06, |
|
"loss": 0.0068, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.10729283839464188, |
|
"learning_rate": 9.521490933512426e-06, |
|
"loss": 0.0017, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.029321426525712013, |
|
"learning_rate": 9.51729348556078e-06, |
|
"loss": 0.0005, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.26224708557128906, |
|
"learning_rate": 9.513096037609135e-06, |
|
"loss": 0.0056, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.081400066614151, |
|
"learning_rate": 9.50889858965749e-06, |
|
"loss": 0.0012, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.06388130784034729, |
|
"learning_rate": 9.504701141705844e-06, |
|
"loss": 0.0035, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.03783797100186348, |
|
"learning_rate": 9.500503693754199e-06, |
|
"loss": 0.0028, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.0943446159362793, |
|
"learning_rate": 9.496306245802552e-06, |
|
"loss": 0.0015, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4624231159687042, |
|
"learning_rate": 9.492108797850908e-06, |
|
"loss": 0.0035, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.06161864846944809, |
|
"learning_rate": 9.487911349899263e-06, |
|
"loss": 0.0064, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.027636563405394554, |
|
"learning_rate": 9.483713901947617e-06, |
|
"loss": 0.001, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.06516788899898529, |
|
"learning_rate": 9.479516453995972e-06, |
|
"loss": 0.0044, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.0022635594941675663, |
|
"learning_rate": 9.475319006044326e-06, |
|
"loss": 0.004, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.016013437882065773, |
|
"learning_rate": 9.471121558092681e-06, |
|
"loss": 0.001, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.018587607890367508, |
|
"learning_rate": 9.466924110141034e-06, |
|
"loss": 0.0022, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.13551315665245056, |
|
"learning_rate": 9.46272666218939e-06, |
|
"loss": 0.0009, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.016198860481381416, |
|
"learning_rate": 9.458529214237743e-06, |
|
"loss": 0.0034, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.09964737296104431, |
|
"learning_rate": 9.4543317662861e-06, |
|
"loss": 0.0016, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.01190153881907463, |
|
"learning_rate": 9.450134318334452e-06, |
|
"loss": 0.0023, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.2369023561477661, |
|
"learning_rate": 9.445936870382809e-06, |
|
"loss": 0.01, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.024809740483760834, |
|
"learning_rate": 9.441739422431163e-06, |
|
"loss": 0.001, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.15385492146015167, |
|
"learning_rate": 9.437541974479518e-06, |
|
"loss": 0.0018, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.12145934998989105, |
|
"learning_rate": 9.433344526527873e-06, |
|
"loss": 0.0018, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.04905625805258751, |
|
"learning_rate": 9.429147078576225e-06, |
|
"loss": 0.0036, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.03543395921587944, |
|
"learning_rate": 9.424949630624582e-06, |
|
"loss": 0.0015, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.0014148950576782, |
|
"learning_rate": 9.420752182672935e-06, |
|
"loss": 0.005, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.02649211883544922, |
|
"learning_rate": 9.416554734721291e-06, |
|
"loss": 0.0013, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.03208385780453682, |
|
"learning_rate": 9.412357286769644e-06, |
|
"loss": 0.0007, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.0064937155693769455, |
|
"learning_rate": 9.408159838818e-06, |
|
"loss": 0.0013, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.294610857963562, |
|
"learning_rate": 9.403962390866353e-06, |
|
"loss": 0.002, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.04225017875432968, |
|
"learning_rate": 9.39976494291471e-06, |
|
"loss": 0.0011, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.17220620810985565, |
|
"learning_rate": 9.395567494963064e-06, |
|
"loss": 0.0017, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.01658468507230282, |
|
"learning_rate": 9.391370047011417e-06, |
|
"loss": 0.004, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.09207724779844284, |
|
"learning_rate": 9.387172599059773e-06, |
|
"loss": 0.0033, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.0661146268248558, |
|
"learning_rate": 9.382975151108126e-06, |
|
"loss": 0.0016, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.012653768993914127, |
|
"learning_rate": 9.378777703156483e-06, |
|
"loss": 0.0003, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.038489799946546555, |
|
"learning_rate": 9.374580255204835e-06, |
|
"loss": 0.0009, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.14739686250686646, |
|
"learning_rate": 9.370382807253192e-06, |
|
"loss": 0.004, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.04394904151558876, |
|
"learning_rate": 9.366185359301545e-06, |
|
"loss": 0.0025, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.2980542778968811, |
|
"learning_rate": 9.3619879113499e-06, |
|
"loss": 0.0049, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.6049527525901794, |
|
"learning_rate": 9.357790463398254e-06, |
|
"loss": 0.0068, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.2390802502632141, |
|
"learning_rate": 9.353593015446609e-06, |
|
"loss": 0.0014, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.057861894369125366, |
|
"learning_rate": 9.349395567494965e-06, |
|
"loss": 0.0007, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.010473741218447685, |
|
"learning_rate": 9.345198119543318e-06, |
|
"loss": 0.001, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.6854666471481323, |
|
"learning_rate": 9.341000671591674e-06, |
|
"loss": 0.0037, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.28555673360824585, |
|
"learning_rate": 9.336803223640027e-06, |
|
"loss": 0.0015, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.05343065410852432, |
|
"learning_rate": 9.332605775688383e-06, |
|
"loss": 0.0011, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.040986571460962296, |
|
"learning_rate": 9.328408327736736e-06, |
|
"loss": 0.0019, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.09061778336763382, |
|
"learning_rate": 9.324210879785091e-06, |
|
"loss": 0.0009, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.022660354152321815, |
|
"learning_rate": 9.320013431833446e-06, |
|
"loss": 0.0007, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.019222592934966087, |
|
"learning_rate": 9.3158159838818e-06, |
|
"loss": 0.0012, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.007360487245023251, |
|
"learning_rate": 9.311618535930155e-06, |
|
"loss": 0.001, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.04865903779864311, |
|
"learning_rate": 9.30742108797851e-06, |
|
"loss": 0.0009, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.04993268847465515, |
|
"learning_rate": 9.303223640026866e-06, |
|
"loss": 0.0011, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.049482040107250214, |
|
"learning_rate": 9.299026192075219e-06, |
|
"loss": 0.0006, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.08518658578395844, |
|
"learning_rate": 9.294828744123573e-06, |
|
"loss": 0.0018, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.15694260597229004, |
|
"learning_rate": 9.290631296171928e-06, |
|
"loss": 0.0008, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.2914963662624359, |
|
"learning_rate": 9.286433848220282e-06, |
|
"loss": 0.0046, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.02077990211546421, |
|
"learning_rate": 9.282236400268637e-06, |
|
"loss": 0.0011, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.13449931144714355, |
|
"learning_rate": 9.278038952316992e-06, |
|
"loss": 0.0037, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.07630223780870438, |
|
"learning_rate": 9.273841504365346e-06, |
|
"loss": 0.0029, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.19343329966068268, |
|
"learning_rate": 9.269644056413701e-06, |
|
"loss": 0.002, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.03355003520846367, |
|
"learning_rate": 9.265446608462056e-06, |
|
"loss": 0.0006, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.07213837653398514, |
|
"learning_rate": 9.26124916051041e-06, |
|
"loss": 0.0012, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.11827400326728821, |
|
"learning_rate": 9.257051712558765e-06, |
|
"loss": 0.0014, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.0025086326058954, |
|
"learning_rate": 9.25285426460712e-06, |
|
"loss": 0.0009, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.05490359291434288, |
|
"learning_rate": 9.248656816655474e-06, |
|
"loss": 0.0012, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.2864171862602234, |
|
"learning_rate": 9.244459368703829e-06, |
|
"loss": 0.0028, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.18638712167739868, |
|
"learning_rate": 9.240261920752183e-06, |
|
"loss": 0.0027, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.1295178234577179, |
|
"learning_rate": 9.236064472800538e-06, |
|
"loss": 0.0014, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.4142198860645294, |
|
"learning_rate": 9.231867024848892e-06, |
|
"loss": 0.0017, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.0262783020734787, |
|
"learning_rate": 9.227669576897247e-06, |
|
"loss": 0.0013, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.030707404017448425, |
|
"learning_rate": 9.223472128945602e-06, |
|
"loss": 0.0009, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.016312995925545692, |
|
"learning_rate": 9.219274680993956e-06, |
|
"loss": 0.0013, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.036473799496889114, |
|
"learning_rate": 9.215077233042311e-06, |
|
"loss": 0.0031, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.01363462395966053, |
|
"learning_rate": 9.210879785090666e-06, |
|
"loss": 0.0029, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.06613222509622574, |
|
"learning_rate": 9.20668233713902e-06, |
|
"loss": 0.0037, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.02161981351673603, |
|
"learning_rate": 9.202484889187375e-06, |
|
"loss": 0.0012, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.013683819212019444, |
|
"learning_rate": 9.19828744123573e-06, |
|
"loss": 0.0019, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.13119082152843475, |
|
"learning_rate": 9.194089993284084e-06, |
|
"loss": 0.001, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.08101898431777954, |
|
"learning_rate": 9.189892545332439e-06, |
|
"loss": 0.0019, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.0600835420191288, |
|
"learning_rate": 9.185695097380793e-06, |
|
"loss": 0.0014, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.05432651564478874, |
|
"learning_rate": 9.181497649429148e-06, |
|
"loss": 0.0015, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.06074979156255722, |
|
"learning_rate": 9.177300201477503e-06, |
|
"loss": 0.007, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.44976115226745605, |
|
"learning_rate": 9.173102753525857e-06, |
|
"loss": 0.0025, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.005641541443765163, |
|
"learning_rate": 9.168905305574212e-06, |
|
"loss": 0.0019, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.006832782179117203, |
|
"learning_rate": 9.164707857622566e-06, |
|
"loss": 0.0013, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.07477637380361557, |
|
"learning_rate": 9.160510409670921e-06, |
|
"loss": 0.0027, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.07955386489629745, |
|
"learning_rate": 9.156312961719276e-06, |
|
"loss": 0.001, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.009969334118068218, |
|
"learning_rate": 9.15211551376763e-06, |
|
"loss": 0.0015, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.024671079590916634, |
|
"learning_rate": 9.147918065815985e-06, |
|
"loss": 0.0017, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.23273734748363495, |
|
"learning_rate": 9.14372061786434e-06, |
|
"loss": 0.0012, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.00974265392869711, |
|
"learning_rate": 9.139523169912694e-06, |
|
"loss": 0.0046, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.5019146203994751, |
|
"learning_rate": 9.135325721961049e-06, |
|
"loss": 0.0024, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.0385243222117424, |
|
"learning_rate": 9.131128274009403e-06, |
|
"loss": 0.0009, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.005131669342517853, |
|
"learning_rate": 9.126930826057758e-06, |
|
"loss": 0.0014, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.06231481209397316, |
|
"learning_rate": 9.122733378106113e-06, |
|
"loss": 0.0007, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.057179391384124756, |
|
"learning_rate": 9.118535930154467e-06, |
|
"loss": 0.004, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.06546686589717865, |
|
"learning_rate": 9.114338482202822e-06, |
|
"loss": 0.0018, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.003714991034939885, |
|
"learning_rate": 9.110141034251176e-06, |
|
"loss": 0.0005, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.06414463371038437, |
|
"learning_rate": 9.105943586299531e-06, |
|
"loss": 0.0005, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.14546462893486023, |
|
"learning_rate": 9.101746138347886e-06, |
|
"loss": 0.0021, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.05881655961275101, |
|
"learning_rate": 9.09754869039624e-06, |
|
"loss": 0.0015, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.013162361457943916, |
|
"learning_rate": 9.093351242444595e-06, |
|
"loss": 0.0022, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.017041141167283058, |
|
"learning_rate": 9.08915379449295e-06, |
|
"loss": 0.0018, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.09564032405614853, |
|
"learning_rate": 9.084956346541304e-06, |
|
"loss": 0.0042, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.014800913631916046, |
|
"learning_rate": 9.080758898589657e-06, |
|
"loss": 0.0009, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.17374302446842194, |
|
"learning_rate": 9.076561450638013e-06, |
|
"loss": 0.0013, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.035490747541189194, |
|
"learning_rate": 9.072364002686368e-06, |
|
"loss": 0.0038, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.1741674244403839, |
|
"learning_rate": 9.068166554734723e-06, |
|
"loss": 0.0026, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.04081317409873009, |
|
"learning_rate": 9.063969106783077e-06, |
|
"loss": 0.0014, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.2914838194847107, |
|
"learning_rate": 9.059771658831432e-06, |
|
"loss": 0.0034, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.02334975078701973, |
|
"learning_rate": 9.055574210879786e-06, |
|
"loss": 0.0041, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.05057836323976517, |
|
"learning_rate": 9.05137676292814e-06, |
|
"loss": 0.0019, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.16913112998008728, |
|
"learning_rate": 9.047179314976496e-06, |
|
"loss": 0.0023, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.03450404852628708, |
|
"learning_rate": 9.042981867024849e-06, |
|
"loss": 0.0047, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.007191210053861141, |
|
"learning_rate": 9.038784419073205e-06, |
|
"loss": 0.0058, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.0037567466497421265, |
|
"learning_rate": 9.034586971121558e-06, |
|
"loss": 0.004, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.6598537564277649, |
|
"learning_rate": 9.030389523169914e-06, |
|
"loss": 0.0025, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.021132759749889374, |
|
"learning_rate": 9.026192075218269e-06, |
|
"loss": 0.002, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.017818361520767212, |
|
"learning_rate": 9.021994627266623e-06, |
|
"loss": 0.0009, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.016008947044610977, |
|
"learning_rate": 9.017797179314978e-06, |
|
"loss": 0.0005, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.009955563582479954, |
|
"learning_rate": 9.013599731363331e-06, |
|
"loss": 0.0012, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.11668864637613297, |
|
"learning_rate": 9.009402283411687e-06, |
|
"loss": 0.0032, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.05549318715929985, |
|
"learning_rate": 9.00520483546004e-06, |
|
"loss": 0.0009, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.026752743870019913, |
|
"learning_rate": 9.001007387508396e-06, |
|
"loss": 0.0018, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.00796794705092907, |
|
"learning_rate": 8.99680993955675e-06, |
|
"loss": 0.0007, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.02195807732641697, |
|
"learning_rate": 8.992612491605106e-06, |
|
"loss": 0.0031, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.007499654311686754, |
|
"learning_rate": 8.988415043653459e-06, |
|
"loss": 0.001, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.010224456898868084, |
|
"learning_rate": 8.984217595701813e-06, |
|
"loss": 0.0011, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.031140731647610664, |
|
"learning_rate": 8.98002014775017e-06, |
|
"loss": 0.0007, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.012480244040489197, |
|
"learning_rate": 8.975822699798522e-06, |
|
"loss": 0.0037, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.13071498274803162, |
|
"learning_rate": 8.971625251846879e-06, |
|
"loss": 0.0008, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.22285941243171692, |
|
"learning_rate": 8.967427803895232e-06, |
|
"loss": 0.0016, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.2518530488014221, |
|
"learning_rate": 8.963230355943588e-06, |
|
"loss": 0.0026, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.11510931700468063, |
|
"learning_rate": 8.959032907991941e-06, |
|
"loss": 0.0096, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.010645145550370216, |
|
"learning_rate": 8.954835460040297e-06, |
|
"loss": 0.0013, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.01338688749819994, |
|
"learning_rate": 8.95063801208865e-06, |
|
"loss": 0.0023, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.01874951645731926, |
|
"learning_rate": 8.946440564137005e-06, |
|
"loss": 0.0031, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.5476735234260559, |
|
"learning_rate": 8.94224311618536e-06, |
|
"loss": 0.0017, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.01938123069703579, |
|
"learning_rate": 8.938045668233714e-06, |
|
"loss": 0.0003, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.17968221008777618, |
|
"learning_rate": 8.93384822028207e-06, |
|
"loss": 0.0032, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.07702717930078506, |
|
"learning_rate": 8.929650772330423e-06, |
|
"loss": 0.0012, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.043315693736076355, |
|
"learning_rate": 8.92545332437878e-06, |
|
"loss": 0.002, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.009717993438243866, |
|
"learning_rate": 8.921255876427133e-06, |
|
"loss": 0.0008, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.0067781913094222546, |
|
"learning_rate": 8.917058428475487e-06, |
|
"loss": 0.0014, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.011173662729561329, |
|
"learning_rate": 8.912860980523842e-06, |
|
"loss": 0.0018, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.07794959843158722, |
|
"learning_rate": 8.908663532572196e-06, |
|
"loss": 0.0025, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.005871940404176712, |
|
"learning_rate": 8.904466084620551e-06, |
|
"loss": 0.003, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.06580822169780731, |
|
"learning_rate": 8.900268636668906e-06, |
|
"loss": 0.0054, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.10812622308731079, |
|
"learning_rate": 8.89607118871726e-06, |
|
"loss": 0.0014, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.08168531954288483, |
|
"learning_rate": 8.891873740765615e-06, |
|
"loss": 0.0023, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.15115512907505035, |
|
"learning_rate": 8.887676292813971e-06, |
|
"loss": 0.0019, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.04277985170483589, |
|
"learning_rate": 8.883478844862324e-06, |
|
"loss": 0.0011, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.0317169725894928, |
|
"learning_rate": 8.879281396910679e-06, |
|
"loss": 0.0009, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.01838303916156292, |
|
"learning_rate": 8.875083948959033e-06, |
|
"loss": 0.0011, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.0044249724596738815, |
|
"learning_rate": 8.870886501007388e-06, |
|
"loss": 0.0009, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.03765324875712395, |
|
"learning_rate": 8.866689053055743e-06, |
|
"loss": 0.0039, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.29022645950317383, |
|
"learning_rate": 8.862491605104097e-06, |
|
"loss": 0.0055, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.5368918776512146, |
|
"learning_rate": 8.858294157152452e-06, |
|
"loss": 0.0072, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.0023665865883231163, |
|
"learning_rate": 8.854096709200806e-06, |
|
"loss": 0.0018, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.07755130529403687, |
|
"learning_rate": 8.849899261249161e-06, |
|
"loss": 0.0015, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.0149016622453928, |
|
"learning_rate": 8.845701813297516e-06, |
|
"loss": 0.0005, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.0034178367350250483, |
|
"learning_rate": 8.84150436534587e-06, |
|
"loss": 0.0003, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.11365914344787598, |
|
"learning_rate": 8.837306917394225e-06, |
|
"loss": 0.003, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.11521656811237335, |
|
"learning_rate": 8.83310946944258e-06, |
|
"loss": 0.001, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.10270581394433975, |
|
"learning_rate": 8.828912021490934e-06, |
|
"loss": 0.0015, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.17976179718971252, |
|
"learning_rate": 8.824714573539289e-06, |
|
"loss": 0.0007, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.07761932164430618, |
|
"learning_rate": 8.820936870382809e-06, |
|
"loss": 0.0029, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.01468763779848814, |
|
"learning_rate": 8.816739422431163e-06, |
|
"loss": 0.0005, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.009555538184940815, |
|
"learning_rate": 8.812541974479516e-06, |
|
"loss": 0.0018, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.01784587651491165, |
|
"learning_rate": 8.808344526527873e-06, |
|
"loss": 0.0047, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.35948890447616577, |
|
"learning_rate": 8.804147078576226e-06, |
|
"loss": 0.0044, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.052703533321619034, |
|
"learning_rate": 8.799949630624582e-06, |
|
"loss": 0.0006, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.171566441655159, |
|
"learning_rate": 8.795752182672935e-06, |
|
"loss": 0.0023, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.3138861060142517, |
|
"learning_rate": 8.791554734721291e-06, |
|
"loss": 0.0007, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.15738941729068756, |
|
"learning_rate": 8.787357286769644e-06, |
|
"loss": 0.0022, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.03881895914673805, |
|
"learning_rate": 8.783159838818e-06, |
|
"loss": 0.004, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.027307789772748947, |
|
"learning_rate": 8.778962390866353e-06, |
|
"loss": 0.0009, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.009243845008313656, |
|
"learning_rate": 8.774764942914708e-06, |
|
"loss": 0.0021, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.010441136546432972, |
|
"learning_rate": 8.770567494963064e-06, |
|
"loss": 0.0003, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.053923387080430984, |
|
"learning_rate": 8.766370047011417e-06, |
|
"loss": 0.0033, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.0529450997710228, |
|
"learning_rate": 8.762172599059773e-06, |
|
"loss": 0.0017, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.017623068764805794, |
|
"learning_rate": 8.757975151108126e-06, |
|
"loss": 0.007, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.06934964656829834, |
|
"learning_rate": 8.753777703156483e-06, |
|
"loss": 0.0017, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.4888018071651459, |
|
"learning_rate": 8.749580255204836e-06, |
|
"loss": 0.0018, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.3019413948059082, |
|
"learning_rate": 8.74538280725319e-06, |
|
"loss": 0.0078, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.20448799431324005, |
|
"learning_rate": 8.741185359301545e-06, |
|
"loss": 0.0026, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.15655213594436646, |
|
"learning_rate": 8.7369879113499e-06, |
|
"loss": 0.0007, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.00702297780662775, |
|
"learning_rate": 8.732790463398254e-06, |
|
"loss": 0.0022, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.03202832117676735, |
|
"learning_rate": 8.728593015446609e-06, |
|
"loss": 0.0048, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.3602777123451233, |
|
"learning_rate": 8.724395567494965e-06, |
|
"loss": 0.0027, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.035856008529663086, |
|
"learning_rate": 8.720198119543318e-06, |
|
"loss": 0.0011, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.02676764689385891, |
|
"learning_rate": 8.716000671591674e-06, |
|
"loss": 0.0006, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.06010160595178604, |
|
"learning_rate": 8.711803223640027e-06, |
|
"loss": 0.0023, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.33468106389045715, |
|
"learning_rate": 8.707605775688382e-06, |
|
"loss": 0.0013, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.07244027405977249, |
|
"learning_rate": 8.703408327736736e-06, |
|
"loss": 0.001, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.1765681654214859, |
|
"learning_rate": 8.699210879785091e-06, |
|
"loss": 0.0021, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.0064493464305996895, |
|
"learning_rate": 8.695013431833446e-06, |
|
"loss": 0.0021, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.12435489892959595, |
|
"learning_rate": 8.6908159838818e-06, |
|
"loss": 0.0017, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.05187215656042099, |
|
"learning_rate": 8.686618535930155e-06, |
|
"loss": 0.001, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.01066290121525526, |
|
"learning_rate": 8.68242108797851e-06, |
|
"loss": 0.0011, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.04758090898394585, |
|
"learning_rate": 8.678223640026866e-06, |
|
"loss": 0.0005, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.02921927347779274, |
|
"learning_rate": 8.674026192075219e-06, |
|
"loss": 0.0021, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.009303642436861992, |
|
"learning_rate": 8.669828744123573e-06, |
|
"loss": 0.0007, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.5222365856170654, |
|
"learning_rate": 8.665631296171928e-06, |
|
"loss": 0.007, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.005165881011635065, |
|
"learning_rate": 8.661433848220283e-06, |
|
"loss": 0.0018, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.8817521333694458, |
|
"learning_rate": 8.657236400268637e-06, |
|
"loss": 0.002, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.07392257452011108, |
|
"learning_rate": 8.653038952316992e-06, |
|
"loss": 0.0015, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.039199940860271454, |
|
"learning_rate": 8.648841504365346e-06, |
|
"loss": 0.0007, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.2725190222263336, |
|
"learning_rate": 8.644644056413701e-06, |
|
"loss": 0.0022, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.012581940740346909, |
|
"learning_rate": 8.640446608462056e-06, |
|
"loss": 0.0073, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.008940734900534153, |
|
"learning_rate": 8.63624916051041e-06, |
|
"loss": 0.0009, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.1721634566783905, |
|
"learning_rate": 8.632051712558765e-06, |
|
"loss": 0.0017, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.31307950615882874, |
|
"learning_rate": 8.62785426460712e-06, |
|
"loss": 0.0033, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.18467161059379578, |
|
"learning_rate": 8.623656816655474e-06, |
|
"loss": 0.0029, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.04656468331813812, |
|
"learning_rate": 8.619879113498994e-06, |
|
"loss": 0.0018, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.0923081561923027, |
|
"learning_rate": 8.615681665547347e-06, |
|
"loss": 0.0011, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.15977904200553894, |
|
"learning_rate": 8.611484217595703e-06, |
|
"loss": 0.0027, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.10176800936460495, |
|
"learning_rate": 8.607286769644058e-06, |
|
"loss": 0.0024, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.057700444012880325, |
|
"learning_rate": 8.603089321692411e-06, |
|
"loss": 0.0031, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.03322164714336395, |
|
"learning_rate": 8.598891873740767e-06, |
|
"loss": 0.0015, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.018284544348716736, |
|
"learning_rate": 8.59469442578912e-06, |
|
"loss": 0.0022, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.02011759579181671, |
|
"learning_rate": 8.590496977837476e-06, |
|
"loss": 0.0008, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.22747820615768433, |
|
"learning_rate": 8.58629952988583e-06, |
|
"loss": 0.0009, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.08898269385099411, |
|
"learning_rate": 8.582102081934186e-06, |
|
"loss": 0.0018, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.3748880922794342, |
|
"learning_rate": 8.577904633982539e-06, |
|
"loss": 0.0045, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.007508324924856424, |
|
"learning_rate": 8.573707186030893e-06, |
|
"loss": 0.004, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.08410963416099548, |
|
"learning_rate": 8.569509738079248e-06, |
|
"loss": 0.0049, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.03300091251730919, |
|
"learning_rate": 8.565312290127602e-06, |
|
"loss": 0.0013, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.030519753694534302, |
|
"learning_rate": 8.561114842175959e-06, |
|
"loss": 0.0035, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.013716090470552444, |
|
"learning_rate": 8.556917394224312e-06, |
|
"loss": 0.0012, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.07122453302145004, |
|
"learning_rate": 8.552719946272668e-06, |
|
"loss": 0.0023, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.006021963898092508, |
|
"learning_rate": 8.548522498321021e-06, |
|
"loss": 0.0012, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.007859396748244762, |
|
"learning_rate": 8.544325050369377e-06, |
|
"loss": 0.0005, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.09434602409601212, |
|
"learning_rate": 8.54012760241773e-06, |
|
"loss": 0.0011, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.01787838153541088, |
|
"learning_rate": 8.535930154466085e-06, |
|
"loss": 0.0011, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.018500063568353653, |
|
"learning_rate": 8.53173270651444e-06, |
|
"loss": 0.0017, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.0013119007926434278, |
|
"learning_rate": 8.527535258562794e-06, |
|
"loss": 0.0013, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.02389811910688877, |
|
"learning_rate": 8.523337810611149e-06, |
|
"loss": 0.0011, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.0021516175474971533, |
|
"learning_rate": 8.519140362659503e-06, |
|
"loss": 0.0056, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.016399670392274857, |
|
"learning_rate": 8.51494291470786e-06, |
|
"loss": 0.001, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.07569669187068939, |
|
"learning_rate": 8.510745466756212e-06, |
|
"loss": 0.0048, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.002499897498637438, |
|
"learning_rate": 8.506548018804569e-06, |
|
"loss": 0.0015, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.0031851092353463173, |
|
"learning_rate": 8.502350570852922e-06, |
|
"loss": 0.0002, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.6716926693916321, |
|
"learning_rate": 8.498153122901276e-06, |
|
"loss": 0.0052, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.026271577924489975, |
|
"learning_rate": 8.493955674949631e-06, |
|
"loss": 0.0017, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.47044169902801514, |
|
"learning_rate": 8.489758226997986e-06, |
|
"loss": 0.0037, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.175507590174675, |
|
"learning_rate": 8.48556077904634e-06, |
|
"loss": 0.0011, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.04265287518501282, |
|
"learning_rate": 8.481363331094695e-06, |
|
"loss": 0.0057, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.06122719869017601, |
|
"learning_rate": 8.47716588314305e-06, |
|
"loss": 0.0012, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.09644156694412231, |
|
"learning_rate": 8.472968435191404e-06, |
|
"loss": 0.0008, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.11276187747716904, |
|
"learning_rate": 8.468770987239759e-06, |
|
"loss": 0.0017, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.02469436265528202, |
|
"learning_rate": 8.464573539288113e-06, |
|
"loss": 0.0046, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.7709282040596008, |
|
"learning_rate": 8.460376091336468e-06, |
|
"loss": 0.008, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.4117774963378906, |
|
"learning_rate": 8.456178643384822e-06, |
|
"loss": 0.0024, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.07191318273544312, |
|
"learning_rate": 8.451981195433177e-06, |
|
"loss": 0.0011, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.014883830212056637, |
|
"learning_rate": 8.447783747481532e-06, |
|
"loss": 0.001, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.0047638434916734695, |
|
"learning_rate": 8.443586299529886e-06, |
|
"loss": 0.0012, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.19078807532787323, |
|
"learning_rate": 8.439388851578241e-06, |
|
"loss": 0.0011, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.07204841822385788, |
|
"learning_rate": 8.435191403626596e-06, |
|
"loss": 0.0018, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.6154640913009644, |
|
"learning_rate": 8.43099395567495e-06, |
|
"loss": 0.0038, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.02338569425046444, |
|
"learning_rate": 8.426796507723305e-06, |
|
"loss": 0.0016, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.13649843633174896, |
|
"learning_rate": 8.423018804566823e-06, |
|
"loss": 0.0047, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.0963369682431221, |
|
"learning_rate": 8.41882135661518e-06, |
|
"loss": 0.0006, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.31827324628829956, |
|
"learning_rate": 8.414623908663532e-06, |
|
"loss": 0.0058, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.006173072848469019, |
|
"learning_rate": 8.410426460711889e-06, |
|
"loss": 0.0014, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.13730692863464355, |
|
"learning_rate": 8.406229012760242e-06, |
|
"loss": 0.0009, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.004133482463657856, |
|
"learning_rate": 8.402031564808596e-06, |
|
"loss": 0.0033, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.13467635214328766, |
|
"learning_rate": 8.397834116856953e-06, |
|
"loss": 0.0009, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.07814080268144608, |
|
"learning_rate": 8.393636668905305e-06, |
|
"loss": 0.0016, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.011251374147832394, |
|
"learning_rate": 8.389439220953662e-06, |
|
"loss": 0.0009, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.001605527475476265, |
|
"learning_rate": 8.385241773002015e-06, |
|
"loss": 0.0013, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.008475171402096748, |
|
"learning_rate": 8.381044325050371e-06, |
|
"loss": 0.0008, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.018021291121840477, |
|
"learning_rate": 8.376846877098724e-06, |
|
"loss": 0.0004, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.006848904769867659, |
|
"learning_rate": 8.37264942914708e-06, |
|
"loss": 0.0014, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.025030123069882393, |
|
"learning_rate": 8.368451981195433e-06, |
|
"loss": 0.0014, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.4155631959438324, |
|
"learning_rate": 8.364254533243788e-06, |
|
"loss": 0.0015, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.005679064895957708, |
|
"learning_rate": 8.360057085292142e-06, |
|
"loss": 0.003, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.009578896686434746, |
|
"learning_rate": 8.355859637340497e-06, |
|
"loss": 0.0048, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.011455328203737736, |
|
"learning_rate": 8.351662189388853e-06, |
|
"loss": 0.0007, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.02022560127079487, |
|
"learning_rate": 8.347464741437206e-06, |
|
"loss": 0.002, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.013254129327833652, |
|
"learning_rate": 8.343267293485563e-06, |
|
"loss": 0.0008, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.03954704850912094, |
|
"learning_rate": 8.339069845533915e-06, |
|
"loss": 0.0021, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.03727477416396141, |
|
"learning_rate": 8.334872397582272e-06, |
|
"loss": 0.0021, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.005523467902094126, |
|
"learning_rate": 8.330674949630625e-06, |
|
"loss": 0.0007, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.022439954802393913, |
|
"learning_rate": 8.32647750167898e-06, |
|
"loss": 0.0021, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.012325704097747803, |
|
"learning_rate": 8.322280053727334e-06, |
|
"loss": 0.0014, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.12552155554294586, |
|
"learning_rate": 8.318082605775689e-06, |
|
"loss": 0.0011, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.017489569261670113, |
|
"learning_rate": 8.313885157824043e-06, |
|
"loss": 0.0011, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.052284810692071915, |
|
"learning_rate": 8.309687709872398e-06, |
|
"loss": 0.001, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.05398709326982498, |
|
"learning_rate": 8.305490261920754e-06, |
|
"loss": 0.0014, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.25828316807746887, |
|
"learning_rate": 8.301292813969107e-06, |
|
"loss": 0.0026, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.01474198792129755, |
|
"learning_rate": 8.297095366017462e-06, |
|
"loss": 0.0017, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.0038133654743433, |
|
"learning_rate": 8.292897918065816e-06, |
|
"loss": 0.0009, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.021653324365615845, |
|
"learning_rate": 8.288700470114171e-06, |
|
"loss": 0.0008, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.017369549721479416, |
|
"learning_rate": 8.284503022162526e-06, |
|
"loss": 0.0011, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.18425977230072021, |
|
"learning_rate": 8.28030557421088e-06, |
|
"loss": 0.0015, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.01438868883997202, |
|
"learning_rate": 8.276108126259235e-06, |
|
"loss": 0.0006, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.0022420857567340136, |
|
"learning_rate": 8.27191067830759e-06, |
|
"loss": 0.0002, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.12319325655698776, |
|
"learning_rate": 8.267713230355944e-06, |
|
"loss": 0.0051, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.01060109119862318, |
|
"learning_rate": 8.263515782404299e-06, |
|
"loss": 0.0003, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.001791100949048996, |
|
"learning_rate": 8.259318334452653e-06, |
|
"loss": 0.0016, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.009030510671436787, |
|
"learning_rate": 8.255120886501008e-06, |
|
"loss": 0.0004, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.1018250361084938, |
|
"learning_rate": 8.250923438549362e-06, |
|
"loss": 0.0018, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.11478858441114426, |
|
"learning_rate": 8.246725990597717e-06, |
|
"loss": 0.0004, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.08903089910745621, |
|
"learning_rate": 8.242528542646072e-06, |
|
"loss": 0.0018, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.109068363904953, |
|
"learning_rate": 8.238331094694426e-06, |
|
"loss": 0.0007, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.005220823921263218, |
|
"learning_rate": 8.234133646742781e-06, |
|
"loss": 0.0044, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.09990601986646652, |
|
"learning_rate": 8.229936198791136e-06, |
|
"loss": 0.0012, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.07386545091867447, |
|
"learning_rate": 8.22573875083949e-06, |
|
"loss": 0.0008, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.1059875562787056, |
|
"learning_rate": 8.221541302887845e-06, |
|
"loss": 0.0007, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.07418499141931534, |
|
"learning_rate": 8.2173438549362e-06, |
|
"loss": 0.0014, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.01813233084976673, |
|
"learning_rate": 8.213146406984554e-06, |
|
"loss": 0.0007, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.01308862492442131, |
|
"learning_rate": 8.208948959032909e-06, |
|
"loss": 0.0002, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.6047486066818237, |
|
"learning_rate": 8.204751511081263e-06, |
|
"loss": 0.0054, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.030174901708960533, |
|
"learning_rate": 8.200554063129618e-06, |
|
"loss": 0.0007, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.07209368050098419, |
|
"learning_rate": 8.196356615177972e-06, |
|
"loss": 0.0011, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.6904421448707581, |
|
"learning_rate": 8.192159167226327e-06, |
|
"loss": 0.0018, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.11670655757188797, |
|
"learning_rate": 8.187961719274682e-06, |
|
"loss": 0.0028, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.016770286485552788, |
|
"learning_rate": 8.183764271323036e-06, |
|
"loss": 0.0005, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.021145779639482498, |
|
"learning_rate": 8.179566823371391e-06, |
|
"loss": 0.0018, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.02627403475344181, |
|
"learning_rate": 8.175369375419746e-06, |
|
"loss": 0.0014, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.01143514271825552, |
|
"learning_rate": 8.1711719274681e-06, |
|
"loss": 0.0007, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.05208797752857208, |
|
"learning_rate": 8.166974479516455e-06, |
|
"loss": 0.0023, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.08176781982183456, |
|
"learning_rate": 8.16277703156481e-06, |
|
"loss": 0.0028, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.0852191299200058, |
|
"learning_rate": 8.158579583613164e-06, |
|
"loss": 0.0023, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.007911001332104206, |
|
"learning_rate": 8.154382135661519e-06, |
|
"loss": 0.0032, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.012264366261661053, |
|
"learning_rate": 8.150184687709873e-06, |
|
"loss": 0.0011, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.012588711455464363, |
|
"learning_rate": 8.145987239758228e-06, |
|
"loss": 0.0003, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.04339655116200447, |
|
"learning_rate": 8.141789791806583e-06, |
|
"loss": 0.0016, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.009127240628004074, |
|
"learning_rate": 8.137592343854937e-06, |
|
"loss": 0.0029, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.05993298068642616, |
|
"learning_rate": 8.133394895903292e-06, |
|
"loss": 0.0008, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.0026275282725691795, |
|
"learning_rate": 8.129197447951646e-06, |
|
"loss": 0.0009, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.04943431168794632, |
|
"learning_rate": 8.125000000000001e-06, |
|
"loss": 0.0014, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.018763918429613113, |
|
"learning_rate": 8.120802552048356e-06, |
|
"loss": 0.0007, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.5426563620567322, |
|
"learning_rate": 8.11660510409671e-06, |
|
"loss": 0.0021, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.4148786962032318, |
|
"learning_rate": 8.112407656145065e-06, |
|
"loss": 0.0014, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.479037344455719, |
|
"learning_rate": 8.10821020819342e-06, |
|
"loss": 0.0034, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.005279048811644316, |
|
"learning_rate": 8.104012760241774e-06, |
|
"loss": 0.0015, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.003773655276745558, |
|
"learning_rate": 8.099815312290129e-06, |
|
"loss": 0.0011, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.0063260081224143505, |
|
"learning_rate": 8.095617864338483e-06, |
|
"loss": 0.0011, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.030879395082592964, |
|
"learning_rate": 8.091420416386836e-06, |
|
"loss": 0.0092, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.020290937274694443, |
|
"learning_rate": 8.087222968435193e-06, |
|
"loss": 0.0005, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.007667260244488716, |
|
"learning_rate": 8.083025520483545e-06, |
|
"loss": 0.0006, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.004535841289907694, |
|
"learning_rate": 8.078828072531902e-06, |
|
"loss": 0.0005, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.011321828700602055, |
|
"learning_rate": 8.074630624580256e-06, |
|
"loss": 0.002, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.3192904591560364, |
|
"learning_rate": 8.070433176628611e-06, |
|
"loss": 0.0031, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.017921652644872665, |
|
"learning_rate": 8.066235728676966e-06, |
|
"loss": 0.0008, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.0025262893177568913, |
|
"learning_rate": 8.06203828072532e-06, |
|
"loss": 0.0006, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.013485982082784176, |
|
"learning_rate": 8.057840832773675e-06, |
|
"loss": 0.0016, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.004218879155814648, |
|
"learning_rate": 8.053643384822028e-06, |
|
"loss": 0.0004, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.1440621167421341, |
|
"learning_rate": 8.049445936870384e-06, |
|
"loss": 0.0023, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.1656440794467926, |
|
"learning_rate": 8.045248488918737e-06, |
|
"loss": 0.0004, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.05483277887105942, |
|
"learning_rate": 8.041051040967093e-06, |
|
"loss": 0.0022, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.011686104349792004, |
|
"learning_rate": 8.036853593015446e-06, |
|
"loss": 0.0002, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.23753832280635834, |
|
"learning_rate": 8.032656145063803e-06, |
|
"loss": 0.0022, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.14190416038036346, |
|
"learning_rate": 8.028458697112157e-06, |
|
"loss": 0.001, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.04893433302640915, |
|
"learning_rate": 8.024261249160512e-06, |
|
"loss": 0.0019, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.06139785423874855, |
|
"learning_rate": 8.020063801208866e-06, |
|
"loss": 0.0023, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.31157851219177246, |
|
"learning_rate": 8.01586635325722e-06, |
|
"loss": 0.0031, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.01513377670198679, |
|
"learning_rate": 8.011668905305576e-06, |
|
"loss": 0.0032, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.014633961953222752, |
|
"learning_rate": 8.007471457353929e-06, |
|
"loss": 0.0073, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.03786788135766983, |
|
"learning_rate": 8.003274009402285e-06, |
|
"loss": 0.0005, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.013805943541228771, |
|
"learning_rate": 7.999076561450638e-06, |
|
"loss": 0.0006, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.423827201128006, |
|
"learning_rate": 7.994879113498994e-06, |
|
"loss": 0.0024, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.15604566037654877, |
|
"learning_rate": 7.990681665547347e-06, |
|
"loss": 0.004, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.1691051870584488, |
|
"learning_rate": 7.986484217595702e-06, |
|
"loss": 0.0007, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.10865192860364914, |
|
"learning_rate": 7.982286769644058e-06, |
|
"loss": 0.0008, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.002326708287000656, |
|
"learning_rate": 7.978089321692411e-06, |
|
"loss": 0.0047, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.0631171315908432, |
|
"learning_rate": 7.973891873740767e-06, |
|
"loss": 0.001, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.003491542534902692, |
|
"learning_rate": 7.96969442578912e-06, |
|
"loss": 0.0019, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.023969637230038643, |
|
"learning_rate": 7.965496977837476e-06, |
|
"loss": 0.0013, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.1500246375799179, |
|
"learning_rate": 7.96129952988583e-06, |
|
"loss": 0.0012, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.05663210526108742, |
|
"learning_rate": 7.957102081934186e-06, |
|
"loss": 0.002, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.004468943923711777, |
|
"learning_rate": 7.952904633982539e-06, |
|
"loss": 0.0006, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.049168992787599564, |
|
"learning_rate": 7.948707186030893e-06, |
|
"loss": 0.0028, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.017784954980015755, |
|
"learning_rate": 7.944509738079248e-06, |
|
"loss": 0.0034, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.0007347997161559761, |
|
"learning_rate": 7.940312290127602e-06, |
|
"loss": 0.0009, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.07714752852916718, |
|
"learning_rate": 7.936114842175959e-06, |
|
"loss": 0.0005, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.24701713025569916, |
|
"learning_rate": 7.931917394224312e-06, |
|
"loss": 0.0027, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.18050765991210938, |
|
"learning_rate": 7.927719946272668e-06, |
|
"loss": 0.0021, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.10449690371751785, |
|
"learning_rate": 7.923522498321021e-06, |
|
"loss": 0.0026, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.0794496163725853, |
|
"learning_rate": 7.919325050369376e-06, |
|
"loss": 0.002, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.005070607643574476, |
|
"learning_rate": 7.91512760241773e-06, |
|
"loss": 0.0015, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.23382118344306946, |
|
"learning_rate": 7.910930154466085e-06, |
|
"loss": 0.0014, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.023488784208893776, |
|
"learning_rate": 7.90673270651444e-06, |
|
"loss": 0.001, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.08839926868677139, |
|
"learning_rate": 7.902535258562794e-06, |
|
"loss": 0.0004, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.11559606343507767, |
|
"learning_rate": 7.898337810611149e-06, |
|
"loss": 0.0061, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.0005539359408430755, |
|
"learning_rate": 7.894140362659503e-06, |
|
"loss": 0.0017, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.04608330875635147, |
|
"learning_rate": 7.88994291470786e-06, |
|
"loss": 0.0006, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.029279248788952827, |
|
"learning_rate": 7.885745466756213e-06, |
|
"loss": 0.002, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.055397383868694305, |
|
"learning_rate": 7.881548018804567e-06, |
|
"loss": 0.004, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.014438183978199959, |
|
"learning_rate": 7.877350570852922e-06, |
|
"loss": 0.0007, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.00333590479567647, |
|
"learning_rate": 7.873153122901276e-06, |
|
"loss": 0.002, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.01975451223552227, |
|
"learning_rate": 7.868955674949631e-06, |
|
"loss": 0.0017, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.2275656759738922, |
|
"learning_rate": 7.864758226997986e-06, |
|
"loss": 0.0007, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.009497923776507378, |
|
"learning_rate": 7.86056077904634e-06, |
|
"loss": 0.0005, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.12707078456878662, |
|
"learning_rate": 7.856363331094695e-06, |
|
"loss": 0.0022, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.13071304559707642, |
|
"learning_rate": 7.85216588314305e-06, |
|
"loss": 0.0012, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.12163705378770828, |
|
"learning_rate": 7.847968435191404e-06, |
|
"loss": 0.0025, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.12689749896526337, |
|
"learning_rate": 7.843770987239759e-06, |
|
"loss": 0.0018, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.24196821451187134, |
|
"learning_rate": 7.839573539288113e-06, |
|
"loss": 0.0024, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.0556950718164444, |
|
"learning_rate": 7.835376091336468e-06, |
|
"loss": 0.001, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.004124140832573175, |
|
"learning_rate": 7.831178643384823e-06, |
|
"loss": 0.0006, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.03047008253633976, |
|
"learning_rate": 7.826981195433177e-06, |
|
"loss": 0.002, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.6133390069007874, |
|
"learning_rate": 7.822783747481532e-06, |
|
"loss": 0.0014, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.1056409627199173, |
|
"learning_rate": 7.818586299529886e-06, |
|
"loss": 0.0018, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.02283824421465397, |
|
"learning_rate": 7.814388851578241e-06, |
|
"loss": 0.0018, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.04858771339058876, |
|
"learning_rate": 7.810191403626596e-06, |
|
"loss": 0.0027, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.001151357777416706, |
|
"learning_rate": 7.80599395567495e-06, |
|
"loss": 0.0011, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.022891288623213768, |
|
"learning_rate": 7.801796507723305e-06, |
|
"loss": 0.0008, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.2969476580619812, |
|
"learning_rate": 7.79759905977166e-06, |
|
"loss": 0.0008, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.07708311080932617, |
|
"learning_rate": 7.793401611820014e-06, |
|
"loss": 0.003, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.048849739134311676, |
|
"learning_rate": 7.789204163868369e-06, |
|
"loss": 0.0026, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.04516473039984703, |
|
"learning_rate": 7.785006715916723e-06, |
|
"loss": 0.002, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.0004609221068676561, |
|
"learning_rate": 7.780809267965078e-06, |
|
"loss": 0.0015, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.04255954921245575, |
|
"learning_rate": 7.776611820013433e-06, |
|
"loss": 0.0022, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.22186703979969025, |
|
"learning_rate": 7.772414372061787e-06, |
|
"loss": 0.0015, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.014675665646791458, |
|
"learning_rate": 7.768216924110142e-06, |
|
"loss": 0.0005, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.11927191913127899, |
|
"learning_rate": 7.764019476158496e-06, |
|
"loss": 0.0016, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.24450215697288513, |
|
"learning_rate": 7.759822028206851e-06, |
|
"loss": 0.0009, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.012072744779288769, |
|
"learning_rate": 7.755624580255206e-06, |
|
"loss": 0.0005, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.11228229105472565, |
|
"learning_rate": 7.75142713230356e-06, |
|
"loss": 0.0031, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.007494674064218998, |
|
"learning_rate": 7.747229684351915e-06, |
|
"loss": 0.0074, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.020272986963391304, |
|
"learning_rate": 7.74303223640027e-06, |
|
"loss": 0.001, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.041734930127859116, |
|
"learning_rate": 7.738834788448624e-06, |
|
"loss": 0.0011, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.033374298363924026, |
|
"learning_rate": 7.734637340496979e-06, |
|
"loss": 0.0042, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.007725745439529419, |
|
"learning_rate": 7.730439892545333e-06, |
|
"loss": 0.0008, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.40455764532089233, |
|
"learning_rate": 7.726242444593688e-06, |
|
"loss": 0.0013, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.0638241171836853, |
|
"learning_rate": 7.722044996642043e-06, |
|
"loss": 0.0012, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.5149130821228027, |
|
"learning_rate": 7.717847548690397e-06, |
|
"loss": 0.0022, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.041986189782619476, |
|
"learning_rate": 7.71365010073875e-06, |
|
"loss": 0.0028, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.0023236775305122137, |
|
"learning_rate": 7.709452652787106e-06, |
|
"loss": 0.0006, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.08948805928230286, |
|
"learning_rate": 7.705255204835461e-06, |
|
"loss": 0.0007, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.03315718472003937, |
|
"learning_rate": 7.701057756883816e-06, |
|
"loss": 0.0018, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.15775519609451294, |
|
"learning_rate": 7.69686030893217e-06, |
|
"loss": 0.0009, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.00936898123472929, |
|
"learning_rate": 7.692662860980525e-06, |
|
"loss": 0.002, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.00790819339454174, |
|
"learning_rate": 7.68846541302888e-06, |
|
"loss": 0.0011, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.021303502842783928, |
|
"learning_rate": 7.684267965077234e-06, |
|
"loss": 0.0007, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.0739978775382042, |
|
"learning_rate": 7.680070517125589e-06, |
|
"loss": 0.0019, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.0839531272649765, |
|
"learning_rate": 7.675873069173942e-06, |
|
"loss": 0.0021, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.6550424695014954, |
|
"learning_rate": 7.671675621222298e-06, |
|
"loss": 0.0017, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.015562259592115879, |
|
"learning_rate": 7.667478173270651e-06, |
|
"loss": 0.0006, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.29718390107154846, |
|
"learning_rate": 7.663280725319007e-06, |
|
"loss": 0.0016, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.02291831001639366, |
|
"learning_rate": 7.659083277367362e-06, |
|
"loss": 0.0004, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.3322148323059082, |
|
"learning_rate": 7.654885829415716e-06, |
|
"loss": 0.0037, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.6336252689361572, |
|
"learning_rate": 7.650688381464071e-06, |
|
"loss": 0.0022, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.17173808813095093, |
|
"learning_rate": 7.646490933512426e-06, |
|
"loss": 0.0007, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.01680012419819832, |
|
"learning_rate": 7.64229348556078e-06, |
|
"loss": 0.0013, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.07509610801935196, |
|
"learning_rate": 7.638096037609133e-06, |
|
"loss": 0.0054, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.01283180620521307, |
|
"learning_rate": 7.63389858965749e-06, |
|
"loss": 0.0016, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.03960631042718887, |
|
"learning_rate": 7.629701141705842e-06, |
|
"loss": 0.0005, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.03129389509558678, |
|
"learning_rate": 7.625503693754198e-06, |
|
"loss": 0.0005, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.01344276126474142, |
|
"learning_rate": 7.621306245802553e-06, |
|
"loss": 0.0012, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.010192753747105598, |
|
"learning_rate": 7.617108797850907e-06, |
|
"loss": 0.0006, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.029010513797402382, |
|
"learning_rate": 7.612911349899263e-06, |
|
"loss": 0.0031, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.015769800171256065, |
|
"learning_rate": 7.6087139019476164e-06, |
|
"loss": 0.0005, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.015835179015994072, |
|
"learning_rate": 7.604516453995972e-06, |
|
"loss": 0.0005, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.009272762574255466, |
|
"learning_rate": 7.600319006044326e-06, |
|
"loss": 0.0018, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.06365930289030075, |
|
"learning_rate": 7.59612155809268e-06, |
|
"loss": 0.0011, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.011347809806466103, |
|
"learning_rate": 7.591924110141035e-06, |
|
"loss": 0.0011, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.029438406229019165, |
|
"learning_rate": 7.5877266621893895e-06, |
|
"loss": 0.0018, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.0030098084826022387, |
|
"learning_rate": 7.583529214237743e-06, |
|
"loss": 0.0026, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.07430537790060043, |
|
"learning_rate": 7.579331766286099e-06, |
|
"loss": 0.0011, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.2529279291629791, |
|
"learning_rate": 7.5751343183344525e-06, |
|
"loss": 0.001, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.0054864161647856236, |
|
"learning_rate": 7.570936870382808e-06, |
|
"loss": 0.0004, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.05911405757069588, |
|
"learning_rate": 7.566739422431163e-06, |
|
"loss": 0.0005, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.03409591317176819, |
|
"learning_rate": 7.562541974479517e-06, |
|
"loss": 0.0005, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.013324552215635777, |
|
"learning_rate": 7.558344526527872e-06, |
|
"loss": 0.0016, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.012020808644592762, |
|
"learning_rate": 7.5541470785762265e-06, |
|
"loss": 0.0038, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.2555389404296875, |
|
"learning_rate": 7.549949630624581e-06, |
|
"loss": 0.0011, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.09523122757673264, |
|
"learning_rate": 7.545752182672935e-06, |
|
"loss": 0.0008, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.07814309000968933, |
|
"learning_rate": 7.54155473472129e-06, |
|
"loss": 0.003, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.018278853967785835, |
|
"learning_rate": 7.537357286769644e-06, |
|
"loss": 0.0016, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.13258874416351318, |
|
"learning_rate": 7.5331598388179996e-06, |
|
"loss": 0.002, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.008451174944639206, |
|
"learning_rate": 7.528962390866353e-06, |
|
"loss": 0.0016, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.028404515236616135, |
|
"learning_rate": 7.524764942914709e-06, |
|
"loss": 0.0007, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.46089375019073486, |
|
"learning_rate": 7.520567494963063e-06, |
|
"loss": 0.0011, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.19054672122001648, |
|
"learning_rate": 7.516370047011417e-06, |
|
"loss": 0.0037, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.7026522755622864, |
|
"learning_rate": 7.512172599059773e-06, |
|
"loss": 0.0026, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.026187479496002197, |
|
"learning_rate": 7.507975151108126e-06, |
|
"loss": 0.0013, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.0011275768047198653, |
|
"learning_rate": 7.503777703156482e-06, |
|
"loss": 0.0026, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.007955752313137054, |
|
"learning_rate": 7.499580255204836e-06, |
|
"loss": 0.0011, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.008177239447832108, |
|
"learning_rate": 7.495382807253191e-06, |
|
"loss": 0.0013, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.01679716818034649, |
|
"learning_rate": 7.491185359301545e-06, |
|
"loss": 0.0008, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.022488052025437355, |
|
"learning_rate": 7.4869879113499e-06, |
|
"loss": 0.0005, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.00388827221468091, |
|
"learning_rate": 7.482790463398254e-06, |
|
"loss": 0.0054, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.040717240422964096, |
|
"learning_rate": 7.478593015446609e-06, |
|
"loss": 0.001, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.0023668904323130846, |
|
"learning_rate": 7.474395567494964e-06, |
|
"loss": 0.0004, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.13439880311489105, |
|
"learning_rate": 7.470198119543318e-06, |
|
"loss": 0.007, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.3206893503665924, |
|
"learning_rate": 7.4660006715916734e-06, |
|
"loss": 0.0048, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.09794365614652634, |
|
"learning_rate": 7.461803223640027e-06, |
|
"loss": 0.0006, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.3034574091434479, |
|
"learning_rate": 7.457605775688383e-06, |
|
"loss": 0.0022, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.06691265106201172, |
|
"learning_rate": 7.4534083277367364e-06, |
|
"loss": 0.0008, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.10203710198402405, |
|
"learning_rate": 7.449210879785092e-06, |
|
"loss": 0.0007, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.014348863624036312, |
|
"learning_rate": 7.445013431833446e-06, |
|
"loss": 0.0008, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.03633236512541771, |
|
"learning_rate": 7.4408159838818e-06, |
|
"loss": 0.002, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.06054764240980148, |
|
"learning_rate": 7.436618535930155e-06, |
|
"loss": 0.0024, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.021807057783007622, |
|
"learning_rate": 7.4324210879785095e-06, |
|
"loss": 0.0019, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.024098997935652733, |
|
"learning_rate": 7.428223640026865e-06, |
|
"loss": 0.0008, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.08782277256250381, |
|
"learning_rate": 7.424026192075219e-06, |
|
"loss": 0.0018, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.0001189708709717, |
|
"learning_rate": 7.419828744123574e-06, |
|
"loss": 0.0021, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.03228935971856117, |
|
"learning_rate": 7.415631296171928e-06, |
|
"loss": 0.0008, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.014304124750196934, |
|
"learning_rate": 7.411433848220283e-06, |
|
"loss": 0.0062, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.0017881994135677814, |
|
"learning_rate": 7.407236400268637e-06, |
|
"loss": 0.0013, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.16372914612293243, |
|
"learning_rate": 7.403038952316992e-06, |
|
"loss": 0.0015, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.04461158439517021, |
|
"learning_rate": 7.3988415043653465e-06, |
|
"loss": 0.0067, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.09605967253446579, |
|
"learning_rate": 7.394644056413701e-06, |
|
"loss": 0.0008, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.6841378211975098, |
|
"learning_rate": 7.390446608462055e-06, |
|
"loss": 0.005, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.04636238515377045, |
|
"learning_rate": 7.38624916051041e-06, |
|
"loss": 0.0007, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.5079997777938843, |
|
"learning_rate": 7.382051712558766e-06, |
|
"loss": 0.0018, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.09513850510120392, |
|
"learning_rate": 7.3778542646071196e-06, |
|
"loss": 0.0024, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.06880282610654831, |
|
"learning_rate": 7.373656816655474e-06, |
|
"loss": 0.0034, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.18673266470432281, |
|
"learning_rate": 7.369459368703829e-06, |
|
"loss": 0.0037, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.03851670026779175, |
|
"learning_rate": 7.365261920752183e-06, |
|
"loss": 0.0005, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.021820688620209694, |
|
"learning_rate": 7.361064472800537e-06, |
|
"loss": 0.0009, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.008911089040338993, |
|
"learning_rate": 7.356867024848893e-06, |
|
"loss": 0.0019, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.016666969284415245, |
|
"learning_rate": 7.3526695768972464e-06, |
|
"loss": 0.0006, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.0268320981413126, |
|
"learning_rate": 7.348472128945602e-06, |
|
"loss": 0.0018, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.006555848754942417, |
|
"learning_rate": 7.344274680993956e-06, |
|
"loss": 0.0009, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.014750578440725803, |
|
"learning_rate": 7.340077233042311e-06, |
|
"loss": 0.0011, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.011571898125112057, |
|
"learning_rate": 7.335879785090666e-06, |
|
"loss": 0.0025, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.014332173392176628, |
|
"learning_rate": 7.33168233713902e-06, |
|
"loss": 0.0006, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.03584123030304909, |
|
"learning_rate": 7.327484889187375e-06, |
|
"loss": 0.001, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.0009831301867961884, |
|
"learning_rate": 7.323287441235729e-06, |
|
"loss": 0.0016, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.01324765756726265, |
|
"learning_rate": 7.319089993284084e-06, |
|
"loss": 0.0005, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.2932877540588379, |
|
"learning_rate": 7.314892545332438e-06, |
|
"loss": 0.0017, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.01268022321164608, |
|
"learning_rate": 7.3106950973807935e-06, |
|
"loss": 0.003, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.04986048489809036, |
|
"learning_rate": 7.306497649429147e-06, |
|
"loss": 0.0007, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.019490044564008713, |
|
"learning_rate": 7.302300201477503e-06, |
|
"loss": 0.0052, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.7576485872268677, |
|
"learning_rate": 7.2981027535258565e-06, |
|
"loss": 0.0013, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.1456100046634674, |
|
"learning_rate": 7.293905305574212e-06, |
|
"loss": 0.0009, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.030114591121673584, |
|
"learning_rate": 7.2897078576225665e-06, |
|
"loss": 0.0015, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.02888800948858261, |
|
"learning_rate": 7.28551040967092e-06, |
|
"loss": 0.0003, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.003675349522382021, |
|
"learning_rate": 7.281312961719276e-06, |
|
"loss": 0.0048, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.010422208346426487, |
|
"learning_rate": 7.2771155137676296e-06, |
|
"loss": 0.0021, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.009601653553545475, |
|
"learning_rate": 7.272918065815985e-06, |
|
"loss": 0.0018, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.00894622690975666, |
|
"learning_rate": 7.268720617864339e-06, |
|
"loss": 0.0005, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.01711229234933853, |
|
"learning_rate": 7.264523169912694e-06, |
|
"loss": 0.0019, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.25115489959716797, |
|
"learning_rate": 7.260325721961048e-06, |
|
"loss": 0.006, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.028431080281734467, |
|
"learning_rate": 7.256128274009403e-06, |
|
"loss": 0.0004, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.005961297079920769, |
|
"learning_rate": 7.251930826057757e-06, |
|
"loss": 0.0014, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.017467642202973366, |
|
"learning_rate": 7.247733378106112e-06, |
|
"loss": 0.0008, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.011652447283267975, |
|
"learning_rate": 7.243535930154467e-06, |
|
"loss": 0.0068, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.0026287129148840904, |
|
"learning_rate": 7.239338482202821e-06, |
|
"loss": 0.002, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.27473968267440796, |
|
"learning_rate": 7.2351410342511766e-06, |
|
"loss": 0.0024, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.010974191129207611, |
|
"learning_rate": 7.23094358629953e-06, |
|
"loss": 0.0006, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.0035286422353237867, |
|
"learning_rate": 7.226746138347886e-06, |
|
"loss": 0.0068, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.09986690431833267, |
|
"learning_rate": 7.22254869039624e-06, |
|
"loss": 0.0006, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.0497036911547184, |
|
"learning_rate": 7.218351242444594e-06, |
|
"loss": 0.0014, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.0852247104048729, |
|
"learning_rate": 7.214153794492949e-06, |
|
"loss": 0.0039, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.2507276237010956, |
|
"learning_rate": 7.2099563465413034e-06, |
|
"loss": 0.0084, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.08247136324644089, |
|
"learning_rate": 7.205758898589657e-06, |
|
"loss": 0.0004, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.010528313927352428, |
|
"learning_rate": 7.201561450638013e-06, |
|
"loss": 0.003, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.014672808349132538, |
|
"learning_rate": 7.197364002686368e-06, |
|
"loss": 0.0015, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.027063751593232155, |
|
"learning_rate": 7.193166554734722e-06, |
|
"loss": 0.0004, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.007319859229028225, |
|
"learning_rate": 7.1889691067830765e-06, |
|
"loss": 0.0009, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.17890995740890503, |
|
"learning_rate": 7.184771658831431e-06, |
|
"loss": 0.0006, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.0006071716779842973, |
|
"learning_rate": 7.180574210879786e-06, |
|
"loss": 0.0091, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.0499555803835392, |
|
"learning_rate": 7.17637676292814e-06, |
|
"loss": 0.0012, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.1350906640291214, |
|
"learning_rate": 7.172179314976495e-06, |
|
"loss": 0.0011, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.026632359251379967, |
|
"learning_rate": 7.167981867024849e-06, |
|
"loss": 0.0014, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.011988186277449131, |
|
"learning_rate": 7.163784419073204e-06, |
|
"loss": 0.0044, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.004848783370107412, |
|
"learning_rate": 7.159586971121558e-06, |
|
"loss": 0.0031, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.1688844859600067, |
|
"learning_rate": 7.1553895231699135e-06, |
|
"loss": 0.0012, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.05839391052722931, |
|
"learning_rate": 7.151192075218268e-06, |
|
"loss": 0.0014, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.006566599011421204, |
|
"learning_rate": 7.146994627266623e-06, |
|
"loss": 0.0014, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.028615593910217285, |
|
"learning_rate": 7.142797179314977e-06, |
|
"loss": 0.0019, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.029592135921120644, |
|
"learning_rate": 7.138599731363331e-06, |
|
"loss": 0.0008, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.027602553367614746, |
|
"learning_rate": 7.1344022834116866e-06, |
|
"loss": 0.0006, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.012585037387907505, |
|
"learning_rate": 7.13020483546004e-06, |
|
"loss": 0.0016, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.003249362576752901, |
|
"learning_rate": 7.126007387508396e-06, |
|
"loss": 0.0019, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.6995984315872192, |
|
"learning_rate": 7.1218099395567496e-06, |
|
"loss": 0.0043, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.02927136979997158, |
|
"learning_rate": 7.117612491605105e-06, |
|
"loss": 0.0014, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.021853366866707802, |
|
"learning_rate": 7.113415043653459e-06, |
|
"loss": 0.002, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.012909365817904472, |
|
"learning_rate": 7.109217595701814e-06, |
|
"loss": 0.0006, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.09880993515253067, |
|
"learning_rate": 7.105020147750169e-06, |
|
"loss": 0.0012, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.07496526092290878, |
|
"learning_rate": 7.100822699798523e-06, |
|
"loss": 0.0013, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.34422990679740906, |
|
"learning_rate": 7.096625251846878e-06, |
|
"loss": 0.0015, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.006235859822481871, |
|
"learning_rate": 7.092427803895232e-06, |
|
"loss": 0.0018, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.03030078299343586, |
|
"learning_rate": 7.088230355943587e-06, |
|
"loss": 0.0011, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.06541412323713303, |
|
"learning_rate": 7.084032907991941e-06, |
|
"loss": 0.001, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.000910624279640615, |
|
"learning_rate": 7.079835460040297e-06, |
|
"loss": 0.0032, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.004598297644406557, |
|
"learning_rate": 7.07563801208865e-06, |
|
"loss": 0.0007, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.02102428674697876, |
|
"learning_rate": 7.071440564137006e-06, |
|
"loss": 0.0022, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.17559894919395447, |
|
"learning_rate": 7.06724311618536e-06, |
|
"loss": 0.0029, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.07612307369709015, |
|
"learning_rate": 7.063045668233714e-06, |
|
"loss": 0.0014, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.25046348571777344, |
|
"learning_rate": 7.05884822028207e-06, |
|
"loss": 0.0009, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.01964881271123886, |
|
"learning_rate": 7.0546507723304234e-06, |
|
"loss": 0.0009, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.13353663682937622, |
|
"learning_rate": 7.050453324378779e-06, |
|
"loss": 0.003, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.31505197286605835, |
|
"learning_rate": 7.046255876427133e-06, |
|
"loss": 0.0011, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.05410172790288925, |
|
"learning_rate": 7.042058428475488e-06, |
|
"loss": 0.001, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.015809668228030205, |
|
"learning_rate": 7.037860980523842e-06, |
|
"loss": 0.0031, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.0416773185133934, |
|
"learning_rate": 7.0336635325721965e-06, |
|
"loss": 0.001, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.018480846658349037, |
|
"learning_rate": 7.029466084620551e-06, |
|
"loss": 0.0023, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.0960954800248146, |
|
"learning_rate": 7.025268636668906e-06, |
|
"loss": 0.0011, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.08874082565307617, |
|
"learning_rate": 7.02107118871726e-06, |
|
"loss": 0.0005, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.011005674488842487, |
|
"learning_rate": 7.016873740765615e-06, |
|
"loss": 0.001, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.17913727462291718, |
|
"learning_rate": 7.0126762928139705e-06, |
|
"loss": 0.0005, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.010488308034837246, |
|
"learning_rate": 7.008478844862324e-06, |
|
"loss": 0.0008, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.03276985511183739, |
|
"learning_rate": 7.00428139691068e-06, |
|
"loss": 0.0034, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.03501226380467415, |
|
"learning_rate": 7.0000839489590335e-06, |
|
"loss": 0.0004, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.04926338791847229, |
|
"learning_rate": 6.995886501007388e-06, |
|
"loss": 0.0007, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.00882528442889452, |
|
"learning_rate": 6.991689053055743e-06, |
|
"loss": 0.0009, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.0117332823574543, |
|
"learning_rate": 6.987491605104097e-06, |
|
"loss": 0.0006, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.019057177007198334, |
|
"learning_rate": 6.983294157152451e-06, |
|
"loss": 0.0038, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.0012227203696966171, |
|
"learning_rate": 6.9790967092008066e-06, |
|
"loss": 0.0008, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.03291918337345123, |
|
"learning_rate": 6.97489926124916e-06, |
|
"loss": 0.0095, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.7484022378921509, |
|
"learning_rate": 6.970701813297516e-06, |
|
"loss": 0.0129, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.04075862094759941, |
|
"learning_rate": 6.96650436534587e-06, |
|
"loss": 0.0012, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.12988588213920593, |
|
"learning_rate": 6.962306917394225e-06, |
|
"loss": 0.0007, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.04254931956529617, |
|
"learning_rate": 6.95810946944258e-06, |
|
"loss": 0.0004, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.17507688701152802, |
|
"learning_rate": 6.953912021490934e-06, |
|
"loss": 0.0019, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.018078705295920372, |
|
"learning_rate": 6.949714573539289e-06, |
|
"loss": 0.0056, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.016659587621688843, |
|
"learning_rate": 6.945517125587643e-06, |
|
"loss": 0.0005, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.05568535998463631, |
|
"learning_rate": 6.941319677635998e-06, |
|
"loss": 0.0047, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.004235047847032547, |
|
"learning_rate": 6.937122229684352e-06, |
|
"loss": 0.0008, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.017537878826260567, |
|
"learning_rate": 6.932924781732707e-06, |
|
"loss": 0.0019, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.023402167484164238, |
|
"learning_rate": 6.928727333781061e-06, |
|
"loss": 0.0011, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.05992670729756355, |
|
"learning_rate": 6.924529885829417e-06, |
|
"loss": 0.0021, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.005670278798788786, |
|
"learning_rate": 6.920332437877771e-06, |
|
"loss": 0.0007, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.03450972959399223, |
|
"learning_rate": 6.916134989926126e-06, |
|
"loss": 0.0044, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.00952198263257742, |
|
"learning_rate": 6.9119375419744805e-06, |
|
"loss": 0.0023, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.14748263359069824, |
|
"learning_rate": 6.907740094022834e-06, |
|
"loss": 0.0006, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.008964456617832184, |
|
"learning_rate": 6.90354264607119e-06, |
|
"loss": 0.0025, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.5854199528694153, |
|
"learning_rate": 6.8993451981195435e-06, |
|
"loss": 0.0018, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.008997979573905468, |
|
"learning_rate": 6.895147750167899e-06, |
|
"loss": 0.0012, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.013361281715333462, |
|
"learning_rate": 6.890950302216253e-06, |
|
"loss": 0.0006, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.01578344777226448, |
|
"learning_rate": 6.886752854264608e-06, |
|
"loss": 0.0005, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.011211412958800793, |
|
"learning_rate": 6.882555406312962e-06, |
|
"loss": 0.0047, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.032785531133413315, |
|
"learning_rate": 6.8783579583613166e-06, |
|
"loss": 0.0004, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.2810578942298889, |
|
"learning_rate": 6.874160510409672e-06, |
|
"loss": 0.0007, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.012072360143065453, |
|
"learning_rate": 6.869963062458026e-06, |
|
"loss": 0.0006, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.033594295382499695, |
|
"learning_rate": 6.865765614506381e-06, |
|
"loss": 0.0008, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.09040901064872742, |
|
"learning_rate": 6.861568166554735e-06, |
|
"loss": 0.0004, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.006718501914292574, |
|
"learning_rate": 6.8573707186030905e-06, |
|
"loss": 0.0094, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.3409207761287689, |
|
"learning_rate": 6.853173270651444e-06, |
|
"loss": 0.0009, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.017661171033978462, |
|
"learning_rate": 6.8489758226998e-06, |
|
"loss": 0.0006, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.4969084560871124, |
|
"learning_rate": 6.8447783747481535e-06, |
|
"loss": 0.0021, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.08554006367921829, |
|
"learning_rate": 6.840580926796508e-06, |
|
"loss": 0.0018, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.0186705831438303, |
|
"learning_rate": 6.836383478844863e-06, |
|
"loss": 0.002, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.020978650078177452, |
|
"learning_rate": 6.832186030893217e-06, |
|
"loss": 0.0045, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.043272119015455246, |
|
"learning_rate": 6.827988582941573e-06, |
|
"loss": 0.0018, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.05586528778076172, |
|
"learning_rate": 6.823791134989927e-06, |
|
"loss": 0.0005, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.07728602737188339, |
|
"learning_rate": 6.819593687038282e-06, |
|
"loss": 0.0013, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.23588360846042633, |
|
"learning_rate": 6.815396239086636e-06, |
|
"loss": 0.001, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.016243470832705498, |
|
"learning_rate": 6.8111987911349904e-06, |
|
"loss": 0.0003, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.048352282494306564, |
|
"learning_rate": 6.807001343183345e-06, |
|
"loss": 0.0008, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.013593564741313457, |
|
"learning_rate": 6.8028038952317e-06, |
|
"loss": 0.001, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.1125347837805748, |
|
"learning_rate": 6.798606447280054e-06, |
|
"loss": 0.0007, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.040121354162693024, |
|
"learning_rate": 6.794408999328409e-06, |
|
"loss": 0.0023, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.004880643915385008, |
|
"learning_rate": 6.790211551376763e-06, |
|
"loss": 0.0013, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.03261597827076912, |
|
"learning_rate": 6.786014103425118e-06, |
|
"loss": 0.0048, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.001294650137424469, |
|
"learning_rate": 6.781816655473474e-06, |
|
"loss": 0.0062, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.013758821412920952, |
|
"learning_rate": 6.777619207521827e-06, |
|
"loss": 0.0004, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.2742080092430115, |
|
"learning_rate": 6.773421759570182e-06, |
|
"loss": 0.0015, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.09271080791950226, |
|
"learning_rate": 6.769224311618537e-06, |
|
"loss": 0.0025, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.004653418902307749, |
|
"learning_rate": 6.765026863666891e-06, |
|
"loss": 0.0052, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.0033459344413131475, |
|
"learning_rate": 6.760829415715245e-06, |
|
"loss": 0.0019, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.24762499332427979, |
|
"learning_rate": 6.7566319677636005e-06, |
|
"loss": 0.0005, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.2468775510787964, |
|
"learning_rate": 6.752434519811954e-06, |
|
"loss": 0.0014, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.06739424914121628, |
|
"learning_rate": 6.74823707186031e-06, |
|
"loss": 0.0004, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.01460706815123558, |
|
"learning_rate": 6.7440396239086635e-06, |
|
"loss": 0.0026, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.0012919761938974261, |
|
"learning_rate": 6.739842175957019e-06, |
|
"loss": 0.0027, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.16211023926734924, |
|
"learning_rate": 6.7356447280053736e-06, |
|
"loss": 0.0028, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.9419199228286743, |
|
"learning_rate": 6.731447280053728e-06, |
|
"loss": 0.0155, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.04390889033675194, |
|
"learning_rate": 6.727249832102083e-06, |
|
"loss": 0.004, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.058596525341272354, |
|
"learning_rate": 6.7230523841504366e-06, |
|
"loss": 0.0041, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.67715984582901, |
|
"learning_rate": 6.718854936198792e-06, |
|
"loss": 0.0014, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.015570656396448612, |
|
"learning_rate": 6.714657488247146e-06, |
|
"loss": 0.0004, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.07929646968841553, |
|
"learning_rate": 6.710460040295501e-06, |
|
"loss": 0.0036, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.008636005222797394, |
|
"learning_rate": 6.706262592343855e-06, |
|
"loss": 0.0037, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.1776953488588333, |
|
"learning_rate": 6.7020651443922105e-06, |
|
"loss": 0.0014, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.03655342012643814, |
|
"learning_rate": 6.697867696440564e-06, |
|
"loss": 0.0003, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.0143968490883708, |
|
"learning_rate": 6.69367024848892e-06, |
|
"loss": 0.0017, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.137611523270607, |
|
"learning_rate": 6.689472800537274e-06, |
|
"loss": 0.002, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.45386821031570435, |
|
"learning_rate": 6.685275352585628e-06, |
|
"loss": 0.001, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.010178464464843273, |
|
"learning_rate": 6.681077904633984e-06, |
|
"loss": 0.0016, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.039013270288705826, |
|
"learning_rate": 6.676880456682337e-06, |
|
"loss": 0.0015, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.01565985381603241, |
|
"learning_rate": 6.672683008730693e-06, |
|
"loss": 0.0006, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.017331531271338463, |
|
"learning_rate": 6.668485560779047e-06, |
|
"loss": 0.0005, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.27378222346305847, |
|
"learning_rate": 6.664288112827402e-06, |
|
"loss": 0.0013, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.29315513372421265, |
|
"learning_rate": 6.660090664875756e-06, |
|
"loss": 0.001, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.37698185443878174, |
|
"learning_rate": 6.6558932169241104e-06, |
|
"loss": 0.0047, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.009589233435690403, |
|
"learning_rate": 6.651695768972465e-06, |
|
"loss": 0.0012, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.03333524242043495, |
|
"learning_rate": 6.64749832102082e-06, |
|
"loss": 0.0003, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.00575454905629158, |
|
"learning_rate": 6.643300873069175e-06, |
|
"loss": 0.0007, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.0640854686498642, |
|
"learning_rate": 6.639103425117529e-06, |
|
"loss": 0.0014, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.007772125769406557, |
|
"learning_rate": 6.634905977165884e-06, |
|
"loss": 0.0004, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.18589778244495392, |
|
"learning_rate": 6.630708529214238e-06, |
|
"loss": 0.0012, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.027769211679697037, |
|
"learning_rate": 6.626511081262594e-06, |
|
"loss": 0.0005, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.0029272972606122494, |
|
"learning_rate": 6.622313633310947e-06, |
|
"loss": 0.0006, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.009183773770928383, |
|
"learning_rate": 6.618116185359302e-06, |
|
"loss": 0.001, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.004283113870769739, |
|
"learning_rate": 6.613918737407657e-06, |
|
"loss": 0.001, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.007778434548527002, |
|
"learning_rate": 6.609721289456011e-06, |
|
"loss": 0.0022, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.0018987306393682957, |
|
"learning_rate": 6.605523841504365e-06, |
|
"loss": 0.0027, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.12906016409397125, |
|
"learning_rate": 6.6013263935527205e-06, |
|
"loss": 0.0009, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.007297148462384939, |
|
"learning_rate": 6.597128945601076e-06, |
|
"loss": 0.0007, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.33462247252464294, |
|
"learning_rate": 6.59293149764943e-06, |
|
"loss": 0.0043, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.8214085698127747, |
|
"learning_rate": 6.588734049697784e-06, |
|
"loss": 0.0021, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.024006789550185204, |
|
"learning_rate": 6.584536601746139e-06, |
|
"loss": 0.0008, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.02031201310455799, |
|
"learning_rate": 6.5803391537944936e-06, |
|
"loss": 0.0009, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.08601708710193634, |
|
"learning_rate": 6.576141705842848e-06, |
|
"loss": 0.0009, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.022639337927103043, |
|
"learning_rate": 6.571944257891203e-06, |
|
"loss": 0.0004, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.02436881698668003, |
|
"learning_rate": 6.5677468099395566e-06, |
|
"loss": 0.0003, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.02970285154879093, |
|
"learning_rate": 6.563549361987912e-06, |
|
"loss": 0.0022, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.1934923380613327, |
|
"learning_rate": 6.559351914036266e-06, |
|
"loss": 0.001, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.019940687343478203, |
|
"learning_rate": 6.555154466084621e-06, |
|
"loss": 0.0013, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.025434071198105812, |
|
"learning_rate": 6.550957018132975e-06, |
|
"loss": 0.0008, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.0027662869542837143, |
|
"learning_rate": 6.5467595701813305e-06, |
|
"loss": 0.0007, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.2719932496547699, |
|
"learning_rate": 6.542562122229685e-06, |
|
"loss": 0.0006, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.07692870497703552, |
|
"learning_rate": 6.53836467427804e-06, |
|
"loss": 0.0009, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.0984962210059166, |
|
"learning_rate": 6.534167226326394e-06, |
|
"loss": 0.0021, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.2010423243045807, |
|
"learning_rate": 6.529969778374748e-06, |
|
"loss": 0.0008, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.047768231481313705, |
|
"learning_rate": 6.525772330423104e-06, |
|
"loss": 0.0006, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.002377886790782213, |
|
"learning_rate": 6.521574882471457e-06, |
|
"loss": 0.0009, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.04574467986822128, |
|
"learning_rate": 6.517377434519813e-06, |
|
"loss": 0.0004, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.002358232159167528, |
|
"learning_rate": 6.513179986568167e-06, |
|
"loss": 0.0017, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.04641246423125267, |
|
"learning_rate": 6.508982538616522e-06, |
|
"loss": 0.0017, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.013856269419193268, |
|
"learning_rate": 6.504785090664876e-06, |
|
"loss": 0.0034, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.08428198844194412, |
|
"learning_rate": 6.5005876427132305e-06, |
|
"loss": 0.0004, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.07378539443016052, |
|
"learning_rate": 6.496390194761586e-06, |
|
"loss": 0.0007, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.015434389002621174, |
|
"learning_rate": 6.49219274680994e-06, |
|
"loss": 0.0021, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.014926637522876263, |
|
"learning_rate": 6.487995298858295e-06, |
|
"loss": 0.0011, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.23808802664279938, |
|
"learning_rate": 6.483797850906649e-06, |
|
"loss": 0.0003, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.013105678372085094, |
|
"learning_rate": 6.479600402955004e-06, |
|
"loss": 0.0008, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.01477357279509306, |
|
"learning_rate": 6.475402955003358e-06, |
|
"loss": 0.0012, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.1917484700679779, |
|
"learning_rate": 6.471205507051714e-06, |
|
"loss": 0.0009, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.005253896117210388, |
|
"learning_rate": 6.467008059100067e-06, |
|
"loss": 0.0003, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.028020091354846954, |
|
"learning_rate": 6.462810611148422e-06, |
|
"loss": 0.0009, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.07543497532606125, |
|
"learning_rate": 6.458613163196777e-06, |
|
"loss": 0.0003, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.21784624457359314, |
|
"learning_rate": 6.454415715245131e-06, |
|
"loss": 0.0007, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.004629680421203375, |
|
"learning_rate": 6.450218267293487e-06, |
|
"loss": 0.0005, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.05804940313100815, |
|
"learning_rate": 6.4460208193418405e-06, |
|
"loss": 0.0004, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.008218007162213326, |
|
"learning_rate": 6.441823371390196e-06, |
|
"loss": 0.0024, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.26576822996139526, |
|
"learning_rate": 6.43762592343855e-06, |
|
"loss": 0.0004, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.05083778500556946, |
|
"learning_rate": 6.433428475486904e-06, |
|
"loss": 0.0018, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.03845244273543358, |
|
"learning_rate": 6.429231027535259e-06, |
|
"loss": 0.0006, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.22800813615322113, |
|
"learning_rate": 6.425033579583614e-06, |
|
"loss": 0.0011, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.07355496287345886, |
|
"learning_rate": 6.420836131631968e-06, |
|
"loss": 0.0018, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.14929671585559845, |
|
"learning_rate": 6.416638683680323e-06, |
|
"loss": 0.001, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.009975256398320198, |
|
"learning_rate": 6.412441235728677e-06, |
|
"loss": 0.0004, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.049514010548591614, |
|
"learning_rate": 6.408663532572197e-06, |
|
"loss": 0.0036, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.17448657751083374, |
|
"learning_rate": 6.404466084620551e-06, |
|
"loss": 0.0029, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.0019642149563878775, |
|
"learning_rate": 6.400268636668906e-06, |
|
"loss": 0.0007, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.12057841569185257, |
|
"learning_rate": 6.39607118871726e-06, |
|
"loss": 0.0004, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.08199835568666458, |
|
"learning_rate": 6.391873740765615e-06, |
|
"loss": 0.003, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.008217026479542255, |
|
"learning_rate": 6.3876762928139705e-06, |
|
"loss": 0.0031, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.0021252892911434174, |
|
"learning_rate": 6.383478844862324e-06, |
|
"loss": 0.0003, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.5874733924865723, |
|
"learning_rate": 6.379281396910679e-06, |
|
"loss": 0.0028, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.01447449903935194, |
|
"learning_rate": 6.3750839489590335e-06, |
|
"loss": 0.0016, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.020882418379187584, |
|
"learning_rate": 6.370886501007388e-06, |
|
"loss": 0.0008, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.03403160721063614, |
|
"learning_rate": 6.366689053055743e-06, |
|
"loss": 0.0008, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.0914774239063263, |
|
"learning_rate": 6.362491605104097e-06, |
|
"loss": 0.0024, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.02440485917031765, |
|
"learning_rate": 6.358294157152451e-06, |
|
"loss": 0.0008, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.02295794151723385, |
|
"learning_rate": 6.354096709200807e-06, |
|
"loss": 0.0006, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.005137599539011717, |
|
"learning_rate": 6.34989926124916e-06, |
|
"loss": 0.0007, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.009145709685981274, |
|
"learning_rate": 6.345701813297516e-06, |
|
"loss": 0.004, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.1550685167312622, |
|
"learning_rate": 6.3415043653458705e-06, |
|
"loss": 0.0009, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.04273473471403122, |
|
"learning_rate": 6.337306917394225e-06, |
|
"loss": 0.0009, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.01607104018330574, |
|
"learning_rate": 6.33310946944258e-06, |
|
"loss": 0.0018, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.006609264761209488, |
|
"learning_rate": 6.3289120214909335e-06, |
|
"loss": 0.001, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.6425864100456238, |
|
"learning_rate": 6.324714573539289e-06, |
|
"loss": 0.0061, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.009361191652715206, |
|
"learning_rate": 6.320517125587643e-06, |
|
"loss": 0.0039, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.06796202808618546, |
|
"learning_rate": 6.316319677635998e-06, |
|
"loss": 0.0021, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.13239143788814545, |
|
"learning_rate": 6.312122229684352e-06, |
|
"loss": 0.0003, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.4967783987522125, |
|
"learning_rate": 6.307924781732707e-06, |
|
"loss": 0.0011, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.40928366780281067, |
|
"learning_rate": 6.303727333781061e-06, |
|
"loss": 0.0012, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.06716176867485046, |
|
"learning_rate": 6.299529885829417e-06, |
|
"loss": 0.0005, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.005962076131254435, |
|
"learning_rate": 6.295332437877771e-06, |
|
"loss": 0.0015, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.03685719147324562, |
|
"learning_rate": 6.291134989926125e-06, |
|
"loss": 0.0004, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.08257108926773071, |
|
"learning_rate": 6.2869375419744805e-06, |
|
"loss": 0.0004, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.0008770768763497472, |
|
"learning_rate": 6.282740094022834e-06, |
|
"loss": 0.0003, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.3550475537776947, |
|
"learning_rate": 6.27854264607119e-06, |
|
"loss": 0.0007, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.007021765224635601, |
|
"learning_rate": 6.2743451981195435e-06, |
|
"loss": 0.0052, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.012746850028634071, |
|
"learning_rate": 6.270147750167899e-06, |
|
"loss": 0.0005, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.09132123738527298, |
|
"learning_rate": 6.265950302216253e-06, |
|
"loss": 0.0095, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.004254049155861139, |
|
"learning_rate": 6.261752854264607e-06, |
|
"loss": 0.0008, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.032813917845487595, |
|
"learning_rate": 6.257555406312962e-06, |
|
"loss": 0.001, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.15052670240402222, |
|
"learning_rate": 6.253357958361317e-06, |
|
"loss": 0.001, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.4991702139377594, |
|
"learning_rate": 6.249160510409672e-06, |
|
"loss": 0.0032, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.0437382347881794, |
|
"learning_rate": 6.244963062458026e-06, |
|
"loss": 0.0008, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.0036775097250938416, |
|
"learning_rate": 6.240765614506381e-06, |
|
"loss": 0.0003, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.0415210947394371, |
|
"learning_rate": 6.236568166554735e-06, |
|
"loss": 0.0029, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.005274785682559013, |
|
"learning_rate": 6.2323707186030905e-06, |
|
"loss": 0.0004, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.001851436565630138, |
|
"learning_rate": 6.228173270651444e-06, |
|
"loss": 0.002, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.027700498700141907, |
|
"learning_rate": 6.223975822699799e-06, |
|
"loss": 0.0022, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.0017797650070860982, |
|
"learning_rate": 6.2197783747481535e-06, |
|
"loss": 0.0017, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.2582283914089203, |
|
"learning_rate": 6.215580926796508e-06, |
|
"loss": 0.0009, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.020876875147223473, |
|
"learning_rate": 6.211383478844863e-06, |
|
"loss": 0.0052, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.07340060919523239, |
|
"learning_rate": 6.207186030893217e-06, |
|
"loss": 0.0018, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.005077909212559462, |
|
"learning_rate": 6.202988582941573e-06, |
|
"loss": 0.0009, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.0850280374288559, |
|
"learning_rate": 6.198791134989927e-06, |
|
"loss": 0.001, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.22353577613830566, |
|
"learning_rate": 6.194593687038281e-06, |
|
"loss": 0.0012, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.003676492255181074, |
|
"learning_rate": 6.190396239086636e-06, |
|
"loss": 0.0003, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.1580864042043686, |
|
"learning_rate": 6.1861987911349905e-06, |
|
"loss": 0.0007, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.1484569013118744, |
|
"learning_rate": 6.182001343183345e-06, |
|
"loss": 0.0004, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.5588276982307434, |
|
"learning_rate": 6.1778038952317e-06, |
|
"loss": 0.0026, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.36433857679367065, |
|
"learning_rate": 6.1736064472800535e-06, |
|
"loss": 0.0012, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.009398641996085644, |
|
"learning_rate": 6.169408999328409e-06, |
|
"loss": 0.0055, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.22657063603401184, |
|
"learning_rate": 6.165211551376763e-06, |
|
"loss": 0.0012, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.2038237750530243, |
|
"learning_rate": 6.161014103425118e-06, |
|
"loss": 0.0011, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.009869251400232315, |
|
"learning_rate": 6.156816655473473e-06, |
|
"loss": 0.001, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.06920814514160156, |
|
"learning_rate": 6.1526192075218274e-06, |
|
"loss": 0.0009, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.002625921508297324, |
|
"learning_rate": 6.148421759570182e-06, |
|
"loss": 0.0019, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.00261191138997674, |
|
"learning_rate": 6.144224311618537e-06, |
|
"loss": 0.0005, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.01784035935997963, |
|
"learning_rate": 6.140026863666891e-06, |
|
"loss": 0.0032, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.013288385234773159, |
|
"learning_rate": 6.135829415715245e-06, |
|
"loss": 0.0011, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.005483401007950306, |
|
"learning_rate": 6.1316319677636005e-06, |
|
"loss": 0.0006, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.00357685424387455, |
|
"learning_rate": 6.127434519811954e-06, |
|
"loss": 0.0007, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.025155210867524147, |
|
"learning_rate": 6.12323707186031e-06, |
|
"loss": 0.0093, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.10848862677812576, |
|
"learning_rate": 6.1190396239086635e-06, |
|
"loss": 0.0009, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.0211066585034132, |
|
"learning_rate": 6.114842175957019e-06, |
|
"loss": 0.0009, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.4916308522224426, |
|
"learning_rate": 6.110644728005374e-06, |
|
"loss": 0.0042, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.019028645008802414, |
|
"learning_rate": 6.106447280053727e-06, |
|
"loss": 0.0019, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.143443763256073, |
|
"learning_rate": 6.102249832102083e-06, |
|
"loss": 0.0007, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.017074372619390488, |
|
"learning_rate": 6.098052384150437e-06, |
|
"loss": 0.001, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.0011280884500592947, |
|
"learning_rate": 6.093854936198792e-06, |
|
"loss": 0.0045, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.015283357352018356, |
|
"learning_rate": 6.089657488247146e-06, |
|
"loss": 0.0007, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.015474602580070496, |
|
"learning_rate": 6.085460040295501e-06, |
|
"loss": 0.0005, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.0023845334071666002, |
|
"learning_rate": 6.081262592343855e-06, |
|
"loss": 0.0004, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.40035849809646606, |
|
"learning_rate": 6.0770651443922106e-06, |
|
"loss": 0.002, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.015063658356666565, |
|
"learning_rate": 6.072867696440564e-06, |
|
"loss": 0.0004, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.10512324422597885, |
|
"learning_rate": 6.068670248488919e-06, |
|
"loss": 0.0054, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.018911171704530716, |
|
"learning_rate": 6.064472800537274e-06, |
|
"loss": 0.0005, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.01821274310350418, |
|
"learning_rate": 6.060275352585628e-06, |
|
"loss": 0.0019, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.006468980107456446, |
|
"learning_rate": 6.056077904633984e-06, |
|
"loss": 0.0011, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.0246189646422863, |
|
"learning_rate": 6.051880456682337e-06, |
|
"loss": 0.0003, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.030287379398941994, |
|
"learning_rate": 6.047683008730693e-06, |
|
"loss": 0.0003, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.14614807069301605, |
|
"learning_rate": 6.043485560779047e-06, |
|
"loss": 0.0007, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.03680308535695076, |
|
"learning_rate": 6.039288112827401e-06, |
|
"loss": 0.0007, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.020408429205417633, |
|
"learning_rate": 6.035090664875756e-06, |
|
"loss": 0.0042, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.12720835208892822, |
|
"learning_rate": 6.0308932169241105e-06, |
|
"loss": 0.0008, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.5571761727333069, |
|
"learning_rate": 6.026695768972465e-06, |
|
"loss": 0.0069, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.287207692861557, |
|
"learning_rate": 6.02249832102082e-06, |
|
"loss": 0.0011, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.19260409474372864, |
|
"learning_rate": 6.018300873069175e-06, |
|
"loss": 0.0012, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.5683563351631165, |
|
"learning_rate": 6.014103425117529e-06, |
|
"loss": 0.0025, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.0048590730875730515, |
|
"learning_rate": 6.0099059771658844e-06, |
|
"loss": 0.0016, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.03889940306544304, |
|
"learning_rate": 6.005708529214238e-06, |
|
"loss": 0.0019, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.09073323756456375, |
|
"learning_rate": 6.001511081262593e-06, |
|
"loss": 0.0041, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.5227171182632446, |
|
"learning_rate": 5.9973136333109474e-06, |
|
"loss": 0.0047, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.054277870804071426, |
|
"learning_rate": 5.993116185359302e-06, |
|
"loss": 0.0007, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.0757797583937645, |
|
"learning_rate": 5.988918737407657e-06, |
|
"loss": 0.0008, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.022691043093800545, |
|
"learning_rate": 5.984721289456011e-06, |
|
"loss": 0.0007, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.010830945335328579, |
|
"learning_rate": 5.980523841504365e-06, |
|
"loss": 0.0004, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.07802291959524155, |
|
"learning_rate": 5.9763263935527205e-06, |
|
"loss": 0.002, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.03440363332629204, |
|
"learning_rate": 5.972128945601076e-06, |
|
"loss": 0.0009, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.01543488446623087, |
|
"learning_rate": 5.96793149764943e-06, |
|
"loss": 0.0009, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.014080232009291649, |
|
"learning_rate": 5.963734049697784e-06, |
|
"loss": 0.0008, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.43045684695243835, |
|
"learning_rate": 5.959536601746139e-06, |
|
"loss": 0.0008, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.006179173942655325, |
|
"learning_rate": 5.955339153794494e-06, |
|
"loss": 0.0011, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.0018634877633303404, |
|
"learning_rate": 5.951141705842847e-06, |
|
"loss": 0.0002, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.04732802137732506, |
|
"learning_rate": 5.946944257891203e-06, |
|
"loss": 0.0068, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.028062958270311356, |
|
"learning_rate": 5.942746809939557e-06, |
|
"loss": 0.0021, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.013063440099358559, |
|
"learning_rate": 5.938549361987912e-06, |
|
"loss": 0.0007, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.008527678437530994, |
|
"learning_rate": 5.934351914036266e-06, |
|
"loss": 0.0003, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.006061127409338951, |
|
"learning_rate": 5.930154466084621e-06, |
|
"loss": 0.0005, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.05889621749520302, |
|
"learning_rate": 5.925957018132975e-06, |
|
"loss": 0.0008, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.025425154715776443, |
|
"learning_rate": 5.9217595701813306e-06, |
|
"loss": 0.0009, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.12469867616891861, |
|
"learning_rate": 5.917562122229685e-06, |
|
"loss": 0.0018, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.0440966971218586, |
|
"learning_rate": 5.913364674278039e-06, |
|
"loss": 0.0014, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.0034553324803709984, |
|
"learning_rate": 5.909167226326394e-06, |
|
"loss": 0.0014, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.010510810650885105, |
|
"learning_rate": 5.904969778374748e-06, |
|
"loss": 0.0006, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.01508377306163311, |
|
"learning_rate": 5.900772330423104e-06, |
|
"loss": 0.0026, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.001469660666771233, |
|
"learning_rate": 5.8965748824714574e-06, |
|
"loss": 0.0012, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.037131939083337784, |
|
"learning_rate": 5.892377434519813e-06, |
|
"loss": 0.0002, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.00974042434245348, |
|
"learning_rate": 5.888179986568167e-06, |
|
"loss": 0.0003, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.15985484421253204, |
|
"learning_rate": 5.883982538616521e-06, |
|
"loss": 0.0004, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.17923088371753693, |
|
"learning_rate": 5.879785090664876e-06, |
|
"loss": 0.0004, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.7428094148635864, |
|
"learning_rate": 5.8755876427132305e-06, |
|
"loss": 0.0025, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.05475035682320595, |
|
"learning_rate": 5.871390194761586e-06, |
|
"loss": 0.0003, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.0039265332743525505, |
|
"learning_rate": 5.86719274680994e-06, |
|
"loss": 0.0039, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.02810598909854889, |
|
"learning_rate": 5.862995298858295e-06, |
|
"loss": 0.0017, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.034790728241205215, |
|
"learning_rate": 5.858797850906649e-06, |
|
"loss": 0.0009, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.17595210671424866, |
|
"learning_rate": 5.8546004029550045e-06, |
|
"loss": 0.0006, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.12365200370550156, |
|
"learning_rate": 5.850402955003358e-06, |
|
"loss": 0.0027, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.025172580033540726, |
|
"learning_rate": 5.846205507051713e-06, |
|
"loss": 0.0034, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.03391372412443161, |
|
"learning_rate": 5.8420080591000675e-06, |
|
"loss": 0.0022, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.0013367092469707131, |
|
"learning_rate": 5.837810611148422e-06, |
|
"loss": 0.0009, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.2373090237379074, |
|
"learning_rate": 5.833613163196777e-06, |
|
"loss": 0.0013, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.0014829274732619524, |
|
"learning_rate": 5.829415715245131e-06, |
|
"loss": 0.0004, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.02379409596323967, |
|
"learning_rate": 5.825218267293487e-06, |
|
"loss": 0.0007, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.01043054461479187, |
|
"learning_rate": 5.8210208193418405e-06, |
|
"loss": 0.0009, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.5758562088012695, |
|
"learning_rate": 5.816823371390196e-06, |
|
"loss": 0.0076, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.003396995598450303, |
|
"learning_rate": 5.81262592343855e-06, |
|
"loss": 0.0005, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.004527990706264973, |
|
"learning_rate": 5.808428475486904e-06, |
|
"loss": 0.0003, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.38935422897338867, |
|
"learning_rate": 5.804231027535259e-06, |
|
"loss": 0.0021, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.007596536539494991, |
|
"learning_rate": 5.800033579583614e-06, |
|
"loss": 0.0026, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.034029822796583176, |
|
"learning_rate": 5.795836131631967e-06, |
|
"loss": 0.0004, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.8351893424987793, |
|
"learning_rate": 5.791638683680323e-06, |
|
"loss": 0.0024, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.007966914214193821, |
|
"learning_rate": 5.787441235728677e-06, |
|
"loss": 0.0009, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.039692457765340805, |
|
"learning_rate": 5.783243787777032e-06, |
|
"loss": 0.0011, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.7721789479255676, |
|
"learning_rate": 5.779046339825387e-06, |
|
"loss": 0.0019, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.01767701469361782, |
|
"learning_rate": 5.774848891873741e-06, |
|
"loss": 0.0008, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.012562219053506851, |
|
"learning_rate": 5.770651443922096e-06, |
|
"loss": 0.0015, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.34681907296180725, |
|
"learning_rate": 5.766453995970451e-06, |
|
"loss": 0.0045, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.0029207984916865826, |
|
"learning_rate": 5.762256548018805e-06, |
|
"loss": 0.0017, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.018310122191905975, |
|
"learning_rate": 5.758059100067159e-06, |
|
"loss": 0.0015, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.06843991577625275, |
|
"learning_rate": 5.7538616521155144e-06, |
|
"loss": 0.0048, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.03804745152592659, |
|
"learning_rate": 5.749664204163868e-06, |
|
"loss": 0.0025, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.011987677775323391, |
|
"learning_rate": 5.745466756212224e-06, |
|
"loss": 0.0023, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.0005486512091010809, |
|
"learning_rate": 5.7412693082605774e-06, |
|
"loss": 0.0008, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.12931697070598602, |
|
"learning_rate": 5.737071860308933e-06, |
|
"loss": 0.0016, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.014491924084722996, |
|
"learning_rate": 5.7328744123572875e-06, |
|
"loss": 0.0006, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.008687620051205158, |
|
"learning_rate": 5.728676964405641e-06, |
|
"loss": 0.0007, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.015420785173773766, |
|
"learning_rate": 5.724479516453997e-06, |
|
"loss": 0.0002, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.0028673780616372824, |
|
"learning_rate": 5.7202820685023505e-06, |
|
"loss": 0.0005, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.014829892665147781, |
|
"learning_rate": 5.716084620550706e-06, |
|
"loss": 0.0002, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.051586270332336426, |
|
"learning_rate": 5.71188717259906e-06, |
|
"loss": 0.004, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.0063542029820382595, |
|
"learning_rate": 5.707689724647415e-06, |
|
"loss": 0.0036, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.014711436815559864, |
|
"learning_rate": 5.703492276695769e-06, |
|
"loss": 0.0021, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.013807760551571846, |
|
"learning_rate": 5.6992948287441245e-06, |
|
"loss": 0.0004, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.024736396968364716, |
|
"learning_rate": 5.695097380792478e-06, |
|
"loss": 0.0003, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.6258419752120972, |
|
"learning_rate": 5.690899932840833e-06, |
|
"loss": 0.0026, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.003710240125656128, |
|
"learning_rate": 5.686702484889188e-06, |
|
"loss": 0.0035, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.653317391872406, |
|
"learning_rate": 5.682505036937542e-06, |
|
"loss": 0.0013, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.02590692788362503, |
|
"learning_rate": 5.6783075889858976e-06, |
|
"loss": 0.0018, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.006416289601475, |
|
"learning_rate": 5.674110141034251e-06, |
|
"loss": 0.0011, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.02078012377023697, |
|
"learning_rate": 5.669912693082607e-06, |
|
"loss": 0.0004, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.011740882880985737, |
|
"learning_rate": 5.6657152451309606e-06, |
|
"loss": 0.0013, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.02781371772289276, |
|
"learning_rate": 5.661517797179315e-06, |
|
"loss": 0.0018, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.011912044137716293, |
|
"learning_rate": 5.65732034922767e-06, |
|
"loss": 0.0008, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.11006138473749161, |
|
"learning_rate": 5.653122901276024e-06, |
|
"loss": 0.0016, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.03869205340743065, |
|
"learning_rate": 5.648925453324379e-06, |
|
"loss": 0.0008, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.07503340393304825, |
|
"learning_rate": 5.644728005372734e-06, |
|
"loss": 0.0014, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.001265190658159554, |
|
"learning_rate": 5.640530557421089e-06, |
|
"loss": 0.0003, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.011775599792599678, |
|
"learning_rate": 5.636333109469443e-06, |
|
"loss": 0.0006, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.11629336327314377, |
|
"learning_rate": 5.632135661517798e-06, |
|
"loss": 0.0006, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.0015451526269316673, |
|
"learning_rate": 5.627938213566152e-06, |
|
"loss": 0.0038, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.04855529963970184, |
|
"learning_rate": 5.623740765614507e-06, |
|
"loss": 0.0004, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.01069630403071642, |
|
"learning_rate": 5.619543317662861e-06, |
|
"loss": 0.0006, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.004674754571169615, |
|
"learning_rate": 5.615345869711216e-06, |
|
"loss": 0.0012, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.025356121361255646, |
|
"learning_rate": 5.611148421759571e-06, |
|
"loss": 0.0006, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.02232622541487217, |
|
"learning_rate": 5.606950973807925e-06, |
|
"loss": 0.0013, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.497832328081131, |
|
"learning_rate": 5.602753525856279e-06, |
|
"loss": 0.0016, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.057482216507196426, |
|
"learning_rate": 5.5985560779046344e-06, |
|
"loss": 0.0005, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.003028752515092492, |
|
"learning_rate": 5.59435862995299e-06, |
|
"loss": 0.0004, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.02643987536430359, |
|
"learning_rate": 5.590161182001344e-06, |
|
"loss": 0.0014, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.02531200833618641, |
|
"learning_rate": 5.585963734049698e-06, |
|
"loss": 0.0005, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.04974319785833359, |
|
"learning_rate": 5.581766286098053e-06, |
|
"loss": 0.0008, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.005935164168477058, |
|
"learning_rate": 5.5775688381464075e-06, |
|
"loss": 0.0003, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.00849095918238163, |
|
"learning_rate": 5.573371390194761e-06, |
|
"loss": 0.0003, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.0163765549659729, |
|
"learning_rate": 5.569173942243117e-06, |
|
"loss": 0.0014, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.003922377247363329, |
|
"learning_rate": 5.5649764942914705e-06, |
|
"loss": 0.0017, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.008663144893944263, |
|
"learning_rate": 5.560779046339826e-06, |
|
"loss": 0.0005, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.042090997099876404, |
|
"learning_rate": 5.55658159838818e-06, |
|
"loss": 0.0011, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.018130453303456306, |
|
"learning_rate": 5.552384150436535e-06, |
|
"loss": 0.0002, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.02145092561841011, |
|
"learning_rate": 5.54818670248489e-06, |
|
"loss": 0.0005, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.016469236463308334, |
|
"learning_rate": 5.5439892545332445e-06, |
|
"loss": 0.0015, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.07116610556840897, |
|
"learning_rate": 5.539791806581599e-06, |
|
"loss": 0.0003, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.003410005010664463, |
|
"learning_rate": 5.535594358629953e-06, |
|
"loss": 0.0005, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.0010806163772940636, |
|
"learning_rate": 5.531396910678308e-06, |
|
"loss": 0.0017, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.0015185626689344645, |
|
"learning_rate": 5.527199462726662e-06, |
|
"loss": 0.0006, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.016374943777918816, |
|
"learning_rate": 5.5230020147750176e-06, |
|
"loss": 0.0094, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.00241836323402822, |
|
"learning_rate": 5.518804566823371e-06, |
|
"loss": 0.0004, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.03313888981938362, |
|
"learning_rate": 5.514607118871727e-06, |
|
"loss": 0.0023, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.1092633306980133, |
|
"learning_rate": 5.5104096709200806e-06, |
|
"loss": 0.0002, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.03263653814792633, |
|
"learning_rate": 5.506212222968435e-06, |
|
"loss": 0.0069, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.05272672697901726, |
|
"learning_rate": 5.502014775016791e-06, |
|
"loss": 0.0004, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.007287841290235519, |
|
"learning_rate": 5.4978173270651444e-06, |
|
"loss": 0.0019, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.006436245981603861, |
|
"learning_rate": 5.4936198791135e-06, |
|
"loss": 0.0042, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.02231592871248722, |
|
"learning_rate": 5.489422431161854e-06, |
|
"loss": 0.0023, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.013109694235026836, |
|
"learning_rate": 5.485224983210209e-06, |
|
"loss": 0.0044, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.0014881811803206801, |
|
"learning_rate": 5.481027535258563e-06, |
|
"loss": 0.0074, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.002896430203691125, |
|
"learning_rate": 5.476830087306918e-06, |
|
"loss": 0.0012, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.009817220270633698, |
|
"learning_rate": 5.472632639355272e-06, |
|
"loss": 0.0028, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.013071718625724316, |
|
"learning_rate": 5.468435191403627e-06, |
|
"loss": 0.0005, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.14298699796199799, |
|
"learning_rate": 5.464237743451981e-06, |
|
"loss": 0.0003, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.0034705400466918945, |
|
"learning_rate": 5.460040295500336e-06, |
|
"loss": 0.0019, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.028280634433031082, |
|
"learning_rate": 5.4558428475486915e-06, |
|
"loss": 0.0009, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.0049716150388121605, |
|
"learning_rate": 5.451645399597045e-06, |
|
"loss": 0.0003, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.06866387277841568, |
|
"learning_rate": 5.447447951645401e-06, |
|
"loss": 0.0055, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.001268894993700087, |
|
"learning_rate": 5.4432505036937545e-06, |
|
"loss": 0.0037, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.4983358085155487, |
|
"learning_rate": 5.43905305574211e-06, |
|
"loss": 0.0123, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.07618770748376846, |
|
"learning_rate": 5.434855607790464e-06, |
|
"loss": 0.0013, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.054555345326662064, |
|
"learning_rate": 5.430658159838818e-06, |
|
"loss": 0.0014, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.005016769748181105, |
|
"learning_rate": 5.426460711887173e-06, |
|
"loss": 0.0015, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.02383585087954998, |
|
"learning_rate": 5.4222632639355275e-06, |
|
"loss": 0.0004, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.032023441046476364, |
|
"learning_rate": 5.418065815983881e-06, |
|
"loss": 0.0009, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.020678533241152763, |
|
"learning_rate": 5.413868368032237e-06, |
|
"loss": 0.0019, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.04738430306315422, |
|
"learning_rate": 5.409670920080592e-06, |
|
"loss": 0.0008, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.10293067991733551, |
|
"learning_rate": 5.405473472128946e-06, |
|
"loss": 0.001, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.006902909372001886, |
|
"learning_rate": 5.401276024177301e-06, |
|
"loss": 0.0021, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.09025087207555771, |
|
"learning_rate": 5.397078576225655e-06, |
|
"loss": 0.0015, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.5670678615570068, |
|
"learning_rate": 5.39288112827401e-06, |
|
"loss": 0.0032, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.08049952238798141, |
|
"learning_rate": 5.3886836803223645e-06, |
|
"loss": 0.0011, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.009549125097692013, |
|
"learning_rate": 5.384486232370719e-06, |
|
"loss": 0.0005, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.06387516856193542, |
|
"learning_rate": 5.380288784419073e-06, |
|
"loss": 0.0004, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.00499248132109642, |
|
"learning_rate": 5.376091336467428e-06, |
|
"loss": 0.001, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.04614701494574547, |
|
"learning_rate": 5.371893888515782e-06, |
|
"loss": 0.001, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.2745211124420166, |
|
"learning_rate": 5.367696440564138e-06, |
|
"loss": 0.0042, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.1052335873246193, |
|
"learning_rate": 5.363498992612492e-06, |
|
"loss": 0.0008, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.025874366983771324, |
|
"learning_rate": 5.359301544660847e-06, |
|
"loss": 0.0022, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.28177767992019653, |
|
"learning_rate": 5.3551040967092014e-06, |
|
"loss": 0.0015, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.018476691097021103, |
|
"learning_rate": 5.350906648757555e-06, |
|
"loss": 0.0003, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.015466156415641308, |
|
"learning_rate": 5.346709200805911e-06, |
|
"loss": 0.0009, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.5496931076049805, |
|
"learning_rate": 5.3425117528542644e-06, |
|
"loss": 0.0071, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.005472448188811541, |
|
"learning_rate": 5.33831430490262e-06, |
|
"loss": 0.0004, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.02211419679224491, |
|
"learning_rate": 5.334116856950974e-06, |
|
"loss": 0.0007, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.008153458125889301, |
|
"learning_rate": 5.329919408999329e-06, |
|
"loss": 0.0005, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.04427899047732353, |
|
"learning_rate": 5.325721961047683e-06, |
|
"loss": 0.0006, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.15745842456817627, |
|
"learning_rate": 5.321524513096038e-06, |
|
"loss": 0.0013, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.10899525135755539, |
|
"learning_rate": 5.317327065144393e-06, |
|
"loss": 0.0025, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.003296980634331703, |
|
"learning_rate": 5.313129617192747e-06, |
|
"loss": 0.0092, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.00297130411490798, |
|
"learning_rate": 5.308932169241102e-06, |
|
"loss": 0.0005, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.01586499996483326, |
|
"learning_rate": 5.304734721289456e-06, |
|
"loss": 0.0009, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.03256227821111679, |
|
"learning_rate": 5.3005372733378115e-06, |
|
"loss": 0.0008, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.015256262384355068, |
|
"learning_rate": 5.296339825386165e-06, |
|
"loss": 0.0018, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.012666000984609127, |
|
"learning_rate": 5.292142377434521e-06, |
|
"loss": 0.0049, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.009229323826730251, |
|
"learning_rate": 5.2879449294828745e-06, |
|
"loss": 0.0002, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.07503992319107056, |
|
"learning_rate": 5.28374748153123e-06, |
|
"loss": 0.0006, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.07024108618497849, |
|
"learning_rate": 5.279550033579584e-06, |
|
"loss": 0.0009, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.0038027705159038305, |
|
"learning_rate": 5.275352585627938e-06, |
|
"loss": 0.0006, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.006678453180938959, |
|
"learning_rate": 5.271155137676294e-06, |
|
"loss": 0.0008, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.11047659069299698, |
|
"learning_rate": 5.2669576897246476e-06, |
|
"loss": 0.0011, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.04119430482387543, |
|
"learning_rate": 5.262760241773003e-06, |
|
"loss": 0.0004, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.0067368606105446815, |
|
"learning_rate": 5.258562793821357e-06, |
|
"loss": 0.0011, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.08865031599998474, |
|
"learning_rate": 5.254365345869712e-06, |
|
"loss": 0.0028, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.8145259618759155, |
|
"learning_rate": 5.250167897918066e-06, |
|
"loss": 0.0024, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.10966548323631287, |
|
"learning_rate": 5.245970449966421e-06, |
|
"loss": 0.0013, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.31640541553497314, |
|
"learning_rate": 5.241773002014775e-06, |
|
"loss": 0.0023, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.01933100074529648, |
|
"learning_rate": 5.23757555406313e-06, |
|
"loss": 0.0006, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.04006137698888779, |
|
"learning_rate": 5.2333781061114845e-06, |
|
"loss": 0.0004, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.01764124073088169, |
|
"learning_rate": 5.229180658159839e-06, |
|
"loss": 0.0029, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.02364155650138855, |
|
"learning_rate": 5.224983210208195e-06, |
|
"loss": 0.0004, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.046450551599264145, |
|
"learning_rate": 5.220785762256548e-06, |
|
"loss": 0.0015, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.802649736404419, |
|
"learning_rate": 5.216588314304904e-06, |
|
"loss": 0.0024, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.0018641554052010179, |
|
"learning_rate": 5.212390866353258e-06, |
|
"loss": 0.0002, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.027689019218087196, |
|
"learning_rate": 5.208193418401612e-06, |
|
"loss": 0.0014, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.014141437597572803, |
|
"learning_rate": 5.203995970449967e-06, |
|
"loss": 0.0014, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.04088863730430603, |
|
"learning_rate": 5.1997985224983214e-06, |
|
"loss": 0.0011, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.26688361167907715, |
|
"learning_rate": 5.195601074546675e-06, |
|
"loss": 0.0016, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.6439863443374634, |
|
"learning_rate": 5.191403626595031e-06, |
|
"loss": 0.0022, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.009749379009008408, |
|
"learning_rate": 5.1872061786433845e-06, |
|
"loss": 0.0014, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.04378553479909897, |
|
"learning_rate": 5.18300873069174e-06, |
|
"loss": 0.001, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.01073415856808424, |
|
"learning_rate": 5.1788112827400945e-06, |
|
"loss": 0.001, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.0023095139767974615, |
|
"learning_rate": 5.174613834788449e-06, |
|
"loss": 0.0004, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.05889497324824333, |
|
"learning_rate": 5.170416386836804e-06, |
|
"loss": 0.0005, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.018245402723550797, |
|
"learning_rate": 5.166218938885158e-06, |
|
"loss": 0.0006, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.18495741486549377, |
|
"learning_rate": 5.162021490933513e-06, |
|
"loss": 0.0005, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.002312073251232505, |
|
"learning_rate": 5.157824042981867e-06, |
|
"loss": 0.0005, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.09305838495492935, |
|
"learning_rate": 5.153626595030222e-06, |
|
"loss": 0.0053, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.11583968251943588, |
|
"learning_rate": 5.149429147078576e-06, |
|
"loss": 0.0057, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.11358506232500076, |
|
"learning_rate": 5.1452316991269315e-06, |
|
"loss": 0.0004, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.49590811133384705, |
|
"learning_rate": 5.141034251175285e-06, |
|
"loss": 0.0028, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.016838036477565765, |
|
"learning_rate": 5.136836803223641e-06, |
|
"loss": 0.0007, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.003618579125031829, |
|
"learning_rate": 5.132639355271995e-06, |
|
"loss": 0.0005, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.00428624777123332, |
|
"learning_rate": 5.128441907320349e-06, |
|
"loss": 0.002, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.08151665329933167, |
|
"learning_rate": 5.1242444593687046e-06, |
|
"loss": 0.0013, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.006494326516985893, |
|
"learning_rate": 5.120047011417058e-06, |
|
"loss": 0.0011, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.005522624123841524, |
|
"learning_rate": 5.115849563465414e-06, |
|
"loss": 0.0045, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.0026271094102412462, |
|
"learning_rate": 5.1116521155137676e-06, |
|
"loss": 0.0023, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.026925839483737946, |
|
"learning_rate": 5.107454667562123e-06, |
|
"loss": 0.0008, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.019350741058588028, |
|
"learning_rate": 5.103257219610477e-06, |
|
"loss": 0.0006, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.02016269974410534, |
|
"learning_rate": 5.099059771658832e-06, |
|
"loss": 0.0063, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.07899802923202515, |
|
"learning_rate": 5.094862323707186e-06, |
|
"loss": 0.0009, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.006061141844838858, |
|
"learning_rate": 5.090664875755541e-06, |
|
"loss": 0.0012, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.0007106554112397134, |
|
"learning_rate": 5.086467427803896e-06, |
|
"loss": 0.0027, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.08793941885232925, |
|
"learning_rate": 5.08226997985225e-06, |
|
"loss": 0.0011, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.009847030974924564, |
|
"learning_rate": 5.078072531900605e-06, |
|
"loss": 0.0017, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.1629151850938797, |
|
"learning_rate": 5.073875083948959e-06, |
|
"loss": 0.0007, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.008842062205076218, |
|
"learning_rate": 5.069677635997315e-06, |
|
"loss": 0.0036, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.0026031406596302986, |
|
"learning_rate": 5.065480188045668e-06, |
|
"loss": 0.0005, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.00415264954790473, |
|
"learning_rate": 5.061282740094024e-06, |
|
"loss": 0.0013, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.007113313302397728, |
|
"learning_rate": 5.057085292142378e-06, |
|
"loss": 0.0007, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.024390025064349174, |
|
"learning_rate": 5.052887844190732e-06, |
|
"loss": 0.0012, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.0008230148232541978, |
|
"learning_rate": 5.048690396239087e-06, |
|
"loss": 0.0004, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.005063881631940603, |
|
"learning_rate": 5.0444929482874415e-06, |
|
"loss": 0.0004, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.04501771554350853, |
|
"learning_rate": 5.040295500335797e-06, |
|
"loss": 0.0065, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.007200128864496946, |
|
"learning_rate": 5.036098052384151e-06, |
|
"loss": 0.0054, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.33267006278038025, |
|
"learning_rate": 5.031900604432506e-06, |
|
"loss": 0.0039, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.21112149953842163, |
|
"learning_rate": 5.02770315648086e-06, |
|
"loss": 0.0006, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.003047073259949684, |
|
"learning_rate": 5.0235057085292145e-06, |
|
"loss": 0.0003, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.056417226791381836, |
|
"learning_rate": 5.019308260577569e-06, |
|
"loss": 0.0004, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.0030665055382996798, |
|
"learning_rate": 5.015110812625924e-06, |
|
"loss": 0.0007, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.021434858441352844, |
|
"learning_rate": 5.010913364674278e-06, |
|
"loss": 0.0013, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.13436031341552734, |
|
"learning_rate": 5.006715916722633e-06, |
|
"loss": 0.0004, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.012255342677235603, |
|
"learning_rate": 5.002518468770987e-06, |
|
"loss": 0.0025, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.01314691361039877, |
|
"learning_rate": 4.998321020819342e-06, |
|
"loss": 0.0006, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.17141364514827728, |
|
"learning_rate": 4.994123572867697e-06, |
|
"loss": 0.0015, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.06440167874097824, |
|
"learning_rate": 4.9899261249160515e-06, |
|
"loss": 0.0004, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.003272134577855468, |
|
"learning_rate": 4.985728676964406e-06, |
|
"loss": 0.0026, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.10054084658622742, |
|
"learning_rate": 4.981531229012761e-06, |
|
"loss": 0.0003, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.05302809551358223, |
|
"learning_rate": 4.977333781061115e-06, |
|
"loss": 0.0007, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.023061759769916534, |
|
"learning_rate": 4.97313633310947e-06, |
|
"loss": 0.0026, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.038504019379615784, |
|
"learning_rate": 4.968938885157825e-06, |
|
"loss": 0.0026, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.0051516154780983925, |
|
"learning_rate": 4.964741437206179e-06, |
|
"loss": 0.0003, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.05448487401008606, |
|
"learning_rate": 4.960543989254534e-06, |
|
"loss": 0.001, |
|
"step": 12010 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.048447802662849426, |
|
"learning_rate": 4.9563465413028884e-06, |
|
"loss": 0.0003, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.14971762895584106, |
|
"learning_rate": 4.952149093351243e-06, |
|
"loss": 0.0004, |
|
"step": 12030 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.03111051581799984, |
|
"learning_rate": 4.947951645399598e-06, |
|
"loss": 0.0021, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.035793572664260864, |
|
"learning_rate": 4.943754197447952e-06, |
|
"loss": 0.0002, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.25612950325012207, |
|
"learning_rate": 4.939556749496306e-06, |
|
"loss": 0.0008, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.015790291130542755, |
|
"learning_rate": 4.935359301544661e-06, |
|
"loss": 0.004, |
|
"step": 12070 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.00603965250775218, |
|
"learning_rate": 4.931161853593015e-06, |
|
"loss": 0.0005, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.029880041256546974, |
|
"learning_rate": 4.926964405641371e-06, |
|
"loss": 0.0006, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.14440520107746124, |
|
"learning_rate": 4.922766957689725e-06, |
|
"loss": 0.0005, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.006907114293426275, |
|
"learning_rate": 4.91856950973808e-06, |
|
"loss": 0.0011, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.08127205818891525, |
|
"learning_rate": 4.914372061786435e-06, |
|
"loss": 0.0013, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.8747881650924683, |
|
"learning_rate": 4.910174613834789e-06, |
|
"loss": 0.0015, |
|
"step": 12130 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.0023702913895249367, |
|
"learning_rate": 4.905977165883144e-06, |
|
"loss": 0.0006, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.0015660161152482033, |
|
"learning_rate": 4.901779717931498e-06, |
|
"loss": 0.0008, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.016091827303171158, |
|
"learning_rate": 4.897582269979852e-06, |
|
"loss": 0.0006, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.16924422979354858, |
|
"learning_rate": 4.893384822028207e-06, |
|
"loss": 0.0035, |
|
"step": 12170 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.0360410138964653, |
|
"learning_rate": 4.8891873740765615e-06, |
|
"loss": 0.0012, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.2576335370540619, |
|
"learning_rate": 4.884989926124916e-06, |
|
"loss": 0.0007, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.026088684797286987, |
|
"learning_rate": 4.8807924781732716e-06, |
|
"loss": 0.0004, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.030685720965266228, |
|
"learning_rate": 4.876595030221626e-06, |
|
"loss": 0.0003, |
|
"step": 12210 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.043232034891843796, |
|
"learning_rate": 4.872397582269981e-06, |
|
"loss": 0.0004, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.05359245836734772, |
|
"learning_rate": 4.8682001343183346e-06, |
|
"loss": 0.001, |
|
"step": 12230 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.0005476790247485042, |
|
"learning_rate": 4.864002686366689e-06, |
|
"loss": 0.0017, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.005588450003415346, |
|
"learning_rate": 4.859805238415044e-06, |
|
"loss": 0.001, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.07085375487804413, |
|
"learning_rate": 4.855607790463398e-06, |
|
"loss": 0.0011, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.012516853399574757, |
|
"learning_rate": 4.851410342511753e-06, |
|
"loss": 0.0025, |
|
"step": 12270 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.010629468597471714, |
|
"learning_rate": 4.847212894560108e-06, |
|
"loss": 0.0007, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.07004597038030624, |
|
"learning_rate": 4.843015446608462e-06, |
|
"loss": 0.0004, |
|
"step": 12290 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.009928259067237377, |
|
"learning_rate": 4.838817998656817e-06, |
|
"loss": 0.0055, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.05139904469251633, |
|
"learning_rate": 4.8346205507051715e-06, |
|
"loss": 0.0008, |
|
"step": 12310 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.00505171250551939, |
|
"learning_rate": 4.830423102753526e-06, |
|
"loss": 0.0019, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.22344523668289185, |
|
"learning_rate": 4.826225654801881e-06, |
|
"loss": 0.0101, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.44625988602638245, |
|
"learning_rate": 4.822028206850235e-06, |
|
"loss": 0.002, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.003404056653380394, |
|
"learning_rate": 4.81783075889859e-06, |
|
"loss": 0.0003, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.05186540633440018, |
|
"learning_rate": 4.813633310946945e-06, |
|
"loss": 0.0011, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.012058926746249199, |
|
"learning_rate": 4.809435862995299e-06, |
|
"loss": 0.0003, |
|
"step": 12370 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.17031264305114746, |
|
"learning_rate": 4.805238415043654e-06, |
|
"loss": 0.0003, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.0036926474422216415, |
|
"learning_rate": 4.8010409670920084e-06, |
|
"loss": 0.0004, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.0020336448214948177, |
|
"learning_rate": 4.796843519140363e-06, |
|
"loss": 0.0049, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.009088937193155289, |
|
"learning_rate": 4.792646071188718e-06, |
|
"loss": 0.0005, |
|
"step": 12410 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.012697056867182255, |
|
"learning_rate": 4.788448623237072e-06, |
|
"loss": 0.0016, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.05333826318383217, |
|
"learning_rate": 4.784251175285427e-06, |
|
"loss": 0.0014, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.0574137419462204, |
|
"learning_rate": 4.7800537273337815e-06, |
|
"loss": 0.0007, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.08394545316696167, |
|
"learning_rate": 4.775856279382136e-06, |
|
"loss": 0.0035, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.014619513414800167, |
|
"learning_rate": 4.771658831430491e-06, |
|
"loss": 0.0006, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.010346817784011364, |
|
"learning_rate": 4.767461383478845e-06, |
|
"loss": 0.0004, |
|
"step": 12470 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.010911854915320873, |
|
"learning_rate": 4.7632639355272e-06, |
|
"loss": 0.0007, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.007937319576740265, |
|
"learning_rate": 4.759066487575555e-06, |
|
"loss": 0.0017, |
|
"step": 12490 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.004206392448395491, |
|
"learning_rate": 4.754869039623909e-06, |
|
"loss": 0.0004, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.004093706142157316, |
|
"learning_rate": 4.750671591672264e-06, |
|
"loss": 0.0002, |
|
"step": 12510 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.015210299752652645, |
|
"learning_rate": 4.746474143720618e-06, |
|
"loss": 0.0019, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.013090868480503559, |
|
"learning_rate": 4.742276695768973e-06, |
|
"loss": 0.0004, |
|
"step": 12530 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.006157224997878075, |
|
"learning_rate": 4.738079247817328e-06, |
|
"loss": 0.0003, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.027267277240753174, |
|
"learning_rate": 4.733881799865682e-06, |
|
"loss": 0.0053, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.28201332688331604, |
|
"learning_rate": 4.729684351914037e-06, |
|
"loss": 0.0005, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.015838222578167915, |
|
"learning_rate": 4.7254869039623916e-06, |
|
"loss": 0.0023, |
|
"step": 12570 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.3681650459766388, |
|
"learning_rate": 4.721289456010746e-06, |
|
"loss": 0.0014, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.0024659759365022182, |
|
"learning_rate": 4.717092008059101e-06, |
|
"loss": 0.0038, |
|
"step": 12590 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.024921996518969536, |
|
"learning_rate": 4.7128945601074546e-06, |
|
"loss": 0.0026, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.018606165423989296, |
|
"learning_rate": 4.708697112155809e-06, |
|
"loss": 0.0006, |
|
"step": 12610 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.0071179852820932865, |
|
"learning_rate": 4.704499664204164e-06, |
|
"loss": 0.0002, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.0048920102417469025, |
|
"learning_rate": 4.7003022162525184e-06, |
|
"loss": 0.001, |
|
"step": 12630 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.08117188513278961, |
|
"learning_rate": 4.696104768300874e-06, |
|
"loss": 0.0009, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.002124218037351966, |
|
"learning_rate": 4.6919073203492285e-06, |
|
"loss": 0.0008, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.013521471992135048, |
|
"learning_rate": 4.687709872397583e-06, |
|
"loss": 0.0026, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.05654778704047203, |
|
"learning_rate": 4.683512424445938e-06, |
|
"loss": 0.0005, |
|
"step": 12670 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.007984794676303864, |
|
"learning_rate": 4.6793149764942915e-06, |
|
"loss": 0.0003, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.0030824383720755577, |
|
"learning_rate": 4.675117528542646e-06, |
|
"loss": 0.0008, |
|
"step": 12690 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.0020946026779711246, |
|
"learning_rate": 4.670920080591001e-06, |
|
"loss": 0.0003, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.03582073748111725, |
|
"learning_rate": 4.666722632639355e-06, |
|
"loss": 0.0014, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.02298971451818943, |
|
"learning_rate": 4.66252518468771e-06, |
|
"loss": 0.0021, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.028649525716900826, |
|
"learning_rate": 4.658327736736065e-06, |
|
"loss": 0.0002, |
|
"step": 12730 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.0038425384555011988, |
|
"learning_rate": 4.654130288784419e-06, |
|
"loss": 0.0009, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.0029490068554878235, |
|
"learning_rate": 4.649932840832775e-06, |
|
"loss": 0.002, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.022061439231038094, |
|
"learning_rate": 4.6457353928811285e-06, |
|
"loss": 0.0003, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.001973432954400778, |
|
"learning_rate": 4.641537944929483e-06, |
|
"loss": 0.0011, |
|
"step": 12770 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.015841417014598846, |
|
"learning_rate": 4.637340496977838e-06, |
|
"loss": 0.0004, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.005069794598966837, |
|
"learning_rate": 4.633143049026192e-06, |
|
"loss": 0.0002, |
|
"step": 12790 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.001450516632758081, |
|
"learning_rate": 4.628945601074547e-06, |
|
"loss": 0.0032, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.01164446771144867, |
|
"learning_rate": 4.6247481531229015e-06, |
|
"loss": 0.0012, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.027409467846155167, |
|
"learning_rate": 4.620550705171256e-06, |
|
"loss": 0.001, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.020864328369498253, |
|
"learning_rate": 4.616353257219611e-06, |
|
"loss": 0.0012, |
|
"step": 12830 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.0067404527217149734, |
|
"learning_rate": 4.612155809267965e-06, |
|
"loss": 0.0003, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.0015818218234926462, |
|
"learning_rate": 4.60795836131632e-06, |
|
"loss": 0.0013, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.010182135738432407, |
|
"learning_rate": 4.603760913364675e-06, |
|
"loss": 0.0007, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.025032339617609978, |
|
"learning_rate": 4.599563465413029e-06, |
|
"loss": 0.0005, |
|
"step": 12870 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.10075455904006958, |
|
"learning_rate": 4.595785762256548e-06, |
|
"loss": 0.0025, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.03304919973015785, |
|
"learning_rate": 4.591588314304903e-06, |
|
"loss": 0.0021, |
|
"step": 12890 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.013808912597596645, |
|
"learning_rate": 4.587390866353258e-06, |
|
"loss": 0.0025, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.0035981941036880016, |
|
"learning_rate": 4.583193418401612e-06, |
|
"loss": 0.001, |
|
"step": 12910 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.037712134420871735, |
|
"learning_rate": 4.578995970449967e-06, |
|
"loss": 0.0012, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.03361295536160469, |
|
"learning_rate": 4.5747985224983215e-06, |
|
"loss": 0.0003, |
|
"step": 12930 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.016955135390162468, |
|
"learning_rate": 4.570601074546676e-06, |
|
"loss": 0.0005, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.01527098473161459, |
|
"learning_rate": 4.566403626595031e-06, |
|
"loss": 0.0002, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.011272264644503593, |
|
"learning_rate": 4.562206178643385e-06, |
|
"loss": 0.0002, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.03093000315129757, |
|
"learning_rate": 4.55800873069174e-06, |
|
"loss": 0.0003, |
|
"step": 12970 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.019925663247704506, |
|
"learning_rate": 4.553811282740095e-06, |
|
"loss": 0.0008, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.0032857642509043217, |
|
"learning_rate": 4.549613834788449e-06, |
|
"loss": 0.0007, |
|
"step": 12990 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.006109967827796936, |
|
"learning_rate": 4.545416386836804e-06, |
|
"loss": 0.0014, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.007658987771719694, |
|
"learning_rate": 4.541218938885158e-06, |
|
"loss": 0.0003, |
|
"step": 13010 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.012553730048239231, |
|
"learning_rate": 4.537021490933513e-06, |
|
"loss": 0.0023, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.0013994296314194798, |
|
"learning_rate": 4.532824042981868e-06, |
|
"loss": 0.0008, |
|
"step": 13030 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.44866248965263367, |
|
"learning_rate": 4.528626595030222e-06, |
|
"loss": 0.0068, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.04373318701982498, |
|
"learning_rate": 4.524429147078577e-06, |
|
"loss": 0.0004, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.00616934010758996, |
|
"learning_rate": 4.5202316991269315e-06, |
|
"loss": 0.0004, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.12982237339019775, |
|
"learning_rate": 4.516034251175286e-06, |
|
"loss": 0.0005, |
|
"step": 13070 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.02142524905502796, |
|
"learning_rate": 4.511836803223641e-06, |
|
"loss": 0.0003, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.054495833814144135, |
|
"learning_rate": 4.5076393552719945e-06, |
|
"loss": 0.0005, |
|
"step": 13090 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.2867399752140045, |
|
"learning_rate": 4.503441907320349e-06, |
|
"loss": 0.0008, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.024905776605010033, |
|
"learning_rate": 4.499244459368704e-06, |
|
"loss": 0.0008, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.0018473687814548612, |
|
"learning_rate": 4.495047011417058e-06, |
|
"loss": 0.0003, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.1236455887556076, |
|
"learning_rate": 4.490849563465413e-06, |
|
"loss": 0.0046, |
|
"step": 13130 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.07682592421770096, |
|
"learning_rate": 4.4866521155137685e-06, |
|
"loss": 0.0006, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.01288694143295288, |
|
"learning_rate": 4.482454667562123e-06, |
|
"loss": 0.0007, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.0026057243812829256, |
|
"learning_rate": 4.478257219610478e-06, |
|
"loss": 0.0002, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.012556037865579128, |
|
"learning_rate": 4.4740597716588315e-06, |
|
"loss": 0.0023, |
|
"step": 13170 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.01807723566889763, |
|
"learning_rate": 4.469862323707186e-06, |
|
"loss": 0.0003, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.02373344823718071, |
|
"learning_rate": 4.465664875755541e-06, |
|
"loss": 0.0015, |
|
"step": 13190 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.0012779162498191, |
|
"learning_rate": 4.461467427803895e-06, |
|
"loss": 0.0001, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.13116711378097534, |
|
"learning_rate": 4.45726997985225e-06, |
|
"loss": 0.0011, |
|
"step": 13210 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.0024926308542490005, |
|
"learning_rate": 4.4530725319006046e-06, |
|
"loss": 0.0011, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.017771735787391663, |
|
"learning_rate": 4.448875083948959e-06, |
|
"loss": 0.0007, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.002527177333831787, |
|
"learning_rate": 4.444677635997314e-06, |
|
"loss": 0.0009, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.11742949485778809, |
|
"learning_rate": 4.4404801880456684e-06, |
|
"loss": 0.0003, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.30960482358932495, |
|
"learning_rate": 4.436282740094023e-06, |
|
"loss": 0.0015, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.039885230362415314, |
|
"learning_rate": 4.432085292142378e-06, |
|
"loss": 0.0005, |
|
"step": 13270 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.1734355092048645, |
|
"learning_rate": 4.427887844190732e-06, |
|
"loss": 0.0022, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.003132781246677041, |
|
"learning_rate": 4.423690396239087e-06, |
|
"loss": 0.0053, |
|
"step": 13290 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.0005980546702630818, |
|
"learning_rate": 4.4194929482874415e-06, |
|
"loss": 0.0012, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.004592574667185545, |
|
"learning_rate": 4.415295500335796e-06, |
|
"loss": 0.0006, |
|
"step": 13310 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.031002620235085487, |
|
"learning_rate": 4.411098052384151e-06, |
|
"loss": 0.0012, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.09958618134260178, |
|
"learning_rate": 4.406900604432505e-06, |
|
"loss": 0.0006, |
|
"step": 13330 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.06991326808929443, |
|
"learning_rate": 4.40270315648086e-06, |
|
"loss": 0.004, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.26911312341690063, |
|
"learning_rate": 4.398505708529215e-06, |
|
"loss": 0.0007, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.35761767625808716, |
|
"learning_rate": 4.394308260577569e-06, |
|
"loss": 0.0023, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.007256943266838789, |
|
"learning_rate": 4.390110812625924e-06, |
|
"loss": 0.0001, |
|
"step": 13370 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.16001386940479279, |
|
"learning_rate": 4.3859133646742785e-06, |
|
"loss": 0.0005, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.01736878603696823, |
|
"learning_rate": 4.381715916722633e-06, |
|
"loss": 0.0003, |
|
"step": 13390 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.26081618666648865, |
|
"learning_rate": 4.377518468770988e-06, |
|
"loss": 0.001, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.0361749492585659, |
|
"learning_rate": 4.373321020819342e-06, |
|
"loss": 0.0003, |
|
"step": 13410 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.028411924839019775, |
|
"learning_rate": 4.369123572867697e-06, |
|
"loss": 0.0005, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.02852339670062065, |
|
"learning_rate": 4.3649261249160515e-06, |
|
"loss": 0.001, |
|
"step": 13430 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.0012577788438647985, |
|
"learning_rate": 4.360728676964406e-06, |
|
"loss": 0.0015, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.23752494156360626, |
|
"learning_rate": 4.356531229012761e-06, |
|
"loss": 0.0006, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.3116104304790497, |
|
"learning_rate": 4.3523337810611146e-06, |
|
"loss": 0.0012, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.0024859828408807516, |
|
"learning_rate": 4.34813633310947e-06, |
|
"loss": 0.0008, |
|
"step": 13470 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.004907153081148863, |
|
"learning_rate": 4.343938885157825e-06, |
|
"loss": 0.0007, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.0010646579321473837, |
|
"learning_rate": 4.339741437206179e-06, |
|
"loss": 0.0027, |
|
"step": 13490 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.2371465414762497, |
|
"learning_rate": 4.335543989254534e-06, |
|
"loss": 0.0012, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.01433322299271822, |
|
"learning_rate": 4.3313465413028885e-06, |
|
"loss": 0.0015, |
|
"step": 13510 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.0013969101710245013, |
|
"learning_rate": 4.327149093351243e-06, |
|
"loss": 0.0008, |
|
"step": 13520 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.08360123634338379, |
|
"learning_rate": 4.322951645399598e-06, |
|
"loss": 0.0007, |
|
"step": 13530 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.009467093273997307, |
|
"learning_rate": 4.3187541974479515e-06, |
|
"loss": 0.0002, |
|
"step": 13540 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.07131925225257874, |
|
"learning_rate": 4.314556749496306e-06, |
|
"loss": 0.0003, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.00770066911354661, |
|
"learning_rate": 4.310359301544661e-06, |
|
"loss": 0.0008, |
|
"step": 13560 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.011390848085284233, |
|
"learning_rate": 4.306161853593015e-06, |
|
"loss": 0.0006, |
|
"step": 13570 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.02216522768139839, |
|
"learning_rate": 4.301964405641371e-06, |
|
"loss": 0.0012, |
|
"step": 13580 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.1769302636384964, |
|
"learning_rate": 4.2977669576897254e-06, |
|
"loss": 0.001, |
|
"step": 13590 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.06368517130613327, |
|
"learning_rate": 4.29356950973808e-06, |
|
"loss": 0.0004, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.022844741120934486, |
|
"learning_rate": 4.289372061786435e-06, |
|
"loss": 0.001, |
|
"step": 13610 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.009264915250241756, |
|
"learning_rate": 4.2851746138347884e-06, |
|
"loss": 0.0004, |
|
"step": 13620 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.004532854538410902, |
|
"learning_rate": 4.280977165883143e-06, |
|
"loss": 0.0011, |
|
"step": 13630 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.020466448739171028, |
|
"learning_rate": 4.276779717931498e-06, |
|
"loss": 0.0002, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.0009050247026607394, |
|
"learning_rate": 4.272582269979852e-06, |
|
"loss": 0.0004, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.011387677863240242, |
|
"learning_rate": 4.268384822028207e-06, |
|
"loss": 0.0005, |
|
"step": 13660 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.047608114778995514, |
|
"learning_rate": 4.2641873740765615e-06, |
|
"loss": 0.0003, |
|
"step": 13670 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.0906815230846405, |
|
"learning_rate": 4.259989926124916e-06, |
|
"loss": 0.0009, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.06214312091469765, |
|
"learning_rate": 4.255792478173272e-06, |
|
"loss": 0.003, |
|
"step": 13690 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.580539345741272, |
|
"learning_rate": 4.251595030221625e-06, |
|
"loss": 0.0018, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.08133112639188766, |
|
"learning_rate": 4.24739758226998e-06, |
|
"loss": 0.0008, |
|
"step": 13710 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.006161406170576811, |
|
"learning_rate": 4.243200134318335e-06, |
|
"loss": 0.0003, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.11143741756677628, |
|
"learning_rate": 4.239002686366689e-06, |
|
"loss": 0.0004, |
|
"step": 13730 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.3503442406654358, |
|
"learning_rate": 4.234805238415044e-06, |
|
"loss": 0.005, |
|
"step": 13740 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.01972077041864395, |
|
"learning_rate": 4.2306077904633985e-06, |
|
"loss": 0.0002, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.022888071835041046, |
|
"learning_rate": 4.226410342511753e-06, |
|
"loss": 0.0005, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.12486062943935394, |
|
"learning_rate": 4.222212894560108e-06, |
|
"loss": 0.0006, |
|
"step": 13770 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.043477680534124374, |
|
"learning_rate": 4.218015446608462e-06, |
|
"loss": 0.0049, |
|
"step": 13780 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.02344905212521553, |
|
"learning_rate": 4.213817998656817e-06, |
|
"loss": 0.0018, |
|
"step": 13790 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.13969235122203827, |
|
"learning_rate": 4.2096205507051716e-06, |
|
"loss": 0.0007, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.5832682847976685, |
|
"learning_rate": 4.205423102753526e-06, |
|
"loss": 0.0029, |
|
"step": 13810 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.004915925674140453, |
|
"learning_rate": 4.201225654801881e-06, |
|
"loss": 0.0007, |
|
"step": 13820 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.024911779910326004, |
|
"learning_rate": 4.197028206850235e-06, |
|
"loss": 0.0007, |
|
"step": 13830 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.0007527291309088469, |
|
"learning_rate": 4.19283075889859e-06, |
|
"loss": 0.0023, |
|
"step": 13840 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.008193068206310272, |
|
"learning_rate": 4.188633310946945e-06, |
|
"loss": 0.0006, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.10630033910274506, |
|
"learning_rate": 4.184435862995299e-06, |
|
"loss": 0.001, |
|
"step": 13860 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.053735826164484024, |
|
"learning_rate": 4.180238415043654e-06, |
|
"loss": 0.0013, |
|
"step": 13870 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.008053253404796124, |
|
"learning_rate": 4.1760409670920085e-06, |
|
"loss": 0.0011, |
|
"step": 13880 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.012578310444951057, |
|
"learning_rate": 4.171843519140363e-06, |
|
"loss": 0.0005, |
|
"step": 13890 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.07252337783575058, |
|
"learning_rate": 4.167646071188718e-06, |
|
"loss": 0.0005, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.004061713349074125, |
|
"learning_rate": 4.163448623237072e-06, |
|
"loss": 0.0016, |
|
"step": 13910 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.07049524784088135, |
|
"learning_rate": 4.159251175285427e-06, |
|
"loss": 0.0006, |
|
"step": 13920 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.00046717686927877367, |
|
"learning_rate": 4.155053727333782e-06, |
|
"loss": 0.0005, |
|
"step": 13930 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.021116163581609726, |
|
"learning_rate": 4.150856279382136e-06, |
|
"loss": 0.0017, |
|
"step": 13940 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.022102046757936478, |
|
"learning_rate": 4.146658831430491e-06, |
|
"loss": 0.0017, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.007913138717412949, |
|
"learning_rate": 4.1424613834788454e-06, |
|
"loss": 0.0049, |
|
"step": 13960 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.022254914045333862, |
|
"learning_rate": 4.1382639355272e-06, |
|
"loss": 0.0019, |
|
"step": 13970 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.01675882749259472, |
|
"learning_rate": 4.134066487575555e-06, |
|
"loss": 0.0004, |
|
"step": 13980 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.004890616983175278, |
|
"learning_rate": 4.1298690396239085e-06, |
|
"loss": 0.0005, |
|
"step": 13990 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 0.5471255779266357, |
|
"learning_rate": 4.125671591672263e-06, |
|
"loss": 0.0041, |
|
"step": 14000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 23824, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 1000, |
|
"total_flos": 0.0, |
|
"train_batch_size": 12, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|