|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 9705, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0030911901081916537, |
|
"grad_norm": 0.0021730465814471245, |
|
"learning_rate": 0.0009989696032972695, |
|
"loss": 1.1462, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0061823802163833074, |
|
"grad_norm": 0.0018489831127226353, |
|
"learning_rate": 0.000997939206594539, |
|
"loss": 1.0204, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.00927357032457496, |
|
"grad_norm": 0.0019520210335031152, |
|
"learning_rate": 0.0009969088098918083, |
|
"loss": 1.0607, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.012364760432766615, |
|
"grad_norm": 0.002070036716759205, |
|
"learning_rate": 0.0009958784131890777, |
|
"loss": 1.0284, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.015455950540958269, |
|
"grad_norm": 0.0020703268237411976, |
|
"learning_rate": 0.0009948480164863472, |
|
"loss": 1.0654, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01854714064914992, |
|
"grad_norm": 0.0019948079716414213, |
|
"learning_rate": 0.0009938176197836167, |
|
"loss": 1.0034, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.021638330757341576, |
|
"grad_norm": 0.002126147970557213, |
|
"learning_rate": 0.0009927872230808862, |
|
"loss": 1.0173, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02472952086553323, |
|
"grad_norm": 0.0017046661814674735, |
|
"learning_rate": 0.0009917568263781555, |
|
"loss": 1.0153, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.027820710973724884, |
|
"grad_norm": 0.0018810734618455172, |
|
"learning_rate": 0.000990726429675425, |
|
"loss": 0.9208, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.030911901081916538, |
|
"grad_norm": 0.0019539243075996637, |
|
"learning_rate": 0.0009896960329726944, |
|
"loss": 1.0072, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03400309119010819, |
|
"grad_norm": 0.0015656334580853581, |
|
"learning_rate": 0.000988665636269964, |
|
"loss": 0.9477, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03709428129829984, |
|
"grad_norm": 0.0018790484173223376, |
|
"learning_rate": 0.0009876352395672334, |
|
"loss": 0.9543, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0401854714064915, |
|
"grad_norm": 0.0014801452634856105, |
|
"learning_rate": 0.0009866048428645029, |
|
"loss": 0.9507, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04327666151468315, |
|
"grad_norm": 0.0023686159402132034, |
|
"learning_rate": 0.0009855744461617724, |
|
"loss": 0.9471, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04636785162287481, |
|
"grad_norm": 0.0018689304124563932, |
|
"learning_rate": 0.0009845440494590419, |
|
"loss": 0.9483, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04945904173106646, |
|
"grad_norm": 0.0017488012090325356, |
|
"learning_rate": 0.0009835136527563111, |
|
"loss": 0.954, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.05255023183925812, |
|
"grad_norm": 0.001690636039711535, |
|
"learning_rate": 0.0009824832560535806, |
|
"loss": 0.9277, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05564142194744977, |
|
"grad_norm": 0.0017418304923921824, |
|
"learning_rate": 0.00098145285935085, |
|
"loss": 0.9258, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05873261205564142, |
|
"grad_norm": 0.0014526835875585675, |
|
"learning_rate": 0.0009804224626481196, |
|
"loss": 0.9577, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.061823802163833076, |
|
"grad_norm": 0.002275065751746297, |
|
"learning_rate": 0.000979392065945389, |
|
"loss": 0.9241, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06491499227202473, |
|
"grad_norm": 0.0012924526818096638, |
|
"learning_rate": 0.0009783616692426585, |
|
"loss": 0.9271, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06800618238021638, |
|
"grad_norm": 0.001761179999448359, |
|
"learning_rate": 0.000977331272539928, |
|
"loss": 0.9516, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.07109737248840804, |
|
"grad_norm": 0.001756934798322618, |
|
"learning_rate": 0.0009763008758371973, |
|
"loss": 0.9176, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.07418856259659969, |
|
"grad_norm": 0.0015818781685084105, |
|
"learning_rate": 0.0009752704791344668, |
|
"loss": 0.9549, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07727975270479134, |
|
"grad_norm": 0.0015478008426725864, |
|
"learning_rate": 0.0009742400824317363, |
|
"loss": 0.9418, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.080370942812983, |
|
"grad_norm": 0.0014897435903549194, |
|
"learning_rate": 0.0009732096857290057, |
|
"loss": 0.9052, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.08346213292117466, |
|
"grad_norm": 0.0013134022010490298, |
|
"learning_rate": 0.0009721792890262751, |
|
"loss": 0.9479, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0865533230293663, |
|
"grad_norm": 0.0018637892790138721, |
|
"learning_rate": 0.0009711488923235445, |
|
"loss": 0.8966, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08964451313755796, |
|
"grad_norm": 0.0032756649889051914, |
|
"learning_rate": 0.000970118495620814, |
|
"loss": 0.9641, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.09273570324574962, |
|
"grad_norm": 0.001614488777704537, |
|
"learning_rate": 0.0009690880989180835, |
|
"loss": 0.9373, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09582689335394126, |
|
"grad_norm": 0.0015908046625554562, |
|
"learning_rate": 0.000968057702215353, |
|
"loss": 0.953, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.09891808346213292, |
|
"grad_norm": 0.0017299671890214086, |
|
"learning_rate": 0.0009670273055126224, |
|
"loss": 0.9651, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.10200927357032458, |
|
"grad_norm": 0.0015082499012351036, |
|
"learning_rate": 0.0009659969088098919, |
|
"loss": 0.952, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.10510046367851623, |
|
"grad_norm": 0.00181696773506701, |
|
"learning_rate": 0.0009649665121071612, |
|
"loss": 0.9673, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.10819165378670788, |
|
"grad_norm": 0.001661508227698505, |
|
"learning_rate": 0.0009639361154044307, |
|
"loss": 0.9167, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.11128284389489954, |
|
"grad_norm": 0.0016009090468287468, |
|
"learning_rate": 0.0009629057187017002, |
|
"loss": 0.9363, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1143740340030912, |
|
"grad_norm": 0.0020189261995255947, |
|
"learning_rate": 0.0009618753219989696, |
|
"loss": 0.9025, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.11746522411128284, |
|
"grad_norm": 0.0024259143974632025, |
|
"learning_rate": 0.0009608449252962391, |
|
"loss": 0.9646, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1205564142194745, |
|
"grad_norm": 0.0015644305385649204, |
|
"learning_rate": 0.0009598145285935085, |
|
"loss": 0.9226, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.12364760432766615, |
|
"grad_norm": 0.0017478523077443242, |
|
"learning_rate": 0.000958784131890778, |
|
"loss": 0.9232, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1267387944358578, |
|
"grad_norm": 0.002042934997007251, |
|
"learning_rate": 0.0009577537351880475, |
|
"loss": 0.9464, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.12982998454404945, |
|
"grad_norm": 0.0017407669220119715, |
|
"learning_rate": 0.0009567233384853169, |
|
"loss": 0.9258, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.13292117465224113, |
|
"grad_norm": 0.001527661457657814, |
|
"learning_rate": 0.0009556929417825863, |
|
"loss": 0.9104, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.13601236476043277, |
|
"grad_norm": 0.0014940258115530014, |
|
"learning_rate": 0.0009546625450798557, |
|
"loss": 0.9208, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.1391035548686244, |
|
"grad_norm": 0.0018555278657004237, |
|
"learning_rate": 0.0009536321483771252, |
|
"loss": 0.9441, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.14219474497681608, |
|
"grad_norm": 0.002033649478107691, |
|
"learning_rate": 0.0009526017516743947, |
|
"loss": 0.9155, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.14528593508500773, |
|
"grad_norm": 0.0015595933655276895, |
|
"learning_rate": 0.0009515713549716642, |
|
"loss": 0.9164, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.14837712519319937, |
|
"grad_norm": 0.0016203809063881636, |
|
"learning_rate": 0.0009505409582689336, |
|
"loss": 0.9267, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.15146831530139104, |
|
"grad_norm": 0.0014497883385047317, |
|
"learning_rate": 0.0009495105615662029, |
|
"loss": 0.9328, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.1545595054095827, |
|
"grad_norm": 0.0019730357453227043, |
|
"learning_rate": 0.0009484801648634724, |
|
"loss": 0.9057, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.15765069551777433, |
|
"grad_norm": 0.0020096744410693645, |
|
"learning_rate": 0.0009474497681607419, |
|
"loss": 0.9217, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.160741885625966, |
|
"grad_norm": 0.0019119798671454191, |
|
"learning_rate": 0.0009464193714580114, |
|
"loss": 0.9324, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.16383307573415765, |
|
"grad_norm": 0.0014629390789195895, |
|
"learning_rate": 0.0009453889747552809, |
|
"loss": 0.9424, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.16692426584234932, |
|
"grad_norm": 0.0016167230205610394, |
|
"learning_rate": 0.0009443585780525502, |
|
"loss": 0.8622, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.17001545595054096, |
|
"grad_norm": 0.0016633981140330434, |
|
"learning_rate": 0.0009433281813498197, |
|
"loss": 0.9366, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1731066460587326, |
|
"grad_norm": 0.0015964192571118474, |
|
"learning_rate": 0.0009422977846470892, |
|
"loss": 0.9552, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.17619783616692428, |
|
"grad_norm": 0.0013956124894320965, |
|
"learning_rate": 0.0009412673879443586, |
|
"loss": 0.9995, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.17928902627511592, |
|
"grad_norm": 0.0014371597208082676, |
|
"learning_rate": 0.0009402369912416281, |
|
"loss": 0.8622, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.18238021638330756, |
|
"grad_norm": 0.0021855118684470654, |
|
"learning_rate": 0.0009392065945388974, |
|
"loss": 0.9862, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.18547140649149924, |
|
"grad_norm": 0.001544400816783309, |
|
"learning_rate": 0.0009381761978361669, |
|
"loss": 0.919, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.18856259659969088, |
|
"grad_norm": 0.0016864053905010223, |
|
"learning_rate": 0.0009371458011334364, |
|
"loss": 0.9405, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.19165378670788252, |
|
"grad_norm": 0.0021651415154337883, |
|
"learning_rate": 0.0009361154044307059, |
|
"loss": 0.9939, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.1947449768160742, |
|
"grad_norm": 0.001466871122829616, |
|
"learning_rate": 0.0009350850077279754, |
|
"loss": 0.8776, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.19783616692426584, |
|
"grad_norm": 0.0013067360268905759, |
|
"learning_rate": 0.0009340546110252446, |
|
"loss": 0.9684, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.2009273570324575, |
|
"grad_norm": 0.001740931300446391, |
|
"learning_rate": 0.0009330242143225141, |
|
"loss": 0.9786, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.20401854714064915, |
|
"grad_norm": 0.0017273235134780407, |
|
"learning_rate": 0.0009319938176197836, |
|
"loss": 0.9089, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.2071097372488408, |
|
"grad_norm": 0.001577013055793941, |
|
"learning_rate": 0.0009309634209170531, |
|
"loss": 0.9154, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.21020092735703247, |
|
"grad_norm": 0.001800213591195643, |
|
"learning_rate": 0.0009299330242143226, |
|
"loss": 0.9726, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.2132921174652241, |
|
"grad_norm": 0.0015475867548957467, |
|
"learning_rate": 0.000928902627511592, |
|
"loss": 0.8864, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.21638330757341576, |
|
"grad_norm": 0.002068218309432268, |
|
"learning_rate": 0.0009278722308088614, |
|
"loss": 0.8666, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.21947449768160743, |
|
"grad_norm": 0.0014894594205543399, |
|
"learning_rate": 0.0009268418341061309, |
|
"loss": 0.9178, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.22256568778979907, |
|
"grad_norm": 0.0018193743890151381, |
|
"learning_rate": 0.0009258114374034003, |
|
"loss": 0.913, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.22565687789799072, |
|
"grad_norm": 0.0019854323472827673, |
|
"learning_rate": 0.0009247810407006698, |
|
"loss": 0.9015, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.2287480680061824, |
|
"grad_norm": 0.0015401191776618361, |
|
"learning_rate": 0.0009237506439979392, |
|
"loss": 0.942, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.23183925811437403, |
|
"grad_norm": 0.0016189438756555319, |
|
"learning_rate": 0.0009227202472952086, |
|
"loss": 0.9647, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.23493044822256567, |
|
"grad_norm": 0.0018937138374894857, |
|
"learning_rate": 0.0009216898505924781, |
|
"loss": 0.9455, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.23802163833075735, |
|
"grad_norm": 0.0015417198883369565, |
|
"learning_rate": 0.0009206594538897476, |
|
"loss": 0.926, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.241112828438949, |
|
"grad_norm": 0.0014824847457930446, |
|
"learning_rate": 0.0009196290571870171, |
|
"loss": 0.8998, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.24420401854714066, |
|
"grad_norm": 0.0015080615412443876, |
|
"learning_rate": 0.0009185986604842864, |
|
"loss": 0.905, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.2472952086553323, |
|
"grad_norm": 0.0017820092616602778, |
|
"learning_rate": 0.0009175682637815559, |
|
"loss": 0.9504, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.250386398763524, |
|
"grad_norm": 0.001539805787615478, |
|
"learning_rate": 0.0009165378670788253, |
|
"loss": 0.9369, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.2534775888717156, |
|
"grad_norm": 0.001816835138015449, |
|
"learning_rate": 0.0009155074703760948, |
|
"loss": 0.8814, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.25656877897990726, |
|
"grad_norm": 0.0017855115002021194, |
|
"learning_rate": 0.0009144770736733643, |
|
"loss": 0.9066, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.2596599690880989, |
|
"grad_norm": 0.0015401588752865791, |
|
"learning_rate": 0.0009134466769706337, |
|
"loss": 0.9257, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.26275115919629055, |
|
"grad_norm": 0.0023962745908647776, |
|
"learning_rate": 0.0009124162802679032, |
|
"loss": 0.9428, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.26584234930448225, |
|
"grad_norm": 0.0014296959852799773, |
|
"learning_rate": 0.0009113858835651726, |
|
"loss": 0.9263, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.2689335394126739, |
|
"grad_norm": 0.0018063073512166739, |
|
"learning_rate": 0.000910355486862442, |
|
"loss": 0.9235, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.27202472952086554, |
|
"grad_norm": 0.0016391921089962125, |
|
"learning_rate": 0.0009093250901597115, |
|
"loss": 0.8952, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.2751159196290572, |
|
"grad_norm": 0.0023141205310821533, |
|
"learning_rate": 0.0009082946934569809, |
|
"loss": 0.9494, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.2782071097372488, |
|
"grad_norm": 0.0018004857702180743, |
|
"learning_rate": 0.0009072642967542504, |
|
"loss": 0.9346, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.28129829984544047, |
|
"grad_norm": 0.002469270955771208, |
|
"learning_rate": 0.0009062339000515199, |
|
"loss": 0.9245, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.28438948995363217, |
|
"grad_norm": 0.001746363122947514, |
|
"learning_rate": 0.0009052035033487893, |
|
"loss": 0.8899, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.2874806800618238, |
|
"grad_norm": 0.0017825034447014332, |
|
"learning_rate": 0.0009041731066460588, |
|
"loss": 0.9097, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.29057187017001546, |
|
"grad_norm": 0.0015837879618629813, |
|
"learning_rate": 0.0009031427099433283, |
|
"loss": 0.9406, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.2936630602782071, |
|
"grad_norm": 0.00214924244210124, |
|
"learning_rate": 0.0009021123132405976, |
|
"loss": 0.8884, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.29675425038639874, |
|
"grad_norm": 0.001666102441959083, |
|
"learning_rate": 0.0009010819165378671, |
|
"loss": 0.9393, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.29984544049459044, |
|
"grad_norm": 0.0013613239862024784, |
|
"learning_rate": 0.0009000515198351365, |
|
"loss": 0.9177, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.3029366306027821, |
|
"grad_norm": 0.0017140130512416363, |
|
"learning_rate": 0.000899021123132406, |
|
"loss": 0.9315, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.30602782071097373, |
|
"grad_norm": 0.001512790797278285, |
|
"learning_rate": 0.0008979907264296755, |
|
"loss": 0.9364, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.3091190108191654, |
|
"grad_norm": 0.0017727742670103908, |
|
"learning_rate": 0.0008969603297269449, |
|
"loss": 0.9175, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.312210200927357, |
|
"grad_norm": 0.0019065055530518293, |
|
"learning_rate": 0.0008959299330242144, |
|
"loss": 0.9585, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.31530139103554866, |
|
"grad_norm": 0.002561497036367655, |
|
"learning_rate": 0.0008948995363214837, |
|
"loss": 0.936, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.31839258114374036, |
|
"grad_norm": 0.0015061356825754046, |
|
"learning_rate": 0.0008938691396187532, |
|
"loss": 0.8856, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.321483771251932, |
|
"grad_norm": 0.0022356980480253696, |
|
"learning_rate": 0.0008928387429160227, |
|
"loss": 0.9152, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.32457496136012365, |
|
"grad_norm": 0.0017119398107752204, |
|
"learning_rate": 0.0008918083462132921, |
|
"loss": 0.8799, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.3276661514683153, |
|
"grad_norm": 0.0021370877511799335, |
|
"learning_rate": 0.0008907779495105616, |
|
"loss": 0.9308, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.33075734157650694, |
|
"grad_norm": 0.0019707437604665756, |
|
"learning_rate": 0.0008897475528078311, |
|
"loss": 0.9182, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.33384853168469864, |
|
"grad_norm": 0.001411254983395338, |
|
"learning_rate": 0.0008887171561051005, |
|
"loss": 0.9057, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.3369397217928903, |
|
"grad_norm": 0.0021633992437273264, |
|
"learning_rate": 0.00088768675940237, |
|
"loss": 0.9089, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.3400309119010819, |
|
"grad_norm": 0.001840689335949719, |
|
"learning_rate": 0.0008866563626996393, |
|
"loss": 0.8939, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.34312210200927357, |
|
"grad_norm": 0.0018973862752318382, |
|
"learning_rate": 0.0008856259659969088, |
|
"loss": 0.9206, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.3462132921174652, |
|
"grad_norm": 0.0015567062655463815, |
|
"learning_rate": 0.0008845955692941783, |
|
"loss": 0.9424, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.34930448222565685, |
|
"grad_norm": 0.0021733948960900307, |
|
"learning_rate": 0.0008835651725914478, |
|
"loss": 0.9965, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.35239567233384855, |
|
"grad_norm": 0.0025423939805477858, |
|
"learning_rate": 0.0008825347758887172, |
|
"loss": 0.8603, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.3554868624420402, |
|
"grad_norm": 0.0014392200391739607, |
|
"learning_rate": 0.0008815043791859866, |
|
"loss": 0.9483, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.35857805255023184, |
|
"grad_norm": 0.0015780443791300058, |
|
"learning_rate": 0.0008804739824832561, |
|
"loss": 0.8963, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.3616692426584235, |
|
"grad_norm": 0.002231495687738061, |
|
"learning_rate": 0.0008794435857805255, |
|
"loss": 0.9123, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.36476043276661513, |
|
"grad_norm": 0.0014009552542120218, |
|
"learning_rate": 0.000878413189077795, |
|
"loss": 0.9337, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.3678516228748068, |
|
"grad_norm": 0.0020310496911406517, |
|
"learning_rate": 0.0008773827923750644, |
|
"loss": 0.9137, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.37094281298299847, |
|
"grad_norm": 0.00151388393715024, |
|
"learning_rate": 0.0008763523956723338, |
|
"loss": 0.9091, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3740340030911901, |
|
"grad_norm": 0.0018349160673096776, |
|
"learning_rate": 0.0008753219989696033, |
|
"loss": 0.9486, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.37712519319938176, |
|
"grad_norm": 0.0018245832761749625, |
|
"learning_rate": 0.0008742916022668728, |
|
"loss": 0.9163, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.3802163833075734, |
|
"grad_norm": 0.002115410752594471, |
|
"learning_rate": 0.0008732612055641423, |
|
"loss": 0.9043, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.38330757341576505, |
|
"grad_norm": 0.0019245495786890388, |
|
"learning_rate": 0.0008722308088614118, |
|
"loss": 0.9212, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.38639876352395675, |
|
"grad_norm": 0.001513317576609552, |
|
"learning_rate": 0.000871200412158681, |
|
"loss": 0.9068, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.3894899536321484, |
|
"grad_norm": 0.0017635183176025748, |
|
"learning_rate": 0.0008701700154559505, |
|
"loss": 0.9261, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.39258114374034003, |
|
"grad_norm": 0.0017686467617750168, |
|
"learning_rate": 0.00086913961875322, |
|
"loss": 0.8763, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.3956723338485317, |
|
"grad_norm": 0.0015009143389761448, |
|
"learning_rate": 0.0008681092220504895, |
|
"loss": 0.8968, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.3987635239567233, |
|
"grad_norm": 0.0014831022126600146, |
|
"learning_rate": 0.000867078825347759, |
|
"loss": 0.8927, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.401854714064915, |
|
"grad_norm": 0.0029206760227680206, |
|
"learning_rate": 0.0008660484286450283, |
|
"loss": 0.9455, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.40494590417310666, |
|
"grad_norm": 0.0014479625970125198, |
|
"learning_rate": 0.0008650180319422978, |
|
"loss": 0.9229, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.4080370942812983, |
|
"grad_norm": 0.0014661536552011967, |
|
"learning_rate": 0.0008639876352395672, |
|
"loss": 0.9088, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.41112828438948995, |
|
"grad_norm": 0.0014888847945258021, |
|
"learning_rate": 0.0008629572385368367, |
|
"loss": 0.9633, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.4142194744976816, |
|
"grad_norm": 0.0017181559232994914, |
|
"learning_rate": 0.0008619268418341062, |
|
"loss": 0.9224, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.41731066460587324, |
|
"grad_norm": 0.0015694062458351254, |
|
"learning_rate": 0.0008608964451313755, |
|
"loss": 0.8789, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.42040185471406494, |
|
"grad_norm": 0.001495172269642353, |
|
"learning_rate": 0.000859866048428645, |
|
"loss": 0.8666, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.4234930448222566, |
|
"grad_norm": 0.002242365386337042, |
|
"learning_rate": 0.0008588356517259145, |
|
"loss": 0.9108, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.4265842349304482, |
|
"grad_norm": 0.0014668918447569013, |
|
"learning_rate": 0.000857805255023184, |
|
"loss": 0.916, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.42967542503863987, |
|
"grad_norm": 0.001498398371040821, |
|
"learning_rate": 0.0008567748583204535, |
|
"loss": 0.9183, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.4327666151468315, |
|
"grad_norm": 0.001534744049422443, |
|
"learning_rate": 0.0008557444616177228, |
|
"loss": 0.9114, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.43585780525502316, |
|
"grad_norm": 0.0014011348830536008, |
|
"learning_rate": 0.0008547140649149922, |
|
"loss": 0.9394, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.43894899536321486, |
|
"grad_norm": 0.0016137160127982497, |
|
"learning_rate": 0.0008536836682122617, |
|
"loss": 0.8852, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.4420401854714065, |
|
"grad_norm": 0.0018244803650304675, |
|
"learning_rate": 0.0008526532715095312, |
|
"loss": 0.9006, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.44513137557959814, |
|
"grad_norm": 0.002001643180847168, |
|
"learning_rate": 0.0008516228748068007, |
|
"loss": 0.8877, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.4482225656877898, |
|
"grad_norm": 0.001545743434689939, |
|
"learning_rate": 0.0008505924781040701, |
|
"loss": 0.9169, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.45131375579598143, |
|
"grad_norm": 0.0015252763405442238, |
|
"learning_rate": 0.0008495620814013395, |
|
"loss": 0.9302, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.45440494590417313, |
|
"grad_norm": 0.0018486313056200743, |
|
"learning_rate": 0.0008485316846986089, |
|
"loss": 0.8845, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.4574961360123648, |
|
"grad_norm": 0.0013468407560139894, |
|
"learning_rate": 0.0008475012879958784, |
|
"loss": 0.9079, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.4605873261205564, |
|
"grad_norm": 0.0011928731109946966, |
|
"learning_rate": 0.0008464708912931479, |
|
"loss": 0.8997, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.46367851622874806, |
|
"grad_norm": 0.0018724995898082852, |
|
"learning_rate": 0.0008454404945904173, |
|
"loss": 0.944, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4667697063369397, |
|
"grad_norm": 0.0021075448021292686, |
|
"learning_rate": 0.0008444100978876868, |
|
"loss": 0.9155, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.46986089644513135, |
|
"grad_norm": 0.0016975891776382923, |
|
"learning_rate": 0.0008433797011849562, |
|
"loss": 0.9349, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.47295208655332305, |
|
"grad_norm": 0.001552650355733931, |
|
"learning_rate": 0.0008423493044822257, |
|
"loss": 0.9145, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.4760432766615147, |
|
"grad_norm": 0.0016928149852901697, |
|
"learning_rate": 0.0008413189077794952, |
|
"loss": 0.8694, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.47913446676970634, |
|
"grad_norm": 0.0015649759443476796, |
|
"learning_rate": 0.0008402885110767645, |
|
"loss": 0.9505, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.482225656877898, |
|
"grad_norm": 0.0020843998063355684, |
|
"learning_rate": 0.000839258114374034, |
|
"loss": 0.9463, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.4853168469860896, |
|
"grad_norm": 0.0012236249167472124, |
|
"learning_rate": 0.0008382277176713034, |
|
"loss": 0.9033, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.4884080370942813, |
|
"grad_norm": 0.0017901939572766423, |
|
"learning_rate": 0.0008371973209685729, |
|
"loss": 0.911, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.49149922720247297, |
|
"grad_norm": 0.0018610935658216476, |
|
"learning_rate": 0.0008361669242658424, |
|
"loss": 0.8655, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.4945904173106646, |
|
"grad_norm": 0.001789487199857831, |
|
"learning_rate": 0.0008351365275631119, |
|
"loss": 0.9427, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.49768160741885625, |
|
"grad_norm": 0.00190592254512012, |
|
"learning_rate": 0.0008341061308603813, |
|
"loss": 0.9192, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.500772797527048, |
|
"grad_norm": 0.0016090746503323317, |
|
"learning_rate": 0.0008330757341576506, |
|
"loss": 0.9487, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.5038639876352395, |
|
"grad_norm": 0.0016335912514477968, |
|
"learning_rate": 0.0008320453374549201, |
|
"loss": 0.9051, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.5069551777434312, |
|
"grad_norm": 0.0016785924090072513, |
|
"learning_rate": 0.0008310149407521896, |
|
"loss": 0.9104, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.5100463678516228, |
|
"grad_norm": 0.0022380806040018797, |
|
"learning_rate": 0.0008299845440494591, |
|
"loss": 0.9038, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.5131375579598145, |
|
"grad_norm": 0.0016855926951393485, |
|
"learning_rate": 0.0008289541473467285, |
|
"loss": 0.9256, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.5162287480680062, |
|
"grad_norm": 0.0019196901703253388, |
|
"learning_rate": 0.000827923750643998, |
|
"loss": 0.9271, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.5193199381761978, |
|
"grad_norm": 0.001529015600681305, |
|
"learning_rate": 0.0008268933539412674, |
|
"loss": 0.9053, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.5224111282843895, |
|
"grad_norm": 0.001290348474867642, |
|
"learning_rate": 0.0008258629572385369, |
|
"loss": 0.8879, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.5255023183925811, |
|
"grad_norm": 0.0017919385572895408, |
|
"learning_rate": 0.0008248325605358063, |
|
"loss": 0.8537, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.5285935085007728, |
|
"grad_norm": 0.0017021787352859974, |
|
"learning_rate": 0.0008238021638330757, |
|
"loss": 0.9126, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.5316846986089645, |
|
"grad_norm": 0.0017202612943947315, |
|
"learning_rate": 0.0008227717671303452, |
|
"loss": 0.8977, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.5347758887171561, |
|
"grad_norm": 0.0021942094899713993, |
|
"learning_rate": 0.0008217413704276147, |
|
"loss": 0.944, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.5378670788253478, |
|
"grad_norm": 0.001882906537503004, |
|
"learning_rate": 0.0008207109737248841, |
|
"loss": 0.9704, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.5409582689335394, |
|
"grad_norm": 0.0015875013777986169, |
|
"learning_rate": 0.0008196805770221536, |
|
"loss": 0.927, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.5440494590417311, |
|
"grad_norm": 0.0017645510379225016, |
|
"learning_rate": 0.000818650180319423, |
|
"loss": 0.9719, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.5471406491499228, |
|
"grad_norm": 0.0016093801241368055, |
|
"learning_rate": 0.0008176197836166924, |
|
"loss": 0.8753, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.5502318392581144, |
|
"grad_norm": 0.0016733432421460748, |
|
"learning_rate": 0.0008165893869139619, |
|
"loss": 0.9132, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.5533230293663061, |
|
"grad_norm": 0.0014284460339695215, |
|
"learning_rate": 0.0008155589902112313, |
|
"loss": 0.9242, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.5564142194744977, |
|
"grad_norm": 0.0018177167512476444, |
|
"learning_rate": 0.0008145285935085008, |
|
"loss": 0.9469, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.5595054095826894, |
|
"grad_norm": 0.0020286261569708586, |
|
"learning_rate": 0.0008134981968057702, |
|
"loss": 0.8884, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.5625965996908809, |
|
"grad_norm": 0.0014468576991930604, |
|
"learning_rate": 0.0008124678001030397, |
|
"loss": 0.908, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.5656877897990726, |
|
"grad_norm": 0.001559574855491519, |
|
"learning_rate": 0.0008114374034003092, |
|
"loss": 0.9104, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.5687789799072643, |
|
"grad_norm": 0.0017769662663340569, |
|
"learning_rate": 0.0008104070066975787, |
|
"loss": 0.9222, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.5718701700154559, |
|
"grad_norm": 0.001862520119175315, |
|
"learning_rate": 0.000809376609994848, |
|
"loss": 0.8862, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.5749613601236476, |
|
"grad_norm": 0.0021106936037540436, |
|
"learning_rate": 0.0008083462132921174, |
|
"loss": 0.8862, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.5780525502318392, |
|
"grad_norm": 0.0013291973154991865, |
|
"learning_rate": 0.0008073158165893869, |
|
"loss": 0.9381, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.5811437403400309, |
|
"grad_norm": 0.001646311953663826, |
|
"learning_rate": 0.0008062854198866564, |
|
"loss": 0.8862, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.5842349304482226, |
|
"grad_norm": 0.0015801474219188094, |
|
"learning_rate": 0.0008052550231839259, |
|
"loss": 0.9402, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.5873261205564142, |
|
"grad_norm": 0.0015533153200522065, |
|
"learning_rate": 0.0008042246264811953, |
|
"loss": 0.9242, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5904173106646059, |
|
"grad_norm": 0.0016890340484678745, |
|
"learning_rate": 0.0008031942297784647, |
|
"loss": 0.9234, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5935085007727975, |
|
"grad_norm": 0.0014929634053260088, |
|
"learning_rate": 0.0008021638330757341, |
|
"loss": 0.9383, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.5965996908809892, |
|
"grad_norm": 0.001471440540626645, |
|
"learning_rate": 0.0008011334363730036, |
|
"loss": 0.908, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.5996908809891809, |
|
"grad_norm": 0.00180807092692703, |
|
"learning_rate": 0.0008001030396702731, |
|
"loss": 0.9211, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.6027820710973725, |
|
"grad_norm": 0.0016187585424631834, |
|
"learning_rate": 0.0007990726429675426, |
|
"loss": 0.9254, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.6058732612055642, |
|
"grad_norm": 0.0016198824159801006, |
|
"learning_rate": 0.0007980422462648119, |
|
"loss": 0.9118, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.6089644513137558, |
|
"grad_norm": 0.0017275193240493536, |
|
"learning_rate": 0.0007970118495620814, |
|
"loss": 0.9444, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.6120556414219475, |
|
"grad_norm": 0.002495982451364398, |
|
"learning_rate": 0.0007959814528593509, |
|
"loss": 0.8879, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.615146831530139, |
|
"grad_norm": 0.0013608982553705573, |
|
"learning_rate": 0.0007949510561566204, |
|
"loss": 0.8695, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.6182380216383307, |
|
"grad_norm": 0.0015398486284539104, |
|
"learning_rate": 0.0007939206594538898, |
|
"loss": 0.916, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6213292117465224, |
|
"grad_norm": 0.0016108902636915445, |
|
"learning_rate": 0.0007928902627511591, |
|
"loss": 0.9244, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.624420401854714, |
|
"grad_norm": 0.0016804412007331848, |
|
"learning_rate": 0.0007918598660484286, |
|
"loss": 0.9358, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.6275115919629057, |
|
"grad_norm": 0.0018602460622787476, |
|
"learning_rate": 0.0007908294693456981, |
|
"loss": 0.8829, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.6306027820710973, |
|
"grad_norm": 0.0017209933139383793, |
|
"learning_rate": 0.0007897990726429676, |
|
"loss": 0.9365, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.633693972179289, |
|
"grad_norm": 0.002164942678064108, |
|
"learning_rate": 0.0007887686759402371, |
|
"loss": 0.9349, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.6367851622874807, |
|
"grad_norm": 0.0021378265228122473, |
|
"learning_rate": 0.0007877382792375064, |
|
"loss": 0.8884, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.6398763523956723, |
|
"grad_norm": 0.001859784359112382, |
|
"learning_rate": 0.0007867078825347758, |
|
"loss": 0.9461, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.642967542503864, |
|
"grad_norm": 0.001505703548900783, |
|
"learning_rate": 0.0007856774858320453, |
|
"loss": 0.9284, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.6460587326120556, |
|
"grad_norm": 0.0014758047182112932, |
|
"learning_rate": 0.0007846470891293148, |
|
"loss": 0.8943, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.6491499227202473, |
|
"grad_norm": 0.001482132589444518, |
|
"learning_rate": 0.0007836166924265843, |
|
"loss": 0.9309, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.652241112828439, |
|
"grad_norm": 0.0018712684977799654, |
|
"learning_rate": 0.0007825862957238537, |
|
"loss": 0.9705, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.6553323029366306, |
|
"grad_norm": 0.0025388901121914387, |
|
"learning_rate": 0.0007815558990211231, |
|
"loss": 0.9129, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.6584234930448223, |
|
"grad_norm": 0.001495323609560728, |
|
"learning_rate": 0.0007805255023183926, |
|
"loss": 0.9435, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.6615146831530139, |
|
"grad_norm": 0.0016260349657386541, |
|
"learning_rate": 0.0007794951056156621, |
|
"loss": 0.9124, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.6646058732612056, |
|
"grad_norm": 0.0019677565433084965, |
|
"learning_rate": 0.0007784647089129315, |
|
"loss": 0.9317, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.6676970633693973, |
|
"grad_norm": 0.0023265851195901632, |
|
"learning_rate": 0.0007774343122102009, |
|
"loss": 0.9538, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.6707882534775889, |
|
"grad_norm": 0.0014457949437201023, |
|
"learning_rate": 0.0007764039155074703, |
|
"loss": 0.9538, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.6738794435857806, |
|
"grad_norm": 0.0014781310455873609, |
|
"learning_rate": 0.0007753735188047398, |
|
"loss": 0.8816, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.6769706336939721, |
|
"grad_norm": 0.0017087183659896255, |
|
"learning_rate": 0.0007743431221020093, |
|
"loss": 0.9142, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.6800618238021638, |
|
"grad_norm": 0.002620991552248597, |
|
"learning_rate": 0.0007733127253992788, |
|
"loss": 0.9345, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.6831530139103554, |
|
"grad_norm": 0.001568863750435412, |
|
"learning_rate": 0.0007722823286965483, |
|
"loss": 0.9089, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.6862442040185471, |
|
"grad_norm": 0.0017851140582934022, |
|
"learning_rate": 0.0007712519319938175, |
|
"loss": 0.8943, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.6893353941267388, |
|
"grad_norm": 0.0013759072171524167, |
|
"learning_rate": 0.000770221535291087, |
|
"loss": 0.9176, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.6924265842349304, |
|
"grad_norm": 0.0015892439987510443, |
|
"learning_rate": 0.0007691911385883565, |
|
"loss": 0.8854, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.6955177743431221, |
|
"grad_norm": 0.001841311459429562, |
|
"learning_rate": 0.000768160741885626, |
|
"loss": 0.8824, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.6986089644513137, |
|
"grad_norm": 0.0016689961776137352, |
|
"learning_rate": 0.0007671303451828955, |
|
"loss": 0.9442, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.7017001545595054, |
|
"grad_norm": 0.0018330076709389687, |
|
"learning_rate": 0.0007660999484801649, |
|
"loss": 0.9398, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.7047913446676971, |
|
"grad_norm": 0.0021366167347878218, |
|
"learning_rate": 0.0007650695517774343, |
|
"loss": 0.9507, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.7078825347758887, |
|
"grad_norm": 0.0017535964725539088, |
|
"learning_rate": 0.0007640391550747038, |
|
"loss": 0.8902, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.7109737248840804, |
|
"grad_norm": 0.0015847982140257955, |
|
"learning_rate": 0.0007630087583719732, |
|
"loss": 0.9163, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.714064914992272, |
|
"grad_norm": 0.0016199509846046567, |
|
"learning_rate": 0.0007619783616692427, |
|
"loss": 0.8775, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.7171561051004637, |
|
"grad_norm": 0.001773869269527495, |
|
"learning_rate": 0.0007609479649665121, |
|
"loss": 0.9406, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.7202472952086554, |
|
"grad_norm": 0.0015184408985078335, |
|
"learning_rate": 0.0007599175682637816, |
|
"loss": 0.8945, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.723338485316847, |
|
"grad_norm": 0.005015387199819088, |
|
"learning_rate": 0.000758887171561051, |
|
"loss": 0.9407, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.7264296754250387, |
|
"grad_norm": 0.0020386965479701757, |
|
"learning_rate": 0.0007578567748583205, |
|
"loss": 0.9122, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.7295208655332303, |
|
"grad_norm": 0.002024392830207944, |
|
"learning_rate": 0.00075682637815559, |
|
"loss": 0.8902, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.732612055641422, |
|
"grad_norm": 0.0016152510652318597, |
|
"learning_rate": 0.0007557959814528593, |
|
"loss": 0.907, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.7357032457496137, |
|
"grad_norm": 0.001684612943790853, |
|
"learning_rate": 0.0007547655847501288, |
|
"loss": 0.8557, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.7387944358578052, |
|
"grad_norm": 0.0018417539540678263, |
|
"learning_rate": 0.0007537351880473982, |
|
"loss": 0.932, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.7418856259659969, |
|
"grad_norm": 0.0020610857754945755, |
|
"learning_rate": 0.0007527047913446677, |
|
"loss": 0.9125, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.7449768160741885, |
|
"grad_norm": 0.0023381186183542013, |
|
"learning_rate": 0.0007516743946419372, |
|
"loss": 0.9251, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.7480680061823802, |
|
"grad_norm": 0.002318663988262415, |
|
"learning_rate": 0.0007506439979392066, |
|
"loss": 0.9043, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.7511591962905718, |
|
"grad_norm": 0.0017375986790284514, |
|
"learning_rate": 0.0007496136012364761, |
|
"loss": 0.9061, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.7542503863987635, |
|
"grad_norm": 0.0018655112944543362, |
|
"learning_rate": 0.0007485832045337456, |
|
"loss": 1.0007, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.7573415765069552, |
|
"grad_norm": 0.0019265462178736925, |
|
"learning_rate": 0.0007475528078310149, |
|
"loss": 0.9089, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.7604327666151468, |
|
"grad_norm": 0.0018528448417782784, |
|
"learning_rate": 0.0007465224111282844, |
|
"loss": 0.8856, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.7635239567233385, |
|
"grad_norm": 0.0016357959248125553, |
|
"learning_rate": 0.0007454920144255538, |
|
"loss": 0.8712, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.7666151468315301, |
|
"grad_norm": 0.0013976657064631581, |
|
"learning_rate": 0.0007444616177228233, |
|
"loss": 0.9761, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.7697063369397218, |
|
"grad_norm": 0.002004920970648527, |
|
"learning_rate": 0.0007434312210200928, |
|
"loss": 0.9023, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.7727975270479135, |
|
"grad_norm": 0.0015114221023395658, |
|
"learning_rate": 0.0007424008243173622, |
|
"loss": 0.9048, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.7758887171561051, |
|
"grad_norm": 0.0016935282619670033, |
|
"learning_rate": 0.0007413704276146317, |
|
"loss": 0.9617, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.7789799072642968, |
|
"grad_norm": 0.0015191826969385147, |
|
"learning_rate": 0.000740340030911901, |
|
"loss": 0.927, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.7820710973724884, |
|
"grad_norm": 0.0022661250550299883, |
|
"learning_rate": 0.0007393096342091705, |
|
"loss": 0.8871, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.7851622874806801, |
|
"grad_norm": 0.002032969379797578, |
|
"learning_rate": 0.00073827923750644, |
|
"loss": 0.9149, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.7882534775888718, |
|
"grad_norm": 0.0017924593994393945, |
|
"learning_rate": 0.0007372488408037095, |
|
"loss": 0.9564, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.7913446676970634, |
|
"grad_norm": 0.0014548065373674035, |
|
"learning_rate": 0.0007362184441009789, |
|
"loss": 0.9095, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.794435857805255, |
|
"grad_norm": 0.0025248208548873663, |
|
"learning_rate": 0.0007351880473982483, |
|
"loss": 0.9337, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.7975270479134466, |
|
"grad_norm": 0.002107022562995553, |
|
"learning_rate": 0.0007341576506955178, |
|
"loss": 0.9482, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.8006182380216383, |
|
"grad_norm": 0.0016369119985029101, |
|
"learning_rate": 0.0007331272539927873, |
|
"loss": 0.9035, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.80370942812983, |
|
"grad_norm": 0.0018871091306209564, |
|
"learning_rate": 0.0007320968572900567, |
|
"loss": 0.8882, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.8068006182380216, |
|
"grad_norm": 0.0019260449334979057, |
|
"learning_rate": 0.0007310664605873261, |
|
"loss": 0.9074, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.8098918083462133, |
|
"grad_norm": 0.0018819051329046488, |
|
"learning_rate": 0.0007300360638845955, |
|
"loss": 0.9218, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.8129829984544049, |
|
"grad_norm": 0.0015719749499112368, |
|
"learning_rate": 0.000729005667181865, |
|
"loss": 0.9155, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.8160741885625966, |
|
"grad_norm": 0.0015479732537642121, |
|
"learning_rate": 0.0007279752704791345, |
|
"loss": 0.909, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.8191653786707882, |
|
"grad_norm": 0.0013954916503280401, |
|
"learning_rate": 0.000726944873776404, |
|
"loss": 0.9217, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.8222565687789799, |
|
"grad_norm": 0.0013768866192549467, |
|
"learning_rate": 0.0007259144770736735, |
|
"loss": 0.9496, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.8253477588871716, |
|
"grad_norm": 0.001571536879055202, |
|
"learning_rate": 0.0007248840803709427, |
|
"loss": 0.9305, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.8284389489953632, |
|
"grad_norm": 0.002294939709827304, |
|
"learning_rate": 0.0007238536836682122, |
|
"loss": 0.9262, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.8315301391035549, |
|
"grad_norm": 0.0016881937626749277, |
|
"learning_rate": 0.0007228232869654817, |
|
"loss": 0.8624, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.8346213292117465, |
|
"grad_norm": 0.001524370047263801, |
|
"learning_rate": 0.0007217928902627512, |
|
"loss": 0.9194, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.8377125193199382, |
|
"grad_norm": 0.0020258829463273287, |
|
"learning_rate": 0.0007207624935600207, |
|
"loss": 0.9034, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.8408037094281299, |
|
"grad_norm": 0.0019363865721970797, |
|
"learning_rate": 0.00071973209685729, |
|
"loss": 0.929, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.8438948995363215, |
|
"grad_norm": 0.00164938741363585, |
|
"learning_rate": 0.0007187017001545595, |
|
"loss": 0.9569, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.8469860896445132, |
|
"grad_norm": 0.0016705304151400924, |
|
"learning_rate": 0.000717671303451829, |
|
"loss": 0.8835, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.8500772797527048, |
|
"grad_norm": 0.0018051480874419212, |
|
"learning_rate": 0.0007166409067490984, |
|
"loss": 0.8838, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.8531684698608965, |
|
"grad_norm": 0.0015429792692884803, |
|
"learning_rate": 0.0007156105100463679, |
|
"loss": 0.9004, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.8562596599690881, |
|
"grad_norm": 0.002758611924946308, |
|
"learning_rate": 0.0007145801133436372, |
|
"loss": 0.9299, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.8593508500772797, |
|
"grad_norm": 0.002330324612557888, |
|
"learning_rate": 0.0007135497166409067, |
|
"loss": 0.9749, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.8624420401854714, |
|
"grad_norm": 0.0015028082998469472, |
|
"learning_rate": 0.0007125193199381762, |
|
"loss": 0.9128, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.865533230293663, |
|
"grad_norm": 0.0014309794642031193, |
|
"learning_rate": 0.0007114889232354457, |
|
"loss": 0.8633, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.8686244204018547, |
|
"grad_norm": 0.0013788605574518442, |
|
"learning_rate": 0.0007104585265327152, |
|
"loss": 0.921, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.8717156105100463, |
|
"grad_norm": 0.0019114799797534943, |
|
"learning_rate": 0.0007094281298299847, |
|
"loss": 0.9312, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.874806800618238, |
|
"grad_norm": 0.002490201499313116, |
|
"learning_rate": 0.0007083977331272539, |
|
"loss": 0.9027, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.8778979907264297, |
|
"grad_norm": 0.0015281651867553592, |
|
"learning_rate": 0.0007073673364245234, |
|
"loss": 0.9189, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.8809891808346213, |
|
"grad_norm": 0.0015036484692245722, |
|
"learning_rate": 0.0007063369397217929, |
|
"loss": 0.8979, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.884080370942813, |
|
"grad_norm": 0.0015251452568918467, |
|
"learning_rate": 0.0007053065430190624, |
|
"loss": 0.9028, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.8871715610510046, |
|
"grad_norm": 0.001428957679308951, |
|
"learning_rate": 0.0007042761463163319, |
|
"loss": 0.8717, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.8902627511591963, |
|
"grad_norm": 0.0015625401865690947, |
|
"learning_rate": 0.0007032457496136012, |
|
"loss": 0.9324, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.893353941267388, |
|
"grad_norm": 0.001634020241908729, |
|
"learning_rate": 0.0007022153529108707, |
|
"loss": 0.8697, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.8964451313755796, |
|
"grad_norm": 0.0015363607089966536, |
|
"learning_rate": 0.0007011849562081401, |
|
"loss": 0.8956, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.8995363214837713, |
|
"grad_norm": 0.0012279663933441043, |
|
"learning_rate": 0.0007001545595054096, |
|
"loss": 0.9616, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.9026275115919629, |
|
"grad_norm": 0.0015052827075123787, |
|
"learning_rate": 0.0006991241628026791, |
|
"loss": 0.8944, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.9057187017001546, |
|
"grad_norm": 0.001815865165553987, |
|
"learning_rate": 0.0006980937660999485, |
|
"loss": 0.8739, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.9088098918083463, |
|
"grad_norm": 0.0019412669353187084, |
|
"learning_rate": 0.0006970633693972179, |
|
"loss": 0.8708, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.9119010819165378, |
|
"grad_norm": 0.001834962284192443, |
|
"learning_rate": 0.0006960329726944874, |
|
"loss": 0.9456, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.9149922720247295, |
|
"grad_norm": 0.002000207779929042, |
|
"learning_rate": 0.0006950025759917569, |
|
"loss": 0.9188, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.9180834621329211, |
|
"grad_norm": 0.0017901693936437368, |
|
"learning_rate": 0.0006939721792890264, |
|
"loss": 0.9377, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.9211746522411128, |
|
"grad_norm": 0.002317288890480995, |
|
"learning_rate": 0.0006929417825862957, |
|
"loss": 0.9159, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.9242658423493045, |
|
"grad_norm": 0.0019505108939483762, |
|
"learning_rate": 0.0006919113858835651, |
|
"loss": 0.9171, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.9273570324574961, |
|
"grad_norm": 0.0016651200130581856, |
|
"learning_rate": 0.0006908809891808346, |
|
"loss": 0.9331, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.9304482225656878, |
|
"grad_norm": 0.0016768771456554532, |
|
"learning_rate": 0.0006898505924781041, |
|
"loss": 0.9731, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.9335394126738794, |
|
"grad_norm": 0.0020638711284846067, |
|
"learning_rate": 0.0006888201957753736, |
|
"loss": 0.9023, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.9366306027820711, |
|
"grad_norm": 0.001335518783889711, |
|
"learning_rate": 0.000687789799072643, |
|
"loss": 0.917, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.9397217928902627, |
|
"grad_norm": 0.0013942529913038015, |
|
"learning_rate": 0.0006867594023699125, |
|
"loss": 0.9273, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.9428129829984544, |
|
"grad_norm": 0.0013294010423123837, |
|
"learning_rate": 0.0006857290056671818, |
|
"loss": 0.8909, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.9459041731066461, |
|
"grad_norm": 0.0017269228119403124, |
|
"learning_rate": 0.0006846986089644513, |
|
"loss": 0.9405, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.9489953632148377, |
|
"grad_norm": 0.0018391566118225455, |
|
"learning_rate": 0.0006836682122617208, |
|
"loss": 0.9, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.9520865533230294, |
|
"grad_norm": 0.0018784053390845656, |
|
"learning_rate": 0.0006826378155589902, |
|
"loss": 0.9081, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.955177743431221, |
|
"grad_norm": 0.0017941402038559318, |
|
"learning_rate": 0.0006816074188562597, |
|
"loss": 0.9343, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.9582689335394127, |
|
"grad_norm": 0.0019338412676006556, |
|
"learning_rate": 0.0006805770221535291, |
|
"loss": 0.8904, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.9613601236476044, |
|
"grad_norm": 0.0016496024327352643, |
|
"learning_rate": 0.0006795466254507986, |
|
"loss": 0.8983, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.964451313755796, |
|
"grad_norm": 0.0015189488185569644, |
|
"learning_rate": 0.0006785162287480681, |
|
"loss": 0.9118, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.9675425038639877, |
|
"grad_norm": 0.001589839463122189, |
|
"learning_rate": 0.0006774858320453374, |
|
"loss": 0.8996, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.9706336939721792, |
|
"grad_norm": 0.00146203744225204, |
|
"learning_rate": 0.0006764554353426069, |
|
"loss": 0.9122, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.973724884080371, |
|
"grad_norm": 0.0015465939650312066, |
|
"learning_rate": 0.0006754250386398764, |
|
"loss": 0.935, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.9768160741885626, |
|
"grad_norm": 0.002063942141830921, |
|
"learning_rate": 0.0006743946419371458, |
|
"loss": 0.9088, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.9799072642967542, |
|
"grad_norm": 0.0012433248339220881, |
|
"learning_rate": 0.0006733642452344153, |
|
"loss": 0.8963, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.9829984544049459, |
|
"grad_norm": 0.001582261291332543, |
|
"learning_rate": 0.0006723338485316847, |
|
"loss": 0.9304, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.9860896445131375, |
|
"grad_norm": 0.0015674213645979762, |
|
"learning_rate": 0.0006713034518289542, |
|
"loss": 0.9299, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.9891808346213292, |
|
"grad_norm": 0.0017826062394306064, |
|
"learning_rate": 0.0006702730551262236, |
|
"loss": 0.8951, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.9922720247295209, |
|
"grad_norm": 0.002112460555508733, |
|
"learning_rate": 0.000669242658423493, |
|
"loss": 0.9393, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.9953632148377125, |
|
"grad_norm": 0.0014212781097739935, |
|
"learning_rate": 0.0006682122617207625, |
|
"loss": 0.916, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.9984544049459042, |
|
"grad_norm": 0.001774617237970233, |
|
"learning_rate": 0.0006671818650180319, |
|
"loss": 0.9271, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.001545595054096, |
|
"grad_norm": 0.0016346676275134087, |
|
"learning_rate": 0.0006661514683153014, |
|
"loss": 0.8911, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.0046367851622875, |
|
"grad_norm": 0.0014871886232867837, |
|
"learning_rate": 0.0006651210716125709, |
|
"loss": 0.8988, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.007727975270479, |
|
"grad_norm": 0.0014850738225504756, |
|
"learning_rate": 0.0006640906749098404, |
|
"loss": 0.8936, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.010819165378671, |
|
"grad_norm": 0.0015949340304359794, |
|
"learning_rate": 0.0006630602782071098, |
|
"loss": 0.8987, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.0139103554868625, |
|
"grad_norm": 0.0019407504005357623, |
|
"learning_rate": 0.0006620298815043791, |
|
"loss": 0.8736, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.017001545595054, |
|
"grad_norm": 0.0019730369094759226, |
|
"learning_rate": 0.0006609994848016486, |
|
"loss": 0.8818, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.0200927357032457, |
|
"grad_norm": 0.0018432583892717957, |
|
"learning_rate": 0.0006599690880989181, |
|
"loss": 0.9042, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.0231839258114375, |
|
"grad_norm": 0.0017056462820619345, |
|
"learning_rate": 0.0006589386913961876, |
|
"loss": 0.932, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.026275115919629, |
|
"grad_norm": 0.0015121812466531992, |
|
"learning_rate": 0.000657908294693457, |
|
"loss": 0.8639, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.0293663060278206, |
|
"grad_norm": 0.004580393433570862, |
|
"learning_rate": 0.0006568778979907264, |
|
"loss": 0.9044, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.0324574961360125, |
|
"grad_norm": 0.0015995085705071688, |
|
"learning_rate": 0.0006558475012879959, |
|
"loss": 0.9453, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.035548686244204, |
|
"grad_norm": 0.002061218721792102, |
|
"learning_rate": 0.0006548171045852653, |
|
"loss": 0.8802, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.0386398763523956, |
|
"grad_norm": 0.0015771668404340744, |
|
"learning_rate": 0.0006537867078825348, |
|
"loss": 0.8759, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.0417310664605872, |
|
"grad_norm": 0.0015714009059593081, |
|
"learning_rate": 0.0006527563111798043, |
|
"loss": 0.8915, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.044822256568779, |
|
"grad_norm": 0.0019235257059335709, |
|
"learning_rate": 0.0006517259144770736, |
|
"loss": 0.9143, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.0479134466769706, |
|
"grad_norm": 0.0017326029483228922, |
|
"learning_rate": 0.0006506955177743431, |
|
"loss": 0.8883, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.0510046367851622, |
|
"grad_norm": 0.001543081016279757, |
|
"learning_rate": 0.0006496651210716126, |
|
"loss": 0.9235, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.054095826893354, |
|
"grad_norm": 0.0018034736858680844, |
|
"learning_rate": 0.0006486347243688821, |
|
"loss": 0.9445, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.0571870170015456, |
|
"grad_norm": 0.001585203455761075, |
|
"learning_rate": 0.0006476043276661516, |
|
"loss": 0.9192, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.0602782071097372, |
|
"grad_norm": 0.0018698821077123284, |
|
"learning_rate": 0.0006465739309634208, |
|
"loss": 0.9463, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.063369397217929, |
|
"grad_norm": 0.0014568824553862214, |
|
"learning_rate": 0.0006455435342606903, |
|
"loss": 0.8907, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.0664605873261206, |
|
"grad_norm": 0.002080600941553712, |
|
"learning_rate": 0.0006445131375579598, |
|
"loss": 0.9314, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.0695517774343122, |
|
"grad_norm": 0.0016853328561410308, |
|
"learning_rate": 0.0006434827408552293, |
|
"loss": 0.9277, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.0726429675425038, |
|
"grad_norm": 0.0016854364657774568, |
|
"learning_rate": 0.0006424523441524988, |
|
"loss": 0.9045, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.0757341576506956, |
|
"grad_norm": 0.0020330501720309258, |
|
"learning_rate": 0.0006414219474497683, |
|
"loss": 0.9253, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.0788253477588872, |
|
"grad_norm": 0.001624101772904396, |
|
"learning_rate": 0.0006403915507470376, |
|
"loss": 0.9275, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.0819165378670788, |
|
"grad_norm": 0.0021750659216195345, |
|
"learning_rate": 0.000639361154044307, |
|
"loss": 0.9483, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.0850077279752706, |
|
"grad_norm": 0.002389618894085288, |
|
"learning_rate": 0.0006383307573415765, |
|
"loss": 0.9259, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.0880989180834622, |
|
"grad_norm": 0.0017334523145109415, |
|
"learning_rate": 0.000637300360638846, |
|
"loss": 0.9142, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.0911901081916537, |
|
"grad_norm": 0.0015356771182268858, |
|
"learning_rate": 0.0006362699639361155, |
|
"loss": 0.9139, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.0942812982998453, |
|
"grad_norm": 0.001495121861808002, |
|
"learning_rate": 0.0006352395672333848, |
|
"loss": 0.9382, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.0973724884080371, |
|
"grad_norm": 0.0018960656598210335, |
|
"learning_rate": 0.0006342091705306543, |
|
"loss": 0.9208, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.1004636785162287, |
|
"grad_norm": 0.0019199528032913804, |
|
"learning_rate": 0.0006331787738279238, |
|
"loss": 0.8903, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.1035548686244203, |
|
"grad_norm": 0.0016839156160131097, |
|
"learning_rate": 0.0006321483771251933, |
|
"loss": 0.8923, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.1066460587326121, |
|
"grad_norm": 0.0016232216730713844, |
|
"learning_rate": 0.0006311179804224627, |
|
"loss": 0.8847, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.1097372488408037, |
|
"grad_norm": 0.0016339183785021305, |
|
"learning_rate": 0.000630087583719732, |
|
"loss": 0.9283, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.1128284389489953, |
|
"grad_norm": 0.001477651298046112, |
|
"learning_rate": 0.0006290571870170015, |
|
"loss": 0.9211, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.1159196290571871, |
|
"grad_norm": 0.0016854658024385571, |
|
"learning_rate": 0.000628026790314271, |
|
"loss": 0.8857, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.1190108191653787, |
|
"grad_norm": 0.001720211817882955, |
|
"learning_rate": 0.0006269963936115405, |
|
"loss": 0.93, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.1221020092735703, |
|
"grad_norm": 0.0018675002502277493, |
|
"learning_rate": 0.00062596599690881, |
|
"loss": 0.9222, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.125193199381762, |
|
"grad_norm": 0.0014751511625945568, |
|
"learning_rate": 0.0006249356002060794, |
|
"loss": 0.8624, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.1282843894899537, |
|
"grad_norm": 0.001356113119982183, |
|
"learning_rate": 0.0006239052035033487, |
|
"loss": 0.92, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.1313755795981453, |
|
"grad_norm": 0.0012932941317558289, |
|
"learning_rate": 0.0006228748068006182, |
|
"loss": 0.9394, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.1344667697063369, |
|
"grad_norm": 0.0022874162532389164, |
|
"learning_rate": 0.0006218444100978877, |
|
"loss": 0.8998, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.1375579598145287, |
|
"grad_norm": 0.0017121599521487951, |
|
"learning_rate": 0.0006208140133951572, |
|
"loss": 0.9188, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.1406491499227203, |
|
"grad_norm": 0.001489990041591227, |
|
"learning_rate": 0.0006197836166924266, |
|
"loss": 0.8829, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.1437403400309119, |
|
"grad_norm": 0.0015349757159128785, |
|
"learning_rate": 0.000618753219989696, |
|
"loss": 0.9028, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.1468315301391034, |
|
"grad_norm": 0.0022630542516708374, |
|
"learning_rate": 0.0006177228232869655, |
|
"loss": 0.9536, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.1499227202472952, |
|
"grad_norm": 0.0017121895216405392, |
|
"learning_rate": 0.000616692426584235, |
|
"loss": 0.9069, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.1530139103554868, |
|
"grad_norm": 0.0019011534750461578, |
|
"learning_rate": 0.0006156620298815044, |
|
"loss": 0.8873, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.1561051004636784, |
|
"grad_norm": 0.0014921397669240832, |
|
"learning_rate": 0.0006146316331787738, |
|
"loss": 0.9354, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.1591962905718702, |
|
"grad_norm": 0.001888715079985559, |
|
"learning_rate": 0.0006136012364760433, |
|
"loss": 0.9054, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.1622874806800618, |
|
"grad_norm": 0.0026860106736421585, |
|
"learning_rate": 0.0006125708397733127, |
|
"loss": 0.9096, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.1653786707882534, |
|
"grad_norm": 0.0014799319906160235, |
|
"learning_rate": 0.0006115404430705822, |
|
"loss": 0.892, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.1684698608964452, |
|
"grad_norm": 0.001760624349117279, |
|
"learning_rate": 0.0006105100463678517, |
|
"loss": 0.9615, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.1715610510046368, |
|
"grad_norm": 0.0016477296594530344, |
|
"learning_rate": 0.0006094796496651211, |
|
"loss": 0.8632, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.1746522411128284, |
|
"grad_norm": 0.0015910883666947484, |
|
"learning_rate": 0.0006084492529623905, |
|
"loss": 0.9434, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.1777434312210202, |
|
"grad_norm": 0.002248365432024002, |
|
"learning_rate": 0.0006074188562596599, |
|
"loss": 0.9087, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.1808346213292118, |
|
"grad_norm": 0.0020230677910149097, |
|
"learning_rate": 0.0006063884595569294, |
|
"loss": 0.9356, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.1839258114374034, |
|
"grad_norm": 0.0016803268808871508, |
|
"learning_rate": 0.0006053580628541989, |
|
"loss": 0.9394, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.187017001545595, |
|
"grad_norm": 0.0012555584544315934, |
|
"learning_rate": 0.0006043276661514683, |
|
"loss": 0.8883, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.1901081916537868, |
|
"grad_norm": 0.0015084665501490235, |
|
"learning_rate": 0.0006032972694487378, |
|
"loss": 0.8967, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.1931993817619784, |
|
"grad_norm": 0.001311285886913538, |
|
"learning_rate": 0.0006022668727460073, |
|
"loss": 0.8849, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.19629057187017, |
|
"grad_norm": 0.0018462970620021224, |
|
"learning_rate": 0.0006012364760432767, |
|
"loss": 0.8442, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.1993817619783615, |
|
"grad_norm": 0.0016767915803939104, |
|
"learning_rate": 0.0006002060793405461, |
|
"loss": 0.9013, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.2024729520865534, |
|
"grad_norm": 0.0018409952754154801, |
|
"learning_rate": 0.0005991756826378155, |
|
"loss": 0.9189, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.205564142194745, |
|
"grad_norm": 0.0016513046575710177, |
|
"learning_rate": 0.000598145285935085, |
|
"loss": 0.935, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.2086553323029365, |
|
"grad_norm": 0.0017494558123871684, |
|
"learning_rate": 0.0005971148892323545, |
|
"loss": 0.9038, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.2117465224111283, |
|
"grad_norm": 0.0021330672316253185, |
|
"learning_rate": 0.0005960844925296239, |
|
"loss": 0.8762, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.21483771251932, |
|
"grad_norm": 0.00180672702845186, |
|
"learning_rate": 0.0005950540958268934, |
|
"loss": 0.8729, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.2179289026275115, |
|
"grad_norm": 0.001298164832405746, |
|
"learning_rate": 0.0005940236991241628, |
|
"loss": 0.9208, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.2210200927357033, |
|
"grad_norm": 0.0016548632411286235, |
|
"learning_rate": 0.0005929933024214322, |
|
"loss": 0.8658, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.224111282843895, |
|
"grad_norm": 0.0019319544080644846, |
|
"learning_rate": 0.0005919629057187017, |
|
"loss": 0.9357, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.2272024729520865, |
|
"grad_norm": 0.0016805862542241812, |
|
"learning_rate": 0.0005909325090159712, |
|
"loss": 0.8781, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.2302936630602783, |
|
"grad_norm": 0.0023612873628735542, |
|
"learning_rate": 0.0005899021123132406, |
|
"loss": 0.8979, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.23338485316847, |
|
"grad_norm": 0.001474004122428596, |
|
"learning_rate": 0.00058887171561051, |
|
"loss": 0.9272, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.2364760432766615, |
|
"grad_norm": 0.0016212017508223653, |
|
"learning_rate": 0.0005878413189077795, |
|
"loss": 0.9453, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.239567233384853, |
|
"grad_norm": 0.0017642155289649963, |
|
"learning_rate": 0.000586810922205049, |
|
"loss": 0.9517, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.242658423493045, |
|
"grad_norm": 0.0018736496567726135, |
|
"learning_rate": 0.0005857805255023185, |
|
"loss": 0.8862, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.2457496136012365, |
|
"grad_norm": 0.001532053924165666, |
|
"learning_rate": 0.0005847501287995878, |
|
"loss": 0.8866, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.248840803709428, |
|
"grad_norm": 0.0017100380500778556, |
|
"learning_rate": 0.0005837197320968572, |
|
"loss": 0.9678, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.2519319938176197, |
|
"grad_norm": 0.0018053441308438778, |
|
"learning_rate": 0.0005826893353941267, |
|
"loss": 0.9066, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.2550231839258115, |
|
"grad_norm": 0.0015492727980017662, |
|
"learning_rate": 0.0005816589386913962, |
|
"loss": 0.9649, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.258114374034003, |
|
"grad_norm": 0.0014449515147134662, |
|
"learning_rate": 0.0005806285419886657, |
|
"loss": 0.8973, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 1.2612055641421946, |
|
"grad_norm": 0.001734506106004119, |
|
"learning_rate": 0.0005795981452859352, |
|
"loss": 0.8801, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.2642967542503865, |
|
"grad_norm": 0.0013686501188203692, |
|
"learning_rate": 0.0005785677485832046, |
|
"loss": 0.8936, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 1.267387944358578, |
|
"grad_norm": 0.0016823920886963606, |
|
"learning_rate": 0.0005775373518804739, |
|
"loss": 0.9334, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.2704791344667696, |
|
"grad_norm": 0.0022253175266087055, |
|
"learning_rate": 0.0005765069551777434, |
|
"loss": 0.9328, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 1.2735703245749614, |
|
"grad_norm": 0.0017447506543248892, |
|
"learning_rate": 0.0005754765584750129, |
|
"loss": 0.884, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.276661514683153, |
|
"grad_norm": 0.0017223991453647614, |
|
"learning_rate": 0.0005744461617722824, |
|
"loss": 0.8914, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 1.2797527047913446, |
|
"grad_norm": 0.001338414615020156, |
|
"learning_rate": 0.0005734157650695518, |
|
"loss": 0.8588, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.2828438948995364, |
|
"grad_norm": 0.0020726649090647697, |
|
"learning_rate": 0.0005723853683668212, |
|
"loss": 0.8827, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.285935085007728, |
|
"grad_norm": 0.0016284299781545997, |
|
"learning_rate": 0.0005713549716640907, |
|
"loss": 0.9044, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.2890262751159196, |
|
"grad_norm": 0.0015132429543882608, |
|
"learning_rate": 0.0005703245749613602, |
|
"loss": 0.8997, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 1.2921174652241114, |
|
"grad_norm": 0.0019543899688869715, |
|
"learning_rate": 0.0005692941782586296, |
|
"loss": 0.8779, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.295208655332303, |
|
"grad_norm": 0.0016743885353207588, |
|
"learning_rate": 0.000568263781555899, |
|
"loss": 0.9378, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 1.2982998454404946, |
|
"grad_norm": 0.0015272155869752169, |
|
"learning_rate": 0.0005672333848531684, |
|
"loss": 0.9391, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.3013910355486862, |
|
"grad_norm": 0.001885525998659432, |
|
"learning_rate": 0.0005662029881504379, |
|
"loss": 0.9257, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 1.3044822256568778, |
|
"grad_norm": 0.0016695179510861635, |
|
"learning_rate": 0.0005651725914477074, |
|
"loss": 0.9012, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.3075734157650696, |
|
"grad_norm": 0.0013361867750063539, |
|
"learning_rate": 0.0005641421947449769, |
|
"loss": 0.8968, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 1.3106646058732612, |
|
"grad_norm": 0.0015216304454952478, |
|
"learning_rate": 0.0005631117980422464, |
|
"loss": 0.9027, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.3137557959814528, |
|
"grad_norm": 0.0013232153141871095, |
|
"learning_rate": 0.0005620814013395156, |
|
"loss": 0.8984, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.3168469860896446, |
|
"grad_norm": 0.0019559410866349936, |
|
"learning_rate": 0.0005610510046367851, |
|
"loss": 0.8915, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.3199381761978362, |
|
"grad_norm": 0.0014317093882709742, |
|
"learning_rate": 0.0005600206079340546, |
|
"loss": 0.9054, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.3230293663060277, |
|
"grad_norm": 0.0013388870283961296, |
|
"learning_rate": 0.0005589902112313241, |
|
"loss": 0.8892, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.3261205564142196, |
|
"grad_norm": 0.0015756129287183285, |
|
"learning_rate": 0.0005579598145285936, |
|
"loss": 0.8773, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.3292117465224111, |
|
"grad_norm": 0.0014559467090293765, |
|
"learning_rate": 0.0005569294178258629, |
|
"loss": 0.9396, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.3323029366306027, |
|
"grad_norm": 0.0015288847498595715, |
|
"learning_rate": 0.0005558990211231324, |
|
"loss": 0.8731, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 1.3353941267387945, |
|
"grad_norm": 0.0019514070590958, |
|
"learning_rate": 0.0005548686244204019, |
|
"loss": 0.9192, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.3384853168469861, |
|
"grad_norm": 0.001415872247889638, |
|
"learning_rate": 0.0005538382277176713, |
|
"loss": 0.8815, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.3415765069551777, |
|
"grad_norm": 0.0014958428218960762, |
|
"learning_rate": 0.0005528078310149408, |
|
"loss": 0.9032, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.3446676970633695, |
|
"grad_norm": 0.002053620759397745, |
|
"learning_rate": 0.0005517774343122102, |
|
"loss": 0.887, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.3477588871715611, |
|
"grad_norm": 0.0018398250686004758, |
|
"learning_rate": 0.0005507470376094796, |
|
"loss": 0.9225, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.3508500772797527, |
|
"grad_norm": 0.0017333675641566515, |
|
"learning_rate": 0.0005497166409067491, |
|
"loss": 0.8951, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.3539412673879443, |
|
"grad_norm": 0.002342061372473836, |
|
"learning_rate": 0.0005486862442040186, |
|
"loss": 0.8829, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.3570324574961359, |
|
"grad_norm": 0.00188271829392761, |
|
"learning_rate": 0.0005476558475012881, |
|
"loss": 0.8816, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 1.3601236476043277, |
|
"grad_norm": 0.0013321408769115806, |
|
"learning_rate": 0.0005466254507985575, |
|
"loss": 0.9114, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.3632148377125193, |
|
"grad_norm": 0.00140297575853765, |
|
"learning_rate": 0.0005455950540958268, |
|
"loss": 0.9209, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 1.3663060278207109, |
|
"grad_norm": 0.002004598267376423, |
|
"learning_rate": 0.0005445646573930963, |
|
"loss": 0.8764, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.3693972179289027, |
|
"grad_norm": 0.0019030956318601966, |
|
"learning_rate": 0.0005435342606903658, |
|
"loss": 0.9532, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 1.3724884080370943, |
|
"grad_norm": 0.002172063337638974, |
|
"learning_rate": 0.0005425038639876353, |
|
"loss": 0.9243, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.3755795981452859, |
|
"grad_norm": 0.0018728856230154634, |
|
"learning_rate": 0.0005414734672849047, |
|
"loss": 0.893, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.3786707882534777, |
|
"grad_norm": 0.0015217667678371072, |
|
"learning_rate": 0.0005404430705821742, |
|
"loss": 0.9209, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.3817619783616693, |
|
"grad_norm": 0.001939924550242722, |
|
"learning_rate": 0.0005394126738794436, |
|
"loss": 0.9407, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 1.3848531684698608, |
|
"grad_norm": 0.001418776111677289, |
|
"learning_rate": 0.000538382277176713, |
|
"loss": 0.8918, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.3879443585780527, |
|
"grad_norm": 0.0015887707704678178, |
|
"learning_rate": 0.0005373518804739825, |
|
"loss": 0.8835, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 1.3910355486862442, |
|
"grad_norm": 0.002404952421784401, |
|
"learning_rate": 0.0005363214837712519, |
|
"loss": 0.9215, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.3941267387944358, |
|
"grad_norm": 0.0013456016313284636, |
|
"learning_rate": 0.0005352910870685214, |
|
"loss": 0.9352, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 1.3972179289026276, |
|
"grad_norm": 0.0014747907407581806, |
|
"learning_rate": 0.0005342606903657908, |
|
"loss": 0.9033, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 1.4003091190108192, |
|
"grad_norm": 0.0016936981119215488, |
|
"learning_rate": 0.0005332302936630603, |
|
"loss": 0.8774, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 1.4034003091190108, |
|
"grad_norm": 0.00203963671810925, |
|
"learning_rate": 0.0005321998969603298, |
|
"loss": 0.941, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 1.4064914992272024, |
|
"grad_norm": 0.0018227493856102228, |
|
"learning_rate": 0.0005311695002575992, |
|
"loss": 0.8724, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.409582689335394, |
|
"grad_norm": 0.002075436757877469, |
|
"learning_rate": 0.0005301391035548686, |
|
"loss": 0.8892, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 1.4126738794435858, |
|
"grad_norm": 0.0016266778111457825, |
|
"learning_rate": 0.000529108706852138, |
|
"loss": 0.8583, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 1.4157650695517774, |
|
"grad_norm": 0.0016663891728967428, |
|
"learning_rate": 0.0005280783101494075, |
|
"loss": 0.9634, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 1.418856259659969, |
|
"grad_norm": 0.0015336048090830445, |
|
"learning_rate": 0.000527047913446677, |
|
"loss": 0.9069, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 1.4219474497681608, |
|
"grad_norm": 0.0020592466462403536, |
|
"learning_rate": 0.0005260175167439464, |
|
"loss": 0.8638, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.4250386398763524, |
|
"grad_norm": 0.0019336834084242582, |
|
"learning_rate": 0.0005249871200412159, |
|
"loss": 0.9, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 1.428129829984544, |
|
"grad_norm": 0.001620001159608364, |
|
"learning_rate": 0.0005239567233384854, |
|
"loss": 0.8783, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 1.4312210200927358, |
|
"grad_norm": 0.0018929082434624434, |
|
"learning_rate": 0.0005229263266357547, |
|
"loss": 0.8888, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 1.4343122102009274, |
|
"grad_norm": 0.00144308025483042, |
|
"learning_rate": 0.0005218959299330242, |
|
"loss": 0.9338, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 1.437403400309119, |
|
"grad_norm": 0.0015054781688377261, |
|
"learning_rate": 0.0005208655332302936, |
|
"loss": 0.8962, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.4404945904173108, |
|
"grad_norm": 0.0017711712280288339, |
|
"learning_rate": 0.0005198351365275631, |
|
"loss": 0.9417, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 1.4435857805255023, |
|
"grad_norm": 0.0019218528177589178, |
|
"learning_rate": 0.0005188047398248326, |
|
"loss": 0.9703, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 1.446676970633694, |
|
"grad_norm": 0.0019779358990490437, |
|
"learning_rate": 0.000517774343122102, |
|
"loss": 0.8804, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 1.4497681607418857, |
|
"grad_norm": 0.001458328333683312, |
|
"learning_rate": 0.0005167439464193715, |
|
"loss": 0.9228, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 1.4528593508500773, |
|
"grad_norm": 0.0017885727575048804, |
|
"learning_rate": 0.000515713549716641, |
|
"loss": 0.9256, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.455950540958269, |
|
"grad_norm": 0.0015944467158988118, |
|
"learning_rate": 0.0005146831530139103, |
|
"loss": 0.8739, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 1.4590417310664605, |
|
"grad_norm": 0.001835488947108388, |
|
"learning_rate": 0.0005136527563111798, |
|
"loss": 0.952, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 1.4621329211746523, |
|
"grad_norm": 0.0025023729540407658, |
|
"learning_rate": 0.0005126223596084493, |
|
"loss": 0.9499, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 1.465224111282844, |
|
"grad_norm": 0.0017449932638555765, |
|
"learning_rate": 0.0005115919629057187, |
|
"loss": 0.9269, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 1.4683153013910355, |
|
"grad_norm": 0.0013545970432460308, |
|
"learning_rate": 0.0005105615662029882, |
|
"loss": 0.8909, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.471406491499227, |
|
"grad_norm": 0.001730005955323577, |
|
"learning_rate": 0.0005095311695002576, |
|
"loss": 0.8975, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 1.474497681607419, |
|
"grad_norm": 0.0017201779410243034, |
|
"learning_rate": 0.0005085007727975271, |
|
"loss": 0.9217, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 1.4775888717156105, |
|
"grad_norm": 0.0020651696249842644, |
|
"learning_rate": 0.0005074703760947965, |
|
"loss": 0.8986, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 1.480680061823802, |
|
"grad_norm": 0.0016624495619907975, |
|
"learning_rate": 0.000506439979392066, |
|
"loss": 0.8983, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 1.4837712519319939, |
|
"grad_norm": 0.0014232158428058028, |
|
"learning_rate": 0.0005054095826893354, |
|
"loss": 0.8733, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.4868624420401855, |
|
"grad_norm": 0.0019593520555645227, |
|
"learning_rate": 0.0005043791859866048, |
|
"loss": 0.9165, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 1.489953632148377, |
|
"grad_norm": 0.002281294437125325, |
|
"learning_rate": 0.0005033487892838743, |
|
"loss": 0.9311, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 1.4930448222565689, |
|
"grad_norm": 0.001690705306828022, |
|
"learning_rate": 0.0005023183925811438, |
|
"loss": 0.9152, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 1.4961360123647605, |
|
"grad_norm": 0.0024157485458999872, |
|
"learning_rate": 0.0005012879958784133, |
|
"loss": 0.9276, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 1.499227202472952, |
|
"grad_norm": 0.0015235710889101028, |
|
"learning_rate": 0.0005002575991756827, |
|
"loss": 0.8763, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.5023183925811439, |
|
"grad_norm": 0.0018749197479337454, |
|
"learning_rate": 0.0004992272024729521, |
|
"loss": 0.9412, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 1.5054095826893354, |
|
"grad_norm": 0.0020271523389965296, |
|
"learning_rate": 0.0004981968057702215, |
|
"loss": 0.8985, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 1.508500772797527, |
|
"grad_norm": 0.0015005801105871797, |
|
"learning_rate": 0.000497166409067491, |
|
"loss": 0.9426, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 1.5115919629057188, |
|
"grad_norm": 0.002262561582028866, |
|
"learning_rate": 0.0004961360123647605, |
|
"loss": 0.9107, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 1.5146831530139102, |
|
"grad_norm": 0.001319503178820014, |
|
"learning_rate": 0.0004951056156620298, |
|
"loss": 0.9054, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.517774343122102, |
|
"grad_norm": 0.0014885893324390054, |
|
"learning_rate": 0.0004940752189592993, |
|
"loss": 0.9265, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 1.5208655332302936, |
|
"grad_norm": 0.0017433296889066696, |
|
"learning_rate": 0.0004930448222565688, |
|
"loss": 0.8997, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 1.5239567233384852, |
|
"grad_norm": 0.0013538316125050187, |
|
"learning_rate": 0.0004920144255538382, |
|
"loss": 0.8618, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 1.527047913446677, |
|
"grad_norm": 0.0014708703383803368, |
|
"learning_rate": 0.0004909840288511077, |
|
"loss": 0.9203, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 1.5301391035548686, |
|
"grad_norm": 0.0017004169058054686, |
|
"learning_rate": 0.0004899536321483772, |
|
"loss": 0.8936, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.5332302936630602, |
|
"grad_norm": 0.0017624979373067617, |
|
"learning_rate": 0.0004889232354456466, |
|
"loss": 0.8778, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 1.536321483771252, |
|
"grad_norm": 0.0015045328764244914, |
|
"learning_rate": 0.000487892838742916, |
|
"loss": 0.8828, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 1.5394126738794436, |
|
"grad_norm": 0.0018641521455720067, |
|
"learning_rate": 0.0004868624420401855, |
|
"loss": 0.9158, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 1.5425038639876352, |
|
"grad_norm": 0.001571865752339363, |
|
"learning_rate": 0.00048583204533745493, |
|
"loss": 0.8962, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 1.545595054095827, |
|
"grad_norm": 0.0016725645400583744, |
|
"learning_rate": 0.00048480164863472436, |
|
"loss": 0.9001, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.5486862442040186, |
|
"grad_norm": 0.001643617171794176, |
|
"learning_rate": 0.00048377125193199385, |
|
"loss": 0.9103, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 1.5517774343122102, |
|
"grad_norm": 0.002170222345739603, |
|
"learning_rate": 0.0004827408552292633, |
|
"loss": 0.9045, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 1.554868624420402, |
|
"grad_norm": 0.0015412492211908102, |
|
"learning_rate": 0.0004817104585265327, |
|
"loss": 0.8995, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 1.5579598145285936, |
|
"grad_norm": 0.0019475616281852126, |
|
"learning_rate": 0.0004806800618238022, |
|
"loss": 0.8543, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 1.5610510046367851, |
|
"grad_norm": 0.0014466085704043508, |
|
"learning_rate": 0.0004796496651210716, |
|
"loss": 0.899, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.564142194744977, |
|
"grad_norm": 0.0015615527518093586, |
|
"learning_rate": 0.00047861926841834105, |
|
"loss": 0.9303, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 1.5672333848531683, |
|
"grad_norm": 0.0016237753443419933, |
|
"learning_rate": 0.0004775888717156105, |
|
"loss": 0.914, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 1.5703245749613601, |
|
"grad_norm": 0.0015945249469950795, |
|
"learning_rate": 0.00047655847501287997, |
|
"loss": 0.899, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 1.573415765069552, |
|
"grad_norm": 0.001989311771467328, |
|
"learning_rate": 0.00047552807831014945, |
|
"loss": 0.9609, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 1.5765069551777433, |
|
"grad_norm": 0.0025777083355933428, |
|
"learning_rate": 0.00047449768160741883, |
|
"loss": 0.9187, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.5795981452859351, |
|
"grad_norm": 0.0016967840492725372, |
|
"learning_rate": 0.0004734672849046883, |
|
"loss": 0.9198, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 1.5826893353941267, |
|
"grad_norm": 0.0015623560175299644, |
|
"learning_rate": 0.00047243688820195774, |
|
"loss": 0.9066, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 1.5857805255023183, |
|
"grad_norm": 0.0014336062595248222, |
|
"learning_rate": 0.00047140649149922723, |
|
"loss": 0.8992, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 1.58887171561051, |
|
"grad_norm": 0.0018111519748345017, |
|
"learning_rate": 0.00047037609479649666, |
|
"loss": 0.9826, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 1.5919629057187017, |
|
"grad_norm": 0.0016681707929819822, |
|
"learning_rate": 0.0004693456980937661, |
|
"loss": 0.9236, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.5950540958268933, |
|
"grad_norm": 0.0015410635387524962, |
|
"learning_rate": 0.0004683153013910356, |
|
"loss": 0.9151, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 1.598145285935085, |
|
"grad_norm": 0.0017971232300624251, |
|
"learning_rate": 0.00046728490468830506, |
|
"loss": 0.9007, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 1.6012364760432767, |
|
"grad_norm": 0.0019288517069071531, |
|
"learning_rate": 0.00046625450798557443, |
|
"loss": 0.9123, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 1.6043276661514683, |
|
"grad_norm": 0.0013020862825214863, |
|
"learning_rate": 0.0004652241112828439, |
|
"loss": 0.8848, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 1.60741885625966, |
|
"grad_norm": 0.0015427186153829098, |
|
"learning_rate": 0.00046419371458011335, |
|
"loss": 0.948, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.6105100463678517, |
|
"grad_norm": 0.0016680203843861818, |
|
"learning_rate": 0.0004631633178773828, |
|
"loss": 0.9122, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 1.6136012364760433, |
|
"grad_norm": 0.0014202597085386515, |
|
"learning_rate": 0.00046213292117465226, |
|
"loss": 0.8974, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 1.616692426584235, |
|
"grad_norm": 0.0018021473661065102, |
|
"learning_rate": 0.0004611025244719217, |
|
"loss": 0.8518, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 1.6197836166924264, |
|
"grad_norm": 0.001819357625208795, |
|
"learning_rate": 0.0004600721277691912, |
|
"loss": 0.9143, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 1.6228748068006182, |
|
"grad_norm": 0.0018893377855420113, |
|
"learning_rate": 0.00045904173106646055, |
|
"loss": 0.8866, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.62596599690881, |
|
"grad_norm": 0.0018815461080521345, |
|
"learning_rate": 0.00045801133436373004, |
|
"loss": 0.9116, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 1.6290571870170014, |
|
"grad_norm": 0.0018397814128547907, |
|
"learning_rate": 0.0004569809376609995, |
|
"loss": 0.9021, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 1.6321483771251932, |
|
"grad_norm": 0.002361022401601076, |
|
"learning_rate": 0.00045595054095826895, |
|
"loss": 0.9044, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 1.6352395672333848, |
|
"grad_norm": 0.0016238827956840396, |
|
"learning_rate": 0.0004549201442555384, |
|
"loss": 0.9038, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 1.6383307573415764, |
|
"grad_norm": 0.0020596208050847054, |
|
"learning_rate": 0.0004538897475528078, |
|
"loss": 0.8645, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.6414219474497682, |
|
"grad_norm": 0.0016590558225288987, |
|
"learning_rate": 0.0004528593508500773, |
|
"loss": 0.8853, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 1.6445131375579598, |
|
"grad_norm": 0.0019768117927014828, |
|
"learning_rate": 0.0004518289541473468, |
|
"loss": 0.8622, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 1.6476043276661514, |
|
"grad_norm": 0.0016761173028498888, |
|
"learning_rate": 0.00045079855744461616, |
|
"loss": 0.9272, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 1.6506955177743432, |
|
"grad_norm": 0.0013793542748317122, |
|
"learning_rate": 0.00044976816074188564, |
|
"loss": 0.935, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 1.6537867078825348, |
|
"grad_norm": 0.001541083212941885, |
|
"learning_rate": 0.0004487377640391551, |
|
"loss": 0.8854, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.6568778979907264, |
|
"grad_norm": 0.0018007430480793118, |
|
"learning_rate": 0.00044770736733642456, |
|
"loss": 0.8571, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 1.6599690880989182, |
|
"grad_norm": 0.0015837026294320822, |
|
"learning_rate": 0.000446676970633694, |
|
"loss": 0.8731, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 1.6630602782071098, |
|
"grad_norm": 0.0019535787869244814, |
|
"learning_rate": 0.0004456465739309634, |
|
"loss": 0.8952, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 1.6661514683153014, |
|
"grad_norm": 0.0015085084596648812, |
|
"learning_rate": 0.0004446161772282329, |
|
"loss": 0.9039, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 1.6692426584234932, |
|
"grad_norm": 0.0016192490002140403, |
|
"learning_rate": 0.0004435857805255023, |
|
"loss": 0.8972, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.6723338485316845, |
|
"grad_norm": 0.0025146189145743847, |
|
"learning_rate": 0.00044255538382277176, |
|
"loss": 0.909, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 1.6754250386398764, |
|
"grad_norm": 0.001912578009068966, |
|
"learning_rate": 0.00044152498712004125, |
|
"loss": 0.909, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.6785162287480682, |
|
"grad_norm": 0.0015931734815239906, |
|
"learning_rate": 0.0004404945904173107, |
|
"loss": 0.9285, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 1.6816074188562595, |
|
"grad_norm": 0.0015723604010418057, |
|
"learning_rate": 0.0004394641937145801, |
|
"loss": 0.883, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.6846986089644513, |
|
"grad_norm": 0.0017312741838395596, |
|
"learning_rate": 0.00043843379701184954, |
|
"loss": 0.8687, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.687789799072643, |
|
"grad_norm": 0.0015717818168923259, |
|
"learning_rate": 0.000437403400309119, |
|
"loss": 0.8634, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 1.6908809891808345, |
|
"grad_norm": 0.0017777059692889452, |
|
"learning_rate": 0.0004363730036063885, |
|
"loss": 0.9342, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 1.6939721792890263, |
|
"grad_norm": 0.0014749905094504356, |
|
"learning_rate": 0.0004353426069036579, |
|
"loss": 0.8881, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 1.697063369397218, |
|
"grad_norm": 0.0015921080484986305, |
|
"learning_rate": 0.00043431221020092737, |
|
"loss": 0.8682, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 1.7001545595054095, |
|
"grad_norm": 0.0017204548930749297, |
|
"learning_rate": 0.00043328181349819685, |
|
"loss": 0.9315, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.7032457496136013, |
|
"grad_norm": 0.0013450038386508822, |
|
"learning_rate": 0.0004322514167954663, |
|
"loss": 0.8749, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 1.706336939721793, |
|
"grad_norm": 0.0020300759933888912, |
|
"learning_rate": 0.0004312210200927357, |
|
"loss": 0.9331, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 1.7094281298299845, |
|
"grad_norm": 0.0023906801361590624, |
|
"learning_rate": 0.00043019062339000514, |
|
"loss": 0.9049, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 1.7125193199381763, |
|
"grad_norm": 0.0016755072865635157, |
|
"learning_rate": 0.00042916022668727463, |
|
"loss": 0.8818, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 1.7156105100463679, |
|
"grad_norm": 0.001380381640046835, |
|
"learning_rate": 0.00042812982998454406, |
|
"loss": 0.9016, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.7187017001545595, |
|
"grad_norm": 0.0016399535816162825, |
|
"learning_rate": 0.0004270994332818135, |
|
"loss": 0.9137, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 1.7217928902627513, |
|
"grad_norm": 0.0018158459570258856, |
|
"learning_rate": 0.000426069036579083, |
|
"loss": 0.8935, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 1.7248840803709427, |
|
"grad_norm": 0.0017615389078855515, |
|
"learning_rate": 0.0004250386398763524, |
|
"loss": 0.8964, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 1.7279752704791345, |
|
"grad_norm": 0.0018352493643760681, |
|
"learning_rate": 0.00042400824317362183, |
|
"loss": 0.9344, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 1.7310664605873263, |
|
"grad_norm": 0.0015487250639125705, |
|
"learning_rate": 0.0004229778464708913, |
|
"loss": 0.888, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.7341576506955176, |
|
"grad_norm": 0.00184920453466475, |
|
"learning_rate": 0.00042194744976816075, |
|
"loss": 0.866, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 1.7372488408037094, |
|
"grad_norm": 0.0018842272693291306, |
|
"learning_rate": 0.00042091705306543023, |
|
"loss": 0.9157, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 1.740340030911901, |
|
"grad_norm": 0.001251103589311242, |
|
"learning_rate": 0.0004198866563626996, |
|
"loss": 0.8774, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 1.7434312210200926, |
|
"grad_norm": 0.0017979164840653539, |
|
"learning_rate": 0.0004188562596599691, |
|
"loss": 0.9327, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 1.7465224111282844, |
|
"grad_norm": 0.003756187856197357, |
|
"learning_rate": 0.0004178258629572386, |
|
"loss": 0.898, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.749613601236476, |
|
"grad_norm": 0.0025360011495649815, |
|
"learning_rate": 0.000416795466254508, |
|
"loss": 0.9021, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 1.7527047913446676, |
|
"grad_norm": 0.0015577342128381133, |
|
"learning_rate": 0.00041576506955177744, |
|
"loss": 0.8972, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 1.7557959814528594, |
|
"grad_norm": 0.002707903040573001, |
|
"learning_rate": 0.00041473467284904687, |
|
"loss": 0.8647, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 1.758887171561051, |
|
"grad_norm": 0.0017573883524164557, |
|
"learning_rate": 0.00041370427614631635, |
|
"loss": 0.9007, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 1.7619783616692426, |
|
"grad_norm": 0.002097573596984148, |
|
"learning_rate": 0.0004126738794435858, |
|
"loss": 0.865, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.7650695517774344, |
|
"grad_norm": 0.0018730917945504189, |
|
"learning_rate": 0.0004116434827408552, |
|
"loss": 0.9073, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 1.768160741885626, |
|
"grad_norm": 0.0017573771765455604, |
|
"learning_rate": 0.0004106130860381247, |
|
"loss": 0.9246, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 1.7712519319938176, |
|
"grad_norm": 0.0017129804473370314, |
|
"learning_rate": 0.00040958268933539413, |
|
"loss": 0.93, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 1.7743431221020094, |
|
"grad_norm": 0.0012526778737083077, |
|
"learning_rate": 0.00040855229263266356, |
|
"loss": 0.8911, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 1.7774343122102008, |
|
"grad_norm": 0.002290197880938649, |
|
"learning_rate": 0.00040752189592993304, |
|
"loss": 0.9049, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.7805255023183926, |
|
"grad_norm": 0.001954218838363886, |
|
"learning_rate": 0.0004064914992272025, |
|
"loss": 0.8975, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 1.7836166924265844, |
|
"grad_norm": 0.002614745870232582, |
|
"learning_rate": 0.00040546110252447196, |
|
"loss": 0.8844, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 1.7867078825347757, |
|
"grad_norm": 0.0014066528528928757, |
|
"learning_rate": 0.00040443070582174133, |
|
"loss": 0.8726, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 1.7897990726429676, |
|
"grad_norm": 0.0013754137326031923, |
|
"learning_rate": 0.0004034003091190108, |
|
"loss": 0.9034, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 1.7928902627511591, |
|
"grad_norm": 0.0014499702956527472, |
|
"learning_rate": 0.0004023699124162803, |
|
"loss": 0.8515, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.7959814528593507, |
|
"grad_norm": 0.0015088847139850259, |
|
"learning_rate": 0.00040133951571354973, |
|
"loss": 0.9027, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 1.7990726429675425, |
|
"grad_norm": 0.0013973376480862498, |
|
"learning_rate": 0.00040030911901081916, |
|
"loss": 0.8943, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 1.8021638330757341, |
|
"grad_norm": 0.0014548856997862458, |
|
"learning_rate": 0.00039927872230808865, |
|
"loss": 0.9106, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 1.8052550231839257, |
|
"grad_norm": 0.0017355557065457106, |
|
"learning_rate": 0.0003982483256053581, |
|
"loss": 0.8727, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 1.8083462132921175, |
|
"grad_norm": 0.0021262154914438725, |
|
"learning_rate": 0.0003972179289026275, |
|
"loss": 0.8824, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.8114374034003091, |
|
"grad_norm": 0.001427137991413474, |
|
"learning_rate": 0.00039618753219989694, |
|
"loss": 0.9374, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 1.8145285935085007, |
|
"grad_norm": 0.0016721707070246339, |
|
"learning_rate": 0.0003951571354971664, |
|
"loss": 0.919, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 1.8176197836166925, |
|
"grad_norm": 0.0017290489049628377, |
|
"learning_rate": 0.0003941267387944359, |
|
"loss": 0.9544, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.820710973724884, |
|
"grad_norm": 0.001801205798983574, |
|
"learning_rate": 0.0003930963420917053, |
|
"loss": 0.8656, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 1.8238021638330757, |
|
"grad_norm": 0.0016462091589346528, |
|
"learning_rate": 0.00039206594538897477, |
|
"loss": 0.8515, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.8268933539412675, |
|
"grad_norm": 0.001921969000250101, |
|
"learning_rate": 0.0003910355486862442, |
|
"loss": 0.8472, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 1.829984544049459, |
|
"grad_norm": 0.0016511150170117617, |
|
"learning_rate": 0.0003900051519835137, |
|
"loss": 0.8858, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 1.8330757341576507, |
|
"grad_norm": 0.0014706592774018645, |
|
"learning_rate": 0.0003889747552807831, |
|
"loss": 0.8983, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 1.8361669242658425, |
|
"grad_norm": 0.0024984250776469707, |
|
"learning_rate": 0.00038794435857805254, |
|
"loss": 0.9051, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 1.8392581143740339, |
|
"grad_norm": 0.001705004251562059, |
|
"learning_rate": 0.00038691396187532203, |
|
"loss": 0.902, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.8423493044822257, |
|
"grad_norm": 0.0019023518543690443, |
|
"learning_rate": 0.00038588356517259146, |
|
"loss": 0.8798, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 1.8454404945904173, |
|
"grad_norm": 0.0018084270413964987, |
|
"learning_rate": 0.0003848531684698609, |
|
"loss": 0.9092, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 1.8485316846986088, |
|
"grad_norm": 0.002029530005529523, |
|
"learning_rate": 0.00038382277176713037, |
|
"loss": 0.9116, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 1.8516228748068007, |
|
"grad_norm": 0.0018030694918707013, |
|
"learning_rate": 0.0003827923750643998, |
|
"loss": 0.9126, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 1.8547140649149922, |
|
"grad_norm": 0.0018464057939127088, |
|
"learning_rate": 0.00038176197836166923, |
|
"loss": 0.8679, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.8578052550231838, |
|
"grad_norm": 0.0018209113040938973, |
|
"learning_rate": 0.00038073158165893866, |
|
"loss": 0.9302, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 1.8608964451313756, |
|
"grad_norm": 0.001530204783193767, |
|
"learning_rate": 0.00037970118495620815, |
|
"loss": 0.8936, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 1.8639876352395672, |
|
"grad_norm": 0.0017929489258676767, |
|
"learning_rate": 0.00037867078825347763, |
|
"loss": 0.8799, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 1.8670788253477588, |
|
"grad_norm": 0.0020685286726802588, |
|
"learning_rate": 0.000377640391550747, |
|
"loss": 0.9022, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 1.8701700154559506, |
|
"grad_norm": 0.0016937406035140157, |
|
"learning_rate": 0.0003766099948480165, |
|
"loss": 0.922, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.8732612055641422, |
|
"grad_norm": 0.001672919373959303, |
|
"learning_rate": 0.0003755795981452859, |
|
"loss": 0.8984, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 1.8763523956723338, |
|
"grad_norm": 0.0020905956625938416, |
|
"learning_rate": 0.0003745492014425554, |
|
"loss": 0.9168, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 1.8794435857805256, |
|
"grad_norm": 0.0016705166781321168, |
|
"learning_rate": 0.00037351880473982484, |
|
"loss": 0.9095, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 1.8825347758887172, |
|
"grad_norm": 0.002511728322133422, |
|
"learning_rate": 0.00037248840803709427, |
|
"loss": 0.9591, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 1.8856259659969088, |
|
"grad_norm": 0.0016106483526527882, |
|
"learning_rate": 0.00037145801133436375, |
|
"loss": 0.9133, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.8887171561051006, |
|
"grad_norm": 0.0018942320020869374, |
|
"learning_rate": 0.00037042761463163324, |
|
"loss": 0.9216, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 1.891808346213292, |
|
"grad_norm": 0.0014648385113105178, |
|
"learning_rate": 0.0003693972179289026, |
|
"loss": 0.854, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 1.8948995363214838, |
|
"grad_norm": 0.0018195216543972492, |
|
"learning_rate": 0.0003683668212261721, |
|
"loss": 0.9546, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 1.8979907264296756, |
|
"grad_norm": 0.0016678489046171308, |
|
"learning_rate": 0.00036733642452344153, |
|
"loss": 0.882, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 1.901081916537867, |
|
"grad_norm": 0.0018015914829447865, |
|
"learning_rate": 0.000366306027820711, |
|
"loss": 0.9164, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.9041731066460588, |
|
"grad_norm": 0.0018244273960590363, |
|
"learning_rate": 0.00036527563111798044, |
|
"loss": 0.9182, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 1.9072642967542504, |
|
"grad_norm": 0.002539639361202717, |
|
"learning_rate": 0.00036424523441524987, |
|
"loss": 0.8878, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 1.910355486862442, |
|
"grad_norm": 0.0017704921774566174, |
|
"learning_rate": 0.00036321483771251936, |
|
"loss": 0.8888, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 1.9134466769706338, |
|
"grad_norm": 0.0023106811568140984, |
|
"learning_rate": 0.00036218444100978873, |
|
"loss": 0.9106, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 1.9165378670788253, |
|
"grad_norm": 0.0019237243104726076, |
|
"learning_rate": 0.0003611540443070582, |
|
"loss": 0.8905, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.919629057187017, |
|
"grad_norm": 0.0014340688940137625, |
|
"learning_rate": 0.0003601236476043277, |
|
"loss": 0.8845, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 1.9227202472952087, |
|
"grad_norm": 0.0015864827437326312, |
|
"learning_rate": 0.00035909325090159713, |
|
"loss": 0.8996, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 1.9258114374034003, |
|
"grad_norm": 0.0015774049097672105, |
|
"learning_rate": 0.00035806285419886656, |
|
"loss": 0.9557, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 1.928902627511592, |
|
"grad_norm": 0.0022414042614400387, |
|
"learning_rate": 0.000357032457496136, |
|
"loss": 0.8609, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 1.9319938176197837, |
|
"grad_norm": 0.001590002211742103, |
|
"learning_rate": 0.0003560020607934055, |
|
"loss": 0.9381, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.9350850077279753, |
|
"grad_norm": 0.002488743746653199, |
|
"learning_rate": 0.00035497166409067496, |
|
"loss": 0.9132, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 1.938176197836167, |
|
"grad_norm": 0.0019566200207918882, |
|
"learning_rate": 0.00035394126738794434, |
|
"loss": 0.9023, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 1.9412673879443587, |
|
"grad_norm": 0.0015884449239820242, |
|
"learning_rate": 0.0003529108706852138, |
|
"loss": 0.9132, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 1.94435857805255, |
|
"grad_norm": 0.002089353743940592, |
|
"learning_rate": 0.00035188047398248325, |
|
"loss": 0.8855, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 1.947449768160742, |
|
"grad_norm": 0.0014843277167528868, |
|
"learning_rate": 0.00035085007727975274, |
|
"loss": 0.898, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.9505409582689337, |
|
"grad_norm": 0.0015669453423470259, |
|
"learning_rate": 0.00034981968057702217, |
|
"loss": 0.9371, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 1.953632148377125, |
|
"grad_norm": 0.0016016702866181731, |
|
"learning_rate": 0.0003487892838742916, |
|
"loss": 0.9119, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 1.9567233384853169, |
|
"grad_norm": 0.0017695052083581686, |
|
"learning_rate": 0.0003477588871715611, |
|
"loss": 0.8987, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 1.9598145285935085, |
|
"grad_norm": 0.00243277451954782, |
|
"learning_rate": 0.00034672849046883046, |
|
"loss": 0.9257, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 1.9629057187017, |
|
"grad_norm": 0.0014211182715371251, |
|
"learning_rate": 0.00034569809376609994, |
|
"loss": 0.8801, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.9659969088098919, |
|
"grad_norm": 0.0024740241933614016, |
|
"learning_rate": 0.0003446676970633694, |
|
"loss": 0.8899, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 1.9690880989180835, |
|
"grad_norm": 0.001807063934393227, |
|
"learning_rate": 0.00034363730036063886, |
|
"loss": 0.8971, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 1.972179289026275, |
|
"grad_norm": 0.0013645980507135391, |
|
"learning_rate": 0.0003426069036579083, |
|
"loss": 0.9323, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 1.9752704791344669, |
|
"grad_norm": 0.0015155840665102005, |
|
"learning_rate": 0.0003415765069551777, |
|
"loss": 0.8874, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 1.9783616692426584, |
|
"grad_norm": 0.0016512033762410283, |
|
"learning_rate": 0.0003405461102524472, |
|
"loss": 0.8489, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.98145285935085, |
|
"grad_norm": 0.001505164080299437, |
|
"learning_rate": 0.0003395157135497167, |
|
"loss": 0.9208, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 1.9845440494590418, |
|
"grad_norm": 0.0018190627451986074, |
|
"learning_rate": 0.00033848531684698606, |
|
"loss": 0.9155, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 1.9876352395672334, |
|
"grad_norm": 0.0019098619231954217, |
|
"learning_rate": 0.00033745492014425555, |
|
"loss": 0.913, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 1.990726429675425, |
|
"grad_norm": 0.0015993445413187146, |
|
"learning_rate": 0.00033642452344152503, |
|
"loss": 0.8934, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 1.9938176197836168, |
|
"grad_norm": 0.0017613953677937388, |
|
"learning_rate": 0.00033539412673879446, |
|
"loss": 0.8946, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.9969088098918082, |
|
"grad_norm": 0.0015573868295177817, |
|
"learning_rate": 0.0003343637300360639, |
|
"loss": 0.9159, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.0019827033393085003, |
|
"learning_rate": 0.0003333333333333333, |
|
"loss": 0.9071, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 2.003091190108192, |
|
"grad_norm": 0.001513644470833242, |
|
"learning_rate": 0.0003323029366306028, |
|
"loss": 0.9439, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 2.006182380216383, |
|
"grad_norm": 0.001435752958059311, |
|
"learning_rate": 0.00033127253992787224, |
|
"loss": 0.9027, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 2.009273570324575, |
|
"grad_norm": 0.0016444657230749726, |
|
"learning_rate": 0.00033024214322514167, |
|
"loss": 0.8816, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.012364760432767, |
|
"grad_norm": 0.0021295547485351562, |
|
"learning_rate": 0.00032921174652241115, |
|
"loss": 0.9254, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 2.015455950540958, |
|
"grad_norm": 0.001806983258575201, |
|
"learning_rate": 0.0003281813498196806, |
|
"loss": 0.9273, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 2.01854714064915, |
|
"grad_norm": 0.0016168680740520358, |
|
"learning_rate": 0.00032715095311695, |
|
"loss": 0.9036, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 2.021638330757342, |
|
"grad_norm": 0.0014968597097322345, |
|
"learning_rate": 0.0003261205564142195, |
|
"loss": 0.8532, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 2.024729520865533, |
|
"grad_norm": 0.0019258251413702965, |
|
"learning_rate": 0.0003250901597114889, |
|
"loss": 0.9347, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 2.027820710973725, |
|
"grad_norm": 0.0017398808849975467, |
|
"learning_rate": 0.0003240597630087584, |
|
"loss": 0.8724, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 2.0309119010819163, |
|
"grad_norm": 0.001705456175841391, |
|
"learning_rate": 0.0003230293663060278, |
|
"loss": 0.9058, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 2.034003091190108, |
|
"grad_norm": 0.0018997775623574853, |
|
"learning_rate": 0.00032199896960329727, |
|
"loss": 0.9097, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 2.0370942812983, |
|
"grad_norm": 0.001412282115779817, |
|
"learning_rate": 0.00032096857290056676, |
|
"loss": 0.8857, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 2.0401854714064913, |
|
"grad_norm": 0.0014306355733424425, |
|
"learning_rate": 0.0003199381761978362, |
|
"loss": 0.91, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.043276661514683, |
|
"grad_norm": 0.001689639175310731, |
|
"learning_rate": 0.0003189077794951056, |
|
"loss": 0.9038, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 2.046367851622875, |
|
"grad_norm": 0.0015731025487184525, |
|
"learning_rate": 0.00031787738279237505, |
|
"loss": 0.913, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 2.0494590417310663, |
|
"grad_norm": 0.0014561581192538142, |
|
"learning_rate": 0.00031684698608964453, |
|
"loss": 0.8767, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 2.052550231839258, |
|
"grad_norm": 0.0017503297422081232, |
|
"learning_rate": 0.00031581658938691396, |
|
"loss": 0.917, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 2.05564142194745, |
|
"grad_norm": 0.00222258223220706, |
|
"learning_rate": 0.0003147861926841834, |
|
"loss": 0.8869, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 2.0587326120556413, |
|
"grad_norm": 0.0023604268208146095, |
|
"learning_rate": 0.0003137557959814529, |
|
"loss": 0.9149, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 2.061823802163833, |
|
"grad_norm": 0.0023476933129131794, |
|
"learning_rate": 0.0003127253992787223, |
|
"loss": 0.949, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 2.064914992272025, |
|
"grad_norm": 0.0021014835219830275, |
|
"learning_rate": 0.00031169500257599174, |
|
"loss": 0.9374, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 2.0680061823802163, |
|
"grad_norm": 0.001549664419144392, |
|
"learning_rate": 0.0003106646058732612, |
|
"loss": 0.9189, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 2.071097372488408, |
|
"grad_norm": 0.0018286675913259387, |
|
"learning_rate": 0.00030963420917053065, |
|
"loss": 0.9028, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.0741885625966, |
|
"grad_norm": 0.0018017146503552794, |
|
"learning_rate": 0.00030860381246780014, |
|
"loss": 0.8899, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 2.0772797527047913, |
|
"grad_norm": 0.001875316142104566, |
|
"learning_rate": 0.0003075734157650695, |
|
"loss": 0.9123, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 2.080370942812983, |
|
"grad_norm": 0.0018925000913441181, |
|
"learning_rate": 0.000306543019062339, |
|
"loss": 0.9139, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 2.0834621329211744, |
|
"grad_norm": 0.0015832912176847458, |
|
"learning_rate": 0.0003055126223596085, |
|
"loss": 0.9131, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 2.0865533230293662, |
|
"grad_norm": 0.0018890087958425283, |
|
"learning_rate": 0.0003044822256568779, |
|
"loss": 0.8798, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 2.089644513137558, |
|
"grad_norm": 0.0020662271417677402, |
|
"learning_rate": 0.00030345182895414734, |
|
"loss": 0.9027, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 2.0927357032457494, |
|
"grad_norm": 0.001351105165667832, |
|
"learning_rate": 0.0003024214322514168, |
|
"loss": 0.8641, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 2.0958268933539412, |
|
"grad_norm": 0.001607073936611414, |
|
"learning_rate": 0.00030139103554868626, |
|
"loss": 0.8716, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 2.098918083462133, |
|
"grad_norm": 0.0022712023928761482, |
|
"learning_rate": 0.0003003606388459557, |
|
"loss": 0.9286, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 2.1020092735703244, |
|
"grad_norm": 0.0019429827807471156, |
|
"learning_rate": 0.0002993302421432251, |
|
"loss": 0.9157, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.105100463678516, |
|
"grad_norm": 0.0023945835418999195, |
|
"learning_rate": 0.0002982998454404946, |
|
"loss": 0.9332, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 2.108191653786708, |
|
"grad_norm": 0.0017035908531397581, |
|
"learning_rate": 0.0002972694487377641, |
|
"loss": 0.8984, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 2.1112828438948994, |
|
"grad_norm": 0.001636037020944059, |
|
"learning_rate": 0.00029623905203503346, |
|
"loss": 0.8535, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 2.114374034003091, |
|
"grad_norm": 0.0015807118033990264, |
|
"learning_rate": 0.00029520865533230295, |
|
"loss": 0.8537, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 2.117465224111283, |
|
"grad_norm": 0.0019097881158813834, |
|
"learning_rate": 0.0002941782586295724, |
|
"loss": 0.9224, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 2.1205564142194744, |
|
"grad_norm": 0.002146846381947398, |
|
"learning_rate": 0.00029314786192684186, |
|
"loss": 0.9206, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 2.123647604327666, |
|
"grad_norm": 0.0016978083876892924, |
|
"learning_rate": 0.0002921174652241113, |
|
"loss": 0.9441, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 2.126738794435858, |
|
"grad_norm": 0.0015323269180953503, |
|
"learning_rate": 0.0002910870685213807, |
|
"loss": 0.9336, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 2.1298299845440494, |
|
"grad_norm": 0.0014333493309095502, |
|
"learning_rate": 0.0002900566718186502, |
|
"loss": 0.9192, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 2.132921174652241, |
|
"grad_norm": 0.0017516023945063353, |
|
"learning_rate": 0.00028902627511591964, |
|
"loss": 0.8886, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.1360123647604325, |
|
"grad_norm": 0.0013508939882740378, |
|
"learning_rate": 0.00028799587841318907, |
|
"loss": 0.8788, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 2.1391035548686244, |
|
"grad_norm": 0.002034724224358797, |
|
"learning_rate": 0.00028696548171045855, |
|
"loss": 0.8884, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 2.142194744976816, |
|
"grad_norm": 0.0014981675194576383, |
|
"learning_rate": 0.000285935085007728, |
|
"loss": 0.9187, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 2.1452859350850075, |
|
"grad_norm": 0.002125379629433155, |
|
"learning_rate": 0.00028490468830499747, |
|
"loss": 0.8873, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 2.1483771251931993, |
|
"grad_norm": 0.001538197393529117, |
|
"learning_rate": 0.00028387429160226684, |
|
"loss": 0.9144, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 2.151468315301391, |
|
"grad_norm": 0.002346182242035866, |
|
"learning_rate": 0.0002828438948995363, |
|
"loss": 0.9295, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 2.1545595054095825, |
|
"grad_norm": 0.0015058065764606, |
|
"learning_rate": 0.0002818134981968058, |
|
"loss": 0.857, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 2.1576506955177743, |
|
"grad_norm": 0.0017613341333344579, |
|
"learning_rate": 0.0002807831014940752, |
|
"loss": 0.8723, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 2.160741885625966, |
|
"grad_norm": 0.0021604488138109446, |
|
"learning_rate": 0.00027975270479134467, |
|
"loss": 0.934, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 2.1638330757341575, |
|
"grad_norm": 0.0017267893999814987, |
|
"learning_rate": 0.0002787223080886141, |
|
"loss": 0.8985, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.1669242658423493, |
|
"grad_norm": 0.0014519842807203531, |
|
"learning_rate": 0.0002776919113858836, |
|
"loss": 0.9185, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 2.170015455950541, |
|
"grad_norm": 0.0023860172368586063, |
|
"learning_rate": 0.000276661514683153, |
|
"loss": 0.9081, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 2.1731066460587325, |
|
"grad_norm": 0.0017903451807796955, |
|
"learning_rate": 0.00027563111798042245, |
|
"loss": 0.9024, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 2.1761978361669243, |
|
"grad_norm": 0.0015208751428872347, |
|
"learning_rate": 0.00027460072127769193, |
|
"loss": 0.8988, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 2.179289026275116, |
|
"grad_norm": 0.0019341211300343275, |
|
"learning_rate": 0.0002735703245749614, |
|
"loss": 0.9378, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 2.1823802163833075, |
|
"grad_norm": 0.0016929497942328453, |
|
"learning_rate": 0.0002725399278722308, |
|
"loss": 0.9231, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 2.1854714064914993, |
|
"grad_norm": 0.0015913585666567087, |
|
"learning_rate": 0.0002715095311695003, |
|
"loss": 0.8959, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 2.1885625965996907, |
|
"grad_norm": 0.001789154834114015, |
|
"learning_rate": 0.0002704791344667697, |
|
"loss": 0.9459, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 2.1916537867078825, |
|
"grad_norm": 0.00176356197334826, |
|
"learning_rate": 0.0002694487377640392, |
|
"loss": 0.915, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 2.1947449768160743, |
|
"grad_norm": 0.0034130492713302374, |
|
"learning_rate": 0.0002684183410613086, |
|
"loss": 0.8311, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 2.1978361669242656, |
|
"grad_norm": 0.001789650646969676, |
|
"learning_rate": 0.00026738794435857805, |
|
"loss": 0.9061, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 2.2009273570324575, |
|
"grad_norm": 0.0015091504901647568, |
|
"learning_rate": 0.00026635754765584754, |
|
"loss": 0.882, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 2.2040185471406493, |
|
"grad_norm": 0.0016386975767090917, |
|
"learning_rate": 0.0002653271509531169, |
|
"loss": 0.8652, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 2.2071097372488406, |
|
"grad_norm": 0.0018872515065595508, |
|
"learning_rate": 0.0002642967542503864, |
|
"loss": 0.9064, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 2.2102009273570324, |
|
"grad_norm": 0.0016174730844795704, |
|
"learning_rate": 0.0002632663575476559, |
|
"loss": 0.8615, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 2.2132921174652243, |
|
"grad_norm": 0.0023867471609264612, |
|
"learning_rate": 0.0002622359608449253, |
|
"loss": 0.903, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 2.2163833075734156, |
|
"grad_norm": 0.0018768367590382695, |
|
"learning_rate": 0.00026120556414219474, |
|
"loss": 0.8828, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 2.2194744976816074, |
|
"grad_norm": 0.0019271258497610688, |
|
"learning_rate": 0.00026017516743946417, |
|
"loss": 0.8861, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 2.2225656877897992, |
|
"grad_norm": 0.001598935341462493, |
|
"learning_rate": 0.00025914477073673366, |
|
"loss": 0.8821, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 2.2256568778979906, |
|
"grad_norm": 0.002534502651542425, |
|
"learning_rate": 0.00025811437403400314, |
|
"loss": 0.923, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.2287480680061824, |
|
"grad_norm": 0.001254307571798563, |
|
"learning_rate": 0.0002570839773312725, |
|
"loss": 0.8744, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 2.2318392581143742, |
|
"grad_norm": 0.0013876528246328235, |
|
"learning_rate": 0.000256053580628542, |
|
"loss": 0.909, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 2.2349304482225656, |
|
"grad_norm": 0.0030744040850549936, |
|
"learning_rate": 0.00025502318392581143, |
|
"loss": 0.9358, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 2.2380216383307574, |
|
"grad_norm": 0.001663259114138782, |
|
"learning_rate": 0.0002539927872230809, |
|
"loss": 0.9078, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 2.2411128284389488, |
|
"grad_norm": 0.00184043834451586, |
|
"learning_rate": 0.00025296239052035035, |
|
"loss": 0.8703, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 2.2442040185471406, |
|
"grad_norm": 0.002181377960368991, |
|
"learning_rate": 0.0002519319938176198, |
|
"loss": 0.8823, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 2.2472952086553324, |
|
"grad_norm": 0.0015466611366719007, |
|
"learning_rate": 0.00025090159711488926, |
|
"loss": 0.8792, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 2.250386398763524, |
|
"grad_norm": 0.002074003452435136, |
|
"learning_rate": 0.0002498712004121587, |
|
"loss": 0.9361, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 2.2534775888717156, |
|
"grad_norm": 0.0014814219903200865, |
|
"learning_rate": 0.0002488408037094281, |
|
"loss": 0.8684, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 2.2565687789799074, |
|
"grad_norm": 0.0022694983053952456, |
|
"learning_rate": 0.00024781040700669755, |
|
"loss": 0.8592, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 2.2596599690880987, |
|
"grad_norm": 0.0019474639557301998, |
|
"learning_rate": 0.00024678001030396704, |
|
"loss": 0.8866, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 2.2627511591962906, |
|
"grad_norm": 0.0018335517961531878, |
|
"learning_rate": 0.00024574961360123647, |
|
"loss": 0.9352, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 2.2658423493044824, |
|
"grad_norm": 0.001545943901874125, |
|
"learning_rate": 0.00024471921689850595, |
|
"loss": 0.868, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 2.2689335394126737, |
|
"grad_norm": 0.0019200016977265477, |
|
"learning_rate": 0.00024368882019577538, |
|
"loss": 0.9414, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 2.2720247295208655, |
|
"grad_norm": 0.002204937394708395, |
|
"learning_rate": 0.0002426584234930448, |
|
"loss": 0.919, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 2.2751159196290573, |
|
"grad_norm": 0.002049383707344532, |
|
"learning_rate": 0.0002416280267903143, |
|
"loss": 0.9024, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 2.2782071097372487, |
|
"grad_norm": 0.0016608345322310925, |
|
"learning_rate": 0.00024059763008758373, |
|
"loss": 0.9112, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 2.2812982998454405, |
|
"grad_norm": 0.0020049915183335543, |
|
"learning_rate": 0.00023956723338485318, |
|
"loss": 0.9082, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 2.2843894899536323, |
|
"grad_norm": 0.0017916331999003887, |
|
"learning_rate": 0.0002385368366821226, |
|
"loss": 0.9242, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 2.2874806800618237, |
|
"grad_norm": 0.0015080280136317015, |
|
"learning_rate": 0.00023750643997939207, |
|
"loss": 0.9115, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 2.2905718701700155, |
|
"grad_norm": 0.00174785649869591, |
|
"learning_rate": 0.00023647604327666153, |
|
"loss": 0.8643, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 2.293663060278207, |
|
"grad_norm": 0.002300349297001958, |
|
"learning_rate": 0.00023544564657393096, |
|
"loss": 0.9215, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 2.2967542503863987, |
|
"grad_norm": 0.001717501669190824, |
|
"learning_rate": 0.00023441524987120042, |
|
"loss": 0.881, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 2.2998454404945905, |
|
"grad_norm": 0.0015334953786805272, |
|
"learning_rate": 0.00023338485316846985, |
|
"loss": 0.9106, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 2.3029366306027823, |
|
"grad_norm": 0.00180353585164994, |
|
"learning_rate": 0.00023235445646573933, |
|
"loss": 0.9088, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 2.3060278207109737, |
|
"grad_norm": 0.0016415161080658436, |
|
"learning_rate": 0.00023132405976300876, |
|
"loss": 0.8925, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 2.3091190108191655, |
|
"grad_norm": 0.002030453644692898, |
|
"learning_rate": 0.00023029366306027822, |
|
"loss": 0.8819, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 2.312210200927357, |
|
"grad_norm": 0.0019409249071031809, |
|
"learning_rate": 0.00022926326635754765, |
|
"loss": 0.867, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 2.3153013910355487, |
|
"grad_norm": 0.0034679800737649202, |
|
"learning_rate": 0.0002282328696548171, |
|
"loss": 0.8954, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 2.3183925811437405, |
|
"grad_norm": 0.0020659409929066896, |
|
"learning_rate": 0.00022720247295208656, |
|
"loss": 0.9357, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.321483771251932, |
|
"grad_norm": 0.0021641727071255445, |
|
"learning_rate": 0.00022617207624935602, |
|
"loss": 0.8911, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 2.3245749613601236, |
|
"grad_norm": 0.0016261821147054434, |
|
"learning_rate": 0.00022514167954662545, |
|
"loss": 0.9102, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 2.3276661514683155, |
|
"grad_norm": 0.0018004965968430042, |
|
"learning_rate": 0.0002241112828438949, |
|
"loss": 0.8924, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 2.330757341576507, |
|
"grad_norm": 0.0016102648805826902, |
|
"learning_rate": 0.00022308088614116434, |
|
"loss": 0.8812, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 2.3338485316846986, |
|
"grad_norm": 0.0018460742430761456, |
|
"learning_rate": 0.00022205048943843382, |
|
"loss": 0.8935, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 2.3369397217928904, |
|
"grad_norm": 0.0018313312903046608, |
|
"learning_rate": 0.00022102009273570325, |
|
"loss": 0.8892, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 2.340030911901082, |
|
"grad_norm": 0.0018456524703651667, |
|
"learning_rate": 0.0002199896960329727, |
|
"loss": 0.8874, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 2.3431221020092736, |
|
"grad_norm": 0.0028236303478479385, |
|
"learning_rate": 0.00021895929933024214, |
|
"loss": 0.9291, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 2.346213292117465, |
|
"grad_norm": 0.0012284901458770037, |
|
"learning_rate": 0.00021792890262751157, |
|
"loss": 0.8771, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 2.349304482225657, |
|
"grad_norm": 0.0018673281883820891, |
|
"learning_rate": 0.00021689850592478106, |
|
"loss": 0.8804, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 2.3523956723338486, |
|
"grad_norm": 0.0013668534811586142, |
|
"learning_rate": 0.00021586810922205049, |
|
"loss": 0.9415, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 2.3554868624420404, |
|
"grad_norm": 0.0014791954308748245, |
|
"learning_rate": 0.00021483771251931994, |
|
"loss": 0.9123, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 2.358578052550232, |
|
"grad_norm": 0.0016408261144533753, |
|
"learning_rate": 0.00021380731581658937, |
|
"loss": 0.9296, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 2.3616692426584236, |
|
"grad_norm": 0.0016208128072321415, |
|
"learning_rate": 0.00021277691911385886, |
|
"loss": 0.8988, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 2.364760432766615, |
|
"grad_norm": 0.0016079987399280071, |
|
"learning_rate": 0.0002117465224111283, |
|
"loss": 0.8697, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 2.3678516228748068, |
|
"grad_norm": 0.0018998971208930016, |
|
"learning_rate": 0.00021071612570839775, |
|
"loss": 0.8971, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 2.3709428129829986, |
|
"grad_norm": 0.0013807121431455016, |
|
"learning_rate": 0.00020968572900566718, |
|
"loss": 0.9038, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 2.37403400309119, |
|
"grad_norm": 0.0016093014273792505, |
|
"learning_rate": 0.00020865533230293663, |
|
"loss": 0.8853, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 2.3771251931993818, |
|
"grad_norm": 0.001660670735873282, |
|
"learning_rate": 0.0002076249356002061, |
|
"loss": 0.8867, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 2.3802163833075736, |
|
"grad_norm": 0.001987049588933587, |
|
"learning_rate": 0.00020659453889747555, |
|
"loss": 0.9064, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 2.383307573415765, |
|
"grad_norm": 0.0014046692522242665, |
|
"learning_rate": 0.00020556414219474498, |
|
"loss": 0.9145, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 2.3863987635239567, |
|
"grad_norm": 0.0018706049304455519, |
|
"learning_rate": 0.00020453374549201444, |
|
"loss": 0.8975, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 2.3894899536321486, |
|
"grad_norm": 0.001406969386152923, |
|
"learning_rate": 0.00020350334878928387, |
|
"loss": 0.9143, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 2.39258114374034, |
|
"grad_norm": 0.001984959002584219, |
|
"learning_rate": 0.00020247295208655332, |
|
"loss": 0.9189, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 2.3956723338485317, |
|
"grad_norm": 0.0022590451408177614, |
|
"learning_rate": 0.00020144255538382278, |
|
"loss": 0.9174, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 2.398763523956723, |
|
"grad_norm": 0.0013520545326173306, |
|
"learning_rate": 0.0002004121586810922, |
|
"loss": 0.8876, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 2.401854714064915, |
|
"grad_norm": 0.001583244651556015, |
|
"learning_rate": 0.00019938176197836167, |
|
"loss": 0.9106, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 2.4049459041731067, |
|
"grad_norm": 0.0016318537527695298, |
|
"learning_rate": 0.00019835136527563113, |
|
"loss": 0.8566, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 2.4080370942812985, |
|
"grad_norm": 0.0015274740289896727, |
|
"learning_rate": 0.00019732096857290058, |
|
"loss": 0.9214, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 2.41112828438949, |
|
"grad_norm": 0.0017980411648750305, |
|
"learning_rate": 0.00019629057187017, |
|
"loss": 0.8636, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 2.4142194744976817, |
|
"grad_norm": 0.0016120801446959376, |
|
"learning_rate": 0.00019526017516743947, |
|
"loss": 0.8821, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 2.417310664605873, |
|
"grad_norm": 0.001640370930545032, |
|
"learning_rate": 0.0001942297784647089, |
|
"loss": 0.9068, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 2.420401854714065, |
|
"grad_norm": 0.0015430683270096779, |
|
"learning_rate": 0.00019319938176197838, |
|
"loss": 0.8889, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 2.4234930448222567, |
|
"grad_norm": 0.0015471933875232935, |
|
"learning_rate": 0.00019216898505924782, |
|
"loss": 0.8924, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 2.426584234930448, |
|
"grad_norm": 0.0021910767536610365, |
|
"learning_rate": 0.00019113858835651727, |
|
"loss": 0.8569, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 2.42967542503864, |
|
"grad_norm": 0.0017051781760528684, |
|
"learning_rate": 0.0001901081916537867, |
|
"loss": 0.937, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 2.4327666151468317, |
|
"grad_norm": 0.0018304622499272227, |
|
"learning_rate": 0.00018907779495105616, |
|
"loss": 0.9255, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 2.435857805255023, |
|
"grad_norm": 0.001733385375700891, |
|
"learning_rate": 0.00018804739824832562, |
|
"loss": 0.8877, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 2.438948995363215, |
|
"grad_norm": 0.0014930800534784794, |
|
"learning_rate": 0.00018701700154559507, |
|
"loss": 0.9079, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 2.4420401854714067, |
|
"grad_norm": 0.0017946161096915603, |
|
"learning_rate": 0.0001859866048428645, |
|
"loss": 0.8372, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 2.445131375579598, |
|
"grad_norm": 0.00181410217192024, |
|
"learning_rate": 0.00018495620814013394, |
|
"loss": 0.9301, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 2.44822256568779, |
|
"grad_norm": 0.0018277463968843222, |
|
"learning_rate": 0.00018392581143740342, |
|
"loss": 0.9406, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 2.451313755795981, |
|
"grad_norm": 0.0017499460373073816, |
|
"learning_rate": 0.00018289541473467285, |
|
"loss": 0.9327, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 2.454404945904173, |
|
"grad_norm": 0.0017523594433441758, |
|
"learning_rate": 0.0001818650180319423, |
|
"loss": 0.9541, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 2.457496136012365, |
|
"grad_norm": 0.001588582992553711, |
|
"learning_rate": 0.00018083462132921174, |
|
"loss": 0.9244, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 2.4605873261205566, |
|
"grad_norm": 0.0015729885781183839, |
|
"learning_rate": 0.0001798042246264812, |
|
"loss": 0.902, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 2.463678516228748, |
|
"grad_norm": 0.001764149172231555, |
|
"learning_rate": 0.00017877382792375065, |
|
"loss": 0.9115, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 2.46676970633694, |
|
"grad_norm": 0.0020394367165863514, |
|
"learning_rate": 0.0001777434312210201, |
|
"loss": 0.8913, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 2.469860896445131, |
|
"grad_norm": 0.001661314396187663, |
|
"learning_rate": 0.00017671303451828954, |
|
"loss": 0.8924, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 2.472952086553323, |
|
"grad_norm": 0.0016929521225392818, |
|
"learning_rate": 0.000175682637815559, |
|
"loss": 0.9158, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.476043276661515, |
|
"grad_norm": 0.0014037607470527291, |
|
"learning_rate": 0.00017465224111282843, |
|
"loss": 0.8929, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 2.479134466769706, |
|
"grad_norm": 0.0012340841349214315, |
|
"learning_rate": 0.0001736218444100979, |
|
"loss": 0.9042, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 2.482225656877898, |
|
"grad_norm": 0.0016911630518734455, |
|
"learning_rate": 0.00017259144770736734, |
|
"loss": 0.8805, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 2.48531684698609, |
|
"grad_norm": 0.0015811780467629433, |
|
"learning_rate": 0.0001715610510046368, |
|
"loss": 0.9066, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 2.488408037094281, |
|
"grad_norm": 0.0022526499815285206, |
|
"learning_rate": 0.00017053065430190623, |
|
"loss": 0.8729, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 2.491499227202473, |
|
"grad_norm": 0.0014964583097025752, |
|
"learning_rate": 0.00016950025759917566, |
|
"loss": 0.8867, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 2.4945904173106648, |
|
"grad_norm": 0.001667377888225019, |
|
"learning_rate": 0.00016846986089644514, |
|
"loss": 0.9289, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 2.497681607418856, |
|
"grad_norm": 0.0015655744355171919, |
|
"learning_rate": 0.00016743946419371457, |
|
"loss": 0.8989, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 2.500772797527048, |
|
"grad_norm": 0.0018760713282972574, |
|
"learning_rate": 0.00016640906749098403, |
|
"loss": 0.8951, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 2.5038639876352393, |
|
"grad_norm": 0.0018504380714148283, |
|
"learning_rate": 0.00016537867078825346, |
|
"loss": 0.9548, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 2.506955177743431, |
|
"grad_norm": 0.0015352640766650438, |
|
"learning_rate": 0.00016434827408552295, |
|
"loss": 0.8789, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 2.510046367851623, |
|
"grad_norm": 0.0014199953293427825, |
|
"learning_rate": 0.00016331787738279238, |
|
"loss": 0.8956, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 2.5131375579598147, |
|
"grad_norm": 0.0022967271506786346, |
|
"learning_rate": 0.00016228748068006183, |
|
"loss": 0.8662, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 2.516228748068006, |
|
"grad_norm": 0.0015619174810126424, |
|
"learning_rate": 0.00016125708397733126, |
|
"loss": 0.9188, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 2.519319938176198, |
|
"grad_norm": 0.00181775342207402, |
|
"learning_rate": 0.00016022668727460072, |
|
"loss": 0.9115, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 2.5224111282843893, |
|
"grad_norm": 0.001615070621483028, |
|
"learning_rate": 0.00015919629057187018, |
|
"loss": 0.8719, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 2.525502318392581, |
|
"grad_norm": 0.002030865289270878, |
|
"learning_rate": 0.00015816589386913964, |
|
"loss": 0.8618, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 2.528593508500773, |
|
"grad_norm": 0.0018763948464766145, |
|
"learning_rate": 0.00015713549716640907, |
|
"loss": 0.9493, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 2.5316846986089647, |
|
"grad_norm": 0.0015980995958670974, |
|
"learning_rate": 0.00015610510046367852, |
|
"loss": 0.9139, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 2.534775888717156, |
|
"grad_norm": 0.0017758564790710807, |
|
"learning_rate": 0.00015507470376094795, |
|
"loss": 0.8785, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 2.537867078825348, |
|
"grad_norm": 0.0018766775028780103, |
|
"learning_rate": 0.0001540443070582174, |
|
"loss": 0.9099, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 2.5409582689335393, |
|
"grad_norm": 0.0018965909257531166, |
|
"learning_rate": 0.00015301391035548687, |
|
"loss": 0.9314, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 2.544049459041731, |
|
"grad_norm": 0.0015854688826948404, |
|
"learning_rate": 0.0001519835136527563, |
|
"loss": 0.9137, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 2.547140649149923, |
|
"grad_norm": 0.0018873221706598997, |
|
"learning_rate": 0.00015095311695002576, |
|
"loss": 0.8828, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 2.5502318392581143, |
|
"grad_norm": 0.0014826676342636347, |
|
"learning_rate": 0.00014992272024729521, |
|
"loss": 0.9031, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 2.553323029366306, |
|
"grad_norm": 0.0014809026615694165, |
|
"learning_rate": 0.00014889232354456467, |
|
"loss": 0.8574, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 2.5564142194744974, |
|
"grad_norm": 0.0014648685464635491, |
|
"learning_rate": 0.0001478619268418341, |
|
"loss": 0.8706, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 2.5595054095826892, |
|
"grad_norm": 0.001307973056100309, |
|
"learning_rate": 0.00014683153013910356, |
|
"loss": 0.9271, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 2.562596599690881, |
|
"grad_norm": 0.0019158597569912672, |
|
"learning_rate": 0.000145801133436373, |
|
"loss": 0.8816, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 2.565687789799073, |
|
"grad_norm": 0.0016835506539791822, |
|
"learning_rate": 0.00014477073673364247, |
|
"loss": 0.9075, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 2.5687789799072642, |
|
"grad_norm": 0.0020169655326753855, |
|
"learning_rate": 0.0001437403400309119, |
|
"loss": 0.8927, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 2.571870170015456, |
|
"grad_norm": 0.001817848184145987, |
|
"learning_rate": 0.00014270994332818136, |
|
"loss": 0.8812, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 2.5749613601236474, |
|
"grad_norm": 0.001979761989787221, |
|
"learning_rate": 0.0001416795466254508, |
|
"loss": 0.9054, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 2.578052550231839, |
|
"grad_norm": 0.002088018460199237, |
|
"learning_rate": 0.00014064914992272025, |
|
"loss": 0.8691, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 2.581143740340031, |
|
"grad_norm": 0.0017918642843142152, |
|
"learning_rate": 0.0001396187532199897, |
|
"loss": 0.9458, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 2.584234930448223, |
|
"grad_norm": 0.0016459986800327897, |
|
"learning_rate": 0.00013858835651725916, |
|
"loss": 0.9186, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 2.587326120556414, |
|
"grad_norm": 0.001407464500516653, |
|
"learning_rate": 0.0001375579598145286, |
|
"loss": 0.8839, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 2.590417310664606, |
|
"grad_norm": 0.001718651968985796, |
|
"learning_rate": 0.00013652756311179802, |
|
"loss": 0.9203, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 2.5935085007727974, |
|
"grad_norm": 0.0018458360573276877, |
|
"learning_rate": 0.0001354971664090675, |
|
"loss": 0.911, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 2.596599690880989, |
|
"grad_norm": 0.001538407290354371, |
|
"learning_rate": 0.00013446676970633694, |
|
"loss": 0.9139, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.599690880989181, |
|
"grad_norm": 0.001976667670533061, |
|
"learning_rate": 0.0001334363730036064, |
|
"loss": 0.9075, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 2.6027820710973724, |
|
"grad_norm": 0.0021034348756074905, |
|
"learning_rate": 0.00013240597630087583, |
|
"loss": 0.9181, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 2.605873261205564, |
|
"grad_norm": 0.00171546614728868, |
|
"learning_rate": 0.00013137557959814528, |
|
"loss": 0.9361, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 2.6089644513137555, |
|
"grad_norm": 0.001742625143378973, |
|
"learning_rate": 0.00013034518289541474, |
|
"loss": 0.8895, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 2.6120556414219473, |
|
"grad_norm": 0.0018996672006323934, |
|
"learning_rate": 0.0001293147861926842, |
|
"loss": 0.8474, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 2.615146831530139, |
|
"grad_norm": 0.001571224071085453, |
|
"learning_rate": 0.00012828438948995363, |
|
"loss": 0.8976, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 2.618238021638331, |
|
"grad_norm": 0.0018322218675166368, |
|
"learning_rate": 0.0001272539927872231, |
|
"loss": 0.9207, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 2.6213292117465223, |
|
"grad_norm": 0.0013319810386747122, |
|
"learning_rate": 0.00012622359608449252, |
|
"loss": 0.8674, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 2.624420401854714, |
|
"grad_norm": 0.0014434581389650702, |
|
"learning_rate": 0.000125193199381762, |
|
"loss": 0.829, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 2.6275115919629055, |
|
"grad_norm": 0.0027343255933374166, |
|
"learning_rate": 0.00012416280267903143, |
|
"loss": 0.8882, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.6306027820710973, |
|
"grad_norm": 0.003409834112972021, |
|
"learning_rate": 0.0001231324059763009, |
|
"loss": 0.9302, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 2.633693972179289, |
|
"grad_norm": 0.0013600644888356328, |
|
"learning_rate": 0.00012210200927357035, |
|
"loss": 0.876, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 2.636785162287481, |
|
"grad_norm": 0.0016775266267359257, |
|
"learning_rate": 0.00012107161257083978, |
|
"loss": 0.9064, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 2.6398763523956723, |
|
"grad_norm": 0.0017377499025315046, |
|
"learning_rate": 0.00012004121586810922, |
|
"loss": 0.9224, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 2.642967542503864, |
|
"grad_norm": 0.001847295556217432, |
|
"learning_rate": 0.00011901081916537868, |
|
"loss": 0.9149, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 2.6460587326120555, |
|
"grad_norm": 0.0014812115114182234, |
|
"learning_rate": 0.00011798042246264812, |
|
"loss": 0.8929, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 2.6491499227202473, |
|
"grad_norm": 0.0017952779307961464, |
|
"learning_rate": 0.00011695002575991758, |
|
"loss": 0.9285, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 2.652241112828439, |
|
"grad_norm": 0.001425527036190033, |
|
"learning_rate": 0.00011591962905718702, |
|
"loss": 0.8703, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 2.6553323029366305, |
|
"grad_norm": 0.001610907376743853, |
|
"learning_rate": 0.00011488923235445648, |
|
"loss": 0.9103, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 2.6584234930448223, |
|
"grad_norm": 0.001997585641220212, |
|
"learning_rate": 0.00011385883565172592, |
|
"loss": 0.898, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.6615146831530136, |
|
"grad_norm": 0.0015265914844349027, |
|
"learning_rate": 0.00011282843894899535, |
|
"loss": 0.8225, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 2.6646058732612055, |
|
"grad_norm": 0.002251180587336421, |
|
"learning_rate": 0.00011179804224626481, |
|
"loss": 0.8886, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 2.6676970633693973, |
|
"grad_norm": 0.0016794728580862284, |
|
"learning_rate": 0.00011076764554353426, |
|
"loss": 0.8991, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 2.670788253477589, |
|
"grad_norm": 0.0021927470806986094, |
|
"learning_rate": 0.00010973724884080371, |
|
"loss": 0.872, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 2.6738794435857804, |
|
"grad_norm": 0.0016734660603106022, |
|
"learning_rate": 0.00010870685213807316, |
|
"loss": 0.9082, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 2.6769706336939723, |
|
"grad_norm": 0.0023362876381725073, |
|
"learning_rate": 0.00010767645543534261, |
|
"loss": 0.8867, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 2.6800618238021636, |
|
"grad_norm": 0.0015447117621079087, |
|
"learning_rate": 0.00010664605873261206, |
|
"loss": 0.8706, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 2.6831530139103554, |
|
"grad_norm": 0.0017007689457386732, |
|
"learning_rate": 0.0001056156620298815, |
|
"loss": 0.9457, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 2.6862442040185472, |
|
"grad_norm": 0.0018746848218142986, |
|
"learning_rate": 0.00010458526532715096, |
|
"loss": 0.8834, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 2.689335394126739, |
|
"grad_norm": 0.0016901030903682113, |
|
"learning_rate": 0.0001035548686244204, |
|
"loss": 0.9321, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 2.6924265842349304, |
|
"grad_norm": 0.0017849161522462964, |
|
"learning_rate": 0.00010252447192168986, |
|
"loss": 0.8886, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 2.6955177743431222, |
|
"grad_norm": 0.0016776255797594786, |
|
"learning_rate": 0.0001014940752189593, |
|
"loss": 0.8842, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 2.6986089644513136, |
|
"grad_norm": 0.002052722033113241, |
|
"learning_rate": 0.00010046367851622876, |
|
"loss": 0.8921, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 2.7017001545595054, |
|
"grad_norm": 0.0020015337504446507, |
|
"learning_rate": 9.94332818134982e-05, |
|
"loss": 0.878, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 2.704791344667697, |
|
"grad_norm": 0.0018238229677081108, |
|
"learning_rate": 9.840288511076765e-05, |
|
"loss": 0.9286, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 2.7078825347758886, |
|
"grad_norm": 0.0019556416664272547, |
|
"learning_rate": 9.737248840803709e-05, |
|
"loss": 0.9109, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 2.7109737248840804, |
|
"grad_norm": 0.001552661880850792, |
|
"learning_rate": 9.634209170530654e-05, |
|
"loss": 0.9073, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 2.7140649149922718, |
|
"grad_norm": 0.0018825504230335355, |
|
"learning_rate": 9.5311695002576e-05, |
|
"loss": 0.9033, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 2.7171561051004636, |
|
"grad_norm": 0.0018266792176291347, |
|
"learning_rate": 9.428129829984544e-05, |
|
"loss": 0.946, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 2.7202472952086554, |
|
"grad_norm": 0.0014049782184883952, |
|
"learning_rate": 9.325090159711488e-05, |
|
"loss": 0.8817, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.723338485316847, |
|
"grad_norm": 0.0015499057481065392, |
|
"learning_rate": 9.222050489438434e-05, |
|
"loss": 0.8569, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 2.7264296754250386, |
|
"grad_norm": 0.0018245774554088712, |
|
"learning_rate": 9.119010819165378e-05, |
|
"loss": 0.8934, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 2.7295208655332304, |
|
"grad_norm": 0.001886395737528801, |
|
"learning_rate": 9.015971148892324e-05, |
|
"loss": 0.9212, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 2.7326120556414217, |
|
"grad_norm": 0.0017441367963328958, |
|
"learning_rate": 8.912931478619268e-05, |
|
"loss": 0.9378, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 2.7357032457496135, |
|
"grad_norm": 0.0015955539420247078, |
|
"learning_rate": 8.809891808346214e-05, |
|
"loss": 0.8835, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 2.7387944358578054, |
|
"grad_norm": 0.001902989810332656, |
|
"learning_rate": 8.706852138073158e-05, |
|
"loss": 0.9072, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 2.741885625965997, |
|
"grad_norm": 0.0014242329634726048, |
|
"learning_rate": 8.603812467800103e-05, |
|
"loss": 0.8712, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 2.7449768160741885, |
|
"grad_norm": 0.001587534206919372, |
|
"learning_rate": 8.500772797527049e-05, |
|
"loss": 0.8655, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 2.7480680061823803, |
|
"grad_norm": 0.001545070088468492, |
|
"learning_rate": 8.397733127253993e-05, |
|
"loss": 0.9077, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 2.7511591962905717, |
|
"grad_norm": 0.0018857244867831469, |
|
"learning_rate": 8.294693456980939e-05, |
|
"loss": 0.896, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 2.7542503863987635, |
|
"grad_norm": 0.0012212925357744098, |
|
"learning_rate": 8.191653786707883e-05, |
|
"loss": 0.8993, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 2.7573415765069553, |
|
"grad_norm": 0.0014877498615533113, |
|
"learning_rate": 8.088614116434827e-05, |
|
"loss": 0.9214, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 2.7604327666151467, |
|
"grad_norm": 0.0014304481446743011, |
|
"learning_rate": 7.985574446161772e-05, |
|
"loss": 0.8926, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 2.7635239567233385, |
|
"grad_norm": 0.001448179711587727, |
|
"learning_rate": 7.882534775888716e-05, |
|
"loss": 0.873, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 2.76661514683153, |
|
"grad_norm": 0.0015107804210856557, |
|
"learning_rate": 7.779495105615662e-05, |
|
"loss": 0.8396, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 2.7697063369397217, |
|
"grad_norm": 0.001311837462708354, |
|
"learning_rate": 7.676455435342606e-05, |
|
"loss": 0.9185, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 2.7727975270479135, |
|
"grad_norm": 0.0017208693316206336, |
|
"learning_rate": 7.573415765069552e-05, |
|
"loss": 0.8959, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 2.7758887171561053, |
|
"grad_norm": 0.0016424921341240406, |
|
"learning_rate": 7.470376094796496e-05, |
|
"loss": 0.8892, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 2.7789799072642967, |
|
"grad_norm": 0.0014334353618323803, |
|
"learning_rate": 7.367336424523442e-05, |
|
"loss": 0.8908, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 2.7820710973724885, |
|
"grad_norm": 0.0019086402608081698, |
|
"learning_rate": 7.264296754250387e-05, |
|
"loss": 0.8791, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.78516228748068, |
|
"grad_norm": 0.0013547363923862576, |
|
"learning_rate": 7.161257083977331e-05, |
|
"loss": 0.9024, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 2.7882534775888717, |
|
"grad_norm": 0.0013833148404955864, |
|
"learning_rate": 7.058217413704277e-05, |
|
"loss": 0.9195, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 2.7913446676970635, |
|
"grad_norm": 0.0014539181720465422, |
|
"learning_rate": 6.955177743431221e-05, |
|
"loss": 0.8835, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 2.7944358578052553, |
|
"grad_norm": 0.001557295210659504, |
|
"learning_rate": 6.852138073158167e-05, |
|
"loss": 0.8959, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 2.7975270479134466, |
|
"grad_norm": 0.0020530694164335728, |
|
"learning_rate": 6.749098402885111e-05, |
|
"loss": 0.9193, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 2.8006182380216385, |
|
"grad_norm": 0.0016817068681120872, |
|
"learning_rate": 6.646058732612057e-05, |
|
"loss": 0.8456, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 2.80370942812983, |
|
"grad_norm": 0.0019015265861526132, |
|
"learning_rate": 6.543019062339001e-05, |
|
"loss": 0.8723, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 2.8068006182380216, |
|
"grad_norm": 0.0015511283418163657, |
|
"learning_rate": 6.439979392065944e-05, |
|
"loss": 0.9393, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 2.8098918083462134, |
|
"grad_norm": 0.00140860746614635, |
|
"learning_rate": 6.33693972179289e-05, |
|
"loss": 0.8914, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 2.812982998454405, |
|
"grad_norm": 0.00200218940153718, |
|
"learning_rate": 6.233900051519836e-05, |
|
"loss": 0.8723, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 2.8160741885625966, |
|
"grad_norm": 0.001929080463014543, |
|
"learning_rate": 6.13086038124678e-05, |
|
"loss": 0.9241, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 2.819165378670788, |
|
"grad_norm": 0.0016536141047254205, |
|
"learning_rate": 6.0278207109737246e-05, |
|
"loss": 0.9228, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 2.82225656877898, |
|
"grad_norm": 0.0014851295854896307, |
|
"learning_rate": 5.9247810407006696e-05, |
|
"loss": 0.9038, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 2.8253477588871716, |
|
"grad_norm": 0.0014977608807384968, |
|
"learning_rate": 5.821741370427615e-05, |
|
"loss": 0.8881, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 2.8284389489953634, |
|
"grad_norm": 0.002265679184347391, |
|
"learning_rate": 5.71870170015456e-05, |
|
"loss": 0.8694, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 2.8315301391035548, |
|
"grad_norm": 0.0016861397307366133, |
|
"learning_rate": 5.615662029881505e-05, |
|
"loss": 0.849, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 2.8346213292117466, |
|
"grad_norm": 0.001969564938917756, |
|
"learning_rate": 5.512622359608449e-05, |
|
"loss": 0.8832, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 2.837712519319938, |
|
"grad_norm": 0.0019365076441317797, |
|
"learning_rate": 5.409582689335394e-05, |
|
"loss": 0.8763, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 2.8408037094281298, |
|
"grad_norm": 0.0024066604673862457, |
|
"learning_rate": 5.306543019062339e-05, |
|
"loss": 0.9283, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 2.8438948995363216, |
|
"grad_norm": 0.0017798724584281445, |
|
"learning_rate": 5.203503348789284e-05, |
|
"loss": 0.9098, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.8469860896445134, |
|
"grad_norm": 0.0014776487369090319, |
|
"learning_rate": 5.100463678516229e-05, |
|
"loss": 0.8685, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 2.8500772797527048, |
|
"grad_norm": 0.0016472855350002646, |
|
"learning_rate": 4.997424008243174e-05, |
|
"loss": 0.9151, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 2.8531684698608966, |
|
"grad_norm": 0.001969245495274663, |
|
"learning_rate": 4.894384337970119e-05, |
|
"loss": 0.8841, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 2.856259659969088, |
|
"grad_norm": 0.00151623017154634, |
|
"learning_rate": 4.791344667697063e-05, |
|
"loss": 0.8764, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 2.8593508500772797, |
|
"grad_norm": 0.0014065582072362304, |
|
"learning_rate": 4.688304997424008e-05, |
|
"loss": 0.8737, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 2.8624420401854715, |
|
"grad_norm": 0.001735221827402711, |
|
"learning_rate": 4.5852653271509534e-05, |
|
"loss": 0.8494, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 2.865533230293663, |
|
"grad_norm": 0.001790928072296083, |
|
"learning_rate": 4.4822256568778984e-05, |
|
"loss": 0.9272, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 2.8686244204018547, |
|
"grad_norm": 0.002074885880574584, |
|
"learning_rate": 4.379185986604843e-05, |
|
"loss": 0.8767, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 2.871715610510046, |
|
"grad_norm": 0.002597965532913804, |
|
"learning_rate": 4.276146316331788e-05, |
|
"loss": 0.9302, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 2.874806800618238, |
|
"grad_norm": 0.0019227894954383373, |
|
"learning_rate": 4.173106646058733e-05, |
|
"loss": 0.931, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 2.8778979907264297, |
|
"grad_norm": 0.0014659016160294414, |
|
"learning_rate": 4.070066975785677e-05, |
|
"loss": 0.944, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 2.8809891808346215, |
|
"grad_norm": 0.0017797836335375905, |
|
"learning_rate": 3.9670273055126224e-05, |
|
"loss": 0.95, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 2.884080370942813, |
|
"grad_norm": 0.0014417979400604963, |
|
"learning_rate": 3.8639876352395674e-05, |
|
"loss": 0.8956, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 2.8871715610510047, |
|
"grad_norm": 0.0013740757713094354, |
|
"learning_rate": 3.7609479649665125e-05, |
|
"loss": 0.9165, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 2.890262751159196, |
|
"grad_norm": 0.0016021078918129206, |
|
"learning_rate": 3.6579082946934575e-05, |
|
"loss": 0.9061, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 2.893353941267388, |
|
"grad_norm": 0.0017530877375975251, |
|
"learning_rate": 3.554868624420402e-05, |
|
"loss": 0.926, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 2.8964451313755797, |
|
"grad_norm": 0.0014526183949783444, |
|
"learning_rate": 3.451828954147346e-05, |
|
"loss": 0.8938, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 2.8995363214837715, |
|
"grad_norm": 0.001970699056982994, |
|
"learning_rate": 3.3487892838742914e-05, |
|
"loss": 0.8899, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 2.902627511591963, |
|
"grad_norm": 0.001455257530324161, |
|
"learning_rate": 3.2457496136012364e-05, |
|
"loss": 0.8998, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 2.9057187017001547, |
|
"grad_norm": 0.0023150176275521517, |
|
"learning_rate": 3.1427099433281815e-05, |
|
"loss": 0.8652, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.908809891808346, |
|
"grad_norm": 0.0021134279668331146, |
|
"learning_rate": 3.0396702730551262e-05, |
|
"loss": 0.8573, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 2.911901081916538, |
|
"grad_norm": 0.0016436997102573514, |
|
"learning_rate": 2.936630602782071e-05, |
|
"loss": 0.9263, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 2.9149922720247297, |
|
"grad_norm": 0.001375730847939849, |
|
"learning_rate": 2.833590932509016e-05, |
|
"loss": 0.8869, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 2.918083462132921, |
|
"grad_norm": 0.001517411321401596, |
|
"learning_rate": 2.730551262235961e-05, |
|
"loss": 0.9652, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 2.921174652241113, |
|
"grad_norm": 0.0015275340992957354, |
|
"learning_rate": 2.6275115919629058e-05, |
|
"loss": 0.8882, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 2.9242658423493046, |
|
"grad_norm": 0.0019407563377171755, |
|
"learning_rate": 2.5244719216898505e-05, |
|
"loss": 0.9207, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 2.927357032457496, |
|
"grad_norm": 0.0013504921225830913, |
|
"learning_rate": 2.4214322514167955e-05, |
|
"loss": 0.8926, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 2.930448222565688, |
|
"grad_norm": 0.0017280855681747198, |
|
"learning_rate": 2.3183925811437406e-05, |
|
"loss": 0.9399, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 2.9335394126738796, |
|
"grad_norm": 0.0019845685455948114, |
|
"learning_rate": 2.215352910870685e-05, |
|
"loss": 0.9159, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 2.936630602782071, |
|
"grad_norm": 0.001844555838033557, |
|
"learning_rate": 2.11231324059763e-05, |
|
"loss": 0.9431, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.939721792890263, |
|
"grad_norm": 0.0013761295704171062, |
|
"learning_rate": 2.009273570324575e-05, |
|
"loss": 0.9008, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 2.942812982998454, |
|
"grad_norm": 0.001978537067770958, |
|
"learning_rate": 1.90623390005152e-05, |
|
"loss": 0.949, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 2.945904173106646, |
|
"grad_norm": 0.001545691047795117, |
|
"learning_rate": 1.8031942297784645e-05, |
|
"loss": 0.9337, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 2.948995363214838, |
|
"grad_norm": 0.0017250186065211892, |
|
"learning_rate": 1.7001545595054096e-05, |
|
"loss": 0.8819, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 2.9520865533230296, |
|
"grad_norm": 0.0014475194038823247, |
|
"learning_rate": 1.5971148892323546e-05, |
|
"loss": 0.9038, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 2.955177743431221, |
|
"grad_norm": 0.0015622404171153903, |
|
"learning_rate": 1.4940752189592994e-05, |
|
"loss": 0.8629, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 2.958268933539413, |
|
"grad_norm": 0.00176430051214993, |
|
"learning_rate": 1.3910355486862443e-05, |
|
"loss": 0.9482, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 2.961360123647604, |
|
"grad_norm": 0.002005284419283271, |
|
"learning_rate": 1.287995878413189e-05, |
|
"loss": 0.9089, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 2.964451313755796, |
|
"grad_norm": 0.0017773432191461325, |
|
"learning_rate": 1.184956208140134e-05, |
|
"loss": 0.9071, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 2.9675425038639878, |
|
"grad_norm": 0.00213231542147696, |
|
"learning_rate": 1.0819165378670788e-05, |
|
"loss": 0.9035, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.970633693972179, |
|
"grad_norm": 0.002046185778453946, |
|
"learning_rate": 9.788768675940238e-06, |
|
"loss": 0.9451, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 2.973724884080371, |
|
"grad_norm": 0.001680073793977499, |
|
"learning_rate": 8.758371973209685e-06, |
|
"loss": 0.8838, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 2.9768160741885628, |
|
"grad_norm": 0.0015061446465551853, |
|
"learning_rate": 7.727975270479134e-06, |
|
"loss": 0.8764, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 2.979907264296754, |
|
"grad_norm": 0.0019097643671557307, |
|
"learning_rate": 6.697578567748584e-06, |
|
"loss": 0.9369, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 2.982998454404946, |
|
"grad_norm": 0.0017981400014832616, |
|
"learning_rate": 5.667181865018033e-06, |
|
"loss": 0.8498, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 2.9860896445131377, |
|
"grad_norm": 0.0021447227336466312, |
|
"learning_rate": 4.636785162287481e-06, |
|
"loss": 0.9168, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 2.989180834621329, |
|
"grad_norm": 0.0015040052821859717, |
|
"learning_rate": 3.6063884595569293e-06, |
|
"loss": 0.9233, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 2.992272024729521, |
|
"grad_norm": 0.00173336046282202, |
|
"learning_rate": 2.575991756826378e-06, |
|
"loss": 0.9224, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 2.9953632148377123, |
|
"grad_norm": 0.00168697745539248, |
|
"learning_rate": 1.5455950540958269e-06, |
|
"loss": 0.9131, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 2.998454404945904, |
|
"grad_norm": 0.0016643669223412871, |
|
"learning_rate": 5.151983513652757e-07, |
|
"loss": 0.945, |
|
"step": 9700 |
|
} |
|
],
"logging_steps": 10,
"max_steps": 9705,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.329354151166001e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}