{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9998077107970387,
  "eval_steps": 500,
  "global_step": 2600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.007691568118450149,
      "grad_norm": 0.8310319185256958,
      "learning_rate": 1.923076923076923e-05,
      "loss": 2.3717,
      "step": 10
    },
    {
      "epoch": 0.015383136236900298,
      "grad_norm": 0.8349233269691467,
      "learning_rate": 3.846153846153846e-05,
      "loss": 2.2925,
      "step": 20
    },
    {
      "epoch": 0.02307470435535045,
      "grad_norm": 0.7424416542053223,
      "learning_rate": 4.992229992229993e-05,
      "loss": 2.0978,
      "step": 30
    },
    {
      "epoch": 0.030766272473800597,
      "grad_norm": 0.7559643387794495,
      "learning_rate": 4.972804972804973e-05,
      "loss": 1.9267,
      "step": 40
    },
    {
      "epoch": 0.03845784059225075,
      "grad_norm": 0.6887438297271729,
      "learning_rate": 4.9533799533799534e-05,
      "loss": 1.7711,
      "step": 50
    },
    {
      "epoch": 0.0461494087107009,
      "grad_norm": 0.8163133263587952,
      "learning_rate": 4.9339549339549344e-05,
      "loss": 1.7121,
      "step": 60
    },
    {
      "epoch": 0.053840976829151045,
      "grad_norm": 0.979810357093811,
      "learning_rate": 4.9145299145299147e-05,
      "loss": 1.6597,
      "step": 70
    },
    {
      "epoch": 0.061532544947601193,
      "grad_norm": 0.9440365433692932,
      "learning_rate": 4.8951048951048956e-05,
      "loss": 1.6236,
      "step": 80
    },
    {
      "epoch": 0.06922411306605133,
      "grad_norm": 0.8523435592651367,
      "learning_rate": 4.875679875679876e-05,
      "loss": 1.616,
      "step": 90
    },
    {
      "epoch": 0.0769156811845015,
      "grad_norm": 1.1356542110443115,
      "learning_rate": 4.856254856254856e-05,
      "loss": 1.58,
      "step": 100
    },
    {
      "epoch": 0.08460724930295165,
      "grad_norm": 1.1632951498031616,
      "learning_rate": 4.836829836829837e-05,
      "loss": 1.5241,
      "step": 110
    },
    {
      "epoch": 0.0922988174214018,
      "grad_norm": 1.1739879846572876,
      "learning_rate": 4.8174048174048175e-05,
      "loss": 1.4737,
      "step": 120
    },
    {
      "epoch": 0.09999038553985194,
      "grad_norm": 1.1852024793624878,
      "learning_rate": 4.797979797979798e-05,
      "loss": 1.5002,
      "step": 130
    },
    {
      "epoch": 0.10768195365830209,
      "grad_norm": 1.2705507278442383,
      "learning_rate": 4.778554778554779e-05,
      "loss": 1.4457,
      "step": 140
    },
    {
      "epoch": 0.11537352177675224,
      "grad_norm": 1.3838472366333008,
      "learning_rate": 4.75912975912976e-05,
      "loss": 1.4372,
      "step": 150
    },
    {
      "epoch": 0.12306508989520239,
      "grad_norm": 1.463762640953064,
      "learning_rate": 4.73970473970474e-05,
      "loss": 1.4695,
      "step": 160
    },
    {
      "epoch": 0.13075665801365252,
      "grad_norm": 1.4812041521072388,
      "learning_rate": 4.7202797202797204e-05,
      "loss": 1.465,
      "step": 170
    },
    {
      "epoch": 0.13844822613210267,
      "grad_norm": 1.3840124607086182,
      "learning_rate": 4.700854700854701e-05,
      "loss": 1.4121,
      "step": 180
    },
    {
      "epoch": 0.14613979425055282,
      "grad_norm": 1.5955114364624023,
      "learning_rate": 4.681429681429682e-05,
      "loss": 1.4064,
      "step": 190
    },
    {
      "epoch": 0.153831362369003,
      "grad_norm": 1.4825975894927979,
      "learning_rate": 4.662004662004662e-05,
      "loss": 1.3869,
      "step": 200
    },
    {
      "epoch": 0.16152293048745314,
      "grad_norm": 1.5520519018173218,
      "learning_rate": 4.642579642579643e-05,
      "loss": 1.3914,
      "step": 210
    },
    {
      "epoch": 0.1692144986059033,
      "grad_norm": 1.3405165672302246,
      "learning_rate": 4.623154623154623e-05,
      "loss": 1.378,
      "step": 220
    },
    {
      "epoch": 0.17690606672435344,
      "grad_norm": 1.4172375202178955,
      "learning_rate": 4.603729603729604e-05,
      "loss": 1.3843,
      "step": 230
    },
    {
      "epoch": 0.1845976348428036,
      "grad_norm": 1.447364330291748,
      "learning_rate": 4.5843045843045846e-05,
      "loss": 1.3959,
      "step": 240
    },
    {
      "epoch": 0.19228920296125374,
      "grad_norm": 1.5292952060699463,
      "learning_rate": 4.564879564879565e-05,
      "loss": 1.3926,
      "step": 250
    },
    {
      "epoch": 0.19998077107970388,
      "grad_norm": 1.5983524322509766,
      "learning_rate": 4.545454545454546e-05,
      "loss": 1.3721,
      "step": 260
    },
    {
      "epoch": 0.20767233919815403,
      "grad_norm": 1.520230770111084,
      "learning_rate": 4.526029526029526e-05,
      "loss": 1.362,
      "step": 270
    },
    {
      "epoch": 0.21536390731660418,
      "grad_norm": 1.5406700372695923,
      "learning_rate": 4.506604506604507e-05,
      "loss": 1.36,
      "step": 280
    },
    {
      "epoch": 0.22305547543505433,
      "grad_norm": 1.5974327325820923,
      "learning_rate": 4.4871794871794874e-05,
      "loss": 1.3326,
      "step": 290
    },
    {
      "epoch": 0.23074704355350448,
      "grad_norm": 1.5478413105010986,
      "learning_rate": 4.467754467754468e-05,
      "loss": 1.3277,
      "step": 300
    },
    {
      "epoch": 0.23843861167195463,
      "grad_norm": 1.537761926651001,
      "learning_rate": 4.448329448329449e-05,
      "loss": 1.3329,
      "step": 310
    },
    {
      "epoch": 0.24613017979040477,
      "grad_norm": 1.5494816303253174,
      "learning_rate": 4.428904428904429e-05,
      "loss": 1.2966,
      "step": 320
    },
    {
      "epoch": 0.2538217479088549,
      "grad_norm": 1.6187357902526855,
      "learning_rate": 4.4094794094794093e-05,
      "loss": 1.3152,
      "step": 330
    },
    {
      "epoch": 0.26151331602730504,
      "grad_norm": 1.9419796466827393,
      "learning_rate": 4.39005439005439e-05,
      "loss": 1.3223,
      "step": 340
    },
    {
      "epoch": 0.2692048841457552,
      "grad_norm": 1.6638212203979492,
      "learning_rate": 4.370629370629371e-05,
      "loss": 1.3366,
      "step": 350
    },
    {
      "epoch": 0.27689645226420534,
      "grad_norm": 1.8047728538513184,
      "learning_rate": 4.3512043512043516e-05,
      "loss": 1.3156,
      "step": 360
    },
    {
      "epoch": 0.2845880203826555,
      "grad_norm": 1.6696109771728516,
      "learning_rate": 4.331779331779332e-05,
      "loss": 1.3229,
      "step": 370
    },
    {
      "epoch": 0.29227958850110564,
      "grad_norm": 1.6470463275909424,
      "learning_rate": 4.312354312354312e-05,
      "loss": 1.2842,
      "step": 380
    },
    {
      "epoch": 0.2999711566195558,
      "grad_norm": 1.6966880559921265,
      "learning_rate": 4.292929292929293e-05,
      "loss": 1.2962,
      "step": 390
    },
    {
      "epoch": 0.307662724738006,
      "grad_norm": 2.1080636978149414,
      "learning_rate": 4.2735042735042735e-05,
      "loss": 1.2865,
      "step": 400
    },
    {
      "epoch": 0.3153542928564561,
      "grad_norm": 1.9488563537597656,
      "learning_rate": 4.254079254079254e-05,
      "loss": 1.2932,
      "step": 410
    },
    {
      "epoch": 0.3230458609749063,
      "grad_norm": 1.8715245723724365,
      "learning_rate": 4.234654234654235e-05,
      "loss": 1.2751,
      "step": 420
    },
    {
      "epoch": 0.3307374290933564,
      "grad_norm": 1.6135368347167969,
      "learning_rate": 4.215229215229216e-05,
      "loss": 1.2923,
      "step": 430
    },
    {
      "epoch": 0.3384289972118066,
      "grad_norm": 2.0071396827697754,
      "learning_rate": 4.195804195804196e-05,
      "loss": 1.3004,
      "step": 440
    },
    {
      "epoch": 0.3461205653302567,
      "grad_norm": 1.954253077507019,
      "learning_rate": 4.1763791763791764e-05,
      "loss": 1.2898,
      "step": 450
    },
    {
      "epoch": 0.3538121334487069,
      "grad_norm": 2.44484806060791,
      "learning_rate": 4.1569541569541574e-05,
      "loss": 1.2873,
      "step": 460
    },
    {
      "epoch": 0.361503701567157,
      "grad_norm": 1.7962288856506348,
      "learning_rate": 4.1375291375291377e-05,
      "loss": 1.2697,
      "step": 470
    },
    {
      "epoch": 0.3691952696856072,
      "grad_norm": 1.6677371263504028,
      "learning_rate": 4.1181041181041186e-05,
      "loss": 1.2602,
      "step": 480
    },
    {
      "epoch": 0.3768868378040573,
      "grad_norm": 1.6148051023483276,
      "learning_rate": 4.098679098679099e-05,
      "loss": 1.2889,
      "step": 490
    },
    {
      "epoch": 0.38457840592250747,
      "grad_norm": 2.0414624214172363,
      "learning_rate": 4.079254079254079e-05,
      "loss": 1.2861,
      "step": 500
    },
    {
      "epoch": 0.3922699740409576,
      "grad_norm": 1.5257327556610107,
      "learning_rate": 4.05982905982906e-05,
      "loss": 1.2399,
      "step": 510
    },
    {
      "epoch": 0.39996154215940777,
      "grad_norm": 1.9141441583633423,
      "learning_rate": 4.0404040404040405e-05,
      "loss": 1.2775,
      "step": 520
    },
    {
      "epoch": 0.4076531102778579,
      "grad_norm": 1.686346411705017,
      "learning_rate": 4.020979020979021e-05,
      "loss": 1.2471,
      "step": 530
    },
    {
      "epoch": 0.41534467839630806,
      "grad_norm": 1.7622222900390625,
      "learning_rate": 4.001554001554002e-05,
      "loss": 1.2707,
      "step": 540
    },
    {
      "epoch": 0.4230362465147582,
      "grad_norm": 1.8795199394226074,
      "learning_rate": 3.982128982128983e-05,
      "loss": 1.2862,
      "step": 550
    },
    {
      "epoch": 0.43072781463320836,
      "grad_norm": 1.8214608430862427,
      "learning_rate": 3.962703962703963e-05,
      "loss": 1.2553,
      "step": 560
    },
    {
      "epoch": 0.4384193827516585,
      "grad_norm": 1.7057331800460815,
      "learning_rate": 3.9432789432789434e-05,
      "loss": 1.2684,
      "step": 570
    },
    {
      "epoch": 0.44611095087010866,
      "grad_norm": 1.9549400806427002,
      "learning_rate": 3.923853923853924e-05,
      "loss": 1.2512,
      "step": 580
    },
    {
      "epoch": 0.4538025189885588,
      "grad_norm": 1.906495451927185,
      "learning_rate": 3.904428904428905e-05,
      "loss": 1.2524,
      "step": 590
    },
    {
      "epoch": 0.46149408710700895,
      "grad_norm": 1.76618230342865,
      "learning_rate": 3.885003885003885e-05,
      "loss": 1.2613,
      "step": 600
    },
    {
      "epoch": 0.4691856552254591,
      "grad_norm": 2.038341522216797,
      "learning_rate": 3.865578865578865e-05,
      "loss": 1.2266,
      "step": 610
    },
    {
      "epoch": 0.47687722334390925,
      "grad_norm": 1.7204527854919434,
      "learning_rate": 3.846153846153846e-05,
      "loss": 1.2478,
      "step": 620
    },
    {
      "epoch": 0.48456879146235937,
      "grad_norm": 1.7129229307174683,
      "learning_rate": 3.826728826728827e-05,
      "loss": 1.2207,
      "step": 630
    },
    {
      "epoch": 0.49226035958080955,
      "grad_norm": 1.8858224153518677,
      "learning_rate": 3.8073038073038076e-05,
      "loss": 1.2219,
      "step": 640
    },
    {
      "epoch": 0.49995192769925967,
      "grad_norm": 1.8016833066940308,
      "learning_rate": 3.787878787878788e-05,
      "loss": 1.2586,
      "step": 650
    },
    {
      "epoch": 0.5076434958177098,
      "grad_norm": 1.7311338186264038,
      "learning_rate": 3.768453768453769e-05,
      "loss": 1.2072,
      "step": 660
    },
    {
      "epoch": 0.51533506393616,
      "grad_norm": 1.7362291812896729,
      "learning_rate": 3.749028749028749e-05,
      "loss": 1.229,
      "step": 670
    },
    {
      "epoch": 0.5230266320546101,
      "grad_norm": 2.358015537261963,
      "learning_rate": 3.72960372960373e-05,
      "loss": 1.196,
      "step": 680
    },
    {
      "epoch": 0.5307182001730603,
      "grad_norm": 1.9326188564300537,
      "learning_rate": 3.7101787101787104e-05,
      "loss": 1.2491,
      "step": 690
    },
    {
      "epoch": 0.5384097682915104,
      "grad_norm": 1.9867770671844482,
      "learning_rate": 3.690753690753691e-05,
      "loss": 1.2383,
      "step": 700
    },
    {
      "epoch": 0.5461013364099606,
      "grad_norm": 2.026604175567627,
      "learning_rate": 3.671328671328672e-05,
      "loss": 1.2172,
      "step": 710
    },
    {
      "epoch": 0.5537929045284107,
      "grad_norm": 2.0612032413482666,
      "learning_rate": 3.651903651903652e-05,
      "loss": 1.2607,
      "step": 720
    },
    {
      "epoch": 0.5614844726468609,
      "grad_norm": 1.8911861181259155,
      "learning_rate": 3.6324786324786323e-05,
      "loss": 1.2352,
      "step": 730
    },
    {
      "epoch": 0.569176040765311,
      "grad_norm": 1.8968251943588257,
      "learning_rate": 3.613053613053613e-05,
      "loss": 1.2274,
      "step": 740
    },
    {
      "epoch": 0.5768676088837612,
      "grad_norm": 1.8784927129745483,
      "learning_rate": 3.593628593628594e-05,
      "loss": 1.2138,
      "step": 750
    },
    {
      "epoch": 0.5845591770022113,
      "grad_norm": 2.070425033569336,
      "learning_rate": 3.5742035742035746e-05,
      "loss": 1.2343,
      "step": 760
    },
    {
      "epoch": 0.5922507451206614,
      "grad_norm": 2.00534987449646,
      "learning_rate": 3.554778554778555e-05,
      "loss": 1.2484,
      "step": 770
    },
    {
      "epoch": 0.5999423132391116,
      "grad_norm": 2.018569231033325,
      "learning_rate": 3.535353535353535e-05,
      "loss": 1.1964,
      "step": 780
    },
    {
      "epoch": 0.6076338813575618,
      "grad_norm": 2.2748711109161377,
      "learning_rate": 3.515928515928516e-05,
      "loss": 1.2415,
      "step": 790
    },
    {
      "epoch": 0.615325449476012,
      "grad_norm": 1.8931610584259033,
      "learning_rate": 3.4965034965034965e-05,
      "loss": 1.2515,
      "step": 800
    },
    {
      "epoch": 0.623017017594462,
      "grad_norm": 1.8468350172042847,
      "learning_rate": 3.477078477078477e-05,
      "loss": 1.2381,
      "step": 810
    },
    {
      "epoch": 0.6307085857129122,
      "grad_norm": 2.2593376636505127,
      "learning_rate": 3.457653457653458e-05,
      "loss": 1.2556,
      "step": 820
    },
    {
      "epoch": 0.6384001538313624,
      "grad_norm": 1.9469683170318604,
      "learning_rate": 3.438228438228439e-05,
      "loss": 1.2071,
      "step": 830
    },
    {
      "epoch": 0.6460917219498126,
      "grad_norm": 1.769471526145935,
      "learning_rate": 3.418803418803419e-05,
      "loss": 1.2112,
      "step": 840
    },
    {
      "epoch": 0.6537832900682626,
      "grad_norm": 2.112982988357544,
      "learning_rate": 3.3993783993783994e-05,
      "loss": 1.2471,
      "step": 850
    },
    {
      "epoch": 0.6614748581867128,
      "grad_norm": 1.8016294240951538,
      "learning_rate": 3.37995337995338e-05,
      "loss": 1.2145,
      "step": 860
    },
    {
      "epoch": 0.669166426305163,
      "grad_norm": 1.9988363981246948,
      "learning_rate": 3.3605283605283607e-05,
      "loss": 1.191,
      "step": 870
    },
    {
      "epoch": 0.6768579944236132,
      "grad_norm": 1.8531577587127686,
      "learning_rate": 3.341103341103341e-05,
      "loss": 1.1722,
      "step": 880
    },
    {
      "epoch": 0.6845495625420632,
      "grad_norm": 1.9101706743240356,
      "learning_rate": 3.321678321678322e-05,
      "loss": 1.1738,
      "step": 890
    },
    {
      "epoch": 0.6922411306605134,
      "grad_norm": 2.064260482788086,
      "learning_rate": 3.302253302253302e-05,
      "loss": 1.2045,
      "step": 900
    },
    {
      "epoch": 0.6999326987789636,
      "grad_norm": 1.895975947380066,
      "learning_rate": 3.282828282828283e-05,
      "loss": 1.1815,
      "step": 910
    },
    {
      "epoch": 0.7076242668974138,
      "grad_norm": 1.6610194444656372,
      "learning_rate": 3.2634032634032635e-05,
      "loss": 1.2059,
      "step": 920
    },
    {
      "epoch": 0.7153158350158638,
      "grad_norm": 1.8297659158706665,
      "learning_rate": 3.243978243978244e-05,
      "loss": 1.2034,
      "step": 930
    },
    {
      "epoch": 0.723007403134314,
      "grad_norm": 2.0258054733276367,
      "learning_rate": 3.224553224553225e-05,
      "loss": 1.2051,
      "step": 940
    },
    {
      "epoch": 0.7306989712527642,
      "grad_norm": 1.9012644290924072,
      "learning_rate": 3.205128205128206e-05,
      "loss": 1.2072,
      "step": 950
    },
    {
      "epoch": 0.7383905393712143,
      "grad_norm": 1.8051044940948486,
      "learning_rate": 3.185703185703186e-05,
      "loss": 1.2149,
      "step": 960
    },
    {
      "epoch": 0.7460821074896644,
      "grad_norm": 2.0245988368988037,
      "learning_rate": 3.1662781662781664e-05,
      "loss": 1.1886,
      "step": 970
    },
    {
      "epoch": 0.7537736756081146,
      "grad_norm": 1.9541068077087402,
      "learning_rate": 3.146853146853147e-05,
      "loss": 1.2094,
      "step": 980
    },
    {
      "epoch": 0.7614652437265648,
      "grad_norm": 1.8421745300292969,
      "learning_rate": 3.127428127428128e-05,
      "loss": 1.2041,
      "step": 990
    },
    {
      "epoch": 0.7691568118450149,
      "grad_norm": 1.866947889328003,
      "learning_rate": 3.108003108003108e-05,
      "loss": 1.2103,
      "step": 1000
    },
    {
      "epoch": 0.776848379963465,
      "grad_norm": 2.1929476261138916,
      "learning_rate": 3.088578088578088e-05,
      "loss": 1.2058,
      "step": 1010
    },
    {
      "epoch": 0.7845399480819152,
      "grad_norm": 2.0555646419525146,
      "learning_rate": 3.069153069153069e-05,
      "loss": 1.1894,
      "step": 1020
    },
    {
      "epoch": 0.7922315162003654,
      "grad_norm": 2.0418717861175537,
      "learning_rate": 3.04972804972805e-05,
      "loss": 1.1942,
      "step": 1030
    },
    {
      "epoch": 0.7999230843188155,
      "grad_norm": 2.218815326690674,
      "learning_rate": 3.0303030303030306e-05,
      "loss": 1.1676,
      "step": 1040
    },
    {
      "epoch": 0.8076146524372656,
      "grad_norm": 1.9924787282943726,
      "learning_rate": 3.010878010878011e-05,
      "loss": 1.1938,
      "step": 1050
    },
    {
      "epoch": 0.8153062205557158,
      "grad_norm": 1.7555562257766724,
      "learning_rate": 2.9914529914529915e-05,
      "loss": 1.1737,
      "step": 1060
    },
    {
      "epoch": 0.822997788674166,
      "grad_norm": 1.8218282461166382,
      "learning_rate": 2.972027972027972e-05,
      "loss": 1.1635,
      "step": 1070
    },
    {
      "epoch": 0.8306893567926161,
      "grad_norm": 1.9514037370681763,
      "learning_rate": 2.9526029526029525e-05,
      "loss": 1.1751,
      "step": 1080
    },
    {
      "epoch": 0.8383809249110662,
      "grad_norm": 1.995100498199463,
      "learning_rate": 2.9331779331779334e-05,
      "loss": 1.1705,
      "step": 1090
    },
    {
      "epoch": 0.8460724930295164,
      "grad_norm": 1.9724220037460327,
      "learning_rate": 2.913752913752914e-05,
      "loss": 1.2067,
      "step": 1100
    },
    {
      "epoch": 0.8537640611479665,
      "grad_norm": 2.083712577819824,
      "learning_rate": 2.8943278943278944e-05,
      "loss": 1.1644,
      "step": 1110
    },
    {
      "epoch": 0.8614556292664167,
      "grad_norm": 2.1601104736328125,
      "learning_rate": 2.874902874902875e-05,
      "loss": 1.1772,
      "step": 1120
    },
    {
      "epoch": 0.8691471973848668,
      "grad_norm": 2.0567638874053955,
      "learning_rate": 2.8554778554778557e-05,
      "loss": 1.1846,
      "step": 1130
    },
    {
      "epoch": 0.876838765503317,
      "grad_norm": 2.4114835262298584,
      "learning_rate": 2.836052836052836e-05,
      "loss": 1.1658,
      "step": 1140
    },
    {
      "epoch": 0.8845303336217671,
      "grad_norm": 1.685823917388916,
      "learning_rate": 2.8166278166278166e-05,
      "loss": 1.1858,
      "step": 1150
    },
    {
      "epoch": 0.8922219017402173,
      "grad_norm": 2.006232738494873,
      "learning_rate": 2.7972027972027976e-05,
      "loss": 1.1781,
      "step": 1160
    },
    {
      "epoch": 0.8999134698586674,
      "grad_norm": 2.089742660522461,
      "learning_rate": 2.777777777777778e-05,
      "loss": 1.1569,
      "step": 1170
    },
    {
      "epoch": 0.9076050379771176,
      "grad_norm": 1.8699461221694946,
      "learning_rate": 2.7583527583527586e-05,
      "loss": 1.1653,
      "step": 1180
    },
    {
      "epoch": 0.9152966060955677,
      "grad_norm": 1.997068166732788,
      "learning_rate": 2.738927738927739e-05,
      "loss": 1.2116,
      "step": 1190
    },
    {
      "epoch": 0.9229881742140179,
      "grad_norm": 2.135075807571411,
      "learning_rate": 2.7195027195027195e-05,
      "loss": 1.2062,
      "step": 1200
    },
    {
      "epoch": 0.9306797423324681,
      "grad_norm": 2.3898837566375732,
      "learning_rate": 2.7000777000777e-05,
      "loss": 1.1802,
      "step": 1210
    },
    {
      "epoch": 0.9383713104509182,
      "grad_norm": 1.8470486402511597,
      "learning_rate": 2.680652680652681e-05,
      "loss": 1.1388,
      "step": 1220
    },
    {
      "epoch": 0.9460628785693683,
      "grad_norm": 1.909771203994751,
      "learning_rate": 2.6612276612276614e-05,
      "loss": 1.1456,
      "step": 1230
    },
    {
      "epoch": 0.9537544466878185,
      "grad_norm": 1.75010085105896,
      "learning_rate": 2.641802641802642e-05,
      "loss": 1.1792,
      "step": 1240
    },
    {
      "epoch": 0.9614460148062687,
      "grad_norm": 1.835392951965332,
      "learning_rate": 2.6223776223776224e-05,
      "loss": 1.1666,
      "step": 1250
    },
    {
      "epoch": 0.9691375829247187,
      "grad_norm": 1.74484384059906,
      "learning_rate": 2.602952602952603e-05,
      "loss": 1.184,
      "step": 1260
    },
    {
      "epoch": 0.9768291510431689,
      "grad_norm": 1.9739990234375,
      "learning_rate": 2.5835275835275837e-05,
      "loss": 1.1342,
      "step": 1270
    },
    {
      "epoch": 0.9845207191616191,
      "grad_norm": 1.9165340662002563,
      "learning_rate": 2.564102564102564e-05,
      "loss": 1.1852,
      "step": 1280
    },
    {
      "epoch": 0.9922122872800693,
      "grad_norm": 2.0717194080352783,
      "learning_rate": 2.544677544677545e-05,
      "loss": 1.1611,
      "step": 1290
    },
    {
      "epoch": 0.9999038553985193,
      "grad_norm": 1.7079836130142212,
      "learning_rate": 2.5252525252525256e-05,
      "loss": 1.1498,
      "step": 1300
    },
    {
      "epoch": 1.0075954235169695,
      "grad_norm": 1.904863953590393,
      "learning_rate": 2.505827505827506e-05,
      "loss": 1.252,
      "step": 1310
    },
    {
      "epoch": 1.0152869916354197,
      "grad_norm": 1.7407102584838867,
      "learning_rate": 2.4864024864024865e-05,
      "loss": 1.1177,
      "step": 1320
    },
    {
      "epoch": 1.0229785597538699,
      "grad_norm": 2.13275146484375,
      "learning_rate": 2.4669774669774672e-05,
      "loss": 1.1409,
      "step": 1330
    },
    {
      "epoch": 1.03067012787232,
      "grad_norm": 1.8755226135253906,
      "learning_rate": 2.4475524475524478e-05,
      "loss": 1.1262,
      "step": 1340
    },
    {
      "epoch": 1.0383616959907702,
      "grad_norm": 2.0891833305358887,
      "learning_rate": 2.428127428127428e-05,
      "loss": 1.1126,
      "step": 1350
    },
    {
      "epoch": 1.0460532641092202,
      "grad_norm": 2.072007894515991,
      "learning_rate": 2.4087024087024088e-05,
      "loss": 1.1098,
      "step": 1360
    },
    {
      "epoch": 1.0537448322276703,
      "grad_norm": 1.8499518632888794,
      "learning_rate": 2.3892773892773894e-05,
      "loss": 1.1199,
      "step": 1370
    },
    {
      "epoch": 1.0614364003461205,
      "grad_norm": 1.9085497856140137,
      "learning_rate": 2.36985236985237e-05,
      "loss": 1.1178,
      "step": 1380
    },
    {
      "epoch": 1.0691279684645707,
      "grad_norm": 1.980193018913269,
      "learning_rate": 2.3504273504273504e-05,
      "loss": 1.1405,
      "step": 1390
    },
    {
      "epoch": 1.0768195365830209,
      "grad_norm": 1.961827039718628,
      "learning_rate": 2.331002331002331e-05,
      "loss": 1.1294,
      "step": 1400
    },
    {
      "epoch": 1.084511104701471,
      "grad_norm": 2.014617443084717,
      "learning_rate": 2.3115773115773116e-05,
      "loss": 1.1186,
      "step": 1410
    },
    {
      "epoch": 1.0922026728199212,
      "grad_norm": 2.0065386295318604,
      "learning_rate": 2.2921522921522923e-05,
      "loss": 1.0977,
      "step": 1420
    },
    {
      "epoch": 1.0998942409383714,
      "grad_norm": 1.9613438844680786,
      "learning_rate": 2.272727272727273e-05,
      "loss": 1.1649,
      "step": 1430
    },
    {
      "epoch": 1.1075858090568214,
      "grad_norm": 2.0321905612945557,
      "learning_rate": 2.2533022533022536e-05,
      "loss": 1.1437,
      "step": 1440
    },
    {
      "epoch": 1.1152773771752715,
      "grad_norm": 1.954882264137268,
      "learning_rate": 2.233877233877234e-05,
      "loss": 1.1166,
      "step": 1450
    },
    {
      "epoch": 1.1229689452937217,
      "grad_norm": 2.1001336574554443,
      "learning_rate": 2.2144522144522145e-05,
      "loss": 1.1222,
      "step": 1460
    },
    {
      "epoch": 1.1306605134121719,
      "grad_norm": 1.9967658519744873,
      "learning_rate": 2.195027195027195e-05,
      "loss": 1.113,
      "step": 1470
    },
    {
      "epoch": 1.138352081530622,
      "grad_norm": 2.000359296798706,
      "learning_rate": 2.1756021756021758e-05,
      "loss": 1.1019,
      "step": 1480
    },
    {
      "epoch": 1.1460436496490722,
      "grad_norm": 1.7127162218093872,
      "learning_rate": 2.156177156177156e-05,
      "loss": 1.1077,
      "step": 1490
    },
    {
      "epoch": 1.1537352177675224,
      "grad_norm": 2.044915199279785,
      "learning_rate": 2.1367521367521368e-05,
      "loss": 1.1493,
      "step": 1500
    },
    {
      "epoch": 1.1614267858859726,
      "grad_norm": 1.906977653503418,
      "learning_rate": 2.1173271173271174e-05,
      "loss": 1.0935,
      "step": 1510
    },
    {
      "epoch": 1.1691183540044228,
      "grad_norm": 2.004258871078491,
      "learning_rate": 2.097902097902098e-05,
      "loss": 1.1548,
      "step": 1520
    },
    {
      "epoch": 1.1768099221228727,
      "grad_norm": 2.028057813644409,
      "learning_rate": 2.0784770784770787e-05,
      "loss": 1.12,
      "step": 1530
    },
    {
      "epoch": 1.184501490241323,
      "grad_norm": 2.0909903049468994,
      "learning_rate": 2.0590520590520593e-05,
      "loss": 1.1347,
      "step": 1540
    },
    {
      "epoch": 1.192193058359773,
      "grad_norm": 2.309083938598633,
      "learning_rate": 2.0396270396270396e-05,
      "loss": 1.1467,
      "step": 1550
    },
    {
      "epoch": 1.1998846264782232,
      "grad_norm": 2.195953845977783,
      "learning_rate": 2.0202020202020203e-05,
      "loss": 1.1166,
      "step": 1560
    },
    {
      "epoch": 1.2075761945966734,
      "grad_norm": 1.8063645362854004,
      "learning_rate": 2.000777000777001e-05,
      "loss": 1.0918,
      "step": 1570
    },
    {
      "epoch": 1.2152677627151236,
      "grad_norm": 2.0312583446502686,
      "learning_rate": 1.9813519813519816e-05,
      "loss": 1.1409,
      "step": 1580
    },
    {
      "epoch": 1.2229593308335738,
      "grad_norm": 2.091669797897339,
      "learning_rate": 1.961926961926962e-05,
      "loss": 1.0938,
      "step": 1590
    },
    {
      "epoch": 1.2306508989520237,
      "grad_norm": 1.7335410118103027,
      "learning_rate": 1.9425019425019425e-05,
      "loss": 1.1085,
      "step": 1600
    },
    {
      "epoch": 1.238342467070474,
      "grad_norm": 2.0697057247161865,
      "learning_rate": 1.923076923076923e-05,
      "loss": 1.1355,
      "step": 1610
    },
    {
      "epoch": 1.246034035188924,
      "grad_norm": 2.063481569290161,
      "learning_rate": 1.9036519036519038e-05,
      "loss": 1.1069,
      "step": 1620
    },
    {
      "epoch": 1.2537256033073743,
      "grad_norm": 2.1378798484802246,
      "learning_rate": 1.8842268842268844e-05,
      "loss": 1.1273,
      "step": 1630
    },
    {
      "epoch": 1.2614171714258244,
      "grad_norm": 2.2633299827575684,
      "learning_rate": 1.864801864801865e-05,
      "loss": 1.1173,
      "step": 1640
    },
    {
      "epoch": 1.2691087395442746,
      "grad_norm": 1.8550769090652466,
      "learning_rate": 1.8453768453768454e-05,
      "loss": 1.0902,
      "step": 1650
    },
    {
      "epoch": 1.2768003076627248,
      "grad_norm": 1.6822105646133423,
      "learning_rate": 1.825951825951826e-05,
      "loss": 1.0986,
      "step": 1660
    },
    {
      "epoch": 1.284491875781175,
      "grad_norm": 1.8236192464828491,
      "learning_rate": 1.8065268065268067e-05,
      "loss": 1.1102,
      "step": 1670
    },
    {
      "epoch": 1.2921834438996251,
      "grad_norm": 2.2287065982818604,
      "learning_rate": 1.7871017871017873e-05,
      "loss": 1.1302,
      "step": 1680
    },
    {
      "epoch": 1.299875012018075,
      "grad_norm": 2.2197864055633545,
      "learning_rate": 1.7676767676767676e-05,
      "loss": 1.0902,
      "step": 1690
    },
    {
      "epoch": 1.3075665801365253,
      "grad_norm": 2.1204230785369873,
      "learning_rate": 1.7482517482517483e-05,
      "loss": 1.1228,
      "step": 1700
    },
    {
      "epoch": 1.3152581482549754,
      "grad_norm": 1.8001035451889038,
      "learning_rate": 1.728826728826729e-05,
      "loss": 1.1183,
      "step": 1710
    },
    {
      "epoch": 1.3229497163734256,
      "grad_norm": 1.9405324459075928,
      "learning_rate": 1.7094017094017095e-05,
      "loss": 1.1377,
      "step": 1720
    },
    {
      "epoch": 1.3306412844918758,
      "grad_norm": 1.9750436544418335,
      "learning_rate": 1.68997668997669e-05,
      "loss": 1.1051,
      "step": 1730
    },
    {
      "epoch": 1.338332852610326,
      "grad_norm": 1.7894169092178345,
      "learning_rate": 1.6705516705516705e-05,
      "loss": 1.0972,
      "step": 1740
    },
    {
      "epoch": 1.3460244207287761,
      "grad_norm": 1.7701419591903687,
      "learning_rate": 1.651126651126651e-05,
      "loss": 1.0815,
      "step": 1750
    },
    {
      "epoch": 1.353715988847226,
      "grad_norm": 2.0538313388824463,
      "learning_rate": 1.6317016317016318e-05,
      "loss": 1.1072,
      "step": 1760
    },
    {
      "epoch": 1.3614075569656765,
      "grad_norm": 2.0709080696105957,
      "learning_rate": 1.6122766122766124e-05,
      "loss": 1.0918,
      "step": 1770
    },
    {
      "epoch": 1.3690991250841265,
      "grad_norm": 2.1939444541931152,
      "learning_rate": 1.592851592851593e-05,
      "loss": 1.0957,
      "step": 1780
    },
    {
      "epoch": 1.3767906932025766,
      "grad_norm": 1.5446499586105347,
      "learning_rate": 1.5734265734265734e-05,
      "loss": 1.1032,
      "step": 1790
    },
    {
      "epoch": 1.3844822613210268,
      "grad_norm": 1.971351981163025,
      "learning_rate": 1.554001554001554e-05,
      "loss": 1.1319,
      "step": 1800
    },
    {
      "epoch": 1.392173829439477,
      "grad_norm": 1.9268254041671753,
      "learning_rate": 1.5345765345765346e-05,
      "loss": 1.1242,
      "step": 1810
    },
    {
      "epoch": 1.3998653975579272,
      "grad_norm": 2.0808980464935303,
      "learning_rate": 1.5151515151515153e-05,
      "loss": 1.1173,
      "step": 1820
    },
    {
      "epoch": 1.4075569656763773,
      "grad_norm": 2.044785976409912,
      "learning_rate": 1.4957264957264958e-05,
      "loss": 1.1481,
      "step": 1830
    },
    {
      "epoch": 1.4152485337948275,
      "grad_norm": 1.690045714378357,
      "learning_rate": 1.4763014763014762e-05,
      "loss": 1.1239,
      "step": 1840
    },
    {
      "epoch": 1.4229401019132775,
      "grad_norm": 1.950405478477478,
      "learning_rate": 1.456876456876457e-05,
      "loss": 1.1306,
      "step": 1850
    },
    {
      "epoch": 1.4306316700317276,
      "grad_norm": 2.236177921295166,
      "learning_rate": 1.4374514374514375e-05,
      "loss": 1.0963,
      "step": 1860
    },
    {
      "epoch": 1.4383232381501778,
      "grad_norm": 2.0903525352478027,
      "learning_rate": 1.418026418026418e-05,
      "loss": 1.1024,
      "step": 1870
    },
    {
      "epoch": 1.446014806268628,
      "grad_norm": 2.220693349838257,
      "learning_rate": 1.3986013986013988e-05,
      "loss": 1.1132,
      "step": 1880
    },
    {
      "epoch": 1.4537063743870782,
      "grad_norm": 1.9859932661056519,
      "learning_rate": 1.3791763791763793e-05,
      "loss": 1.1163,
      "step": 1890
    },
    {
      "epoch": 1.4613979425055283,
      "grad_norm": 2.0174367427825928,
      "learning_rate": 1.3597513597513598e-05,
      "loss": 1.0975,
      "step": 1900
    },
    {
      "epoch": 1.4690895106239785,
      "grad_norm": 2.1779918670654297,
      "learning_rate": 1.3403263403263406e-05,
      "loss": 1.1076,
      "step": 1910
    },
    {
      "epoch": 1.4767810787424285,
      "grad_norm": 2.0282936096191406,
      "learning_rate": 1.320901320901321e-05,
      "loss": 1.1014,
      "step": 1920
    },
    {
      "epoch": 1.4844726468608789,
      "grad_norm": 2.0938243865966797,
      "learning_rate": 1.3014763014763015e-05,
      "loss": 1.1028,
      "step": 1930
    },
    {
      "epoch": 1.4921642149793288,
      "grad_norm": 2.0279526710510254,
      "learning_rate": 1.282051282051282e-05,
      "loss": 1.0922,
      "step": 1940
    },
    {
      "epoch": 1.499855783097779,
      "grad_norm": 1.9357686042785645,
      "learning_rate": 1.2626262626262628e-05,
      "loss": 1.0896,
      "step": 1950
    },
    {
      "epoch": 1.5075473512162292,
      "grad_norm": 2.1684436798095703,
      "learning_rate": 1.2432012432012433e-05,
      "loss": 1.1063,
      "step": 1960
    },
    {
      "epoch": 1.5152389193346794,
      "grad_norm": 1.9890198707580566,
      "learning_rate": 1.2237762237762239e-05,
      "loss": 1.0927,
      "step": 1970
    },
    {
      "epoch": 1.5229304874531295,
      "grad_norm": 1.9705474376678467,
      "learning_rate": 1.2043512043512044e-05,
      "loss": 1.1135,
      "step": 1980
    },
    {
      "epoch": 1.5306220555715797,
      "grad_norm": 2.2323312759399414,
      "learning_rate": 1.184926184926185e-05,
      "loss": 1.1035,
      "step": 1990
    },
    {
      "epoch": 1.5383136236900299,
      "grad_norm": 2.031677007675171,
      "learning_rate": 1.1655011655011655e-05,
      "loss": 1.0881,
      "step": 2000
    },
    {
      "epoch": 1.5460051918084798,
      "grad_norm": 1.9229755401611328,
      "learning_rate": 1.1460761460761461e-05,
      "loss": 1.1678,
      "step": 2010
    },
    {
      "epoch": 1.5536967599269302,
      "grad_norm": 2.2385263442993164,
      "learning_rate": 1.1266511266511268e-05,
      "loss": 1.0922,
      "step": 2020
    },
    {
      "epoch": 1.5613883280453802,
      "grad_norm": 2.14032244682312,
      "learning_rate": 1.1072261072261073e-05,
      "loss": 1.0948,
      "step": 2030
    },
    {
      "epoch": 1.5690798961638304,
      "grad_norm": 2.2739627361297607,
      "learning_rate": 1.0878010878010879e-05,
      "loss": 1.1033,
      "step": 2040
    },
    {
      "epoch": 1.5767714642822805,
      "grad_norm": 2.117405891418457,
      "learning_rate": 1.0683760683760684e-05,
      "loss": 1.1144,
      "step": 2050
    },
    {
      "epoch": 1.5844630324007307,
      "grad_norm": 1.917315125465393,
      "learning_rate": 1.048951048951049e-05,
      "loss": 1.1462,
      "step": 2060
    },
    {
      "epoch": 1.592154600519181,
      "grad_norm": 1.9057689905166626,
      "learning_rate": 1.0295260295260297e-05,
      "loss": 1.1243,
      "step": 2070
    },
    {
      "epoch": 1.5998461686376308,
      "grad_norm": 2.0241281986236572,
      "learning_rate": 1.0101010101010101e-05,
      "loss": 1.1061,
      "step": 2080
    },
    {
      "epoch": 1.6075377367560812,
      "grad_norm": 2.33536958694458,
      "learning_rate": 9.906759906759908e-06,
      "loss": 1.0989,
      "step": 2090
    },
    {
      "epoch": 1.6152293048745312,
      "grad_norm": 2.1172101497650146,
      "learning_rate": 9.712509712509713e-06,
      "loss": 1.0936,
      "step": 2100
    },
    {
      "epoch": 1.6229208729929816,
      "grad_norm": 1.8319240808486938,
      "learning_rate": 9.518259518259519e-06,
      "loss": 1.1099,
      "step": 2110
    },
    {
      "epoch": 1.6306124411114316,
      "grad_norm": 1.9867093563079834,
      "learning_rate": 9.324009324009325e-06,
      "loss": 1.0983,
      "step": 2120
    },
    {
      "epoch": 1.6383040092298817,
      "grad_norm": 1.9787898063659668,
      "learning_rate": 9.12975912975913e-06,
      "loss": 1.1042,
      "step": 2130
    },
    {
      "epoch": 1.645995577348332,
      "grad_norm": 1.9842088222503662,
      "learning_rate": 8.935508935508937e-06,
      "loss": 1.1094,
      "step": 2140
    },
    {
      "epoch": 1.653687145466782,
      "grad_norm": 1.9378868341445923,
      "learning_rate": 8.741258741258741e-06,
      "loss": 1.0995,
      "step": 2150
    },
    {
      "epoch": 1.6613787135852323,
      "grad_norm": 1.8845783472061157,
      "learning_rate": 8.547008547008548e-06,
      "loss": 1.0884,
      "step": 2160
    },
    {
      "epoch": 1.6690702817036822,
      "grad_norm": 1.9667729139328003,
      "learning_rate": 8.352758352758352e-06,
      "loss": 1.0916,
      "step": 2170
    },
    {
      "epoch": 1.6767618498221326,
      "grad_norm": 2.1440649032592773,
      "learning_rate": 8.158508158508159e-06,
      "loss": 1.1166,
      "step": 2180
    },
    {
      "epoch": 1.6844534179405826,
      "grad_norm": 2.1377923488616943,
      "learning_rate": 7.964257964257965e-06,
      "loss": 1.0733,
      "step": 2190
    },
    {
      "epoch": 1.6921449860590327,
      "grad_norm": 1.9222036600112915,
      "learning_rate": 7.77000777000777e-06,
      "loss": 1.1064,
      "step": 2200
    },
    {
      "epoch": 1.699836554177483,
      "grad_norm": 1.9658797979354858,
      "learning_rate": 7.5757575757575764e-06,
      "loss": 1.1212,
      "step": 2210
    },
    {
      "epoch": 1.707528122295933,
      "grad_norm": 1.9604525566101074,
      "learning_rate": 7.381507381507381e-06,
      "loss": 1.094,
      "step": 2220
    },
    {
      "epoch": 1.7152196904143833,
      "grad_norm": 2.1394431591033936,
      "learning_rate": 7.187257187257188e-06,
      "loss": 1.0974,
      "step": 2230
    },
    {
      "epoch": 1.7229112585328332,
      "grad_norm": 1.819686770439148,
      "learning_rate": 6.993006993006994e-06,
      "loss": 1.0815,
      "step": 2240
    },
    {
      "epoch": 1.7306028266512836,
      "grad_norm": 1.8422613143920898,
      "learning_rate": 6.798756798756799e-06,
      "loss": 1.1034,
      "step": 2250
    },
    {
      "epoch": 1.7382943947697336,
      "grad_norm": 1.9406566619873047,
      "learning_rate": 6.604506604506605e-06,
      "loss": 1.0835,
      "step": 2260
    },
    {
      "epoch": 1.745985962888184,
      "grad_norm": 1.831289291381836,
      "learning_rate": 6.41025641025641e-06,
      "loss": 1.0857,
      "step": 2270
    },
    {
      "epoch": 1.753677531006634,
      "grad_norm": 1.942730188369751,
      "learning_rate": 6.216006216006216e-06,
      "loss": 1.1112,
      "step": 2280
    },
    {
      "epoch": 1.761369099125084,
      "grad_norm": 1.8144134283065796,
      "learning_rate": 6.021756021756022e-06,
      "loss": 1.0873,
      "step": 2290
    },
    {
      "epoch": 1.7690606672435343,
      "grad_norm": 1.7852503061294556,
      "learning_rate": 5.8275058275058275e-06,
      "loss": 1.0756,
      "step": 2300
    },
    {
      "epoch": 1.7767522353619845,
      "grad_norm": 2.232545852661133,
      "learning_rate": 5.633255633255634e-06,
      "loss": 1.114,
      "step": 2310
    },
    {
      "epoch": 1.7844438034804346,
      "grad_norm": 2.143216133117676,
      "learning_rate": 5.4390054390054395e-06,
      "loss": 1.1015,
      "step": 2320
    },
    {
      "epoch": 1.7921353715988846,
      "grad_norm": 1.8443506956100464,
      "learning_rate": 5.244755244755245e-06,
      "loss": 1.1006,
      "step": 2330
    },
    {
      "epoch": 1.799826939717335,
      "grad_norm": 1.7747691869735718,
      "learning_rate": 5.050505050505051e-06,
      "loss": 1.099,
      "step": 2340
    },
    {
      "epoch": 1.807518507835785,
      "grad_norm": 1.8745768070220947,
      "learning_rate": 4.856254856254856e-06,
      "loss": 1.0979,
      "step": 2350
    },
    {
      "epoch": 1.8152100759542353,
      "grad_norm": 1.9972470998764038,
      "learning_rate": 4.662004662004663e-06,
      "loss": 1.1083,
      "step": 2360
    },
    {
      "epoch": 1.8229016440726853,
      "grad_norm": 1.8246738910675049,
      "learning_rate": 4.467754467754468e-06,
      "loss": 1.1061,
      "step": 2370
    },
    {
      "epoch": 1.8305932121911355,
      "grad_norm": 1.9446369409561157,
      "learning_rate": 4.273504273504274e-06,
      "loss": 1.0843,
      "step": 2380
    },
    {
      "epoch": 1.8382847803095856,
      "grad_norm": 1.9822577238082886,
      "learning_rate": 4.079254079254079e-06,
      "loss": 1.0722,
      "step": 2390
    },
    {
      "epoch": 1.8459763484280358,
      "grad_norm": 2.3684942722320557,
      "learning_rate": 3.885003885003885e-06,
      "loss": 1.1354,
      "step": 2400
    },
    {
      "epoch": 1.853667916546486,
      "grad_norm": 1.8989230394363403,
      "learning_rate": 3.6907536907536906e-06,
      "loss": 1.0931,
      "step": 2410
    },
    {
      "epoch": 1.861359484664936,
      "grad_norm": 1.952711582183838,
      "learning_rate": 3.496503496503497e-06,
      "loss": 1.0823,
      "step": 2420
    },
    {
      "epoch": 1.8690510527833863,
      "grad_norm": 2.154554605484009,
      "learning_rate": 3.3022533022533026e-06,
      "loss": 1.0846,
      "step": 2430
    },
    {
      "epoch": 1.8767426209018363,
      "grad_norm": 1.8646643161773682,
      "learning_rate": 3.108003108003108e-06,
      "loss": 1.0961,
      "step": 2440
    },
    {
      "epoch": 1.8844341890202865,
      "grad_norm": 1.9604412317276,
      "learning_rate": 2.9137529137529138e-06,
      "loss": 1.0823,
      "step": 2450
    },
    {
      "epoch": 1.8921257571387367,
      "grad_norm": 1.7040070295333862,
      "learning_rate": 2.7195027195027198e-06,
      "loss": 1.0831,
      "step": 2460
    },
    {
      "epoch": 1.8998173252571868,
      "grad_norm": 2.3262603282928467,
      "learning_rate": 2.5252525252525253e-06,
      "loss": 1.0668,
      "step": 2470
    },
    {
      "epoch": 1.907508893375637,
      "grad_norm": 2.0159196853637695,
      "learning_rate": 2.3310023310023313e-06,
      "loss": 1.094,
      "step": 2480
    },
    {
      "epoch": 1.915200461494087,
      "grad_norm": 1.9653328657150269,
      "learning_rate": 2.136752136752137e-06,
      "loss": 1.0677,
      "step": 2490
    },
    {
      "epoch": 1.9228920296125374,
      "grad_norm": 1.7489328384399414,
      "learning_rate": 1.9425019425019425e-06,
      "loss": 1.0947,
      "step": 2500
    },
    {
      "epoch": 1.9305835977309873,
      "grad_norm": 1.9971283674240112,
      "learning_rate": 1.7482517482517485e-06,
      "loss": 1.135,
      "step": 2510
    },
    {
      "epoch": 1.9382751658494377,
      "grad_norm": 1.9700243473052979,
      "learning_rate": 1.554001554001554e-06,
      "loss": 1.0783,
      "step": 2520
    },
    {
      "epoch": 1.9459667339678877,
      "grad_norm": 1.8461562395095825,
      "learning_rate": 1.3597513597513599e-06,
      "loss": 1.113,
      "step": 2530
    },
    {
      "epoch": 1.9536583020863378,
      "grad_norm": 1.6744381189346313,
      "learning_rate": 1.1655011655011657e-06,
      "loss": 1.0829,
      "step": 2540
    },
    {
      "epoch": 1.961349870204788,
      "grad_norm": 1.9034210443496704,
      "learning_rate": 9.712509712509713e-07,
      "loss": 1.0781,
      "step": 2550
    },
    {
      "epoch": 1.9690414383232382,
      "grad_norm": 1.98823881149292,
      "learning_rate": 7.77000777000777e-07,
      "loss": 1.1031,
      "step": 2560
    },
    {
      "epoch": 1.9767330064416884,
      "grad_norm": 1.905478596687317,
      "learning_rate": 5.827505827505828e-07,
      "loss": 1.081,
      "step": 2570
    },
    {
      "epoch": 1.9844245745601383,
      "grad_norm": 1.8001110553741455,
      "learning_rate": 3.885003885003885e-07,
      "loss": 1.0793,
      "step": 2580
    },
    {
      "epoch": 1.9921161426785887,
      "grad_norm": 2.089235305786133,
      "learning_rate": 1.9425019425019426e-07,
      "loss": 1.1,
      "step": 2590
    },
    {
      "epoch": 1.9998077107970387,
      "grad_norm": 2.018970489501953,
      "learning_rate": 0.0,
      "loss": 1.0801,
      "step": 2600
    }
  ],
  "logging_steps": 10,
  "max_steps": 2600,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.1771710700767724e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}