{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 24236,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00412609341475491,
"grad_norm": 14.695940971374512,
"learning_rate": 1e-05,
"loss": 8.626,
"step": 100
},
{
"epoch": 0.00825218682950982,
"grad_norm": 19.695737838745117,
"learning_rate": 2e-05,
"loss": 7.5714,
"step": 200
},
{
"epoch": 0.01237828024426473,
"grad_norm": 7.499746799468994,
"learning_rate": 3e-05,
"loss": 6.9595,
"step": 300
},
{
"epoch": 0.01650437365901964,
"grad_norm": 22.69700813293457,
"learning_rate": 4e-05,
"loss": 5.6622,
"step": 400
},
{
"epoch": 0.02063046707377455,
"grad_norm": 16.763093948364258,
"learning_rate": 5e-05,
"loss": 1.9119,
"step": 500
},
{
"epoch": 0.02475656048852946,
"grad_norm": 3.74697208404541,
"learning_rate": 4.978934951129087e-05,
"loss": 1.6244,
"step": 600
},
{
"epoch": 0.02888265390328437,
"grad_norm": 6.804798126220703,
"learning_rate": 4.957869902258174e-05,
"loss": 1.4867,
"step": 700
},
{
"epoch": 0.03300874731803928,
"grad_norm": 4.382748603820801,
"learning_rate": 4.93680485338726e-05,
"loss": 1.4434,
"step": 800
},
{
"epoch": 0.03713484073279419,
"grad_norm": 2.020482063293457,
"learning_rate": 4.9157398045163464e-05,
"loss": 1.3701,
"step": 900
},
{
"epoch": 0.0412609341475491,
"grad_norm": 1.9225651025772095,
"learning_rate": 4.894674755645433e-05,
"loss": 1.383,
"step": 1000
},
{
"epoch": 0.04538702756230401,
"grad_norm": 1.8698792457580566,
"learning_rate": 4.87360970677452e-05,
"loss": 1.3512,
"step": 1100
},
{
"epoch": 0.04951312097705892,
"grad_norm": 4.490991592407227,
"learning_rate": 4.852544657903607e-05,
"loss": 1.3039,
"step": 1200
},
{
"epoch": 0.05363921439181383,
"grad_norm": 2.697434186935425,
"learning_rate": 4.831479609032693e-05,
"loss": 1.3085,
"step": 1300
},
{
"epoch": 0.05776530780656874,
"grad_norm": 3.3568286895751953,
"learning_rate": 4.81041456016178e-05,
"loss": 1.2915,
"step": 1400
},
{
"epoch": 0.06189140122132365,
"grad_norm": 2.012889862060547,
"learning_rate": 4.789349511290866e-05,
"loss": 1.2778,
"step": 1500
},
{
"epoch": 0.06601749463607856,
"grad_norm": 4.024045467376709,
"learning_rate": 4.768284462419953e-05,
"loss": 1.2688,
"step": 1600
},
{
"epoch": 0.07014358805083347,
"grad_norm": 2.241870880126953,
"learning_rate": 4.7472194135490394e-05,
"loss": 1.2592,
"step": 1700
},
{
"epoch": 0.07426968146558838,
"grad_norm": 3.7178213596343994,
"learning_rate": 4.7261543646781266e-05,
"loss": 1.2112,
"step": 1800
},
{
"epoch": 0.07839577488034329,
"grad_norm": 2.036505937576294,
"learning_rate": 4.705089315807213e-05,
"loss": 1.2126,
"step": 1900
},
{
"epoch": 0.0825218682950982,
"grad_norm": 1.7717580795288086,
"learning_rate": 4.6840242669362997e-05,
"loss": 1.191,
"step": 2000
},
{
"epoch": 0.08664796170985312,
"grad_norm": 1.8078298568725586,
"learning_rate": 4.662959218065386e-05,
"loss": 1.199,
"step": 2100
},
{
"epoch": 0.09077405512460802,
"grad_norm": 4.067634582519531,
"learning_rate": 4.641894169194473e-05,
"loss": 1.1415,
"step": 2200
},
{
"epoch": 0.09490014853936293,
"grad_norm": 1.9144686460494995,
"learning_rate": 4.620829120323559e-05,
"loss": 1.1486,
"step": 2300
},
{
"epoch": 0.09902624195411784,
"grad_norm": 1.4644508361816406,
"learning_rate": 4.599764071452646e-05,
"loss": 1.1665,
"step": 2400
},
{
"epoch": 0.10315233536887275,
"grad_norm": 1.6876461505889893,
"learning_rate": 4.578699022581733e-05,
"loss": 1.1197,
"step": 2500
},
{
"epoch": 0.10727842878362766,
"grad_norm": 1.6274546384811401,
"learning_rate": 4.5576339737108196e-05,
"loss": 1.1505,
"step": 2600
},
{
"epoch": 0.11140452219838257,
"grad_norm": 2.2767255306243896,
"learning_rate": 4.5365689248399054e-05,
"loss": 1.1376,
"step": 2700
},
{
"epoch": 0.11553061561313747,
"grad_norm": 2.0346932411193848,
"learning_rate": 4.5155038759689926e-05,
"loss": 1.0817,
"step": 2800
},
{
"epoch": 0.1196567090278924,
"grad_norm": 1.5901857614517212,
"learning_rate": 4.494438827098079e-05,
"loss": 1.0919,
"step": 2900
},
{
"epoch": 0.1237828024426473,
"grad_norm": 2.1194183826446533,
"learning_rate": 4.473373778227166e-05,
"loss": 1.0813,
"step": 3000
},
{
"epoch": 0.1279088958574022,
"grad_norm": 4.601478576660156,
"learning_rate": 4.452308729356252e-05,
"loss": 1.1204,
"step": 3100
},
{
"epoch": 0.13203498927215712,
"grad_norm": 2.3545944690704346,
"learning_rate": 4.431243680485339e-05,
"loss": 1.0488,
"step": 3200
},
{
"epoch": 0.13616108268691204,
"grad_norm": 1.468526005744934,
"learning_rate": 4.410178631614425e-05,
"loss": 1.1108,
"step": 3300
},
{
"epoch": 0.14028717610166694,
"grad_norm": 1.5844910144805908,
"learning_rate": 4.389113582743512e-05,
"loss": 1.1114,
"step": 3400
},
{
"epoch": 0.14441326951642186,
"grad_norm": 4.228846073150635,
"learning_rate": 4.368048533872599e-05,
"loss": 1.0961,
"step": 3500
},
{
"epoch": 0.14853936293117675,
"grad_norm": 1.4088937044143677,
"learning_rate": 4.3469834850016856e-05,
"loss": 1.0758,
"step": 3600
},
{
"epoch": 0.15266545634593168,
"grad_norm": 1.6613353490829468,
"learning_rate": 4.325918436130772e-05,
"loss": 1.0916,
"step": 3700
},
{
"epoch": 0.15679154976068657,
"grad_norm": 1.6648322343826294,
"learning_rate": 4.304853387259859e-05,
"loss": 1.0784,
"step": 3800
},
{
"epoch": 0.1609176431754415,
"grad_norm": 1.387666940689087,
"learning_rate": 4.283788338388945e-05,
"loss": 1.0894,
"step": 3900
},
{
"epoch": 0.1650437365901964,
"grad_norm": 1.416618824005127,
"learning_rate": 4.262723289518032e-05,
"loss": 1.0283,
"step": 4000
},
{
"epoch": 0.1691698300049513,
"grad_norm": 1.569306492805481,
"learning_rate": 4.241658240647118e-05,
"loss": 1.0513,
"step": 4100
},
{
"epoch": 0.17329592341970623,
"grad_norm": 1.4747782945632935,
"learning_rate": 4.2205931917762055e-05,
"loss": 1.0531,
"step": 4200
},
{
"epoch": 0.17742201683446113,
"grad_norm": 2.1925017833709717,
"learning_rate": 4.199528142905292e-05,
"loss": 1.058,
"step": 4300
},
{
"epoch": 0.18154811024921605,
"grad_norm": 1.9729565382003784,
"learning_rate": 4.1784630940343786e-05,
"loss": 1.0806,
"step": 4400
},
{
"epoch": 0.18567420366397094,
"grad_norm": 1.5546541213989258,
"learning_rate": 4.1573980451634644e-05,
"loss": 1.0746,
"step": 4500
},
{
"epoch": 0.18980029707872587,
"grad_norm": 1.8433148860931396,
"learning_rate": 4.1363329962925517e-05,
"loss": 1.0527,
"step": 4600
},
{
"epoch": 0.19392639049348076,
"grad_norm": 1.3673489093780518,
"learning_rate": 4.115267947421638e-05,
"loss": 1.0772,
"step": 4700
},
{
"epoch": 0.19805248390823568,
"grad_norm": 1.8290094137191772,
"learning_rate": 4.094202898550725e-05,
"loss": 1.0424,
"step": 4800
},
{
"epoch": 0.2021785773229906,
"grad_norm": 2.0811445713043213,
"learning_rate": 4.073137849679812e-05,
"loss": 1.0498,
"step": 4900
},
{
"epoch": 0.2063046707377455,
"grad_norm": 1.2849047183990479,
"learning_rate": 4.0520728008088985e-05,
"loss": 1.0341,
"step": 5000
},
{
"epoch": 0.21043076415250042,
"grad_norm": 3.288480281829834,
"learning_rate": 4.0310077519379843e-05,
"loss": 1.0273,
"step": 5100
},
{
"epoch": 0.21455685756725532,
"grad_norm": 1.2297766208648682,
"learning_rate": 4.009942703067071e-05,
"loss": 1.0768,
"step": 5200
},
{
"epoch": 0.21868295098201024,
"grad_norm": 1.4169477224349976,
"learning_rate": 3.988877654196158e-05,
"loss": 1.0334,
"step": 5300
},
{
"epoch": 0.22280904439676513,
"grad_norm": 1.314010739326477,
"learning_rate": 3.9678126053252446e-05,
"loss": 1.0208,
"step": 5400
},
{
"epoch": 0.22693513781152005,
"grad_norm": 1.5176063776016235,
"learning_rate": 3.946747556454331e-05,
"loss": 1.0561,
"step": 5500
},
{
"epoch": 0.23106123122627495,
"grad_norm": 1.418303370475769,
"learning_rate": 3.9256825075834184e-05,
"loss": 1.0223,
"step": 5600
},
{
"epoch": 0.23518732464102987,
"grad_norm": 1.8820278644561768,
"learning_rate": 3.904617458712504e-05,
"loss": 1.0333,
"step": 5700
},
{
"epoch": 0.2393134180557848,
"grad_norm": 1.7610660791397095,
"learning_rate": 3.883552409841591e-05,
"loss": 1.007,
"step": 5800
},
{
"epoch": 0.2434395114705397,
"grad_norm": 7.877830982208252,
"learning_rate": 3.862487360970677e-05,
"loss": 1.0138,
"step": 5900
},
{
"epoch": 0.2475656048852946,
"grad_norm": 1.4319870471954346,
"learning_rate": 3.8414223120997645e-05,
"loss": 1.0103,
"step": 6000
},
{
"epoch": 0.2516916983000495,
"grad_norm": 1.4879227876663208,
"learning_rate": 3.820357263228851e-05,
"loss": 1.0219,
"step": 6100
},
{
"epoch": 0.2558177917148044,
"grad_norm": 1.3280787467956543,
"learning_rate": 3.7992922143579376e-05,
"loss": 1.0157,
"step": 6200
},
{
"epoch": 0.25994388512955935,
"grad_norm": 2.4915549755096436,
"learning_rate": 3.778227165487024e-05,
"loss": 1.0422,
"step": 6300
},
{
"epoch": 0.26406997854431424,
"grad_norm": 1.3016897439956665,
"learning_rate": 3.757162116616111e-05,
"loss": 1.004,
"step": 6400
},
{
"epoch": 0.26819607195906914,
"grad_norm": 1.722939372062683,
"learning_rate": 3.736097067745197e-05,
"loss": 1.0196,
"step": 6500
},
{
"epoch": 0.2723221653738241,
"grad_norm": 1.4764331579208374,
"learning_rate": 3.715032018874284e-05,
"loss": 0.9871,
"step": 6600
},
{
"epoch": 0.276448258788579,
"grad_norm": 1.344777226448059,
"learning_rate": 3.693966970003371e-05,
"loss": 1.0249,
"step": 6700
},
{
"epoch": 0.2805743522033339,
"grad_norm": 1.1756465435028076,
"learning_rate": 3.6729019211324575e-05,
"loss": 1.0506,
"step": 6800
},
{
"epoch": 0.28470044561808877,
"grad_norm": 1.3845124244689941,
"learning_rate": 3.6518368722615434e-05,
"loss": 1.0041,
"step": 6900
},
{
"epoch": 0.2888265390328437,
"grad_norm": 1.074078917503357,
"learning_rate": 3.6307718233906306e-05,
"loss": 0.9849,
"step": 7000
},
{
"epoch": 0.2929526324475986,
"grad_norm": 2.0719516277313232,
"learning_rate": 3.609706774519717e-05,
"loss": 1.0022,
"step": 7100
},
{
"epoch": 0.2970787258623535,
"grad_norm": 1.1381429433822632,
"learning_rate": 3.5886417256488037e-05,
"loss": 0.9409,
"step": 7200
},
{
"epoch": 0.30120481927710846,
"grad_norm": 1.2426626682281494,
"learning_rate": 3.56757667677789e-05,
"loss": 1.0225,
"step": 7300
},
{
"epoch": 0.30533091269186335,
"grad_norm": 2.445568561553955,
"learning_rate": 3.5465116279069774e-05,
"loss": 0.9725,
"step": 7400
},
{
"epoch": 0.30945700610661825,
"grad_norm": 1.2126537561416626,
"learning_rate": 3.525446579036063e-05,
"loss": 1.0005,
"step": 7500
},
{
"epoch": 0.31358309952137314,
"grad_norm": 2.634969472885132,
"learning_rate": 3.50438153016515e-05,
"loss": 1.0079,
"step": 7600
},
{
"epoch": 0.3177091929361281,
"grad_norm": 1.4859946966171265,
"learning_rate": 3.483316481294237e-05,
"loss": 1.0192,
"step": 7700
},
{
"epoch": 0.321835286350883,
"grad_norm": 1.3265373706817627,
"learning_rate": 3.4622514324233236e-05,
"loss": 0.9836,
"step": 7800
},
{
"epoch": 0.3259613797656379,
"grad_norm": 1.569514513015747,
"learning_rate": 3.44118638355241e-05,
"loss": 1.002,
"step": 7900
},
{
"epoch": 0.3300874731803928,
"grad_norm": 1.718145728111267,
"learning_rate": 3.4201213346814966e-05,
"loss": 0.9599,
"step": 8000
},
{
"epoch": 0.3342135665951477,
"grad_norm": 1.2960829734802246,
"learning_rate": 3.399056285810583e-05,
"loss": 1.0286,
"step": 8100
},
{
"epoch": 0.3383396600099026,
"grad_norm": 1.3030658960342407,
"learning_rate": 3.37799123693967e-05,
"loss": 0.9592,
"step": 8200
},
{
"epoch": 0.3424657534246575,
"grad_norm": 1.6679294109344482,
"learning_rate": 3.356926188068756e-05,
"loss": 0.9823,
"step": 8300
},
{
"epoch": 0.34659184683941247,
"grad_norm": 1.079559326171875,
"learning_rate": 3.335861139197843e-05,
"loss": 0.9749,
"step": 8400
},
{
"epoch": 0.35071794025416736,
"grad_norm": 1.27901029586792,
"learning_rate": 3.31479609032693e-05,
"loss": 0.9801,
"step": 8500
},
{
"epoch": 0.35484403366892225,
"grad_norm": 1.292656421661377,
"learning_rate": 3.2937310414560165e-05,
"loss": 0.9824,
"step": 8600
},
{
"epoch": 0.35897012708367715,
"grad_norm": 1.2524762153625488,
"learning_rate": 3.272665992585103e-05,
"loss": 0.943,
"step": 8700
},
{
"epoch": 0.3630962204984321,
"grad_norm": 2.4386353492736816,
"learning_rate": 3.2516009437141896e-05,
"loss": 0.9738,
"step": 8800
},
{
"epoch": 0.367222313913187,
"grad_norm": 1.2332638502120972,
"learning_rate": 3.230535894843276e-05,
"loss": 0.9599,
"step": 8900
},
{
"epoch": 0.3713484073279419,
"grad_norm": 1.3955186605453491,
"learning_rate": 3.209470845972363e-05,
"loss": 1.0027,
"step": 9000
},
{
"epoch": 0.37547450074269684,
"grad_norm": 1.7736716270446777,
"learning_rate": 3.188405797101449e-05,
"loss": 0.9568,
"step": 9100
},
{
"epoch": 0.37960059415745173,
"grad_norm": 1.1282614469528198,
"learning_rate": 3.1673407482305364e-05,
"loss": 0.9892,
"step": 9200
},
{
"epoch": 0.3837266875722066,
"grad_norm": 4.226625442504883,
"learning_rate": 3.146275699359622e-05,
"loss": 0.957,
"step": 9300
},
{
"epoch": 0.3878527809869615,
"grad_norm": 1.3062007427215576,
"learning_rate": 3.125210650488709e-05,
"loss": 0.9702,
"step": 9400
},
{
"epoch": 0.39197887440171647,
"grad_norm": 1.5109843015670776,
"learning_rate": 3.104145601617796e-05,
"loss": 0.9406,
"step": 9500
},
{
"epoch": 0.39610496781647136,
"grad_norm": 1.2154899835586548,
"learning_rate": 3.0830805527468826e-05,
"loss": 0.9532,
"step": 9600
},
{
"epoch": 0.40023106123122626,
"grad_norm": 1.239396095275879,
"learning_rate": 3.062015503875969e-05,
"loss": 0.9484,
"step": 9700
},
{
"epoch": 0.4043571546459812,
"grad_norm": 1.3215525150299072,
"learning_rate": 3.0409504550050553e-05,
"loss": 0.978,
"step": 9800
},
{
"epoch": 0.4084832480607361,
"grad_norm": 1.149057149887085,
"learning_rate": 3.0198854061341425e-05,
"loss": 0.968,
"step": 9900
},
{
"epoch": 0.412609341475491,
"grad_norm": 1.271074652671814,
"learning_rate": 2.998820357263229e-05,
"loss": 0.9794,
"step": 10000
},
{
"epoch": 0.4167354348902459,
"grad_norm": 1.0992262363433838,
"learning_rate": 2.9777553083923153e-05,
"loss": 0.9594,
"step": 10100
},
{
"epoch": 0.42086152830500084,
"grad_norm": 1.1205365657806396,
"learning_rate": 2.9566902595214025e-05,
"loss": 0.9439,
"step": 10200
},
{
"epoch": 0.42498762171975574,
"grad_norm": 1.144080638885498,
"learning_rate": 2.935625210650489e-05,
"loss": 0.9745,
"step": 10300
},
{
"epoch": 0.42911371513451063,
"grad_norm": 3.2051868438720703,
"learning_rate": 2.9145601617795752e-05,
"loss": 0.9594,
"step": 10400
},
{
"epoch": 0.4332398085492656,
"grad_norm": 1.2232369184494019,
"learning_rate": 2.8934951129086618e-05,
"loss": 0.9644,
"step": 10500
},
{
"epoch": 0.4373659019640205,
"grad_norm": 1.3971831798553467,
"learning_rate": 2.872430064037749e-05,
"loss": 0.987,
"step": 10600
},
{
"epoch": 0.44149199537877537,
"grad_norm": 1.1187039613723755,
"learning_rate": 2.8513650151668352e-05,
"loss": 0.9657,
"step": 10700
},
{
"epoch": 0.44561808879353026,
"grad_norm": 1.1717453002929688,
"learning_rate": 2.8302999662959217e-05,
"loss": 0.9363,
"step": 10800
},
{
"epoch": 0.4497441822082852,
"grad_norm": 1.4479399919509888,
"learning_rate": 2.809234917425009e-05,
"loss": 0.9428,
"step": 10900
},
{
"epoch": 0.4538702756230401,
"grad_norm": 1.1537368297576904,
"learning_rate": 2.788169868554095e-05,
"loss": 0.9654,
"step": 11000
},
{
"epoch": 0.457996369037795,
"grad_norm": 1.9704123735427856,
"learning_rate": 2.7671048196831817e-05,
"loss": 0.944,
"step": 11100
},
{
"epoch": 0.4621224624525499,
"grad_norm": 1.3609466552734375,
"learning_rate": 2.7460397708122682e-05,
"loss": 0.9353,
"step": 11200
},
{
"epoch": 0.46624855586730485,
"grad_norm": 1.3835324048995972,
"learning_rate": 2.724974721941355e-05,
"loss": 0.9238,
"step": 11300
},
{
"epoch": 0.47037464928205974,
"grad_norm": 2.1749815940856934,
"learning_rate": 2.7039096730704416e-05,
"loss": 0.9215,
"step": 11400
},
{
"epoch": 0.47450074269681464,
"grad_norm": 1.941735863685608,
"learning_rate": 2.682844624199528e-05,
"loss": 0.9607,
"step": 11500
},
{
"epoch": 0.4786268361115696,
"grad_norm": 1.9667292833328247,
"learning_rate": 2.661779575328615e-05,
"loss": 0.9493,
"step": 11600
},
{
"epoch": 0.4827529295263245,
"grad_norm": 1.1912260055541992,
"learning_rate": 2.6407145264577016e-05,
"loss": 0.9364,
"step": 11700
},
{
"epoch": 0.4868790229410794,
"grad_norm": 1.2728015184402466,
"learning_rate": 2.619649477586788e-05,
"loss": 0.9135,
"step": 11800
},
{
"epoch": 0.49100511635583427,
"grad_norm": 1.326409935951233,
"learning_rate": 2.5985844287158746e-05,
"loss": 0.9665,
"step": 11900
},
{
"epoch": 0.4951312097705892,
"grad_norm": 1.4567406177520752,
"learning_rate": 2.5775193798449615e-05,
"loss": 0.9733,
"step": 12000
},
{
"epoch": 0.4992573031853441,
"grad_norm": 1.3147661685943604,
"learning_rate": 2.556454330974048e-05,
"loss": 0.9204,
"step": 12100
},
{
"epoch": 0.503383396600099,
"grad_norm": 1.6704838275909424,
"learning_rate": 2.5353892821031346e-05,
"loss": 0.9418,
"step": 12200
},
{
"epoch": 0.5075094900148539,
"grad_norm": 1.2493371963500977,
"learning_rate": 2.5143242332322215e-05,
"loss": 0.9441,
"step": 12300
},
{
"epoch": 0.5116355834296088,
"grad_norm": 1.2380743026733398,
"learning_rate": 2.493259184361308e-05,
"loss": 0.9642,
"step": 12400
},
{
"epoch": 0.5157616768443638,
"grad_norm": 1.487196922302246,
"learning_rate": 2.4721941354903942e-05,
"loss": 0.986,
"step": 12500
},
{
"epoch": 0.5198877702591187,
"grad_norm": 1.2720383405685425,
"learning_rate": 2.451129086619481e-05,
"loss": 0.9614,
"step": 12600
},
{
"epoch": 0.5240138636738736,
"grad_norm": 1.3985182046890259,
"learning_rate": 2.4300640377485676e-05,
"loss": 0.9327,
"step": 12700
},
{
"epoch": 0.5281399570886285,
"grad_norm": 1.2555489540100098,
"learning_rate": 2.408998988877654e-05,
"loss": 0.9331,
"step": 12800
},
{
"epoch": 0.5322660505033834,
"grad_norm": 1.083095908164978,
"learning_rate": 2.387933940006741e-05,
"loss": 0.9706,
"step": 12900
},
{
"epoch": 0.5363921439181383,
"grad_norm": 3.2246696949005127,
"learning_rate": 2.3668688911358276e-05,
"loss": 0.9267,
"step": 13000
},
{
"epoch": 0.5405182373328932,
"grad_norm": 1.2211159467697144,
"learning_rate": 2.345803842264914e-05,
"loss": 0.9315,
"step": 13100
},
{
"epoch": 0.5446443307476482,
"grad_norm": 1.3726495504379272,
"learning_rate": 2.3247387933940006e-05,
"loss": 0.9432,
"step": 13200
},
{
"epoch": 0.5487704241624031,
"grad_norm": 1.0996991395950317,
"learning_rate": 2.3036737445230875e-05,
"loss": 0.9213,
"step": 13300
},
{
"epoch": 0.552896517577158,
"grad_norm": 1.016136884689331,
"learning_rate": 2.282608695652174e-05,
"loss": 0.9625,
"step": 13400
},
{
"epoch": 0.5570226109919129,
"grad_norm": 1.1178189516067505,
"learning_rate": 2.2615436467812606e-05,
"loss": 0.9419,
"step": 13500
},
{
"epoch": 0.5611487044066678,
"grad_norm": 1.1706444025039673,
"learning_rate": 2.2404785979103475e-05,
"loss": 0.885,
"step": 13600
},
{
"epoch": 0.5652747978214226,
"grad_norm": 1.4330129623413086,
"learning_rate": 2.2194135490394337e-05,
"loss": 0.9024,
"step": 13700
},
{
"epoch": 0.5694008912361775,
"grad_norm": 2.2776172161102295,
"learning_rate": 2.1983485001685205e-05,
"loss": 0.933,
"step": 13800
},
{
"epoch": 0.5735269846509325,
"grad_norm": 1.3359657526016235,
"learning_rate": 2.177283451297607e-05,
"loss": 0.8791,
"step": 13900
},
{
"epoch": 0.5776530780656874,
"grad_norm": 1.1592367887496948,
"learning_rate": 2.1562184024266936e-05,
"loss": 0.9336,
"step": 14000
},
{
"epoch": 0.5817791714804423,
"grad_norm": 1.052618145942688,
"learning_rate": 2.13515335355578e-05,
"loss": 0.9303,
"step": 14100
},
{
"epoch": 0.5859052648951972,
"grad_norm": 1.2330833673477173,
"learning_rate": 2.114088304684867e-05,
"loss": 0.9247,
"step": 14200
},
{
"epoch": 0.5900313583099521,
"grad_norm": 1.7336995601654053,
"learning_rate": 2.0930232558139536e-05,
"loss": 0.9078,
"step": 14300
},
{
"epoch": 0.594157451724707,
"grad_norm": 1.1562308073043823,
"learning_rate": 2.07195820694304e-05,
"loss": 0.905,
"step": 14400
},
{
"epoch": 0.5982835451394619,
"grad_norm": 1.3212171792984009,
"learning_rate": 2.050893158072127e-05,
"loss": 0.9457,
"step": 14500
},
{
"epoch": 0.6024096385542169,
"grad_norm": 1.5021255016326904,
"learning_rate": 2.0298281092012135e-05,
"loss": 0.9213,
"step": 14600
},
{
"epoch": 0.6065357319689718,
"grad_norm": 1.1142035722732544,
"learning_rate": 2.0087630603303e-05,
"loss": 0.8988,
"step": 14700
},
{
"epoch": 0.6106618253837267,
"grad_norm": 1.0887188911437988,
"learning_rate": 1.9876980114593866e-05,
"loss": 0.9579,
"step": 14800
},
{
"epoch": 0.6147879187984816,
"grad_norm": 1.5622923374176025,
"learning_rate": 1.966632962588473e-05,
"loss": 0.9206,
"step": 14900
},
{
"epoch": 0.6189140122132365,
"grad_norm": 1.4978774785995483,
"learning_rate": 1.94556791371756e-05,
"loss": 0.9292,
"step": 15000
},
{
"epoch": 0.6230401056279914,
"grad_norm": 1.1494709253311157,
"learning_rate": 1.9245028648466465e-05,
"loss": 0.929,
"step": 15100
},
{
"epoch": 0.6271661990427463,
"grad_norm": 3.5858824253082275,
"learning_rate": 1.903437815975733e-05,
"loss": 0.9181,
"step": 15200
},
{
"epoch": 0.6312922924575013,
"grad_norm": 0.927173376083374,
"learning_rate": 1.8823727671048196e-05,
"loss": 0.9365,
"step": 15300
},
{
"epoch": 0.6354183858722562,
"grad_norm": 0.9943380355834961,
"learning_rate": 1.8613077182339065e-05,
"loss": 0.8974,
"step": 15400
},
{
"epoch": 0.6395444792870111,
"grad_norm": 1.4820857048034668,
"learning_rate": 1.840242669362993e-05,
"loss": 0.9066,
"step": 15500
},
{
"epoch": 0.643670572701766,
"grad_norm": 1.3542896509170532,
"learning_rate": 1.8191776204920796e-05,
"loss": 0.9048,
"step": 15600
},
{
"epoch": 0.6477966661165209,
"grad_norm": 2.233414888381958,
"learning_rate": 1.7981125716211664e-05,
"loss": 0.899,
"step": 15700
},
{
"epoch": 0.6519227595312758,
"grad_norm": 1.0770349502563477,
"learning_rate": 1.777047522750253e-05,
"loss": 0.9135,
"step": 15800
},
{
"epoch": 0.6560488529460307,
"grad_norm": 1.1688830852508545,
"learning_rate": 1.7559824738793395e-05,
"loss": 0.8838,
"step": 15900
},
{
"epoch": 0.6601749463607856,
"grad_norm": 1.096822738647461,
"learning_rate": 1.734917425008426e-05,
"loss": 0.9325,
"step": 16000
},
{
"epoch": 0.6643010397755406,
"grad_norm": 1.4621776342391968,
"learning_rate": 1.713852376137513e-05,
"loss": 0.9299,
"step": 16100
},
{
"epoch": 0.6684271331902955,
"grad_norm": 1.2400994300842285,
"learning_rate": 1.692787327266599e-05,
"loss": 0.8986,
"step": 16200
},
{
"epoch": 0.6725532266050503,
"grad_norm": 1.3540397882461548,
"learning_rate": 1.671722278395686e-05,
"loss": 0.9084,
"step": 16300
},
{
"epoch": 0.6766793200198052,
"grad_norm": 1.2045152187347412,
"learning_rate": 1.6506572295247725e-05,
"loss": 0.8943,
"step": 16400
},
{
"epoch": 0.6808054134345601,
"grad_norm": 1.1521943807601929,
"learning_rate": 1.629592180653859e-05,
"loss": 0.9089,
"step": 16500
},
{
"epoch": 0.684931506849315,
"grad_norm": 4.699136257171631,
"learning_rate": 1.608527131782946e-05,
"loss": 0.9169,
"step": 16600
},
{
"epoch": 0.6890576002640699,
"grad_norm": 1.3759478330612183,
"learning_rate": 1.5874620829120325e-05,
"loss": 0.9154,
"step": 16700
},
{
"epoch": 0.6931836936788249,
"grad_norm": 1.2098520994186401,
"learning_rate": 1.566397034041119e-05,
"loss": 0.9264,
"step": 16800
},
{
"epoch": 0.6973097870935798,
"grad_norm": 1.6775233745574951,
"learning_rate": 1.5453319851702056e-05,
"loss": 0.9309,
"step": 16900
},
{
"epoch": 0.7014358805083347,
"grad_norm": 1.0574172735214233,
"learning_rate": 1.5242669362992923e-05,
"loss": 0.8893,
"step": 17000
},
{
"epoch": 0.7055619739230896,
"grad_norm": 1.035610318183899,
"learning_rate": 1.503201887428379e-05,
"loss": 0.9243,
"step": 17100
},
{
"epoch": 0.7096880673378445,
"grad_norm": 1.6291944980621338,
"learning_rate": 1.4821368385574655e-05,
"loss": 0.9158,
"step": 17200
},
{
"epoch": 0.7138141607525994,
"grad_norm": 1.2090740203857422,
"learning_rate": 1.4610717896865522e-05,
"loss": 0.9026,
"step": 17300
},
{
"epoch": 0.7179402541673543,
"grad_norm": 1.2179425954818726,
"learning_rate": 1.4400067408156388e-05,
"loss": 0.8943,
"step": 17400
},
{
"epoch": 0.7220663475821093,
"grad_norm": 1.2382631301879883,
"learning_rate": 1.4189416919447255e-05,
"loss": 0.9021,
"step": 17500
},
{
"epoch": 0.7261924409968642,
"grad_norm": 2.4923956394195557,
"learning_rate": 1.3978766430738118e-05,
"loss": 0.9348,
"step": 17600
},
{
"epoch": 0.7303185344116191,
"grad_norm": 2.419496774673462,
"learning_rate": 1.3768115942028985e-05,
"loss": 0.9039,
"step": 17700
},
{
"epoch": 0.734444627826374,
"grad_norm": 1.2352160215377808,
"learning_rate": 1.3557465453319854e-05,
"loss": 0.9266,
"step": 17800
},
{
"epoch": 0.7385707212411289,
"grad_norm": 1.0967360734939575,
"learning_rate": 1.3346814964610718e-05,
"loss": 0.9046,
"step": 17900
},
{
"epoch": 0.7426968146558838,
"grad_norm": 1.0056049823760986,
"learning_rate": 1.3136164475901585e-05,
"loss": 0.9321,
"step": 18000
},
{
"epoch": 0.7468229080706387,
"grad_norm": 1.9823698997497559,
"learning_rate": 1.292551398719245e-05,
"loss": 0.9151,
"step": 18100
},
{
"epoch": 0.7509490014853937,
"grad_norm": 1.651145577430725,
"learning_rate": 1.2714863498483317e-05,
"loss": 0.904,
"step": 18200
},
{
"epoch": 0.7550750949001486,
"grad_norm": 0.9505665302276611,
"learning_rate": 1.2504213009774183e-05,
"loss": 0.878,
"step": 18300
},
{
"epoch": 0.7592011883149035,
"grad_norm": 1.558278203010559,
"learning_rate": 1.229356252106505e-05,
"loss": 0.942,
"step": 18400
},
{
"epoch": 0.7633272817296584,
"grad_norm": 1.2101174592971802,
"learning_rate": 1.2082912032355915e-05,
"loss": 0.9034,
"step": 18500
},
{
"epoch": 0.7674533751444133,
"grad_norm": 1.2382097244262695,
"learning_rate": 1.1872261543646782e-05,
"loss": 0.9119,
"step": 18600
},
{
"epoch": 0.7715794685591681,
"grad_norm": 1.1424338817596436,
"learning_rate": 1.1661611054937648e-05,
"loss": 0.9282,
"step": 18700
},
{
"epoch": 0.775705561973923,
"grad_norm": 1.0747746229171753,
"learning_rate": 1.1450960566228513e-05,
"loss": 0.9144,
"step": 18800
},
{
"epoch": 0.779831655388678,
"grad_norm": 1.4378238916397095,
"learning_rate": 1.1240310077519382e-05,
"loss": 0.9292,
"step": 18900
},
{
"epoch": 0.7839577488034329,
"grad_norm": 1.5118451118469238,
"learning_rate": 1.1029659588810247e-05,
"loss": 0.8532,
"step": 19000
},
{
"epoch": 0.7880838422181878,
"grad_norm": 1.135190725326538,
"learning_rate": 1.0819009100101113e-05,
"loss": 0.9132,
"step": 19100
},
{
"epoch": 0.7922099356329427,
"grad_norm": 1.3497545719146729,
"learning_rate": 1.060835861139198e-05,
"loss": 0.9214,
"step": 19200
},
{
"epoch": 0.7963360290476976,
"grad_norm": 1.3251924514770508,
"learning_rate": 1.0397708122682845e-05,
"loss": 0.8942,
"step": 19300
},
{
"epoch": 0.8004621224624525,
"grad_norm": 2.453803539276123,
"learning_rate": 1.018705763397371e-05,
"loss": 0.8858,
"step": 19400
},
{
"epoch": 0.8045882158772074,
"grad_norm": 1.1651134490966797,
"learning_rate": 9.976407145264577e-06,
"loss": 0.9012,
"step": 19500
},
{
"epoch": 0.8087143092919624,
"grad_norm": 2.257159471511841,
"learning_rate": 9.765756656555444e-06,
"loss": 0.9167,
"step": 19600
},
{
"epoch": 0.8128404027067173,
"grad_norm": 0.9240596294403076,
"learning_rate": 9.55510616784631e-06,
"loss": 0.8764,
"step": 19700
},
{
"epoch": 0.8169664961214722,
"grad_norm": 1.2550618648529053,
"learning_rate": 9.344455679137177e-06,
"loss": 0.8998,
"step": 19800
},
{
"epoch": 0.8210925895362271,
"grad_norm": 1.2276984453201294,
"learning_rate": 9.133805190428042e-06,
"loss": 0.909,
"step": 19900
},
{
"epoch": 0.825218682950982,
"grad_norm": 1.0953816175460815,
"learning_rate": 8.923154701718908e-06,
"loss": 0.8931,
"step": 20000
},
{
"epoch": 0.8293447763657369,
"grad_norm": 1.469269037246704,
"learning_rate": 8.712504213009775e-06,
"loss": 0.8789,
"step": 20100
},
{
"epoch": 0.8334708697804918,
"grad_norm": 1.242390751838684,
"learning_rate": 8.50185372430064e-06,
"loss": 0.9126,
"step": 20200
},
{
"epoch": 0.8375969631952468,
"grad_norm": 1.0811703205108643,
"learning_rate": 8.291203235591507e-06,
"loss": 0.913,
"step": 20300
},
{
"epoch": 0.8417230566100017,
"grad_norm": 1.0523350238800049,
"learning_rate": 8.080552746882374e-06,
"loss": 0.9118,
"step": 20400
},
{
"epoch": 0.8458491500247566,
"grad_norm": 1.4592727422714233,
"learning_rate": 7.86990225817324e-06,
"loss": 0.9099,
"step": 20500
},
{
"epoch": 0.8499752434395115,
"grad_norm": 1.0648339986801147,
"learning_rate": 7.659251769464105e-06,
"loss": 0.9198,
"step": 20600
},
{
"epoch": 0.8541013368542664,
"grad_norm": 1.3053339719772339,
"learning_rate": 7.448601280754971e-06,
"loss": 0.9156,
"step": 20700
},
{
"epoch": 0.8582274302690213,
"grad_norm": 1.0929012298583984,
"learning_rate": 7.237950792045837e-06,
"loss": 0.9103,
"step": 20800
},
{
"epoch": 0.8623535236837762,
"grad_norm": 1.234263300895691,
"learning_rate": 7.027300303336704e-06,
"loss": 0.9016,
"step": 20900
},
{
"epoch": 0.8664796170985312,
"grad_norm": 1.3241745233535767,
"learning_rate": 6.816649814627571e-06,
"loss": 0.9075,
"step": 21000
},
{
"epoch": 0.8706057105132861,
"grad_norm": 1.2847357988357544,
"learning_rate": 6.605999325918437e-06,
"loss": 0.9462,
"step": 21100
},
{
"epoch": 0.874731803928041,
"grad_norm": 1.1206868886947632,
"learning_rate": 6.395348837209303e-06,
"loss": 0.9236,
"step": 21200
},
{
"epoch": 0.8788578973427958,
"grad_norm": 1.1748895645141602,
"learning_rate": 6.1846983485001685e-06,
"loss": 0.8521,
"step": 21300
},
{
"epoch": 0.8829839907575507,
"grad_norm": 1.571519136428833,
"learning_rate": 5.974047859791035e-06,
"loss": 0.8811,
"step": 21400
},
{
"epoch": 0.8871100841723056,
"grad_norm": 1.051316738128662,
"learning_rate": 5.763397371081901e-06,
"loss": 0.9167,
"step": 21500
},
{
"epoch": 0.8912361775870605,
"grad_norm": 2.111393690109253,
"learning_rate": 5.552746882372767e-06,
"loss": 0.9333,
"step": 21600
},
{
"epoch": 0.8953622710018155,
"grad_norm": 1.3969411849975586,
"learning_rate": 5.342096393663633e-06,
"loss": 0.9434,
"step": 21700
},
{
"epoch": 0.8994883644165704,
"grad_norm": 1.7783890962600708,
"learning_rate": 5.1314459049545e-06,
"loss": 0.8947,
"step": 21800
},
{
"epoch": 0.9036144578313253,
"grad_norm": 1.359174132347107,
"learning_rate": 4.920795416245366e-06,
"loss": 0.8815,
"step": 21900
},
{
"epoch": 0.9077405512460802,
"grad_norm": 1.257117748260498,
"learning_rate": 4.710144927536232e-06,
"loss": 0.8986,
"step": 22000
},
{
"epoch": 0.9118666446608351,
"grad_norm": 0.9748762845993042,
"learning_rate": 4.499494438827098e-06,
"loss": 0.887,
"step": 22100
},
{
"epoch": 0.91599273807559,
"grad_norm": 1.5360727310180664,
"learning_rate": 4.2888439501179645e-06,
"loss": 0.9051,
"step": 22200
},
{
"epoch": 0.9201188314903449,
"grad_norm": 1.0747774839401245,
"learning_rate": 4.078193461408831e-06,
"loss": 0.9498,
"step": 22300
},
{
"epoch": 0.9242449249050998,
"grad_norm": 1.197403073310852,
"learning_rate": 3.867542972699697e-06,
"loss": 0.925,
"step": 22400
},
{
"epoch": 0.9283710183198548,
"grad_norm": 1.580825924873352,
"learning_rate": 3.6568924839905627e-06,
"loss": 0.9019,
"step": 22500
},
{
"epoch": 0.9324971117346097,
"grad_norm": 1.2338446378707886,
"learning_rate": 3.4462419952814294e-06,
"loss": 0.892,
"step": 22600
},
{
"epoch": 0.9366232051493646,
"grad_norm": 2.6846702098846436,
"learning_rate": 3.2355915065722956e-06,
"loss": 0.9121,
"step": 22700
},
{
"epoch": 0.9407492985641195,
"grad_norm": 1.0765039920806885,
"learning_rate": 3.0249410178631614e-06,
"loss": 0.9101,
"step": 22800
},
{
"epoch": 0.9448753919788744,
"grad_norm": 1.2273006439208984,
"learning_rate": 2.814290529154028e-06,
"loss": 0.8916,
"step": 22900
},
{
"epoch": 0.9490014853936293,
"grad_norm": 1.295823574066162,
"learning_rate": 2.603640040444894e-06,
"loss": 0.8958,
"step": 23000
},
{
"epoch": 0.9531275788083842,
"grad_norm": 1.5502872467041016,
"learning_rate": 2.39298955173576e-06,
"loss": 0.9069,
"step": 23100
},
{
"epoch": 0.9572536722231392,
"grad_norm": 2.524392604827881,
"learning_rate": 2.1823390630266263e-06,
"loss": 0.9301,
"step": 23200
},
{
"epoch": 0.9613797656378941,
"grad_norm": 1.5065919160842896,
"learning_rate": 1.9716885743174925e-06,
"loss": 0.8903,
"step": 23300
},
{
"epoch": 0.965505859052649,
"grad_norm": 1.1356451511383057,
"learning_rate": 1.761038085608359e-06,
"loss": 0.881,
"step": 23400
},
{
"epoch": 0.9696319524674039,
"grad_norm": 6.048961162567139,
"learning_rate": 1.550387596899225e-06,
"loss": 0.9164,
"step": 23500
},
{
"epoch": 0.9737580458821588,
"grad_norm": 1.1151750087738037,
"learning_rate": 1.339737108190091e-06,
"loss": 0.8703,
"step": 23600
},
{
"epoch": 0.9778841392969136,
"grad_norm": 1.0021706819534302,
"learning_rate": 1.1290866194809571e-06,
"loss": 0.8978,
"step": 23700
},
{
"epoch": 0.9820102327116685,
"grad_norm": 1.1401609182357788,
"learning_rate": 9.184361307718234e-07,
"loss": 0.9159,
"step": 23800
},
{
"epoch": 0.9861363261264235,
"grad_norm": 1.0629512071609497,
"learning_rate": 7.077856420626896e-07,
"loss": 0.9073,
"step": 23900
},
{
"epoch": 0.9902624195411784,
"grad_norm": 1.0977869033813477,
"learning_rate": 4.971351533535558e-07,
"loss": 0.9178,
"step": 24000
},
{
"epoch": 0.9943885129559333,
"grad_norm": 1.281014323234558,
"learning_rate": 2.8648466464442196e-07,
"loss": 0.8876,
"step": 24100
},
{
"epoch": 0.9985146063706882,
"grad_norm": 1.2343029975891113,
"learning_rate": 7.583417593528817e-08,
"loss": 0.8733,
"step": 24200
}
],
"logging_steps": 100,
"max_steps": 24236,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.002725362748621e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}