|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 24236, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00412609341475491, |
|
"grad_norm": 14.695940971374512, |
|
"learning_rate": 1e-05, |
|
"loss": 8.626, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.00825218682950982, |
|
"grad_norm": 19.695737838745117, |
|
"learning_rate": 2e-05, |
|
"loss": 7.5714, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01237828024426473, |
|
"grad_norm": 7.499746799468994, |
|
"learning_rate": 3e-05, |
|
"loss": 6.9595, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.01650437365901964, |
|
"grad_norm": 22.69700813293457, |
|
"learning_rate": 4e-05, |
|
"loss": 5.6622, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.02063046707377455, |
|
"grad_norm": 16.763093948364258, |
|
"learning_rate": 5e-05, |
|
"loss": 1.9119, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02475656048852946, |
|
"grad_norm": 3.74697208404541, |
|
"learning_rate": 4.978934951129087e-05, |
|
"loss": 1.6244, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.02888265390328437, |
|
"grad_norm": 6.804798126220703, |
|
"learning_rate": 4.957869902258174e-05, |
|
"loss": 1.4867, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.03300874731803928, |
|
"grad_norm": 4.382748603820801, |
|
"learning_rate": 4.93680485338726e-05, |
|
"loss": 1.4434, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.03713484073279419, |
|
"grad_norm": 2.020482063293457, |
|
"learning_rate": 4.9157398045163464e-05, |
|
"loss": 1.3701, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.0412609341475491, |
|
"grad_norm": 1.9225651025772095, |
|
"learning_rate": 4.894674755645433e-05, |
|
"loss": 1.383, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.04538702756230401, |
|
"grad_norm": 1.8698792457580566, |
|
"learning_rate": 4.87360970677452e-05, |
|
"loss": 1.3512, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.04951312097705892, |
|
"grad_norm": 4.490991592407227, |
|
"learning_rate": 4.852544657903607e-05, |
|
"loss": 1.3039, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.05363921439181383, |
|
"grad_norm": 2.697434186935425, |
|
"learning_rate": 4.831479609032693e-05, |
|
"loss": 1.3085, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.05776530780656874, |
|
"grad_norm": 3.3568286895751953, |
|
"learning_rate": 4.81041456016178e-05, |
|
"loss": 1.2915, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.06189140122132365, |
|
"grad_norm": 2.012889862060547, |
|
"learning_rate": 4.789349511290866e-05, |
|
"loss": 1.2778, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.06601749463607856, |
|
"grad_norm": 4.024045467376709, |
|
"learning_rate": 4.768284462419953e-05, |
|
"loss": 1.2688, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.07014358805083347, |
|
"grad_norm": 2.241870880126953, |
|
"learning_rate": 4.7472194135490394e-05, |
|
"loss": 1.2592, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.07426968146558838, |
|
"grad_norm": 3.7178213596343994, |
|
"learning_rate": 4.7261543646781266e-05, |
|
"loss": 1.2112, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.07839577488034329, |
|
"grad_norm": 2.036505937576294, |
|
"learning_rate": 4.705089315807213e-05, |
|
"loss": 1.2126, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.0825218682950982, |
|
"grad_norm": 1.7717580795288086, |
|
"learning_rate": 4.6840242669362997e-05, |
|
"loss": 1.191, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.08664796170985312, |
|
"grad_norm": 1.8078298568725586, |
|
"learning_rate": 4.662959218065386e-05, |
|
"loss": 1.199, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.09077405512460802, |
|
"grad_norm": 4.067634582519531, |
|
"learning_rate": 4.641894169194473e-05, |
|
"loss": 1.1415, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.09490014853936293, |
|
"grad_norm": 1.9144686460494995, |
|
"learning_rate": 4.620829120323559e-05, |
|
"loss": 1.1486, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.09902624195411784, |
|
"grad_norm": 1.4644508361816406, |
|
"learning_rate": 4.599764071452646e-05, |
|
"loss": 1.1665, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.10315233536887275, |
|
"grad_norm": 1.6876461505889893, |
|
"learning_rate": 4.578699022581733e-05, |
|
"loss": 1.1197, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.10727842878362766, |
|
"grad_norm": 1.6274546384811401, |
|
"learning_rate": 4.5576339737108196e-05, |
|
"loss": 1.1505, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.11140452219838257, |
|
"grad_norm": 2.2767255306243896, |
|
"learning_rate": 4.5365689248399054e-05, |
|
"loss": 1.1376, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.11553061561313747, |
|
"grad_norm": 2.0346932411193848, |
|
"learning_rate": 4.5155038759689926e-05, |
|
"loss": 1.0817, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.1196567090278924, |
|
"grad_norm": 1.5901857614517212, |
|
"learning_rate": 4.494438827098079e-05, |
|
"loss": 1.0919, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.1237828024426473, |
|
"grad_norm": 2.1194183826446533, |
|
"learning_rate": 4.473373778227166e-05, |
|
"loss": 1.0813, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.1279088958574022, |
|
"grad_norm": 4.601478576660156, |
|
"learning_rate": 4.452308729356252e-05, |
|
"loss": 1.1204, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.13203498927215712, |
|
"grad_norm": 2.3545944690704346, |
|
"learning_rate": 4.431243680485339e-05, |
|
"loss": 1.0488, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.13616108268691204, |
|
"grad_norm": 1.468526005744934, |
|
"learning_rate": 4.410178631614425e-05, |
|
"loss": 1.1108, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.14028717610166694, |
|
"grad_norm": 1.5844910144805908, |
|
"learning_rate": 4.389113582743512e-05, |
|
"loss": 1.1114, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.14441326951642186, |
|
"grad_norm": 4.228846073150635, |
|
"learning_rate": 4.368048533872599e-05, |
|
"loss": 1.0961, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.14853936293117675, |
|
"grad_norm": 1.4088937044143677, |
|
"learning_rate": 4.3469834850016856e-05, |
|
"loss": 1.0758, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.15266545634593168, |
|
"grad_norm": 1.6613353490829468, |
|
"learning_rate": 4.325918436130772e-05, |
|
"loss": 1.0916, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.15679154976068657, |
|
"grad_norm": 1.6648322343826294, |
|
"learning_rate": 4.304853387259859e-05, |
|
"loss": 1.0784, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.1609176431754415, |
|
"grad_norm": 1.387666940689087, |
|
"learning_rate": 4.283788338388945e-05, |
|
"loss": 1.0894, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.1650437365901964, |
|
"grad_norm": 1.416618824005127, |
|
"learning_rate": 4.262723289518032e-05, |
|
"loss": 1.0283, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.1691698300049513, |
|
"grad_norm": 1.569306492805481, |
|
"learning_rate": 4.241658240647118e-05, |
|
"loss": 1.0513, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.17329592341970623, |
|
"grad_norm": 1.4747782945632935, |
|
"learning_rate": 4.2205931917762055e-05, |
|
"loss": 1.0531, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.17742201683446113, |
|
"grad_norm": 2.1925017833709717, |
|
"learning_rate": 4.199528142905292e-05, |
|
"loss": 1.058, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.18154811024921605, |
|
"grad_norm": 1.9729565382003784, |
|
"learning_rate": 4.1784630940343786e-05, |
|
"loss": 1.0806, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.18567420366397094, |
|
"grad_norm": 1.5546541213989258, |
|
"learning_rate": 4.1573980451634644e-05, |
|
"loss": 1.0746, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.18980029707872587, |
|
"grad_norm": 1.8433148860931396, |
|
"learning_rate": 4.1363329962925517e-05, |
|
"loss": 1.0527, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.19392639049348076, |
|
"grad_norm": 1.3673489093780518, |
|
"learning_rate": 4.115267947421638e-05, |
|
"loss": 1.0772, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.19805248390823568, |
|
"grad_norm": 1.8290094137191772, |
|
"learning_rate": 4.094202898550725e-05, |
|
"loss": 1.0424, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.2021785773229906, |
|
"grad_norm": 2.0811445713043213, |
|
"learning_rate": 4.073137849679812e-05, |
|
"loss": 1.0498, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.2063046707377455, |
|
"grad_norm": 1.2849047183990479, |
|
"learning_rate": 4.0520728008088985e-05, |
|
"loss": 1.0341, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.21043076415250042, |
|
"grad_norm": 3.288480281829834, |
|
"learning_rate": 4.0310077519379843e-05, |
|
"loss": 1.0273, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.21455685756725532, |
|
"grad_norm": 1.2297766208648682, |
|
"learning_rate": 4.009942703067071e-05, |
|
"loss": 1.0768, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.21868295098201024, |
|
"grad_norm": 1.4169477224349976, |
|
"learning_rate": 3.988877654196158e-05, |
|
"loss": 1.0334, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.22280904439676513, |
|
"grad_norm": 1.314010739326477, |
|
"learning_rate": 3.9678126053252446e-05, |
|
"loss": 1.0208, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.22693513781152005, |
|
"grad_norm": 1.5176063776016235, |
|
"learning_rate": 3.946747556454331e-05, |
|
"loss": 1.0561, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.23106123122627495, |
|
"grad_norm": 1.418303370475769, |
|
"learning_rate": 3.9256825075834184e-05, |
|
"loss": 1.0223, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.23518732464102987, |
|
"grad_norm": 1.8820278644561768, |
|
"learning_rate": 3.904617458712504e-05, |
|
"loss": 1.0333, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.2393134180557848, |
|
"grad_norm": 1.7610660791397095, |
|
"learning_rate": 3.883552409841591e-05, |
|
"loss": 1.007, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.2434395114705397, |
|
"grad_norm": 7.877830982208252, |
|
"learning_rate": 3.862487360970677e-05, |
|
"loss": 1.0138, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.2475656048852946, |
|
"grad_norm": 1.4319870471954346, |
|
"learning_rate": 3.8414223120997645e-05, |
|
"loss": 1.0103, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.2516916983000495, |
|
"grad_norm": 1.4879227876663208, |
|
"learning_rate": 3.820357263228851e-05, |
|
"loss": 1.0219, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.2558177917148044, |
|
"grad_norm": 1.3280787467956543, |
|
"learning_rate": 3.7992922143579376e-05, |
|
"loss": 1.0157, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.25994388512955935, |
|
"grad_norm": 2.4915549755096436, |
|
"learning_rate": 3.778227165487024e-05, |
|
"loss": 1.0422, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.26406997854431424, |
|
"grad_norm": 1.3016897439956665, |
|
"learning_rate": 3.757162116616111e-05, |
|
"loss": 1.004, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.26819607195906914, |
|
"grad_norm": 1.722939372062683, |
|
"learning_rate": 3.736097067745197e-05, |
|
"loss": 1.0196, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.2723221653738241, |
|
"grad_norm": 1.4764331579208374, |
|
"learning_rate": 3.715032018874284e-05, |
|
"loss": 0.9871, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.276448258788579, |
|
"grad_norm": 1.344777226448059, |
|
"learning_rate": 3.693966970003371e-05, |
|
"loss": 1.0249, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.2805743522033339, |
|
"grad_norm": 1.1756465435028076, |
|
"learning_rate": 3.6729019211324575e-05, |
|
"loss": 1.0506, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.28470044561808877, |
|
"grad_norm": 1.3845124244689941, |
|
"learning_rate": 3.6518368722615434e-05, |
|
"loss": 1.0041, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.2888265390328437, |
|
"grad_norm": 1.074078917503357, |
|
"learning_rate": 3.6307718233906306e-05, |
|
"loss": 0.9849, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.2929526324475986, |
|
"grad_norm": 2.0719516277313232, |
|
"learning_rate": 3.609706774519717e-05, |
|
"loss": 1.0022, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.2970787258623535, |
|
"grad_norm": 1.1381429433822632, |
|
"learning_rate": 3.5886417256488037e-05, |
|
"loss": 0.9409, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.30120481927710846, |
|
"grad_norm": 1.2426626682281494, |
|
"learning_rate": 3.56757667677789e-05, |
|
"loss": 1.0225, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.30533091269186335, |
|
"grad_norm": 2.445568561553955, |
|
"learning_rate": 3.5465116279069774e-05, |
|
"loss": 0.9725, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.30945700610661825, |
|
"grad_norm": 1.2126537561416626, |
|
"learning_rate": 3.525446579036063e-05, |
|
"loss": 1.0005, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.31358309952137314, |
|
"grad_norm": 2.634969472885132, |
|
"learning_rate": 3.50438153016515e-05, |
|
"loss": 1.0079, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.3177091929361281, |
|
"grad_norm": 1.4859946966171265, |
|
"learning_rate": 3.483316481294237e-05, |
|
"loss": 1.0192, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.321835286350883, |
|
"grad_norm": 1.3265373706817627, |
|
"learning_rate": 3.4622514324233236e-05, |
|
"loss": 0.9836, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.3259613797656379, |
|
"grad_norm": 1.569514513015747, |
|
"learning_rate": 3.44118638355241e-05, |
|
"loss": 1.002, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.3300874731803928, |
|
"grad_norm": 1.718145728111267, |
|
"learning_rate": 3.4201213346814966e-05, |
|
"loss": 0.9599, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.3342135665951477, |
|
"grad_norm": 1.2960829734802246, |
|
"learning_rate": 3.399056285810583e-05, |
|
"loss": 1.0286, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.3383396600099026, |
|
"grad_norm": 1.3030658960342407, |
|
"learning_rate": 3.37799123693967e-05, |
|
"loss": 0.9592, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.3424657534246575, |
|
"grad_norm": 1.6679294109344482, |
|
"learning_rate": 3.356926188068756e-05, |
|
"loss": 0.9823, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.34659184683941247, |
|
"grad_norm": 1.079559326171875, |
|
"learning_rate": 3.335861139197843e-05, |
|
"loss": 0.9749, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.35071794025416736, |
|
"grad_norm": 1.27901029586792, |
|
"learning_rate": 3.31479609032693e-05, |
|
"loss": 0.9801, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.35484403366892225, |
|
"grad_norm": 1.292656421661377, |
|
"learning_rate": 3.2937310414560165e-05, |
|
"loss": 0.9824, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.35897012708367715, |
|
"grad_norm": 1.2524762153625488, |
|
"learning_rate": 3.272665992585103e-05, |
|
"loss": 0.943, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.3630962204984321, |
|
"grad_norm": 2.4386353492736816, |
|
"learning_rate": 3.2516009437141896e-05, |
|
"loss": 0.9738, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.367222313913187, |
|
"grad_norm": 1.2332638502120972, |
|
"learning_rate": 3.230535894843276e-05, |
|
"loss": 0.9599, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.3713484073279419, |
|
"grad_norm": 1.3955186605453491, |
|
"learning_rate": 3.209470845972363e-05, |
|
"loss": 1.0027, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.37547450074269684, |
|
"grad_norm": 1.7736716270446777, |
|
"learning_rate": 3.188405797101449e-05, |
|
"loss": 0.9568, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.37960059415745173, |
|
"grad_norm": 1.1282614469528198, |
|
"learning_rate": 3.1673407482305364e-05, |
|
"loss": 0.9892, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.3837266875722066, |
|
"grad_norm": 4.226625442504883, |
|
"learning_rate": 3.146275699359622e-05, |
|
"loss": 0.957, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.3878527809869615, |
|
"grad_norm": 1.3062007427215576, |
|
"learning_rate": 3.125210650488709e-05, |
|
"loss": 0.9702, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.39197887440171647, |
|
"grad_norm": 1.5109843015670776, |
|
"learning_rate": 3.104145601617796e-05, |
|
"loss": 0.9406, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.39610496781647136, |
|
"grad_norm": 1.2154899835586548, |
|
"learning_rate": 3.0830805527468826e-05, |
|
"loss": 0.9532, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.40023106123122626, |
|
"grad_norm": 1.239396095275879, |
|
"learning_rate": 3.062015503875969e-05, |
|
"loss": 0.9484, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.4043571546459812, |
|
"grad_norm": 1.3215525150299072, |
|
"learning_rate": 3.0409504550050553e-05, |
|
"loss": 0.978, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.4084832480607361, |
|
"grad_norm": 1.149057149887085, |
|
"learning_rate": 3.0198854061341425e-05, |
|
"loss": 0.968, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.412609341475491, |
|
"grad_norm": 1.271074652671814, |
|
"learning_rate": 2.998820357263229e-05, |
|
"loss": 0.9794, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.4167354348902459, |
|
"grad_norm": 1.0992262363433838, |
|
"learning_rate": 2.9777553083923153e-05, |
|
"loss": 0.9594, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.42086152830500084, |
|
"grad_norm": 1.1205365657806396, |
|
"learning_rate": 2.9566902595214025e-05, |
|
"loss": 0.9439, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.42498762171975574, |
|
"grad_norm": 1.144080638885498, |
|
"learning_rate": 2.935625210650489e-05, |
|
"loss": 0.9745, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.42911371513451063, |
|
"grad_norm": 3.2051868438720703, |
|
"learning_rate": 2.9145601617795752e-05, |
|
"loss": 0.9594, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.4332398085492656, |
|
"grad_norm": 1.2232369184494019, |
|
"learning_rate": 2.8934951129086618e-05, |
|
"loss": 0.9644, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.4373659019640205, |
|
"grad_norm": 1.3971831798553467, |
|
"learning_rate": 2.872430064037749e-05, |
|
"loss": 0.987, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.44149199537877537, |
|
"grad_norm": 1.1187039613723755, |
|
"learning_rate": 2.8513650151668352e-05, |
|
"loss": 0.9657, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.44561808879353026, |
|
"grad_norm": 1.1717453002929688, |
|
"learning_rate": 2.8302999662959217e-05, |
|
"loss": 0.9363, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.4497441822082852, |
|
"grad_norm": 1.4479399919509888, |
|
"learning_rate": 2.809234917425009e-05, |
|
"loss": 0.9428, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.4538702756230401, |
|
"grad_norm": 1.1537368297576904, |
|
"learning_rate": 2.788169868554095e-05, |
|
"loss": 0.9654, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.457996369037795, |
|
"grad_norm": 1.9704123735427856, |
|
"learning_rate": 2.7671048196831817e-05, |
|
"loss": 0.944, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.4621224624525499, |
|
"grad_norm": 1.3609466552734375, |
|
"learning_rate": 2.7460397708122682e-05, |
|
"loss": 0.9353, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.46624855586730485, |
|
"grad_norm": 1.3835324048995972, |
|
"learning_rate": 2.724974721941355e-05, |
|
"loss": 0.9238, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.47037464928205974, |
|
"grad_norm": 2.1749815940856934, |
|
"learning_rate": 2.7039096730704416e-05, |
|
"loss": 0.9215, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.47450074269681464, |
|
"grad_norm": 1.941735863685608, |
|
"learning_rate": 2.682844624199528e-05, |
|
"loss": 0.9607, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.4786268361115696, |
|
"grad_norm": 1.9667292833328247, |
|
"learning_rate": 2.661779575328615e-05, |
|
"loss": 0.9493, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.4827529295263245, |
|
"grad_norm": 1.1912260055541992, |
|
"learning_rate": 2.6407145264577016e-05, |
|
"loss": 0.9364, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.4868790229410794, |
|
"grad_norm": 1.2728015184402466, |
|
"learning_rate": 2.619649477586788e-05, |
|
"loss": 0.9135, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.49100511635583427, |
|
"grad_norm": 1.326409935951233, |
|
"learning_rate": 2.5985844287158746e-05, |
|
"loss": 0.9665, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.4951312097705892, |
|
"grad_norm": 1.4567406177520752, |
|
"learning_rate": 2.5775193798449615e-05, |
|
"loss": 0.9733, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.4992573031853441, |
|
"grad_norm": 1.3147661685943604, |
|
"learning_rate": 2.556454330974048e-05, |
|
"loss": 0.9204, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.503383396600099, |
|
"grad_norm": 1.6704838275909424, |
|
"learning_rate": 2.5353892821031346e-05, |
|
"loss": 0.9418, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.5075094900148539, |
|
"grad_norm": 1.2493371963500977, |
|
"learning_rate": 2.5143242332322215e-05, |
|
"loss": 0.9441, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.5116355834296088, |
|
"grad_norm": 1.2380743026733398, |
|
"learning_rate": 2.493259184361308e-05, |
|
"loss": 0.9642, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.5157616768443638, |
|
"grad_norm": 1.487196922302246, |
|
"learning_rate": 2.4721941354903942e-05, |
|
"loss": 0.986, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.5198877702591187, |
|
"grad_norm": 1.2720383405685425, |
|
"learning_rate": 2.451129086619481e-05, |
|
"loss": 0.9614, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.5240138636738736, |
|
"grad_norm": 1.3985182046890259, |
|
"learning_rate": 2.4300640377485676e-05, |
|
"loss": 0.9327, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.5281399570886285, |
|
"grad_norm": 1.2555489540100098, |
|
"learning_rate": 2.408998988877654e-05, |
|
"loss": 0.9331, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.5322660505033834, |
|
"grad_norm": 1.083095908164978, |
|
"learning_rate": 2.387933940006741e-05, |
|
"loss": 0.9706, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.5363921439181383, |
|
"grad_norm": 3.2246696949005127, |
|
"learning_rate": 2.3668688911358276e-05, |
|
"loss": 0.9267, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.5405182373328932, |
|
"grad_norm": 1.2211159467697144, |
|
"learning_rate": 2.345803842264914e-05, |
|
"loss": 0.9315, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.5446443307476482, |
|
"grad_norm": 1.3726495504379272, |
|
"learning_rate": 2.3247387933940006e-05, |
|
"loss": 0.9432, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.5487704241624031, |
|
"grad_norm": 1.0996991395950317, |
|
"learning_rate": 2.3036737445230875e-05, |
|
"loss": 0.9213, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.552896517577158, |
|
"grad_norm": 1.016136884689331, |
|
"learning_rate": 2.282608695652174e-05, |
|
"loss": 0.9625, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.5570226109919129, |
|
"grad_norm": 1.1178189516067505, |
|
"learning_rate": 2.2615436467812606e-05, |
|
"loss": 0.9419, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.5611487044066678, |
|
"grad_norm": 1.1706444025039673, |
|
"learning_rate": 2.2404785979103475e-05, |
|
"loss": 0.885, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.5652747978214226, |
|
"grad_norm": 1.4330129623413086, |
|
"learning_rate": 2.2194135490394337e-05, |
|
"loss": 0.9024, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.5694008912361775, |
|
"grad_norm": 2.2776172161102295, |
|
"learning_rate": 2.1983485001685205e-05, |
|
"loss": 0.933, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.5735269846509325, |
|
"grad_norm": 1.3359657526016235, |
|
"learning_rate": 2.177283451297607e-05, |
|
"loss": 0.8791, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.5776530780656874, |
|
"grad_norm": 1.1592367887496948, |
|
"learning_rate": 2.1562184024266936e-05, |
|
"loss": 0.9336, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.5817791714804423, |
|
"grad_norm": 1.052618145942688, |
|
"learning_rate": 2.13515335355578e-05, |
|
"loss": 0.9303, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.5859052648951972, |
|
"grad_norm": 1.2330833673477173, |
|
"learning_rate": 2.114088304684867e-05, |
|
"loss": 0.9247, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.5900313583099521, |
|
"grad_norm": 1.7336995601654053, |
|
"learning_rate": 2.0930232558139536e-05, |
|
"loss": 0.9078, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.594157451724707, |
|
"grad_norm": 1.1562308073043823, |
|
"learning_rate": 2.07195820694304e-05, |
|
"loss": 0.905, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.5982835451394619, |
|
"grad_norm": 1.3212171792984009, |
|
"learning_rate": 2.050893158072127e-05, |
|
"loss": 0.9457, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.6024096385542169, |
|
"grad_norm": 1.5021255016326904, |
|
"learning_rate": 2.0298281092012135e-05, |
|
"loss": 0.9213, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.6065357319689718, |
|
"grad_norm": 1.1142035722732544, |
|
"learning_rate": 2.0087630603303e-05, |
|
"loss": 0.8988, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.6106618253837267, |
|
"grad_norm": 1.0887188911437988, |
|
"learning_rate": 1.9876980114593866e-05, |
|
"loss": 0.9579, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.6147879187984816, |
|
"grad_norm": 1.5622923374176025, |
|
"learning_rate": 1.966632962588473e-05, |
|
"loss": 0.9206, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.6189140122132365, |
|
"grad_norm": 1.4978774785995483, |
|
"learning_rate": 1.94556791371756e-05, |
|
"loss": 0.9292, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.6230401056279914, |
|
"grad_norm": 1.1494709253311157, |
|
"learning_rate": 1.9245028648466465e-05, |
|
"loss": 0.929, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.6271661990427463, |
|
"grad_norm": 3.5858824253082275, |
|
"learning_rate": 1.903437815975733e-05, |
|
"loss": 0.9181, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.6312922924575013, |
|
"grad_norm": 0.927173376083374, |
|
"learning_rate": 1.8823727671048196e-05, |
|
"loss": 0.9365, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.6354183858722562, |
|
"grad_norm": 0.9943380355834961, |
|
"learning_rate": 1.8613077182339065e-05, |
|
"loss": 0.8974, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.6395444792870111, |
|
"grad_norm": 1.4820857048034668, |
|
"learning_rate": 1.840242669362993e-05, |
|
"loss": 0.9066, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.643670572701766, |
|
"grad_norm": 1.3542896509170532, |
|
"learning_rate": 1.8191776204920796e-05, |
|
"loss": 0.9048, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.6477966661165209, |
|
"grad_norm": 2.233414888381958, |
|
"learning_rate": 1.7981125716211664e-05, |
|
"loss": 0.899, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.6519227595312758, |
|
"grad_norm": 1.0770349502563477, |
|
"learning_rate": 1.777047522750253e-05, |
|
"loss": 0.9135, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.6560488529460307, |
|
"grad_norm": 1.1688830852508545, |
|
"learning_rate": 1.7559824738793395e-05, |
|
"loss": 0.8838, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.6601749463607856, |
|
"grad_norm": 1.096822738647461, |
|
"learning_rate": 1.734917425008426e-05, |
|
"loss": 0.9325, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.6643010397755406, |
|
"grad_norm": 1.4621776342391968, |
|
"learning_rate": 1.713852376137513e-05, |
|
"loss": 0.9299, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.6684271331902955, |
|
"grad_norm": 1.2400994300842285, |
|
"learning_rate": 1.692787327266599e-05, |
|
"loss": 0.8986, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.6725532266050503, |
|
"grad_norm": 1.3540397882461548, |
|
"learning_rate": 1.671722278395686e-05, |
|
"loss": 0.9084, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.6766793200198052, |
|
"grad_norm": 1.2045152187347412, |
|
"learning_rate": 1.6506572295247725e-05, |
|
"loss": 0.8943, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.6808054134345601, |
|
"grad_norm": 1.1521943807601929, |
|
"learning_rate": 1.629592180653859e-05, |
|
"loss": 0.9089, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.684931506849315, |
|
"grad_norm": 4.699136257171631, |
|
"learning_rate": 1.608527131782946e-05, |
|
"loss": 0.9169, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.6890576002640699, |
|
"grad_norm": 1.3759478330612183, |
|
"learning_rate": 1.5874620829120325e-05, |
|
"loss": 0.9154, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.6931836936788249, |
|
"grad_norm": 1.2098520994186401, |
|
"learning_rate": 1.566397034041119e-05, |
|
"loss": 0.9264, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.6973097870935798, |
|
"grad_norm": 1.6775233745574951, |
|
"learning_rate": 1.5453319851702056e-05, |
|
"loss": 0.9309, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.7014358805083347, |
|
"grad_norm": 1.0574172735214233, |
|
"learning_rate": 1.5242669362992923e-05, |
|
"loss": 0.8893, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.7055619739230896, |
|
"grad_norm": 1.035610318183899, |
|
"learning_rate": 1.503201887428379e-05, |
|
"loss": 0.9243, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.7096880673378445, |
|
"grad_norm": 1.6291944980621338, |
|
"learning_rate": 1.4821368385574655e-05, |
|
"loss": 0.9158, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.7138141607525994, |
|
"grad_norm": 1.2090740203857422, |
|
"learning_rate": 1.4610717896865522e-05, |
|
"loss": 0.9026, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.7179402541673543, |
|
"grad_norm": 1.2179425954818726, |
|
"learning_rate": 1.4400067408156388e-05, |
|
"loss": 0.8943, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.7220663475821093, |
|
"grad_norm": 1.2382631301879883, |
|
"learning_rate": 1.4189416919447255e-05, |
|
"loss": 0.9021, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.7261924409968642, |
|
"grad_norm": 2.4923956394195557, |
|
"learning_rate": 1.3978766430738118e-05, |
|
"loss": 0.9348, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.7303185344116191, |
|
"grad_norm": 2.419496774673462, |
|
"learning_rate": 1.3768115942028985e-05, |
|
"loss": 0.9039, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.734444627826374, |
|
"grad_norm": 1.2352160215377808, |
|
"learning_rate": 1.3557465453319854e-05, |
|
"loss": 0.9266, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.7385707212411289, |
|
"grad_norm": 1.0967360734939575, |
|
"learning_rate": 1.3346814964610718e-05, |
|
"loss": 0.9046, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.7426968146558838, |
|
"grad_norm": 1.0056049823760986, |
|
"learning_rate": 1.3136164475901585e-05, |
|
"loss": 0.9321, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.7468229080706387, |
|
"grad_norm": 1.9823698997497559, |
|
"learning_rate": 1.292551398719245e-05, |
|
"loss": 0.9151, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.7509490014853937, |
|
"grad_norm": 1.651145577430725, |
|
"learning_rate": 1.2714863498483317e-05, |
|
"loss": 0.904, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.7550750949001486, |
|
"grad_norm": 0.9505665302276611, |
|
"learning_rate": 1.2504213009774183e-05, |
|
"loss": 0.878, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.7592011883149035, |
|
"grad_norm": 1.558278203010559, |
|
"learning_rate": 1.229356252106505e-05, |
|
"loss": 0.942, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.7633272817296584, |
|
"grad_norm": 1.2101174592971802, |
|
"learning_rate": 1.2082912032355915e-05, |
|
"loss": 0.9034, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.7674533751444133, |
|
"grad_norm": 1.2382097244262695, |
|
"learning_rate": 1.1872261543646782e-05, |
|
"loss": 0.9119, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.7715794685591681, |
|
"grad_norm": 1.1424338817596436, |
|
"learning_rate": 1.1661611054937648e-05, |
|
"loss": 0.9282, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.775705561973923, |
|
"grad_norm": 1.0747746229171753, |
|
"learning_rate": 1.1450960566228513e-05, |
|
"loss": 0.9144, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.779831655388678, |
|
"grad_norm": 1.4378238916397095, |
|
"learning_rate": 1.1240310077519382e-05, |
|
"loss": 0.9292, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.7839577488034329, |
|
"grad_norm": 1.5118451118469238, |
|
"learning_rate": 1.1029659588810247e-05, |
|
"loss": 0.8532, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.7880838422181878, |
|
"grad_norm": 1.135190725326538, |
|
"learning_rate": 1.0819009100101113e-05, |
|
"loss": 0.9132, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.7922099356329427, |
|
"grad_norm": 1.3497545719146729, |
|
"learning_rate": 1.060835861139198e-05, |
|
"loss": 0.9214, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.7963360290476976, |
|
"grad_norm": 1.3251924514770508, |
|
"learning_rate": 1.0397708122682845e-05, |
|
"loss": 0.8942, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.8004621224624525, |
|
"grad_norm": 2.453803539276123, |
|
"learning_rate": 1.018705763397371e-05, |
|
"loss": 0.8858, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.8045882158772074, |
|
"grad_norm": 1.1651134490966797, |
|
"learning_rate": 9.976407145264577e-06, |
|
"loss": 0.9012, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.8087143092919624, |
|
"grad_norm": 2.257159471511841, |
|
"learning_rate": 9.765756656555444e-06, |
|
"loss": 0.9167, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.8128404027067173, |
|
"grad_norm": 0.9240596294403076, |
|
"learning_rate": 9.55510616784631e-06, |
|
"loss": 0.8764, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.8169664961214722, |
|
"grad_norm": 1.2550618648529053, |
|
"learning_rate": 9.344455679137177e-06, |
|
"loss": 0.8998, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.8210925895362271, |
|
"grad_norm": 1.2276984453201294, |
|
"learning_rate": 9.133805190428042e-06, |
|
"loss": 0.909, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.825218682950982, |
|
"grad_norm": 1.0953816175460815, |
|
"learning_rate": 8.923154701718908e-06, |
|
"loss": 0.8931, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.8293447763657369, |
|
"grad_norm": 1.469269037246704, |
|
"learning_rate": 8.712504213009775e-06, |
|
"loss": 0.8789, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.8334708697804918, |
|
"grad_norm": 1.242390751838684, |
|
"learning_rate": 8.50185372430064e-06, |
|
"loss": 0.9126, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.8375969631952468, |
|
"grad_norm": 1.0811703205108643, |
|
"learning_rate": 8.291203235591507e-06, |
|
"loss": 0.913, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.8417230566100017, |
|
"grad_norm": 1.0523350238800049, |
|
"learning_rate": 8.080552746882374e-06, |
|
"loss": 0.9118, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.8458491500247566, |
|
"grad_norm": 1.4592727422714233, |
|
"learning_rate": 7.86990225817324e-06, |
|
"loss": 0.9099, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.8499752434395115, |
|
"grad_norm": 1.0648339986801147, |
|
"learning_rate": 7.659251769464105e-06, |
|
"loss": 0.9198, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.8541013368542664, |
|
"grad_norm": 1.3053339719772339, |
|
"learning_rate": 7.448601280754971e-06, |
|
"loss": 0.9156, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.8582274302690213, |
|
"grad_norm": 1.0929012298583984, |
|
"learning_rate": 7.237950792045837e-06, |
|
"loss": 0.9103, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.8623535236837762, |
|
"grad_norm": 1.234263300895691, |
|
"learning_rate": 7.027300303336704e-06, |
|
"loss": 0.9016, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.8664796170985312, |
|
"grad_norm": 1.3241745233535767, |
|
"learning_rate": 6.816649814627571e-06, |
|
"loss": 0.9075, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.8706057105132861, |
|
"grad_norm": 1.2847357988357544, |
|
"learning_rate": 6.605999325918437e-06, |
|
"loss": 0.9462, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.874731803928041, |
|
"grad_norm": 1.1206868886947632, |
|
"learning_rate": 6.395348837209303e-06, |
|
"loss": 0.9236, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.8788578973427958, |
|
"grad_norm": 1.1748895645141602, |
|
"learning_rate": 6.1846983485001685e-06, |
|
"loss": 0.8521, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.8829839907575507, |
|
"grad_norm": 1.571519136428833, |
|
"learning_rate": 5.974047859791035e-06, |
|
"loss": 0.8811, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.8871100841723056, |
|
"grad_norm": 1.051316738128662, |
|
"learning_rate": 5.763397371081901e-06, |
|
"loss": 0.9167, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.8912361775870605, |
|
"grad_norm": 2.111393690109253, |
|
"learning_rate": 5.552746882372767e-06, |
|
"loss": 0.9333, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.8953622710018155, |
|
"grad_norm": 1.3969411849975586, |
|
"learning_rate": 5.342096393663633e-06, |
|
"loss": 0.9434, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.8994883644165704, |
|
"grad_norm": 1.7783890962600708, |
|
"learning_rate": 5.1314459049545e-06, |
|
"loss": 0.8947, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.9036144578313253, |
|
"grad_norm": 1.359174132347107, |
|
"learning_rate": 4.920795416245366e-06, |
|
"loss": 0.8815, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.9077405512460802, |
|
"grad_norm": 1.257117748260498, |
|
"learning_rate": 4.710144927536232e-06, |
|
"loss": 0.8986, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.9118666446608351, |
|
"grad_norm": 0.9748762845993042, |
|
"learning_rate": 4.499494438827098e-06, |
|
"loss": 0.887, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.91599273807559, |
|
"grad_norm": 1.5360727310180664, |
|
"learning_rate": 4.2888439501179645e-06, |
|
"loss": 0.9051, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.9201188314903449, |
|
"grad_norm": 1.0747774839401245, |
|
"learning_rate": 4.078193461408831e-06, |
|
"loss": 0.9498, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.9242449249050998, |
|
"grad_norm": 1.197403073310852, |
|
"learning_rate": 3.867542972699697e-06, |
|
"loss": 0.925, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.9283710183198548, |
|
"grad_norm": 1.580825924873352, |
|
"learning_rate": 3.6568924839905627e-06, |
|
"loss": 0.9019, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.9324971117346097, |
|
"grad_norm": 1.2338446378707886, |
|
"learning_rate": 3.4462419952814294e-06, |
|
"loss": 0.892, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.9366232051493646, |
|
"grad_norm": 2.6846702098846436, |
|
"learning_rate": 3.2355915065722956e-06, |
|
"loss": 0.9121, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.9407492985641195, |
|
"grad_norm": 1.0765039920806885, |
|
"learning_rate": 3.0249410178631614e-06, |
|
"loss": 0.9101, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.9448753919788744, |
|
"grad_norm": 1.2273006439208984, |
|
"learning_rate": 2.814290529154028e-06, |
|
"loss": 0.8916, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.9490014853936293, |
|
"grad_norm": 1.295823574066162, |
|
"learning_rate": 2.603640040444894e-06, |
|
"loss": 0.8958, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.9531275788083842, |
|
"grad_norm": 1.5502872467041016, |
|
"learning_rate": 2.39298955173576e-06, |
|
"loss": 0.9069, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.9572536722231392, |
|
"grad_norm": 2.524392604827881, |
|
"learning_rate": 2.1823390630266263e-06, |
|
"loss": 0.9301, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.9613797656378941, |
|
"grad_norm": 1.5065919160842896, |
|
"learning_rate": 1.9716885743174925e-06, |
|
"loss": 0.8903, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.965505859052649, |
|
"grad_norm": 1.1356451511383057, |
|
"learning_rate": 1.761038085608359e-06, |
|
"loss": 0.881, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.9696319524674039, |
|
"grad_norm": 6.048961162567139, |
|
"learning_rate": 1.550387596899225e-06, |
|
"loss": 0.9164, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.9737580458821588, |
|
"grad_norm": 1.1151750087738037, |
|
"learning_rate": 1.339737108190091e-06, |
|
"loss": 0.8703, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.9778841392969136, |
|
"grad_norm": 1.0021706819534302, |
|
"learning_rate": 1.1290866194809571e-06, |
|
"loss": 0.8978, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.9820102327116685, |
|
"grad_norm": 1.1401609182357788, |
|
"learning_rate": 9.184361307718234e-07, |
|
"loss": 0.9159, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.9861363261264235, |
|
"grad_norm": 1.0629512071609497, |
|
"learning_rate": 7.077856420626896e-07, |
|
"loss": 0.9073, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.9902624195411784, |
|
"grad_norm": 1.0977869033813477, |
|
"learning_rate": 4.971351533535558e-07, |
|
"loss": 0.9178, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.9943885129559333, |
|
"grad_norm": 1.281014323234558, |
|
"learning_rate": 2.8648466464442196e-07, |
|
"loss": 0.8876, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.9985146063706882, |
|
"grad_norm": 1.2343029975891113, |
|
"learning_rate": 7.583417593528817e-08, |
|
"loss": 0.8733, |
|
"step": 24200 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 24236, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.002725362748621e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|