{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 1036,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.009652509652509652,
      "grad_norm": 1.4631391763687134,
      "learning_rate": 0.0001,
      "loss": 0.664,
      "step": 10
    },
    {
      "epoch": 0.019305019305019305,
      "grad_norm": 1.8917441368103027,
      "learning_rate": 0.0002,
      "loss": 0.1575,
      "step": 20
    },
    {
      "epoch": 0.02895752895752896,
      "grad_norm": 1.5227961540222168,
      "learning_rate": 0.00019803149606299213,
      "loss": 0.1233,
      "step": 30
    },
    {
      "epoch": 0.03861003861003861,
      "grad_norm": 0.9390913248062134,
      "learning_rate": 0.00019606299212598428,
      "loss": 0.1106,
      "step": 40
    },
    {
      "epoch": 0.04826254826254826,
      "grad_norm": 0.8157206177711487,
      "learning_rate": 0.0001940944881889764,
      "loss": 0.0869,
      "step": 50
    },
    {
      "epoch": 0.05791505791505792,
      "grad_norm": 0.5291563868522644,
      "learning_rate": 0.0001921259842519685,
      "loss": 0.0903,
      "step": 60
    },
    {
      "epoch": 0.06756756756756757,
      "grad_norm": 1.155928373336792,
      "learning_rate": 0.00019015748031496065,
      "loss": 0.1186,
      "step": 70
    },
    {
      "epoch": 0.07722007722007722,
      "grad_norm": 0.5892627239227295,
      "learning_rate": 0.00018818897637795277,
      "loss": 0.0914,
      "step": 80
    },
    {
      "epoch": 0.08687258687258688,
      "grad_norm": 0.5989981889724731,
      "learning_rate": 0.0001862204724409449,
      "loss": 0.0917,
      "step": 90
    },
    {
      "epoch": 0.09652509652509653,
      "grad_norm": 0.9171364903450012,
      "learning_rate": 0.000184251968503937,
      "loss": 0.0888,
      "step": 100
    },
    {
      "epoch": 0.10617760617760617,
      "grad_norm": 0.37568220496177673,
      "learning_rate": 0.00018228346456692916,
      "loss": 0.0865,
      "step": 110
    },
    {
      "epoch": 0.11583011583011583,
      "grad_norm": 0.5606808662414551,
      "learning_rate": 0.00018031496062992125,
      "loss": 0.0898,
      "step": 120
    },
    {
      "epoch": 0.12548262548262548,
      "grad_norm": 0.6872594356536865,
      "learning_rate": 0.00017834645669291338,
      "loss": 0.0851,
      "step": 130
    },
    {
      "epoch": 0.13513513513513514,
      "grad_norm": 0.6313247680664062,
      "learning_rate": 0.00017637795275590552,
      "loss": 0.088,
      "step": 140
    },
    {
      "epoch": 0.14478764478764478,
      "grad_norm": 0.7841348648071289,
      "learning_rate": 0.00017440944881889765,
      "loss": 0.0966,
      "step": 150
    },
    {
      "epoch": 0.15444015444015444,
      "grad_norm": 0.8097236752510071,
      "learning_rate": 0.00017244094488188977,
      "loss": 0.0626,
      "step": 160
    },
    {
      "epoch": 0.1640926640926641,
      "grad_norm": 0.7307904362678528,
      "learning_rate": 0.00017047244094488192,
      "loss": 0.084,
      "step": 170
    },
    {
      "epoch": 0.17374517374517376,
      "grad_norm": 0.5725339651107788,
      "learning_rate": 0.000168503937007874,
      "loss": 0.071,
      "step": 180
    },
    {
      "epoch": 0.1833976833976834,
      "grad_norm": 0.7641273736953735,
      "learning_rate": 0.00016653543307086613,
      "loss": 0.0958,
      "step": 190
    },
    {
      "epoch": 0.19305019305019305,
      "grad_norm": 0.6539800763130188,
      "learning_rate": 0.00016456692913385828,
      "loss": 0.0803,
      "step": 200
    },
    {
      "epoch": 0.20270270270270271,
      "grad_norm": 0.6908950805664062,
      "learning_rate": 0.0001625984251968504,
      "loss": 0.0885,
      "step": 210
    },
    {
      "epoch": 0.21235521235521235,
      "grad_norm": 0.31019431352615356,
      "learning_rate": 0.00016062992125984252,
      "loss": 0.0588,
      "step": 220
    },
    {
      "epoch": 0.222007722007722,
      "grad_norm": 0.7413542866706848,
      "learning_rate": 0.00015866141732283467,
      "loss": 0.0827,
      "step": 230
    },
    {
      "epoch": 0.23166023166023167,
      "grad_norm": 0.7324005365371704,
      "learning_rate": 0.0001566929133858268,
      "loss": 0.0926,
      "step": 240
    },
    {
      "epoch": 0.2413127413127413,
      "grad_norm": 0.5140088796615601,
      "learning_rate": 0.0001547244094488189,
      "loss": 0.0768,
      "step": 250
    },
    {
      "epoch": 0.25096525096525096,
      "grad_norm": 0.3749833405017853,
      "learning_rate": 0.00015275590551181104,
      "loss": 0.0741,
      "step": 260
    },
    {
      "epoch": 0.2606177606177606,
      "grad_norm": 0.5479316711425781,
      "learning_rate": 0.00015078740157480316,
      "loss": 0.068,
      "step": 270
    },
    {
      "epoch": 0.2702702702702703,
      "grad_norm": 0.5078131556510925,
      "learning_rate": 0.00014881889763779528,
      "loss": 0.0763,
      "step": 280
    },
    {
      "epoch": 0.2799227799227799,
      "grad_norm": 0.5565307140350342,
      "learning_rate": 0.0001468503937007874,
      "loss": 0.0657,
      "step": 290
    },
    {
      "epoch": 0.28957528957528955,
      "grad_norm": 0.5333523750305176,
      "learning_rate": 0.00014488188976377955,
      "loss": 0.0467,
      "step": 300
    },
    {
      "epoch": 0.29922779922779924,
      "grad_norm": 0.9170491695404053,
      "learning_rate": 0.00014291338582677165,
      "loss": 0.0677,
      "step": 310
    },
    {
      "epoch": 0.3088803088803089,
      "grad_norm": 0.6521899104118347,
      "learning_rate": 0.00014094488188976377,
      "loss": 0.0692,
      "step": 320
    },
    {
      "epoch": 0.3185328185328185,
      "grad_norm": 0.5061705708503723,
      "learning_rate": 0.00013897637795275592,
      "loss": 0.086,
      "step": 330
    },
    {
      "epoch": 0.3281853281853282,
      "grad_norm": 0.643752932548523,
      "learning_rate": 0.00013700787401574804,
      "loss": 0.0621,
      "step": 340
    },
    {
      "epoch": 0.33783783783783783,
      "grad_norm": 0.5780313014984131,
      "learning_rate": 0.00013503937007874016,
      "loss": 0.0691,
      "step": 350
    },
    {
      "epoch": 0.3474903474903475,
      "grad_norm": 0.36350390315055847,
      "learning_rate": 0.0001330708661417323,
      "loss": 0.0679,
      "step": 360
    },
    {
      "epoch": 0.35714285714285715,
      "grad_norm": 0.6011677384376526,
      "learning_rate": 0.0001311023622047244,
      "loss": 0.0707,
      "step": 370
    },
    {
      "epoch": 0.3667953667953668,
      "grad_norm": 0.5352160930633545,
      "learning_rate": 0.00012913385826771653,
      "loss": 0.0543,
      "step": 380
    },
    {
      "epoch": 0.3764478764478765,
      "grad_norm": 0.4793628752231598,
      "learning_rate": 0.00012716535433070867,
      "loss": 0.058,
      "step": 390
    },
    {
      "epoch": 0.3861003861003861,
      "grad_norm": 0.6923650503158569,
      "learning_rate": 0.0001251968503937008,
      "loss": 0.0691,
      "step": 400
    },
    {
      "epoch": 0.39575289575289574,
      "grad_norm": 0.49190232157707214,
      "learning_rate": 0.00012322834645669292,
      "loss": 0.0579,
      "step": 410
    },
    {
      "epoch": 0.40540540540540543,
      "grad_norm": 0.23852688074111938,
      "learning_rate": 0.00012125984251968505,
      "loss": 0.0524,
      "step": 420
    },
    {
      "epoch": 0.41505791505791506,
      "grad_norm": 0.6561082005500793,
      "learning_rate": 0.00011929133858267719,
      "loss": 0.057,
      "step": 430
    },
    {
      "epoch": 0.4247104247104247,
      "grad_norm": 0.613944411277771,
      "learning_rate": 0.00011732283464566928,
      "loss": 0.0575,
      "step": 440
    },
    {
      "epoch": 0.4343629343629344,
      "grad_norm": 0.3899982273578644,
      "learning_rate": 0.00011535433070866142,
      "loss": 0.0527,
      "step": 450
    },
    {
      "epoch": 0.444015444015444,
      "grad_norm": 0.5278599262237549,
      "learning_rate": 0.00011338582677165355,
      "loss": 0.0589,
      "step": 460
    },
    {
      "epoch": 0.45366795366795365,
      "grad_norm": 0.29473400115966797,
      "learning_rate": 0.00011141732283464567,
      "loss": 0.0271,
      "step": 470
    },
    {
      "epoch": 0.46332046332046334,
      "grad_norm": 0.6821677088737488,
      "learning_rate": 0.00010944881889763781,
      "loss": 0.0772,
      "step": 480
    },
    {
      "epoch": 0.47297297297297297,
      "grad_norm": 0.3420783281326294,
      "learning_rate": 0.00010748031496062993,
      "loss": 0.0543,
      "step": 490
    },
    {
      "epoch": 0.4826254826254826,
      "grad_norm": 0.5966827869415283,
      "learning_rate": 0.00010551181102362204,
      "loss": 0.0552,
      "step": 500
    },
    {
      "epoch": 0.4922779922779923,
      "grad_norm": 0.656173825263977,
      "learning_rate": 0.00010354330708661417,
      "loss": 0.0692,
      "step": 510
    },
    {
      "epoch": 0.5019305019305019,
      "grad_norm": 0.46832337975502014,
      "learning_rate": 0.0001015748031496063,
      "loss": 0.0456,
      "step": 520
    },
    {
      "epoch": 0.5115830115830116,
      "grad_norm": 0.5552840828895569,
      "learning_rate": 9.960629921259843e-05,
      "loss": 0.0581,
      "step": 530
    },
    {
      "epoch": 0.5212355212355212,
      "grad_norm": 0.7662914395332336,
      "learning_rate": 9.763779527559055e-05,
      "loss": 0.0542,
      "step": 540
    },
    {
      "epoch": 0.5308880308880309,
      "grad_norm": 0.5977205634117126,
      "learning_rate": 9.566929133858268e-05,
      "loss": 0.0432,
      "step": 550
    },
    {
      "epoch": 0.5405405405405406,
      "grad_norm": 0.41738152503967285,
      "learning_rate": 9.370078740157481e-05,
      "loss": 0.0606,
      "step": 560
    },
    {
      "epoch": 0.5501930501930502,
      "grad_norm": 0.3176082670688629,
      "learning_rate": 9.173228346456693e-05,
      "loss": 0.053,
      "step": 570
    },
    {
      "epoch": 0.5598455598455598,
      "grad_norm": 0.5319856405258179,
      "learning_rate": 8.976377952755905e-05,
      "loss": 0.0548,
      "step": 580
    },
    {
      "epoch": 0.5694980694980695,
      "grad_norm": 0.6625571250915527,
      "learning_rate": 8.779527559055119e-05,
      "loss": 0.0735,
      "step": 590
    },
    {
      "epoch": 0.5791505791505791,
      "grad_norm": 0.6264726519584656,
      "learning_rate": 8.582677165354331e-05,
      "loss": 0.0574,
      "step": 600
    },
    {
      "epoch": 0.5888030888030888,
      "grad_norm": 0.49182403087615967,
      "learning_rate": 8.385826771653543e-05,
      "loss": 0.046,
      "step": 610
    },
    {
      "epoch": 0.5984555984555985,
      "grad_norm": 0.712940514087677,
      "learning_rate": 8.188976377952757e-05,
      "loss": 0.0451,
      "step": 620
    },
    {
      "epoch": 0.6081081081081081,
      "grad_norm": 0.5095399022102356,
      "learning_rate": 7.992125984251969e-05,
      "loss": 0.0527,
      "step": 630
    },
    {
      "epoch": 0.6177606177606177,
      "grad_norm": 0.4520854949951172,
      "learning_rate": 7.795275590551181e-05,
      "loss": 0.0498,
      "step": 640
    },
    {
      "epoch": 0.6274131274131274,
      "grad_norm": 0.7156594395637512,
      "learning_rate": 7.598425196850393e-05,
      "loss": 0.0482,
      "step": 650
    },
    {
      "epoch": 0.637065637065637,
      "grad_norm": 0.3221661150455475,
      "learning_rate": 7.401574803149607e-05,
      "loss": 0.0399,
      "step": 660
    },
    {
      "epoch": 0.6467181467181468,
      "grad_norm": 0.22723270952701569,
      "learning_rate": 7.20472440944882e-05,
      "loss": 0.0388,
      "step": 670
    },
    {
      "epoch": 0.6563706563706564,
      "grad_norm": 0.5212529301643372,
      "learning_rate": 7.007874015748031e-05,
      "loss": 0.0389,
      "step": 680
    },
    {
      "epoch": 0.666023166023166,
      "grad_norm": 0.5126072764396667,
      "learning_rate": 6.811023622047245e-05,
      "loss": 0.0525,
      "step": 690
    },
    {
      "epoch": 0.6756756756756757,
      "grad_norm": 0.7248953580856323,
      "learning_rate": 6.614173228346457e-05,
      "loss": 0.0619,
      "step": 700
    },
    {
      "epoch": 0.6853281853281853,
      "grad_norm": 0.3085402548313141,
      "learning_rate": 6.417322834645669e-05,
      "loss": 0.053,
      "step": 710
    },
    {
      "epoch": 0.694980694980695,
      "grad_norm": 0.3684390187263489,
      "learning_rate": 6.220472440944882e-05,
      "loss": 0.0352,
      "step": 720
    },
    {
      "epoch": 0.7046332046332047,
      "grad_norm": 0.5736101269721985,
      "learning_rate": 6.0236220472440953e-05,
      "loss": 0.0524,
      "step": 730
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 0.24105365574359894,
      "learning_rate": 5.826771653543307e-05,
      "loss": 0.0361,
      "step": 740
    },
    {
      "epoch": 0.7239382239382239,
      "grad_norm": 0.4142036736011505,
      "learning_rate": 5.62992125984252e-05,
      "loss": 0.0423,
      "step": 750
    },
    {
      "epoch": 0.7335907335907336,
      "grad_norm": 0.43454453349113464,
      "learning_rate": 5.433070866141733e-05,
      "loss": 0.0573,
      "step": 760
    },
    {
      "epoch": 0.7432432432432432,
      "grad_norm": 0.6893749833106995,
      "learning_rate": 5.236220472440945e-05,
      "loss": 0.0578,
      "step": 770
    },
    {
      "epoch": 0.752895752895753,
      "grad_norm": 0.38158169388771057,
      "learning_rate": 5.0393700787401575e-05,
      "loss": 0.042,
      "step": 780
    },
    {
      "epoch": 0.7625482625482626,
      "grad_norm": 0.6105143427848816,
      "learning_rate": 4.84251968503937e-05,
      "loss": 0.0632,
      "step": 790
    },
    {
      "epoch": 0.7722007722007722,
      "grad_norm": 0.28586989641189575,
      "learning_rate": 4.645669291338583e-05,
      "loss": 0.0453,
      "step": 800
    },
    {
      "epoch": 0.7818532818532818,
      "grad_norm": 0.4869031012058258,
      "learning_rate": 4.4488188976377954e-05,
      "loss": 0.0426,
      "step": 810
    },
    {
      "epoch": 0.7915057915057915,
      "grad_norm": 0.09411308914422989,
      "learning_rate": 4.251968503937008e-05,
      "loss": 0.034,
      "step": 820
    },
    {
      "epoch": 0.8011583011583011,
      "grad_norm": 0.43962639570236206,
      "learning_rate": 4.0551181102362204e-05,
      "loss": 0.0528,
      "step": 830
    },
    {
      "epoch": 0.8108108108108109,
      "grad_norm": 0.33696189522743225,
      "learning_rate": 3.858267716535433e-05,
      "loss": 0.0331,
      "step": 840
    },
    {
      "epoch": 0.8204633204633205,
      "grad_norm": 0.4021511673927307,
      "learning_rate": 3.661417322834646e-05,
      "loss": 0.0406,
      "step": 850
    },
    {
      "epoch": 0.8301158301158301,
      "grad_norm": 0.6140969395637512,
      "learning_rate": 3.464566929133858e-05,
      "loss": 0.0466,
      "step": 860
    },
    {
      "epoch": 0.8397683397683398,
      "grad_norm": 0.4614850878715515,
      "learning_rate": 3.2677165354330704e-05,
      "loss": 0.0285,
      "step": 870
    },
    {
      "epoch": 0.8494208494208494,
      "grad_norm": 0.6398894786834717,
      "learning_rate": 3.070866141732284e-05,
      "loss": 0.0515,
      "step": 880
    },
    {
      "epoch": 0.859073359073359,
      "grad_norm": 0.3512854278087616,
      "learning_rate": 2.874015748031496e-05,
      "loss": 0.0336,
      "step": 890
    },
    {
      "epoch": 0.8687258687258688,
      "grad_norm": 0.2678048610687256,
      "learning_rate": 2.677165354330709e-05,
      "loss": 0.0286,
      "step": 900
    },
    {
      "epoch": 0.8783783783783784,
      "grad_norm": 0.5848315358161926,
      "learning_rate": 2.4803149606299215e-05,
      "loss": 0.0384,
      "step": 910
    },
    {
      "epoch": 0.888030888030888,
      "grad_norm": 0.2881620228290558,
      "learning_rate": 2.283464566929134e-05,
      "loss": 0.0424,
      "step": 920
    },
    {
      "epoch": 0.8976833976833977,
      "grad_norm": 0.3219210207462311,
      "learning_rate": 2.0866141732283465e-05,
      "loss": 0.0488,
      "step": 930
    },
    {
      "epoch": 0.9073359073359073,
      "grad_norm": 0.408877432346344,
      "learning_rate": 1.889763779527559e-05,
      "loss": 0.0345,
      "step": 940
    },
    {
      "epoch": 0.916988416988417,
      "grad_norm": 0.20342448353767395,
      "learning_rate": 1.692913385826772e-05,
      "loss": 0.049,
      "step": 950
    },
    {
      "epoch": 0.9266409266409267,
      "grad_norm": 0.4167528748512268,
      "learning_rate": 1.4960629921259845e-05,
      "loss": 0.043,
      "step": 960
    },
    {
      "epoch": 0.9362934362934363,
      "grad_norm": 0.49826258420944214,
      "learning_rate": 1.2992125984251968e-05,
      "loss": 0.0412,
      "step": 970
    },
    {
      "epoch": 0.9459459459459459,
      "grad_norm": 0.5426783561706543,
      "learning_rate": 1.1023622047244095e-05,
      "loss": 0.0538,
      "step": 980
    },
    {
      "epoch": 0.9555984555984556,
      "grad_norm": 0.170461505651474,
      "learning_rate": 9.055118110236222e-06,
      "loss": 0.0483,
      "step": 990
    },
    {
      "epoch": 0.9652509652509652,
      "grad_norm": 0.3618116080760956,
      "learning_rate": 7.086614173228347e-06,
      "loss": 0.038,
      "step": 1000
    },
    {
      "epoch": 0.974903474903475,
      "grad_norm": 0.417107492685318,
      "learning_rate": 5.118110236220473e-06,
      "loss": 0.0373,
      "step": 1010
    },
    {
      "epoch": 0.9845559845559846,
      "grad_norm": 0.4384624660015106,
      "learning_rate": 3.1496062992125985e-06,
      "loss": 0.0534,
      "step": 1020
    },
    {
      "epoch": 0.9942084942084942,
      "grad_norm": 0.7793737649917603,
      "learning_rate": 1.1811023622047244e-06,
      "loss": 0.0406,
      "step": 1030
    }
  ],
  "logging_steps": 10,
  "max_steps": 1036,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0789424666431488e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}