|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.013019612744638524, |
|
"eval_steps": 500, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0002603922548927705, |
|
"grad_norm": 0.8521247506141663, |
|
"learning_rate": 5.194805194805195e-06, |
|
"loss": 0.7412, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.000520784509785541, |
|
"grad_norm": 0.6229312419891357, |
|
"learning_rate": 1.038961038961039e-05, |
|
"loss": 0.7138, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0007811767646783114, |
|
"grad_norm": 0.4566498100757599, |
|
"learning_rate": 1.5584415584415583e-05, |
|
"loss": 0.7079, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.001041569019571082, |
|
"grad_norm": 0.4316692650318146, |
|
"learning_rate": 2.077922077922078e-05, |
|
"loss": 0.6988, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0013019612744638524, |
|
"grad_norm": 0.615436315536499, |
|
"learning_rate": 2.5974025974025972e-05, |
|
"loss": 0.6937, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0015623535293566228, |
|
"grad_norm": 0.48698583245277405, |
|
"learning_rate": 3.1168831168831166e-05, |
|
"loss": 0.7043, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0018227457842493933, |
|
"grad_norm": 0.3984021544456482, |
|
"learning_rate": 3.6363636363636364e-05, |
|
"loss": 0.6563, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.002083138039142164, |
|
"grad_norm": 0.37576180696487427, |
|
"learning_rate": 4.155844155844156e-05, |
|
"loss": 0.6462, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0023435302940349343, |
|
"grad_norm": 0.35269680619239807, |
|
"learning_rate": 4.675324675324675e-05, |
|
"loss": 0.6656, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0026039225489277048, |
|
"grad_norm": 0.31541451811790466, |
|
"learning_rate": 5.1948051948051944e-05, |
|
"loss": 0.6547, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.002864314803820475, |
|
"grad_norm": 0.3462330400943756, |
|
"learning_rate": 5.714285714285714e-05, |
|
"loss": 0.6621, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0031247070587132456, |
|
"grad_norm": 0.3465985953807831, |
|
"learning_rate": 6.233766233766233e-05, |
|
"loss": 0.6273, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.003385099313606016, |
|
"grad_norm": 0.3297797441482544, |
|
"learning_rate": 6.753246753246754e-05, |
|
"loss": 0.6559, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0036454915684987865, |
|
"grad_norm": 0.3888818621635437, |
|
"learning_rate": 7.272727272727273e-05, |
|
"loss": 0.6756, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.003905883823391557, |
|
"grad_norm": 0.3542368710041046, |
|
"learning_rate": 7.792207792207793e-05, |
|
"loss": 0.6506, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.004166276078284328, |
|
"grad_norm": 0.37369370460510254, |
|
"learning_rate": 8.311688311688312e-05, |
|
"loss": 0.6645, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.004426668333177098, |
|
"grad_norm": 0.3700549900531769, |
|
"learning_rate": 8.831168831168831e-05, |
|
"loss": 0.6727, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.004687060588069869, |
|
"grad_norm": 0.32032889127731323, |
|
"learning_rate": 9.35064935064935e-05, |
|
"loss": 0.6529, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.004947452842962639, |
|
"grad_norm": 0.3331650495529175, |
|
"learning_rate": 9.870129870129871e-05, |
|
"loss": 0.6627, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.0052078450978554095, |
|
"grad_norm": 0.3300645351409912, |
|
"learning_rate": 0.00010389610389610389, |
|
"loss": 0.676, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0054682373527481795, |
|
"grad_norm": 0.350356787443161, |
|
"learning_rate": 0.00010909090909090909, |
|
"loss": 0.6564, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.00572862960764095, |
|
"grad_norm": 0.382756769657135, |
|
"learning_rate": 0.00011428571428571428, |
|
"loss": 0.6243, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.00598902186253372, |
|
"grad_norm": 0.34450188279151917, |
|
"learning_rate": 0.00011948051948051949, |
|
"loss": 0.6611, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.006249414117426491, |
|
"grad_norm": 0.3705821633338928, |
|
"learning_rate": 0.00012467532467532467, |
|
"loss": 0.6384, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.006509806372319262, |
|
"grad_norm": 0.36822304129600525, |
|
"learning_rate": 0.00012987012987012987, |
|
"loss": 0.6415, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.006770198627212032, |
|
"grad_norm": 0.32358303666114807, |
|
"learning_rate": 0.00013506493506493507, |
|
"loss": 0.6584, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.007030590882104803, |
|
"grad_norm": 0.33386844396591187, |
|
"learning_rate": 0.00014025974025974028, |
|
"loss": 0.6702, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.007290983136997573, |
|
"grad_norm": 0.32447949051856995, |
|
"learning_rate": 0.00014545454545454546, |
|
"loss": 0.6519, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.007551375391890344, |
|
"grad_norm": 0.3388073146343231, |
|
"learning_rate": 0.00015064935064935066, |
|
"loss": 0.6735, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.007811767646783114, |
|
"grad_norm": 0.39655518531799316, |
|
"learning_rate": 0.00015584415584415587, |
|
"loss": 0.672, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.008072159901675884, |
|
"grad_norm": 0.41258928179740906, |
|
"learning_rate": 0.00016103896103896104, |
|
"loss": 0.6626, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.008332552156568656, |
|
"grad_norm": 0.3963010311126709, |
|
"learning_rate": 0.00016623376623376625, |
|
"loss": 0.6653, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.008592944411461426, |
|
"grad_norm": 0.3641106188297272, |
|
"learning_rate": 0.00017142857142857143, |
|
"loss": 0.6389, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.008853336666354196, |
|
"grad_norm": 0.38745763897895813, |
|
"learning_rate": 0.00017662337662337663, |
|
"loss": 0.6928, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.009113728921246966, |
|
"grad_norm": 0.4573372006416321, |
|
"learning_rate": 0.00018181818181818183, |
|
"loss": 0.6679, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.009374121176139737, |
|
"grad_norm": 0.45714282989501953, |
|
"learning_rate": 0.000187012987012987, |
|
"loss": 0.6453, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.009634513431032507, |
|
"grad_norm": 0.37631818652153015, |
|
"learning_rate": 0.00019220779220779222, |
|
"loss": 0.6467, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.009894905685925277, |
|
"grad_norm": 0.3658345639705658, |
|
"learning_rate": 0.00019740259740259742, |
|
"loss": 0.6631, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.010155297940818049, |
|
"grad_norm": 0.3953540623188019, |
|
"learning_rate": 0.00019999996515752773, |
|
"loss": 0.6573, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.010415690195710819, |
|
"grad_norm": 0.377763569355011, |
|
"learning_rate": 0.00019999968641789507, |
|
"loss": 0.6664, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.010676082450603589, |
|
"grad_norm": 0.37128835916519165, |
|
"learning_rate": 0.0001999991289394067, |
|
"loss": 0.6342, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.010936474705496359, |
|
"grad_norm": 0.33881694078445435, |
|
"learning_rate": 0.00019999829272361654, |
|
"loss": 0.6476, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.01119686696038913, |
|
"grad_norm": 0.39774075150489807, |
|
"learning_rate": 0.00019999717777285545, |
|
"loss": 0.633, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.0114572592152819, |
|
"grad_norm": 0.41350051760673523, |
|
"learning_rate": 0.00019999578409023126, |
|
"loss": 0.6541, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.01171765147017467, |
|
"grad_norm": 0.47954171895980835, |
|
"learning_rate": 0.00019999411167962868, |
|
"loss": 0.6545, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.01197804372506744, |
|
"grad_norm": 0.46860000491142273, |
|
"learning_rate": 0.00019999216054570942, |
|
"loss": 0.6512, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.012238435979960213, |
|
"grad_norm": 0.4395809471607208, |
|
"learning_rate": 0.00019998993069391205, |
|
"loss": 0.6587, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.012498828234852983, |
|
"grad_norm": 0.43222516775131226, |
|
"learning_rate": 0.00019998742213045206, |
|
"loss": 0.6292, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.012759220489745753, |
|
"grad_norm": 0.39363613724708557, |
|
"learning_rate": 0.00019998463486232179, |
|
"loss": 0.6319, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.013019612744638524, |
|
"grad_norm": 0.4984697699546814, |
|
"learning_rate": 0.0001999815688972905, |
|
"loss": 0.6488, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 19202, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.8290205089792e+17, |
|
"train_batch_size": 3, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|