{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.8808777429467085,
  "eval_steps": 50,
  "global_step": 1200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07836990595611286,
      "grad_norm": 0.9672619104385376,
      "learning_rate": 2.413127413127413e-05,
      "loss": 0.5333,
      "step": 50
    },
    {
      "epoch": 0.07836990595611286,
      "eval_loss": 0.21812455356121063,
      "eval_runtime": 136.5294,
      "eval_samples_per_second": 5.23,
      "eval_steps_per_second": 0.659,
      "step": 50
    },
    {
      "epoch": 0.15673981191222572,
      "grad_norm": 0.8239838480949402,
      "learning_rate": 2.3166023166023168e-05,
      "loss": 0.1948,
      "step": 100
    },
    {
      "epoch": 0.15673981191222572,
      "eval_loss": 0.180569127202034,
      "eval_runtime": 136.8617,
      "eval_samples_per_second": 5.217,
      "eval_steps_per_second": 0.658,
      "step": 100
    },
    {
      "epoch": 0.23510971786833856,
      "grad_norm": 0.4985473155975342,
      "learning_rate": 2.2200772200772202e-05,
      "loss": 0.1772,
      "step": 150
    },
    {
      "epoch": 0.23510971786833856,
      "eval_loss": 0.17201179265975952,
      "eval_runtime": 136.9237,
      "eval_samples_per_second": 5.215,
      "eval_steps_per_second": 0.657,
      "step": 150
    },
    {
      "epoch": 0.31347962382445144,
      "grad_norm": 0.5671184659004211,
      "learning_rate": 2.1235521235521236e-05,
      "loss": 0.1674,
      "step": 200
    },
    {
      "epoch": 0.31347962382445144,
      "eval_loss": 0.16670115292072296,
      "eval_runtime": 137.1006,
      "eval_samples_per_second": 5.208,
      "eval_steps_per_second": 0.656,
      "step": 200
    },
    {
      "epoch": 0.39184952978056425,
      "grad_norm": 0.5542048215866089,
      "learning_rate": 2.0270270270270273e-05,
      "loss": 0.1639,
      "step": 250
    },
    {
      "epoch": 0.39184952978056425,
      "eval_loss": 0.16387563943862915,
      "eval_runtime": 136.9694,
      "eval_samples_per_second": 5.213,
      "eval_steps_per_second": 0.657,
      "step": 250
    },
    {
      "epoch": 0.4702194357366771,
      "grad_norm": 0.5355677604675293,
      "learning_rate": 1.9305019305019306e-05,
      "loss": 0.1603,
      "step": 300
    },
    {
      "epoch": 0.4702194357366771,
      "eval_loss": 0.16061373054981232,
      "eval_runtime": 136.8175,
      "eval_samples_per_second": 5.219,
      "eval_steps_per_second": 0.658,
      "step": 300
    },
    {
      "epoch": 0.54858934169279,
      "grad_norm": 0.648248553276062,
      "learning_rate": 1.833976833976834e-05,
      "loss": 0.1563,
      "step": 350
    },
    {
      "epoch": 0.54858934169279,
      "eval_loss": 0.1591891348361969,
      "eval_runtime": 136.7712,
      "eval_samples_per_second": 5.22,
      "eval_steps_per_second": 0.658,
      "step": 350
    },
    {
      "epoch": 0.6269592476489029,
      "grad_norm": 0.4232325851917267,
      "learning_rate": 1.7374517374517377e-05,
      "loss": 0.1595,
      "step": 400
    },
    {
      "epoch": 0.6269592476489029,
      "eval_loss": 0.15778718888759613,
      "eval_runtime": 137.2186,
      "eval_samples_per_second": 5.203,
      "eval_steps_per_second": 0.656,
      "step": 400
    },
    {
      "epoch": 0.7053291536050157,
      "grad_norm": 0.4626815915107727,
      "learning_rate": 1.640926640926641e-05,
      "loss": 0.1508,
      "step": 450
    },
    {
      "epoch": 0.7053291536050157,
      "eval_loss": 0.1561730057001114,
      "eval_runtime": 136.858,
      "eval_samples_per_second": 5.217,
      "eval_steps_per_second": 0.658,
      "step": 450
    },
    {
      "epoch": 0.7836990595611285,
      "grad_norm": 0.4221360385417938,
      "learning_rate": 1.5444015444015444e-05,
      "loss": 0.1581,
      "step": 500
    },
    {
      "epoch": 0.7836990595611285,
      "eval_loss": 0.15424229204654694,
      "eval_runtime": 137.1103,
      "eval_samples_per_second": 5.207,
      "eval_steps_per_second": 0.656,
      "step": 500
    },
    {
      "epoch": 0.8620689655172413,
      "grad_norm": 0.3882145583629608,
      "learning_rate": 1.4478764478764478e-05,
      "loss": 0.1441,
      "step": 550
    },
    {
      "epoch": 0.8620689655172413,
      "eval_loss": 0.15357084572315216,
      "eval_runtime": 136.9739,
      "eval_samples_per_second": 5.213,
      "eval_steps_per_second": 0.657,
      "step": 550
    },
    {
      "epoch": 0.9404388714733543,
      "grad_norm": 0.4727325141429901,
      "learning_rate": 1.3513513513513515e-05,
      "loss": 0.1487,
      "step": 600
    },
    {
      "epoch": 0.9404388714733543,
      "eval_loss": 0.15231835842132568,
      "eval_runtime": 136.9176,
      "eval_samples_per_second": 5.215,
      "eval_steps_per_second": 0.657,
      "step": 600
    },
    {
      "epoch": 1.0188087774294672,
      "grad_norm": 0.4438364803791046,
      "learning_rate": 1.2548262548262549e-05,
      "loss": 0.14,
      "step": 650
    },
    {
      "epoch": 1.0188087774294672,
      "eval_loss": 0.15219952166080475,
      "eval_runtime": 137.0305,
      "eval_samples_per_second": 5.211,
      "eval_steps_per_second": 0.657,
      "step": 650
    },
    {
      "epoch": 1.09717868338558,
      "grad_norm": 0.5468264818191528,
      "learning_rate": 1.1583011583011584e-05,
      "loss": 0.1366,
      "step": 700
    },
    {
      "epoch": 1.09717868338558,
      "eval_loss": 0.1532224416732788,
      "eval_runtime": 137.4744,
      "eval_samples_per_second": 5.194,
      "eval_steps_per_second": 0.655,
      "step": 700
    },
    {
      "epoch": 1.1755485893416928,
      "grad_norm": 0.609539270401001,
      "learning_rate": 1.0617760617760618e-05,
      "loss": 0.1352,
      "step": 750
    },
    {
      "epoch": 1.1755485893416928,
      "eval_loss": 0.15247634053230286,
      "eval_runtime": 137.6071,
      "eval_samples_per_second": 5.189,
      "eval_steps_per_second": 0.654,
      "step": 750
    },
    {
      "epoch": 1.2539184952978055,
      "grad_norm": 0.5422230958938599,
      "learning_rate": 9.652509652509653e-06,
      "loss": 0.1278,
      "step": 800
    },
    {
      "epoch": 1.2539184952978055,
      "eval_loss": 0.15182016789913177,
      "eval_runtime": 137.443,
      "eval_samples_per_second": 5.195,
      "eval_steps_per_second": 0.655,
      "step": 800
    },
    {
      "epoch": 1.3322884012539185,
      "grad_norm": 0.574143648147583,
      "learning_rate": 8.687258687258689e-06,
      "loss": 0.1315,
      "step": 850
    },
    {
      "epoch": 1.3322884012539185,
      "eval_loss": 0.15190598368644714,
      "eval_runtime": 137.5049,
      "eval_samples_per_second": 5.193,
      "eval_steps_per_second": 0.655,
      "step": 850
    },
    {
      "epoch": 1.4106583072100314,
      "grad_norm": 0.5425278544425964,
      "learning_rate": 7.722007722007722e-06,
      "loss": 0.1274,
      "step": 900
    },
    {
      "epoch": 1.4106583072100314,
      "eval_loss": 0.15125001966953278,
      "eval_runtime": 137.6226,
      "eval_samples_per_second": 5.188,
      "eval_steps_per_second": 0.654,
      "step": 900
    },
    {
      "epoch": 1.489028213166144,
      "grad_norm": 0.48568734526634216,
      "learning_rate": 6.7567567567567575e-06,
      "loss": 0.1347,
      "step": 950
    },
    {
      "epoch": 1.489028213166144,
      "eval_loss": 0.15007726848125458,
      "eval_runtime": 137.6801,
      "eval_samples_per_second": 5.186,
      "eval_steps_per_second": 0.654,
      "step": 950
    },
    {
      "epoch": 1.567398119122257,
      "grad_norm": 0.5619414448738098,
      "learning_rate": 5.791505791505792e-06,
      "loss": 0.1332,
      "step": 1000
    },
    {
      "epoch": 1.567398119122257,
      "eval_loss": 0.15013186633586884,
      "eval_runtime": 137.5165,
      "eval_samples_per_second": 5.192,
      "eval_steps_per_second": 0.654,
      "step": 1000
    },
    {
      "epoch": 1.64576802507837,
      "grad_norm": 0.5540875196456909,
      "learning_rate": 4.8262548262548266e-06,
      "loss": 0.1263,
      "step": 1050
    },
    {
      "epoch": 1.64576802507837,
      "eval_loss": 0.14901334047317505,
      "eval_runtime": 137.5107,
      "eval_samples_per_second": 5.192,
      "eval_steps_per_second": 0.654,
      "step": 1050
    },
    {
      "epoch": 1.7241379310344827,
      "grad_norm": 0.5203628540039062,
      "learning_rate": 3.861003861003861e-06,
      "loss": 0.1291,
      "step": 1100
    },
    {
      "epoch": 1.7241379310344827,
      "eval_loss": 0.14880172908306122,
      "eval_runtime": 137.6519,
      "eval_samples_per_second": 5.187,
      "eval_steps_per_second": 0.654,
      "step": 1100
    },
    {
      "epoch": 1.8025078369905956,
      "grad_norm": 0.5150081515312195,
      "learning_rate": 2.895752895752896e-06,
      "loss": 0.1287,
      "step": 1150
    },
    {
      "epoch": 1.8025078369905956,
      "eval_loss": 0.14855234324932098,
      "eval_runtime": 137.7759,
      "eval_samples_per_second": 5.182,
      "eval_steps_per_second": 0.653,
      "step": 1150
    },
    {
      "epoch": 1.8808777429467085,
      "grad_norm": 0.5428478717803955,
      "learning_rate": 1.9305019305019305e-06,
      "loss": 0.1278,
      "step": 1200
    },
    {
      "epoch": 1.8808777429467085,
      "eval_loss": 0.14838644862174988,
      "eval_runtime": 137.7999,
      "eval_samples_per_second": 5.181,
      "eval_steps_per_second": 0.653,
      "step": 1200
    }
  ],
  "logging_steps": 50,
  "max_steps": 1300,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.1478967485661184e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}