{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.090225563909774,
  "global_step": 300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.3,
      "learning_rate": 4.999999999999999e-07,
      "loss": 1.8578,
      "step": 10
    },
    {
      "epoch": 0.6,
      "learning_rate": 6.505149978319905e-07,
      "loss": 1.719,
      "step": 20
    },
    {
      "epoch": 0.9,
      "learning_rate": 7.38560627359831e-07,
      "loss": 1.7023,
      "step": 30
    },
    {
      "epoch": 1.21,
      "learning_rate": 8.01029995663981e-07,
      "loss": 1.7111,
      "step": 40
    },
    {
      "epoch": 1.51,
      "learning_rate": 8.494850021680092e-07,
      "loss": 1.6419,
      "step": 50
    },
    {
      "epoch": 1.81,
      "learning_rate": 8.890756251918216e-07,
      "loss": 1.5888,
      "step": 60
    },
    {
      "epoch": 2.12,
      "learning_rate": 9.225490200071283e-07,
      "loss": 1.5672,
      "step": 70
    },
    {
      "epoch": 2.42,
      "learning_rate": 9.515449934959715e-07,
      "loss": 1.525,
      "step": 80
    },
    {
      "epoch": 2.72,
      "learning_rate": 9.771212547196622e-07,
      "loss": 1.5381,
      "step": 90
    },
    {
      "epoch": 3.03,
      "learning_rate": 9.999999999999997e-07,
      "loss": 1.6184,
      "step": 100
    },
    {
      "epoch": 3.03,
      "eval_oa_private_accuracy": 0.6486486486486487,
      "eval_oa_private_loss": 1.4580078125,
      "eval_oa_private_runtime": 2.0191,
      "eval_oa_private_samples_per_second": 8.42,
      "eval_oa_private_steps_per_second": 0.495,
      "step": 100
    },
    {
      "epoch": 3.33,
      "learning_rate": 9.941935483870966e-07,
      "loss": 1.4745,
      "step": 110
    },
    {
      "epoch": 3.63,
      "learning_rate": 9.87741935483871e-07,
      "loss": 1.5171,
      "step": 120
    },
    {
      "epoch": 3.93,
      "learning_rate": 9.812903225806452e-07,
      "loss": 1.5434,
      "step": 130
    },
    {
      "epoch": 4.24,
      "learning_rate": 9.741935483870968e-07,
      "loss": 1.5055,
      "step": 140
    },
    {
      "epoch": 4.54,
      "learning_rate": 9.67741935483871e-07,
      "loss": 1.4663,
      "step": 150
    },
    {
      "epoch": 4.84,
      "learning_rate": 9.612903225806452e-07,
      "loss": 1.4662,
      "step": 160
    },
    {
      "epoch": 5.15,
      "learning_rate": 9.548387096774193e-07,
      "loss": 1.4806,
      "step": 170
    },
    {
      "epoch": 5.45,
      "learning_rate": 9.483870967741935e-07,
      "loss": 1.4755,
      "step": 180
    },
    {
      "epoch": 5.75,
      "learning_rate": 9.419354838709677e-07,
      "loss": 1.4745,
      "step": 190
    },
    {
      "epoch": 6.06,
      "learning_rate": 9.354838709677418e-07,
      "loss": 1.4963,
      "step": 200
    },
    {
      "epoch": 6.06,
      "eval_oa_private_accuracy": 0.6705276705276705,
      "eval_oa_private_loss": 1.490234375,
      "eval_oa_private_runtime": 0.8407,
      "eval_oa_private_samples_per_second": 20.222,
      "eval_oa_private_steps_per_second": 1.19,
      "step": 200
    },
    {
      "epoch": 6.36,
      "learning_rate": 9.290322580645161e-07,
      "loss": 1.4592,
      "step": 210
    },
    {
      "epoch": 6.66,
      "learning_rate": 9.225806451612903e-07,
      "loss": 1.447,
      "step": 220
    },
    {
      "epoch": 6.96,
      "learning_rate": 9.161290322580644e-07,
      "loss": 1.4527,
      "step": 230
    },
    {
      "epoch": 7.27,
      "learning_rate": 9.096774193548387e-07,
      "loss": 1.4361,
      "step": 240
    },
    {
      "epoch": 7.57,
      "learning_rate": 9.032258064516129e-07,
      "loss": 1.4065,
      "step": 250
    },
    {
      "epoch": 7.87,
      "learning_rate": 8.96774193548387e-07,
      "loss": 1.4299,
      "step": 260
    },
    {
      "epoch": 8.18,
      "learning_rate": 8.896774193548387e-07,
      "loss": 1.4363,
      "step": 270
    },
    {
      "epoch": 8.48,
      "learning_rate": 8.832258064516129e-07,
      "loss": 1.4161,
      "step": 280
    },
    {
      "epoch": 8.78,
      "learning_rate": 8.767741935483871e-07,
      "loss": 1.4356,
      "step": 290
    },
    {
      "epoch": 9.09,
      "learning_rate": 8.703225806451613e-07,
      "loss": 1.4495,
      "step": 300
    },
    {
      "epoch": 9.09,
      "eval_oa_private_accuracy": 0.6664378860672615,
      "eval_oa_private_loss": 1.3466796875,
      "eval_oa_private_runtime": 0.9592,
      "eval_oa_private_samples_per_second": 17.722,
      "eval_oa_private_steps_per_second": 1.042,
      "step": 300
    }
  ],
  "max_steps": 1650,
  "num_train_epochs": 50,
  "total_flos": 8.233564448028099e+17,
  "trial_name": null,
  "trial_params": null
}