|
{ |
|
"best_metric": 0.26939964294433594, |
|
"best_model_checkpoint": "./checkpoint/checkpoint-4000", |
|
"epoch": 4.790692369111441, |
|
"eval_steps": 500, |
|
"global_step": 10500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4e-08, |
|
"loss": 12.0623, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.9960000000000002e-05, |
|
"loss": 7.4437, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_bleu": 33.6574, |
|
"eval_gen_len": 31.2317, |
|
"eval_loss": 3.4493963718414307, |
|
"eval_runtime": 576.3536, |
|
"eval_samples_per_second": 1.73, |
|
"eval_steps_per_second": 1.73, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.904543280726925e-05, |
|
"loss": 1.3107, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_bleu": 33.3513, |
|
"eval_gen_len": 31.7091, |
|
"eval_loss": 0.2903362810611725, |
|
"eval_runtime": 476.1637, |
|
"eval_samples_per_second": 2.094, |
|
"eval_steps_per_second": 2.094, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.8088952654232427e-05, |
|
"loss": 0.1582, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_bleu": 33.7009, |
|
"eval_gen_len": 31.7202, |
|
"eval_loss": 0.2747056484222412, |
|
"eval_runtime": 477.4288, |
|
"eval_samples_per_second": 2.088, |
|
"eval_steps_per_second": 2.088, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.71324725011956e-05, |
|
"loss": 0.1477, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_bleu": 34.2274, |
|
"eval_gen_len": 31.5557, |
|
"eval_loss": 0.2713315188884735, |
|
"eval_runtime": 473.8787, |
|
"eval_samples_per_second": 2.104, |
|
"eval_steps_per_second": 2.104, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.617599234815878e-05, |
|
"loss": 0.1413, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_bleu": 34.0664, |
|
"eval_gen_len": 31.674, |
|
"eval_loss": 0.2716849446296692, |
|
"eval_runtime": 477.2223, |
|
"eval_samples_per_second": 2.089, |
|
"eval_steps_per_second": 2.089, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.5219512195121952e-05, |
|
"loss": 0.1355, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_bleu": 34.1168, |
|
"eval_gen_len": 31.8506, |
|
"eval_loss": 0.2718922197818756, |
|
"eval_runtime": 479.68, |
|
"eval_samples_per_second": 2.078, |
|
"eval_steps_per_second": 2.078, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.4263032042085128e-05, |
|
"loss": 0.136, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_bleu": 34.2638, |
|
"eval_gen_len": 31.7523, |
|
"eval_loss": 0.270623117685318, |
|
"eval_runtime": 479.0203, |
|
"eval_samples_per_second": 2.081, |
|
"eval_steps_per_second": 2.081, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.3306551889048302e-05, |
|
"loss": 0.1316, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_bleu": 34.1582, |
|
"eval_gen_len": 31.6931, |
|
"eval_loss": 0.26939964294433594, |
|
"eval_runtime": 477.4424, |
|
"eval_samples_per_second": 2.088, |
|
"eval_steps_per_second": 2.088, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.235007173601148e-05, |
|
"loss": 0.1312, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_bleu": 34.4277, |
|
"eval_gen_len": 31.662, |
|
"eval_loss": 0.2704804539680481, |
|
"eval_runtime": 479.1942, |
|
"eval_samples_per_second": 2.081, |
|
"eval_steps_per_second": 2.081, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.1393591582974655e-05, |
|
"loss": 0.1258, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_bleu": 34.3594, |
|
"eval_gen_len": 31.651, |
|
"eval_loss": 0.27046987414360046, |
|
"eval_runtime": 478.936, |
|
"eval_samples_per_second": 2.082, |
|
"eval_steps_per_second": 2.082, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.043711142993783e-05, |
|
"loss": 0.1271, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_bleu": 34.3412, |
|
"eval_gen_len": 31.8094, |
|
"eval_loss": 0.27054643630981445, |
|
"eval_runtime": 481.7151, |
|
"eval_samples_per_second": 2.07, |
|
"eval_steps_per_second": 2.07, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 9.480631276901005e-06, |
|
"loss": 0.1249, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_bleu": 34.2387, |
|
"eval_gen_len": 31.7212, |
|
"eval_loss": 0.2704330086708069, |
|
"eval_runtime": 479.9945, |
|
"eval_samples_per_second": 2.077, |
|
"eval_steps_per_second": 2.077, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 8.52415112386418e-06, |
|
"loss": 0.1245, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_bleu": 34.3033, |
|
"eval_gen_len": 31.8616, |
|
"eval_loss": 0.27082785964012146, |
|
"eval_runtime": 482.2741, |
|
"eval_samples_per_second": 2.067, |
|
"eval_steps_per_second": 2.067, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 7.5676709708273554e-06, |
|
"loss": 0.1195, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"eval_bleu": 34.2748, |
|
"eval_gen_len": 31.9017, |
|
"eval_loss": 0.27176010608673096, |
|
"eval_runtime": 484.0841, |
|
"eval_samples_per_second": 2.06, |
|
"eval_steps_per_second": 2.06, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 6.611190817790531e-06, |
|
"loss": 0.1198, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_bleu": 34.2897, |
|
"eval_gen_len": 31.7312, |
|
"eval_loss": 0.27175214886665344, |
|
"eval_runtime": 479.5665, |
|
"eval_samples_per_second": 2.079, |
|
"eval_steps_per_second": 2.079, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 5.654710664753707e-06, |
|
"loss": 0.1209, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"eval_bleu": 34.4446, |
|
"eval_gen_len": 31.7272, |
|
"eval_loss": 0.2709992527961731, |
|
"eval_runtime": 478.4748, |
|
"eval_samples_per_second": 2.084, |
|
"eval_steps_per_second": 2.084, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 4.6982305117168825e-06, |
|
"loss": 0.1201, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"eval_bleu": 34.3571, |
|
"eval_gen_len": 31.7432, |
|
"eval_loss": 0.2712614834308624, |
|
"eval_runtime": 478.6295, |
|
"eval_samples_per_second": 2.083, |
|
"eval_steps_per_second": 2.083, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 3.7417503586800574e-06, |
|
"loss": 0.1201, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"eval_bleu": 34.4398, |
|
"eval_gen_len": 31.7613, |
|
"eval_loss": 0.27223262190818787, |
|
"eval_runtime": 478.6225, |
|
"eval_samples_per_second": 2.083, |
|
"eval_steps_per_second": 2.083, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 2.785270205643233e-06, |
|
"loss": 0.1178, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"eval_bleu": 34.4074, |
|
"eval_gen_len": 31.7753, |
|
"eval_loss": 0.27177131175994873, |
|
"eval_runtime": 479.0762, |
|
"eval_samples_per_second": 2.081, |
|
"eval_steps_per_second": 2.081, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 1.8287900526064088e-06, |
|
"loss": 0.1181, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"eval_bleu": 34.4628, |
|
"eval_gen_len": 31.8034, |
|
"eval_loss": 0.2723881006240845, |
|
"eval_runtime": 479.5721, |
|
"eval_samples_per_second": 2.079, |
|
"eval_steps_per_second": 2.079, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 8.72309899569584e-07, |
|
"loss": 0.1169, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"eval_bleu": 34.563, |
|
"eval_gen_len": 31.7442, |
|
"eval_loss": 0.2720402777194977, |
|
"eval_runtime": 478.0313, |
|
"eval_samples_per_second": 2.086, |
|
"eval_steps_per_second": 2.086, |
|
"step": 10500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10955, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 1.820292017113006e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|