{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9998730481147645,
  "global_step": 3938,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 7.5e-05,
      "loss": 3.8742,
      "step": 100
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00015,
      "loss": 1.2473,
      "step": 200
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.000225,
      "loss": 1.1998,
      "step": 300
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0003,
      "loss": 1.1545,
      "step": 400
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0002915206331260599,
      "loss": 1.1364,
      "step": 500
    },
    {
      "epoch": 0.13,
      "eval_bleu": 0.17166972696083574,
      "eval_loss": 1.1582657098770142,
      "eval_rouge1": 0.5000074515653312,
      "eval_rouge2": 0.40559916777579735,
      "eval_rougeL": 0.46317379201759545,
      "eval_rougeLsum": 0.4632046447608197,
      "eval_runtime": 211.1462,
      "eval_samples_per_second": 22.226,
      "eval_steps_per_second": 2.78,
      "step": 500
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002830412662521198,
      "loss": 1.1312,
      "step": 600
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0002745618993781798,
      "loss": 1.1164,
      "step": 700
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00026608253250423966,
      "loss": 1.1037,
      "step": 800
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0002576031656302996,
      "loss": 1.1084,
      "step": 900
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00024912379875635947,
      "loss": 1.0874,
      "step": 1000
    },
    {
      "epoch": 0.25,
      "eval_bleu": 0.174359464623795,
      "eval_loss": 1.1108546257019043,
      "eval_rouge1": 0.5050901535930163,
      "eval_rouge2": 0.41029314654694715,
      "eval_rougeL": 0.4678610312798079,
      "eval_rougeLsum": 0.4679688863086813,
      "eval_runtime": 208.6731,
      "eval_samples_per_second": 22.49,
      "eval_steps_per_second": 2.813,
      "step": 1000
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0002406444318824194,
      "loss": 1.0758,
      "step": 1100
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00023216506500847936,
      "loss": 1.0899,
      "step": 1200
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00022368569813453927,
      "loss": 1.0728,
      "step": 1300
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00021520633126059918,
      "loss": 1.0751,
      "step": 1400
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.0002067269643866591,
      "loss": 1.072,
      "step": 1500
    },
    {
      "epoch": 0.38,
      "eval_bleu": 0.16964536883217013,
      "eval_loss": 1.0908721685409546,
      "eval_rouge1": 0.49944919713548785,
      "eval_rouge2": 0.4041247246204984,
      "eval_rougeL": 0.4622247411282464,
      "eval_rougeLsum": 0.4622259005781699,
      "eval_runtime": 214.1194,
      "eval_samples_per_second": 21.918,
      "eval_steps_per_second": 2.741,
      "step": 1500
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00019824759751271905,
      "loss": 1.0601,
      "step": 1600
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00018976823063877895,
      "loss": 1.067,
      "step": 1700
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00018128886376483889,
      "loss": 1.0769,
      "step": 1800
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.0001728094968908988,
      "loss": 1.0376,
      "step": 1900
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.0001643301300169587,
      "loss": 1.0589,
      "step": 2000
    },
    {
      "epoch": 0.51,
      "eval_bleu": 0.17592370006590605,
      "eval_loss": 1.0714174509048462,
      "eval_rouge1": 0.5071117469721906,
      "eval_rouge2": 0.412824258369919,
      "eval_rougeL": 0.47019269708823,
      "eval_rougeLsum": 0.4702462880937972,
      "eval_runtime": 208.6946,
      "eval_samples_per_second": 22.487,
      "eval_steps_per_second": 2.813,
      "step": 2000
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00015585076314301866,
      "loss": 1.0589,
      "step": 2100
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00014737139626907857,
      "loss": 1.0593,
      "step": 2200
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00013889202939513847,
      "loss": 1.0599,
      "step": 2300
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.0001304126625211984,
      "loss": 1.0335,
      "step": 2400
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.00012193329564725833,
      "loss": 1.0441,
      "step": 2500
    },
    {
      "epoch": 0.63,
      "eval_bleu": 0.16827637557044198,
      "eval_loss": 1.0615065097808838,
      "eval_rouge1": 0.500508116585923,
      "eval_rouge2": 0.40524827339523467,
      "eval_rougeL": 0.46325896066513017,
      "eval_rougeLsum": 0.4633425344363471,
      "eval_runtime": 211.7269,
      "eval_samples_per_second": 22.165,
      "eval_steps_per_second": 2.772,
      "step": 2500
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.00011345392877331824,
      "loss": 1.0273,
      "step": 2600
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.00010497456189937817,
      "loss": 1.0371,
      "step": 2700
    },
    {
      "epoch": 0.71,
      "learning_rate": 9.649519502543809e-05,
      "loss": 1.0219,
      "step": 2800
    },
    {
      "epoch": 0.74,
      "learning_rate": 8.801582815149802e-05,
      "loss": 1.0292,
      "step": 2900
    },
    {
      "epoch": 0.76,
      "learning_rate": 7.953646127755793e-05,
      "loss": 1.0292,
      "step": 3000
    },
    {
      "epoch": 0.76,
      "eval_bleu": 0.17334929168529137,
      "eval_loss": 1.0501199960708618,
      "eval_rouge1": 0.5057510392904037,
      "eval_rouge2": 0.41127958350841676,
      "eval_rougeL": 0.46859672321186563,
      "eval_rougeLsum": 0.4686909635105035,
      "eval_runtime": 210.649,
      "eval_samples_per_second": 22.279,
      "eval_steps_per_second": 2.787,
      "step": 3000
    },
    {
      "epoch": 0.79,
      "learning_rate": 7.105709440361785e-05,
      "loss": 1.0166,
      "step": 3100
    },
    {
      "epoch": 0.81,
      "learning_rate": 6.257772752967779e-05,
      "loss": 1.015,
      "step": 3200
    },
    {
      "epoch": 0.84,
      "learning_rate": 5.40983606557377e-05,
      "loss": 1.0142,
      "step": 3300
    },
    {
      "epoch": 0.86,
      "learning_rate": 4.5618993781797626e-05,
      "loss": 1.0256,
      "step": 3400
    },
    {
      "epoch": 0.89,
      "learning_rate": 3.713962690785755e-05,
      "loss": 1.0094,
      "step": 3500
    },
    {
      "epoch": 0.89,
      "eval_bleu": 0.1714081170400144,
      "eval_loss": 1.0423640012741089,
      "eval_rouge1": 0.5035879586288757,
      "eval_rouge2": 0.40851924259079697,
      "eval_rougeL": 0.46637902927069697,
      "eval_rougeLsum": 0.4663610962140706,
      "eval_runtime": 210.1257,
      "eval_samples_per_second": 22.334,
      "eval_steps_per_second": 2.794,
      "step": 3500
    },
    {
      "epoch": 0.91,
      "learning_rate": 2.8660260033917467e-05,
      "loss": 1.0099,
      "step": 3600
    },
    {
      "epoch": 0.94,
      "learning_rate": 2.0180893159977384e-05,
      "loss": 0.9959,
      "step": 3700
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.1701526286037308e-05,
      "loss": 1.0107,
      "step": 3800
    },
    {
      "epoch": 0.99,
      "learning_rate": 3.22215941209723e-06,
      "loss": 1.0094,
      "step": 3900
    },
    {
      "epoch": 1.0,
      "step": 3938,
      "total_flos": 1.5343986185183232e+17,
      "train_loss": 1.1361048782216892,
      "train_runtime": 10695.8658,
      "train_samples_per_second": 23.566,
      "train_steps_per_second": 0.368
    }
  ],
  "max_steps": 3938,
  "num_train_epochs": 1,
  "total_flos": 1.5343986185183232e+17,
  "trial_name": null,
  "trial_params": null
}