|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9998730481147645,
  "global_step": 3938,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 7.5e-05,
      "loss": 12.4009,
      "step": 100
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00015,
      "loss": 1.7002,
      "step": 200
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.000225,
      "loss": 1.374,
      "step": 300
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0003,
      "loss": 1.2745,
      "step": 400
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0002915206331260599,
      "loss": 1.2333,
      "step": 500
    },
    {
      "epoch": 0.13,
      "eval_bleu": 0.16985441338971674,
      "eval_loss": 1.1092561483383179,
      "eval_rouge1": 0.49744095525932,
      "eval_rouge2": 0.4023858560784097,
      "eval_rougeL": 0.4605319249625821,
      "eval_rougeLsum": 0.4606110961561354,
      "eval_runtime": 448.9761,
      "eval_samples_per_second": 10.453,
      "eval_steps_per_second": 1.307,
      "step": 500
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002830412662521198,
      "loss": 1.2179,
      "step": 600
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0002745618993781798,
      "loss": 1.194,
      "step": 700
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00026608253250423966,
      "loss": 1.1763,
      "step": 800
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0002576031656302996,
      "loss": 1.1782,
      "step": 900
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00024912379875635947,
      "loss": 1.1517,
      "step": 1000
    },
    {
      "epoch": 0.25,
      "eval_bleu": 0.167245118666087,
      "eval_loss": 1.0618804693222046,
      "eval_rouge1": 0.4974694475768039,
      "eval_rouge2": 0.40136842689484065,
      "eval_rougeL": 0.46015392715352893,
      "eval_rougeLsum": 0.4601809444739556,
      "eval_runtime": 441.1296,
      "eval_samples_per_second": 10.639,
      "eval_steps_per_second": 1.331,
      "step": 1000
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0002406444318824194,
      "loss": 1.136,
      "step": 1100
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00023216506500847936,
      "loss": 1.1522,
      "step": 1200
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00022368569813453927,
      "loss": 1.1346,
      "step": 1300
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00021520633126059918,
      "loss": 1.1307,
      "step": 1400
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.0002067269643866591,
      "loss": 1.1281,
      "step": 1500
    },
    {
      "epoch": 0.38,
      "eval_bleu": 0.16267141788045325,
      "eval_loss": 1.0443658828735352,
      "eval_rouge1": 0.4920999084850846,
      "eval_rouge2": 0.3953067823623436,
      "eval_rougeL": 0.45460007159317317,
      "eval_rougeLsum": 0.4546306350106014,
      "eval_runtime": 454.009,
      "eval_samples_per_second": 10.337,
      "eval_steps_per_second": 1.293,
      "step": 1500
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00019824759751271905,
      "loss": 1.1131,
      "step": 1600
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00018976823063877895,
      "loss": 1.1105,
      "step": 1700
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00018128886376483889,
      "loss": 1.1267,
      "step": 1800
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.0001728094968908988,
      "loss": 1.0872,
      "step": 1900
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.0001643301300169587,
      "loss": 1.1054,
      "step": 2000
    },
    {
      "epoch": 0.51,
      "eval_bleu": 0.17300330032744055,
      "eval_loss": 1.0234187841415405,
      "eval_rouge1": 0.503801949569296,
      "eval_rouge2": 0.40895933412262137,
      "eval_rougeL": 0.466809748089249,
      "eval_rougeLsum": 0.4667488466486195,
      "eval_runtime": 443.5894,
      "eval_samples_per_second": 10.58,
      "eval_steps_per_second": 1.323,
      "step": 2000
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00015585076314301866,
      "loss": 1.1017,
      "step": 2100
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00014737139626907857,
      "loss": 1.1064,
      "step": 2200
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00013889202939513847,
      "loss": 1.1047,
      "step": 2300
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.0001304126625211984,
      "loss": 1.0758,
      "step": 2400
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.00012193329564725833,
      "loss": 1.0848,
      "step": 2500
    },
    {
      "epoch": 0.63,
      "eval_bleu": 0.16666637884945387,
      "eval_loss": 1.0130037069320679,
      "eval_rouge1": 0.49905053867339855,
      "eval_rouge2": 0.40300133873229244,
      "eval_rougeL": 0.46173781984585727,
      "eval_rougeLsum": 0.4617687754853303,
      "eval_runtime": 442.0228,
      "eval_samples_per_second": 10.617,
      "eval_steps_per_second": 1.328,
      "step": 2500
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.00011345392877331824,
      "loss": 1.0746,
      "step": 2600
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.00010497456189937817,
      "loss": 1.0777,
      "step": 2700
    },
    {
      "epoch": 0.71,
      "learning_rate": 9.649519502543809e-05,
      "loss": 1.0644,
      "step": 2800
    },
    {
      "epoch": 0.74,
      "learning_rate": 8.801582815149802e-05,
      "loss": 1.0662,
      "step": 2900
    },
    {
      "epoch": 0.76,
      "learning_rate": 7.953646127755793e-05,
      "loss": 1.0737,
      "step": 3000
    },
    {
      "epoch": 0.76,
      "eval_bleu": 0.17037069563841947,
      "eval_loss": 1.002275824546814,
      "eval_rouge1": 0.5031038227519693,
      "eval_rouge2": 0.40777366696302797,
      "eval_rougeL": 0.4660246892337898,
      "eval_rougeLsum": 0.4660288890427905,
      "eval_runtime": 442.0838,
      "eval_samples_per_second": 10.616,
      "eval_steps_per_second": 1.328,
      "step": 3000
    },
    {
      "epoch": 0.79,
      "learning_rate": 7.105709440361785e-05,
      "loss": 1.0639,
      "step": 3100
    },
    {
      "epoch": 0.81,
      "learning_rate": 6.257772752967779e-05,
      "loss": 1.058,
      "step": 3200
    },
    {
      "epoch": 0.84,
      "learning_rate": 5.40983606557377e-05,
      "loss": 1.0598,
      "step": 3300
    },
    {
      "epoch": 0.86,
      "learning_rate": 4.5618993781797626e-05,
      "loss": 1.066,
      "step": 3400
    },
    {
      "epoch": 0.89,
      "learning_rate": 3.713962690785755e-05,
      "loss": 1.0532,
      "step": 3500
    },
    {
      "epoch": 0.89,
      "eval_bleu": 0.16910393271412671,
      "eval_loss": 0.995514452457428,
      "eval_rouge1": 0.5013148071463731,
      "eval_rouge2": 0.405633242849629,
      "eval_rougeL": 0.4639778817773035,
      "eval_rougeLsum": 0.464049641741316,
      "eval_runtime": 441.9704,
      "eval_samples_per_second": 10.618,
      "eval_steps_per_second": 1.328,
      "step": 3500
    },
    {
      "epoch": 0.91,
      "learning_rate": 2.8660260033917467e-05,
      "loss": 1.0503,
      "step": 3600
    },
    {
      "epoch": 0.94,
      "learning_rate": 2.0180893159977384e-05,
      "loss": 1.0415,
      "step": 3700
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.1701526286037308e-05,
      "loss": 1.0498,
      "step": 3800
    },
    {
      "epoch": 0.99,
      "learning_rate": 3.22215941209723e-06,
      "loss": 1.0493,
      "step": 3900
    },
    {
      "epoch": 1.0,
      "step": 3938,
      "total_flos": 5.10811297256448e+17,
      "train_loss": 1.4182514357288336,
      "train_runtime": 30065.8698,
      "train_samples_per_second": 8.384,
      "train_steps_per_second": 0.131
    }
  ],
  "max_steps": 3938,
  "num_train_epochs": 1,
  "total_flos": 5.10811297256448e+17,
  "trial_name": null,
  "trial_params": null
}
|
|