|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 34.0, |
|
"eval_steps": 500, |
|
"global_step": 11611, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3e-05, |
|
"loss": 1.574, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3e-05, |
|
"loss": 1.474, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3e-05, |
|
"loss": 1.4104, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4536923076923077, |
|
"eval_loss": 3.3574578762054443, |
|
"eval_runtime": 4.4299, |
|
"eval_samples_per_second": 112.869, |
|
"eval_steps_per_second": 14.221, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.08840557310176213, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.058, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3889, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3e-05, |
|
"loss": 1.387, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3e-05, |
|
"loss": 1.389, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.4543589743589744, |
|
"eval_loss": 3.4179794788360596, |
|
"eval_runtime": 4.1997, |
|
"eval_samples_per_second": 119.055, |
|
"eval_steps_per_second": 15.001, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.09820569565497561, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.056, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3725, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3588, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3e-05, |
|
"loss": 1.348, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3414, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.45476923076923076, |
|
"eval_loss": 3.511923313140869, |
|
"eval_runtime": 4.1985, |
|
"eval_samples_per_second": 119.089, |
|
"eval_steps_per_second": 15.005, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.09889831624589397, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.056, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3166, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3035, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3002, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.45543589743589746, |
|
"eval_loss": 3.5288071632385254, |
|
"eval_runtime": 4.4066, |
|
"eval_samples_per_second": 113.466, |
|
"eval_steps_per_second": 14.297, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.11209345982903633, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.054, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2929, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2693, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2697, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2574, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.45394871794871794, |
|
"eval_loss": 3.6893365383148193, |
|
"eval_runtime": 4.1122, |
|
"eval_samples_per_second": 121.591, |
|
"eval_steps_per_second": 15.32, |
|
"step": 1707 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.12076940663377221, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.056, |
|
"step": 1707 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2097, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2188, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2258, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.45615384615384613, |
|
"eval_loss": 3.725893497467041, |
|
"eval_runtime": 4.0989, |
|
"eval_samples_per_second": 121.984, |
|
"eval_steps_per_second": 15.37, |
|
"step": 2049 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.11884272898137646, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.058, |
|
"step": 2049 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 3e-05, |
|
"loss": 1.205, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1643, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1844, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.45594871794871794, |
|
"eval_loss": 3.724449396133423, |
|
"eval_runtime": 4.8158, |
|
"eval_samples_per_second": 103.825, |
|
"eval_steps_per_second": 13.082, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 0.12407896336995003, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.048, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1771, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1218, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1491, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1363, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.4543589743589744, |
|
"eval_loss": 3.8138701915740967, |
|
"eval_runtime": 4.1963, |
|
"eval_samples_per_second": 119.152, |
|
"eval_steps_per_second": 15.013, |
|
"step": 2732 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 0.11947629327387163, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.046, |
|
"step": 2732 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0858, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0951, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0903, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.4524102564102564, |
|
"eval_loss": 3.9115548133850098, |
|
"eval_runtime": 4.4011, |
|
"eval_samples_per_second": 113.609, |
|
"eval_steps_per_second": 14.315, |
|
"step": 3073 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 0.13426062809240774, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.034, |
|
"step": 3073 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0822, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0414, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0563, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0538, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.4515897435897436, |
|
"eval_loss": 3.92203426361084, |
|
"eval_runtime": 4.4105, |
|
"eval_samples_per_second": 113.366, |
|
"eval_steps_per_second": 14.284, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 0.11780869016319889, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.032, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 10.25, |
|
"learning_rate": 3e-05, |
|
"loss": 0.991, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 10.54, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0019, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9971, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.45143589743589746, |
|
"eval_loss": 3.967252492904663, |
|
"eval_runtime": 4.1003, |
|
"eval_samples_per_second": 121.943, |
|
"eval_steps_per_second": 15.365, |
|
"step": 3756 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bleu": 0.12407049349871409, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.026, |
|
"step": 3756 |
|
}, |
|
{ |
|
"epoch": 11.13, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9861, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 11.42, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9436, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 11.71, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9699, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.45076923076923076, |
|
"eval_loss": 4.033609867095947, |
|
"eval_runtime": 4.8103, |
|
"eval_samples_per_second": 103.943, |
|
"eval_steps_per_second": 13.097, |
|
"step": 4098 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bleu": 0.12190601380876163, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.026, |
|
"step": 4098 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9783, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 12.3, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9064, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9178, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 12.88, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9235, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.4492820512820513, |
|
"eval_loss": 4.002023696899414, |
|
"eval_runtime": 4.8204, |
|
"eval_samples_per_second": 103.726, |
|
"eval_steps_per_second": 13.069, |
|
"step": 4439 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bleu": 0.13115986163477472, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.026, |
|
"step": 4439 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8902, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8703, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 13.76, |
|
"learning_rate": 3e-05, |
|
"loss": 0.891, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.44774358974358974, |
|
"eval_loss": 4.071566104888916, |
|
"eval_runtime": 4.8316, |
|
"eval_samples_per_second": 103.485, |
|
"eval_steps_per_second": 13.039, |
|
"step": 4781 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bleu": 0.12164300660650715, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.024, |
|
"step": 4781 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8698, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8338, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 14.64, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8362, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 3e-05, |
|
"loss": 0.845, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.44774358974358974, |
|
"eval_loss": 4.09920597076416, |
|
"eval_runtime": 4.4174, |
|
"eval_samples_per_second": 113.19, |
|
"eval_steps_per_second": 14.262, |
|
"step": 5122 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bleu": 0.1224977607294519, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.02, |
|
"step": 5122 |
|
}, |
|
{ |
|
"epoch": 15.23, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7981, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 15.52, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7978, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 15.81, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8009, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.4464102564102564, |
|
"eval_loss": 4.093270301818848, |
|
"eval_runtime": 4.102, |
|
"eval_samples_per_second": 121.892, |
|
"eval_steps_per_second": 15.358, |
|
"step": 5464 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 0.12201237729153987, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.022, |
|
"step": 5464 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7948, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.746, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 16.69, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7627, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"learning_rate": 3e-05, |
|
"loss": 0.782, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.44671794871794873, |
|
"eval_loss": 4.1283488273620605, |
|
"eval_runtime": 4.84, |
|
"eval_samples_per_second": 103.305, |
|
"eval_steps_per_second": 13.016, |
|
"step": 5805 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bleu": 0.1263852710208009, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.02, |
|
"step": 5805 |
|
}, |
|
{ |
|
"epoch": 17.28, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7142, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 17.57, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7253, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 17.86, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7294, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.44564102564102565, |
|
"eval_loss": 4.164257049560547, |
|
"eval_runtime": 4.1037, |
|
"eval_samples_per_second": 121.84, |
|
"eval_steps_per_second": 15.352, |
|
"step": 6147 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_bleu": 0.10715019125675011, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.022, |
|
"step": 6147 |
|
}, |
|
{ |
|
"epoch": 18.16, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7055, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 18.45, |
|
"learning_rate": 3e-05, |
|
"loss": 0.672, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 18.74, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6792, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.44487179487179485, |
|
"eval_loss": 4.185911655426025, |
|
"eval_runtime": 4.823, |
|
"eval_samples_per_second": 103.67, |
|
"eval_steps_per_second": 13.062, |
|
"step": 6488 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_bleu": 0.11314440439431668, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.02, |
|
"step": 6488 |
|
}, |
|
{ |
|
"epoch": 19.03, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7053, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 19.33, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6448, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 19.62, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6448, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6672, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.4436923076923077, |
|
"eval_loss": 4.201004981994629, |
|
"eval_runtime": 4.1985, |
|
"eval_samples_per_second": 119.09, |
|
"eval_steps_per_second": 15.005, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_bleu": 0.11913021835274795, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.02, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 20.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6146, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 20.5, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6102, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 20.79, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6258, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.4428717948717949, |
|
"eval_loss": 4.230019569396973, |
|
"eval_runtime": 4.4124, |
|
"eval_samples_per_second": 113.318, |
|
"eval_steps_per_second": 14.278, |
|
"step": 7171 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_bleu": 0.12133891034082767, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.022, |
|
"step": 7171 |
|
}, |
|
{ |
|
"epoch": 21.08, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6149, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 21.38, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5669, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 21.67, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5923, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 21.96, |
|
"learning_rate": 3e-05, |
|
"loss": 0.599, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.4418974358974359, |
|
"eval_loss": 4.253176689147949, |
|
"eval_runtime": 4.4121, |
|
"eval_samples_per_second": 113.326, |
|
"eval_steps_per_second": 14.279, |
|
"step": 7513 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_bleu": 0.1179919389999147, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.02, |
|
"step": 7513 |
|
}, |
|
{ |
|
"epoch": 22.25, |
|
"learning_rate": 3e-05, |
|
"loss": 0.536, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 22.55, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5479, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 22.84, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5625, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.443025641025641, |
|
"eval_loss": 4.293737411499023, |
|
"eval_runtime": 4.4185, |
|
"eval_samples_per_second": 113.162, |
|
"eval_steps_per_second": 14.258, |
|
"step": 7854 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_bleu": 0.10699095482247514, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.022, |
|
"step": 7854 |
|
}, |
|
{ |
|
"epoch": 23.13, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5409, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 23.43, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5067, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 23.72, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5267, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.44153846153846155, |
|
"eval_loss": 4.254815578460693, |
|
"eval_runtime": 4.2068, |
|
"eval_samples_per_second": 118.854, |
|
"eval_steps_per_second": 14.976, |
|
"step": 8196 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_bleu": 0.10975165733539657, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.024, |
|
"step": 8196 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5363, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 24.3, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4806, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 24.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.49, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 24.89, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5004, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.44035897435897436, |
|
"eval_loss": 4.332499027252197, |
|
"eval_runtime": 4.4052, |
|
"eval_samples_per_second": 113.503, |
|
"eval_steps_per_second": 14.301, |
|
"step": 8537 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_bleu": 0.10401238605841524, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.024, |
|
"step": 8537 |
|
}, |
|
{ |
|
"epoch": 25.18, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4746, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 25.48, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4596, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 25.77, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4681, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.43964102564102564, |
|
"eval_loss": 4.3162150382995605, |
|
"eval_runtime": 4.5151, |
|
"eval_samples_per_second": 110.739, |
|
"eval_steps_per_second": 13.953, |
|
"step": 8879 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_bleu": 0.10710979645106765, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.026, |
|
"step": 8879 |
|
}, |
|
{ |
|
"epoch": 26.06, |
|
"learning_rate": 3e-05, |
|
"loss": 0.459, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 26.35, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4242, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 26.65, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4445, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 26.94, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4453, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.4388205128205128, |
|
"eval_loss": 4.377138137817383, |
|
"eval_runtime": 4.8206, |
|
"eval_samples_per_second": 103.722, |
|
"eval_steps_per_second": 13.069, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_bleu": 0.10010720174849254, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.026, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 27.23, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4066, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 27.53, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4153, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 27.82, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4161, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.43861538461538463, |
|
"eval_loss": 4.405981063842773, |
|
"eval_runtime": 4.4101, |
|
"eval_samples_per_second": 113.376, |
|
"eval_steps_per_second": 14.285, |
|
"step": 9562 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_bleu": 0.10423428286785764, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.026, |
|
"step": 9562 |
|
}, |
|
{ |
|
"epoch": 28.11, |
|
"learning_rate": 3e-05, |
|
"loss": 0.398, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 28.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3788, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 28.7, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3943, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3994, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.4376923076923077, |
|
"eval_loss": 4.468777656555176, |
|
"eval_runtime": 4.2025, |
|
"eval_samples_per_second": 118.976, |
|
"eval_steps_per_second": 14.991, |
|
"step": 9903 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_bleu": 0.08529933417800747, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.02, |
|
"step": 9903 |
|
}, |
|
{ |
|
"epoch": 29.28, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3519, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 29.58, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3643, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 29.87, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3695, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.4376923076923077, |
|
"eval_loss": 4.464532375335693, |
|
"eval_runtime": 4.164, |
|
"eval_samples_per_second": 120.078, |
|
"eval_steps_per_second": 15.13, |
|
"step": 10245 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_bleu": 0.08562517234751162, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.026, |
|
"step": 10245 |
|
}, |
|
{ |
|
"epoch": 30.16, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3474, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 30.45, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3327, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 30.75, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3505, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.4377948717948718, |
|
"eval_loss": 4.462403297424316, |
|
"eval_runtime": 4.2457, |
|
"eval_samples_per_second": 117.767, |
|
"eval_steps_per_second": 14.839, |
|
"step": 10586 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_bleu": 0.09623161231398927, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.022, |
|
"step": 10586 |
|
}, |
|
{ |
|
"epoch": 31.04, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3476, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 31.33, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3057, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 31.63, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3224, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 31.92, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3342, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.43646153846153846, |
|
"eval_loss": 4.46300745010376, |
|
"eval_runtime": 4.5326, |
|
"eval_samples_per_second": 110.312, |
|
"eval_steps_per_second": 13.899, |
|
"step": 10928 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_bleu": 0.09571435454261526, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.02, |
|
"step": 10928 |
|
}, |
|
{ |
|
"epoch": 32.21, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2943, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 32.5, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3021, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 32.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3075, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.4342051282051282, |
|
"eval_loss": 4.544379711151123, |
|
"eval_runtime": 4.4078, |
|
"eval_samples_per_second": 113.435, |
|
"eval_steps_per_second": 14.293, |
|
"step": 11269 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_bleu": 0.11219607728661593, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.016, |
|
"step": 11269 |
|
}, |
|
{ |
|
"epoch": 33.09, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3047, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 33.38, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2764, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 33.67, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2833, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 33.97, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2949, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.43441025641025643, |
|
"eval_loss": 4.548118591308594, |
|
"eval_runtime": 4.4066, |
|
"eval_samples_per_second": 113.466, |
|
"eval_steps_per_second": 14.297, |
|
"step": 11611 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_bleu": 0.10059053111753888, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.018, |
|
"step": 11611 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 17050, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 2.2991334851571876e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|