|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 18.998535871156662, |
|
"eval_steps": 500, |
|
"global_step": 6488, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3e-05, |
|
"loss": 1.574, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3e-05, |
|
"loss": 1.474, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3e-05, |
|
"loss": 1.4104, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4536923076923077, |
|
"eval_loss": 3.3574578762054443, |
|
"eval_runtime": 4.4299, |
|
"eval_samples_per_second": 112.869, |
|
"eval_steps_per_second": 14.221, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.08840557310176213, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.058, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3889, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3e-05, |
|
"loss": 1.387, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3e-05, |
|
"loss": 1.389, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.4543589743589744, |
|
"eval_loss": 3.4179794788360596, |
|
"eval_runtime": 4.1997, |
|
"eval_samples_per_second": 119.055, |
|
"eval_steps_per_second": 15.001, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.09820569565497561, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.056, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3725, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3588, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3e-05, |
|
"loss": 1.348, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3414, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.45476923076923076, |
|
"eval_loss": 3.511923313140869, |
|
"eval_runtime": 4.1985, |
|
"eval_samples_per_second": 119.089, |
|
"eval_steps_per_second": 15.005, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.09889831624589397, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.056, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3166, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3035, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 3e-05, |
|
"loss": 1.3002, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.45543589743589746, |
|
"eval_loss": 3.5288071632385254, |
|
"eval_runtime": 4.4066, |
|
"eval_samples_per_second": 113.466, |
|
"eval_steps_per_second": 14.297, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.11209345982903633, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.054, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2929, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2693, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2697, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2574, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.45394871794871794, |
|
"eval_loss": 3.6893365383148193, |
|
"eval_runtime": 4.1122, |
|
"eval_samples_per_second": 121.591, |
|
"eval_steps_per_second": 15.32, |
|
"step": 1707 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.12076940663377221, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.056, |
|
"step": 1707 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2097, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2188, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2258, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.45615384615384613, |
|
"eval_loss": 3.725893497467041, |
|
"eval_runtime": 4.0989, |
|
"eval_samples_per_second": 121.984, |
|
"eval_steps_per_second": 15.37, |
|
"step": 2049 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.11884272898137646, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.058, |
|
"step": 2049 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 3e-05, |
|
"loss": 1.205, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1643, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1844, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.45594871794871794, |
|
"eval_loss": 3.724449396133423, |
|
"eval_runtime": 4.8158, |
|
"eval_samples_per_second": 103.825, |
|
"eval_steps_per_second": 13.082, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 0.12407896336995003, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.048, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1771, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1218, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1491, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1363, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.4543589743589744, |
|
"eval_loss": 3.8138701915740967, |
|
"eval_runtime": 4.1963, |
|
"eval_samples_per_second": 119.152, |
|
"eval_steps_per_second": 15.013, |
|
"step": 2732 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 0.11947629327387163, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.046, |
|
"step": 2732 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0858, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0951, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0903, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.4524102564102564, |
|
"eval_loss": 3.9115548133850098, |
|
"eval_runtime": 4.4011, |
|
"eval_samples_per_second": 113.609, |
|
"eval_steps_per_second": 14.315, |
|
"step": 3073 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 0.13426062809240774, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.034, |
|
"step": 3073 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0822, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0414, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0563, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0538, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.4515897435897436, |
|
"eval_loss": 3.92203426361084, |
|
"eval_runtime": 4.4105, |
|
"eval_samples_per_second": 113.366, |
|
"eval_steps_per_second": 14.284, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 0.11780869016319889, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.032, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 10.25, |
|
"learning_rate": 3e-05, |
|
"loss": 0.991, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 10.54, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0019, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9971, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.45143589743589746, |
|
"eval_loss": 3.967252492904663, |
|
"eval_runtime": 4.1003, |
|
"eval_samples_per_second": 121.943, |
|
"eval_steps_per_second": 15.365, |
|
"step": 3756 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bleu": 0.12407049349871409, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.026, |
|
"step": 3756 |
|
}, |
|
{ |
|
"epoch": 11.13, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9861, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 11.42, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9436, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 11.71, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9699, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.45076923076923076, |
|
"eval_loss": 4.033609867095947, |
|
"eval_runtime": 4.8103, |
|
"eval_samples_per_second": 103.943, |
|
"eval_steps_per_second": 13.097, |
|
"step": 4098 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bleu": 0.12190601380876163, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.026, |
|
"step": 4098 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9783, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 12.3, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9064, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9178, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 12.88, |
|
"learning_rate": 3e-05, |
|
"loss": 0.9235, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.4492820512820513, |
|
"eval_loss": 4.002023696899414, |
|
"eval_runtime": 4.8204, |
|
"eval_samples_per_second": 103.726, |
|
"eval_steps_per_second": 13.069, |
|
"step": 4439 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bleu": 0.13115986163477472, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.026, |
|
"step": 4439 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8902, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8703, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 13.76, |
|
"learning_rate": 3e-05, |
|
"loss": 0.891, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.44774358974358974, |
|
"eval_loss": 4.071566104888916, |
|
"eval_runtime": 4.8316, |
|
"eval_samples_per_second": 103.485, |
|
"eval_steps_per_second": 13.039, |
|
"step": 4781 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bleu": 0.12164300660650715, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.024, |
|
"step": 4781 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8698, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8338, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 14.64, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8362, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 3e-05, |
|
"loss": 0.845, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.44774358974358974, |
|
"eval_loss": 4.09920597076416, |
|
"eval_runtime": 4.4174, |
|
"eval_samples_per_second": 113.19, |
|
"eval_steps_per_second": 14.262, |
|
"step": 5122 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bleu": 0.1224977607294519, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.02, |
|
"step": 5122 |
|
}, |
|
{ |
|
"epoch": 15.23, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7981, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 15.52, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7978, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 15.81, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8009, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.4464102564102564, |
|
"eval_loss": 4.093270301818848, |
|
"eval_runtime": 4.102, |
|
"eval_samples_per_second": 121.892, |
|
"eval_steps_per_second": 15.358, |
|
"step": 5464 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 0.12201237729153987, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.022, |
|
"step": 5464 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7948, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.746, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 16.69, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7627, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"learning_rate": 3e-05, |
|
"loss": 0.782, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.44671794871794873, |
|
"eval_loss": 4.1283488273620605, |
|
"eval_runtime": 4.84, |
|
"eval_samples_per_second": 103.305, |
|
"eval_steps_per_second": 13.016, |
|
"step": 5805 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bleu": 0.1263852710208009, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.02, |
|
"step": 5805 |
|
}, |
|
{ |
|
"epoch": 17.28, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7142, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 17.57, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7253, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 17.86, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7294, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.44564102564102565, |
|
"eval_loss": 4.164257049560547, |
|
"eval_runtime": 4.1037, |
|
"eval_samples_per_second": 121.84, |
|
"eval_steps_per_second": 15.352, |
|
"step": 6147 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_bleu": 0.10715019125675011, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.022, |
|
"step": 6147 |
|
}, |
|
{ |
|
"epoch": 18.16, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7055, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 18.45, |
|
"learning_rate": 3e-05, |
|
"loss": 0.672, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 18.74, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6792, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.44487179487179485, |
|
"eval_loss": 4.185911655426025, |
|
"eval_runtime": 4.823, |
|
"eval_samples_per_second": 103.67, |
|
"eval_steps_per_second": 13.062, |
|
"step": 6488 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_bleu": 0.11314440439431668, |
|
"eval_exact_match": 0.0, |
|
"eval_prefix_exact_match": 0.02, |
|
"step": 6488 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 17050, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 1.2847918834274796e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|