{ "best_metric": 44.8653, "best_model_checkpoint": "xl_ox-wn_cod_20ep_grad-acc/checkpoint-27400", "epoch": 20.0, "global_step": 27400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.75e-05, "loss": 1.8703, "step": 1370 }, { "epoch": 1.0, "eval_gen_len": 11.332355886139322, "eval_loss": 1.6332706212997437, "eval_rouge1": 32.6985, "eval_rouge2": 12.1019, "eval_rougeL": 30.2866, "eval_rougeLsum": 30.3052, "eval_runtime": 298.865, "eval_samples_per_second": 46.784, "eval_steps_per_second": 1.462, "step": 1370 }, { "epoch": 2.0, "learning_rate": 4.5e-05, "loss": 1.5949, "step": 2740 }, { "epoch": 2.0, "eval_gen_len": 11.296452581891003, "eval_loss": 1.5355325937271118, "eval_rouge1": 34.2008, "eval_rouge2": 13.6349, "eval_rougeL": 31.7662, "eval_rougeLsum": 31.7895, "eval_runtime": 299.2079, "eval_samples_per_second": 46.73, "eval_steps_per_second": 1.461, "step": 2740 }, { "epoch": 3.0, "learning_rate": 4.25e-05, "loss": 1.4162, "step": 4110 }, { "epoch": 3.0, "eval_gen_len": 11.445930482048349, "eval_loss": 1.4717226028442383, "eval_rouge1": 35.9397, "eval_rouge2": 15.1721, "eval_rougeL": 33.3504, "eval_rougeLsum": 33.3788, "eval_runtime": 307.2438, "eval_samples_per_second": 45.508, "eval_steps_per_second": 1.422, "step": 4110 }, { "epoch": 4.0, "learning_rate": 4e-05, "loss": 1.2689, "step": 5480 }, { "epoch": 4.0, "eval_gen_len": 11.853382920898298, "eval_loss": 1.4403611421585083, "eval_rouge1": 37.206, "eval_rouge2": 16.7488, "eval_rougeL": 34.5745, "eval_rougeLsum": 34.5997, "eval_runtime": 302.3654, "eval_samples_per_second": 46.242, "eval_steps_per_second": 1.445, "step": 5480 }, { "epoch": 5.0, "learning_rate": 3.7500000000000003e-05, "loss": 1.1461, "step": 6850 }, { "epoch": 5.0, "eval_gen_len": 11.862322986697182, "eval_loss": 1.43238365650177, "eval_rouge1": 38.4381, "eval_rouge2": 18.2291, "eval_rougeL": 35.7378, "eval_rougeLsum": 35.776, "eval_runtime": 312.8081, "eval_samples_per_second": 44.698, "eval_steps_per_second": 1.397, "step": 6850 }, { "epoch": 6.0, "learning_rate": 3.5e-05, "loss": 1.0438, "step": 8220 }, { "epoch": 6.0, "eval_gen_len": 11.818766986125018, "eval_loss": 1.4202848672866821, "eval_rouge1": 39.5466, "eval_rouge2": 19.5771, "eval_rougeL": 36.8922, "eval_rougeLsum": 36.9236, "eval_runtime": 315.1937, "eval_samples_per_second": 44.36, "eval_steps_per_second": 1.386, "step": 8220 }, { "epoch": 7.0, "learning_rate": 3.2500000000000004e-05, "loss": 0.9541, "step": 9590 }, { "epoch": 7.0, "eval_gen_len": 11.547775711629237, "eval_loss": 1.4285295009613037, "eval_rouge1": 40.3838, "eval_rouge2": 20.4526, "eval_rougeL": 37.7439, "eval_rougeLsum": 37.7834, "eval_runtime": 306.5558, "eval_samples_per_second": 45.61, "eval_steps_per_second": 1.426, "step": 9590 }, { "epoch": 8.0, "learning_rate": 3e-05, "loss": 0.8774, "step": 10960 }, { "epoch": 8.0, "eval_gen_len": 11.85824631669289, "eval_loss": 1.4424927234649658, "eval_rouge1": 41.3562, "eval_rouge2": 21.7414, "eval_rougeL": 38.7297, "eval_rougeLsum": 38.7663, "eval_runtime": 300.481, "eval_samples_per_second": 46.532, "eval_steps_per_second": 1.454, "step": 10960 }, { "epoch": 9.0, "learning_rate": 2.7500000000000004e-05, "loss": 0.8107, "step": 12330 }, { "epoch": 9.0, "eval_gen_len": 11.772207123444428, "eval_loss": 1.4459319114685059, "eval_rouge1": 41.7443, "eval_rouge2": 22.5744, "eval_rougeL": 39.1407, "eval_rougeLsum": 39.1869, "eval_runtime": 297.6627, "eval_samples_per_second": 46.973, "eval_steps_per_second": 1.468, "step": 12330 }, { "epoch": 10.0, "learning_rate": 2.5e-05, "loss": 0.7529, "step": 13700 }, { "epoch": 10.0, "eval_gen_len": 12.04198254899156, "eval_loss": 1.4770309925079346, "eval_rouge1": 42.415, "eval_rouge2": 23.315, "eval_rougeL": 39.8121, "eval_rougeLsum": 39.8443, "eval_runtime": 306.0301, "eval_samples_per_second": 45.688, "eval_steps_per_second": 1.428, "step": 13700 }, { "epoch": 11.0, "learning_rate": 2.25e-05, "loss": 0.7021, "step": 15070 }, { "epoch": 11.0, "eval_gen_len": 12.064940637963096, "eval_loss": 1.5062141418457031, "eval_rouge1": 42.8528, "eval_rouge2": 24.038, "eval_rougeL": 40.3049, "eval_rougeLsum": 40.3369, "eval_runtime": 315.4749, "eval_samples_per_second": 44.32, "eval_steps_per_second": 1.385, "step": 15070 }, { "epoch": 12.0, "learning_rate": 2e-05, "loss": 0.6572, "step": 16440 }, { "epoch": 12.0, "eval_gen_len": 12.163925046488343, "eval_loss": 1.53498375415802, "eval_rouge1": 43.2449, "eval_rouge2": 24.4856, "eval_rougeL": 40.5908, "eval_rougeLsum": 40.6058, "eval_runtime": 305.4599, "eval_samples_per_second": 45.774, "eval_steps_per_second": 1.431, "step": 16440 }, { "epoch": 13.0, "learning_rate": 1.75e-05, "loss": 0.6182, "step": 17810 }, { "epoch": 13.0, "eval_gen_len": 12.19503647546846, "eval_loss": 1.5605404376983643, "eval_rouge1": 43.6881, "eval_rouge2": 25.1178, "eval_rougeL": 41.0796, "eval_rougeLsum": 41.1027, "eval_runtime": 313.2658, "eval_samples_per_second": 44.633, "eval_steps_per_second": 1.395, "step": 17810 }, { "epoch": 14.0, "learning_rate": 1.5e-05, "loss": 0.5855, "step": 19180 }, { "epoch": 14.0, "eval_gen_len": 12.149549420683737, "eval_loss": 1.595506191253662, "eval_rouge1": 43.9584, "eval_rouge2": 25.3084, "eval_rougeL": 41.3372, "eval_rougeLsum": 41.3654, "eval_runtime": 304.541, "eval_samples_per_second": 45.912, "eval_steps_per_second": 1.435, "step": 19180 }, { "epoch": 15.0, "learning_rate": 1.25e-05, "loss": 0.5569, "step": 20550 }, { "epoch": 15.0, "eval_gen_len": 12.273208410813904, "eval_loss": 1.6153297424316406, "eval_rouge1": 44.1726, "eval_rouge2": 25.6522, "eval_rougeL": 41.5898, "eval_rougeLsum": 41.6245, "eval_runtime": 303.9159, "eval_samples_per_second": 46.006, "eval_steps_per_second": 1.438, "step": 20550 }, { "epoch": 16.0, "learning_rate": 1e-05, "loss": 0.5329, "step": 21920 }, { "epoch": 16.0, "eval_gen_len": 12.136389643827778, "eval_loss": 1.642225742340088, "eval_rouge1": 44.3992, "eval_rouge2": 26.0818, "eval_rougeL": 41.8869, "eval_rougeLsum": 41.9091, "eval_runtime": 307.8415, "eval_samples_per_second": 45.419, "eval_steps_per_second": 1.42, "step": 21920 }, { "epoch": 17.0, "learning_rate": 7.5e-06, "loss": 0.5121, "step": 23290 }, { "epoch": 17.0, "eval_gen_len": 12.27370905449864, "eval_loss": 1.6686856746673584, "eval_rouge1": 44.6286, "eval_rouge2": 26.3925, "eval_rougeL": 42.0502, "eval_rougeLsum": 42.0707, "eval_runtime": 318.8767, "eval_samples_per_second": 43.848, "eval_steps_per_second": 1.37, "step": 23290 }, { "epoch": 18.0, "learning_rate": 5e-06, "loss": 0.4949, "step": 24660 }, { "epoch": 18.0, "eval_gen_len": 12.227864397081962, "eval_loss": 1.6946537494659424, "eval_rouge1": 44.6441, "eval_rouge2": 26.459, "eval_rougeL": 42.0687, "eval_rougeLsum": 42.0946, "eval_runtime": 303.6837, "eval_samples_per_second": 46.041, "eval_steps_per_second": 1.439, "step": 24660 }, { "epoch": 19.0, "learning_rate": 2.5e-06, "loss": 0.4832, "step": 26030 }, { "epoch": 19.0, "eval_gen_len": 12.23701902446002, "eval_loss": 1.705538034439087, "eval_rouge1": 44.775, "eval_rouge2": 26.6738, "eval_rougeL": 42.2315, "eval_rougeLsum": 42.2558, "eval_runtime": 303.9, "eval_samples_per_second": 46.009, "eval_steps_per_second": 1.438, "step": 26030 }, { "epoch": 20.0, "learning_rate": 0.0, "loss": 0.4758, "step": 27400 }, { "epoch": 20.0, "eval_gen_len": 12.328994421398942, "eval_loss": 1.7153083086013794, "eval_rouge1": 44.8653, "eval_rouge2": 26.7238, "eval_rougeL": 42.2895, "eval_rougeLsum": 42.3168, "eval_runtime": 292.7979, "eval_samples_per_second": 47.753, "eval_steps_per_second": 1.492, "step": 27400 } ], "max_steps": 27400, "num_train_epochs": 20, "total_flos": 3.652924884167688e+18, "trial_name": null, "trial_params": null }