{ "best_metric": null, "best_model_checkpoint": null, "epoch": 25.0, "global_step": 2350, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 6.666666666666667e-05, "loss": 3.4338, "step": 94 }, { "epoch": 1.0, "eval_loss": 2.4137182235717773, "eval_runtime": 36.2713, "eval_samples_per_second": 5.514, "eval_steps_per_second": 0.689, "step": 94 }, { "epoch": 2.0, "learning_rate": 3.3333333333333335e-05, "loss": 2.9565, "step": 188 }, { "epoch": 2.0, "eval_loss": 2.173758029937744, "eval_runtime": 35.902, "eval_samples_per_second": 5.571, "eval_steps_per_second": 0.696, "step": 188 }, { "epoch": 3.0, "learning_rate": 0.0, "loss": 2.7101, "step": 282 }, { "epoch": 3.0, "eval_loss": 2.012174606323242, "eval_runtime": 35.6264, "eval_samples_per_second": 5.614, "eval_steps_per_second": 0.702, "step": 282 }, { "epoch": 4.0, "learning_rate": 4.2857142857142856e-05, "loss": 2.7515, "step": 376 }, { "epoch": 4.0, "eval_loss": 1.964645504951477, "eval_runtime": 35.4193, "eval_samples_per_second": 5.647, "eval_steps_per_second": 0.706, "step": 376 }, { "epoch": 5.0, "learning_rate": 2.857142857142857e-05, "loss": 2.724, "step": 470 }, { "epoch": 5.0, "eval_loss": 2.1284220218658447, "eval_runtime": 34.8555, "eval_samples_per_second": 5.738, "eval_steps_per_second": 0.717, "step": 470 }, { "epoch": 6.0, "learning_rate": 1.4285714285714285e-05, "loss": 2.6193, "step": 564 }, { "epoch": 6.0, "eval_loss": 1.9379758834838867, "eval_runtime": 36.3088, "eval_samples_per_second": 5.508, "eval_steps_per_second": 0.689, "step": 564 }, { "epoch": 7.0, "learning_rate": 0.0, "loss": 2.5032, "step": 658 }, { "epoch": 7.0, "eval_loss": 1.9285995960235596, "eval_runtime": 35.9858, "eval_samples_per_second": 5.558, "eval_steps_per_second": 0.695, "step": 658 }, { "epoch": 8.0, "learning_rate": 4.666666666666667e-05, "loss": 2.5342, "step": 752 }, { "epoch": 8.0, "eval_loss": 1.9365949630737305, "eval_runtime": 35.1567, "eval_samples_per_second": 5.689, "eval_steps_per_second": 0.711, "step": 752 }, { "epoch": 9.0, "learning_rate": 4e-05, "loss": 2.5519, "step": 846 }, { "epoch": 9.0, "eval_loss": 1.9736474752426147, "eval_runtime": 35.9727, "eval_samples_per_second": 5.56, "eval_steps_per_second": 0.695, "step": 846 }, { "epoch": 10.0, "learning_rate": 3.3333333333333335e-05, "loss": 2.4988, "step": 940 }, { "epoch": 10.0, "eval_loss": 1.881581425666809, "eval_runtime": 35.6769, "eval_samples_per_second": 5.606, "eval_steps_per_second": 0.701, "step": 940 }, { "epoch": 11.0, "learning_rate": 2.6666666666666667e-05, "loss": 2.5101, "step": 1034 }, { "epoch": 11.0, "eval_loss": 1.8453679084777832, "eval_runtime": 33.4013, "eval_samples_per_second": 5.988, "eval_steps_per_second": 0.748, "step": 1034 }, { "epoch": 12.0, "learning_rate": 2e-05, "loss": 2.4441, "step": 1128 }, { "epoch": 12.0, "eval_loss": 1.8143038749694824, "eval_runtime": 34.7625, "eval_samples_per_second": 5.753, "eval_steps_per_second": 0.719, "step": 1128 }, { "epoch": 13.0, "learning_rate": 1.3333333333333333e-05, "loss": 2.3857, "step": 1222 }, { "epoch": 13.0, "eval_loss": 1.7919152975082397, "eval_runtime": 33.975, "eval_samples_per_second": 5.887, "eval_steps_per_second": 0.736, "step": 1222 }, { "epoch": 14.0, "learning_rate": 6.666666666666667e-06, "loss": 2.2877, "step": 1316 }, { "epoch": 14.0, "eval_loss": 1.7400457859039307, "eval_runtime": 34.7007, "eval_samples_per_second": 5.764, "eval_steps_per_second": 0.72, "step": 1316 }, { "epoch": 15.0, "learning_rate": 0.0, "loss": 2.3013, "step": 1410 }, { "epoch": 15.0, "eval_loss": 1.7408628463745117, "eval_runtime": 34.5398, "eval_samples_per_second": 5.79, "eval_steps_per_second": 0.724, "step": 1410 }, { "epoch": 16.0, "learning_rate": 2e-05, "loss": 2.3134, "step": 1504 }, { "epoch": 16.0, "eval_loss": 1.7698218822479248, "eval_runtime": 34.7625, "eval_samples_per_second": 5.753, "eval_steps_per_second": 0.719, "step": 1504 }, { "epoch": 17.0, "learning_rate": 1.5e-05, "loss": 2.3423, "step": 1598 }, { "epoch": 17.0, "eval_loss": 1.7581219673156738, "eval_runtime": 35.8583, "eval_samples_per_second": 5.578, "eval_steps_per_second": 0.697, "step": 1598 }, { "epoch": 18.0, "learning_rate": 1e-05, "loss": 2.3536, "step": 1692 }, { "epoch": 18.0, "eval_loss": 1.7658276557922363, "eval_runtime": 35.6301, "eval_samples_per_second": 5.613, "eval_steps_per_second": 0.702, "step": 1692 }, { "epoch": 19.0, "learning_rate": 5e-06, "loss": 2.2957, "step": 1786 }, { "epoch": 19.0, "eval_loss": 1.7328851222991943, "eval_runtime": 36.2644, "eval_samples_per_second": 5.515, "eval_steps_per_second": 0.689, "step": 1786 }, { "epoch": 20.0, "learning_rate": 0.0, "loss": 2.274, "step": 1880 }, { "epoch": 20.0, "eval_loss": 1.7334604263305664, "eval_runtime": 34.2732, "eval_samples_per_second": 5.835, "eval_steps_per_second": 0.729, "step": 1880 }, { "epoch": 21.0, "learning_rate": 1.6000000000000003e-05, "loss": 2.2906, "step": 1974 }, { "epoch": 21.0, "eval_loss": 1.734320878982544, "eval_runtime": 34.5789, "eval_samples_per_second": 5.784, "eval_steps_per_second": 0.723, "step": 1974 }, { "epoch": 22.0, "learning_rate": 1.2e-05, "loss": 2.2492, "step": 2068 }, { "epoch": 22.0, "eval_loss": 1.7080261707305908, "eval_runtime": 35.1218, "eval_samples_per_second": 5.694, "eval_steps_per_second": 0.712, "step": 2068 }, { "epoch": 23.0, "learning_rate": 8.000000000000001e-06, "loss": 2.2516, "step": 2162 }, { "epoch": 23.0, "eval_loss": 1.718016505241394, "eval_runtime": 34.8144, "eval_samples_per_second": 5.745, "eval_steps_per_second": 0.718, "step": 2162 }, { "epoch": 24.0, "learning_rate": 4.000000000000001e-06, "loss": 2.2574, "step": 2256 }, { "epoch": 24.0, "eval_loss": 1.7081444263458252, "eval_runtime": 34.6174, "eval_samples_per_second": 5.777, "eval_steps_per_second": 0.722, "step": 2256 }, { "epoch": 25.0, "learning_rate": 0.0, "loss": 2.2508, "step": 2350 }, { "epoch": 25.0, "eval_loss": 1.7065184116363525, "eval_runtime": 34.9055, "eval_samples_per_second": 5.73, "eval_steps_per_second": 0.716, "step": 2350 }, { "epoch": 25.0, "step": 2350, "total_flos": 8.960483088e+18, "train_loss": 0.45198433734001, "train_runtime": 1195.2772, "train_samples_per_second": 15.687, "train_steps_per_second": 1.966 } ], "max_steps": 2350, "num_train_epochs": 25, "total_flos": 8.960483088e+18, "trial_name": null, "trial_params": null }