{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.578947368421053, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.21, "learning_rate": 0.0002962686567164179, "loss": 1.5814, "step": 250 }, { "epoch": 0.21, "eval_loss": 0.0006077612633816898, "eval_runtime": 128.8821, "eval_samples_per_second": 75.48, "eval_steps_per_second": 2.359, "step": 250 }, { "epoch": 0.41, "learning_rate": 0.00029004975124378106, "loss": 0.0051, "step": 500 }, { "epoch": 0.41, "eval_loss": 0.0004873188154306263, "eval_runtime": 129.1791, "eval_samples_per_second": 75.306, "eval_steps_per_second": 2.353, "step": 500 }, { "epoch": 0.62, "learning_rate": 0.00028383084577114425, "loss": 0.0033, "step": 750 }, { "epoch": 0.62, "eval_loss": 0.0002883475972339511, "eval_runtime": 128.9078, "eval_samples_per_second": 75.465, "eval_steps_per_second": 2.358, "step": 750 }, { "epoch": 0.82, "learning_rate": 0.00027761194029850744, "loss": 0.0026, "step": 1000 }, { "epoch": 0.82, "eval_loss": 0.00016567455895710737, "eval_runtime": 128.8984, "eval_samples_per_second": 75.47, "eval_steps_per_second": 2.358, "step": 1000 }, { "epoch": 1.03, "learning_rate": 0.00027139303482587063, "loss": 0.002, "step": 1250 }, { "epoch": 1.03, "eval_loss": 0.0001291305961785838, "eval_runtime": 128.8976, "eval_samples_per_second": 75.471, "eval_steps_per_second": 2.358, "step": 1250 }, { "epoch": 1.23, "learning_rate": 0.0002651741293532338, "loss": 0.0016, "step": 1500 }, { "epoch": 1.23, "eval_loss": 0.00010879799810936674, "eval_runtime": 128.8894, "eval_samples_per_second": 75.476, "eval_steps_per_second": 2.359, "step": 1500 }, { "epoch": 1.44, "learning_rate": 0.000258955223880597, "loss": 0.0014, "step": 1750 }, { "epoch": 1.44, "eval_loss": 0.00010257180110784248, "eval_runtime": 129.1985, "eval_samples_per_second": 75.295, "eval_steps_per_second": 2.353, "step": 1750 }, { "epoch": 1.64, "learning_rate": 0.0002527363184079602, "loss": 0.0012, "step": 2000 }, { "epoch": 1.64, "eval_loss": 6.881527951918542e-05, "eval_runtime": 129.1934, "eval_samples_per_second": 75.298, "eval_steps_per_second": 2.353, "step": 2000 }, { "epoch": 1.85, "learning_rate": 0.00024651741293532333, "loss": 0.0016, "step": 2250 }, { "epoch": 1.85, "eval_loss": 7.058009214233607e-05, "eval_runtime": 128.8993, "eval_samples_per_second": 75.47, "eval_steps_per_second": 2.358, "step": 2250 }, { "epoch": 2.06, "learning_rate": 0.00024029850746268655, "loss": 0.001, "step": 2500 }, { "epoch": 2.06, "eval_loss": 6.400555139407516e-05, "eval_runtime": 128.8789, "eval_samples_per_second": 75.482, "eval_steps_per_second": 2.359, "step": 2500 }, { "epoch": 2.26, "learning_rate": 0.0002340796019900497, "loss": 0.001, "step": 2750 }, { "epoch": 2.26, "eval_loss": 6.463182216975838e-05, "eval_runtime": 128.8854, "eval_samples_per_second": 75.478, "eval_steps_per_second": 2.359, "step": 2750 }, { "epoch": 2.47, "learning_rate": 0.00022786069651741292, "loss": 0.0012, "step": 3000 }, { "epoch": 2.47, "eval_loss": 0.0001349268713966012, "eval_runtime": 128.8866, "eval_samples_per_second": 75.477, "eval_steps_per_second": 2.359, "step": 3000 }, { "epoch": 2.67, "learning_rate": 0.0002216417910447761, "loss": 0.0009, "step": 3250 }, { "epoch": 2.67, "eval_loss": 6.4761312387418e-05, "eval_runtime": 129.1959, "eval_samples_per_second": 75.296, "eval_steps_per_second": 2.353, "step": 3250 }, { "epoch": 2.88, "learning_rate": 0.00021542288557213927, "loss": 0.0009, "step": 3500 }, { "epoch": 2.88, "eval_loss": 5.111826249049045e-05, "eval_runtime": 128.8809, "eval_samples_per_second": 75.481, "eval_steps_per_second": 2.359, "step": 3500 }, { "epoch": 3.08, "learning_rate": 0.00020920398009950246, "loss": 0.0007, "step": 3750 }, { "epoch": 3.08, "eval_loss": 6.521103205159307e-05, "eval_runtime": 128.9, "eval_samples_per_second": 75.469, "eval_steps_per_second": 2.358, "step": 3750 }, { "epoch": 3.29, "learning_rate": 0.00020298507462686565, "loss": 0.0007, "step": 4000 }, { "epoch": 3.29, "eval_loss": 4.892368451692164e-05, "eval_runtime": 128.8971, "eval_samples_per_second": 75.471, "eval_steps_per_second": 2.358, "step": 4000 }, { "epoch": 3.5, "learning_rate": 0.00019676616915422884, "loss": 0.0006, "step": 4250 }, { "epoch": 3.5, "eval_loss": 4.084123429493047e-05, "eval_runtime": 69.0191, "eval_samples_per_second": 140.947, "eval_steps_per_second": 4.405, "step": 4250 }, { "epoch": 3.7, "learning_rate": 0.00019054726368159203, "loss": 0.0006, "step": 4500 }, { "epoch": 3.7, "eval_loss": 6.229727296158671e-05, "eval_runtime": 83.9466, "eval_samples_per_second": 115.883, "eval_steps_per_second": 3.621, "step": 4500 }, { "epoch": 3.91, "learning_rate": 0.00018432835820895522, "loss": 0.0008, "step": 4750 }, { "epoch": 3.91, "eval_loss": 4.437619645614177e-05, "eval_runtime": 129.1502, "eval_samples_per_second": 75.323, "eval_steps_per_second": 2.354, "step": 4750 }, { "epoch": 4.11, "learning_rate": 0.00017810945273631838, "loss": 0.0008, "step": 5000 }, { "epoch": 4.11, "eval_loss": 4.2610481614246964e-05, "eval_runtime": 128.7805, "eval_samples_per_second": 75.539, "eval_steps_per_second": 2.361, "step": 5000 }, { "epoch": 4.32, "learning_rate": 0.00017189054726368157, "loss": 0.0005, "step": 5250 }, { "epoch": 4.32, "eval_loss": 7.266196189448237e-05, "eval_runtime": 128.7609, "eval_samples_per_second": 75.551, "eval_steps_per_second": 2.361, "step": 5250 }, { "epoch": 4.52, "learning_rate": 0.00016567164179104478, "loss": 0.0006, "step": 5500 }, { "epoch": 4.52, "eval_loss": 3.264641054556705e-05, "eval_runtime": 128.7913, "eval_samples_per_second": 75.533, "eval_steps_per_second": 2.36, "step": 5500 }, { "epoch": 4.73, "learning_rate": 0.00015945273631840794, "loss": 0.0005, "step": 5750 }, { "epoch": 4.73, "eval_loss": 2.6435563995619304e-05, "eval_runtime": 128.7891, "eval_samples_per_second": 75.534, "eval_steps_per_second": 2.36, "step": 5750 }, { "epoch": 4.93, "learning_rate": 0.00015323383084577113, "loss": 0.0006, "step": 6000 }, { "epoch": 4.93, "eval_loss": 4.4951906602364033e-05, "eval_runtime": 129.1622, "eval_samples_per_second": 75.316, "eval_steps_per_second": 2.354, "step": 6000 }, { "epoch": 5.14, "learning_rate": 0.00014701492537313432, "loss": 0.0005, "step": 6250 }, { "epoch": 5.14, "eval_loss": 3.5103537811664864e-05, "eval_runtime": 128.7717, "eval_samples_per_second": 75.545, "eval_steps_per_second": 2.361, "step": 6250 }, { "epoch": 5.35, "learning_rate": 0.00014079601990049748, "loss": 0.0004, "step": 6500 }, { "epoch": 5.35, "eval_loss": 3.251100133638829e-05, "eval_runtime": 128.8099, "eval_samples_per_second": 75.522, "eval_steps_per_second": 2.36, "step": 6500 }, { "epoch": 5.55, "learning_rate": 0.0001345771144278607, "loss": 0.0004, "step": 6750 }, { "epoch": 5.55, "eval_loss": 3.5094544728053734e-05, "eval_runtime": 128.8083, "eval_samples_per_second": 75.523, "eval_steps_per_second": 2.36, "step": 6750 }, { "epoch": 5.76, "learning_rate": 0.00012835820895522386, "loss": 0.0005, "step": 7000 }, { "epoch": 5.76, "eval_loss": 3.747216032934375e-05, "eval_runtime": 128.7944, "eval_samples_per_second": 75.531, "eval_steps_per_second": 2.36, "step": 7000 }, { "epoch": 5.96, "learning_rate": 0.00012213930348258705, "loss": 0.0004, "step": 7250 }, { "epoch": 5.96, "eval_loss": 2.8431584723875858e-05, "eval_runtime": 129.1362, "eval_samples_per_second": 75.331, "eval_steps_per_second": 2.354, "step": 7250 }, { "epoch": 6.17, "learning_rate": 0.00011592039800995025, "loss": 0.0003, "step": 7500 }, { "epoch": 6.17, "eval_loss": 2.733626024564728e-05, "eval_runtime": 128.9297, "eval_samples_per_second": 75.452, "eval_steps_per_second": 2.358, "step": 7500 }, { "epoch": 6.37, "learning_rate": 0.00010970149253731342, "loss": 0.0004, "step": 7750 }, { "epoch": 6.37, "eval_loss": 2.5848928999039344e-05, "eval_runtime": 128.8142, "eval_samples_per_second": 75.52, "eval_steps_per_second": 2.36, "step": 7750 }, { "epoch": 6.58, "learning_rate": 0.0001034825870646766, "loss": 0.0003, "step": 8000 }, { "epoch": 6.58, "eval_loss": 3.0041233912925236e-05, "eval_runtime": 128.8146, "eval_samples_per_second": 75.519, "eval_steps_per_second": 2.36, "step": 8000 } ], "max_steps": 12160, "num_train_epochs": 10, "total_flos": 4.757334967871078e+16, "trial_name": null, "trial_params": null }