{ "best_metric": null, "best_model_checkpoint": null, "epoch": 24.0, "eval_steps": 500, "global_step": 28368, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.42, "learning_rate": 9.894247038917091e-06, "loss": 3.5932, "step": 500 }, { "epoch": 0.85, "learning_rate": 9.78849407783418e-06, "loss": 3.336, "step": 1000 }, { "epoch": 1.0, "eval_loss": 3.2544538974761963, "eval_runtime": 6.101, "eval_samples_per_second": 41.961, "eval_steps_per_second": 5.245, "step": 1182 }, { "epoch": 1.27, "learning_rate": 9.68274111675127e-06, "loss": 3.2562, "step": 1500 }, { "epoch": 1.69, "learning_rate": 9.57698815566836e-06, "loss": 3.1967, "step": 2000 }, { "epoch": 2.0, "eval_loss": 3.1576380729675293, "eval_runtime": 6.0951, "eval_samples_per_second": 42.001, "eval_steps_per_second": 5.25, "step": 2364 }, { "epoch": 2.12, "learning_rate": 9.47123519458545e-06, "loss": 3.1613, "step": 2500 }, { "epoch": 2.54, "learning_rate": 9.36548223350254e-06, "loss": 3.1268, "step": 3000 }, { "epoch": 2.96, "learning_rate": 9.25972927241963e-06, "loss": 3.1148, "step": 3500 }, { "epoch": 3.0, "eval_loss": 3.1073131561279297, "eval_runtime": 6.0936, "eval_samples_per_second": 42.011, "eval_steps_per_second": 5.251, "step": 3546 }, { "epoch": 3.38, "learning_rate": 9.15397631133672e-06, "loss": 3.0804, "step": 4000 }, { "epoch": 3.81, "learning_rate": 9.048223350253808e-06, "loss": 3.0814, "step": 4500 }, { "epoch": 4.0, "eval_loss": 3.074193000793457, "eval_runtime": 6.0939, "eval_samples_per_second": 42.009, "eval_steps_per_second": 5.251, "step": 4728 }, { "epoch": 4.23, "learning_rate": 8.942470389170898e-06, "loss": 3.054, "step": 5000 }, { "epoch": 4.65, "learning_rate": 8.836717428087988e-06, "loss": 3.0355, "step": 5500 }, { "epoch": 5.0, "eval_loss": 3.04995059967041, "eval_runtime": 6.09, "eval_samples_per_second": 42.036, "eval_steps_per_second": 5.254, "step": 5910 }, { "epoch": 5.08, "learning_rate": 8.730964467005076e-06, "loss": 3.0365, "step": 6000 }, { "epoch": 5.5, "learning_rate": 8.625211505922166e-06, "loss": 3.0104, "step": 6500 }, { "epoch": 5.92, "learning_rate": 8.519458544839256e-06, "loss": 3.0126, "step": 7000 }, { "epoch": 6.0, "eval_loss": 3.0317230224609375, "eval_runtime": 6.0899, "eval_samples_per_second": 42.037, "eval_steps_per_second": 5.255, "step": 7092 }, { "epoch": 6.35, "learning_rate": 8.413705583756346e-06, "loss": 2.9923, "step": 7500 }, { "epoch": 6.77, "learning_rate": 8.307952622673435e-06, "loss": 2.9902, "step": 8000 }, { "epoch": 7.0, "eval_loss": 3.0167293548583984, "eval_runtime": 6.0906, "eval_samples_per_second": 42.032, "eval_steps_per_second": 5.254, "step": 8274 }, { "epoch": 7.19, "learning_rate": 8.202199661590525e-06, "loss": 2.9783, "step": 8500 }, { "epoch": 7.61, "learning_rate": 8.096446700507615e-06, "loss": 2.9722, "step": 9000 }, { "epoch": 8.0, "eval_loss": 3.004361391067505, "eval_runtime": 6.0481, "eval_samples_per_second": 42.327, "eval_steps_per_second": 5.291, "step": 9456 }, { "epoch": 8.04, "learning_rate": 7.990693739424705e-06, "loss": 2.9628, "step": 9500 }, { "epoch": 8.46, "learning_rate": 7.884940778341795e-06, "loss": 2.9593, "step": 10000 }, { "epoch": 8.88, "learning_rate": 7.779187817258885e-06, "loss": 2.9485, "step": 10500 }, { "epoch": 9.0, "eval_loss": 2.9940547943115234, "eval_runtime": 6.1451, "eval_samples_per_second": 41.659, "eval_steps_per_second": 5.207, "step": 10638 }, { "epoch": 9.31, "learning_rate": 7.673434856175973e-06, "loss": 2.9405, "step": 11000 }, { "epoch": 9.73, "learning_rate": 7.567681895093063e-06, "loss": 2.943, "step": 11500 }, { "epoch": 10.0, "eval_loss": 2.9857802391052246, "eval_runtime": 6.2619, "eval_samples_per_second": 40.882, "eval_steps_per_second": 5.11, "step": 11820 }, { "epoch": 10.15, "learning_rate": 7.461928934010153e-06, "loss": 2.9243, "step": 12000 }, { "epoch": 10.58, "learning_rate": 7.356175972927243e-06, "loss": 2.9228, "step": 12500 }, { "epoch": 11.0, "learning_rate": 7.2504230118443316e-06, "loss": 2.9216, "step": 13000 }, { "epoch": 11.0, "eval_loss": 2.9776651859283447, "eval_runtime": 6.2854, "eval_samples_per_second": 40.73, "eval_steps_per_second": 5.091, "step": 13002 }, { "epoch": 11.42, "learning_rate": 7.144670050761422e-06, "loss": 2.9118, "step": 13500 }, { "epoch": 11.84, "learning_rate": 7.038917089678512e-06, "loss": 2.911, "step": 14000 }, { "epoch": 12.0, "eval_loss": 2.9713006019592285, "eval_runtime": 6.1107, "eval_samples_per_second": 41.894, "eval_steps_per_second": 5.237, "step": 14184 }, { "epoch": 12.27, "learning_rate": 6.933164128595601e-06, "loss": 2.9038, "step": 14500 }, { "epoch": 12.69, "learning_rate": 6.827411167512691e-06, "loss": 2.8924, "step": 15000 }, { "epoch": 13.0, "eval_loss": 2.9653375148773193, "eval_runtime": 6.1162, "eval_samples_per_second": 41.856, "eval_steps_per_second": 5.232, "step": 15366 }, { "epoch": 13.11, "learning_rate": 6.721658206429781e-06, "loss": 2.9025, "step": 15500 }, { "epoch": 13.54, "learning_rate": 6.61590524534687e-06, "loss": 2.8886, "step": 16000 }, { "epoch": 13.96, "learning_rate": 6.51015228426396e-06, "loss": 2.8882, "step": 16500 }, { "epoch": 14.0, "eval_loss": 2.960761547088623, "eval_runtime": 6.1211, "eval_samples_per_second": 41.822, "eval_steps_per_second": 5.228, "step": 16548 }, { "epoch": 14.38, "learning_rate": 6.40439932318105e-06, "loss": 2.8777, "step": 17000 }, { "epoch": 14.81, "learning_rate": 6.298646362098139e-06, "loss": 2.8826, "step": 17500 }, { "epoch": 15.0, "eval_loss": 2.9559221267700195, "eval_runtime": 6.0998, "eval_samples_per_second": 41.969, "eval_steps_per_second": 5.246, "step": 17730 }, { "epoch": 15.23, "learning_rate": 6.1928934010152285e-06, "loss": 2.8796, "step": 18000 }, { "epoch": 15.65, "learning_rate": 6.0871404399323185e-06, "loss": 2.8697, "step": 18500 }, { "epoch": 16.0, "eval_loss": 2.952040672302246, "eval_runtime": 6.2485, "eval_samples_per_second": 40.97, "eval_steps_per_second": 5.121, "step": 18912 }, { "epoch": 16.07, "learning_rate": 5.981387478849409e-06, "loss": 2.8645, "step": 19000 }, { "epoch": 16.5, "learning_rate": 5.875634517766498e-06, "loss": 2.8678, "step": 19500 }, { "epoch": 16.92, "learning_rate": 5.769881556683588e-06, "loss": 2.8616, "step": 20000 }, { "epoch": 17.0, "eval_loss": 2.948793888092041, "eval_runtime": 6.2711, "eval_samples_per_second": 40.822, "eval_steps_per_second": 5.103, "step": 20094 }, { "epoch": 17.34, "learning_rate": 5.664128595600678e-06, "loss": 2.8548, "step": 20500 }, { "epoch": 17.77, "learning_rate": 5.558375634517766e-06, "loss": 2.8529, "step": 21000 }, { "epoch": 18.0, "eval_loss": 2.945361614227295, "eval_runtime": 6.3517, "eval_samples_per_second": 40.304, "eval_steps_per_second": 5.038, "step": 21276 }, { "epoch": 18.19, "learning_rate": 5.452622673434856e-06, "loss": 2.8557, "step": 21500 }, { "epoch": 18.61, "learning_rate": 5.346869712351946e-06, "loss": 2.8448, "step": 22000 }, { "epoch": 19.0, "eval_loss": 2.9428470134735107, "eval_runtime": 6.2219, "eval_samples_per_second": 41.145, "eval_steps_per_second": 5.143, "step": 22458 }, { "epoch": 19.04, "learning_rate": 5.241116751269036e-06, "loss": 2.8458, "step": 22500 }, { "epoch": 19.46, "learning_rate": 5.1353637901861255e-06, "loss": 2.8462, "step": 23000 }, { "epoch": 19.88, "learning_rate": 5.0296108291032155e-06, "loss": 2.84, "step": 23500 }, { "epoch": 20.0, "eval_loss": 2.940398693084717, "eval_runtime": 6.2496, "eval_samples_per_second": 40.962, "eval_steps_per_second": 5.12, "step": 23640 }, { "epoch": 20.3, "learning_rate": 4.923857868020305e-06, "loss": 2.8349, "step": 24000 }, { "epoch": 20.73, "learning_rate": 4.818104906937395e-06, "loss": 2.8285, "step": 24500 }, { "epoch": 21.0, "eval_loss": 2.938441276550293, "eval_runtime": 6.1601, "eval_samples_per_second": 41.558, "eval_steps_per_second": 5.195, "step": 24822 }, { "epoch": 21.15, "learning_rate": 4.712351945854484e-06, "loss": 2.8345, "step": 25000 }, { "epoch": 21.57, "learning_rate": 4.606598984771574e-06, "loss": 2.8302, "step": 25500 }, { "epoch": 22.0, "learning_rate": 4.500846023688664e-06, "loss": 2.8266, "step": 26000 }, { "epoch": 22.0, "eval_loss": 2.9362807273864746, "eval_runtime": 6.0955, "eval_samples_per_second": 41.998, "eval_steps_per_second": 5.25, "step": 26004 }, { "epoch": 22.42, "learning_rate": 4.395093062605753e-06, "loss": 2.819, "step": 26500 }, { "epoch": 22.84, "learning_rate": 4.289340101522843e-06, "loss": 2.8232, "step": 27000 }, { "epoch": 23.0, "eval_loss": 2.934544324874878, "eval_runtime": 6.1597, "eval_samples_per_second": 41.561, "eval_steps_per_second": 5.195, "step": 27186 }, { "epoch": 23.27, "learning_rate": 4.183587140439932e-06, "loss": 2.8213, "step": 27500 }, { "epoch": 23.69, "learning_rate": 4.0778341793570224e-06, "loss": 2.8136, "step": 28000 } ], "logging_steps": 500, "max_steps": 47280, "num_train_epochs": 40, "save_steps": 9456, "total_flos": 4.4459884412928e+16, "trial_name": null, "trial_params": null }