{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 0.0001666666666666667, "loss": 0.8745, "step": 25 }, { "epoch": 0.05, "learning_rate": 0.00019979028262377118, "loss": 0.8093, "step": 50 }, { "epoch": 0.07, "learning_rate": 0.00019893981312363562, "loss": 0.7357, "step": 75 }, { "epoch": 0.1, "learning_rate": 0.00019744105246469263, "loss": 0.7535, "step": 100 }, { "epoch": 0.1, "eval_loss": 0.4003306031227112, "eval_runtime": 1.7839, "eval_samples_per_second": 2.242, "eval_steps_per_second": 0.561, "step": 100 }, { "epoch": 0.12, "learning_rate": 0.0001953038210948861, "loss": 0.7249, "step": 125 }, { "epoch": 0.15, "learning_rate": 0.00019254212296427044, "loss": 0.7118, "step": 150 }, { "epoch": 0.17, "learning_rate": 0.00018917405376582145, "loss": 0.7467, "step": 175 }, { "epoch": 0.2, "learning_rate": 0.00018522168236559695, "loss": 0.6714, "step": 200 }, { "epoch": 0.2, "eval_loss": 0.3684937059879303, "eval_runtime": 1.7853, "eval_samples_per_second": 2.24, "eval_steps_per_second": 0.56, "step": 200 }, { "epoch": 0.23, "learning_rate": 0.00018071090619916093, "loss": 0.654, "step": 225 }, { "epoch": 0.25, "learning_rate": 0.00017567128158176953, "loss": 0.6392, "step": 250 }, { "epoch": 0.28, "learning_rate": 0.00017013583004418993, "loss": 0.5745, "step": 275 }, { "epoch": 0.3, "learning_rate": 0.000164140821963114, "loss": 0.5364, "step": 300 }, { "epoch": 0.3, "eval_loss": 0.3665352761745453, "eval_runtime": 1.7852, "eval_samples_per_second": 2.241, "eval_steps_per_second": 0.56, "step": 300 }, { "epoch": 0.33, "learning_rate": 0.00015772553890390197, "loss": 0.5693, "step": 325 }, { "epoch": 0.35, "learning_rate": 0.00015093201623287631, "loss": 0.563, "step": 350 }, { "epoch": 0.38, "learning_rate": 0.00014380476768566824, "loss": 0.5478, "step": 375 }, { "epoch": 0.4, "learning_rate": 0.00013639049369634876, "loss": 0.5763, "step": 400 }, { "epoch": 0.4, "eval_loss": 0.3363753855228424, "eval_runtime": 1.7851, "eval_samples_per_second": 2.241, "eval_steps_per_second": 0.56, "step": 400 }, { "epoch": 0.42, "learning_rate": 0.00012873777539848283, "loss": 0.4891, "step": 425 }, { "epoch": 0.45, "learning_rate": 0.00012089675630312754, "loss": 0.5331, "step": 450 }, { "epoch": 0.47, "learning_rate": 0.00011291881373954065, "loss": 0.5679, "step": 475 }, { "epoch": 0.5, "learning_rate": 0.00010485622221144484, "loss": 0.5982, "step": 500 }, { "epoch": 0.5, "eval_loss": 0.3186224400997162, "eval_runtime": 1.7828, "eval_samples_per_second": 2.244, "eval_steps_per_second": 0.561, "step": 500 }, { "epoch": 0.53, "learning_rate": 9.676181087466444e-05, "loss": 0.5467, "step": 525 }, { "epoch": 0.55, "learning_rate": 8.868861738047158e-05, "loss": 0.5706, "step": 550 }, { "epoch": 0.57, "learning_rate": 8.068954035279121e-05, "loss": 0.504, "step": 575 }, { "epoch": 0.6, "learning_rate": 7.281699277636572e-05, "loss": 0.5267, "step": 600 }, { "epoch": 0.6, "eval_loss": 0.32175499200820923, "eval_runtime": 1.7846, "eval_samples_per_second": 2.241, "eval_steps_per_second": 0.56, "step": 600 }, { "epoch": 0.62, "learning_rate": 6.512255856701177e-05, "loss": 0.5414, "step": 625 }, { "epoch": 0.65, "learning_rate": 5.765665457425102e-05, "loss": 0.5412, "step": 650 }, { "epoch": 0.68, "learning_rate": 5.0468200231001286e-05, "loss": 0.4611, "step": 675 }, { "epoch": 0.7, "learning_rate": 4.360429701490934e-05, "loss": 0.5073, "step": 700 }, { "epoch": 0.7, "eval_loss": 0.31275978684425354, "eval_runtime": 1.7833, "eval_samples_per_second": 2.243, "eval_steps_per_second": 0.561, "step": 700 }, { "epoch": 0.72, "learning_rate": 3.710991982161555e-05, "loss": 0.4778, "step": 725 }, { "epoch": 0.75, "learning_rate": 3.102762227218957e-05, "loss": 0.5454, "step": 750 }, { "epoch": 0.78, "learning_rate": 2.5397257885675397e-05, "loss": 0.5612, "step": 775 }, { "epoch": 0.8, "learning_rate": 2.025571894372794e-05, "loss": 0.4983, "step": 800 }, { "epoch": 0.8, "eval_loss": 0.31457942724227905, "eval_runtime": 1.7821, "eval_samples_per_second": 2.245, "eval_steps_per_second": 0.561, "step": 800 }, { "epoch": 0.82, "learning_rate": 1.563669475839956e-05, "loss": 0.4941, "step": 825 }, { "epoch": 0.85, "learning_rate": 1.1570450926997655e-05, "loss": 0.4926, "step": 850 }, { "epoch": 0.88, "learning_rate": 8.083631020418791e-06, "loss": 0.5094, "step": 875 }, { "epoch": 0.9, "learning_rate": 5.199082004372957e-06, "loss": 0.5116, "step": 900 }, { "epoch": 0.9, "eval_loss": 0.31357938051223755, "eval_runtime": 1.7811, "eval_samples_per_second": 2.246, "eval_steps_per_second": 0.561, "step": 900 }, { "epoch": 0.93, "learning_rate": 2.9357045374040825e-06, "loss": 0.4244, "step": 925 }, { "epoch": 0.95, "learning_rate": 1.30832912661093e-06, "loss": 0.4511, "step": 950 }, { "epoch": 0.97, "learning_rate": 3.2761895254306287e-07, "loss": 0.4179, "step": 975 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.4662, "step": 1000 }, { "epoch": 1.0, "eval_loss": 0.3136279284954071, "eval_runtime": 1.7836, "eval_samples_per_second": 2.243, "eval_steps_per_second": 0.561, "step": 1000 } ], "logging_steps": 25, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 100, "total_flos": 1.493507298557952e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }