{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.6859016393442623, "global_step": 49152, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.22, "learning_rate": 0.0004984426897459585, "loss": 0.5454, "step": 4096 }, { "epoch": 0.22, "eval_loss": 0.5005695223808289, "eval_runtime": 32.9622, "eval_samples_per_second": 104.271, "eval_steps_per_second": 6.523, "step": 4096 }, { "epoch": 0.45, "learning_rate": 0.0004934687023955681, "loss": 0.4849, "step": 8192 }, { "epoch": 0.45, "eval_loss": 0.480915367603302, "eval_runtime": 32.9785, "eval_samples_per_second": 104.219, "eval_steps_per_second": 6.519, "step": 8192 }, { "epoch": 0.67, "learning_rate": 0.000485144849673373, "loss": 0.4518, "step": 12288 }, { "epoch": 0.67, "eval_loss": 0.46660953760147095, "eval_runtime": 32.8208, "eval_samples_per_second": 104.72, "eval_steps_per_second": 6.551, "step": 12288 }, { "epoch": 0.9, "learning_rate": 0.0004735848873631612, "loss": 0.4282, "step": 16384 }, { "epoch": 0.9, "eval_loss": 0.46097490191459656, "eval_runtime": 32.8311, "eval_samples_per_second": 104.687, "eval_steps_per_second": 6.549, "step": 16384 }, { "epoch": 1.12, "learning_rate": 0.0004589518403420676, "loss": 0.4145, "step": 20480 }, { "epoch": 1.12, "eval_loss": 0.45063599944114685, "eval_runtime": 32.8912, "eval_samples_per_second": 104.496, "eval_steps_per_second": 6.537, "step": 20480 }, { "epoch": 1.34, "learning_rate": 0.0004414445597486605, "loss": 0.399, "step": 24576 }, { "epoch": 1.34, "eval_loss": 0.44468095898628235, "eval_runtime": 32.706, "eval_samples_per_second": 105.088, "eval_steps_per_second": 6.574, "step": 24576 }, { "epoch": 1.57, "learning_rate": 0.00042130386669061293, "loss": 0.3882, "step": 28672 }, { "epoch": 1.57, "eval_loss": 0.44857361912727356, "eval_runtime": 32.9754, "eval_samples_per_second": 104.229, "eval_steps_per_second": 6.52, "step": 28672 }, { "epoch": 1.79, "learning_rate": 0.0003988010477498867, "loss": 0.3767, "step": 32768 }, { "epoch": 1.79, "eval_loss": 0.44354742765426636, "eval_runtime": 32.8219, "eval_samples_per_second": 104.717, "eval_steps_per_second": 6.55, "step": 32768 }, { "epoch": 2.01, "learning_rate": 0.0003742566178542921, "loss": 0.3676, "step": 36864 }, { "epoch": 2.01, "eval_loss": 0.43940743803977966, "eval_runtime": 32.9279, "eval_samples_per_second": 104.38, "eval_steps_per_second": 6.529, "step": 36864 }, { "epoch": 2.24, "learning_rate": 0.0003479963856008823, "loss": 0.3577, "step": 40960 }, { "epoch": 2.24, "eval_loss": 0.4323909878730774, "eval_runtime": 33.0718, "eval_samples_per_second": 103.926, "eval_steps_per_second": 6.501, "step": 40960 }, { "epoch": 2.46, "learning_rate": 0.0003203943839704654, "loss": 0.3517, "step": 45056 }, { "epoch": 2.46, "eval_loss": 0.43262797594070435, "eval_runtime": 32.8978, "eval_samples_per_second": 104.475, "eval_steps_per_second": 6.535, "step": 45056 }, { "epoch": 2.69, "learning_rate": 0.000291817008494138, "loss": 0.3427, "step": 49152 }, { "epoch": 2.69, "eval_loss": 0.4303751289844513, "eval_runtime": 32.7306, "eval_samples_per_second": 105.009, "eval_steps_per_second": 6.569, "step": 49152 } ], "max_steps": 109800, "num_train_epochs": 6, "total_flos": 2.28488651552981e+17, "trial_name": null, "trial_params": null }