{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 3460, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.29, "learning_rate": 4.855491329479769e-05, "loss": 2.85, "step": 100 }, { "epoch": 0.58, "learning_rate": 4.710982658959538e-05, "loss": 2.5244, "step": 200 }, { "epoch": 0.87, "learning_rate": 4.566473988439307e-05, "loss": 2.4551, "step": 300 }, { "epoch": 1.16, "learning_rate": 4.421965317919075e-05, "loss": 2.3574, "step": 400 }, { "epoch": 1.45, "learning_rate": 4.2774566473988445e-05, "loss": 2.3376, "step": 500 }, { "epoch": 1.73, "learning_rate": 4.132947976878613e-05, "loss": 2.314, "step": 600 }, { "epoch": 2.02, "learning_rate": 3.988439306358382e-05, "loss": 2.2859, "step": 700 }, { "epoch": 2.31, "learning_rate": 3.84393063583815e-05, "loss": 2.2306, "step": 800 }, { "epoch": 2.6, "learning_rate": 3.699421965317919e-05, "loss": 2.2102, "step": 900 }, { "epoch": 2.89, "learning_rate": 3.554913294797688e-05, "loss": 2.2117, "step": 1000 }, { "epoch": 3.18, "learning_rate": 3.410404624277457e-05, "loss": 2.1662, "step": 1100 }, { "epoch": 3.47, "learning_rate": 3.265895953757225e-05, "loss": 2.1411, "step": 1200 }, { "epoch": 3.76, "learning_rate": 3.1213872832369946e-05, "loss": 2.1541, "step": 1300 }, { "epoch": 4.05, "learning_rate": 2.9768786127167632e-05, "loss": 2.1407, "step": 1400 }, { "epoch": 4.34, "learning_rate": 2.832369942196532e-05, "loss": 2.1051, "step": 1500 }, { "epoch": 4.62, "learning_rate": 2.6878612716763007e-05, "loss": 2.0974, "step": 1600 }, { "epoch": 4.91, "learning_rate": 2.5433526011560693e-05, "loss": 2.1, "step": 1700 }, { "epoch": 5.2, "learning_rate": 2.3988439306358382e-05, "loss": 2.0643, "step": 1800 }, { "epoch": 5.49, "learning_rate": 2.254335260115607e-05, "loss": 2.048, "step": 1900 }, { "epoch": 5.78, "learning_rate": 2.1098265895953757e-05, "loss": 2.0663, "step": 2000 }, { "epoch": 6.07, "learning_rate": 1.9653179190751446e-05, "loss": 2.0447, "step": 2100 }, { "epoch": 6.36, "learning_rate": 1.8208092485549132e-05, "loss": 2.0395, "step": 2200 }, { "epoch": 6.65, "learning_rate": 1.676300578034682e-05, "loss": 2.021, "step": 2300 }, { "epoch": 6.94, "learning_rate": 1.531791907514451e-05, "loss": 2.0177, "step": 2400 }, { "epoch": 7.23, "learning_rate": 1.3872832369942197e-05, "loss": 2.0087, "step": 2500 }, { "epoch": 7.51, "learning_rate": 1.2427745664739884e-05, "loss": 1.9849, "step": 2600 }, { "epoch": 7.8, "learning_rate": 1.0982658959537573e-05, "loss": 1.9991, "step": 2700 }, { "epoch": 8.09, "learning_rate": 9.53757225433526e-06, "loss": 1.9992, "step": 2800 }, { "epoch": 8.38, "learning_rate": 8.092485549132949e-06, "loss": 1.9687, "step": 2900 }, { "epoch": 8.67, "learning_rate": 6.647398843930635e-06, "loss": 1.9803, "step": 3000 }, { "epoch": 8.96, "learning_rate": 5.202312138728324e-06, "loss": 2.0046, "step": 3100 }, { "epoch": 9.25, "learning_rate": 3.757225433526012e-06, "loss": 1.9629, "step": 3200 }, { "epoch": 9.54, "learning_rate": 2.3121387283236993e-06, "loss": 1.9686, "step": 3300 }, { "epoch": 9.83, "learning_rate": 8.670520231213873e-07, "loss": 1.9876, "step": 3400 }, { "epoch": 10.0, "step": 3460, "total_flos": 3.302612726022144e+17, "train_loss": 2.1397388414151406, "train_runtime": 8310.1091, "train_samples_per_second": 13.29, "train_steps_per_second": 0.416 } ], "max_steps": 3460, "num_train_epochs": 10, "total_flos": 3.302612726022144e+17, "trial_name": null, "trial_params": null }