{ "best_metric": 2.3477182388305664, "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_sft_10000samples_-1vocab_original-frozen/checkpoint-25000", "epoch": 19.856519150625125, "global_step": 25000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.99, "l1_reg_loss": 0.0, "learning_rate": 9e-05, "loss": 3.1946, "step": 2500 }, { "epoch": 3.97, "l1_reg_loss": 0.0001, "learning_rate": 8e-05, "loss": 2.6396, "step": 5000 }, { "epoch": 3.97, "eval_loss": 2.5872137546539307, "eval_runtime": 514.4581, "eval_samples_per_second": 9.536, "eval_steps_per_second": 4.768, "step": 5000 }, { "epoch": 5.96, "l1_reg_loss": 0.0001, "learning_rate": 7e-05, "loss": 2.4815, "step": 7500 }, { "epoch": 7.94, "l1_reg_loss": 0.0001, "learning_rate": 6e-05, "loss": 2.3898, "step": 10000 }, { "epoch": 7.94, "eval_loss": 2.439215898513794, "eval_runtime": 514.6109, "eval_samples_per_second": 9.533, "eval_steps_per_second": 4.767, "step": 10000 }, { "epoch": 9.93, "l1_reg_loss": 0.0001, "learning_rate": 5e-05, "loss": 2.3298, "step": 12500 }, { "epoch": 11.91, "l1_reg_loss": 0.0001, "learning_rate": 4e-05, "loss": 2.287, "step": 15000 }, { "epoch": 11.91, "eval_loss": 2.3796885013580322, "eval_runtime": 514.3961, "eval_samples_per_second": 9.537, "eval_steps_per_second": 4.769, "step": 15000 }, { "epoch": 13.9, "l1_reg_loss": 0.0001, "learning_rate": 3e-05, "loss": 2.254, "step": 17500 }, { "epoch": 15.89, "l1_reg_loss": 0.0001, "learning_rate": 2e-05, "loss": 2.2339, "step": 20000 }, { "epoch": 15.89, "eval_loss": 2.354508399963379, "eval_runtime": 514.4981, "eval_samples_per_second": 9.536, "eval_steps_per_second": 4.768, "step": 20000 }, { "epoch": 17.87, "l1_reg_loss": 0.0001, "learning_rate": 1e-05, "loss": 2.2184, "step": 22500 }, { "epoch": 19.86, "l1_reg_loss": 0.0001, "learning_rate": 0.0, "loss": 2.21, "step": 25000 }, { "epoch": 19.86, "eval_loss": 2.3477182388305664, "eval_runtime": 514.3805, "eval_samples_per_second": 9.538, "eval_steps_per_second": 4.769, "step": 25000 }, { "epoch": 19.86, "step": 25000, "total_flos": 3.7165924872093696e+17, "train_loss": 2.42385498046875, "train_runtime": 61408.2472, "train_samples_per_second": 3.257, "train_steps_per_second": 0.407 } ], "max_steps": 25000, "num_train_epochs": 20, "total_flos": 3.7165924872093696e+17, "trial_name": null, "trial_params": null }