{ "best_metric": 1.2369898557662964, "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_si_bitfit_100000samples_-1vocab_original-frozen/checkpoint-25000", "epoch": 0.4656230502034773, "global_step": 25000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 9e-05, "loss": 1.4718, "step": 2500 }, { "epoch": 0.09, "learning_rate": 8e-05, "loss": 1.3617, "step": 5000 }, { "epoch": 0.09, "eval_loss": 1.3304363489151, "eval_runtime": 2281.5601, "eval_samples_per_second": 9.533, "eval_steps_per_second": 4.767, "step": 5000 }, { "epoch": 0.14, "learning_rate": 7e-05, "loss": 1.3157, "step": 7500 }, { "epoch": 0.19, "learning_rate": 6e-05, "loss": 1.2935, "step": 10000 }, { "epoch": 0.19, "eval_loss": 1.2805551290512085, "eval_runtime": 2274.7206, "eval_samples_per_second": 9.562, "eval_steps_per_second": 4.781, "step": 10000 }, { "epoch": 0.23, "learning_rate": 5e-05, "loss": 1.2754, "step": 12500 }, { "epoch": 0.28, "learning_rate": 4e-05, "loss": 1.263, "step": 15000 }, { "epoch": 0.28, "eval_loss": 1.253080129623413, "eval_runtime": 2282.2955, "eval_samples_per_second": 9.53, "eval_steps_per_second": 4.765, "step": 15000 }, { "epoch": 0.33, "learning_rate": 3e-05, "loss": 1.2525, "step": 17500 }, { "epoch": 0.37, "learning_rate": 2e-05, "loss": 1.2492, "step": 20000 }, { "epoch": 0.37, "eval_loss": 1.2420716285705566, "eval_runtime": 2282.8423, "eval_samples_per_second": 9.528, "eval_steps_per_second": 4.764, "step": 20000 }, { "epoch": 0.42, "learning_rate": 1e-05, "loss": 1.2439, "step": 22500 }, { "epoch": 0.47, "learning_rate": 0.0, "loss": 1.2429, "step": 25000 }, { "epoch": 0.47, "eval_loss": 1.2369898557662964, "eval_runtime": 2284.8813, "eval_samples_per_second": 9.52, "eval_steps_per_second": 4.76, "step": 25000 }, { "epoch": 0.47, "step": 25000, "total_flos": 3.714827943936e+17, "train_loss": 1.296959794921875, "train_runtime": 61646.1041, "train_samples_per_second": 3.244, "train_steps_per_second": 0.406 } ], "max_steps": 25000, "num_train_epochs": 1, "total_flos": 3.714827943936e+17, "trial_name": null, "trial_params": null }