{ "best_metric": 0.2845354974269867, "best_model_checkpoint": "outputs/checkpoint-101", "epoch": 7.724137931034483, "eval_steps": 500, "global_step": 112, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.69, "learning_rate": 0.00018219165847995, "loss": 1.693, "step": 10 }, { "epoch": 0.97, "eval_loss": 0.8468072414398193, "eval_runtime": 47.2393, "eval_samples_per_second": 0.318, "eval_steps_per_second": 0.042, "step": 14 }, { "epoch": 1.38, "learning_rate": 0.0001643297311779941, "loss": 0.8001, "step": 20 }, { "epoch": 2.0, "eval_loss": 0.5586440563201904, "eval_runtime": 46.728, "eval_samples_per_second": 0.321, "eval_steps_per_second": 0.043, "step": 29 }, { "epoch": 2.07, "learning_rate": 0.00014646780387603822, "loss": 0.6269, "step": 30 }, { "epoch": 2.76, "learning_rate": 0.00012860587657408234, "loss": 0.3671, "step": 40 }, { "epoch": 2.97, "eval_loss": 0.33814382553100586, "eval_runtime": 48.0352, "eval_samples_per_second": 0.312, "eval_steps_per_second": 0.042, "step": 43 }, { "epoch": 3.45, "learning_rate": 0.00011074394927212647, "loss": 0.2759, "step": 50 }, { "epoch": 4.0, "eval_loss": 0.31169143319129944, "eval_runtime": 47.7275, "eval_samples_per_second": 0.314, "eval_steps_per_second": 0.042, "step": 58 }, { "epoch": 4.14, "learning_rate": 9.288202197017058e-05, "loss": 0.2149, "step": 60 }, { "epoch": 4.83, "learning_rate": 7.50200946682147e-05, "loss": 0.164, "step": 70 }, { "epoch": 4.97, "eval_loss": 0.29888349771499634, "eval_runtime": 47.9342, "eval_samples_per_second": 0.313, "eval_steps_per_second": 0.042, "step": 72 }, { "epoch": 5.52, "learning_rate": 5.7158167366258816e-05, "loss": 0.1221, "step": 80 }, { "epoch": 6.0, "eval_loss": 0.2934713065624237, "eval_runtime": 48.2143, "eval_samples_per_second": 0.311, "eval_steps_per_second": 0.041, "step": 87 }, { "epoch": 6.21, "learning_rate": 3.929624006430294e-05, "loss": 0.1183, "step": 90 }, { "epoch": 6.9, "learning_rate": 2.143431276234706e-05, "loss": 0.0866, "step": 100 }, { "epoch": 6.97, "eval_loss": 0.2845354974269867, "eval_runtime": 46.7151, "eval_samples_per_second": 0.321, "eval_steps_per_second": 0.043, "step": 101 }, { "epoch": 7.59, "learning_rate": 3.572385460391176e-06, "loss": 0.0837, "step": 110 }, { "epoch": 7.72, "eval_loss": 0.2850027084350586, "eval_runtime": 47.2985, "eval_samples_per_second": 0.317, "eval_steps_per_second": 0.042, "step": 112 } ], "logging_steps": 10, "max_steps": 112, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "total_flos": 2.546768317169664e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }