{ "best_metric": 0.8566413316413316, "best_model_checkpoint": "./splash_train_concept_continued/checkpoint-128", "epoch": 127.54739652870494, "global_step": 128, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.55, "learning_rate": 0.0001, "loss": 0.0026, "step": 1 }, { "epoch": 3.55, "learning_rate": 0.0001, "loss": 0.0011, "step": 4 }, { "epoch": 7.55, "learning_rate": 0.0001, "loss": 0.0022, "step": 8 }, { "epoch": 11.55, "learning_rate": 0.0001, "loss": 0.0018, "step": 12 }, { "epoch": 15.55, "learning_rate": 0.0001, "loss": 0.0003, "step": 16 }, { "epoch": 19.55, "learning_rate": 0.0001, "loss": 0.0002, "step": 20 }, { "epoch": 23.55, "learning_rate": 0.0001, "loss": 0.0003, "step": 24 }, { "epoch": 27.55, "learning_rate": 0.0001, "loss": 0.0002, "step": 28 }, { "epoch": 31.55, "learning_rate": 0.0001, "loss": 0.0018, "step": 32 }, { "epoch": 35.55, "learning_rate": 0.0001, "loss": 0.0022, "step": 36 }, { "epoch": 39.55, "learning_rate": 0.0001, "loss": 0.0002, "step": 40 }, { "epoch": 43.55, "learning_rate": 0.0001, "loss": 0.0002, "step": 44 }, { "epoch": 47.55, "learning_rate": 0.0001, "loss": 0.0002, "step": 48 }, { "epoch": 51.55, "learning_rate": 0.0001, "loss": 0.0002, "step": 52 }, { "epoch": 55.55, "learning_rate": 0.0001, "loss": 0.0002, "step": 56 }, { "epoch": 59.55, "learning_rate": 0.0001, "loss": 0.0051, "step": 60 }, { "epoch": 63.55, "learning_rate": 0.0001, "loss": 0.0003, "step": 64 }, { "epoch": 63.55, "eval_f1_score": 0.8557729057729058, "eval_loss": 0.3384537696838379, "eval_runtime": 287.8554, "eval_samples_per_second": 3.342, "step": 64 }, { "epoch": 67.55, "learning_rate": 0.0001, "loss": 0.0001, "step": 68 }, { "epoch": 71.55, "learning_rate": 0.0001, "loss": 0.0002, "step": 72 }, { "epoch": 75.55, "learning_rate": 0.0001, "loss": 0.0002, "step": 76 }, { "epoch": 79.55, "learning_rate": 0.0001, "loss": 0.0046, "step": 80 }, { "epoch": 83.55, "learning_rate": 0.0001, "loss": 0.0007, "step": 84 }, { "epoch": 87.55, "learning_rate": 0.0001, "loss": 0.0001, "step": 88 }, { "epoch": 91.55, "learning_rate": 0.0001, "loss": 0.0001, "step": 92 }, { "epoch": 95.55, "learning_rate": 0.0001, "loss": 0.0001, "step": 96 }, { "epoch": 99.55, "learning_rate": 0.0001, "loss": 0.0001, "step": 100 }, { "epoch": 103.55, "learning_rate": 0.0001, "loss": 0.0001, "step": 104 }, { "epoch": 107.55, "learning_rate": 0.0001, "loss": 0.0001, "step": 108 }, { "epoch": 111.55, "learning_rate": 0.0001, "loss": 0.0006, "step": 112 }, { "epoch": 115.55, "learning_rate": 0.0001, "loss": 0.0001, "step": 116 }, { "epoch": 119.55, "learning_rate": 0.0001, "loss": 0.0001, "step": 120 }, { "epoch": 123.55, "learning_rate": 0.0001, "loss": 0.0001, "step": 124 }, { "epoch": 127.55, "learning_rate": 0.0001, "loss": 0.0001, "step": 128 }, { "epoch": 127.55, "eval_f1_score": 0.8566413316413316, "eval_loss": 0.35904133319854736, "eval_runtime": 290.9961, "eval_samples_per_second": 3.306, "step": 128 } ], "max_steps": 3072, "num_train_epochs": 3072, "total_flos": 1.897685960620032e+18, "trial_name": null, "trial_params": null }