{ "best_metric": 0.3634186694531522, "best_model_checkpoint": "/xdisk/msurdeanu/enoriega/kw_pubmed/kw_pubmed_1000_0.0003/checkpoint-12", "epoch": 1.1524390243902438, "global_step": 52, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "eval_accuracy": 0.3436084987809126, "eval_loss": 4.372271537780762, "eval_runtime": 16.6642, "eval_samples_per_second": 600.088, "eval_steps_per_second": 18.783, "step": 4 }, { "epoch": 0.11, "learning_rate": 0.00029466666666666666, "loss": 6.0386, "step": 5 }, { "epoch": 0.17, "eval_accuracy": 0.34417450365726227, "eval_loss": 4.2112579345703125, "eval_runtime": 16.5968, "eval_samples_per_second": 602.524, "eval_steps_per_second": 18.859, "step": 8 }, { "epoch": 0.22, "learning_rate": 0.0002893333333333333, "loss": 3.7573, "step": 10 }, { "epoch": 0.26, "eval_accuracy": 0.3634186694531522, "eval_loss": 4.2079362869262695, "eval_runtime": 16.5847, "eval_samples_per_second": 602.965, "eval_steps_per_second": 18.873, "step": 12 }, { "epoch": 0.33, "learning_rate": 0.00028266666666666663, "loss": 2.9944, "step": 15 }, { "epoch": 0.35, "eval_accuracy": 0.3512713340299547, "eval_loss": 4.3369622230529785, "eval_runtime": 16.6084, "eval_samples_per_second": 602.106, "eval_steps_per_second": 18.846, "step": 16 }, { "epoch": 0.44, "learning_rate": 0.000276, "loss": 2.7048, "step": 20 }, { "epoch": 0.44, "eval_accuracy": 0.30673110414489724, "eval_loss": 4.859361171722412, "eval_runtime": 16.6048, "eval_samples_per_second": 602.234, "eval_steps_per_second": 18.85, "step": 20 }, { "epoch": 0.52, "eval_accuracy": 0.3382967607105538, "eval_loss": 4.492859840393066, "eval_runtime": 16.5439, "eval_samples_per_second": 604.454, "eval_steps_per_second": 18.919, "step": 24 }, { "epoch": 0.54, "learning_rate": 0.00027066666666666667, "loss": 2.9458, "step": 25 }, { "epoch": 0.61, "eval_accuracy": 0.34077847439916403, "eval_loss": 4.514556884765625, "eval_runtime": 16.5866, "eval_samples_per_second": 602.897, "eval_steps_per_second": 18.871, "step": 28 }, { "epoch": 0.65, "learning_rate": 0.00026399999999999997, "loss": 2.3783, "step": 30 }, { "epoch": 0.7, "eval_accuracy": 0.3429989550679206, "eval_loss": 4.5680060386657715, "eval_runtime": 16.5703, "eval_samples_per_second": 603.491, "eval_steps_per_second": 18.889, "step": 32 }, { "epoch": 0.76, "learning_rate": 0.0002573333333333333, "loss": 2.2485, "step": 35 }, { "epoch": 0.78, "eval_accuracy": 0.34770114942528735, "eval_loss": 4.509522914886475, "eval_runtime": 16.5871, "eval_samples_per_second": 602.877, "eval_steps_per_second": 18.87, "step": 36 }, { "epoch": 0.87, "learning_rate": 0.00025066666666666667, "loss": 2.1701, "step": 40 }, { "epoch": 0.87, "eval_accuracy": 0.3449146638801811, "eval_loss": 4.4971489906311035, "eval_runtime": 16.5577, "eval_samples_per_second": 603.949, "eval_steps_per_second": 18.904, "step": 40 }, { "epoch": 0.96, "eval_accuracy": 0.33207070707070707, "eval_loss": 4.7050604820251465, "eval_runtime": 16.5693, "eval_samples_per_second": 603.527, "eval_steps_per_second": 18.89, "step": 44 }, { "epoch": 0.98, "learning_rate": 0.000244, "loss": 2.0861, "step": 45 }, { "epoch": 1.07, "eval_accuracy": 0.3310257749912922, "eval_loss": 4.761545658111572, "eval_runtime": 16.5548, "eval_samples_per_second": 604.054, "eval_steps_per_second": 18.907, "step": 48 }, { "epoch": 1.11, "learning_rate": 0.00023733333333333332, "loss": 2.4168, "step": 50 }, { "epoch": 1.15, "eval_accuracy": 0.33938523162661094, "eval_loss": 4.7085795402526855, "eval_runtime": 16.5811, "eval_samples_per_second": 603.096, "eval_steps_per_second": 18.877, "step": 52 }, { "epoch": 1.15, "step": 52, "total_flos": 3.18987289303776e+16, "train_loss": 2.93648067345986, "train_runtime": 1938.9621, "train_samples_per_second": 947.283, "train_steps_per_second": 0.116 } ], "max_steps": 225, "num_train_epochs": 5, "total_flos": 3.18987289303776e+16, "trial_name": null, "trial_params": null }