|
{ |
|
"best_metric": 0.38235806339254613, |
|
"best_model_checkpoint": "/groups/claytonm/enoriega/kw_pubmed/kw_pubmed_1000_0.000006/checkpoint-108", |
|
"epoch": 3.283101045296167, |
|
"global_step": 148, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.36533437826541276, |
|
"eval_loss": 4.0918426513671875, |
|
"eval_runtime": 16.5878, |
|
"eval_samples_per_second": 602.854, |
|
"eval_steps_per_second": 18.869, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 5.92e-06, |
|
"loss": 4.4572, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.36964472309299895, |
|
"eval_loss": 4.006958961486816, |
|
"eval_runtime": 16.4206, |
|
"eval_samples_per_second": 608.992, |
|
"eval_steps_per_second": 19.061, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 5.786666666666667e-06, |
|
"loss": 4.0734, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.37338906304423547, |
|
"eval_loss": 3.9496026039123535, |
|
"eval_runtime": 16.513, |
|
"eval_samples_per_second": 605.585, |
|
"eval_steps_per_second": 18.955, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5.653333333333333e-06, |
|
"loss": 3.8884, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.37656739811912227, |
|
"eval_loss": 3.918774127960205, |
|
"eval_runtime": 16.5065, |
|
"eval_samples_per_second": 605.823, |
|
"eval_steps_per_second": 18.962, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.5200000000000005e-06, |
|
"loss": 3.8037, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.37495646116335773, |
|
"eval_loss": 3.9296493530273438, |
|
"eval_runtime": 16.4773, |
|
"eval_samples_per_second": 606.897, |
|
"eval_steps_per_second": 18.996, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.37891849529780564, |
|
"eval_loss": 3.903676748275757, |
|
"eval_runtime": 16.4879, |
|
"eval_samples_per_second": 606.507, |
|
"eval_steps_per_second": 18.984, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5.386666666666667e-06, |
|
"loss": 3.7326, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.3787878787878788, |
|
"eval_loss": 3.8809974193573, |
|
"eval_runtime": 16.5092, |
|
"eval_samples_per_second": 605.724, |
|
"eval_steps_per_second": 18.959, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5.2533333333333336e-06, |
|
"loss": 3.6766, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.3770463253221874, |
|
"eval_loss": 3.8815793991088867, |
|
"eval_runtime": 16.4832, |
|
"eval_samples_per_second": 606.677, |
|
"eval_steps_per_second": 18.989, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5.12e-06, |
|
"loss": 3.6152, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.3797892720306513, |
|
"eval_loss": 3.8686890602111816, |
|
"eval_runtime": 16.4735, |
|
"eval_samples_per_second": 607.035, |
|
"eval_steps_per_second": 19.0, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.986666666666667e-06, |
|
"loss": 3.6143, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.3770463253221874, |
|
"eval_loss": 3.874457597732544, |
|
"eval_runtime": 16.4674, |
|
"eval_samples_per_second": 607.26, |
|
"eval_steps_per_second": 19.007, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.3777429467084639, |
|
"eval_loss": 3.872670888900757, |
|
"eval_runtime": 16.447, |
|
"eval_samples_per_second": 608.012, |
|
"eval_steps_per_second": 19.031, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.853333333333333e-06, |
|
"loss": 3.54, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_accuracy": 0.3747387669801463, |
|
"eval_loss": 3.882456064224243, |
|
"eval_runtime": 16.5148, |
|
"eval_samples_per_second": 605.517, |
|
"eval_steps_per_second": 18.953, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.72e-06, |
|
"loss": 4.1293, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_accuracy": 0.38079066527342387, |
|
"eval_loss": 3.8683102130889893, |
|
"eval_runtime": 16.4717, |
|
"eval_samples_per_second": 607.102, |
|
"eval_steps_per_second": 19.002, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.586666666666667e-06, |
|
"loss": 3.4379, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_accuracy": 0.37983281086729365, |
|
"eval_loss": 3.8646554946899414, |
|
"eval_runtime": 16.4967, |
|
"eval_samples_per_second": 606.182, |
|
"eval_steps_per_second": 18.973, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 4.453333333333334e-06, |
|
"loss": 3.4215, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_accuracy": 0.3801375827237896, |
|
"eval_loss": 3.8663816452026367, |
|
"eval_runtime": 16.4695, |
|
"eval_samples_per_second": 607.185, |
|
"eval_steps_per_second": 19.005, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_accuracy": 0.3804858934169279, |
|
"eval_loss": 3.8687705993652344, |
|
"eval_runtime": 16.462, |
|
"eval_samples_per_second": 607.461, |
|
"eval_steps_per_second": 19.014, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.32e-06, |
|
"loss": 3.3448, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_accuracy": 0.38140020898641586, |
|
"eval_loss": 3.876781463623047, |
|
"eval_runtime": 16.4781, |
|
"eval_samples_per_second": 606.866, |
|
"eval_steps_per_second": 18.995, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.186666666666667e-06, |
|
"loss": 3.3324, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_accuracy": 0.37717694183211425, |
|
"eval_loss": 3.913390636444092, |
|
"eval_runtime": 16.4943, |
|
"eval_samples_per_second": 606.268, |
|
"eval_steps_per_second": 18.976, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.053333333333333e-06, |
|
"loss": 3.3088, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_accuracy": 0.3799634273772205, |
|
"eval_loss": 3.882460832595825, |
|
"eval_runtime": 16.4932, |
|
"eval_samples_per_second": 606.311, |
|
"eval_steps_per_second": 18.978, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.92e-06, |
|
"loss": 3.2765, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_accuracy": 0.37787356321839083, |
|
"eval_loss": 3.8959219455718994, |
|
"eval_runtime": 16.5043, |
|
"eval_samples_per_second": 605.902, |
|
"eval_steps_per_second": 18.965, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_accuracy": 0.37913618948101707, |
|
"eval_loss": 3.9021382331848145, |
|
"eval_runtime": 16.4868, |
|
"eval_samples_per_second": 606.546, |
|
"eval_steps_per_second": 18.985, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.7866666666666667e-06, |
|
"loss": 3.2439, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.38148728665970044, |
|
"eval_loss": 3.8731861114501953, |
|
"eval_runtime": 16.4672, |
|
"eval_samples_per_second": 607.269, |
|
"eval_steps_per_second": 19.008, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.6533333333333336e-06, |
|
"loss": 3.2546, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_accuracy": 0.38044235458028564, |
|
"eval_loss": 3.8909058570861816, |
|
"eval_runtime": 16.4399, |
|
"eval_samples_per_second": 608.277, |
|
"eval_steps_per_second": 19.039, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 3.52e-06, |
|
"loss": 3.818, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_accuracy": 0.3787443399512365, |
|
"eval_loss": 3.917863368988037, |
|
"eval_runtime": 16.4722, |
|
"eval_samples_per_second": 607.083, |
|
"eval_steps_per_second": 19.002, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.3866666666666667e-06, |
|
"loss": 3.1567, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_accuracy": 0.3803988157436433, |
|
"eval_loss": 3.906223773956299, |
|
"eval_runtime": 16.5109, |
|
"eval_samples_per_second": 605.66, |
|
"eval_steps_per_second": 18.957, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_accuracy": 0.3793974225008708, |
|
"eval_loss": 3.9190924167633057, |
|
"eval_runtime": 16.4677, |
|
"eval_samples_per_second": 607.249, |
|
"eval_steps_per_second": 19.007, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 3.2533333333333337e-06, |
|
"loss": 3.1563, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_accuracy": 0.38235806339254613, |
|
"eval_loss": 3.8962931632995605, |
|
"eval_runtime": 16.4504, |
|
"eval_samples_per_second": 607.888, |
|
"eval_steps_per_second": 19.027, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.12e-06, |
|
"loss": 3.1417, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_accuracy": 0.37948450017415536, |
|
"eval_loss": 3.924445152282715, |
|
"eval_runtime": 16.4336, |
|
"eval_samples_per_second": 608.508, |
|
"eval_steps_per_second": 19.046, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.9866666666666667e-06, |
|
"loss": 3.1247, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_accuracy": 0.3790055729710902, |
|
"eval_loss": 3.9178736209869385, |
|
"eval_runtime": 16.4751, |
|
"eval_samples_per_second": 606.976, |
|
"eval_steps_per_second": 18.998, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.8533333333333333e-06, |
|
"loss": 3.1307, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"eval_accuracy": 0.3778300243817485, |
|
"eval_loss": 3.939385175704956, |
|
"eval_runtime": 16.4922, |
|
"eval_samples_per_second": 606.346, |
|
"eval_steps_per_second": 18.979, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_accuracy": 0.38179205851619646, |
|
"eval_loss": 3.9248249530792236, |
|
"eval_runtime": 16.503, |
|
"eval_samples_per_second": 605.949, |
|
"eval_steps_per_second": 18.966, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.72e-06, |
|
"loss": 3.0893, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_accuracy": 0.37752525252525254, |
|
"eval_loss": 3.94762921333313, |
|
"eval_runtime": 16.4815, |
|
"eval_samples_per_second": 606.74, |
|
"eval_steps_per_second": 18.991, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.5866666666666667e-06, |
|
"loss": 3.0906, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_accuracy": 0.3811825148032045, |
|
"eval_loss": 3.9266726970672607, |
|
"eval_runtime": 16.4398, |
|
"eval_samples_per_second": 608.279, |
|
"eval_steps_per_second": 19.039, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.4533333333333337e-06, |
|
"loss": 3.0575, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"eval_accuracy": 0.37813479623824453, |
|
"eval_loss": 3.937913417816162, |
|
"eval_runtime": 16.4796, |
|
"eval_samples_per_second": 606.812, |
|
"eval_steps_per_second": 18.993, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 2.32e-06, |
|
"loss": 3.6148, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"eval_accuracy": 0.3803988157436433, |
|
"eval_loss": 3.9217216968536377, |
|
"eval_runtime": 16.4193, |
|
"eval_samples_per_second": 609.04, |
|
"eval_steps_per_second": 19.063, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_accuracy": 0.37861372344130967, |
|
"eval_loss": 3.934666156768799, |
|
"eval_runtime": 16.4991, |
|
"eval_samples_per_second": 606.093, |
|
"eval_steps_per_second": 18.971, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 2.1866666666666668e-06, |
|
"loss": 3.035, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_accuracy": 0.37944096133751304, |
|
"eval_loss": 3.9348745346069336, |
|
"eval_runtime": 16.4542, |
|
"eval_samples_per_second": 607.749, |
|
"eval_steps_per_second": 19.023, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"step": 148, |
|
"total_flos": 9.09876408404562e+16, |
|
"train_loss": 3.4590183206506677, |
|
"train_runtime": 5524.5303, |
|
"train_samples_per_second": 332.471, |
|
"train_steps_per_second": 0.041 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_accuracy": 0.38079066527342387, |
|
"eval_loss": 3.901303291320801, |
|
"eval_runtime": 16.4247, |
|
"eval_samples_per_second": 608.84, |
|
"eval_steps_per_second": 19.057, |
|
"step": 148 |
|
} |
|
], |
|
"max_steps": 225, |
|
"num_train_epochs": 5, |
|
"total_flos": 9.09876408404562e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|