|
{ |
|
"best_metric": 0.053496960550546646, |
|
"best_model_checkpoint": "/tmp/model/checkpoint-822", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 822, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.6867469879518073e-05, |
|
"loss": 1.7276, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.4939759036144585e-05, |
|
"loss": 0.4579, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.96617050067659e-05, |
|
"loss": 0.3088, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.76319350473613e-05, |
|
"loss": 0.35, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.56021650879567e-05, |
|
"loss": 0.2649, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.35723951285521e-05, |
|
"loss": 0.2502, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.15426251691475e-05, |
|
"loss": 0.3374, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.95128552097429e-05, |
|
"loss": 0.1944, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7483085250338296e-05, |
|
"loss": 0.2182, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9739413680781759, |
|
"eval_auc": 0.9947087408402573, |
|
"eval_f1": 0.9719298245614035, |
|
"eval_loss": 0.09450644999742508, |
|
"eval_precision": 0.9736379613356766, |
|
"eval_recall": 0.9702276707530648, |
|
"eval_runtime": 22.7023, |
|
"eval_samples_per_second": 54.091, |
|
"eval_steps_per_second": 3.392, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.5453315290933695e-05, |
|
"loss": 0.2388, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.3423545331529095e-05, |
|
"loss": 0.1592, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.1393775372124494e-05, |
|
"loss": 0.1703, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.9364005412719893e-05, |
|
"loss": 0.2019, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.7334235453315295e-05, |
|
"loss": 0.2313, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.530446549391069e-05, |
|
"loss": 0.1362, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.327469553450609e-05, |
|
"loss": 0.1414, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.124492557510149e-05, |
|
"loss": 0.1077, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9215155615696888e-05, |
|
"loss": 0.1434, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9723127035830619, |
|
"eval_auc": 0.9975742842139215, |
|
"eval_f1": 0.9707401032702238, |
|
"eval_loss": 0.08003830909729004, |
|
"eval_precision": 0.9543147208121827, |
|
"eval_recall": 0.9877408056042032, |
|
"eval_runtime": 23.305, |
|
"eval_samples_per_second": 52.692, |
|
"eval_steps_per_second": 3.304, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7185385656292287e-05, |
|
"loss": 0.2318, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.5155615696887688e-05, |
|
"loss": 0.1714, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.3125845737483087e-05, |
|
"loss": 0.1123, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.1096075778078486e-05, |
|
"loss": 0.1017, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 9.066305818673885e-06, |
|
"loss": 0.1417, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 7.036535859269283e-06, |
|
"loss": 0.0908, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 5.006765899864682e-06, |
|
"loss": 0.1057, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.9769959404600813e-06, |
|
"loss": 0.1653, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.0148849797023005e-06, |
|
"loss": 0.0897, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9837133550488599, |
|
"eval_auc": 0.9982940020845161, |
|
"eval_f1": 0.9824561403508772, |
|
"eval_loss": 0.053496960550546646, |
|
"eval_precision": 0.984182776801406, |
|
"eval_recall": 0.9807355516637478, |
|
"eval_runtime": 22.6402, |
|
"eval_samples_per_second": 54.24, |
|
"eval_steps_per_second": 3.401, |
|
"step": 822 |
|
} |
|
], |
|
"logging_steps": 30, |
|
"max_steps": 822, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 5.084249438613381e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|