|
{ |
|
"best_metric": 0.5745548592459963, |
|
"best_model_checkpoint": "/home/ruben/PT-Pump-Up/package-client/src/pt_pump_up/benchmarking/output/checkpoint-3217", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 6434, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 18.098752975463867, |
|
"learning_rate": 4.9222878458190864e-05, |
|
"loss": 0.393, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.4246498346328735, |
|
"learning_rate": 4.8445756916381726e-05, |
|
"loss": 0.3577, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 20.033092498779297, |
|
"learning_rate": 4.766863537457259e-05, |
|
"loss": 0.2749, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.9361429214477539, |
|
"learning_rate": 4.689151383276344e-05, |
|
"loss": 0.2693, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 2.208453893661499, |
|
"learning_rate": 4.6114392290954305e-05, |
|
"loss": 0.2839, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 5.299256324768066, |
|
"learning_rate": 4.5337270749145166e-05, |
|
"loss": 0.3315, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6945, |
|
"eval_f1": 0.5745548592459963, |
|
"eval_loss": 0.5727517604827881, |
|
"eval_precision": 0.9459547985587946, |
|
"eval_recall": 0.4125714285714286, |
|
"eval_runtime": 971.5986, |
|
"eval_samples_per_second": 28.818, |
|
"eval_steps_per_second": 1.801, |
|
"step": 3217 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 2.0671701431274414, |
|
"learning_rate": 4.456014920733603e-05, |
|
"loss": 0.5044, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 2.1807188987731934, |
|
"learning_rate": 4.378302766552689e-05, |
|
"loss": 0.6689, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 5.214261054992676, |
|
"learning_rate": 4.300590612371775e-05, |
|
"loss": 0.6711, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 4.0813446044921875, |
|
"learning_rate": 4.2228784581908613e-05, |
|
"loss": 0.69, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 14.689708709716797, |
|
"learning_rate": 4.1451663040099475e-05, |
|
"loss": 0.709, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 3.6824076175689697, |
|
"learning_rate": 4.067454149829034e-05, |
|
"loss": 0.6821, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.685297429561615, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 964.7135, |
|
"eval_samples_per_second": 29.024, |
|
"eval_steps_per_second": 1.814, |
|
"step": 6434 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 32170, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 3.215241835190381e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|