{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 8.04289544235925,
  "eval_steps": 500,
  "global_step": 3000,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_accuracy": 0.758632242679596,
      "eval_loss": 0.4715859889984131,
      "eval_runtime": 9.5006,
      "eval_samples_per_second": 313.981,
      "eval_steps_per_second": 19.683,
      "step": 373
    },
    {
      "epoch": 1.3404825737265416,
      "grad_norm": 9.62618637084961,
      "learning_rate": 1.4776902224208135e-05,
      "loss": 0.5101,
      "step": 500
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.7727120518684387,
      "eval_loss": 0.47308608889579773,
      "eval_runtime": 9.4715,
      "eval_samples_per_second": 314.944,
      "eval_steps_per_second": 19.743,
      "step": 746
    },
    {
      "epoch": 2.680965147453083,
      "grad_norm": 17.83710289001465,
      "learning_rate": 1.2489456059470035e-05,
      "loss": 0.2485,
      "step": 1000
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.7743881940841675,
      "eval_loss": 0.7006397843360901,
      "eval_runtime": 9.4464,
      "eval_samples_per_second": 315.781,
      "eval_steps_per_second": 19.796,
      "step": 1119
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7750586867332458,
      "eval_loss": 0.8257986903190613,
      "eval_runtime": 9.4942,
      "eval_samples_per_second": 314.193,
      "eval_steps_per_second": 19.696,
      "step": 1492
    },
    {
      "epoch": 4.021447721179625,
      "grad_norm": 0.4933696985244751,
      "learning_rate": 1.0202009894731932e-05,
      "loss": 0.0969,
      "step": 1500
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.782433807849884,
      "eval_loss": 0.9035575985908508,
      "eval_runtime": 9.4688,
      "eval_samples_per_second": 315.034,
      "eval_steps_per_second": 19.749,
      "step": 1865
    },
    {
      "epoch": 5.361930294906166,
      "grad_norm": 15.212503433227539,
      "learning_rate": 7.91456372999383e-06,
      "loss": 0.0396,
      "step": 2000
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.7844451665878296,
      "eval_loss": 1.023715853691101,
      "eval_runtime": 9.6126,
      "eval_samples_per_second": 310.323,
      "eval_steps_per_second": 19.454,
      "step": 2238
    },
    {
      "epoch": 6.702412868632708,
      "grad_norm": 1.1762652397155762,
      "learning_rate": 5.627117565255729e-06,
      "loss": 0.0231,
      "step": 2500
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.7760643362998962,
      "eval_loss": 1.3074647188186646,
      "eval_runtime": 9.435,
      "eval_samples_per_second": 316.164,
      "eval_steps_per_second": 19.82,
      "step": 2611
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.783774733543396,
      "eval_loss": 1.3387647867202759,
      "eval_runtime": 9.4842,
      "eval_samples_per_second": 314.525,
      "eval_steps_per_second": 19.717,
      "step": 2984
    },
    {
      "epoch": 8.04289544235925,
      "grad_norm": 3.214566707611084,
      "learning_rate": 3.3396714005176278e-06,
      "loss": 0.0136,
      "step": 3000
    }
  ],
  "logging_steps": 500,
  "max_steps": 3730,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 1.7275775811630084e+16,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": {
    "learning_rate": 1.7064348388946237e-05,
    "per_device_train_batch_size": 32
  }
}