|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 19, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3e-05, |
|
"loss": 2.6941, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.062219502243982046, |
|
"eval_loss": 2.654296875, |
|
"eval_runtime": 2.3869, |
|
"eval_samples_per_second": 23.881, |
|
"eval_steps_per_second": 1.676, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3e-05, |
|
"loss": 2.6914, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.062219502243982046, |
|
"eval_loss": 2.654296875, |
|
"eval_runtime": 3.2001, |
|
"eval_samples_per_second": 17.812, |
|
"eval_steps_per_second": 1.25, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.9795419551040836e-05, |
|
"loss": 2.6003, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.06265016546534294, |
|
"eval_loss": 2.6015625, |
|
"eval_runtime": 3.1764, |
|
"eval_samples_per_second": 17.945, |
|
"eval_steps_per_second": 1.259, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.9187258625509518e-05, |
|
"loss": 2.5603, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.06260483249467337, |
|
"eval_loss": 2.5703125, |
|
"eval_runtime": 2.9857, |
|
"eval_samples_per_second": 19.091, |
|
"eval_steps_per_second": 1.34, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.8192106268097336e-05, |
|
"loss": 2.606, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.06292216328936036, |
|
"eval_loss": 2.55078125, |
|
"eval_runtime": 2.5694, |
|
"eval_samples_per_second": 22.184, |
|
"eval_steps_per_second": 1.557, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.6837107640945904e-05, |
|
"loss": 2.5439, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.06292216328936036, |
|
"eval_loss": 2.544921875, |
|
"eval_runtime": 3.1779, |
|
"eval_samples_per_second": 17.937, |
|
"eval_steps_per_second": 1.259, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.5159223574386117e-05, |
|
"loss": 2.4449, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.06287683031869079, |
|
"eval_loss": 2.546875, |
|
"eval_runtime": 2.3628, |
|
"eval_samples_per_second": 24.124, |
|
"eval_steps_per_second": 1.693, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.320422237183641e-05, |
|
"loss": 2.5422, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.0629901627453647, |
|
"eval_loss": 2.546875, |
|
"eval_runtime": 3.1712, |
|
"eval_samples_per_second": 17.974, |
|
"eval_steps_per_second": 1.261, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.1025431369794546e-05, |
|
"loss": 2.6101, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.06319416111337776, |
|
"eval_loss": 2.541015625, |
|
"eval_runtime": 2.3922, |
|
"eval_samples_per_second": 23.827, |
|
"eval_steps_per_second": 1.672, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.8682282307111988e-05, |
|
"loss": 2.4482, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.0629901627453647, |
|
"eval_loss": 2.53515625, |
|
"eval_runtime": 2.3705, |
|
"eval_samples_per_second": 24.045, |
|
"eval_steps_per_second": 1.687, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.623869018208499e-05, |
|
"loss": 2.501, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.0631261616573734, |
|
"eval_loss": 2.529296875, |
|
"eval_runtime": 2.7727, |
|
"eval_samples_per_second": 20.558, |
|
"eval_steps_per_second": 1.443, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.3761309817915017e-05, |
|
"loss": 2.5967, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.06337549299605603, |
|
"eval_loss": 2.521484375, |
|
"eval_runtime": 2.168, |
|
"eval_samples_per_second": 26.292, |
|
"eval_steps_per_second": 1.845, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.1317717692888014e-05, |
|
"loss": 2.4998, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.06346615893739517, |
|
"eval_loss": 2.513671875, |
|
"eval_runtime": 3.1858, |
|
"eval_samples_per_second": 17.892, |
|
"eval_steps_per_second": 1.256, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 8.974568630205462e-06, |
|
"loss": 2.5957, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.06364749082007344, |
|
"eval_loss": 2.509765625, |
|
"eval_runtime": 2.1678, |
|
"eval_samples_per_second": 26.294, |
|
"eval_steps_per_second": 1.845, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.795777628163599e-06, |
|
"loss": 2.5967, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.06387415567342128, |
|
"eval_loss": 2.50390625, |
|
"eval_runtime": 3.188, |
|
"eval_samples_per_second": 17.879, |
|
"eval_steps_per_second": 1.255, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.840776425613887e-06, |
|
"loss": 2.5022, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.06373815676141258, |
|
"eval_loss": 2.5, |
|
"eval_runtime": 3.178, |
|
"eval_samples_per_second": 17.936, |
|
"eval_steps_per_second": 1.259, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.162892359054098e-06, |
|
"loss": 2.4314, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.06371549027607779, |
|
"eval_loss": 2.498046875, |
|
"eval_runtime": 2.968, |
|
"eval_samples_per_second": 19.205, |
|
"eval_steps_per_second": 1.348, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.8078937319026655e-06, |
|
"loss": 2.6279, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.06362482433473865, |
|
"eval_loss": 2.49609375, |
|
"eval_runtime": 3.1736, |
|
"eval_samples_per_second": 17.961, |
|
"eval_steps_per_second": 1.26, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 8.127413744904805e-07, |
|
"loss": 2.571, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.06357949136406908, |
|
"eval_loss": 2.49609375, |
|
"eval_runtime": 2.9674, |
|
"eval_samples_per_second": 19.208, |
|
"eval_steps_per_second": 1.348, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 19, |
|
"total_flos": 1001216802816.0, |
|
"train_loss": 2.5612600226151314, |
|
"train_runtime": 437.2347, |
|
"train_samples_per_second": 0.693, |
|
"train_steps_per_second": 0.043 |
|
} |
|
], |
|
"max_steps": 19, |
|
"num_train_epochs": 1, |
|
"total_flos": 1001216802816.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|