|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 95214, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3e-05, |
|
"loss": 2.2074, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.6701050400733948, |
|
"eval_loss": 1.3863972425460815, |
|
"eval_runtime": 1030.5845, |
|
"eval_samples_per_second": 109.496, |
|
"eval_steps_per_second": 9.125, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.877048978688162e-05, |
|
"loss": 1.2979, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.7089281678199768, |
|
"eval_loss": 1.1698851585388184, |
|
"eval_runtime": 1023.3048, |
|
"eval_samples_per_second": 110.275, |
|
"eval_steps_per_second": 9.19, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.7540979573763234e-05, |
|
"loss": 1.1725, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.723913311958313, |
|
"eval_loss": 1.0914552211761475, |
|
"eval_runtime": 1021.0004, |
|
"eval_samples_per_second": 110.524, |
|
"eval_steps_per_second": 9.211, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6311223360202048e-05, |
|
"loss": 1.1066, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.7364969849586487, |
|
"eval_loss": 1.0439897775650024, |
|
"eval_runtime": 1023.0505, |
|
"eval_samples_per_second": 110.302, |
|
"eval_steps_per_second": 9.192, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.3351475380761303e-05, |
|
"loss": 1.0399, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.7490540146827698, |
|
"eval_loss": 0.9834885001182556, |
|
"eval_runtime": 1112.3584, |
|
"eval_samples_per_second": 101.447, |
|
"eval_steps_per_second": 6.341, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.1689427361606846e-05, |
|
"loss": 1.0012, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.7581638693809509, |
|
"eval_loss": 0.9408503770828247, |
|
"eval_runtime": 1113.6514, |
|
"eval_samples_per_second": 101.329, |
|
"eval_steps_per_second": 6.333, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.0027046799831514e-05, |
|
"loss": 0.8971, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_accuracy": 0.7663254737854004, |
|
"eval_loss": 0.9190825819969177, |
|
"eval_runtime": 1114.1162, |
|
"eval_samples_per_second": 101.287, |
|
"eval_steps_per_second": 6.331, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.8364998780677056e-05, |
|
"loss": 0.8405, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_accuracy": 0.7693473100662231, |
|
"eval_loss": 0.9057066440582275, |
|
"eval_runtime": 1056.7123, |
|
"eval_samples_per_second": 106.789, |
|
"eval_steps_per_second": 6.674, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.6702950761522602e-05, |
|
"loss": 0.834, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_accuracy": 0.775940477848053, |
|
"eval_loss": 0.8832579851150513, |
|
"eval_runtime": 1059.1014, |
|
"eval_samples_per_second": 106.548, |
|
"eval_steps_per_second": 6.659, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.5040570199747268e-05, |
|
"loss": 0.8212, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_accuracy": 0.7792636156082153, |
|
"eval_loss": 0.866415798664093, |
|
"eval_runtime": 1054.2119, |
|
"eval_samples_per_second": 107.042, |
|
"eval_steps_per_second": 6.69, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.3378854723213692e-05, |
|
"loss": 0.8133, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_accuracy": 0.7830652594566345, |
|
"eval_loss": 0.8482676148414612, |
|
"eval_runtime": 1054.5931, |
|
"eval_samples_per_second": 107.003, |
|
"eval_steps_per_second": 6.688, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.1716474161438358e-05, |
|
"loss": 0.7947, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_accuracy": 0.7862643599510193, |
|
"eval_loss": 0.8344442844390869, |
|
"eval_runtime": 1058.4611, |
|
"eval_samples_per_second": 106.612, |
|
"eval_steps_per_second": 6.663, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.0054758684904782e-05, |
|
"loss": 0.7446, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_accuracy": 0.7890114784240723, |
|
"eval_loss": 0.8323635458946228, |
|
"eval_runtime": 1054.4779, |
|
"eval_samples_per_second": 107.015, |
|
"eval_steps_per_second": 6.689, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 8.392378123129448e-06, |
|
"loss": 0.6463, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_accuracy": 0.7913243770599365, |
|
"eval_loss": 0.8241144418716431, |
|
"eval_runtime": 1057.5768, |
|
"eval_samples_per_second": 106.701, |
|
"eval_steps_per_second": 6.669, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.7306626465958724e-06, |
|
"loss": 0.6461, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_accuracy": 0.792999267578125, |
|
"eval_loss": 0.8187506794929504, |
|
"eval_runtime": 1055.2791, |
|
"eval_samples_per_second": 106.934, |
|
"eval_steps_per_second": 6.684, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 5.068282084820538e-06, |
|
"loss": 0.6403, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_accuracy": 0.7952146530151367, |
|
"eval_loss": 0.8096941709518433, |
|
"eval_runtime": 1058.933, |
|
"eval_samples_per_second": 106.565, |
|
"eval_steps_per_second": 6.66, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.406566608286962e-06, |
|
"loss": 0.6294, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_accuracy": 0.7971464991569519, |
|
"eval_loss": 0.8044777512550354, |
|
"eval_runtime": 1078.5713, |
|
"eval_samples_per_second": 104.625, |
|
"eval_steps_per_second": 6.539, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.744186046511628e-06, |
|
"loss": 0.6198, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_accuracy": 0.7985377907752991, |
|
"eval_loss": 0.7970817685127258, |
|
"eval_runtime": 1091.9096, |
|
"eval_samples_per_second": 103.346, |
|
"eval_steps_per_second": 6.459, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 8.213802735717294e-08, |
|
"loss": 0.6169, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.7989366054534912, |
|
"eval_loss": 0.794947624206543, |
|
"eval_runtime": 1091.2301, |
|
"eval_samples_per_second": 103.411, |
|
"eval_steps_per_second": 6.463, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 95214, |
|
"total_flos": 2.918866335440248e+17, |
|
"train_loss": 0.06961118817632395, |
|
"train_runtime": 8353.1581, |
|
"train_samples_per_second": 364.749, |
|
"train_steps_per_second": 11.399 |
|
} |
|
], |
|
"max_steps": 95214, |
|
"num_train_epochs": 3, |
|
"total_flos": 2.918866335440248e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|