{
    "best_metric": null,
    "best_model_checkpoint": null,
    "epoch": 2.0,
    "global_step": 750000,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
    "log_history": [
        {
            "epoch": 0.0,
            "learning_rate": 1e-05,
            "loss": 3.3081,
            "step": 1
        },
        {
            "epoch": 0.05,
            "learning_rate": 9.83344888888889e-06,
            "loss": 0.6857,
            "step": 18750
        },
        {
            "epoch": 0.05,
            "eval_accuracy": 0.5564,
            "eval_loss": 0.6798537373542786,
            "eval_runtime": 9.7283,
            "eval_samples_per_second": 1027.927,
            "eval_steps_per_second": 16.138,
            "step": 18750
        },
        {
            "epoch": 0.1,
            "learning_rate": 9.666862222222223e-06,
            "loss": 0.6804,
            "step": 37500
        },
        {
            "epoch": 0.1,
            "eval_accuracy": 0.5607,
            "eval_loss": 0.677202045917511,
            "eval_runtime": 9.8699,
            "eval_samples_per_second": 1013.18,
            "eval_steps_per_second": 15.907,
            "step": 37500
        },
        {
            "epoch": 0.15,
            "learning_rate": 9.500275555555555e-06,
            "loss": 0.679,
            "step": 56250
        },
        {
            "epoch": 0.15,
            "eval_accuracy": 0.5559,
            "eval_loss": 0.6789325475692749,
            "eval_runtime": 9.7877,
            "eval_samples_per_second": 1021.686,
            "eval_steps_per_second": 16.04,
            "step": 56250
        },
        {
            "epoch": 0.2,
            "learning_rate": 9.33368888888889e-06,
            "loss": 0.6785,
            "step": 75000
        },
        {
            "epoch": 0.2,
            "eval_accuracy": 0.5639,
            "eval_loss": 0.6760360598564148,
            "eval_runtime": 9.8616,
            "eval_samples_per_second": 1014.039,
            "eval_steps_per_second": 15.92,
            "step": 75000
        },
        {
            "epoch": 0.25,
            "learning_rate": 9.167093333333335e-06,
            "loss": 0.6777,
            "step": 93750
        },
        {
            "epoch": 0.25,
            "eval_accuracy": 0.5672,
            "eval_loss": 0.6752009987831116,
            "eval_runtime": 9.8899,
            "eval_samples_per_second": 1011.135,
            "eval_steps_per_second": 15.875,
            "step": 93750
        },
        {
            "epoch": 0.3,
            "learning_rate": 9.000515555555556e-06,
            "loss": 0.6771,
            "step": 112500
        },
        {
            "epoch": 0.3,
            "eval_accuracy": 0.5647,
            "eval_loss": 0.6761014461517334,
            "eval_runtime": 9.9903,
            "eval_samples_per_second": 1000.97,
            "eval_steps_per_second": 15.715,
            "step": 112500
        },
        {
            "epoch": 0.35,
            "learning_rate": 8.833928888888889e-06,
            "loss": 0.6765,
            "step": 131250
        },
        {
            "epoch": 0.35,
            "eval_accuracy": 0.5665,
            "eval_loss": 0.6758425235748291,
            "eval_runtime": 9.7731,
            "eval_samples_per_second": 1023.22,
            "eval_steps_per_second": 16.065,
            "step": 131250
        },
        {
            "epoch": 0.4,
            "learning_rate": 8.667333333333334e-06,
            "loss": 0.6765,
            "step": 150000
        },
        {
            "epoch": 0.4,
            "eval_accuracy": 0.5669,
            "eval_loss": 0.6743916869163513,
            "eval_runtime": 9.7984,
            "eval_samples_per_second": 1020.577,
            "eval_steps_per_second": 16.023,
            "step": 150000
        },
        {
            "epoch": 0.45,
            "learning_rate": 8.50073777777778e-06,
            "loss": 0.6762,
            "step": 168750
        },
        {
            "epoch": 0.45,
            "eval_accuracy": 0.5682,
            "eval_loss": 0.6749153137207031,
            "eval_runtime": 9.9597,
            "eval_samples_per_second": 1004.049,
            "eval_steps_per_second": 15.764,
            "step": 168750
        },
        {
            "epoch": 0.5,
            "learning_rate": 8.334142222222223e-06,
            "loss": 0.676,
            "step": 187500
        },
        {
            "epoch": 0.5,
            "eval_accuracy": 0.5673,
            "eval_loss": 0.6741730570793152,
            "eval_runtime": 9.9295,
            "eval_samples_per_second": 1007.098,
            "eval_steps_per_second": 15.811,
            "step": 187500
        },
        {
            "epoch": 0.55,
            "learning_rate": 8.167546666666666e-06,
            "loss": 0.676,
            "step": 206250
        },
        {
            "epoch": 0.55,
            "eval_accuracy": 0.5708,
            "eval_loss": 0.6741572618484497,
            "eval_runtime": 9.6873,
            "eval_samples_per_second": 1032.278,
            "eval_steps_per_second": 16.207,
            "step": 206250
        },
        {
            "epoch": 0.6,
            "learning_rate": 8.000951111111112e-06,
            "loss": 0.6758,
            "step": 225000
        },
        {
            "epoch": 0.6,
            "eval_accuracy": 0.569,
            "eval_loss": 0.6750027537345886,
            "eval_runtime": 10.218,
            "eval_samples_per_second": 978.668,
            "eval_steps_per_second": 15.365,
            "step": 225000
        },
        {
            "epoch": 0.65,
            "learning_rate": 7.834364444444446e-06,
            "loss": 0.6755,
            "step": 243750
        },
        {
            "epoch": 0.65,
            "eval_accuracy": 0.5688,
            "eval_loss": 0.6740711331367493,
            "eval_runtime": 10.1203,
            "eval_samples_per_second": 988.117,
            "eval_steps_per_second": 15.513,
            "step": 243750
        },
        {
            "epoch": 0.7,
            "learning_rate": 7.66776e-06,
            "loss": 0.6758,
            "step": 262500
        },
        {
            "epoch": 0.7,
            "eval_accuracy": 0.5676,
            "eval_loss": 0.6746249198913574,
            "eval_runtime": 9.7352,
            "eval_samples_per_second": 1027.2,
            "eval_steps_per_second": 16.127,
            "step": 262500
        },
        {
            "epoch": 0.75,
            "learning_rate": 7.501173333333334e-06,
            "loss": 0.6754,
            "step": 281250
        },
        {
            "epoch": 0.75,
            "eval_accuracy": 0.5728,
            "eval_loss": 0.673089861869812,
            "eval_runtime": 9.866,
            "eval_samples_per_second": 1013.582,
            "eval_steps_per_second": 15.913,
            "step": 281250
        },
        {
            "epoch": 0.8,
            "learning_rate": 7.334586666666668e-06,
            "loss": 0.6752,
            "step": 300000
        },
        {
            "epoch": 0.8,
            "eval_accuracy": 0.5678,
            "eval_loss": 0.6737684607505798,
            "eval_runtime": 10.0756,
            "eval_samples_per_second": 992.497,
            "eval_steps_per_second": 15.582,
            "step": 300000
        },
        {
            "epoch": 0.85,
            "learning_rate": 7.167982222222223e-06,
            "loss": 0.6753,
            "step": 318750
        },
        {
            "epoch": 0.85,
            "eval_accuracy": 0.5744,
            "eval_loss": 0.6733357310295105,
            "eval_runtime": 9.7112,
            "eval_samples_per_second": 1029.741,
            "eval_steps_per_second": 16.167,
            "step": 318750
        },
        {
            "epoch": 0.9,
            "learning_rate": 7.0013777777777784e-06,
            "loss": 0.6752,
            "step": 337500
        },
        {
            "epoch": 0.9,
            "eval_accuracy": 0.5692,
            "eval_loss": 0.6732030510902405,
            "eval_runtime": 9.8389,
            "eval_samples_per_second": 1016.377,
            "eval_steps_per_second": 15.957,
            "step": 337500
        },
        {
            "epoch": 0.95,
            "learning_rate": 6.834764444444445e-06,
            "loss": 0.6753,
            "step": 356250
        },
        {
            "epoch": 0.95,
            "eval_accuracy": 0.5714,
            "eval_loss": 0.6738024950027466,
            "eval_runtime": 9.6915,
            "eval_samples_per_second": 1031.834,
            "eval_steps_per_second": 16.2,
            "step": 356250
        },
        {
            "epoch": 1.0,
            "learning_rate": 6.66816888888889e-06,
            "loss": 0.6749,
            "step": 375000
        },
        {
            "epoch": 1.0,
            "eval_accuracy": 0.5711,
            "eval_loss": 0.6741089820861816,
            "eval_runtime": 10.1172,
            "eval_samples_per_second": 988.413,
            "eval_steps_per_second": 15.518,
            "step": 375000
        },
        {
            "epoch": 1.05,
            "learning_rate": 6.501564444444445e-06,
            "loss": 0.6732,
            "step": 393750
        },
        {
            "epoch": 1.05,
            "eval_accuracy": 0.572,
            "eval_loss": 0.6729293465614319,
            "eval_runtime": 9.7745,
            "eval_samples_per_second": 1023.073,
            "eval_steps_per_second": 16.062,
            "step": 393750
        },
        {
            "epoch": 1.1,
            "learning_rate": 6.33496e-06,
            "loss": 0.6734,
            "step": 412500
        },
        {
            "epoch": 1.1,
            "eval_accuracy": 0.5715,
            "eval_loss": 0.6731555461883545,
            "eval_runtime": 9.9359,
            "eval_samples_per_second": 1006.454,
            "eval_steps_per_second": 15.801,
            "step": 412500
        },
        {
            "epoch": 1.15,
            "learning_rate": 6.168355555555556e-06,
            "loss": 0.6733,
            "step": 431250
        },
        {
            "epoch": 1.15,
            "eval_accuracy": 0.5726,
            "eval_loss": 0.6726440191268921,
            "eval_runtime": 9.7596,
            "eval_samples_per_second": 1024.628,
            "eval_steps_per_second": 16.087,
            "step": 431250
        },
        {
            "epoch": 1.2,
            "learning_rate": 6.001751111111111e-06,
            "loss": 0.6734,
            "step": 450000
        },
        {
            "epoch": 1.2,
            "eval_accuracy": 0.5725,
            "eval_loss": 0.6719476580619812,
            "eval_runtime": 10.1776,
            "eval_samples_per_second": 982.551,
            "eval_steps_per_second": 15.426,
            "step": 450000
        },
        {
            "epoch": 1.25,
            "learning_rate": 5.8351555555555565e-06,
            "loss": 0.6732,
            "step": 468750
        },
        {
            "epoch": 1.25,
            "eval_accuracy": 0.5721,
            "eval_loss": 0.6720800995826721,
            "eval_runtime": 9.8195,
            "eval_samples_per_second": 1018.38,
            "eval_steps_per_second": 15.989,
            "step": 468750
        },
        {
            "epoch": 1.3,
            "learning_rate": 5.668542222222223e-06,
            "loss": 0.6732,
            "step": 487500
        },
        {
            "epoch": 1.3,
            "eval_accuracy": 0.5719,
            "eval_loss": 0.6730443239212036,
            "eval_runtime": 9.8464,
            "eval_samples_per_second": 1015.602,
            "eval_steps_per_second": 15.945,
            "step": 487500
        },
        {
            "epoch": 1.35,
            "learning_rate": 5.501928888888889e-06,
            "loss": 0.6732,
            "step": 506250
        },
        {
            "epoch": 1.35,
            "eval_accuracy": 0.5731,
            "eval_loss": 0.67209392786026,
            "eval_runtime": 9.9162,
            "eval_samples_per_second": 1008.449,
            "eval_steps_per_second": 15.833,
            "step": 506250
        },
        {
            "epoch": 1.4,
            "learning_rate": 5.335342222222223e-06,
            "loss": 0.6733,
            "step": 525000
        },
        {
            "epoch": 1.4,
            "eval_accuracy": 0.5715,
            "eval_loss": 0.6724963188171387,
            "eval_runtime": 9.8765,
            "eval_samples_per_second": 1012.507,
            "eval_steps_per_second": 15.896,
            "step": 525000
        },
        {
            "epoch": 1.45,
            "learning_rate": 5.168737777777778e-06,
            "loss": 0.6733,
            "step": 543750
        },
        {
            "epoch": 1.45,
            "eval_accuracy": 0.5757,
            "eval_loss": 0.6714832186698914,
            "eval_runtime": 10.1309,
            "eval_samples_per_second": 987.074,
            "eval_steps_per_second": 15.497,
            "step": 543750
        },
        {
            "epoch": 1.5,
            "learning_rate": 5.002133333333333e-06,
            "loss": 0.673,
            "step": 562500
        },
        {
            "epoch": 1.5,
            "eval_accuracy": 0.5762,
            "eval_loss": 0.6717364192008972,
            "eval_runtime": 9.8941,
            "eval_samples_per_second": 1010.706,
            "eval_steps_per_second": 15.868,
            "step": 562500
        },
        {
            "epoch": 1.55,
            "learning_rate": 4.835546666666668e-06,
            "loss": 0.6732,
            "step": 581250
        },
        {
            "epoch": 1.55,
            "eval_accuracy": 0.5735,
            "eval_loss": 0.672535240650177,
            "eval_runtime": 9.8695,
            "eval_samples_per_second": 1013.226,
            "eval_steps_per_second": 15.908,
            "step": 581250
        },
        {
            "epoch": 1.6,
            "learning_rate": 4.668942222222223e-06,
            "loss": 0.6733,
            "step": 600000
        },
        {
            "epoch": 1.6,
            "eval_accuracy": 0.5729,
            "eval_loss": 0.6723589897155762,
            "eval_runtime": 9.9575,
            "eval_samples_per_second": 1004.27,
            "eval_steps_per_second": 15.767,
            "step": 600000
        },
        {
            "epoch": 1.65,
            "learning_rate": 4.502346666666667e-06,
            "loss": 0.6729,
            "step": 618750
        },
        {
            "epoch": 1.65,
            "eval_accuracy": 0.5714,
            "eval_loss": 0.6716250777244568,
            "eval_runtime": 10.0075,
            "eval_samples_per_second": 999.255,
            "eval_steps_per_second": 15.688,
            "step": 618750
        },
        {
            "epoch": 1.7,
            "learning_rate": 4.335742222222223e-06,
            "loss": 0.6729,
            "step": 637500
        },
        {
            "epoch": 1.7,
            "eval_accuracy": 0.576,
            "eval_loss": 0.6714919805526733,
            "eval_runtime": 9.9861,
            "eval_samples_per_second": 1001.39,
            "eval_steps_per_second": 15.722,
            "step": 637500
        },
        {
            "epoch": 1.75,
            "learning_rate": 4.169155555555556e-06,
            "loss": 0.6726,
            "step": 656250
        },
        {
            "epoch": 1.75,
            "eval_accuracy": 0.5785,
            "eval_loss": 0.6716631054878235,
            "eval_runtime": 9.8047,
            "eval_samples_per_second": 1019.923,
            "eval_steps_per_second": 16.013,
            "step": 656250
        },
        {
            "epoch": 1.8,
            "learning_rate": 4.00256e-06,
            "loss": 0.6723,
            "step": 675000
        },
        {
            "epoch": 1.8,
            "eval_accuracy": 0.5791,
            "eval_loss": 0.6703997850418091,
            "eval_runtime": 9.8558,
            "eval_samples_per_second": 1014.636,
            "eval_steps_per_second": 15.93,
            "step": 675000
        },
        {
            "epoch": 1.85,
            "learning_rate": 3.835955555555556e-06,
            "loss": 0.6728,
            "step": 693750
        },
        {
            "epoch": 1.85,
            "eval_accuracy": 0.5763,
            "eval_loss": 0.6714270710945129,
            "eval_runtime": 9.7423,
            "eval_samples_per_second": 1026.453,
            "eval_steps_per_second": 16.115,
            "step": 693750
        },
        {
            "epoch": 1.9,
            "learning_rate": 3.6693511111111114e-06,
            "loss": 0.6724,
            "step": 712500
        },
        {
            "epoch": 1.9,
            "eval_accuracy": 0.5795,
            "eval_loss": 0.6705731749534607,
            "eval_runtime": 10.1221,
            "eval_samples_per_second": 987.933,
            "eval_steps_per_second": 15.511,
            "step": 712500
        },
        {
            "epoch": 1.95,
            "learning_rate": 3.502746666666667e-06,
            "loss": 0.6724,
            "step": 731250
        },
        {
            "epoch": 1.95,
            "eval_accuracy": 0.5789,
            "eval_loss": 0.6716538071632385,
            "eval_runtime": 9.891,
            "eval_samples_per_second": 1011.024,
            "eval_steps_per_second": 15.873,
            "step": 731250
        },
        {
            "epoch": 2.0,
            "learning_rate": 3.3361422222222222e-06,
            "loss": 0.6727,
            "step": 750000
        },
        {
            "epoch": 2.0,
            "eval_accuracy": 0.5789,
            "eval_loss": 0.6710340976715088,
            "eval_runtime": 9.6333,
            "eval_samples_per_second": 1038.067,
            "eval_steps_per_second": 16.298,
            "step": 750000
        }
    ],
    "max_steps": 1125000,
    "num_train_epochs": 3,
    "total_flos": 1.2542244576561201e+19,
    "trial_name": null,
    "trial_params": null
}