{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "global_step": 750000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1e-05, "loss": 3.3081, "step": 1 }, { "epoch": 0.05, "learning_rate": 9.83344888888889e-06, "loss": 0.6857, "step": 18750 }, { "epoch": 0.05, "eval_accuracy": 0.5564, "eval_loss": 0.6798537373542786, "eval_runtime": 9.7283, "eval_samples_per_second": 1027.927, "eval_steps_per_second": 16.138, "step": 18750 }, { "epoch": 0.1, "learning_rate": 9.666862222222223e-06, "loss": 0.6804, "step": 37500 }, { "epoch": 0.1, "eval_accuracy": 0.5607, "eval_loss": 0.677202045917511, "eval_runtime": 9.8699, "eval_samples_per_second": 1013.18, "eval_steps_per_second": 15.907, "step": 37500 }, { "epoch": 0.15, "learning_rate": 9.500275555555555e-06, "loss": 0.679, "step": 56250 }, { "epoch": 0.15, "eval_accuracy": 0.5559, "eval_loss": 0.6789325475692749, "eval_runtime": 9.7877, "eval_samples_per_second": 1021.686, "eval_steps_per_second": 16.04, "step": 56250 }, { "epoch": 0.2, "learning_rate": 9.33368888888889e-06, "loss": 0.6785, "step": 75000 }, { "epoch": 0.2, "eval_accuracy": 0.5639, "eval_loss": 0.6760360598564148, "eval_runtime": 9.8616, "eval_samples_per_second": 1014.039, "eval_steps_per_second": 15.92, "step": 75000 }, { "epoch": 0.25, "learning_rate": 9.167093333333335e-06, "loss": 0.6777, "step": 93750 }, { "epoch": 0.25, "eval_accuracy": 0.5672, "eval_loss": 0.6752009987831116, "eval_runtime": 9.8899, "eval_samples_per_second": 1011.135, "eval_steps_per_second": 15.875, "step": 93750 }, { "epoch": 0.3, "learning_rate": 9.000515555555556e-06, "loss": 0.6771, "step": 112500 }, { "epoch": 0.3, "eval_accuracy": 0.5647, "eval_loss": 0.6761014461517334, "eval_runtime": 9.9903, "eval_samples_per_second": 1000.97, "eval_steps_per_second": 15.715, "step": 112500 }, { "epoch": 0.35, "learning_rate": 8.833928888888889e-06, "loss": 0.6765, "step": 131250 }, { "epoch": 0.35, "eval_accuracy": 0.5665, "eval_loss": 0.6758425235748291, "eval_runtime": 9.7731, "eval_samples_per_second": 1023.22, "eval_steps_per_second": 16.065, "step": 131250 }, { "epoch": 0.4, "learning_rate": 8.667333333333334e-06, "loss": 0.6765, "step": 150000 }, { "epoch": 0.4, "eval_accuracy": 0.5669, "eval_loss": 0.6743916869163513, "eval_runtime": 9.7984, "eval_samples_per_second": 1020.577, "eval_steps_per_second": 16.023, "step": 150000 }, { "epoch": 0.45, "learning_rate": 8.50073777777778e-06, "loss": 0.6762, "step": 168750 }, { "epoch": 0.45, "eval_accuracy": 0.5682, "eval_loss": 0.6749153137207031, "eval_runtime": 9.9597, "eval_samples_per_second": 1004.049, "eval_steps_per_second": 15.764, "step": 168750 }, { "epoch": 0.5, "learning_rate": 8.334142222222223e-06, "loss": 0.676, "step": 187500 }, { "epoch": 0.5, "eval_accuracy": 0.5673, "eval_loss": 0.6741730570793152, "eval_runtime": 9.9295, "eval_samples_per_second": 1007.098, "eval_steps_per_second": 15.811, "step": 187500 }, { "epoch": 0.55, "learning_rate": 8.167546666666666e-06, "loss": 0.676, "step": 206250 }, { "epoch": 0.55, "eval_accuracy": 0.5708, "eval_loss": 0.6741572618484497, "eval_runtime": 9.6873, "eval_samples_per_second": 1032.278, "eval_steps_per_second": 16.207, "step": 206250 }, { "epoch": 0.6, "learning_rate": 8.000951111111112e-06, "loss": 0.6758, "step": 225000 }, { "epoch": 0.6, "eval_accuracy": 0.569, "eval_loss": 0.6750027537345886, "eval_runtime": 10.218, "eval_samples_per_second": 978.668, "eval_steps_per_second": 15.365, "step": 225000 }, { "epoch": 0.65, "learning_rate": 7.834364444444446e-06, "loss": 0.6755, "step": 243750 }, { "epoch": 0.65, "eval_accuracy": 0.5688, "eval_loss": 0.6740711331367493, "eval_runtime": 10.1203, "eval_samples_per_second": 988.117, "eval_steps_per_second": 15.513, "step": 243750 }, { "epoch": 0.7, "learning_rate": 7.66776e-06, "loss": 0.6758, "step": 262500 }, { "epoch": 0.7, "eval_accuracy": 0.5676, "eval_loss": 0.6746249198913574, "eval_runtime": 9.7352, "eval_samples_per_second": 1027.2, "eval_steps_per_second": 16.127, "step": 262500 }, { "epoch": 0.75, "learning_rate": 7.501173333333334e-06, "loss": 0.6754, "step": 281250 }, { "epoch": 0.75, "eval_accuracy": 0.5728, "eval_loss": 0.673089861869812, "eval_runtime": 9.866, "eval_samples_per_second": 1013.582, "eval_steps_per_second": 15.913, "step": 281250 }, { "epoch": 0.8, "learning_rate": 7.334586666666668e-06, "loss": 0.6752, "step": 300000 }, { "epoch": 0.8, "eval_accuracy": 0.5678, "eval_loss": 0.6737684607505798, "eval_runtime": 10.0756, "eval_samples_per_second": 992.497, "eval_steps_per_second": 15.582, "step": 300000 }, { "epoch": 0.85, "learning_rate": 7.167982222222223e-06, "loss": 0.6753, "step": 318750 }, { "epoch": 0.85, "eval_accuracy": 0.5744, "eval_loss": 0.6733357310295105, "eval_runtime": 9.7112, "eval_samples_per_second": 1029.741, "eval_steps_per_second": 16.167, "step": 318750 }, { "epoch": 0.9, "learning_rate": 7.0013777777777784e-06, "loss": 0.6752, "step": 337500 }, { "epoch": 0.9, "eval_accuracy": 0.5692, "eval_loss": 0.6732030510902405, "eval_runtime": 9.8389, "eval_samples_per_second": 1016.377, "eval_steps_per_second": 15.957, "step": 337500 }, { "epoch": 0.95, "learning_rate": 6.834764444444445e-06, "loss": 0.6753, "step": 356250 }, { "epoch": 0.95, "eval_accuracy": 0.5714, "eval_loss": 0.6738024950027466, "eval_runtime": 9.6915, "eval_samples_per_second": 1031.834, "eval_steps_per_second": 16.2, "step": 356250 }, { "epoch": 1.0, "learning_rate": 6.66816888888889e-06, "loss": 0.6749, "step": 375000 }, { "epoch": 1.0, "eval_accuracy": 0.5711, "eval_loss": 0.6741089820861816, "eval_runtime": 10.1172, "eval_samples_per_second": 988.413, "eval_steps_per_second": 15.518, "step": 375000 }, { "epoch": 1.05, "learning_rate": 6.501564444444445e-06, "loss": 0.6732, "step": 393750 }, { "epoch": 1.05, "eval_accuracy": 0.572, "eval_loss": 0.6729293465614319, "eval_runtime": 9.7745, "eval_samples_per_second": 1023.073, "eval_steps_per_second": 16.062, "step": 393750 }, { "epoch": 1.1, "learning_rate": 6.33496e-06, "loss": 0.6734, "step": 412500 }, { "epoch": 1.1, "eval_accuracy": 0.5715, "eval_loss": 0.6731555461883545, "eval_runtime": 9.9359, "eval_samples_per_second": 1006.454, "eval_steps_per_second": 15.801, "step": 412500 }, { "epoch": 1.15, "learning_rate": 6.168355555555556e-06, "loss": 0.6733, "step": 431250 }, { "epoch": 1.15, "eval_accuracy": 0.5726, "eval_loss": 0.6726440191268921, "eval_runtime": 9.7596, "eval_samples_per_second": 1024.628, "eval_steps_per_second": 16.087, "step": 431250 }, { "epoch": 1.2, "learning_rate": 6.001751111111111e-06, "loss": 0.6734, "step": 450000 }, { "epoch": 1.2, "eval_accuracy": 0.5725, "eval_loss": 0.6719476580619812, "eval_runtime": 10.1776, "eval_samples_per_second": 982.551, "eval_steps_per_second": 15.426, "step": 450000 }, { "epoch": 1.25, "learning_rate": 5.8351555555555565e-06, "loss": 0.6732, "step": 468750 }, { "epoch": 1.25, "eval_accuracy": 0.5721, "eval_loss": 0.6720800995826721, "eval_runtime": 9.8195, "eval_samples_per_second": 1018.38, "eval_steps_per_second": 15.989, "step": 468750 }, { "epoch": 1.3, "learning_rate": 5.668542222222223e-06, "loss": 0.6732, "step": 487500 }, { "epoch": 1.3, "eval_accuracy": 0.5719, "eval_loss": 0.6730443239212036, "eval_runtime": 9.8464, "eval_samples_per_second": 1015.602, "eval_steps_per_second": 15.945, "step": 487500 }, { "epoch": 1.35, "learning_rate": 5.501928888888889e-06, "loss": 0.6732, "step": 506250 }, { "epoch": 1.35, "eval_accuracy": 0.5731, "eval_loss": 0.67209392786026, "eval_runtime": 9.9162, "eval_samples_per_second": 1008.449, "eval_steps_per_second": 15.833, "step": 506250 }, { "epoch": 1.4, "learning_rate": 5.335342222222223e-06, "loss": 0.6733, "step": 525000 }, { "epoch": 1.4, "eval_accuracy": 0.5715, "eval_loss": 0.6724963188171387, "eval_runtime": 9.8765, "eval_samples_per_second": 1012.507, "eval_steps_per_second": 15.896, "step": 525000 }, { "epoch": 1.45, "learning_rate": 5.168737777777778e-06, "loss": 0.6733, "step": 543750 }, { "epoch": 1.45, "eval_accuracy": 0.5757, "eval_loss": 0.6714832186698914, "eval_runtime": 10.1309, "eval_samples_per_second": 987.074, "eval_steps_per_second": 15.497, "step": 543750 }, { "epoch": 1.5, "learning_rate": 5.002133333333333e-06, "loss": 0.673, "step": 562500 }, { "epoch": 1.5, "eval_accuracy": 0.5762, "eval_loss": 0.6717364192008972, "eval_runtime": 9.8941, "eval_samples_per_second": 1010.706, "eval_steps_per_second": 15.868, "step": 562500 }, { "epoch": 1.55, "learning_rate": 4.835546666666668e-06, "loss": 0.6732, "step": 581250 }, { "epoch": 1.55, "eval_accuracy": 0.5735, "eval_loss": 0.672535240650177, "eval_runtime": 9.8695, "eval_samples_per_second": 1013.226, "eval_steps_per_second": 15.908, "step": 581250 }, { "epoch": 1.6, "learning_rate": 4.668942222222223e-06, "loss": 0.6733, "step": 600000 }, { "epoch": 1.6, "eval_accuracy": 0.5729, "eval_loss": 0.6723589897155762, "eval_runtime": 9.9575, "eval_samples_per_second": 1004.27, "eval_steps_per_second": 15.767, "step": 600000 }, { "epoch": 1.65, "learning_rate": 4.502346666666667e-06, "loss": 0.6729, "step": 618750 }, { "epoch": 1.65, "eval_accuracy": 0.5714, "eval_loss": 0.6716250777244568, "eval_runtime": 10.0075, "eval_samples_per_second": 999.255, "eval_steps_per_second": 15.688, "step": 618750 }, { "epoch": 1.7, "learning_rate": 4.335742222222223e-06, "loss": 0.6729, "step": 637500 }, { "epoch": 1.7, "eval_accuracy": 0.576, "eval_loss": 0.6714919805526733, "eval_runtime": 9.9861, "eval_samples_per_second": 1001.39, "eval_steps_per_second": 15.722, "step": 637500 }, { "epoch": 1.75, "learning_rate": 4.169155555555556e-06, "loss": 0.6726, "step": 656250 }, { "epoch": 1.75, "eval_accuracy": 0.5785, "eval_loss": 0.6716631054878235, "eval_runtime": 9.8047, "eval_samples_per_second": 1019.923, "eval_steps_per_second": 16.013, "step": 656250 }, { "epoch": 1.8, "learning_rate": 4.00256e-06, "loss": 0.6723, "step": 675000 }, { "epoch": 1.8, "eval_accuracy": 0.5791, "eval_loss": 0.6703997850418091, "eval_runtime": 9.8558, "eval_samples_per_second": 1014.636, "eval_steps_per_second": 15.93, "step": 675000 }, { "epoch": 1.85, "learning_rate": 3.835955555555556e-06, "loss": 0.6728, "step": 693750 }, { "epoch": 1.85, "eval_accuracy": 0.5763, "eval_loss": 0.6714270710945129, "eval_runtime": 9.7423, "eval_samples_per_second": 1026.453, "eval_steps_per_second": 16.115, "step": 693750 }, { "epoch": 1.9, "learning_rate": 3.6693511111111114e-06, "loss": 0.6724, "step": 712500 }, { "epoch": 1.9, "eval_accuracy": 0.5795, "eval_loss": 0.6705731749534607, "eval_runtime": 10.1221, "eval_samples_per_second": 987.933, "eval_steps_per_second": 15.511, "step": 712500 }, { "epoch": 1.95, "learning_rate": 3.502746666666667e-06, "loss": 0.6724, "step": 731250 }, { "epoch": 1.95, "eval_accuracy": 0.5789, "eval_loss": 0.6716538071632385, "eval_runtime": 9.891, "eval_samples_per_second": 1011.024, "eval_steps_per_second": 15.873, "step": 731250 }, { "epoch": 2.0, "learning_rate": 3.3361422222222222e-06, "loss": 0.6727, "step": 750000 }, { "epoch": 2.0, "eval_accuracy": 0.5789, "eval_loss": 0.6710340976715088, "eval_runtime": 9.6333, "eval_samples_per_second": 1038.067, "eval_steps_per_second": 16.298, "step": 750000 } ], "max_steps": 1125000, "num_train_epochs": 3, "total_flos": 1.2542244576561201e+19, "trial_name": null, "trial_params": null }