|
{ |
|
"best_metric": 0.9590172171592712, |
|
"best_model_checkpoint": "/kaggle/output/checkpoint-136000", |
|
"epoch": 5.826271186440678, |
|
"eval_steps": 1000, |
|
"global_step": 143000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.7777777777777777e-11, |
|
"loss": 1.2184, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.7750000000000004e-08, |
|
"loss": 1.1394, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.3327345309381238, |
|
"eval_loss": 1.1149410009384155, |
|
"eval_runtime": 20.6803, |
|
"eval_samples_per_second": 242.26, |
|
"eval_steps_per_second": 30.319, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 5.5527777777777784e-08, |
|
"loss": 1.1141, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.3401197604790419, |
|
"eval_loss": 1.104099988937378, |
|
"eval_runtime": 20.8477, |
|
"eval_samples_per_second": 240.314, |
|
"eval_steps_per_second": 30.075, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.330555555555556e-08, |
|
"loss": 1.116, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.3407185628742515, |
|
"eval_loss": 1.1040862798690796, |
|
"eval_runtime": 20.6818, |
|
"eval_samples_per_second": 242.242, |
|
"eval_steps_per_second": 30.317, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.1108333333333333e-07, |
|
"loss": 1.1158, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.32894211576846305, |
|
"eval_loss": 1.1020556688308716, |
|
"eval_runtime": 20.8541, |
|
"eval_samples_per_second": 240.241, |
|
"eval_steps_per_second": 30.066, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.3883333333333335e-07, |
|
"loss": 1.1135, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.34271457085828344, |
|
"eval_loss": 1.1008552312850952, |
|
"eval_runtime": 20.8055, |
|
"eval_samples_per_second": 240.802, |
|
"eval_steps_per_second": 30.136, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.6658333333333335e-07, |
|
"loss": 1.1121, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.3395209580838323, |
|
"eval_loss": 1.1004050970077515, |
|
"eval_runtime": 20.8985, |
|
"eval_samples_per_second": 239.731, |
|
"eval_steps_per_second": 30.002, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.9436111111111112e-07, |
|
"loss": 1.1089, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.35788423153692617, |
|
"eval_loss": 1.0985721349716187, |
|
"eval_runtime": 20.84, |
|
"eval_samples_per_second": 240.403, |
|
"eval_steps_per_second": 30.086, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.2213888888888891e-07, |
|
"loss": 1.1079, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.3331337325349301, |
|
"eval_loss": 1.098374843597412, |
|
"eval_runtime": 20.7886, |
|
"eval_samples_per_second": 240.998, |
|
"eval_steps_per_second": 30.161, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.4988888888888893e-07, |
|
"loss": 1.1087, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.34510978043912177, |
|
"eval_loss": 1.0993521213531494, |
|
"eval_runtime": 20.782, |
|
"eval_samples_per_second": 241.074, |
|
"eval_steps_per_second": 30.17, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.776666666666667e-07, |
|
"loss": 1.109, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.3475049900199601, |
|
"eval_loss": 1.0967597961425781, |
|
"eval_runtime": 20.6798, |
|
"eval_samples_per_second": 242.265, |
|
"eval_steps_per_second": 30.319, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.054444444444444e-07, |
|
"loss": 1.1052, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.37544910179640717, |
|
"eval_loss": 1.0941349267959595, |
|
"eval_runtime": 20.8641, |
|
"eval_samples_per_second": 240.126, |
|
"eval_steps_per_second": 30.052, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.3322222222222225e-07, |
|
"loss": 1.105, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.3834331337325349, |
|
"eval_loss": 1.0927647352218628, |
|
"eval_runtime": 20.6541, |
|
"eval_samples_per_second": 242.567, |
|
"eval_steps_per_second": 30.357, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3.609722222222222e-07, |
|
"loss": 1.1016, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.3457085828343313, |
|
"eval_loss": 1.0942081212997437, |
|
"eval_runtime": 21.0733, |
|
"eval_samples_per_second": 237.742, |
|
"eval_steps_per_second": 29.753, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 3.8875e-07, |
|
"loss": 1.1031, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.37005988023952097, |
|
"eval_loss": 1.0918152332305908, |
|
"eval_runtime": 20.9151, |
|
"eval_samples_per_second": 239.54, |
|
"eval_steps_per_second": 29.978, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.1652777777777786e-07, |
|
"loss": 1.1026, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.3790419161676647, |
|
"eval_loss": 1.0895211696624756, |
|
"eval_runtime": 21.0591, |
|
"eval_samples_per_second": 237.902, |
|
"eval_steps_per_second": 29.773, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.4427777777777783e-07, |
|
"loss": 1.0988, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.4101796407185629, |
|
"eval_loss": 1.0852997303009033, |
|
"eval_runtime": 20.9509, |
|
"eval_samples_per_second": 239.131, |
|
"eval_steps_per_second": 29.927, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.720555555555556e-07, |
|
"loss": 1.0974, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.43213572854291415, |
|
"eval_loss": 1.0791982412338257, |
|
"eval_runtime": 20.7526, |
|
"eval_samples_per_second": 241.415, |
|
"eval_steps_per_second": 30.213, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.998055555555556e-07, |
|
"loss": 1.0932, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.4275449101796407, |
|
"eval_loss": 1.072191596031189, |
|
"eval_runtime": 21.2435, |
|
"eval_samples_per_second": 235.837, |
|
"eval_steps_per_second": 29.515, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5.275833333333334e-07, |
|
"loss": 1.0833, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 0.43233532934131735, |
|
"eval_loss": 1.06425940990448, |
|
"eval_runtime": 20.7923, |
|
"eval_samples_per_second": 240.955, |
|
"eval_steps_per_second": 30.155, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.553333333333334e-07, |
|
"loss": 1.0787, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.4295409181636727, |
|
"eval_loss": 1.0638529062271118, |
|
"eval_runtime": 21.0018, |
|
"eval_samples_per_second": 238.551, |
|
"eval_steps_per_second": 29.855, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.831111111111111e-07, |
|
"loss": 1.0779, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.4243512974051896, |
|
"eval_loss": 1.0603673458099365, |
|
"eval_runtime": 20.9689, |
|
"eval_samples_per_second": 238.926, |
|
"eval_steps_per_second": 29.901, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 6.108888888888888e-07, |
|
"loss": 1.0751, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.43233532934131735, |
|
"eval_loss": 1.0603009462356567, |
|
"eval_runtime": 20.8897, |
|
"eval_samples_per_second": 239.831, |
|
"eval_steps_per_second": 30.015, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.386388888888889e-07, |
|
"loss": 1.0776, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.42734530938123755, |
|
"eval_loss": 1.0591468811035156, |
|
"eval_runtime": 20.964, |
|
"eval_samples_per_second": 238.981, |
|
"eval_steps_per_second": 29.908, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.664166666666667e-07, |
|
"loss": 1.0754, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.4245508982035928, |
|
"eval_loss": 1.0589721202850342, |
|
"eval_runtime": 20.9053, |
|
"eval_samples_per_second": 239.652, |
|
"eval_steps_per_second": 29.992, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 6.941666666666667e-07, |
|
"loss": 1.0736, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_accuracy": 0.43213572854291415, |
|
"eval_loss": 1.0583962202072144, |
|
"eval_runtime": 21.3265, |
|
"eval_samples_per_second": 234.919, |
|
"eval_steps_per_second": 29.4, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 7.219444444444444e-07, |
|
"loss": 1.0717, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_accuracy": 0.4305389221556886, |
|
"eval_loss": 1.0561293363571167, |
|
"eval_runtime": 21.3034, |
|
"eval_samples_per_second": 235.174, |
|
"eval_steps_per_second": 29.432, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 7.496944444444444e-07, |
|
"loss": 1.0709, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_accuracy": 0.4281437125748503, |
|
"eval_loss": 1.0555357933044434, |
|
"eval_runtime": 21.2178, |
|
"eval_samples_per_second": 236.123, |
|
"eval_steps_per_second": 29.551, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 7.774722222222223e-07, |
|
"loss": 1.0701, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_accuracy": 0.4217564870259481, |
|
"eval_loss": 1.054961085319519, |
|
"eval_runtime": 21.1775, |
|
"eval_samples_per_second": 236.571, |
|
"eval_steps_per_second": 29.607, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.052222222222223e-07, |
|
"loss": 1.0641, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_accuracy": 0.4291417165668663, |
|
"eval_loss": 1.0518379211425781, |
|
"eval_runtime": 21.0932, |
|
"eval_samples_per_second": 237.517, |
|
"eval_steps_per_second": 29.725, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 8.330000000000001e-07, |
|
"loss": 1.064, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_accuracy": 0.43173652694610776, |
|
"eval_loss": 1.0493717193603516, |
|
"eval_runtime": 21.2843, |
|
"eval_samples_per_second": 235.385, |
|
"eval_steps_per_second": 29.458, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 8.607500000000001e-07, |
|
"loss": 1.0693, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_accuracy": 0.4291417165668663, |
|
"eval_loss": 1.0521764755249023, |
|
"eval_runtime": 21.2278, |
|
"eval_samples_per_second": 236.011, |
|
"eval_steps_per_second": 29.537, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 8.885277777777779e-07, |
|
"loss": 1.0649, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_accuracy": 0.42375249500998, |
|
"eval_loss": 1.0528494119644165, |
|
"eval_runtime": 21.1249, |
|
"eval_samples_per_second": 237.161, |
|
"eval_steps_per_second": 29.681, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 9.163055555555556e-07, |
|
"loss": 1.0619, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_accuracy": 0.43293413173652695, |
|
"eval_loss": 1.049193263053894, |
|
"eval_runtime": 21.241, |
|
"eval_samples_per_second": 235.865, |
|
"eval_steps_per_second": 29.518, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 9.440555555555557e-07, |
|
"loss": 1.0582, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_accuracy": 0.4357285429141717, |
|
"eval_loss": 1.04512619972229, |
|
"eval_runtime": 21.2531, |
|
"eval_samples_per_second": 235.73, |
|
"eval_steps_per_second": 29.502, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 9.718333333333334e-07, |
|
"loss": 1.0629, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_accuracy": 0.43253493013972055, |
|
"eval_loss": 1.043523907661438, |
|
"eval_runtime": 21.2176, |
|
"eval_samples_per_second": 236.124, |
|
"eval_steps_per_second": 29.551, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 9.995833333333334e-07, |
|
"loss": 1.0588, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_accuracy": 0.4307385229540918, |
|
"eval_loss": 1.0413768291473389, |
|
"eval_runtime": 21.2225, |
|
"eval_samples_per_second": 236.07, |
|
"eval_steps_per_second": 29.544, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.0273611111111112e-06, |
|
"loss": 1.0552, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_accuracy": 0.4415169660678643, |
|
"eval_loss": 1.0397700071334839, |
|
"eval_runtime": 21.0651, |
|
"eval_samples_per_second": 237.835, |
|
"eval_steps_per_second": 29.765, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.055138888888889e-06, |
|
"loss": 1.0567, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_accuracy": 0.4419161676646707, |
|
"eval_loss": 1.0390877723693848, |
|
"eval_runtime": 21.2858, |
|
"eval_samples_per_second": 235.368, |
|
"eval_steps_per_second": 29.456, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.0829166666666667e-06, |
|
"loss": 1.054, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_accuracy": 0.43952095808383235, |
|
"eval_loss": 1.0404103994369507, |
|
"eval_runtime": 21.2544, |
|
"eval_samples_per_second": 235.715, |
|
"eval_steps_per_second": 29.5, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.1106666666666668e-06, |
|
"loss": 1.049, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_accuracy": 0.4477045908183633, |
|
"eval_loss": 1.0360453128814697, |
|
"eval_runtime": 21.0993, |
|
"eval_samples_per_second": 237.448, |
|
"eval_steps_per_second": 29.717, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.1384444444444446e-06, |
|
"loss": 1.0522, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_accuracy": 0.443313373253493, |
|
"eval_loss": 1.0359567403793335, |
|
"eval_runtime": 21.2828, |
|
"eval_samples_per_second": 235.401, |
|
"eval_steps_per_second": 29.46, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.1661944444444447e-06, |
|
"loss": 1.0459, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_accuracy": 0.437125748502994, |
|
"eval_loss": 1.0328505039215088, |
|
"eval_runtime": 21.2626, |
|
"eval_samples_per_second": 235.625, |
|
"eval_steps_per_second": 29.488, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.1939722222222222e-06, |
|
"loss": 1.0488, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_accuracy": 0.4379241516966068, |
|
"eval_loss": 1.0460196733474731, |
|
"eval_runtime": 21.2023, |
|
"eval_samples_per_second": 236.295, |
|
"eval_steps_per_second": 29.572, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.2217222222222223e-06, |
|
"loss": 1.0504, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_accuracy": 0.4421157684630739, |
|
"eval_loss": 1.0345144271850586, |
|
"eval_runtime": 21.2684, |
|
"eval_samples_per_second": 235.561, |
|
"eval_steps_per_second": 29.48, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.2494722222222224e-06, |
|
"loss": 1.0482, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_accuracy": 0.46467065868263474, |
|
"eval_loss": 1.0280784368515015, |
|
"eval_runtime": 21.1069, |
|
"eval_samples_per_second": 237.363, |
|
"eval_steps_per_second": 29.706, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.2772500000000001e-06, |
|
"loss": 1.046, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_accuracy": 0.4554890219560878, |
|
"eval_loss": 1.031961441040039, |
|
"eval_runtime": 21.2098, |
|
"eval_samples_per_second": 236.211, |
|
"eval_steps_per_second": 29.562, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.3050277777777777e-06, |
|
"loss": 1.0391, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_accuracy": 0.46706586826347307, |
|
"eval_loss": 1.023941159248352, |
|
"eval_runtime": 21.2269, |
|
"eval_samples_per_second": 236.021, |
|
"eval_steps_per_second": 29.538, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.3328055555555555e-06, |
|
"loss": 1.0444, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_accuracy": 0.4491017964071856, |
|
"eval_loss": 1.036636471748352, |
|
"eval_runtime": 21.1179, |
|
"eval_samples_per_second": 237.239, |
|
"eval_steps_per_second": 29.69, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.3605555555555555e-06, |
|
"loss": 1.0386, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.47305389221556887, |
|
"eval_loss": 1.0199605226516724, |
|
"eval_runtime": 21.2669, |
|
"eval_samples_per_second": 235.577, |
|
"eval_steps_per_second": 29.482, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.3883333333333333e-06, |
|
"loss": 1.0379, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_accuracy": 0.4738522954091816, |
|
"eval_loss": 1.0184080600738525, |
|
"eval_runtime": 21.2806, |
|
"eval_samples_per_second": 235.426, |
|
"eval_steps_per_second": 29.463, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.416111111111111e-06, |
|
"loss": 1.0365, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_accuracy": 0.474251497005988, |
|
"eval_loss": 1.0143232345581055, |
|
"eval_runtime": 21.1003, |
|
"eval_samples_per_second": 237.437, |
|
"eval_steps_per_second": 29.715, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.4438888888888889e-06, |
|
"loss": 1.0356, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_accuracy": 0.4782435129740519, |
|
"eval_loss": 1.0179694890975952, |
|
"eval_runtime": 21.2847, |
|
"eval_samples_per_second": 235.38, |
|
"eval_steps_per_second": 29.458, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.471611111111111e-06, |
|
"loss": 1.0401, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_accuracy": 0.4864271457085828, |
|
"eval_loss": 1.008955955505371, |
|
"eval_runtime": 21.0808, |
|
"eval_samples_per_second": 237.657, |
|
"eval_steps_per_second": 29.743, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4993888888888888e-06, |
|
"loss": 1.0338, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_accuracy": 0.4870259481037924, |
|
"eval_loss": 1.0100921392440796, |
|
"eval_runtime": 21.2581, |
|
"eval_samples_per_second": 235.674, |
|
"eval_steps_per_second": 29.495, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.5271388888888889e-06, |
|
"loss": 1.0322, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_accuracy": 0.48403193612774453, |
|
"eval_loss": 1.0082924365997314, |
|
"eval_runtime": 21.2835, |
|
"eval_samples_per_second": 235.394, |
|
"eval_steps_per_second": 29.459, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.5549166666666666e-06, |
|
"loss": 1.0287, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_accuracy": 0.47065868263473054, |
|
"eval_loss": 1.0229328870773315, |
|
"eval_runtime": 21.1356, |
|
"eval_samples_per_second": 237.04, |
|
"eval_steps_per_second": 29.666, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.5826944444444446e-06, |
|
"loss": 1.0291, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_accuracy": 0.4844311377245509, |
|
"eval_loss": 1.0074658393859863, |
|
"eval_runtime": 21.2919, |
|
"eval_samples_per_second": 235.301, |
|
"eval_steps_per_second": 29.448, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.6104722222222222e-06, |
|
"loss": 1.0304, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_accuracy": 0.4874251497005988, |
|
"eval_loss": 1.007895588874817, |
|
"eval_runtime": 21.1997, |
|
"eval_samples_per_second": 236.324, |
|
"eval_steps_per_second": 29.576, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.6381944444444445e-06, |
|
"loss": 1.0259, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_accuracy": 0.492814371257485, |
|
"eval_loss": 0.9993996620178223, |
|
"eval_runtime": 21.0668, |
|
"eval_samples_per_second": 237.815, |
|
"eval_steps_per_second": 29.763, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.6659722222222223e-06, |
|
"loss": 1.0314, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_accuracy": 0.48902195608782434, |
|
"eval_loss": 1.0036838054656982, |
|
"eval_runtime": 21.2359, |
|
"eval_samples_per_second": 235.921, |
|
"eval_steps_per_second": 29.525, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.6937500000000003e-06, |
|
"loss": 1.0276, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_accuracy": 0.48502994011976047, |
|
"eval_loss": 1.0065844058990479, |
|
"eval_runtime": 21.07, |
|
"eval_samples_per_second": 237.778, |
|
"eval_steps_per_second": 29.758, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.7215000000000002e-06, |
|
"loss": 1.0298, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_accuracy": 0.48263473053892214, |
|
"eval_loss": 1.0085350275039673, |
|
"eval_runtime": 21.4949, |
|
"eval_samples_per_second": 233.079, |
|
"eval_steps_per_second": 29.17, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.7492777777777777e-06, |
|
"loss": 1.0292, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_accuracy": 0.49560878243512974, |
|
"eval_loss": 0.9984678030014038, |
|
"eval_runtime": 21.4688, |
|
"eval_samples_per_second": 233.362, |
|
"eval_steps_per_second": 29.205, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.7770277777777778e-06, |
|
"loss": 1.0273, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_accuracy": 0.4942115768463074, |
|
"eval_loss": 0.9960917830467224, |
|
"eval_runtime": 21.307, |
|
"eval_samples_per_second": 235.134, |
|
"eval_steps_per_second": 29.427, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.8048055555555558e-06, |
|
"loss": 1.0256, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"eval_accuracy": 0.49840319361277446, |
|
"eval_loss": 0.9994720816612244, |
|
"eval_runtime": 21.5116, |
|
"eval_samples_per_second": 232.897, |
|
"eval_steps_per_second": 29.147, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.8325833333333333e-06, |
|
"loss": 1.0306, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_accuracy": 0.49660678642714573, |
|
"eval_loss": 0.9951530694961548, |
|
"eval_runtime": 21.5301, |
|
"eval_samples_per_second": 232.697, |
|
"eval_steps_per_second": 29.122, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.8603333333333334e-06, |
|
"loss": 1.0245, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_accuracy": 0.499001996007984, |
|
"eval_loss": 0.9978940486907959, |
|
"eval_runtime": 21.0729, |
|
"eval_samples_per_second": 237.746, |
|
"eval_steps_per_second": 29.754, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 1.8881111111111114e-06, |
|
"loss": 1.0247, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 0.9951012134552002, |
|
"eval_runtime": 21.1793, |
|
"eval_samples_per_second": 236.552, |
|
"eval_steps_per_second": 29.604, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.9158611111111115e-06, |
|
"loss": 1.0241, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_accuracy": 0.5021956087824351, |
|
"eval_loss": 0.9969767332077026, |
|
"eval_runtime": 21.2229, |
|
"eval_samples_per_second": 236.066, |
|
"eval_steps_per_second": 29.544, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.943638888888889e-06, |
|
"loss": 1.0242, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_accuracy": 0.49720558882235527, |
|
"eval_loss": 1.00163733959198, |
|
"eval_runtime": 21.0442, |
|
"eval_samples_per_second": 238.071, |
|
"eval_steps_per_second": 29.794, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.971416666666667e-06, |
|
"loss": 1.017, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_accuracy": 0.49600798403193613, |
|
"eval_loss": 1.0031572580337524, |
|
"eval_runtime": 21.1987, |
|
"eval_samples_per_second": 236.336, |
|
"eval_steps_per_second": 29.577, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.999166666666667e-06, |
|
"loss": 1.0237, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_accuracy": 0.49001996007984033, |
|
"eval_loss": 1.0067389011383057, |
|
"eval_runtime": 21.2825, |
|
"eval_samples_per_second": 235.405, |
|
"eval_steps_per_second": 29.461, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.0269444444444444e-06, |
|
"loss": 1.0208, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_accuracy": 0.49560878243512974, |
|
"eval_loss": 1.0011804103851318, |
|
"eval_runtime": 21.137, |
|
"eval_samples_per_second": 237.025, |
|
"eval_steps_per_second": 29.664, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 2.0546944444444447e-06, |
|
"loss": 1.021, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_accuracy": 0.49580838323353293, |
|
"eval_loss": 0.9941307902336121, |
|
"eval_runtime": 21.161, |
|
"eval_samples_per_second": 236.757, |
|
"eval_steps_per_second": 29.63, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 2.0824722222222223e-06, |
|
"loss": 1.0154, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"eval_accuracy": 0.49580838323353293, |
|
"eval_loss": 0.9971462488174438, |
|
"eval_runtime": 21.0263, |
|
"eval_samples_per_second": 238.273, |
|
"eval_steps_per_second": 29.82, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 2.1102222222222226e-06, |
|
"loss": 1.0173, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_accuracy": 0.5011976047904192, |
|
"eval_loss": 0.9884433150291443, |
|
"eval_runtime": 21.2199, |
|
"eval_samples_per_second": 236.099, |
|
"eval_steps_per_second": 29.548, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 2.138e-06, |
|
"loss": 1.0181, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"eval_accuracy": 0.4808383233532934, |
|
"eval_loss": 1.0162075757980347, |
|
"eval_runtime": 21.2108, |
|
"eval_samples_per_second": 236.201, |
|
"eval_steps_per_second": 29.56, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 2.16575e-06, |
|
"loss": 1.0112, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"eval_accuracy": 0.49880239520958086, |
|
"eval_loss": 0.9902569651603699, |
|
"eval_runtime": 21.032, |
|
"eval_samples_per_second": 238.209, |
|
"eval_steps_per_second": 29.812, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 2.193527777777778e-06, |
|
"loss": 1.0166, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"eval_accuracy": 0.49500998003992014, |
|
"eval_loss": 1.0056451559066772, |
|
"eval_runtime": 21.1866, |
|
"eval_samples_per_second": 236.47, |
|
"eval_steps_per_second": 29.594, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 2.221277777777778e-06, |
|
"loss": 1.0135, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_accuracy": 0.49161676646706587, |
|
"eval_loss": 1.004488229751587, |
|
"eval_runtime": 21.2075, |
|
"eval_samples_per_second": 236.237, |
|
"eval_steps_per_second": 29.565, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 2.249055555555556e-06, |
|
"loss": 1.0147, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_accuracy": 0.49101796407185627, |
|
"eval_loss": 1.0022324323654175, |
|
"eval_runtime": 21.1622, |
|
"eval_samples_per_second": 236.743, |
|
"eval_steps_per_second": 29.628, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 2.2768055555555557e-06, |
|
"loss": 1.0249, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"eval_accuracy": 0.49540918163672654, |
|
"eval_loss": 0.9961836338043213, |
|
"eval_runtime": 21.2661, |
|
"eval_samples_per_second": 235.586, |
|
"eval_steps_per_second": 29.484, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 2.3045833333333336e-06, |
|
"loss": 1.012, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_accuracy": 0.49520958083832334, |
|
"eval_loss": 0.997968316078186, |
|
"eval_runtime": 21.1127, |
|
"eval_samples_per_second": 237.298, |
|
"eval_steps_per_second": 29.698, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 2.3323333333333335e-06, |
|
"loss": 1.0153, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_accuracy": 0.5063872255489021, |
|
"eval_loss": 0.9885319471359253, |
|
"eval_runtime": 21.2015, |
|
"eval_samples_per_second": 236.304, |
|
"eval_steps_per_second": 29.573, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 2.360111111111111e-06, |
|
"loss": 1.0113, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"eval_accuracy": 0.49860279441117766, |
|
"eval_loss": 0.9927662014961243, |
|
"eval_runtime": 21.2447, |
|
"eval_samples_per_second": 235.823, |
|
"eval_steps_per_second": 29.513, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 2.3878611111111113e-06, |
|
"loss": 1.0222, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_accuracy": 0.5073852295409181, |
|
"eval_loss": 0.9855450987815857, |
|
"eval_runtime": 21.03, |
|
"eval_samples_per_second": 238.231, |
|
"eval_steps_per_second": 29.815, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 2.4156388888888893e-06, |
|
"loss": 1.0062, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"eval_accuracy": 0.5075848303393213, |
|
"eval_loss": 0.9856505990028381, |
|
"eval_runtime": 21.1796, |
|
"eval_samples_per_second": 236.548, |
|
"eval_steps_per_second": 29.604, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 2.443388888888889e-06, |
|
"loss": 1.0157, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"eval_accuracy": 0.5115768463073852, |
|
"eval_loss": 0.9844857454299927, |
|
"eval_runtime": 21.2024, |
|
"eval_samples_per_second": 236.294, |
|
"eval_steps_per_second": 29.572, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 2.4711666666666668e-06, |
|
"loss": 1.0069, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"eval_accuracy": 0.499001996007984, |
|
"eval_loss": 0.9894497394561768, |
|
"eval_runtime": 21.2532, |
|
"eval_samples_per_second": 235.729, |
|
"eval_steps_per_second": 29.501, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 2.4989166666666666e-06, |
|
"loss": 1.0164, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"eval_accuracy": 0.5101796407185629, |
|
"eval_loss": 0.9842925667762756, |
|
"eval_runtime": 21.1914, |
|
"eval_samples_per_second": 236.416, |
|
"eval_steps_per_second": 29.587, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 2.5266944444444446e-06, |
|
"loss": 1.0175, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"eval_accuracy": 0.5055888223552895, |
|
"eval_loss": 0.9913986325263977, |
|
"eval_runtime": 21.0589, |
|
"eval_samples_per_second": 237.904, |
|
"eval_steps_per_second": 29.774, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 2.554444444444445e-06, |
|
"loss": 1.013, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"eval_accuracy": 0.5057884231536927, |
|
"eval_loss": 0.9887382388114929, |
|
"eval_runtime": 21.2965, |
|
"eval_samples_per_second": 235.25, |
|
"eval_steps_per_second": 29.441, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 2.5822222222222224e-06, |
|
"loss": 1.0157, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"eval_accuracy": 0.5127744510978044, |
|
"eval_loss": 0.9854277968406677, |
|
"eval_runtime": 21.2839, |
|
"eval_samples_per_second": 235.389, |
|
"eval_steps_per_second": 29.459, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 2.6099722222222223e-06, |
|
"loss": 1.0108, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"eval_accuracy": 0.5039920159680639, |
|
"eval_loss": 0.9907957315444946, |
|
"eval_runtime": 21.0477, |
|
"eval_samples_per_second": 238.03, |
|
"eval_steps_per_second": 29.789, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 2.6377500000000003e-06, |
|
"loss": 1.0132, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_accuracy": 0.5161676646706587, |
|
"eval_loss": 0.9830310940742493, |
|
"eval_runtime": 21.2625, |
|
"eval_samples_per_second": 235.626, |
|
"eval_steps_per_second": 29.489, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 2.6655e-06, |
|
"loss": 1.0139, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_accuracy": 0.5125748502994012, |
|
"eval_loss": 0.9806137681007385, |
|
"eval_runtime": 21.1848, |
|
"eval_samples_per_second": 236.49, |
|
"eval_steps_per_second": 29.597, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 2.6932777777777777e-06, |
|
"loss": 1.0104, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"eval_accuracy": 0.49640718562874253, |
|
"eval_loss": 0.9965940117835999, |
|
"eval_runtime": 21.1145, |
|
"eval_samples_per_second": 237.278, |
|
"eval_steps_per_second": 29.695, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 2.721027777777778e-06, |
|
"loss": 1.011, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_accuracy": 0.49800399201596807, |
|
"eval_loss": 0.9846762418746948, |
|
"eval_runtime": 21.2444, |
|
"eval_samples_per_second": 235.827, |
|
"eval_steps_per_second": 29.514, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 2.748805555555556e-06, |
|
"loss": 1.0063, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"eval_accuracy": 0.5103792415169661, |
|
"eval_loss": 0.9853057265281677, |
|
"eval_runtime": 21.0515, |
|
"eval_samples_per_second": 237.988, |
|
"eval_steps_per_second": 29.784, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 2.776555555555556e-06, |
|
"loss": 1.007, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"eval_accuracy": 0.5115768463073852, |
|
"eval_loss": 0.983065128326416, |
|
"eval_runtime": 21.2494, |
|
"eval_samples_per_second": 235.771, |
|
"eval_steps_per_second": 29.507, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 2.8043333333333334e-06, |
|
"loss": 1.0107, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"eval_accuracy": 0.5129740518962076, |
|
"eval_loss": 0.9863881468772888, |
|
"eval_runtime": 21.2658, |
|
"eval_samples_per_second": 235.59, |
|
"eval_steps_per_second": 29.484, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 2.8320833333333333e-06, |
|
"loss": 1.0055, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"eval_accuracy": 0.49840319361277446, |
|
"eval_loss": 0.9891018867492676, |
|
"eval_runtime": 21.0702, |
|
"eval_samples_per_second": 237.777, |
|
"eval_steps_per_second": 29.758, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 2.8598611111111112e-06, |
|
"loss": 1.0115, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_accuracy": 0.5115768463073852, |
|
"eval_loss": 0.9800674319267273, |
|
"eval_runtime": 21.2539, |
|
"eval_samples_per_second": 235.722, |
|
"eval_steps_per_second": 29.5, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 2.8876111111111115e-06, |
|
"loss": 1.0039, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"eval_accuracy": 0.49500998003992014, |
|
"eval_loss": 0.9979402422904968, |
|
"eval_runtime": 21.1981, |
|
"eval_samples_per_second": 236.342, |
|
"eval_steps_per_second": 29.578, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 2.915388888888889e-06, |
|
"loss": 1.0028, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"eval_accuracy": 0.492814371257485, |
|
"eval_loss": 1.007880687713623, |
|
"eval_runtime": 21.1024, |
|
"eval_samples_per_second": 237.413, |
|
"eval_steps_per_second": 29.712, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 2.943138888888889e-06, |
|
"loss": 0.9993, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"eval_accuracy": 0.513373253493014, |
|
"eval_loss": 0.9815743565559387, |
|
"eval_runtime": 21.2317, |
|
"eval_samples_per_second": 235.968, |
|
"eval_steps_per_second": 29.531, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 2.9709166666666665e-06, |
|
"loss": 1.0033, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"eval_accuracy": 0.5021956087824351, |
|
"eval_loss": 0.9937964081764221, |
|
"eval_runtime": 21.2363, |
|
"eval_samples_per_second": 235.917, |
|
"eval_steps_per_second": 29.525, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 2.9986666666666668e-06, |
|
"loss": 1.003, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_accuracy": 0.5035928143712575, |
|
"eval_loss": 0.9884979128837585, |
|
"eval_runtime": 21.1776, |
|
"eval_samples_per_second": 236.57, |
|
"eval_steps_per_second": 29.607, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 3.0264444444444448e-06, |
|
"loss": 0.9986, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"eval_accuracy": 0.5155688622754491, |
|
"eval_loss": 0.982122004032135, |
|
"eval_runtime": 21.1623, |
|
"eval_samples_per_second": 236.741, |
|
"eval_steps_per_second": 29.628, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 3.0541944444444446e-06, |
|
"loss": 1.0062, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"eval_accuracy": 0.5125748502994012, |
|
"eval_loss": 0.9810440540313721, |
|
"eval_runtime": 21.0636, |
|
"eval_samples_per_second": 237.851, |
|
"eval_steps_per_second": 29.767, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 3.081972222222222e-06, |
|
"loss": 0.9979, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"eval_accuracy": 0.5147704590818363, |
|
"eval_loss": 0.9768257141113281, |
|
"eval_runtime": 21.2634, |
|
"eval_samples_per_second": 235.616, |
|
"eval_steps_per_second": 29.487, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 3.109722222222222e-06, |
|
"loss": 1.0098, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"eval_accuracy": 0.5179640718562875, |
|
"eval_loss": 0.9725316762924194, |
|
"eval_runtime": 21.2038, |
|
"eval_samples_per_second": 236.279, |
|
"eval_steps_per_second": 29.57, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 3.1375e-06, |
|
"loss": 0.9928, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"eval_accuracy": 0.5055888223552895, |
|
"eval_loss": 0.9796192049980164, |
|
"eval_runtime": 21.0973, |
|
"eval_samples_per_second": 237.471, |
|
"eval_steps_per_second": 29.719, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 3.1652500000000003e-06, |
|
"loss": 1.0021, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"eval_accuracy": 0.513373253493014, |
|
"eval_loss": 0.9703001976013184, |
|
"eval_runtime": 21.2766, |
|
"eval_samples_per_second": 235.47, |
|
"eval_steps_per_second": 29.469, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 3.1930277777777783e-06, |
|
"loss": 1.0028, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"eval_accuracy": 0.5207584830339321, |
|
"eval_loss": 0.9677809476852417, |
|
"eval_runtime": 21.2537, |
|
"eval_samples_per_second": 235.724, |
|
"eval_steps_per_second": 29.501, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 3.220777777777778e-06, |
|
"loss": 0.9967, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"eval_accuracy": 0.5211576846307385, |
|
"eval_loss": 0.970120906829834, |
|
"eval_runtime": 21.0552, |
|
"eval_samples_per_second": 237.946, |
|
"eval_steps_per_second": 29.779, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 3.2485555555555553e-06, |
|
"loss": 1.0032, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"eval_accuracy": 0.5167664670658683, |
|
"eval_loss": 0.9775295853614807, |
|
"eval_runtime": 21.2233, |
|
"eval_samples_per_second": 236.062, |
|
"eval_steps_per_second": 29.543, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 3.2763055555555556e-06, |
|
"loss": 1.0044, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_accuracy": 0.5197604790419161, |
|
"eval_loss": 0.973882794380188, |
|
"eval_runtime": 21.2552, |
|
"eval_samples_per_second": 235.708, |
|
"eval_steps_per_second": 29.499, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 3.3040555555555554e-06, |
|
"loss": 0.999, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"eval_accuracy": 0.5191616766467065, |
|
"eval_loss": 0.9664270281791687, |
|
"eval_runtime": 21.4118, |
|
"eval_samples_per_second": 233.983, |
|
"eval_steps_per_second": 29.283, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 3.3318333333333334e-06, |
|
"loss": 1.0026, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"eval_accuracy": 0.5121756487025948, |
|
"eval_loss": 0.9835990071296692, |
|
"eval_runtime": 21.2614, |
|
"eval_samples_per_second": 235.639, |
|
"eval_steps_per_second": 29.49, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 3.3596111111111114e-06, |
|
"loss": 0.9907, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"eval_accuracy": 0.518562874251497, |
|
"eval_loss": 0.9704477787017822, |
|
"eval_runtime": 21.1011, |
|
"eval_samples_per_second": 237.429, |
|
"eval_steps_per_second": 29.714, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 3.3873611111111113e-06, |
|
"loss": 0.9993, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"eval_accuracy": 0.5187624750499003, |
|
"eval_loss": 0.967042863368988, |
|
"eval_runtime": 21.1973, |
|
"eval_samples_per_second": 236.35, |
|
"eval_steps_per_second": 29.579, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 3.4151388888888892e-06, |
|
"loss": 0.9963, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"eval_accuracy": 0.5079840319361277, |
|
"eval_loss": 0.9776150584220886, |
|
"eval_runtime": 21.2683, |
|
"eval_samples_per_second": 235.562, |
|
"eval_steps_per_second": 29.48, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 3.4429166666666664e-06, |
|
"loss": 0.9865, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"eval_accuracy": 0.52375249500998, |
|
"eval_loss": 0.9682861566543579, |
|
"eval_runtime": 21.044, |
|
"eval_samples_per_second": 238.073, |
|
"eval_steps_per_second": 29.795, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 3.4706944444444444e-06, |
|
"loss": 0.9942, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"eval_accuracy": 0.5167664670658683, |
|
"eval_loss": 0.9669526219367981, |
|
"eval_runtime": 21.3091, |
|
"eval_samples_per_second": 235.111, |
|
"eval_steps_per_second": 29.424, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 3.4984444444444447e-06, |
|
"loss": 0.9931, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"eval_accuracy": 0.52375249500998, |
|
"eval_loss": 0.9690492749214172, |
|
"eval_runtime": 21.2155, |
|
"eval_samples_per_second": 236.148, |
|
"eval_steps_per_second": 29.554, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 3.5262222222222226e-06, |
|
"loss": 0.9901, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"eval_accuracy": 0.5215568862275449, |
|
"eval_loss": 0.9688431620597839, |
|
"eval_runtime": 21.0922, |
|
"eval_samples_per_second": 237.528, |
|
"eval_steps_per_second": 29.727, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 3.5539722222222225e-06, |
|
"loss": 0.983, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"eval_accuracy": 0.5251497005988024, |
|
"eval_loss": 0.9629590511322021, |
|
"eval_runtime": 21.4048, |
|
"eval_samples_per_second": 234.06, |
|
"eval_steps_per_second": 29.292, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 3.5817500000000005e-06, |
|
"loss": 0.9896, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"eval_accuracy": 0.5261477045908184, |
|
"eval_loss": 0.9658498167991638, |
|
"eval_runtime": 21.3795, |
|
"eval_samples_per_second": 234.337, |
|
"eval_steps_per_second": 29.327, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 3.6095000000000003e-06, |
|
"loss": 0.9804, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"eval_accuracy": 0.5253493013972056, |
|
"eval_loss": 0.9743538498878479, |
|
"eval_runtime": 21.0654, |
|
"eval_samples_per_second": 237.831, |
|
"eval_steps_per_second": 29.764, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 3.6372777777777775e-06, |
|
"loss": 0.9837, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"eval_accuracy": 0.5285429141716567, |
|
"eval_loss": 0.9610263109207153, |
|
"eval_runtime": 21.4995, |
|
"eval_samples_per_second": 233.029, |
|
"eval_steps_per_second": 29.163, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 3.6650277777777778e-06, |
|
"loss": 0.9837, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"eval_accuracy": 0.5261477045908184, |
|
"eval_loss": 0.9654673933982849, |
|
"eval_runtime": 21.2182, |
|
"eval_samples_per_second": 236.118, |
|
"eval_steps_per_second": 29.55, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 3.6928055555555557e-06, |
|
"loss": 0.9907, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"eval_accuracy": 0.5215568862275449, |
|
"eval_loss": 0.9649068117141724, |
|
"eval_runtime": 21.1274, |
|
"eval_samples_per_second": 237.133, |
|
"eval_steps_per_second": 29.677, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 3.7205555555555556e-06, |
|
"loss": 0.9873, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"eval_accuracy": 0.5213572854291417, |
|
"eval_loss": 0.9645326733589172, |
|
"eval_runtime": 21.2708, |
|
"eval_samples_per_second": 235.534, |
|
"eval_steps_per_second": 29.477, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 3.7483333333333336e-06, |
|
"loss": 0.9945, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"eval_accuracy": 0.5213572854291417, |
|
"eval_loss": 0.9662097692489624, |
|
"eval_runtime": 21.2993, |
|
"eval_samples_per_second": 235.219, |
|
"eval_steps_per_second": 29.438, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 3.7760833333333334e-06, |
|
"loss": 0.9904, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"eval_accuracy": 0.5213572854291417, |
|
"eval_loss": 0.9590172171592712, |
|
"eval_runtime": 21.1516, |
|
"eval_samples_per_second": 236.862, |
|
"eval_steps_per_second": 29.643, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 3.8038611111111114e-06, |
|
"loss": 0.9866, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"eval_accuracy": 0.526746506986028, |
|
"eval_loss": 0.9692327976226807, |
|
"eval_runtime": 21.2988, |
|
"eval_samples_per_second": 235.224, |
|
"eval_steps_per_second": 29.438, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 3.831583333333333e-06, |
|
"loss": 0.9842, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"eval_accuracy": 0.5243512974051896, |
|
"eval_loss": 0.9735116958618164, |
|
"eval_runtime": 21.3467, |
|
"eval_samples_per_second": 234.697, |
|
"eval_steps_per_second": 29.372, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 3.859361111111111e-06, |
|
"loss": 0.9814, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"eval_accuracy": 0.525748502994012, |
|
"eval_loss": 0.9681652188301086, |
|
"eval_runtime": 21.1236, |
|
"eval_samples_per_second": 237.176, |
|
"eval_steps_per_second": 29.683, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 3.887138888888889e-06, |
|
"loss": 0.9831, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"eval_accuracy": 0.5197604790419161, |
|
"eval_loss": 0.9666500687599182, |
|
"eval_runtime": 21.235, |
|
"eval_samples_per_second": 235.931, |
|
"eval_steps_per_second": 29.527, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 3.914888888888889e-06, |
|
"loss": 0.9838, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"eval_accuracy": 0.5339321357285429, |
|
"eval_loss": 0.9595745801925659, |
|
"eval_runtime": 21.2241, |
|
"eval_samples_per_second": 236.053, |
|
"eval_steps_per_second": 29.542, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 3.942666666666667e-06, |
|
"loss": 0.9825, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"eval_accuracy": 0.5271457085828344, |
|
"eval_loss": 0.9641498327255249, |
|
"eval_runtime": 21.3787, |
|
"eval_samples_per_second": 234.345, |
|
"eval_steps_per_second": 29.328, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 3.970444444444445e-06, |
|
"loss": 0.979, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"eval_accuracy": 0.5339321357285429, |
|
"eval_loss": 0.9592242240905762, |
|
"eval_runtime": 21.4402, |
|
"eval_samples_per_second": 233.674, |
|
"eval_steps_per_second": 29.244, |
|
"step": 143000 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 10000000, |
|
"num_train_epochs": 408, |
|
"save_steps": 1000, |
|
"total_flos": 9.964522376921088e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|