{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 191.38755980861245,
  "global_step": 40000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 2.39,
      "learning_rate": 3.125e-06,
      "loss": 0.4016,
      "step": 500
    },
    {
      "epoch": 4.78,
      "learning_rate": 6.25e-06,
      "loss": 0.202,
      "step": 1000
    },
    {
      "epoch": 7.18,
      "learning_rate": 9.375000000000001e-06,
      "loss": 0.1987,
      "step": 1500
    },
    {
      "epoch": 9.57,
      "learning_rate": 1.25e-05,
      "loss": 0.1964,
      "step": 2000
    },
    {
      "epoch": 11.96,
      "learning_rate": 1.5625e-05,
      "loss": 0.1579,
      "step": 2500
    },
    {
      "epoch": 14.35,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 0.1305,
      "step": 3000
    },
    {
      "epoch": 16.75,
      "learning_rate": 2.1875e-05,
      "loss": 0.1194,
      "step": 3500
    },
    {
      "epoch": 19.14,
      "learning_rate": 2.5e-05,
      "loss": 0.1094,
      "step": 4000
    },
    {
      "epoch": 21.53,
      "learning_rate": 2.465277777777778e-05,
      "loss": 0.0994,
      "step": 4500
    },
    {
      "epoch": 23.92,
      "learning_rate": 2.4305555555555558e-05,
      "loss": 0.0881,
      "step": 5000
    },
    {
      "epoch": 26.32,
      "learning_rate": 2.3958333333333334e-05,
      "loss": 0.0795,
      "step": 5500
    },
    {
      "epoch": 28.71,
      "learning_rate": 2.361111111111111e-05,
      "loss": 0.0738,
      "step": 6000
    },
    {
      "epoch": 31.1,
      "learning_rate": 2.326388888888889e-05,
      "loss": 0.0673,
      "step": 6500
    },
    {
      "epoch": 33.49,
      "learning_rate": 2.2916666666666667e-05,
      "loss": 0.0621,
      "step": 7000
    },
    {
      "epoch": 35.89,
      "learning_rate": 2.2569444444444447e-05,
      "loss": 0.058,
      "step": 7500
    },
    {
      "epoch": 38.28,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 0.0539,
      "step": 8000
    },
    {
      "epoch": 40.67,
      "learning_rate": 2.1875e-05,
      "loss": 0.0489,
      "step": 8500
    },
    {
      "epoch": 43.06,
      "learning_rate": 2.152777777777778e-05,
      "loss": 0.0463,
      "step": 9000
    },
    {
      "epoch": 45.45,
      "learning_rate": 2.1180555555555556e-05,
      "loss": 0.0424,
      "step": 9500
    },
    {
      "epoch": 47.85,
      "learning_rate": 2.0833333333333336e-05,
      "loss": 0.0402,
      "step": 10000
    },
    {
      "epoch": 50.24,
      "learning_rate": 2.0486111111111113e-05,
      "loss": 0.0375,
      "step": 10500
    },
    {
      "epoch": 52.63,
      "learning_rate": 2.013888888888889e-05,
      "loss": 0.0348,
      "step": 11000
    },
    {
      "epoch": 55.02,
      "learning_rate": 1.9791666666666665e-05,
      "loss": 0.0328,
      "step": 11500
    },
    {
      "epoch": 57.42,
      "learning_rate": 1.9444444444444445e-05,
      "loss": 0.0306,
      "step": 12000
    },
    {
      "epoch": 59.81,
      "learning_rate": 1.9097222222222222e-05,
      "loss": 0.0291,
      "step": 12500
    },
    {
      "epoch": 62.2,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 0.0271,
      "step": 13000
    },
    {
      "epoch": 64.59,
      "learning_rate": 1.8402777777777778e-05,
      "loss": 0.0261,
      "step": 13500
    },
    {
      "epoch": 66.99,
      "learning_rate": 1.8055555555555555e-05,
      "loss": 0.0244,
      "step": 14000
    },
    {
      "epoch": 69.38,
      "learning_rate": 1.7708333333333335e-05,
      "loss": 0.0227,
      "step": 14500
    },
    {
      "epoch": 71.77,
      "learning_rate": 1.736111111111111e-05,
      "loss": 0.0217,
      "step": 15000
    },
    {
      "epoch": 74.16,
      "learning_rate": 1.701388888888889e-05,
      "loss": 0.0212,
      "step": 15500
    },
    {
      "epoch": 76.56,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.0199,
      "step": 16000
    },
    {
      "epoch": 78.95,
      "learning_rate": 1.6319444444444444e-05,
      "loss": 0.019,
      "step": 16500
    },
    {
      "epoch": 81.34,
      "learning_rate": 1.597222222222222e-05,
      "loss": 0.0183,
      "step": 17000
    },
    {
      "epoch": 83.73,
      "learning_rate": 1.5625e-05,
      "loss": 0.0169,
      "step": 17500
    },
    {
      "epoch": 86.12,
      "learning_rate": 1.527777777777778e-05,
      "loss": 0.0166,
      "step": 18000
    },
    {
      "epoch": 88.52,
      "learning_rate": 1.4930555555555557e-05,
      "loss": 0.016,
      "step": 18500
    },
    {
      "epoch": 90.91,
      "learning_rate": 1.4583333333333335e-05,
      "loss": 0.015,
      "step": 19000
    },
    {
      "epoch": 93.3,
      "learning_rate": 1.4236111111111111e-05,
      "loss": 0.0148,
      "step": 19500
    },
    {
      "epoch": 95.69,
      "learning_rate": 1.388888888888889e-05,
      "loss": 0.0139,
      "step": 20000
    },
    {
      "epoch": 98.09,
      "learning_rate": 1.3541666666666666e-05,
      "loss": 0.0138,
      "step": 20500
    },
    {
      "epoch": 100.48,
      "learning_rate": 1.3194444444444446e-05,
      "loss": 0.0137,
      "step": 21000
    },
    {
      "epoch": 102.87,
      "learning_rate": 1.2847222222222222e-05,
      "loss": 0.0123,
      "step": 21500
    },
    {
      "epoch": 105.26,
      "learning_rate": 1.25e-05,
      "loss": 0.0122,
      "step": 22000
    },
    {
      "epoch": 107.66,
      "learning_rate": 1.2152777777777779e-05,
      "loss": 0.0115,
      "step": 22500
    },
    {
      "epoch": 110.05,
      "learning_rate": 1.1805555555555555e-05,
      "loss": 0.0116,
      "step": 23000
    },
    {
      "epoch": 112.44,
      "learning_rate": 1.1458333333333333e-05,
      "loss": 0.0109,
      "step": 23500
    },
    {
      "epoch": 114.83,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 0.0105,
      "step": 24000
    },
    {
      "epoch": 117.22,
      "learning_rate": 1.076388888888889e-05,
      "loss": 0.0105,
      "step": 24500
    },
    {
      "epoch": 119.62,
      "learning_rate": 1.0416666666666668e-05,
      "loss": 0.01,
      "step": 25000
    },
    {
      "epoch": 122.01,
      "learning_rate": 1.0069444444444445e-05,
      "loss": 0.0095,
      "step": 25500
    },
    {
      "epoch": 124.4,
      "learning_rate": 9.722222222222223e-06,
      "loss": 0.0097,
      "step": 26000
    },
    {
      "epoch": 126.79,
      "learning_rate": 9.375000000000001e-06,
      "loss": 0.0092,
      "step": 26500
    },
    {
      "epoch": 129.19,
      "learning_rate": 9.027777777777777e-06,
      "loss": 0.0088,
      "step": 27000
    },
    {
      "epoch": 131.58,
      "learning_rate": 8.680555555555556e-06,
      "loss": 0.0087,
      "step": 27500
    },
    {
      "epoch": 133.97,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.0086,
      "step": 28000
    },
    {
      "epoch": 136.36,
      "learning_rate": 7.98611111111111e-06,
      "loss": 0.0085,
      "step": 28500
    },
    {
      "epoch": 138.76,
      "learning_rate": 7.63888888888889e-06,
      "loss": 0.0081,
      "step": 29000
    },
    {
      "epoch": 141.15,
      "learning_rate": 7.2916666666666674e-06,
      "loss": 0.0081,
      "step": 29500
    },
    {
      "epoch": 143.54,
      "learning_rate": 6.944444444444445e-06,
      "loss": 0.0077,
      "step": 30000
    },
    {
      "epoch": 145.93,
      "learning_rate": 6.597222222222223e-06,
      "loss": 0.0075,
      "step": 30500
    },
    {
      "epoch": 148.33,
      "learning_rate": 6.25e-06,
      "loss": 0.0077,
      "step": 31000
    },
    {
      "epoch": 150.72,
      "learning_rate": 5.902777777777778e-06,
      "loss": 0.0078,
      "step": 31500
    },
    {
      "epoch": 153.11,
      "learning_rate": 5.555555555555556e-06,
      "loss": 0.0071,
      "step": 32000
    },
    {
      "epoch": 155.5,
      "learning_rate": 5.208333333333334e-06,
      "loss": 0.0073,
      "step": 32500
    },
    {
      "epoch": 157.89,
      "learning_rate": 4.861111111111111e-06,
      "loss": 0.007,
      "step": 33000
    },
    {
      "epoch": 160.29,
      "learning_rate": 4.513888888888889e-06,
      "loss": 0.0068,
      "step": 33500
    },
    {
      "epoch": 162.68,
      "learning_rate": 4.166666666666667e-06,
      "loss": 0.0071,
      "step": 34000
    },
    {
      "epoch": 165.07,
      "learning_rate": 3.819444444444445e-06,
      "loss": 0.0068,
      "step": 34500
    },
    {
      "epoch": 167.46,
      "learning_rate": 3.4722222222222224e-06,
      "loss": 0.0068,
      "step": 35000
    },
    {
      "epoch": 169.86,
      "learning_rate": 3.125e-06,
      "loss": 0.0066,
      "step": 35500
    },
    {
      "epoch": 172.25,
      "learning_rate": 2.777777777777778e-06,
      "loss": 0.0065,
      "step": 36000
    },
    {
      "epoch": 174.64,
      "learning_rate": 2.4305555555555557e-06,
      "loss": 0.0064,
      "step": 36500
    },
    {
      "epoch": 177.03,
      "learning_rate": 2.0833333333333334e-06,
      "loss": 0.0066,
      "step": 37000
    },
    {
      "epoch": 179.43,
      "learning_rate": 1.7361111111111112e-06,
      "loss": 0.0064,
      "step": 37500
    },
    {
      "epoch": 181.82,
      "learning_rate": 1.388888888888889e-06,
      "loss": 0.0062,
      "step": 38000
    },
    {
      "epoch": 184.21,
      "learning_rate": 1.0416666666666667e-06,
      "loss": 0.0061,
      "step": 38500
    },
    {
      "epoch": 186.6,
      "learning_rate": 6.944444444444445e-07,
      "loss": 0.0061,
      "step": 39000
    },
    {
      "epoch": 189.0,
      "learning_rate": 3.4722222222222224e-07,
      "loss": 0.006,
      "step": 39500
    },
    {
      "epoch": 191.39,
      "learning_rate": 0.0,
      "loss": 0.0063,
      "step": 40000
    },
    {
      "epoch": 191.39,
      "step": 40000,
      "total_flos": 2.8134821491393536e+17,
      "train_runtime": 19555.7785,
      "train_samples_per_second": 2.045
    }
  ],
  "max_steps": 40000,
  "num_train_epochs": 192,
  "total_flos": 2.8134821491393536e+17,
  "trial_name": null,
  "trial_params": null
}