|
{ |
|
"best_metric": 0.28431499004364014, |
|
"best_model_checkpoint": "./new_models/gpt2/checkpoint-25000", |
|
"epoch": 168.83116883116884, |
|
"global_step": 39000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 9.4041, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 7.6702, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 10.39, |
|
"learning_rate": 1.2e-05, |
|
"loss": 6.7042, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.85, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 5.8391, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.32, |
|
"learning_rate": 2e-05, |
|
"loss": 5.1775, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.78, |
|
"learning_rate": 1.9963963963963965e-05, |
|
"loss": 4.7103, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 24.24, |
|
"learning_rate": 1.992792792792793e-05, |
|
"loss": 4.353, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 27.71, |
|
"learning_rate": 1.9891891891891894e-05, |
|
"loss": 4.04, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 31.17, |
|
"learning_rate": 1.9855855855855857e-05, |
|
"loss": 3.7865, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 34.63, |
|
"learning_rate": 1.981981981981982e-05, |
|
"loss": 3.5376, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 34.63, |
|
"eval_loss": 3.2091352939605713, |
|
"eval_runtime": 3.6439, |
|
"eval_samples_per_second": 14.27, |
|
"eval_steps_per_second": 1.921, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 38.1, |
|
"learning_rate": 1.9783783783783786e-05, |
|
"loss": 3.3258, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 41.56, |
|
"learning_rate": 1.974774774774775e-05, |
|
"loss": 3.1155, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 45.02, |
|
"learning_rate": 1.9711711711711716e-05, |
|
"loss": 2.9341, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 48.48, |
|
"learning_rate": 1.967567567567568e-05, |
|
"loss": 2.7419, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 51.95, |
|
"learning_rate": 1.963963963963964e-05, |
|
"loss": 2.5793, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 55.41, |
|
"learning_rate": 1.9603603603603604e-05, |
|
"loss": 2.4091, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 58.87, |
|
"learning_rate": 1.956756756756757e-05, |
|
"loss": 2.2517, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 62.34, |
|
"learning_rate": 1.9531531531531534e-05, |
|
"loss": 2.0899, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 65.8, |
|
"learning_rate": 1.9495495495495497e-05, |
|
"loss": 1.9464, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 69.26, |
|
"learning_rate": 1.9459459459459463e-05, |
|
"loss": 1.803, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 69.26, |
|
"eval_loss": 1.7681734561920166, |
|
"eval_runtime": 3.5208, |
|
"eval_samples_per_second": 14.769, |
|
"eval_steps_per_second": 1.988, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 72.73, |
|
"learning_rate": 1.9423423423423423e-05, |
|
"loss": 1.6706, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 76.19, |
|
"learning_rate": 1.938738738738739e-05, |
|
"loss": 1.5401, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 79.65, |
|
"learning_rate": 1.9351351351351352e-05, |
|
"loss": 1.4045, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 83.12, |
|
"learning_rate": 1.931531531531532e-05, |
|
"loss": 1.2934, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 86.58, |
|
"learning_rate": 1.927927927927928e-05, |
|
"loss": 1.1735, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 90.04, |
|
"learning_rate": 1.9243243243243244e-05, |
|
"loss": 1.0624, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 93.51, |
|
"learning_rate": 1.9207207207207207e-05, |
|
"loss": 0.9525, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 96.97, |
|
"learning_rate": 1.9171171171171174e-05, |
|
"loss": 0.8541, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 100.43, |
|
"learning_rate": 1.9135135135135137e-05, |
|
"loss": 0.7571, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 103.9, |
|
"learning_rate": 1.90990990990991e-05, |
|
"loss": 0.6733, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 103.9, |
|
"eval_loss": 0.9859427213668823, |
|
"eval_runtime": 3.5218, |
|
"eval_samples_per_second": 14.765, |
|
"eval_steps_per_second": 1.988, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 107.36, |
|
"learning_rate": 1.9063063063063066e-05, |
|
"loss": 0.5883, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 110.82, |
|
"learning_rate": 1.902702702702703e-05, |
|
"loss": 0.5167, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 114.29, |
|
"learning_rate": 1.8990990990990992e-05, |
|
"loss": 0.4459, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 117.75, |
|
"learning_rate": 1.8954954954954955e-05, |
|
"loss": 0.385, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 121.21, |
|
"learning_rate": 1.891891891891892e-05, |
|
"loss": 0.3311, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 124.68, |
|
"learning_rate": 1.8882882882882884e-05, |
|
"loss": 0.2853, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 128.14, |
|
"learning_rate": 1.884684684684685e-05, |
|
"loss": 0.2442, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 131.6, |
|
"learning_rate": 1.8810810810810813e-05, |
|
"loss": 0.2097, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 135.06, |
|
"learning_rate": 1.8774774774774776e-05, |
|
"loss": 0.1802, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 138.53, |
|
"learning_rate": 1.873873873873874e-05, |
|
"loss": 0.1561, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 138.53, |
|
"eval_loss": 0.8047342300415039, |
|
"eval_runtime": 3.5244, |
|
"eval_samples_per_second": 14.754, |
|
"eval_steps_per_second": 1.986, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 141.99, |
|
"learning_rate": 1.8702702702702706e-05, |
|
"loss": 0.1359, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 145.45, |
|
"learning_rate": 1.866666666666667e-05, |
|
"loss": 0.12, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 148.92, |
|
"learning_rate": 1.863063063063063e-05, |
|
"loss": 0.1066, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 152.38, |
|
"learning_rate": 1.8594594594594598e-05, |
|
"loss": 0.0952, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 155.84, |
|
"learning_rate": 1.855855855855856e-05, |
|
"loss": 0.0866, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 159.31, |
|
"learning_rate": 1.8522522522522524e-05, |
|
"loss": 0.0791, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 162.77, |
|
"learning_rate": 1.8486486486486487e-05, |
|
"loss": 0.072, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 166.23, |
|
"learning_rate": 1.8450450450450453e-05, |
|
"loss": 0.0658, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 169.7, |
|
"learning_rate": 1.8414414414414416e-05, |
|
"loss": 0.0622, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 173.16, |
|
"learning_rate": 1.8378378378378383e-05, |
|
"loss": 0.058, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 173.16, |
|
"eval_loss": 0.8171238303184509, |
|
"eval_runtime": 3.5228, |
|
"eval_samples_per_second": 14.761, |
|
"eval_steps_per_second": 1.987, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 176.62, |
|
"learning_rate": 1.8342342342342342e-05, |
|
"loss": 0.0531, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 180.09, |
|
"learning_rate": 1.830630630630631e-05, |
|
"loss": 0.0504, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 183.55, |
|
"learning_rate": 1.827027027027027e-05, |
|
"loss": 0.046, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 187.01, |
|
"learning_rate": 1.8234234234234234e-05, |
|
"loss": 0.0447, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 190.48, |
|
"learning_rate": 1.81981981981982e-05, |
|
"loss": 0.0543, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 193.94, |
|
"learning_rate": 1.8162162162162164e-05, |
|
"loss": 0.0492, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 197.4, |
|
"learning_rate": 1.8126126126126127e-05, |
|
"loss": 0.0438, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 200.87, |
|
"learning_rate": 1.809009009009009e-05, |
|
"loss": 0.0547, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 204.33, |
|
"learning_rate": 1.8054054054054056e-05, |
|
"loss": 0.0615, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 207.79, |
|
"learning_rate": 1.801801801801802e-05, |
|
"loss": 0.072, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 207.79, |
|
"eval_loss": 0.8289902210235596, |
|
"eval_runtime": 3.5216, |
|
"eval_samples_per_second": 14.766, |
|
"eval_steps_per_second": 1.988, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 211.26, |
|
"learning_rate": 1.7981981981981985e-05, |
|
"loss": 0.1157, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 214.72, |
|
"learning_rate": 1.7945945945945948e-05, |
|
"loss": 0.0869, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 218.18, |
|
"learning_rate": 1.790990990990991e-05, |
|
"loss": 1.0166, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 221.65, |
|
"learning_rate": 1.7873873873873874e-05, |
|
"loss": 0.0771, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 225.11, |
|
"learning_rate": 1.783783783783784e-05, |
|
"loss": 0.0953, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 228.57, |
|
"learning_rate": 1.7801801801801804e-05, |
|
"loss": 0.6189, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 232.03, |
|
"learning_rate": 1.7765765765765767e-05, |
|
"loss": 0.5593, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 235.5, |
|
"learning_rate": 1.7729729729729733e-05, |
|
"loss": 0.376, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 238.96, |
|
"learning_rate": 1.7693693693693696e-05, |
|
"loss": 0.4129, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 242.42, |
|
"learning_rate": 1.765765765765766e-05, |
|
"loss": 2.2984, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 242.42, |
|
"eval_loss": 4.4349541664123535, |
|
"eval_runtime": 3.5205, |
|
"eval_samples_per_second": 14.77, |
|
"eval_steps_per_second": 1.988, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 245.89, |
|
"learning_rate": 1.7621621621621622e-05, |
|
"loss": 3.4028, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 249.35, |
|
"learning_rate": 1.7585585585585588e-05, |
|
"loss": 0.7196, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 252.81, |
|
"learning_rate": 1.754954954954955e-05, |
|
"loss": 1.162, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 256.28, |
|
"learning_rate": 1.7513513513513517e-05, |
|
"loss": 0.7413, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 259.74, |
|
"learning_rate": 1.7477477477477477e-05, |
|
"loss": 1.1918, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 263.2, |
|
"learning_rate": 1.7441441441441443e-05, |
|
"loss": 0.8564, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 266.67, |
|
"learning_rate": 1.7405405405405406e-05, |
|
"loss": 0.2815, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 270.13, |
|
"learning_rate": 1.7369369369369373e-05, |
|
"loss": 0.5848, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 273.59, |
|
"learning_rate": 1.7333333333333336e-05, |
|
"loss": 0.6489, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 277.06, |
|
"learning_rate": 1.72972972972973e-05, |
|
"loss": 1.0025, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 277.06, |
|
"eval_loss": 1.2763237953186035, |
|
"eval_runtime": 3.5102, |
|
"eval_samples_per_second": 14.814, |
|
"eval_steps_per_second": 1.994, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 280.52, |
|
"learning_rate": 1.726126126126126e-05, |
|
"loss": 0.7947, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 283.98, |
|
"learning_rate": 1.7225225225225225e-05, |
|
"loss": 0.558, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 287.45, |
|
"learning_rate": 1.718918918918919e-05, |
|
"loss": 0.6356, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 290.91, |
|
"learning_rate": 1.7153153153153154e-05, |
|
"loss": 0.5268, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 294.37, |
|
"learning_rate": 1.711711711711712e-05, |
|
"loss": 0.2633, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 297.84, |
|
"learning_rate": 1.7081081081081083e-05, |
|
"loss": 0.2457, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 301.3, |
|
"learning_rate": 1.7045045045045046e-05, |
|
"loss": 0.5308, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 304.76, |
|
"learning_rate": 1.700900900900901e-05, |
|
"loss": 0.369, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 308.23, |
|
"learning_rate": 1.6972972972972975e-05, |
|
"loss": 0.3203, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 311.69, |
|
"learning_rate": 1.693693693693694e-05, |
|
"loss": 2.5307, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 311.69, |
|
"eval_loss": 1.3849806785583496, |
|
"eval_runtime": 3.5124, |
|
"eval_samples_per_second": 14.805, |
|
"eval_steps_per_second": 1.993, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 39.39, |
|
"learning_rate": 1.96273022751896e-05, |
|
"loss": 3.0696, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 39.83, |
|
"learning_rate": 1.962296858071506e-05, |
|
"loss": 3.0068, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 40.26, |
|
"learning_rate": 1.9618634886240522e-05, |
|
"loss": 2.7896, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 40.69, |
|
"learning_rate": 1.9614301191765985e-05, |
|
"loss": 2.5042, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 41.13, |
|
"learning_rate": 1.960996749729144e-05, |
|
"loss": 2.8704, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 41.56, |
|
"learning_rate": 1.9605633802816904e-05, |
|
"loss": 3.4878, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 41.99, |
|
"learning_rate": 1.9601300108342363e-05, |
|
"loss": 3.0682, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 42.42, |
|
"learning_rate": 1.9596966413867822e-05, |
|
"loss": 2.9751, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 42.86, |
|
"learning_rate": 1.9592632719393285e-05, |
|
"loss": 3.3576, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 43.29, |
|
"learning_rate": 1.9588299024918744e-05, |
|
"loss": 2.9478, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 43.29, |
|
"eval_loss": 1.7224024534225464, |
|
"eval_runtime": 3.6186, |
|
"eval_samples_per_second": 14.37, |
|
"eval_steps_per_second": 1.934, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 47.62, |
|
"learning_rate": 1.954496208017335e-05, |
|
"loss": 2.4401, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 47.62, |
|
"eval_loss": 1.6094621419906616, |
|
"eval_runtime": 3.6227, |
|
"eval_samples_per_second": 14.354, |
|
"eval_steps_per_second": 1.932, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 51.95, |
|
"learning_rate": 1.9501625135427952e-05, |
|
"loss": 2.3021, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 51.95, |
|
"eval_loss": 1.9848077297210693, |
|
"eval_runtime": 3.511, |
|
"eval_samples_per_second": 14.81, |
|
"eval_steps_per_second": 1.994, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 56.28, |
|
"learning_rate": 1.945828819068256e-05, |
|
"loss": 1.8831, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 56.28, |
|
"eval_loss": 0.5190821290016174, |
|
"eval_runtime": 3.5109, |
|
"eval_samples_per_second": 14.811, |
|
"eval_steps_per_second": 1.994, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 60.61, |
|
"learning_rate": 1.9414951245937164e-05, |
|
"loss": 1.1329, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 60.61, |
|
"eval_loss": 0.9506992101669312, |
|
"eval_runtime": 3.511, |
|
"eval_samples_per_second": 14.81, |
|
"eval_steps_per_second": 1.994, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 64.94, |
|
"learning_rate": 1.9371614301191768e-05, |
|
"loss": 1.8788, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 64.94, |
|
"eval_loss": 1.937408685684204, |
|
"eval_runtime": 3.5081, |
|
"eval_samples_per_second": 14.823, |
|
"eval_steps_per_second": 1.995, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 69.26, |
|
"learning_rate": 1.932827735644637e-05, |
|
"loss": 1.6736, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 69.26, |
|
"eval_loss": 0.5699201226234436, |
|
"eval_runtime": 3.5113, |
|
"eval_samples_per_second": 14.809, |
|
"eval_steps_per_second": 1.994, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 73.59, |
|
"learning_rate": 1.9284940411700976e-05, |
|
"loss": 0.5165, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 73.59, |
|
"eval_loss": 0.4182128310203552, |
|
"eval_runtime": 3.5129, |
|
"eval_samples_per_second": 14.803, |
|
"eval_steps_per_second": 1.993, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 77.92, |
|
"learning_rate": 1.924160346695558e-05, |
|
"loss": 0.4656, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 77.92, |
|
"eval_loss": 0.4120073914527893, |
|
"eval_runtime": 3.5127, |
|
"eval_samples_per_second": 14.803, |
|
"eval_steps_per_second": 1.993, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 82.25, |
|
"learning_rate": 1.9198266522210184e-05, |
|
"loss": 0.6133, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 82.25, |
|
"eval_loss": 0.4980267286300659, |
|
"eval_runtime": 3.5108, |
|
"eval_samples_per_second": 14.811, |
|
"eval_steps_per_second": 1.994, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 86.58, |
|
"learning_rate": 1.9154929577464788e-05, |
|
"loss": 0.8087, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 86.58, |
|
"eval_loss": 0.5801683068275452, |
|
"eval_runtime": 3.5099, |
|
"eval_samples_per_second": 14.815, |
|
"eval_steps_per_second": 1.994, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 90.91, |
|
"learning_rate": 1.9111592632719395e-05, |
|
"loss": 2.2068, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 90.91, |
|
"eval_loss": 0.7701263427734375, |
|
"eval_runtime": 3.5112, |
|
"eval_samples_per_second": 14.81, |
|
"eval_steps_per_second": 1.994, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 95.24, |
|
"learning_rate": 1.9068255687974e-05, |
|
"loss": 1.0182, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 95.24, |
|
"eval_loss": 0.42168232798576355, |
|
"eval_runtime": 3.5098, |
|
"eval_samples_per_second": 14.816, |
|
"eval_steps_per_second": 1.994, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 99.57, |
|
"learning_rate": 1.9024918743228603e-05, |
|
"loss": 0.3515, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 99.57, |
|
"eval_loss": 0.2897047996520996, |
|
"eval_runtime": 3.5082, |
|
"eval_samples_per_second": 14.822, |
|
"eval_steps_per_second": 1.995, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 103.9, |
|
"learning_rate": 1.8981581798483207e-05, |
|
"loss": 1.007, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 103.9, |
|
"eval_loss": 0.28924015164375305, |
|
"eval_runtime": 3.5076, |
|
"eval_samples_per_second": 14.825, |
|
"eval_steps_per_second": 1.996, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 108.23, |
|
"learning_rate": 1.8938244853737814e-05, |
|
"loss": 0.1892, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 108.23, |
|
"eval_loss": 0.28431499004364014, |
|
"eval_runtime": 3.5124, |
|
"eval_samples_per_second": 14.805, |
|
"eval_steps_per_second": 1.993, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 112.55, |
|
"learning_rate": 1.8894907908992418e-05, |
|
"loss": 0.2349, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 112.55, |
|
"eval_loss": 0.2943420112133026, |
|
"eval_runtime": 3.5082, |
|
"eval_samples_per_second": 14.822, |
|
"eval_steps_per_second": 1.995, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 116.88, |
|
"learning_rate": 1.8851570964247022e-05, |
|
"loss": 0.1959, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 116.88, |
|
"eval_loss": 0.2937524616718292, |
|
"eval_runtime": 3.5084, |
|
"eval_samples_per_second": 14.822, |
|
"eval_steps_per_second": 1.995, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 121.21, |
|
"learning_rate": 1.8808234019501626e-05, |
|
"loss": 0.5489, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 121.21, |
|
"eval_loss": 0.3693106770515442, |
|
"eval_runtime": 3.5038, |
|
"eval_samples_per_second": 14.841, |
|
"eval_steps_per_second": 1.998, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 125.54, |
|
"learning_rate": 1.8764897074756233e-05, |
|
"loss": 0.1798, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 125.54, |
|
"eval_loss": 0.2986227571964264, |
|
"eval_runtime": 3.5089, |
|
"eval_samples_per_second": 14.819, |
|
"eval_steps_per_second": 1.995, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 129.87, |
|
"learning_rate": 1.8721560130010837e-05, |
|
"loss": 0.1638, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 129.87, |
|
"eval_loss": 0.3518519103527069, |
|
"eval_runtime": 3.5068, |
|
"eval_samples_per_second": 14.828, |
|
"eval_steps_per_second": 1.996, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 134.2, |
|
"learning_rate": 1.867822318526544e-05, |
|
"loss": 0.3161, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 134.2, |
|
"eval_loss": 0.37139639258384705, |
|
"eval_runtime": 3.5102, |
|
"eval_samples_per_second": 14.814, |
|
"eval_steps_per_second": 1.994, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 138.53, |
|
"learning_rate": 1.8634886240520045e-05, |
|
"loss": 0.4443, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 138.53, |
|
"eval_loss": 0.4150441288948059, |
|
"eval_runtime": 3.5081, |
|
"eval_samples_per_second": 14.823, |
|
"eval_steps_per_second": 1.995, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 142.86, |
|
"learning_rate": 1.859154929577465e-05, |
|
"loss": 0.6043, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 142.86, |
|
"eval_loss": 0.6062866449356079, |
|
"eval_runtime": 3.5067, |
|
"eval_samples_per_second": 14.829, |
|
"eval_steps_per_second": 1.996, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 147.19, |
|
"learning_rate": 1.8548212351029253e-05, |
|
"loss": 1.0402, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 147.19, |
|
"eval_loss": 0.5321042537689209, |
|
"eval_runtime": 3.6131, |
|
"eval_samples_per_second": 14.392, |
|
"eval_steps_per_second": 1.937, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 151.52, |
|
"learning_rate": 1.8504875406283857e-05, |
|
"loss": 0.8064, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 151.52, |
|
"eval_loss": 0.5623323917388916, |
|
"eval_runtime": 3.5113, |
|
"eval_samples_per_second": 14.809, |
|
"eval_steps_per_second": 1.994, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 155.84, |
|
"learning_rate": 1.8461538461538465e-05, |
|
"loss": 1.0081, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 155.84, |
|
"eval_loss": 0.8560149669647217, |
|
"eval_runtime": 3.5137, |
|
"eval_samples_per_second": 14.799, |
|
"eval_steps_per_second": 1.992, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 160.17, |
|
"learning_rate": 1.841820151679307e-05, |
|
"loss": 1.4319, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 160.17, |
|
"eval_loss": 0.7755089998245239, |
|
"eval_runtime": 3.5088, |
|
"eval_samples_per_second": 14.82, |
|
"eval_steps_per_second": 1.995, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 164.5, |
|
"learning_rate": 1.8374864572047673e-05, |
|
"loss": 1.5845, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 164.5, |
|
"eval_loss": 0.8413295745849609, |
|
"eval_runtime": 3.5072, |
|
"eval_samples_per_second": 14.827, |
|
"eval_steps_per_second": 1.996, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 168.83, |
|
"learning_rate": 1.8331527627302277e-05, |
|
"loss": 1.1751, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 168.83, |
|
"eval_loss": 1.2155665159225464, |
|
"eval_runtime": 3.5106, |
|
"eval_samples_per_second": 14.812, |
|
"eval_steps_per_second": 1.994, |
|
"step": 39000 |
|
} |
|
], |
|
"max_steps": 462000, |
|
"num_train_epochs": 2000, |
|
"total_flos": 1.06376689483776e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|