|
{ |
|
"best_metric": 2.785961627960205, |
|
"best_model_checkpoint": "experiments/qg/google/mt5-large_all/checkpoint-126000", |
|
"epoch": 1.9977723341340248, |
|
"global_step": 126000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3e-05, |
|
"loss": 8.8865, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.9976277372262774e-05, |
|
"loss": 4.0325, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 3.3621604442596436, |
|
"eval_runtime": 444.6073, |
|
"eval_samples_per_second": 89.967, |
|
"eval_steps_per_second": 11.246, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.9952554744525548e-05, |
|
"loss": 3.6162, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.992883211678832e-05, |
|
"loss": 3.4961, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 3.1849684715270996, |
|
"eval_runtime": 443.8466, |
|
"eval_samples_per_second": 90.121, |
|
"eval_steps_per_second": 11.265, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.9905109489051095e-05, |
|
"loss": 3.4531, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.988138686131387e-05, |
|
"loss": 3.3883, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 3.0837316513061523, |
|
"eval_runtime": 451.6674, |
|
"eval_samples_per_second": 88.561, |
|
"eval_steps_per_second": 11.07, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9857664233576645e-05, |
|
"loss": 3.3483, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9833941605839416e-05, |
|
"loss": 3.281, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 3.053595781326294, |
|
"eval_runtime": 451.6413, |
|
"eval_samples_per_second": 88.566, |
|
"eval_steps_per_second": 11.071, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.9810218978102192e-05, |
|
"loss": 3.2901, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.9786496350364963e-05, |
|
"loss": 3.2764, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 3.045032262802124, |
|
"eval_runtime": 450.7805, |
|
"eval_samples_per_second": 88.735, |
|
"eval_steps_per_second": 11.092, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9762773722627736e-05, |
|
"loss": 3.255, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9739051094890513e-05, |
|
"loss": 3.2352, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 3.018794059753418, |
|
"eval_runtime": 451.4803, |
|
"eval_samples_per_second": 88.597, |
|
"eval_steps_per_second": 11.075, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9715328467153283e-05, |
|
"loss": 3.1954, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.969160583941606e-05, |
|
"loss": 3.1939, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 3.000720500946045, |
|
"eval_runtime": 450.9264, |
|
"eval_samples_per_second": 88.706, |
|
"eval_steps_per_second": 11.088, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.966788321167883e-05, |
|
"loss": 3.1893, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.9644160583941607e-05, |
|
"loss": 3.1936, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.988145589828491, |
|
"eval_runtime": 451.5214, |
|
"eval_samples_per_second": 88.589, |
|
"eval_steps_per_second": 11.074, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.962043795620438e-05, |
|
"loss": 3.1604, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.9596715328467154e-05, |
|
"loss": 3.1765, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 2.9744112491607666, |
|
"eval_runtime": 449.7283, |
|
"eval_samples_per_second": 88.943, |
|
"eval_steps_per_second": 11.118, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.9572992700729928e-05, |
|
"loss": 3.1478, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.95492700729927e-05, |
|
"loss": 3.1532, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.960232734680176, |
|
"eval_runtime": 450.2869, |
|
"eval_samples_per_second": 88.832, |
|
"eval_steps_per_second": 11.104, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.9525547445255475e-05, |
|
"loss": 3.1187, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.950182481751825e-05, |
|
"loss": 3.1318, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.956113338470459, |
|
"eval_runtime": 450.2104, |
|
"eval_samples_per_second": 88.847, |
|
"eval_steps_per_second": 11.106, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9478102189781022e-05, |
|
"loss": 3.1086, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.9454379562043796e-05, |
|
"loss": 3.0946, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 2.946828842163086, |
|
"eval_runtime": 450.0294, |
|
"eval_samples_per_second": 88.883, |
|
"eval_steps_per_second": 11.11, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.9430656934306573e-05, |
|
"loss": 3.114, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.9406934306569343e-05, |
|
"loss": 3.0903, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 2.936713457107544, |
|
"eval_runtime": 449.1132, |
|
"eval_samples_per_second": 89.064, |
|
"eval_steps_per_second": 11.133, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.938321167883212e-05, |
|
"loss": 3.102, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.935948905109489e-05, |
|
"loss": 3.0916, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.9305248260498047, |
|
"eval_runtime": 449.5477, |
|
"eval_samples_per_second": 88.978, |
|
"eval_steps_per_second": 11.122, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.9335766423357667e-05, |
|
"loss": 3.0578, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.931204379562044e-05, |
|
"loss": 3.0754, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 2.92580246925354, |
|
"eval_runtime": 449.9439, |
|
"eval_samples_per_second": 88.9, |
|
"eval_steps_per_second": 11.112, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.928832116788321e-05, |
|
"loss": 3.0673, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.9264598540145987e-05, |
|
"loss": 3.0606, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 2.9198672771453857, |
|
"eval_runtime": 450.1912, |
|
"eval_samples_per_second": 88.851, |
|
"eval_steps_per_second": 11.106, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.9240875912408757e-05, |
|
"loss": 3.0472, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.9217153284671534e-05, |
|
"loss": 3.0598, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 2.91711163520813, |
|
"eval_runtime": 449.4944, |
|
"eval_samples_per_second": 88.989, |
|
"eval_steps_per_second": 11.124, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.9193430656934308e-05, |
|
"loss": 3.0376, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.916970802919708e-05, |
|
"loss": 3.0369, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 2.9090449810028076, |
|
"eval_runtime": 450.1639, |
|
"eval_samples_per_second": 88.857, |
|
"eval_steps_per_second": 11.107, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.9145985401459855e-05, |
|
"loss": 3.0517, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.912226277372263e-05, |
|
"loss": 3.0312, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 2.903899669647217, |
|
"eval_runtime": 450.5358, |
|
"eval_samples_per_second": 88.783, |
|
"eval_steps_per_second": 11.098, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.9098540145985402e-05, |
|
"loss": 3.0135, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.9074817518248176e-05, |
|
"loss": 3.0304, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 2.8915276527404785, |
|
"eval_runtime": 449.8834, |
|
"eval_samples_per_second": 88.912, |
|
"eval_steps_per_second": 11.114, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.905109489051095e-05, |
|
"loss": 3.0157, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.9027372262773723e-05, |
|
"loss": 3.008, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 2.89363431930542, |
|
"eval_runtime": 450.0699, |
|
"eval_samples_per_second": 88.875, |
|
"eval_steps_per_second": 11.109, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.9003649635036496e-05, |
|
"loss": 3.0125, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.897992700729927e-05, |
|
"loss": 3.0373, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 2.8869340419769287, |
|
"eval_runtime": 450.8527, |
|
"eval_samples_per_second": 88.721, |
|
"eval_steps_per_second": 11.09, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.8956204379562047e-05, |
|
"loss": 3.0246, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.8932481751824817e-05, |
|
"loss": 3.0107, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 2.882735252380371, |
|
"eval_runtime": 451.0618, |
|
"eval_samples_per_second": 88.68, |
|
"eval_steps_per_second": 11.085, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.8908759124087594e-05, |
|
"loss": 3.0006, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.8885036496350364e-05, |
|
"loss": 2.9781, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 2.8770592212677, |
|
"eval_runtime": 450.672, |
|
"eval_samples_per_second": 88.756, |
|
"eval_steps_per_second": 11.095, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.886131386861314e-05, |
|
"loss": 2.9744, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.8837591240875914e-05, |
|
"loss": 3.0146, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 2.8825135231018066, |
|
"eval_runtime": 450.4517, |
|
"eval_samples_per_second": 88.8, |
|
"eval_steps_per_second": 11.1, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.8813868613138685e-05, |
|
"loss": 2.997, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.879014598540146e-05, |
|
"loss": 2.9856, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 2.8855531215667725, |
|
"eval_runtime": 450.7359, |
|
"eval_samples_per_second": 88.744, |
|
"eval_steps_per_second": 11.093, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.8766423357664232e-05, |
|
"loss": 2.986, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.874270072992701e-05, |
|
"loss": 3.0001, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 2.886821985244751, |
|
"eval_runtime": 450.0432, |
|
"eval_samples_per_second": 88.88, |
|
"eval_steps_per_second": 11.11, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.8718978102189782e-05, |
|
"loss": 2.9924, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.8695255474452556e-05, |
|
"loss": 2.9633, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 2.8713860511779785, |
|
"eval_runtime": 450.4908, |
|
"eval_samples_per_second": 88.792, |
|
"eval_steps_per_second": 11.099, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.867153284671533e-05, |
|
"loss": 2.9684, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.8647810218978103e-05, |
|
"loss": 2.9563, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 2.8724424839019775, |
|
"eval_runtime": 450.9726, |
|
"eval_samples_per_second": 88.697, |
|
"eval_steps_per_second": 11.087, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.8624087591240876e-05, |
|
"loss": 2.9573, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.860036496350365e-05, |
|
"loss": 2.9812, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 2.86970591545105, |
|
"eval_runtime": 450.3055, |
|
"eval_samples_per_second": 88.829, |
|
"eval_steps_per_second": 11.104, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.8576642335766423e-05, |
|
"loss": 2.9677, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8552919708029197e-05, |
|
"loss": 2.9773, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 2.8727774620056152, |
|
"eval_runtime": 449.455, |
|
"eval_samples_per_second": 88.997, |
|
"eval_steps_per_second": 11.125, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.8529197080291974e-05, |
|
"loss": 2.971, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8505474452554744e-05, |
|
"loss": 2.9743, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 2.8665173053741455, |
|
"eval_runtime": 449.7846, |
|
"eval_samples_per_second": 88.931, |
|
"eval_steps_per_second": 11.116, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.848175182481752e-05, |
|
"loss": 2.9585, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.845802919708029e-05, |
|
"loss": 2.9428, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 2.8613767623901367, |
|
"eval_runtime": 450.8331, |
|
"eval_samples_per_second": 88.725, |
|
"eval_steps_per_second": 11.091, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.8434306569343068e-05, |
|
"loss": 2.9509, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.841058394160584e-05, |
|
"loss": 2.9683, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 2.86824369430542, |
|
"eval_runtime": 450.4958, |
|
"eval_samples_per_second": 88.791, |
|
"eval_steps_per_second": 11.099, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.8386861313868612e-05, |
|
"loss": 2.9332, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.836313868613139e-05, |
|
"loss": 2.9567, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 2.854598045349121, |
|
"eval_runtime": 450.2268, |
|
"eval_samples_per_second": 88.844, |
|
"eval_steps_per_second": 11.106, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.833941605839416e-05, |
|
"loss": 2.9529, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.8315693430656936e-05, |
|
"loss": 2.9515, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 2.8551025390625, |
|
"eval_runtime": 450.4445, |
|
"eval_samples_per_second": 88.801, |
|
"eval_steps_per_second": 11.1, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.829197080291971e-05, |
|
"loss": 2.9273, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.8268248175182483e-05, |
|
"loss": 2.965, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 2.8546838760375977, |
|
"eval_runtime": 450.6163, |
|
"eval_samples_per_second": 88.767, |
|
"eval_steps_per_second": 11.096, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.8244525547445256e-05, |
|
"loss": 2.9441, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.822080291970803e-05, |
|
"loss": 2.9326, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 2.848287582397461, |
|
"eval_runtime": 450.6373, |
|
"eval_samples_per_second": 88.763, |
|
"eval_steps_per_second": 11.095, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.8197080291970803e-05, |
|
"loss": 2.9547, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.817335766423358e-05, |
|
"loss": 2.9096, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 2.8502256870269775, |
|
"eval_runtime": 450.3731, |
|
"eval_samples_per_second": 88.815, |
|
"eval_steps_per_second": 11.102, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.814963503649635e-05, |
|
"loss": 2.9222, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.8125912408759124e-05, |
|
"loss": 2.9132, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 2.8453314304351807, |
|
"eval_runtime": 450.4787, |
|
"eval_samples_per_second": 88.794, |
|
"eval_steps_per_second": 11.099, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.8102189781021898e-05, |
|
"loss": 2.9172, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.807846715328467e-05, |
|
"loss": 2.9373, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 2.8487000465393066, |
|
"eval_runtime": 450.8104, |
|
"eval_samples_per_second": 88.729, |
|
"eval_steps_per_second": 11.091, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.8054744525547448e-05, |
|
"loss": 2.9451, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.8031021897810218e-05, |
|
"loss": 2.9333, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 2.8426833152770996, |
|
"eval_runtime": 449.7638, |
|
"eval_samples_per_second": 88.936, |
|
"eval_steps_per_second": 11.117, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.8007299270072995e-05, |
|
"loss": 2.9263, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.7983576642335765e-05, |
|
"loss": 2.9242, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 2.8440945148468018, |
|
"eval_runtime": 450.8592, |
|
"eval_samples_per_second": 88.719, |
|
"eval_steps_per_second": 11.09, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.795985401459854e-05, |
|
"loss": 2.9226, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.7936131386861316e-05, |
|
"loss": 2.9058, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 2.8383543491363525, |
|
"eval_runtime": 450.5891, |
|
"eval_samples_per_second": 88.773, |
|
"eval_steps_per_second": 11.097, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.7912408759124086e-05, |
|
"loss": 2.9113, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.7888686131386863e-05, |
|
"loss": 2.9237, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 2.8444628715515137, |
|
"eval_runtime": 450.7048, |
|
"eval_samples_per_second": 88.75, |
|
"eval_steps_per_second": 11.094, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.7864963503649633e-05, |
|
"loss": 2.904, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.784124087591241e-05, |
|
"loss": 2.9101, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 2.8378028869628906, |
|
"eval_runtime": 450.6185, |
|
"eval_samples_per_second": 88.767, |
|
"eval_steps_per_second": 11.096, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.7817518248175184e-05, |
|
"loss": 2.913, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.7793795620437957e-05, |
|
"loss": 2.9053, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 2.840611219406128, |
|
"eval_runtime": 448.4181, |
|
"eval_samples_per_second": 89.202, |
|
"eval_steps_per_second": 11.15, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.777007299270073e-05, |
|
"loss": 2.8649, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.7746350364963504e-05, |
|
"loss": 2.9016, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 2.832401752471924, |
|
"eval_runtime": 448.5723, |
|
"eval_samples_per_second": 89.172, |
|
"eval_steps_per_second": 11.146, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.7722627737226278e-05, |
|
"loss": 2.9178, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.769890510948905e-05, |
|
"loss": 2.8928, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 2.8320999145507812, |
|
"eval_runtime": 449.0119, |
|
"eval_samples_per_second": 89.084, |
|
"eval_steps_per_second": 11.136, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.7675182481751825e-05, |
|
"loss": 2.903, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.76514598540146e-05, |
|
"loss": 2.8792, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 2.836050033569336, |
|
"eval_runtime": 447.2366, |
|
"eval_samples_per_second": 89.438, |
|
"eval_steps_per_second": 11.18, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.7627737226277375e-05, |
|
"loss": 2.8807, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.7604014598540145e-05, |
|
"loss": 2.896, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 2.8326449394226074, |
|
"eval_runtime": 447.8025, |
|
"eval_samples_per_second": 89.325, |
|
"eval_steps_per_second": 11.166, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.7580291970802922e-05, |
|
"loss": 2.8805, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.7556569343065692e-05, |
|
"loss": 2.8708, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 2.830998420715332, |
|
"eval_runtime": 441.655, |
|
"eval_samples_per_second": 90.568, |
|
"eval_steps_per_second": 11.321, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.753284671532847e-05, |
|
"loss": 2.8656, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.7509124087591243e-05, |
|
"loss": 2.8882, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 2.8259706497192383, |
|
"eval_runtime": 442.0896, |
|
"eval_samples_per_second": 90.479, |
|
"eval_steps_per_second": 11.31, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.7485401459854017e-05, |
|
"loss": 2.8818, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.746167883211679e-05, |
|
"loss": 2.8682, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 2.8257410526275635, |
|
"eval_runtime": 441.6514, |
|
"eval_samples_per_second": 90.569, |
|
"eval_steps_per_second": 11.321, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.743795620437956e-05, |
|
"loss": 2.8765, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.7414233576642337e-05, |
|
"loss": 2.8843, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 2.8210554122924805, |
|
"eval_runtime": 441.9171, |
|
"eval_samples_per_second": 90.515, |
|
"eval_steps_per_second": 11.314, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.739051094890511e-05, |
|
"loss": 2.8785, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.7366788321167884e-05, |
|
"loss": 2.8631, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 2.825152635574341, |
|
"eval_runtime": 442.1075, |
|
"eval_samples_per_second": 90.476, |
|
"eval_steps_per_second": 11.309, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.7343065693430658e-05, |
|
"loss": 2.8557, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.731934306569343e-05, |
|
"loss": 2.8643, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 2.8247392177581787, |
|
"eval_runtime": 441.9526, |
|
"eval_samples_per_second": 90.507, |
|
"eval_steps_per_second": 11.313, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.7295620437956205e-05, |
|
"loss": 2.8976, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.727189781021898e-05, |
|
"loss": 2.8658, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 2.8236825466156006, |
|
"eval_runtime": 441.9598, |
|
"eval_samples_per_second": 90.506, |
|
"eval_steps_per_second": 11.313, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.7248175182481752e-05, |
|
"loss": 2.8608, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.7224452554744525e-05, |
|
"loss": 2.875, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 2.820819139480591, |
|
"eval_runtime": 442.0566, |
|
"eval_samples_per_second": 90.486, |
|
"eval_steps_per_second": 11.311, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.72007299270073e-05, |
|
"loss": 2.8704, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.7177007299270073e-05, |
|
"loss": 2.8531, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 2.821174144744873, |
|
"eval_runtime": 442.2056, |
|
"eval_samples_per_second": 90.456, |
|
"eval_steps_per_second": 11.307, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.715328467153285e-05, |
|
"loss": 2.8748, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.712956204379562e-05, |
|
"loss": 2.8654, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 2.825101613998413, |
|
"eval_runtime": 442.0952, |
|
"eval_samples_per_second": 90.478, |
|
"eval_steps_per_second": 11.31, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.7105839416058397e-05, |
|
"loss": 2.8515, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.7082116788321167e-05, |
|
"loss": 2.8432, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 2.819878101348877, |
|
"eval_runtime": 441.9364, |
|
"eval_samples_per_second": 90.511, |
|
"eval_steps_per_second": 11.314, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.7058394160583944e-05, |
|
"loss": 2.8591, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.7034671532846717e-05, |
|
"loss": 2.8871, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.8195338249206543, |
|
"eval_runtime": 442.0595, |
|
"eval_samples_per_second": 90.486, |
|
"eval_steps_per_second": 11.311, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.7010948905109487e-05, |
|
"loss": 2.7957, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.6987226277372264e-05, |
|
"loss": 2.7854, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 2.817440986633301, |
|
"eval_runtime": 442.0252, |
|
"eval_samples_per_second": 90.493, |
|
"eval_steps_per_second": 11.312, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.6963503649635034e-05, |
|
"loss": 2.7874, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.693978102189781e-05, |
|
"loss": 2.7703, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 2.8208272457122803, |
|
"eval_runtime": 442.1489, |
|
"eval_samples_per_second": 90.467, |
|
"eval_steps_per_second": 11.308, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.6916058394160585e-05, |
|
"loss": 2.7773, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.689233576642336e-05, |
|
"loss": 2.7764, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 2.8173325061798096, |
|
"eval_runtime": 443.1184, |
|
"eval_samples_per_second": 90.269, |
|
"eval_steps_per_second": 11.284, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.6868613138686132e-05, |
|
"loss": 2.7838, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.6844890510948906e-05, |
|
"loss": 2.7734, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_loss": 2.812627077102661, |
|
"eval_runtime": 443.0624, |
|
"eval_samples_per_second": 90.281, |
|
"eval_steps_per_second": 11.285, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.682116788321168e-05, |
|
"loss": 2.7541, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.6797445255474456e-05, |
|
"loss": 2.7637, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 2.817030668258667, |
|
"eval_runtime": 443.2279, |
|
"eval_samples_per_second": 90.247, |
|
"eval_steps_per_second": 11.281, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.6773722627737226e-05, |
|
"loss": 2.7809, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.675e-05, |
|
"loss": 2.775, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 2.819810628890991, |
|
"eval_runtime": 442.8797, |
|
"eval_samples_per_second": 90.318, |
|
"eval_steps_per_second": 11.29, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.6726277372262777e-05, |
|
"loss": 2.786, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.6702554744525547e-05, |
|
"loss": 2.747, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 2.820467233657837, |
|
"eval_runtime": 443.0381, |
|
"eval_samples_per_second": 90.286, |
|
"eval_steps_per_second": 11.286, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.6678832116788324e-05, |
|
"loss": 2.801, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.6655109489051094e-05, |
|
"loss": 2.7589, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_loss": 2.8193023204803467, |
|
"eval_runtime": 443.2053, |
|
"eval_samples_per_second": 90.252, |
|
"eval_steps_per_second": 11.281, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.663138686131387e-05, |
|
"loss": 2.7801, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.6607664233576644e-05, |
|
"loss": 2.7521, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_loss": 2.813857078552246, |
|
"eval_runtime": 443.2947, |
|
"eval_samples_per_second": 90.233, |
|
"eval_steps_per_second": 11.279, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.6583941605839414e-05, |
|
"loss": 2.7876, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.656021897810219e-05, |
|
"loss": 2.772, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_loss": 2.814068555831909, |
|
"eval_runtime": 443.3415, |
|
"eval_samples_per_second": 90.224, |
|
"eval_steps_per_second": 11.278, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.653649635036496e-05, |
|
"loss": 2.7618, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.651277372262774e-05, |
|
"loss": 2.785, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 2.8143815994262695, |
|
"eval_runtime": 443.2942, |
|
"eval_samples_per_second": 90.234, |
|
"eval_steps_per_second": 11.279, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.6489051094890512e-05, |
|
"loss": 2.7878, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.6465328467153286e-05, |
|
"loss": 2.743, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 2.8167881965637207, |
|
"eval_runtime": 443.0056, |
|
"eval_samples_per_second": 90.292, |
|
"eval_steps_per_second": 11.287, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.644160583941606e-05, |
|
"loss": 2.7564, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.6417883211678833e-05, |
|
"loss": 2.7694, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 2.8128414154052734, |
|
"eval_runtime": 443.1543, |
|
"eval_samples_per_second": 90.262, |
|
"eval_steps_per_second": 11.283, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.6394160583941606e-05, |
|
"loss": 2.7583, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.6370437956204383e-05, |
|
"loss": 2.7769, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_loss": 2.8131983280181885, |
|
"eval_runtime": 443.0764, |
|
"eval_samples_per_second": 90.278, |
|
"eval_steps_per_second": 11.285, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.6346715328467153e-05, |
|
"loss": 2.7623, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.6322992700729927e-05, |
|
"loss": 2.7523, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 2.813045024871826, |
|
"eval_runtime": 443.0803, |
|
"eval_samples_per_second": 90.277, |
|
"eval_steps_per_second": 11.285, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.62992700729927e-05, |
|
"loss": 2.7569, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.6275547445255474e-05, |
|
"loss": 2.7572, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 2.8214099407196045, |
|
"eval_runtime": 442.9545, |
|
"eval_samples_per_second": 90.303, |
|
"eval_steps_per_second": 11.288, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.625182481751825e-05, |
|
"loss": 2.7564, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.622810218978102e-05, |
|
"loss": 2.7573, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_loss": 2.811310052871704, |
|
"eval_runtime": 442.9337, |
|
"eval_samples_per_second": 90.307, |
|
"eval_steps_per_second": 11.288, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.6204379562043798e-05, |
|
"loss": 2.7635, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.6180656934306568e-05, |
|
"loss": 2.7514, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 2.8111774921417236, |
|
"eval_runtime": 442.9468, |
|
"eval_samples_per_second": 90.304, |
|
"eval_steps_per_second": 11.288, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.615693430656934e-05, |
|
"loss": 2.7534, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.613321167883212e-05, |
|
"loss": 2.7676, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_loss": 2.8070504665374756, |
|
"eval_runtime": 442.7918, |
|
"eval_samples_per_second": 90.336, |
|
"eval_steps_per_second": 11.292, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.6109489051094892e-05, |
|
"loss": 2.7212, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.6085766423357666e-05, |
|
"loss": 2.7044, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_loss": 2.8113818168640137, |
|
"eval_runtime": 440.1567, |
|
"eval_samples_per_second": 90.877, |
|
"eval_steps_per_second": 11.36, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.6062043795620436e-05, |
|
"loss": 2.742, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.6038321167883213e-05, |
|
"loss": 2.7205, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 2.810499906539917, |
|
"eval_runtime": 440.19, |
|
"eval_samples_per_second": 90.87, |
|
"eval_steps_per_second": 11.359, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.6014598540145986e-05, |
|
"loss": 2.7065, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.599087591240876e-05, |
|
"loss": 2.7168, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 2.813863754272461, |
|
"eval_runtime": 440.1828, |
|
"eval_samples_per_second": 90.871, |
|
"eval_steps_per_second": 11.359, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.5967153284671533e-05, |
|
"loss": 2.7367, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.5943430656934307e-05, |
|
"loss": 2.7337, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_loss": 2.80753231048584, |
|
"eval_runtime": 439.9369, |
|
"eval_samples_per_second": 90.922, |
|
"eval_steps_per_second": 11.365, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.591970802919708e-05, |
|
"loss": 2.718, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.5895985401459854e-05, |
|
"loss": 2.709, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 2.8110671043395996, |
|
"eval_runtime": 439.984, |
|
"eval_samples_per_second": 90.912, |
|
"eval_steps_per_second": 11.364, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.5872262773722628e-05, |
|
"loss": 2.7045, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.58485401459854e-05, |
|
"loss": 2.7449, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_loss": 2.806777238845825, |
|
"eval_runtime": 439.8672, |
|
"eval_samples_per_second": 90.937, |
|
"eval_steps_per_second": 11.367, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.5824817518248178e-05, |
|
"loss": 2.704, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.5801094890510948e-05, |
|
"loss": 2.7175, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 2.8058323860168457, |
|
"eval_runtime": 439.8993, |
|
"eval_samples_per_second": 90.93, |
|
"eval_steps_per_second": 11.366, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.5777372262773725e-05, |
|
"loss": 2.7292, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.5753649635036495e-05, |
|
"loss": 2.7096, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_loss": 2.808027744293213, |
|
"eval_runtime": 439.7855, |
|
"eval_samples_per_second": 90.953, |
|
"eval_steps_per_second": 11.369, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.572992700729927e-05, |
|
"loss": 2.7125, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.5706204379562046e-05, |
|
"loss": 2.7272, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 2.8089358806610107, |
|
"eval_runtime": 439.8804, |
|
"eval_samples_per_second": 90.934, |
|
"eval_steps_per_second": 11.367, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.568248175182482e-05, |
|
"loss": 2.719, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.5658759124087593e-05, |
|
"loss": 2.7092, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 2.806529998779297, |
|
"eval_runtime": 439.7422, |
|
"eval_samples_per_second": 90.962, |
|
"eval_steps_per_second": 11.37, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.5635036496350363e-05, |
|
"loss": 2.7121, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.561131386861314e-05, |
|
"loss": 2.6846, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_loss": 2.804901123046875, |
|
"eval_runtime": 440.0389, |
|
"eval_samples_per_second": 90.901, |
|
"eval_steps_per_second": 11.363, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.5587591240875913e-05, |
|
"loss": 2.7301, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5563868613138687e-05, |
|
"loss": 2.6976, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_loss": 2.8061363697052, |
|
"eval_runtime": 439.8304, |
|
"eval_samples_per_second": 90.944, |
|
"eval_steps_per_second": 11.368, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.554014598540146e-05, |
|
"loss": 2.7008, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.5516423357664234e-05, |
|
"loss": 2.7057, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 2.815894603729248, |
|
"eval_runtime": 439.7552, |
|
"eval_samples_per_second": 90.96, |
|
"eval_steps_per_second": 11.37, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.5492700729927008e-05, |
|
"loss": 2.724, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.546897810218978e-05, |
|
"loss": 2.6922, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 2.8022561073303223, |
|
"eval_runtime": 439.8096, |
|
"eval_samples_per_second": 90.948, |
|
"eval_steps_per_second": 11.369, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.5445255474452555e-05, |
|
"loss": 2.7095, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.542153284671533e-05, |
|
"loss": 2.7011, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_loss": 2.801751136779785, |
|
"eval_runtime": 439.7019, |
|
"eval_samples_per_second": 90.971, |
|
"eval_steps_per_second": 11.371, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.5397810218978102e-05, |
|
"loss": 2.7212, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.5374087591240875e-05, |
|
"loss": 2.7029, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 2.8029119968414307, |
|
"eval_runtime": 439.744, |
|
"eval_samples_per_second": 90.962, |
|
"eval_steps_per_second": 11.37, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.5350364963503652e-05, |
|
"loss": 2.7055, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.5326642335766422e-05, |
|
"loss": 2.701, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 2.800555944442749, |
|
"eval_runtime": 439.7759, |
|
"eval_samples_per_second": 90.955, |
|
"eval_steps_per_second": 11.369, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.53029197080292e-05, |
|
"loss": 2.7136, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.527919708029197e-05, |
|
"loss": 2.7044, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 2.8019235134124756, |
|
"eval_runtime": 439.7518, |
|
"eval_samples_per_second": 90.96, |
|
"eval_steps_per_second": 11.37, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.5255474452554746e-05, |
|
"loss": 2.7141, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.523175182481752e-05, |
|
"loss": 2.7311, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 2.8053483963012695, |
|
"eval_runtime": 439.6812, |
|
"eval_samples_per_second": 90.975, |
|
"eval_steps_per_second": 11.372, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.520802919708029e-05, |
|
"loss": 2.7107, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5184306569343067e-05, |
|
"loss": 2.711, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 2.805739641189575, |
|
"eval_runtime": 439.6645, |
|
"eval_samples_per_second": 90.978, |
|
"eval_steps_per_second": 11.372, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.5160583941605837e-05, |
|
"loss": 2.7212, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.5136861313868614e-05, |
|
"loss": 2.7344, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_loss": 2.799654722213745, |
|
"eval_runtime": 439.7601, |
|
"eval_samples_per_second": 90.959, |
|
"eval_steps_per_second": 11.37, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5113138686131388e-05, |
|
"loss": 2.7122, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.508941605839416e-05, |
|
"loss": 2.7236, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_loss": 2.803441047668457, |
|
"eval_runtime": 439.7253, |
|
"eval_samples_per_second": 90.966, |
|
"eval_steps_per_second": 11.371, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.5065693430656935e-05, |
|
"loss": 2.7226, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.5041970802919705e-05, |
|
"loss": 2.6879, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_loss": 2.801588773727417, |
|
"eval_runtime": 439.6315, |
|
"eval_samples_per_second": 90.985, |
|
"eval_steps_per_second": 11.373, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.5018248175182482e-05, |
|
"loss": 2.732, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.499452554744526e-05, |
|
"loss": 2.7188, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 2.7991223335266113, |
|
"eval_runtime": 439.7752, |
|
"eval_samples_per_second": 90.956, |
|
"eval_steps_per_second": 11.369, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.497080291970803e-05, |
|
"loss": 2.7164, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.4947080291970802e-05, |
|
"loss": 2.6994, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 2.8013699054718018, |
|
"eval_runtime": 439.7722, |
|
"eval_samples_per_second": 90.956, |
|
"eval_steps_per_second": 11.37, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.492335766423358e-05, |
|
"loss": 2.6989, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.489963503649635e-05, |
|
"loss": 2.7129, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_loss": 2.7973504066467285, |
|
"eval_runtime": 439.8695, |
|
"eval_samples_per_second": 90.936, |
|
"eval_steps_per_second": 11.367, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.4875912408759126e-05, |
|
"loss": 2.7056, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.4852189781021897e-05, |
|
"loss": 2.7095, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 2.8017683029174805, |
|
"eval_runtime": 439.7668, |
|
"eval_samples_per_second": 90.957, |
|
"eval_steps_per_second": 11.37, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.4828467153284674e-05, |
|
"loss": 2.7021, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.4804744525547447e-05, |
|
"loss": 2.7029, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_loss": 2.7975335121154785, |
|
"eval_runtime": 439.9718, |
|
"eval_samples_per_second": 90.915, |
|
"eval_steps_per_second": 11.364, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.4781021897810217e-05, |
|
"loss": 2.7046, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.4757299270072994e-05, |
|
"loss": 2.7042, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_loss": 2.7961161136627197, |
|
"eval_runtime": 439.788, |
|
"eval_samples_per_second": 90.953, |
|
"eval_steps_per_second": 11.369, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.4733576642335768e-05, |
|
"loss": 2.7234, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.470985401459854e-05, |
|
"loss": 2.7134, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_loss": 2.795431613922119, |
|
"eval_runtime": 439.911, |
|
"eval_samples_per_second": 90.927, |
|
"eval_steps_per_second": 11.366, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.4686131386861315e-05, |
|
"loss": 2.6937, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.4662408759124088e-05, |
|
"loss": 2.6905, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_loss": 2.7966976165771484, |
|
"eval_runtime": 439.7831, |
|
"eval_samples_per_second": 90.954, |
|
"eval_steps_per_second": 11.369, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.4638686131386862e-05, |
|
"loss": 2.7027, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.4614963503649635e-05, |
|
"loss": 2.6871, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_loss": 2.7926506996154785, |
|
"eval_runtime": 440.0624, |
|
"eval_samples_per_second": 90.896, |
|
"eval_steps_per_second": 11.362, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.459124087591241e-05, |
|
"loss": 2.697, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.4567518248175186e-05, |
|
"loss": 2.6931, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 2.8011257648468018, |
|
"eval_runtime": 439.8116, |
|
"eval_samples_per_second": 90.948, |
|
"eval_steps_per_second": 11.369, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.4543795620437956e-05, |
|
"loss": 2.7136, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.452007299270073e-05, |
|
"loss": 2.702, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_loss": 2.7955451011657715, |
|
"eval_runtime": 439.914, |
|
"eval_samples_per_second": 90.927, |
|
"eval_steps_per_second": 11.366, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.4496350364963503e-05, |
|
"loss": 2.6859, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.4472627737226277e-05, |
|
"loss": 2.712, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_loss": 2.7944655418395996, |
|
"eval_runtime": 439.809, |
|
"eval_samples_per_second": 90.949, |
|
"eval_steps_per_second": 11.369, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.4448905109489054e-05, |
|
"loss": 2.6984, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.4425182481751824e-05, |
|
"loss": 2.7091, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_loss": 2.7959797382354736, |
|
"eval_runtime": 439.9158, |
|
"eval_samples_per_second": 90.926, |
|
"eval_steps_per_second": 11.366, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.44014598540146e-05, |
|
"loss": 2.6978, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.437773722627737e-05, |
|
"loss": 2.6913, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 2.79091739654541, |
|
"eval_runtime": 439.7923, |
|
"eval_samples_per_second": 90.952, |
|
"eval_steps_per_second": 11.369, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.4354014598540144e-05, |
|
"loss": 2.6964, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.433029197080292e-05, |
|
"loss": 2.7008, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_loss": 2.7938427925109863, |
|
"eval_runtime": 439.9595, |
|
"eval_samples_per_second": 90.917, |
|
"eval_steps_per_second": 11.365, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.4306569343065695e-05, |
|
"loss": 2.7015, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.428284671532847e-05, |
|
"loss": 2.6968, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 2.796417236328125, |
|
"eval_runtime": 439.8462, |
|
"eval_samples_per_second": 90.941, |
|
"eval_steps_per_second": 11.368, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 2.425912408759124e-05, |
|
"loss": 2.6936, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 2.4235401459854015e-05, |
|
"loss": 2.688, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_loss": 2.790297269821167, |
|
"eval_runtime": 439.89, |
|
"eval_samples_per_second": 90.932, |
|
"eval_steps_per_second": 11.366, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 2.421167883211679e-05, |
|
"loss": 2.6885, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 2.4187956204379563e-05, |
|
"loss": 2.7015, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 2.7968406677246094, |
|
"eval_runtime": 439.8161, |
|
"eval_samples_per_second": 90.947, |
|
"eval_steps_per_second": 11.368, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 2.4164233576642336e-05, |
|
"loss": 2.7095, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.414051094890511e-05, |
|
"loss": 2.7038, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 2.79244327545166, |
|
"eval_runtime": 439.8929, |
|
"eval_samples_per_second": 90.931, |
|
"eval_steps_per_second": 11.366, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.4116788321167883e-05, |
|
"loss": 2.6901, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 2.4093065693430657e-05, |
|
"loss": 2.6991, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_loss": 2.7943027019500732, |
|
"eval_runtime": 439.7901, |
|
"eval_samples_per_second": 90.952, |
|
"eval_steps_per_second": 11.369, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.406934306569343e-05, |
|
"loss": 2.6954, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.4045620437956207e-05, |
|
"loss": 2.6804, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.785961627960205, |
|
"eval_runtime": 440.0425, |
|
"eval_samples_per_second": 90.9, |
|
"eval_steps_per_second": 11.363, |
|
"step": 126000 |
|
} |
|
], |
|
"max_steps": 630700, |
|
"num_train_epochs": 10, |
|
"total_flos": 8.47096165805568e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|