|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 27.796988998262883, |
|
"global_step": 120000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.669214732453091e-06, |
|
"loss": 49.8699, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.7355802640722724e-05, |
|
"loss": 41.2735, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.6042390548992352e-05, |
|
"loss": 36.9489, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.472897845726199e-05, |
|
"loss": 33.9779, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.341556636553162e-05, |
|
"loss": 31.7843, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 5.210215427380125e-05, |
|
"loss": 30.3453, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 6.0788742182070875e-05, |
|
"loss": 29.3517, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.945795691452397e-05, |
|
"loss": 28.5657, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 7.81445448227936e-05, |
|
"loss": 27.9059, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.683113273106324e-05, |
|
"loss": 27.256, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 9.550034746351632e-05, |
|
"loss": 26.6368, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00010418693537178596, |
|
"loss": 26.0616, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00011287352328005559, |
|
"loss": 25.5779, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00012154273801250867, |
|
"loss": 25.154, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0001302293259207783, |
|
"loss": 24.757, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00013891591382904795, |
|
"loss": 24.3901, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0001476025017373176, |
|
"loss": 24.0289, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.0001562890896455872, |
|
"loss": 23.7116, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.0001649756775538568, |
|
"loss": 23.3895, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.00017366226546212648, |
|
"loss": 23.1314, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.00018234885337039609, |
|
"loss": 22.8528, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.00019103544127866572, |
|
"loss": 22.5796, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 0.00019972202918693536, |
|
"loss": 22.3478, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.000208408617095205, |
|
"loss": 22.1018, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 0.0002170952050034746, |
|
"loss": 21.8626, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 0.0002257644197359277, |
|
"loss": 21.6681, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 0.00023445100764419736, |
|
"loss": 21.4255, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 0.00024313759555246697, |
|
"loss": 21.2724, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 0.0002518241834607366, |
|
"loss": 21.0849, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 0.00026051077136900625, |
|
"loss": 20.8968, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 0.00026919735927727584, |
|
"loss": 20.7233, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 0.0002777970813064628, |
|
"loss": 20.5867, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 0.00028648366921473243, |
|
"loss": 20.4368, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 0.00029517025712300207, |
|
"loss": 20.3374, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 0.000299571461663192, |
|
"loss": 20.2117, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 0.00029860628522893984, |
|
"loss": 20.0622, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 0.00029764110879468763, |
|
"loss": 19.9614, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.0002966759323604355, |
|
"loss": 19.8524, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 0.0002957107559261833, |
|
"loss": 19.7578, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 0.00029474557949193107, |
|
"loss": 19.6587, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 0.0002937804030576789, |
|
"loss": 19.5385, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 0.0002928152266234267, |
|
"loss": 19.4285, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 0.00029185005018917456, |
|
"loss": 19.3376, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.0002908848737549224, |
|
"loss": 19.2065, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 0.0002899196973206702, |
|
"loss": 19.1089, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 0.000288954520886418, |
|
"loss": 19.0411, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 0.0002879893444521658, |
|
"loss": 18.9864, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 0.00028702416801791364, |
|
"loss": 18.9157, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 0.0002860589915836615, |
|
"loss": 18.8606, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 0.0002850938151494093, |
|
"loss": 18.8049, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 0.0002841286387151571, |
|
"loss": 18.7415, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 0.0002831634622809049, |
|
"loss": 18.695, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 0.0002821982858466527, |
|
"loss": 18.5979, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 0.00028123310941240056, |
|
"loss": 18.5566, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 0.0002802698633310169, |
|
"loss": 18.5145, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 0.0002793046868967647, |
|
"loss": 18.465, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 0.0002783395104625125, |
|
"loss": 18.4232, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 0.00027737433402826034, |
|
"loss": 18.368, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 0.00027640915759400813, |
|
"loss": 18.315, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 0.000275443981159756, |
|
"loss": 18.2882, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 0.00027447880472550383, |
|
"loss": 18.2239, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 0.0002735136282912516, |
|
"loss": 18.163, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 0.0002725503822098679, |
|
"loss": 18.1283, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 0.00027158520577561575, |
|
"loss": 18.1016, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 0.0002706200293413636, |
|
"loss": 18.0663, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 0.0002696567832599799, |
|
"loss": 18.0263, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 0.00026869160682572773, |
|
"loss": 17.9946, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 0.00026772643039147553, |
|
"loss": 17.9719, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 0.0002667612539572234, |
|
"loss": 17.931, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 0.00026579607752297117, |
|
"loss": 17.8646, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 0.000264830901088719, |
|
"loss": 17.8358, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 0.0002638676550073353, |
|
"loss": 17.8081, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 0.00026290247857308315, |
|
"loss": 17.7928, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 0.00026193730213883095, |
|
"loss": 17.7663, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 0.0002609721257045788, |
|
"loss": 17.7224, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 0.0002600069492703266, |
|
"loss": 17.7134, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 0.0002590417728360744, |
|
"loss": 17.6934, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 0.00025807659640182223, |
|
"loss": 17.662, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 0.0002571114199675701, |
|
"loss": 17.5886, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 0.00025615010423905485, |
|
"loss": 17.5854, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 0.0002551849278048027, |
|
"loss": 17.5689, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 0.00025421975137055055, |
|
"loss": 17.5545, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 0.00025325650528916683, |
|
"loss": 17.5269, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 0.00025229132885491463, |
|
"loss": 17.5051, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 0.0002513261524206625, |
|
"loss": 17.4922, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 0.0002503609759864103, |
|
"loss": 17.4682, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"learning_rate": 0.0002493957995521581, |
|
"loss": 17.4151, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"learning_rate": 0.0002484306231179059, |
|
"loss": 17.3825, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"learning_rate": 0.00024746544668365376, |
|
"loss": 17.3681, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 10.42, |
|
"learning_rate": 0.00024650027024940155, |
|
"loss": 17.341, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 10.54, |
|
"learning_rate": 0.0002455350938151494, |
|
"loss": 17.3344, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 10.66, |
|
"learning_rate": 0.0002445699173808972, |
|
"loss": 17.316, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 10.77, |
|
"learning_rate": 0.00024360474094664502, |
|
"loss": 17.3002, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 10.89, |
|
"learning_rate": 0.00024264149486526136, |
|
"loss": 17.279, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 0.00024167631843100918, |
|
"loss": 17.2714, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 11.12, |
|
"learning_rate": 0.00024071114199675697, |
|
"loss": 17.2032, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"learning_rate": 0.00023974596556250482, |
|
"loss": 17.1982, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"learning_rate": 0.00023878271948112113, |
|
"loss": 17.1764, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 11.47, |
|
"learning_rate": 0.00023781754304686895, |
|
"loss": 17.1697, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"learning_rate": 0.00023685236661261674, |
|
"loss": 17.1619, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"learning_rate": 0.0002358871901783646, |
|
"loss": 17.1475, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 11.81, |
|
"learning_rate": 0.0002349239440969809, |
|
"loss": 17.1354, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 11.93, |
|
"learning_rate": 0.00023395876766272873, |
|
"loss": 17.1193, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"learning_rate": 0.00023299359122847657, |
|
"loss": 17.0976, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 12.16, |
|
"learning_rate": 0.00023203034514709289, |
|
"loss": 17.0559, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 12.28, |
|
"learning_rate": 0.00023106516871284068, |
|
"loss": 17.0478, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"learning_rate": 0.0002300999922785885, |
|
"loss": 17.0457, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 12.51, |
|
"learning_rate": 0.00022913481584433635, |
|
"loss": 17.0296, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 12.62, |
|
"learning_rate": 0.00022816963941008414, |
|
"loss": 17.0185, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 12.74, |
|
"learning_rate": 0.00022720639332870045, |
|
"loss": 17.012, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 0.0002262431472473168, |
|
"loss": 16.9982, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"learning_rate": 0.00022527990116593313, |
|
"loss": 16.981, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 13.09, |
|
"learning_rate": 0.00022431472473168093, |
|
"loss": 16.9492, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 0.00022335147865029726, |
|
"loss": 16.9352, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 13.32, |
|
"learning_rate": 0.00022238630221604506, |
|
"loss": 16.9178, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"learning_rate": 0.0002214211257817929, |
|
"loss": 16.9156, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 13.55, |
|
"learning_rate": 0.00022045787970040922, |
|
"loss": 16.9093, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 13.67, |
|
"learning_rate": 0.00021949270326615704, |
|
"loss": 16.9013, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 13.78, |
|
"learning_rate": 0.00021852752683190483, |
|
"loss": 16.8895, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 13.9, |
|
"learning_rate": 0.00021756235039765268, |
|
"loss": 16.8733, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 0.0002165971739634005, |
|
"loss": 16.881, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 14.13, |
|
"learning_rate": 0.0002156319975291483, |
|
"loss": 16.8109, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 14.25, |
|
"learning_rate": 0.00021466682109489614, |
|
"loss": 16.8187, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 14.36, |
|
"learning_rate": 0.00021370164466064394, |
|
"loss": 16.8078, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"learning_rate": 0.00021273646822639176, |
|
"loss": 16.8041, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 14.59, |
|
"learning_rate": 0.0002117712917921396, |
|
"loss": 16.7933, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 14.71, |
|
"learning_rate": 0.0002108061153578874, |
|
"loss": 16.7964, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 14.83, |
|
"learning_rate": 0.00020984093892363522, |
|
"loss": 16.778, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 14.94, |
|
"learning_rate": 0.00020887576248938302, |
|
"loss": 16.7764, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 15.06, |
|
"learning_rate": 0.00020791058605513086, |
|
"loss": 16.7603, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 15.17, |
|
"learning_rate": 0.00020694540962087868, |
|
"loss": 16.7251, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 15.29, |
|
"learning_rate": 0.00020598023318662648, |
|
"loss": 16.712, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"learning_rate": 0.00020501505675237433, |
|
"loss": 16.7072, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 15.52, |
|
"learning_rate": 0.00020404988031812215, |
|
"loss": 16.7106, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 15.64, |
|
"learning_rate": 0.00020308470388386994, |
|
"loss": 16.7037, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 15.75, |
|
"learning_rate": 0.0002021195274496178, |
|
"loss": 16.6918, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 15.87, |
|
"learning_rate": 0.00020115435101536558, |
|
"loss": 16.6886, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 15.98, |
|
"learning_rate": 0.0002001891745811134, |
|
"loss": 16.6947, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 16.1, |
|
"learning_rate": 0.00019922399814686125, |
|
"loss": 16.6516, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 16.21, |
|
"learning_rate": 0.00019825882171260905, |
|
"loss": 16.6403, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 16.33, |
|
"learning_rate": 0.00019729364527835687, |
|
"loss": 16.6343, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 16.45, |
|
"learning_rate": 0.00019632846884410466, |
|
"loss": 16.6257, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 16.56, |
|
"learning_rate": 0.00019536522276272103, |
|
"loss": 16.6181, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 16.68, |
|
"learning_rate": 0.00019440004632846882, |
|
"loss": 16.6204, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 16.79, |
|
"learning_rate": 0.00019343486989421664, |
|
"loss": 16.6101, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 16.91, |
|
"learning_rate": 0.00019247162381283295, |
|
"loss": 16.6116, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 17.03, |
|
"learning_rate": 0.0001915083777314493, |
|
"loss": 16.6047, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 17.14, |
|
"learning_rate": 0.0001905432012971971, |
|
"loss": 16.5427, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 17.26, |
|
"learning_rate": 0.00018957802486294493, |
|
"loss": 16.5453, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 17.37, |
|
"learning_rate": 0.00018861284842869276, |
|
"loss": 16.5489, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 17.49, |
|
"learning_rate": 0.00018764960234730907, |
|
"loss": 16.5505, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 0.0001866844259130569, |
|
"loss": 16.5423, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 17.72, |
|
"learning_rate": 0.0001857192494788047, |
|
"loss": 16.543, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 17.84, |
|
"learning_rate": 0.00018475407304455253, |
|
"loss": 16.5402, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 17.95, |
|
"learning_rate": 0.00018378889661030035, |
|
"loss": 16.5373, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 18.07, |
|
"learning_rate": 0.00018282372017604814, |
|
"loss": 16.5045, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"learning_rate": 0.000181858543741796, |
|
"loss": 16.4782, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 18.3, |
|
"learning_rate": 0.00018089336730754381, |
|
"loss": 16.4817, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 18.42, |
|
"learning_rate": 0.0001799281908732916, |
|
"loss": 16.4737, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"learning_rate": 0.00017896494479190797, |
|
"loss": 16.4797, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 18.65, |
|
"learning_rate": 0.00017799976835765577, |
|
"loss": 16.4755, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 18.76, |
|
"learning_rate": 0.0001770345919234036, |
|
"loss": 16.464, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 18.88, |
|
"learning_rate": 0.00017606941548915138, |
|
"loss": 16.4599, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 0.00017510423905489923, |
|
"loss": 16.4511, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 19.11, |
|
"learning_rate": 0.00017413906262064705, |
|
"loss": 16.42, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 0.00017317581653926336, |
|
"loss": 16.4039, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 19.34, |
|
"learning_rate": 0.00017221064010501116, |
|
"loss": 16.4083, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 19.46, |
|
"learning_rate": 0.000171245463670759, |
|
"loss": 16.413, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"learning_rate": 0.00017028028723650683, |
|
"loss": 16.4069, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 19.69, |
|
"learning_rate": 0.00016931511080225462, |
|
"loss": 16.3972, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"learning_rate": 0.00016834993436800247, |
|
"loss": 16.4007, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 19.92, |
|
"learning_rate": 0.0001673847579337503, |
|
"loss": 16.3945, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 20.04, |
|
"learning_rate": 0.00016641958149949808, |
|
"loss": 16.3928, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 20.15, |
|
"learning_rate": 0.00016545440506524593, |
|
"loss": 16.3468, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 20.27, |
|
"learning_rate": 0.00016448922863099373, |
|
"loss": 16.3459, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 20.38, |
|
"learning_rate": 0.00016352598254961006, |
|
"loss": 16.3429, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 20.5, |
|
"learning_rate": 0.00016256080611535786, |
|
"loss": 16.3394, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 20.62, |
|
"learning_rate": 0.0001615956296811057, |
|
"loss": 16.3417, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 20.73, |
|
"learning_rate": 0.0001606304532468535, |
|
"loss": 16.3369, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 20.85, |
|
"learning_rate": 0.00015966527681260132, |
|
"loss": 16.3392, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 20.96, |
|
"learning_rate": 0.00015870203073121763, |
|
"loss": 16.3402, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 21.08, |
|
"learning_rate": 0.00015773685429696548, |
|
"loss": 16.3009, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 21.2, |
|
"learning_rate": 0.00015677167786271327, |
|
"loss": 16.2924, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 21.31, |
|
"learning_rate": 0.0001558065014284611, |
|
"loss": 16.2908, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 21.43, |
|
"learning_rate": 0.00015484132499420894, |
|
"loss": 16.289, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 21.54, |
|
"learning_rate": 0.00015387614855995674, |
|
"loss": 16.2874, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 21.66, |
|
"learning_rate": 0.00015291097212570456, |
|
"loss": 16.2869, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 21.77, |
|
"learning_rate": 0.00015194772604432087, |
|
"loss": 16.2867, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 21.89, |
|
"learning_rate": 0.00015098254961006872, |
|
"loss": 16.2779, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 22.01, |
|
"learning_rate": 0.00015002123388155355, |
|
"loss": 16.2885, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 22.12, |
|
"learning_rate": 0.00014905605744730134, |
|
"loss": 16.2251, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 22.24, |
|
"learning_rate": 0.0001480908810130492, |
|
"loss": 16.247, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 22.35, |
|
"learning_rate": 0.00014712570457879698, |
|
"loss": 16.2315, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 22.47, |
|
"learning_rate": 0.0001461605281445448, |
|
"loss": 16.2464, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 22.59, |
|
"learning_rate": 0.00014519535171029263, |
|
"loss": 16.2326, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 22.7, |
|
"learning_rate": 0.00014423017527604045, |
|
"loss": 16.2327, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 22.82, |
|
"learning_rate": 0.00014326499884178827, |
|
"loss": 16.2269, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 22.93, |
|
"learning_rate": 0.0001422998224075361, |
|
"loss": 16.2186, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 23.05, |
|
"learning_rate": 0.0001413346459732839, |
|
"loss": 16.2302, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 23.16, |
|
"learning_rate": 0.0001403694695390317, |
|
"loss": 16.1879, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 23.28, |
|
"learning_rate": 0.00013940622345764804, |
|
"loss": 16.1834, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 23.4, |
|
"learning_rate": 0.00013844104702339586, |
|
"loss": 16.1779, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 23.51, |
|
"learning_rate": 0.00013747587058914368, |
|
"loss": 16.1835, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 23.63, |
|
"learning_rate": 0.00013651455486062851, |
|
"loss": 16.1829, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 23.74, |
|
"learning_rate": 0.00013554937842637633, |
|
"loss": 16.1814, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 23.86, |
|
"learning_rate": 0.00013458420199212413, |
|
"loss": 16.1864, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 23.97, |
|
"learning_rate": 0.00013361902555787198, |
|
"loss": 16.1784, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 24.09, |
|
"learning_rate": 0.0001326538491236198, |
|
"loss": 16.149, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 24.21, |
|
"learning_rate": 0.0001316886726893676, |
|
"loss": 16.1459, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 24.32, |
|
"learning_rate": 0.0001307234962551154, |
|
"loss": 16.1374, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 24.44, |
|
"learning_rate": 0.00012975831982086326, |
|
"loss": 16.143, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 24.55, |
|
"learning_rate": 0.00012879314338661105, |
|
"loss": 16.1267, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 24.67, |
|
"learning_rate": 0.00012782796695235888, |
|
"loss": 16.1363, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 24.79, |
|
"learning_rate": 0.0001268666512238437, |
|
"loss": 16.1328, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 24.9, |
|
"learning_rate": 0.00012590147478959153, |
|
"loss": 16.1314, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 25.02, |
|
"learning_rate": 0.00012493629835533935, |
|
"loss": 16.1427, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 25.13, |
|
"learning_rate": 0.00012397112192108717, |
|
"loss": 16.0975, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 25.25, |
|
"learning_rate": 0.000123005945486835, |
|
"loss": 16.091, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 25.36, |
|
"learning_rate": 0.00012204076905258281, |
|
"loss": 16.0959, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 25.48, |
|
"learning_rate": 0.00012107559261833062, |
|
"loss": 16.0919, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"learning_rate": 0.00012011041618407844, |
|
"loss": 16.077, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 25.71, |
|
"learning_rate": 0.00011914523974982626, |
|
"loss": 16.0942, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 25.83, |
|
"learning_rate": 0.00011818006331557408, |
|
"loss": 16.0786, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"learning_rate": 0.00011721681723419039, |
|
"loss": 16.0852, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 26.06, |
|
"learning_rate": 0.00011625164079993821, |
|
"loss": 16.0732, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 26.18, |
|
"learning_rate": 0.00011528646436568605, |
|
"loss": 16.0445, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 26.29, |
|
"learning_rate": 0.00011432128793143386, |
|
"loss": 16.043, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 26.41, |
|
"learning_rate": 0.00011335611149718168, |
|
"loss": 16.0484, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 26.52, |
|
"learning_rate": 0.00011239093506292948, |
|
"loss": 16.0494, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 26.64, |
|
"learning_rate": 0.00011142575862867732, |
|
"loss": 16.043, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 26.75, |
|
"learning_rate": 0.00011046058219442514, |
|
"loss": 16.0415, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 26.87, |
|
"learning_rate": 0.00010949540576017295, |
|
"loss": 16.032, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 26.99, |
|
"learning_rate": 0.00010853022932592077, |
|
"loss": 16.0346, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 27.1, |
|
"learning_rate": 0.00010756505289166857, |
|
"loss": 16.0145, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 27.22, |
|
"learning_rate": 0.00010659987645741641, |
|
"loss": 16.0032, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 27.33, |
|
"learning_rate": 0.00010563470002316423, |
|
"loss": 15.9992, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 27.45, |
|
"learning_rate": 0.00010467145394178054, |
|
"loss": 16.0033, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 27.57, |
|
"learning_rate": 0.00010370627750752838, |
|
"loss": 15.9964, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 27.68, |
|
"learning_rate": 0.00010274303142614469, |
|
"loss": 15.9994, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 27.8, |
|
"learning_rate": 0.00010177785499189251, |
|
"loss": 16.0, |
|
"step": 120000 |
|
} |
|
], |
|
"max_steps": 172680, |
|
"num_train_epochs": 40, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|