|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.4926270156655391, |
|
"eval_steps": 99999, |
|
"global_step": 15000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9800000000000004e-05, |
|
"loss": 3.0349, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.94e-05, |
|
"loss": 3.2422, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.92e-05, |
|
"loss": 3.2539, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.920000000000001e-05, |
|
"loss": 3.111, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.92e-05, |
|
"loss": 3.0833, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.967945507362518e-05, |
|
"loss": 3.0845, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.934555410865138e-05, |
|
"loss": 3.0598, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.901165314367759e-05, |
|
"loss": 3.0556, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.86777521787038e-05, |
|
"loss": 3.0666, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.834385121373001e-05, |
|
"loss": 3.0681, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.800995024875622e-05, |
|
"loss": 3.0349, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.767604928378244e-05, |
|
"loss": 3.0354, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.734214831880864e-05, |
|
"loss": 3.1831, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.700824735383486e-05, |
|
"loss": 3.0632, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.667434638886107e-05, |
|
"loss": 3.1388, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.634044542388728e-05, |
|
"loss": 3.0572, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.600654445891349e-05, |
|
"loss": 3.0525, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.567264349393971e-05, |
|
"loss": 3.4111, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.533874252896591e-05, |
|
"loss": 3.1258, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.500484156399212e-05, |
|
"loss": 3.2196, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.467094059901834e-05, |
|
"loss": 3.0257, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.433703963404454e-05, |
|
"loss": 3.0797, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.400313866907076e-05, |
|
"loss": 3.4158, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.366923770409697e-05, |
|
"loss": 3.0948, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.333533673912319e-05, |
|
"loss": 3.0128, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.300143577414939e-05, |
|
"loss": 3.0169, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.266753480917561e-05, |
|
"loss": 3.0737, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.233363384420181e-05, |
|
"loss": 3.0862, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.199973287922803e-05, |
|
"loss": 3.0661, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.166583191425424e-05, |
|
"loss": 3.0505, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.133193094928046e-05, |
|
"loss": 3.0437, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.099802998430665e-05, |
|
"loss": 3.0424, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.066412901933287e-05, |
|
"loss": 3.0934, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.033356706400882e-05, |
|
"loss": 3.2011, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.999966609903502e-05, |
|
"loss": 3.1089, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.966576513406124e-05, |
|
"loss": 3.0698, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.933186416908745e-05, |
|
"loss": 3.0202, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.899796320411367e-05, |
|
"loss": 3.0918, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.866406223913988e-05, |
|
"loss": 3.1013, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.833016127416609e-05, |
|
"loss": 3.3244, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.799626030919231e-05, |
|
"loss": 3.0985, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.766235934421851e-05, |
|
"loss": 3.0286, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.732845837924472e-05, |
|
"loss": 3.0809, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.699455741427092e-05, |
|
"loss": 3.0599, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.666065644929714e-05, |
|
"loss": 3.0703, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.632675548432335e-05, |
|
"loss": 3.0816, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.599285451934957e-05, |
|
"loss": 3.0659, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.565895355437577e-05, |
|
"loss": 3.0197, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.532505258940199e-05, |
|
"loss": 3.0335, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.49911516244282e-05, |
|
"loss": 3.1091, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.465725065945441e-05, |
|
"loss": 3.0402, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.432334969448062e-05, |
|
"loss": 3.0689, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.398944872950684e-05, |
|
"loss": 3.0209, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.365554776453304e-05, |
|
"loss": 3.0484, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.332164679955925e-05, |
|
"loss": 3.0419, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.29910848442352e-05, |
|
"loss": 3.1417, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.265718387926142e-05, |
|
"loss": 3.03, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.232328291428762e-05, |
|
"loss": 3.0755, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.198938194931384e-05, |
|
"loss": 3.0759, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.165548098434005e-05, |
|
"loss": 3.0822, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.132158001936627e-05, |
|
"loss": 3.3225, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.098767905439247e-05, |
|
"loss": 4.2645, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 8.065377808941869e-05, |
|
"loss": 3.3215, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 8.03198771244449e-05, |
|
"loss": 3.3495, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.998597615947111e-05, |
|
"loss": 3.9503, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.96520751944973e-05, |
|
"loss": 3.2799, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.931817422952352e-05, |
|
"loss": 3.3251, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.898427326454973e-05, |
|
"loss": 3.3824, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.865037229957595e-05, |
|
"loss": 3.2103, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.831647133460215e-05, |
|
"loss": 3.4477, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.798257036962837e-05, |
|
"loss": 3.3625, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.764866940465458e-05, |
|
"loss": 3.4537, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.73147684396808e-05, |
|
"loss": 3.5719, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.698086747470701e-05, |
|
"loss": 3.1677, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.664696650973322e-05, |
|
"loss": 3.3993, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.631306554475944e-05, |
|
"loss": 3.1136, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.597916457978564e-05, |
|
"loss": 3.1078, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.564526361481185e-05, |
|
"loss": 3.0449, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.53147016594878e-05, |
|
"loss": 3.1574, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.4980800694514e-05, |
|
"loss": 3.2764, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.464689972954022e-05, |
|
"loss": 3.0553, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.431299876456643e-05, |
|
"loss": 3.1408, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.397909779959265e-05, |
|
"loss": 3.1765, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.364519683461885e-05, |
|
"loss": 3.0974, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.331129586964507e-05, |
|
"loss": 3.0998, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.297739490467128e-05, |
|
"loss": 3.1488, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.26434939396975e-05, |
|
"loss": 3.1722, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.23095929747237e-05, |
|
"loss": 3.1605, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.19756920097499e-05, |
|
"loss": 3.0762, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.164179104477612e-05, |
|
"loss": 3.1019, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.130789007980233e-05, |
|
"loss": 3.0633, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.097398911482855e-05, |
|
"loss": 3.1291, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.064008814985475e-05, |
|
"loss": 3.1701, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.030618718488097e-05, |
|
"loss": 3.1169, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.997228621990718e-05, |
|
"loss": 3.3379, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.96383852549334e-05, |
|
"loss": 3.0393, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.93044842899596e-05, |
|
"loss": 3.1069, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.897058332498582e-05, |
|
"loss": 3.1976, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.863668236001203e-05, |
|
"loss": 3.0431, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.830278139503824e-05, |
|
"loss": 3.063, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.796888043006444e-05, |
|
"loss": 3.0741, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.763497946509065e-05, |
|
"loss": 3.1159, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.730107850011686e-05, |
|
"loss": 3.1405, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.697051654479281e-05, |
|
"loss": 3.45, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.663661557981903e-05, |
|
"loss": 3.0671, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.630271461484523e-05, |
|
"loss": 3.1104, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.596881364987145e-05, |
|
"loss": 3.262, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.563491268489766e-05, |
|
"loss": 3.3126, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.530101171992388e-05, |
|
"loss": 3.136, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.49671107549501e-05, |
|
"loss": 3.0665, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.46332097899763e-05, |
|
"loss": 3.1046, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.42993088250025e-05, |
|
"loss": 3.072, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.396540786002871e-05, |
|
"loss": 3.1027, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.363150689505493e-05, |
|
"loss": 3.1536, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.329760593008114e-05, |
|
"loss": 3.0932, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.296370496510735e-05, |
|
"loss": 3.0798, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.262980400013356e-05, |
|
"loss": 3.1229, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.229590303515978e-05, |
|
"loss": 3.1848, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.196200207018598e-05, |
|
"loss": 3.2213, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.16281011052122e-05, |
|
"loss": 3.3552, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.129420014023841e-05, |
|
"loss": 3.2678, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.0960299175264626e-05, |
|
"loss": 3.2389, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.062639821029084e-05, |
|
"loss": 3.3173, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.0292497245317036e-05, |
|
"loss": 3.0254, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.995859628034325e-05, |
|
"loss": 3.0548, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.962469531536946e-05, |
|
"loss": 3.0759, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.929079435039567e-05, |
|
"loss": 3.0964, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.8956893385421884e-05, |
|
"loss": 3.2731, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.8622992420448096e-05, |
|
"loss": 3.0974, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.828909145547431e-05, |
|
"loss": 3.2184, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.795519049050052e-05, |
|
"loss": 3.1414, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.762128952552673e-05, |
|
"loss": 3.3148, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.7287388560552944e-05, |
|
"loss": 3.2656, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.6953487595579156e-05, |
|
"loss": 3.0808, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.661958663060537e-05, |
|
"loss": 3.1319, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.628568566563157e-05, |
|
"loss": 3.0652, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.5951784700657785e-05, |
|
"loss": 3.2544, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.5617883735684e-05, |
|
"loss": 3.1063, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.528398277071021e-05, |
|
"loss": 3.146, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.495008180573642e-05, |
|
"loss": 3.0885, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.461618084076263e-05, |
|
"loss": 3.0842, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.4282279875788845e-05, |
|
"loss": 3.2433, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.3948378910815057e-05, |
|
"loss": 3.2303, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.361447794584127e-05, |
|
"loss": 3.0701, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.328391599051722e-05, |
|
"loss": 3.303, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.295001502554343e-05, |
|
"loss": 3.1973, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.261611406056963e-05, |
|
"loss": 3.1066, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.228221309559584e-05, |
|
"loss": 3.2033, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.195165114027179e-05, |
|
"loss": 3.2673, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.161775017529801e-05, |
|
"loss": 3.0243, |
|
"step": 15000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 30449, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 1.105248502226903e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|