{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.4926270156655391, "eval_steps": 99999, "global_step": 15000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.9800000000000004e-05, "loss": 3.0349, "step": 100 }, { "epoch": 0.01, "learning_rate": 3.94e-05, "loss": 3.2422, "step": 200 }, { "epoch": 0.01, "learning_rate": 5.92e-05, "loss": 3.2539, "step": 300 }, { "epoch": 0.01, "learning_rate": 7.920000000000001e-05, "loss": 3.111, "step": 400 }, { "epoch": 0.02, "learning_rate": 9.92e-05, "loss": 3.0833, "step": 500 }, { "epoch": 0.02, "learning_rate": 9.967945507362518e-05, "loss": 3.0845, "step": 600 }, { "epoch": 0.02, "learning_rate": 9.934555410865138e-05, "loss": 3.0598, "step": 700 }, { "epoch": 0.03, "learning_rate": 9.901165314367759e-05, "loss": 3.0556, "step": 800 }, { "epoch": 0.03, "learning_rate": 9.86777521787038e-05, "loss": 3.0666, "step": 900 }, { "epoch": 0.03, "learning_rate": 9.834385121373001e-05, "loss": 3.0681, "step": 1000 }, { "epoch": 0.04, "learning_rate": 9.800995024875622e-05, "loss": 3.0349, "step": 1100 }, { "epoch": 0.04, "learning_rate": 9.767604928378244e-05, "loss": 3.0354, "step": 1200 }, { "epoch": 0.04, "learning_rate": 9.734214831880864e-05, "loss": 3.1831, "step": 1300 }, { "epoch": 0.05, "learning_rate": 9.700824735383486e-05, "loss": 3.0632, "step": 1400 }, { "epoch": 0.05, "learning_rate": 9.667434638886107e-05, "loss": 3.1388, "step": 1500 }, { "epoch": 0.05, "learning_rate": 9.634044542388728e-05, "loss": 3.0572, "step": 1600 }, { "epoch": 0.06, "learning_rate": 9.600654445891349e-05, "loss": 3.0525, "step": 1700 }, { "epoch": 0.06, "learning_rate": 9.567264349393971e-05, "loss": 3.4111, "step": 1800 }, { "epoch": 0.06, "learning_rate": 9.533874252896591e-05, "loss": 3.1258, "step": 1900 }, { "epoch": 0.07, "learning_rate": 9.500484156399212e-05, "loss": 3.2196, "step": 2000 }, { "epoch": 0.07, "learning_rate": 9.467094059901834e-05, "loss": 3.0257, "step": 2100 }, { "epoch": 0.07, "learning_rate": 9.433703963404454e-05, "loss": 3.0797, "step": 2200 }, { "epoch": 0.08, "learning_rate": 9.400313866907076e-05, "loss": 3.4158, "step": 2300 }, { "epoch": 0.08, "learning_rate": 9.366923770409697e-05, "loss": 3.0948, "step": 2400 }, { "epoch": 0.08, "learning_rate": 9.333533673912319e-05, "loss": 3.0128, "step": 2500 }, { "epoch": 0.09, "learning_rate": 9.300143577414939e-05, "loss": 3.0169, "step": 2600 }, { "epoch": 0.09, "learning_rate": 9.266753480917561e-05, "loss": 3.0737, "step": 2700 }, { "epoch": 0.09, "learning_rate": 9.233363384420181e-05, "loss": 3.0862, "step": 2800 }, { "epoch": 0.1, "learning_rate": 9.199973287922803e-05, "loss": 3.0661, "step": 2900 }, { "epoch": 0.1, "learning_rate": 9.166583191425424e-05, "loss": 3.0505, "step": 3000 }, { "epoch": 0.1, "learning_rate": 9.133193094928046e-05, "loss": 3.0437, "step": 3100 }, { "epoch": 0.11, "learning_rate": 9.099802998430665e-05, "loss": 3.0424, "step": 3200 }, { "epoch": 0.11, "learning_rate": 9.066412901933287e-05, "loss": 3.0934, "step": 3300 }, { "epoch": 0.11, "learning_rate": 9.033356706400882e-05, "loss": 3.2011, "step": 3400 }, { "epoch": 0.11, "learning_rate": 8.999966609903502e-05, "loss": 3.1089, "step": 3500 }, { "epoch": 0.12, "learning_rate": 8.966576513406124e-05, "loss": 3.0698, "step": 3600 }, { "epoch": 0.12, "learning_rate": 8.933186416908745e-05, "loss": 3.0202, "step": 3700 }, { "epoch": 0.12, "learning_rate": 8.899796320411367e-05, "loss": 3.0918, "step": 3800 }, { "epoch": 0.13, "learning_rate": 8.866406223913988e-05, "loss": 3.1013, "step": 3900 }, { "epoch": 0.13, "learning_rate": 8.833016127416609e-05, "loss": 3.3244, "step": 4000 }, { "epoch": 0.13, "learning_rate": 8.799626030919231e-05, "loss": 3.0985, "step": 4100 }, { "epoch": 0.14, "learning_rate": 8.766235934421851e-05, "loss": 3.0286, "step": 4200 }, { "epoch": 0.14, "learning_rate": 8.732845837924472e-05, "loss": 3.0809, "step": 4300 }, { "epoch": 0.14, "learning_rate": 8.699455741427092e-05, "loss": 3.0599, "step": 4400 }, { "epoch": 0.15, "learning_rate": 8.666065644929714e-05, "loss": 3.0703, "step": 4500 }, { "epoch": 0.15, "learning_rate": 8.632675548432335e-05, "loss": 3.0816, "step": 4600 }, { "epoch": 0.15, "learning_rate": 8.599285451934957e-05, "loss": 3.0659, "step": 4700 }, { "epoch": 0.16, "learning_rate": 8.565895355437577e-05, "loss": 3.0197, "step": 4800 }, { "epoch": 0.16, "learning_rate": 8.532505258940199e-05, "loss": 3.0335, "step": 4900 }, { "epoch": 0.16, "learning_rate": 8.49911516244282e-05, "loss": 3.1091, "step": 5000 }, { "epoch": 0.17, "learning_rate": 8.465725065945441e-05, "loss": 3.0402, "step": 5100 }, { "epoch": 0.17, "learning_rate": 8.432334969448062e-05, "loss": 3.0689, "step": 5200 }, { "epoch": 0.17, "learning_rate": 8.398944872950684e-05, "loss": 3.0209, "step": 5300 }, { "epoch": 0.18, "learning_rate": 8.365554776453304e-05, "loss": 3.0484, "step": 5400 }, { "epoch": 0.18, "learning_rate": 8.332164679955925e-05, "loss": 3.0419, "step": 5500 }, { "epoch": 0.18, "learning_rate": 8.29910848442352e-05, "loss": 3.1417, "step": 5600 }, { "epoch": 0.19, "learning_rate": 8.265718387926142e-05, "loss": 3.03, "step": 5700 }, { "epoch": 0.19, "learning_rate": 8.232328291428762e-05, "loss": 3.0755, "step": 5800 }, { "epoch": 0.19, "learning_rate": 8.198938194931384e-05, "loss": 3.0759, "step": 5900 }, { "epoch": 0.2, "learning_rate": 8.165548098434005e-05, "loss": 3.0822, "step": 6000 }, { "epoch": 0.2, "learning_rate": 8.132158001936627e-05, "loss": 3.3225, "step": 6100 }, { "epoch": 0.2, "learning_rate": 8.098767905439247e-05, "loss": 4.2645, "step": 6200 }, { "epoch": 0.21, "learning_rate": 8.065377808941869e-05, "loss": 3.3215, "step": 6300 }, { "epoch": 0.21, "learning_rate": 8.03198771244449e-05, "loss": 3.3495, "step": 6400 }, { "epoch": 0.21, "learning_rate": 7.998597615947111e-05, "loss": 3.9503, "step": 6500 }, { "epoch": 0.22, "learning_rate": 7.96520751944973e-05, "loss": 3.2799, "step": 6600 }, { "epoch": 0.22, "learning_rate": 7.931817422952352e-05, "loss": 3.3251, "step": 6700 }, { "epoch": 0.22, "learning_rate": 7.898427326454973e-05, "loss": 3.3824, "step": 6800 }, { "epoch": 0.23, "learning_rate": 7.865037229957595e-05, "loss": 3.2103, "step": 6900 }, { "epoch": 0.23, "learning_rate": 7.831647133460215e-05, "loss": 3.4477, "step": 7000 }, { "epoch": 0.23, "learning_rate": 7.798257036962837e-05, "loss": 3.3625, "step": 7100 }, { "epoch": 0.24, "learning_rate": 7.764866940465458e-05, "loss": 3.4537, "step": 7200 }, { "epoch": 0.24, "learning_rate": 7.73147684396808e-05, "loss": 3.5719, "step": 7300 }, { "epoch": 0.24, "learning_rate": 7.698086747470701e-05, "loss": 3.1677, "step": 7400 }, { "epoch": 0.25, "learning_rate": 7.664696650973322e-05, "loss": 3.3993, "step": 7500 }, { "epoch": 0.25, "learning_rate": 7.631306554475944e-05, "loss": 3.1136, "step": 7600 }, { "epoch": 0.25, "learning_rate": 7.597916457978564e-05, "loss": 3.1078, "step": 7700 }, { "epoch": 0.26, "learning_rate": 7.564526361481185e-05, "loss": 3.0449, "step": 7800 }, { "epoch": 0.26, "learning_rate": 7.53147016594878e-05, "loss": 3.1574, "step": 7900 }, { "epoch": 0.26, "learning_rate": 7.4980800694514e-05, "loss": 3.2764, "step": 8000 }, { "epoch": 0.27, "learning_rate": 7.464689972954022e-05, "loss": 3.0553, "step": 8100 }, { "epoch": 0.27, "learning_rate": 7.431299876456643e-05, "loss": 3.1408, "step": 8200 }, { "epoch": 0.27, "learning_rate": 7.397909779959265e-05, "loss": 3.1765, "step": 8300 }, { "epoch": 0.28, "learning_rate": 7.364519683461885e-05, "loss": 3.0974, "step": 8400 }, { "epoch": 0.28, "learning_rate": 7.331129586964507e-05, "loss": 3.0998, "step": 8500 }, { "epoch": 0.28, "learning_rate": 7.297739490467128e-05, "loss": 3.1488, "step": 8600 }, { "epoch": 0.29, "learning_rate": 7.26434939396975e-05, "loss": 3.1722, "step": 8700 }, { "epoch": 0.29, "learning_rate": 7.23095929747237e-05, "loss": 3.1605, "step": 8800 }, { "epoch": 0.29, "learning_rate": 7.19756920097499e-05, "loss": 3.0762, "step": 8900 }, { "epoch": 0.3, "learning_rate": 7.164179104477612e-05, "loss": 3.1019, "step": 9000 }, { "epoch": 0.3, "learning_rate": 7.130789007980233e-05, "loss": 3.0633, "step": 9100 }, { "epoch": 0.3, "learning_rate": 7.097398911482855e-05, "loss": 3.1291, "step": 9200 }, { "epoch": 0.31, "learning_rate": 7.064008814985475e-05, "loss": 3.1701, "step": 9300 }, { "epoch": 0.31, "learning_rate": 7.030618718488097e-05, "loss": 3.1169, "step": 9400 }, { "epoch": 0.31, "learning_rate": 6.997228621990718e-05, "loss": 3.3379, "step": 9500 }, { "epoch": 0.32, "learning_rate": 6.96383852549334e-05, "loss": 3.0393, "step": 9600 }, { "epoch": 0.32, "learning_rate": 6.93044842899596e-05, "loss": 3.1069, "step": 9700 }, { "epoch": 0.32, "learning_rate": 6.897058332498582e-05, "loss": 3.1976, "step": 9800 }, { "epoch": 0.33, "learning_rate": 6.863668236001203e-05, "loss": 3.0431, "step": 9900 }, { "epoch": 0.33, "learning_rate": 6.830278139503824e-05, "loss": 3.063, "step": 10000 }, { "epoch": 0.33, "learning_rate": 6.796888043006444e-05, "loss": 3.0741, "step": 10100 }, { "epoch": 0.33, "learning_rate": 6.763497946509065e-05, "loss": 3.1159, "step": 10200 }, { "epoch": 0.34, "learning_rate": 6.730107850011686e-05, "loss": 3.1405, "step": 10300 }, { "epoch": 0.34, "learning_rate": 6.697051654479281e-05, "loss": 3.45, "step": 10400 }, { "epoch": 0.34, "learning_rate": 6.663661557981903e-05, "loss": 3.0671, "step": 10500 }, { "epoch": 0.35, "learning_rate": 6.630271461484523e-05, "loss": 3.1104, "step": 10600 }, { "epoch": 0.35, "learning_rate": 6.596881364987145e-05, "loss": 3.262, "step": 10700 }, { "epoch": 0.35, "learning_rate": 6.563491268489766e-05, "loss": 3.3126, "step": 10800 }, { "epoch": 0.36, "learning_rate": 6.530101171992388e-05, "loss": 3.136, "step": 10900 }, { "epoch": 0.36, "learning_rate": 6.49671107549501e-05, "loss": 3.0665, "step": 11000 }, { "epoch": 0.36, "learning_rate": 6.46332097899763e-05, "loss": 3.1046, "step": 11100 }, { "epoch": 0.37, "learning_rate": 6.42993088250025e-05, "loss": 3.072, "step": 11200 }, { "epoch": 0.37, "learning_rate": 6.396540786002871e-05, "loss": 3.1027, "step": 11300 }, { "epoch": 0.37, "learning_rate": 6.363150689505493e-05, "loss": 3.1536, "step": 11400 }, { "epoch": 0.38, "learning_rate": 6.329760593008114e-05, "loss": 3.0932, "step": 11500 }, { "epoch": 0.38, "learning_rate": 6.296370496510735e-05, "loss": 3.0798, "step": 11600 }, { "epoch": 0.38, "learning_rate": 6.262980400013356e-05, "loss": 3.1229, "step": 11700 }, { "epoch": 0.39, "learning_rate": 6.229590303515978e-05, "loss": 3.1848, "step": 11800 }, { "epoch": 0.39, "learning_rate": 6.196200207018598e-05, "loss": 3.2213, "step": 11900 }, { "epoch": 0.39, "learning_rate": 6.16281011052122e-05, "loss": 3.3552, "step": 12000 }, { "epoch": 0.4, "learning_rate": 6.129420014023841e-05, "loss": 3.2678, "step": 12100 }, { "epoch": 0.4, "learning_rate": 6.0960299175264626e-05, "loss": 3.2389, "step": 12200 }, { "epoch": 0.4, "learning_rate": 6.062639821029084e-05, "loss": 3.3173, "step": 12300 }, { "epoch": 0.41, "learning_rate": 6.0292497245317036e-05, "loss": 3.0254, "step": 12400 }, { "epoch": 0.41, "learning_rate": 5.995859628034325e-05, "loss": 3.0548, "step": 12500 }, { "epoch": 0.41, "learning_rate": 5.962469531536946e-05, "loss": 3.0759, "step": 12600 }, { "epoch": 0.42, "learning_rate": 5.929079435039567e-05, "loss": 3.0964, "step": 12700 }, { "epoch": 0.42, "learning_rate": 5.8956893385421884e-05, "loss": 3.2731, "step": 12800 }, { "epoch": 0.42, "learning_rate": 5.8622992420448096e-05, "loss": 3.0974, "step": 12900 }, { "epoch": 0.43, "learning_rate": 5.828909145547431e-05, "loss": 3.2184, "step": 13000 }, { "epoch": 0.43, "learning_rate": 5.795519049050052e-05, "loss": 3.1414, "step": 13100 }, { "epoch": 0.43, "learning_rate": 5.762128952552673e-05, "loss": 3.3148, "step": 13200 }, { "epoch": 0.44, "learning_rate": 5.7287388560552944e-05, "loss": 3.2656, "step": 13300 }, { "epoch": 0.44, "learning_rate": 5.6953487595579156e-05, "loss": 3.0808, "step": 13400 }, { "epoch": 0.44, "learning_rate": 5.661958663060537e-05, "loss": 3.1319, "step": 13500 }, { "epoch": 0.45, "learning_rate": 5.628568566563157e-05, "loss": 3.0652, "step": 13600 }, { "epoch": 0.45, "learning_rate": 5.5951784700657785e-05, "loss": 3.2544, "step": 13700 }, { "epoch": 0.45, "learning_rate": 5.5617883735684e-05, "loss": 3.1063, "step": 13800 }, { "epoch": 0.46, "learning_rate": 5.528398277071021e-05, "loss": 3.146, "step": 13900 }, { "epoch": 0.46, "learning_rate": 5.495008180573642e-05, "loss": 3.0885, "step": 14000 }, { "epoch": 0.46, "learning_rate": 5.461618084076263e-05, "loss": 3.0842, "step": 14100 }, { "epoch": 0.47, "learning_rate": 5.4282279875788845e-05, "loss": 3.2433, "step": 14200 }, { "epoch": 0.47, "learning_rate": 5.3948378910815057e-05, "loss": 3.2303, "step": 14300 }, { "epoch": 0.47, "learning_rate": 5.361447794584127e-05, "loss": 3.0701, "step": 14400 }, { "epoch": 0.48, "learning_rate": 5.328391599051722e-05, "loss": 3.303, "step": 14500 }, { "epoch": 0.48, "learning_rate": 5.295001502554343e-05, "loss": 3.1973, "step": 14600 }, { "epoch": 0.48, "learning_rate": 5.261611406056963e-05, "loss": 3.1066, "step": 14700 }, { "epoch": 0.49, "learning_rate": 5.228221309559584e-05, "loss": 3.2033, "step": 14800 }, { "epoch": 0.49, "learning_rate": 5.195165114027179e-05, "loss": 3.2673, "step": 14900 }, { "epoch": 0.49, "learning_rate": 5.161775017529801e-05, "loss": 3.0243, "step": 15000 } ], "logging_steps": 100, "max_steps": 30449, "num_train_epochs": 1, "save_steps": 100, "total_flos": 1.105248502226903e+18, "trial_name": null, "trial_params": null }