{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.758733462167385, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 5e-06, "loss": 5.2948, "step": 50 }, { "epoch": 0.06, "learning_rate": 1e-05, "loss": 4.9959, "step": 100 }, { "epoch": 0.09, "learning_rate": 1.5e-05, "loss": 4.6899, "step": 150 }, { "epoch": 0.12, "learning_rate": 2e-05, "loss": 4.5063, "step": 200 }, { "epoch": 0.15, "learning_rate": 2.5e-05, "loss": 4.4335, "step": 250 }, { "epoch": 0.18, "learning_rate": 3e-05, "loss": 4.3943, "step": 300 }, { "epoch": 0.21, "learning_rate": 3.5e-05, "loss": 4.35, "step": 350 }, { "epoch": 0.24, "learning_rate": 4e-05, "loss": 4.2928, "step": 400 }, { "epoch": 0.27, "learning_rate": 4.5e-05, "loss": 4.2246, "step": 450 }, { "epoch": 0.3, "learning_rate": 5e-05, "loss": 4.1448, "step": 500 }, { "epoch": 0.33, "learning_rate": 5.500000000000001e-05, "loss": 4.0853, "step": 550 }, { "epoch": 0.36, "learning_rate": 6e-05, "loss": 4.0358, "step": 600 }, { "epoch": 0.39, "learning_rate": 6.500000000000001e-05, "loss": 3.9763, "step": 650 }, { "epoch": 0.42, "learning_rate": 7e-05, "loss": 3.8816, "step": 700 }, { "epoch": 0.45, "learning_rate": 7.500000000000001e-05, "loss": 3.7246, "step": 750 }, { "epoch": 0.48, "learning_rate": 8e-05, "loss": 3.5085, "step": 800 }, { "epoch": 0.51, "learning_rate": 8.5e-05, "loss": 3.2879, "step": 850 }, { "epoch": 0.54, "learning_rate": 9e-05, "loss": 3.0898, "step": 900 }, { "epoch": 0.56, "learning_rate": 9.5e-05, "loss": 2.8867, "step": 950 }, { "epoch": 0.59, "learning_rate": 0.0001, "loss": 2.6323, "step": 1000 }, { "epoch": 0.62, "learning_rate": 9.932478055367996e-05, "loss": 2.3103, "step": 1050 }, { "epoch": 0.65, "learning_rate": 9.86495611073599e-05, "loss": 2.0185, "step": 1100 }, { "epoch": 0.68, "learning_rate": 9.797434166103985e-05, "loss": 1.7382, "step": 1150 }, { "epoch": 0.71, "learning_rate": 9.729912221471978e-05, "loss": 1.5627, "step": 1200 }, { "epoch": 0.74, "learning_rate": 9.662390276839974e-05, "loss": 1.4679, "step": 1250 }, { "epoch": 0.77, "learning_rate": 9.594868332207968e-05, "loss": 1.4046, "step": 1300 }, { "epoch": 0.8, "learning_rate": 9.527346387575963e-05, "loss": 1.3544, "step": 1350 }, { "epoch": 0.83, "learning_rate": 9.459824442943957e-05, "loss": 1.325, "step": 1400 }, { "epoch": 0.86, "learning_rate": 9.392302498311952e-05, "loss": 1.2984, "step": 1450 }, { "epoch": 0.89, "learning_rate": 9.324780553679947e-05, "loss": 1.2735, "step": 1500 }, { "epoch": 0.92, "learning_rate": 9.25725860904794e-05, "loss": 1.2617, "step": 1550 }, { "epoch": 0.95, "learning_rate": 9.189736664415936e-05, "loss": 1.2475, "step": 1600 }, { "epoch": 0.98, "learning_rate": 9.12221471978393e-05, "loss": 1.2348, "step": 1650 }, { "epoch": 1.01, "learning_rate": 9.054692775151925e-05, "loss": 1.2387, "step": 1700 }, { "epoch": 1.04, "learning_rate": 8.98717083051992e-05, "loss": 1.2149, "step": 1750 }, { "epoch": 1.07, "learning_rate": 8.919648885887914e-05, "loss": 1.2004, "step": 1800 }, { "epoch": 1.1, "learning_rate": 8.852126941255908e-05, "loss": 1.1918, "step": 1850 }, { "epoch": 1.13, "learning_rate": 8.784604996623903e-05, "loss": 1.1825, "step": 1900 }, { "epoch": 1.16, "learning_rate": 8.717083051991897e-05, "loss": 1.1794, "step": 1950 }, { "epoch": 1.19, "learning_rate": 8.649561107359893e-05, "loss": 1.175, "step": 2000 }, { "epoch": 1.22, "learning_rate": 8.582039162727888e-05, "loss": 1.1657, "step": 2050 }, { "epoch": 1.25, "learning_rate": 8.514517218095882e-05, "loss": 1.1621, "step": 2100 }, { "epoch": 1.28, "learning_rate": 8.446995273463876e-05, "loss": 1.1564, "step": 2150 }, { "epoch": 1.31, "learning_rate": 8.37947332883187e-05, "loss": 1.149, "step": 2200 }, { "epoch": 1.34, "learning_rate": 8.311951384199866e-05, "loss": 1.1478, "step": 2250 }, { "epoch": 1.37, "learning_rate": 8.24442943956786e-05, "loss": 1.1449, "step": 2300 }, { "epoch": 1.4, "learning_rate": 8.176907494935855e-05, "loss": 1.1393, "step": 2350 }, { "epoch": 1.43, "learning_rate": 8.109385550303849e-05, "loss": 1.135, "step": 2400 }, { "epoch": 1.46, "learning_rate": 8.041863605671843e-05, "loss": 1.1308, "step": 2450 }, { "epoch": 1.49, "learning_rate": 7.974341661039839e-05, "loss": 1.1282, "step": 2500 }, { "epoch": 1.52, "learning_rate": 7.906819716407833e-05, "loss": 1.1197, "step": 2550 }, { "epoch": 1.55, "learning_rate": 7.839297771775828e-05, "loss": 1.1227, "step": 2600 }, { "epoch": 1.58, "learning_rate": 7.771775827143822e-05, "loss": 1.1192, "step": 2650 }, { "epoch": 1.61, "learning_rate": 7.704253882511818e-05, "loss": 1.1109, "step": 2700 }, { "epoch": 1.64, "learning_rate": 7.63673193787981e-05, "loss": 1.1166, "step": 2750 }, { "epoch": 1.67, "learning_rate": 7.569209993247806e-05, "loss": 1.1132, "step": 2800 }, { "epoch": 1.7, "learning_rate": 7.5016880486158e-05, "loss": 1.1043, "step": 2850 }, { "epoch": 1.72, "learning_rate": 7.434166103983795e-05, "loss": 1.1085, "step": 2900 }, { "epoch": 1.75, "learning_rate": 7.366644159351789e-05, "loss": 1.1018, "step": 2950 }, { "epoch": 1.78, "learning_rate": 7.299122214719785e-05, "loss": 1.1034, "step": 3000 }, { "epoch": 1.81, "learning_rate": 7.23160027008778e-05, "loss": 1.0954, "step": 3050 }, { "epoch": 1.84, "learning_rate": 7.164078325455773e-05, "loss": 1.0973, "step": 3100 }, { "epoch": 1.87, "learning_rate": 7.096556380823768e-05, "loss": 1.0971, "step": 3150 }, { "epoch": 1.9, "learning_rate": 7.029034436191762e-05, "loss": 1.0893, "step": 3200 }, { "epoch": 1.93, "learning_rate": 6.961512491559758e-05, "loss": 1.0891, "step": 3250 }, { "epoch": 1.96, "learning_rate": 6.893990546927752e-05, "loss": 1.0866, "step": 3300 }, { "epoch": 1.99, "learning_rate": 6.826468602295747e-05, "loss": 1.0938, "step": 3350 }, { "epoch": 2.02, "learning_rate": 6.758946657663741e-05, "loss": 1.1025, "step": 3400 }, { "epoch": 2.05, "learning_rate": 6.691424713031735e-05, "loss": 1.0858, "step": 3450 }, { "epoch": 2.08, "learning_rate": 6.623902768399731e-05, "loss": 1.0835, "step": 3500 }, { "epoch": 2.11, "learning_rate": 6.556380823767725e-05, "loss": 1.0793, "step": 3550 }, { "epoch": 2.14, "learning_rate": 6.48885887913572e-05, "loss": 1.0754, "step": 3600 }, { "epoch": 2.17, "learning_rate": 6.421336934503714e-05, "loss": 1.073, "step": 3650 }, { "epoch": 2.2, "learning_rate": 6.353814989871708e-05, "loss": 1.0774, "step": 3700 }, { "epoch": 2.23, "learning_rate": 6.286293045239702e-05, "loss": 1.0763, "step": 3750 }, { "epoch": 2.26, "learning_rate": 6.218771100607698e-05, "loss": 1.0693, "step": 3800 }, { "epoch": 2.29, "learning_rate": 6.151249155975692e-05, "loss": 1.0736, "step": 3850 }, { "epoch": 2.32, "learning_rate": 6.083727211343687e-05, "loss": 1.0762, "step": 3900 }, { "epoch": 2.35, "learning_rate": 6.016205266711682e-05, "loss": 1.0691, "step": 3950 }, { "epoch": 2.38, "learning_rate": 5.948683322079677e-05, "loss": 1.0668, "step": 4000 }, { "epoch": 2.41, "learning_rate": 5.88116137744767e-05, "loss": 1.0638, "step": 4050 }, { "epoch": 2.44, "learning_rate": 5.813639432815665e-05, "loss": 1.0671, "step": 4100 }, { "epoch": 2.47, "learning_rate": 5.7461174881836596e-05, "loss": 1.0595, "step": 4150 }, { "epoch": 2.5, "learning_rate": 5.6785955435516544e-05, "loss": 1.0606, "step": 4200 }, { "epoch": 2.53, "learning_rate": 5.61107359891965e-05, "loss": 1.0621, "step": 4250 }, { "epoch": 2.56, "learning_rate": 5.5435516542876445e-05, "loss": 1.0633, "step": 4300 }, { "epoch": 2.59, "learning_rate": 5.476029709655638e-05, "loss": 1.056, "step": 4350 }, { "epoch": 2.62, "learning_rate": 5.4085077650236326e-05, "loss": 1.0598, "step": 4400 }, { "epoch": 2.65, "learning_rate": 5.3409858203916274e-05, "loss": 1.0532, "step": 4450 }, { "epoch": 2.68, "learning_rate": 5.273463875759622e-05, "loss": 1.0536, "step": 4500 }, { "epoch": 2.71, "learning_rate": 5.205941931127617e-05, "loss": 1.0522, "step": 4550 }, { "epoch": 2.74, "learning_rate": 5.1384199864956116e-05, "loss": 1.0478, "step": 4600 }, { "epoch": 2.77, "learning_rate": 5.0708980418636057e-05, "loss": 1.0514, "step": 4650 }, { "epoch": 2.8, "learning_rate": 5.0033760972316004e-05, "loss": 1.0526, "step": 4700 }, { "epoch": 2.83, "learning_rate": 4.935854152599595e-05, "loss": 1.0476, "step": 4750 }, { "epoch": 2.86, "learning_rate": 4.86833220796759e-05, "loss": 1.0459, "step": 4800 }, { "epoch": 2.88, "learning_rate": 4.8008102633355846e-05, "loss": 1.0463, "step": 4850 }, { "epoch": 2.91, "learning_rate": 4.733288318703579e-05, "loss": 1.0469, "step": 4900 }, { "epoch": 2.94, "learning_rate": 4.6657663740715734e-05, "loss": 1.0444, "step": 4950 }, { "epoch": 2.97, "learning_rate": 4.598244429439568e-05, "loss": 1.0461, "step": 5000 }, { "epoch": 3.0, "learning_rate": 4.530722484807562e-05, "loss": 1.0613, "step": 5050 }, { "epoch": 3.03, "learning_rate": 4.463200540175557e-05, "loss": 1.0418, "step": 5100 }, { "epoch": 3.06, "learning_rate": 4.395678595543552e-05, "loss": 1.0415, "step": 5150 }, { "epoch": 3.09, "learning_rate": 4.3281566509115464e-05, "loss": 1.0401, "step": 5200 }, { "epoch": 3.12, "learning_rate": 4.260634706279541e-05, "loss": 1.0404, "step": 5250 }, { "epoch": 3.15, "learning_rate": 4.193112761647536e-05, "loss": 1.0372, "step": 5300 }, { "epoch": 3.18, "learning_rate": 4.125590817015531e-05, "loss": 1.0423, "step": 5350 }, { "epoch": 3.21, "learning_rate": 4.058068872383525e-05, "loss": 1.0343, "step": 5400 }, { "epoch": 3.24, "learning_rate": 3.9905469277515195e-05, "loss": 1.0374, "step": 5450 }, { "epoch": 3.27, "learning_rate": 3.923024983119514e-05, "loss": 1.0355, "step": 5500 }, { "epoch": 3.3, "learning_rate": 3.855503038487508e-05, "loss": 1.0312, "step": 5550 }, { "epoch": 3.33, "learning_rate": 3.787981093855503e-05, "loss": 1.0374, "step": 5600 }, { "epoch": 3.36, "learning_rate": 3.720459149223498e-05, "loss": 1.034, "step": 5650 }, { "epoch": 3.39, "learning_rate": 3.6529372045914925e-05, "loss": 1.0327, "step": 5700 }, { "epoch": 3.42, "learning_rate": 3.585415259959487e-05, "loss": 1.0317, "step": 5750 }, { "epoch": 3.45, "learning_rate": 3.517893315327482e-05, "loss": 1.0317, "step": 5800 }, { "epoch": 3.48, "learning_rate": 3.450371370695476e-05, "loss": 1.028, "step": 5850 }, { "epoch": 3.51, "learning_rate": 3.382849426063471e-05, "loss": 1.027, "step": 5900 }, { "epoch": 3.54, "learning_rate": 3.3153274814314655e-05, "loss": 1.0253, "step": 5950 }, { "epoch": 3.57, "learning_rate": 3.2478055367994596e-05, "loss": 1.0291, "step": 6000 }, { "epoch": 3.6, "learning_rate": 3.180283592167454e-05, "loss": 1.0266, "step": 6050 }, { "epoch": 3.63, "learning_rate": 3.112761647535449e-05, "loss": 1.0311, "step": 6100 }, { "epoch": 3.66, "learning_rate": 3.0452397029034435e-05, "loss": 1.0281, "step": 6150 }, { "epoch": 3.69, "learning_rate": 2.9777177582714382e-05, "loss": 1.0238, "step": 6200 }, { "epoch": 3.72, "learning_rate": 2.910195813639433e-05, "loss": 1.0248, "step": 6250 }, { "epoch": 3.75, "learning_rate": 2.8426738690074277e-05, "loss": 1.0235, "step": 6300 }, { "epoch": 3.78, "learning_rate": 2.775151924375422e-05, "loss": 1.0252, "step": 6350 }, { "epoch": 3.81, "learning_rate": 2.7076299797434168e-05, "loss": 1.0221, "step": 6400 }, { "epoch": 3.84, "learning_rate": 2.6401080351114116e-05, "loss": 1.0204, "step": 6450 }, { "epoch": 3.87, "learning_rate": 2.572586090479406e-05, "loss": 1.0205, "step": 6500 }, { "epoch": 3.9, "learning_rate": 2.5050641458474007e-05, "loss": 1.0207, "step": 6550 }, { "epoch": 3.93, "learning_rate": 2.437542201215395e-05, "loss": 1.0165, "step": 6600 }, { "epoch": 3.96, "learning_rate": 2.37002025658339e-05, "loss": 1.0182, "step": 6650 }, { "epoch": 3.99, "learning_rate": 2.3024983119513842e-05, "loss": 1.0165, "step": 6700 }, { "epoch": 4.02, "learning_rate": 2.234976367319379e-05, "loss": 1.0316, "step": 6750 }, { "epoch": 4.05, "learning_rate": 2.1674544226873737e-05, "loss": 1.0146, "step": 6800 }, { "epoch": 4.07, "learning_rate": 2.099932478055368e-05, "loss": 1.015, "step": 6850 }, { "epoch": 4.1, "learning_rate": 2.0324105334233625e-05, "loss": 1.0156, "step": 6900 }, { "epoch": 4.13, "learning_rate": 1.9648885887913573e-05, "loss": 1.016, "step": 6950 }, { "epoch": 4.16, "learning_rate": 1.8973666441593517e-05, "loss": 1.0149, "step": 7000 }, { "epoch": 4.19, "learning_rate": 1.8298446995273467e-05, "loss": 1.0127, "step": 7050 }, { "epoch": 4.22, "learning_rate": 1.762322754895341e-05, "loss": 1.0085, "step": 7100 }, { "epoch": 4.25, "learning_rate": 1.6948008102633355e-05, "loss": 1.0151, "step": 7150 }, { "epoch": 4.28, "learning_rate": 1.6272788656313303e-05, "loss": 1.0136, "step": 7200 }, { "epoch": 4.31, "learning_rate": 1.5597569209993247e-05, "loss": 1.0077, "step": 7250 }, { "epoch": 4.34, "learning_rate": 1.4922349763673194e-05, "loss": 1.0103, "step": 7300 }, { "epoch": 4.37, "learning_rate": 1.4247130317353142e-05, "loss": 1.0155, "step": 7350 }, { "epoch": 4.4, "learning_rate": 1.3571910871033086e-05, "loss": 1.0098, "step": 7400 }, { "epoch": 4.43, "learning_rate": 1.2896691424713031e-05, "loss": 1.0093, "step": 7450 }, { "epoch": 4.46, "learning_rate": 1.2221471978392979e-05, "loss": 1.0112, "step": 7500 }, { "epoch": 4.49, "learning_rate": 1.1546252532072925e-05, "loss": 1.0075, "step": 7550 }, { "epoch": 4.52, "learning_rate": 1.087103308575287e-05, "loss": 1.0086, "step": 7600 }, { "epoch": 4.55, "learning_rate": 1.0195813639432816e-05, "loss": 1.0072, "step": 7650 }, { "epoch": 4.58, "learning_rate": 9.520594193112763e-06, "loss": 1.0057, "step": 7700 }, { "epoch": 4.61, "learning_rate": 8.845374746792707e-06, "loss": 1.0101, "step": 7750 }, { "epoch": 4.64, "learning_rate": 8.170155300472653e-06, "loss": 1.006, "step": 7800 }, { "epoch": 4.67, "learning_rate": 7.4949358541526005e-06, "loss": 1.0079, "step": 7850 }, { "epoch": 4.7, "learning_rate": 6.819716407832546e-06, "loss": 1.0066, "step": 7900 }, { "epoch": 4.73, "learning_rate": 6.144496961512492e-06, "loss": 1.0113, "step": 7950 }, { "epoch": 4.76, "learning_rate": 5.4692775151924376e-06, "loss": 1.0065, "step": 8000 } ], "max_steps": 8405, "num_train_epochs": 5, "total_flos": 5.140843815167534e+17, "trial_name": null, "trial_params": null }