{ "best_metric": null, "best_model_checkpoint": null, "epoch": 12.0, "global_step": 57432, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 4.826577517760134e-05, "loss": 2.376, "step": 500 }, { "epoch": 0.21, "learning_rate": 4.652458559687979e-05, "loss": 1.9479, "step": 1000 }, { "epoch": 0.31, "learning_rate": 4.478339601615824e-05, "loss": 1.8326, "step": 1500 }, { "epoch": 0.42, "learning_rate": 4.304568881459813e-05, "loss": 1.7996, "step": 2000 }, { "epoch": 0.52, "learning_rate": 4.1304499233876586e-05, "loss": 1.7476, "step": 2500 }, { "epoch": 0.63, "learning_rate": 3.956330965315504e-05, "loss": 1.7537, "step": 3000 }, { "epoch": 0.73, "learning_rate": 3.782212007243349e-05, "loss": 1.7133, "step": 3500 }, { "epoch": 0.84, "learning_rate": 3.608093049171194e-05, "loss": 1.6888, "step": 4000 }, { "epoch": 0.94, "learning_rate": 3.434322329015183e-05, "loss": 1.6906, "step": 4500 }, { "epoch": 1.0, "eval_loss": 1.6714558601379395, "eval_runtime": 47.5103, "eval_samples_per_second": 9.577, "eval_steps_per_second": 1.2, "step": 4786 }, { "epoch": 1.04, "learning_rate": 3.2602033709430284e-05, "loss": 1.6771, "step": 5000 }, { "epoch": 1.15, "learning_rate": 3.086084412870874e-05, "loss": 1.6319, "step": 5500 }, { "epoch": 1.25, "learning_rate": 2.9119654547987185e-05, "loss": 1.6327, "step": 6000 }, { "epoch": 1.36, "learning_rate": 2.737846496726564e-05, "loss": 1.6368, "step": 6500 }, { "epoch": 1.46, "learning_rate": 2.563727538654409e-05, "loss": 1.6329, "step": 7000 }, { "epoch": 1.57, "learning_rate": 2.3899568184983983e-05, "loss": 1.6143, "step": 7500 }, { "epoch": 1.67, "learning_rate": 2.2158378604262433e-05, "loss": 1.6044, "step": 8000 }, { "epoch": 1.78, "learning_rate": 2.0417189023540884e-05, "loss": 1.6295, "step": 8500 }, { "epoch": 1.88, "learning_rate": 1.8675999442819337e-05, "loss": 1.6023, "step": 9000 }, { "epoch": 1.98, "learning_rate": 1.6934809862097788e-05, "loss": 1.6021, "step": 9500 }, { "epoch": 2.0, "eval_loss": 1.6306262016296387, "eval_runtime": 43.7183, "eval_samples_per_second": 10.408, "eval_steps_per_second": 1.304, "step": 9572 }, { "epoch": 2.09, "learning_rate": 1.5193620281376237e-05, "loss": 1.5842, "step": 10000 }, { "epoch": 2.19, "learning_rate": 1.3452430700654689e-05, "loss": 1.587, "step": 10500 }, { "epoch": 2.3, "learning_rate": 1.1714723499094582e-05, "loss": 1.5566, "step": 11000 }, { "epoch": 2.4, "learning_rate": 9.973533918373034e-06, "loss": 1.6022, "step": 11500 }, { "epoch": 2.51, "learning_rate": 8.232344337651483e-06, "loss": 1.5785, "step": 12000 }, { "epoch": 2.61, "learning_rate": 6.491154756929935e-06, "loss": 1.5775, "step": 12500 }, { "epoch": 2.72, "learning_rate": 4.753447555369829e-06, "loss": 1.5684, "step": 13000 }, { "epoch": 2.82, "learning_rate": 3.01225797464828e-06, "loss": 1.5719, "step": 13500 }, { "epoch": 2.93, "learning_rate": 1.2710683939267307e-06, "loss": 1.5819, "step": 14000 }, { "epoch": 3.0, "eval_loss": 1.6228220462799072, "eval_runtime": 42.6413, "eval_samples_per_second": 10.67, "eval_steps_per_second": 1.337, "step": 14358 }, { "epoch": 3.0, "step": 14358, "total_flos": 1.5006523981824e+16, "train_loss": 1.6758505107466766, "train_runtime": 8429.8505, "train_samples_per_second": 3.406, "train_steps_per_second": 1.703 }, { "epoch": 3.03, "learning_rate": 4.987811672934949e-05, "loss": 1.5627, "step": 14500 }, { "epoch": 3.13, "learning_rate": 4.944368992895947e-05, "loss": 1.5686, "step": 15000 }, { "epoch": 3.24, "learning_rate": 4.900839253377908e-05, "loss": 1.5776, "step": 15500 }, { "epoch": 3.34, "learning_rate": 4.85730951385987e-05, "loss": 1.5699, "step": 16000 }, { "epoch": 3.45, "learning_rate": 4.8137797743418304e-05, "loss": 1.5476, "step": 16500 }, { "epoch": 3.55, "learning_rate": 4.770250034823792e-05, "loss": 1.5448, "step": 17000 }, { "epoch": 3.66, "learning_rate": 4.726720295305753e-05, "loss": 1.5656, "step": 17500 }, { "epoch": 3.76, "learning_rate": 4.683277615266751e-05, "loss": 1.5694, "step": 18000 }, { "epoch": 3.87, "learning_rate": 4.6397478757487115e-05, "loss": 1.542, "step": 18500 }, { "epoch": 3.97, "learning_rate": 4.596218136230673e-05, "loss": 1.5474, "step": 19000 }, { "epoch": 4.0, "eval_loss": 1.5994045734405518, "eval_runtime": 39.2637, "eval_samples_per_second": 11.588, "eval_steps_per_second": 1.452, "step": 19144 }, { "epoch": 4.07, "learning_rate": 4.552688396712634e-05, "loss": 1.5336, "step": 19500 }, { "epoch": 4.18, "learning_rate": 4.5091586571945955e-05, "loss": 1.5259, "step": 20000 }, { "epoch": 4.28, "learning_rate": 4.465628917676557e-05, "loss": 1.495, "step": 20500 }, { "epoch": 4.39, "learning_rate": 4.422099178158518e-05, "loss": 1.5003, "step": 21000 }, { "epoch": 4.49, "learning_rate": 4.378656498119515e-05, "loss": 1.4912, "step": 21500 }, { "epoch": 4.6, "learning_rate": 4.3351267586014767e-05, "loss": 1.491, "step": 22000 }, { "epoch": 4.7, "learning_rate": 4.291597019083438e-05, "loss": 1.4888, "step": 22500 }, { "epoch": 4.81, "learning_rate": 4.2480672795653994e-05, "loss": 1.5098, "step": 23000 }, { "epoch": 4.91, "learning_rate": 4.2046245995263964e-05, "loss": 1.5011, "step": 23500 }, { "epoch": 5.0, "eval_loss": 1.5889110565185547, "eval_runtime": 42.2475, "eval_samples_per_second": 10.77, "eval_steps_per_second": 1.349, "step": 23930 }, { "epoch": 5.01, "learning_rate": 4.161094860008358e-05, "loss": 1.4888, "step": 24000 }, { "epoch": 5.12, "learning_rate": 4.117565120490319e-05, "loss": 1.4664, "step": 24500 }, { "epoch": 5.22, "learning_rate": 4.0740353809722805e-05, "loss": 1.455, "step": 25000 }, { "epoch": 5.33, "learning_rate": 4.030505641454242e-05, "loss": 1.4491, "step": 25500 }, { "epoch": 5.43, "learning_rate": 3.986975901936203e-05, "loss": 1.4543, "step": 26000 }, { "epoch": 5.54, "learning_rate": 3.9435332218972e-05, "loss": 1.4588, "step": 26500 }, { "epoch": 5.64, "learning_rate": 3.9000034823791616e-05, "loss": 1.446, "step": 27000 }, { "epoch": 5.75, "learning_rate": 3.856473742861123e-05, "loss": 1.4386, "step": 27500 }, { "epoch": 5.85, "learning_rate": 3.812944003343084e-05, "loss": 1.4473, "step": 28000 }, { "epoch": 5.95, "learning_rate": 3.7694142638250456e-05, "loss": 1.4724, "step": 28500 }, { "epoch": 6.0, "eval_loss": 1.5893619060516357, "eval_runtime": 39.6175, "eval_samples_per_second": 11.485, "eval_steps_per_second": 1.439, "step": 28716 }, { "epoch": 6.06, "learning_rate": 3.725884524307007e-05, "loss": 1.4439, "step": 29000 }, { "epoch": 6.16, "learning_rate": 3.6823547847889676e-05, "loss": 1.4021, "step": 29500 }, { "epoch": 6.27, "learning_rate": 3.6388250452709297e-05, "loss": 1.4147, "step": 30000 }, { "epoch": 6.37, "learning_rate": 3.595382365231927e-05, "loss": 1.4157, "step": 30500 }, { "epoch": 6.48, "learning_rate": 3.551852625713888e-05, "loss": 1.417, "step": 31000 }, { "epoch": 6.58, "learning_rate": 3.508322886195849e-05, "loss": 1.4094, "step": 31500 }, { "epoch": 6.69, "learning_rate": 3.464793146677811e-05, "loss": 1.4209, "step": 32000 }, { "epoch": 6.79, "learning_rate": 3.4212634071597714e-05, "loss": 1.415, "step": 32500 }, { "epoch": 6.9, "learning_rate": 3.377907786599805e-05, "loss": 1.4128, "step": 33000 }, { "epoch": 7.0, "learning_rate": 3.334378047081766e-05, "loss": 1.414, "step": 33500 }, { "epoch": 7.0, "eval_loss": 1.5933945178985596, "eval_runtime": 39.5291, "eval_samples_per_second": 11.51, "eval_steps_per_second": 1.442, "step": 33502 }, { "epoch": 7.1, "learning_rate": 3.2908483075637276e-05, "loss": 1.358, "step": 34000 }, { "epoch": 7.21, "learning_rate": 3.247318568045689e-05, "loss": 1.3821, "step": 34500 }, { "epoch": 7.31, "learning_rate": 3.20378882852765e-05, "loss": 1.377, "step": 35000 }, { "epoch": 7.42, "learning_rate": 3.1602590890096116e-05, "loss": 1.3772, "step": 35500 }, { "epoch": 7.52, "learning_rate": 3.116729349491573e-05, "loss": 1.3777, "step": 36000 }, { "epoch": 7.63, "learning_rate": 3.0731996099735337e-05, "loss": 1.3672, "step": 36500 }, { "epoch": 7.73, "learning_rate": 3.0298439894135678e-05, "loss": 1.3949, "step": 37000 }, { "epoch": 7.84, "learning_rate": 2.9864013093745645e-05, "loss": 1.3626, "step": 37500 }, { "epoch": 7.94, "learning_rate": 2.9428715698565262e-05, "loss": 1.3939, "step": 38000 }, { "epoch": 8.0, "eval_loss": 1.6080235242843628, "eval_runtime": 39.1335, "eval_samples_per_second": 11.627, "eval_steps_per_second": 1.457, "step": 38288 }, { "epoch": 8.04, "learning_rate": 2.8993418303384872e-05, "loss": 1.3667, "step": 38500 }, { "epoch": 8.15, "learning_rate": 2.855812090820449e-05, "loss": 1.336, "step": 39000 }, { "epoch": 8.25, "learning_rate": 2.81228235130241e-05, "loss": 1.3212, "step": 39500 }, { "epoch": 8.36, "learning_rate": 2.768752611784371e-05, "loss": 1.3235, "step": 40000 }, { "epoch": 8.46, "learning_rate": 2.7252228722663326e-05, "loss": 1.3649, "step": 40500 }, { "epoch": 8.57, "learning_rate": 2.6816931327482936e-05, "loss": 1.3423, "step": 41000 }, { "epoch": 8.67, "learning_rate": 2.6381633932302553e-05, "loss": 1.3396, "step": 41500 }, { "epoch": 8.78, "learning_rate": 2.5946336537122163e-05, "loss": 1.3528, "step": 42000 }, { "epoch": 8.88, "learning_rate": 2.5511909736732137e-05, "loss": 1.3488, "step": 42500 }, { "epoch": 8.98, "learning_rate": 2.5076612341551747e-05, "loss": 1.3448, "step": 43000 }, { "epoch": 9.0, "eval_loss": 1.626845121383667, "eval_runtime": 39.7529, "eval_samples_per_second": 11.446, "eval_steps_per_second": 1.434, "step": 43074 }, { "epoch": 9.09, "learning_rate": 2.4641314946371364e-05, "loss": 1.3208, "step": 43500 }, { "epoch": 9.19, "learning_rate": 2.4206017551190974e-05, "loss": 1.3107, "step": 44000 }, { "epoch": 9.3, "learning_rate": 2.377159075080095e-05, "loss": 1.3029, "step": 44500 }, { "epoch": 9.4, "learning_rate": 2.3336293355620562e-05, "loss": 1.3034, "step": 45000 }, { "epoch": 9.51, "learning_rate": 2.2900995960440175e-05, "loss": 1.3041, "step": 45500 }, { "epoch": 9.61, "learning_rate": 2.2466569160050146e-05, "loss": 1.3025, "step": 46000 }, { "epoch": 9.72, "learning_rate": 2.203127176486976e-05, "loss": 1.3186, "step": 46500 }, { "epoch": 9.82, "learning_rate": 2.1595974369689373e-05, "loss": 1.3185, "step": 47000 }, { "epoch": 9.92, "learning_rate": 2.1160676974508987e-05, "loss": 1.3227, "step": 47500 }, { "epoch": 10.0, "eval_loss": 1.6497271060943604, "eval_runtime": 39.0443, "eval_samples_per_second": 11.653, "eval_steps_per_second": 1.46, "step": 47860 }, { "epoch": 10.03, "learning_rate": 2.07253795793286e-05, "loss": 1.3038, "step": 48000 }, { "epoch": 10.13, "learning_rate": 2.029008218414821e-05, "loss": 1.2825, "step": 48500 }, { "epoch": 10.24, "learning_rate": 1.9854784788967824e-05, "loss": 1.2657, "step": 49000 }, { "epoch": 10.34, "learning_rate": 1.9419487393787437e-05, "loss": 1.2851, "step": 49500 }, { "epoch": 10.45, "learning_rate": 1.898418999860705e-05, "loss": 1.294, "step": 50000 }, { "epoch": 10.55, "learning_rate": 1.8549763198217025e-05, "loss": 1.2857, "step": 50500 }, { "epoch": 10.66, "learning_rate": 1.8114465803036635e-05, "loss": 1.2665, "step": 51000 }, { "epoch": 10.76, "learning_rate": 1.7679168407856248e-05, "loss": 1.273, "step": 51500 }, { "epoch": 10.87, "learning_rate": 1.724387101267586e-05, "loss": 1.2838, "step": 52000 }, { "epoch": 10.97, "learning_rate": 1.6809444212285836e-05, "loss": 1.2873, "step": 52500 }, { "epoch": 11.0, "eval_loss": 1.673952341079712, "eval_runtime": 38.6787, "eval_samples_per_second": 11.764, "eval_steps_per_second": 1.474, "step": 52646 }, { "epoch": 11.07, "learning_rate": 1.637414681710545e-05, "loss": 1.2753, "step": 53000 }, { "epoch": 11.18, "learning_rate": 1.593884942192506e-05, "loss": 1.2625, "step": 53500 }, { "epoch": 11.28, "learning_rate": 1.5503552026744673e-05, "loss": 1.2471, "step": 54000 }, { "epoch": 11.39, "learning_rate": 1.5068254631564285e-05, "loss": 1.2523, "step": 54500 }, { "epoch": 11.49, "learning_rate": 1.463382783117426e-05, "loss": 1.2534, "step": 55000 }, { "epoch": 11.6, "learning_rate": 1.4199401030784231e-05, "loss": 1.2606, "step": 55500 }, { "epoch": 11.7, "learning_rate": 1.3764103635603845e-05, "loss": 1.244, "step": 56000 }, { "epoch": 11.81, "learning_rate": 1.332967683521382e-05, "loss": 1.2577, "step": 56500 }, { "epoch": 11.91, "learning_rate": 1.2894379440033432e-05, "loss": 1.2671, "step": 57000 }, { "epoch": 12.0, "eval_loss": 1.7010221481323242, "eval_runtime": 38.3243, "eval_samples_per_second": 11.872, "eval_steps_per_second": 1.487, "step": 57432 }, { "epoch": 12.0, "step": 57432, "total_flos": 6.0026095927296e+16, "train_loss": 1.0414019944791026, "train_runtime": 23906.9056, "train_samples_per_second": 4.805, "train_steps_per_second": 2.402 } ], "max_steps": 57432, "num_train_epochs": 12, "total_flos": 6.0026095927296e+16, "trial_name": null, "trial_params": null }