{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 44157, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 1.9774441198450984e-05, "loss": 2.5305, "step": 500 }, { "epoch": 0.07, "learning_rate": 1.9548429467581584e-05, "loss": 2.4174, "step": 1000 }, { "epoch": 0.1, "learning_rate": 1.9322417736712188e-05, "loss": 2.4269, "step": 1500 }, { "epoch": 0.14, "learning_rate": 1.909595307652241e-05, "loss": 2.3693, "step": 2000 }, { "epoch": 0.17, "learning_rate": 1.8869488416332635e-05, "loss": 2.3566, "step": 2500 }, { "epoch": 0.2, "learning_rate": 1.8643023756142857e-05, "loss": 2.3586, "step": 3000 }, { "epoch": 0.24, "learning_rate": 1.841655909595308e-05, "loss": 2.3499, "step": 3500 }, { "epoch": 0.27, "learning_rate": 1.81900944357633e-05, "loss": 2.3594, "step": 4000 }, { "epoch": 0.31, "learning_rate": 1.7963629775573522e-05, "loss": 2.345, "step": 4500 }, { "epoch": 0.34, "learning_rate": 1.7737165115383744e-05, "loss": 2.321, "step": 5000 }, { "epoch": 0.37, "learning_rate": 1.7510700455193966e-05, "loss": 2.3003, "step": 5500 }, { "epoch": 0.41, "learning_rate": 1.728423579500419e-05, "loss": 2.3128, "step": 6000 }, { "epoch": 0.44, "learning_rate": 1.7058224064134795e-05, "loss": 2.3011, "step": 6500 }, { "epoch": 0.48, "learning_rate": 1.6831759403945017e-05, "loss": 2.3096, "step": 7000 }, { "epoch": 0.51, "learning_rate": 1.660529474375524e-05, "loss": 2.3256, "step": 7500 }, { "epoch": 0.54, "learning_rate": 1.637883008356546e-05, "loss": 2.2922, "step": 8000 }, { "epoch": 0.58, "learning_rate": 1.6152365423375686e-05, "loss": 2.3021, "step": 8500 }, { "epoch": 0.61, "learning_rate": 1.5925900763185907e-05, "loss": 2.3011, "step": 9000 }, { "epoch": 0.65, "learning_rate": 1.569943610299613e-05, "loss": 2.2814, "step": 9500 }, { "epoch": 0.68, "learning_rate": 1.547297144280635e-05, "loss": 2.2816, "step": 10000 }, { "epoch": 0.71, "learning_rate": 1.5246959711936953e-05, "loss": 2.2801, "step": 10500 }, { "epoch": 0.75, "learning_rate": 1.5020947981067555e-05, "loss": 2.2993, "step": 11000 }, { "epoch": 0.78, "learning_rate": 1.4794483320877777e-05, "loss": 2.3057, "step": 11500 }, { "epoch": 0.82, "learning_rate": 1.4568018660688002e-05, "loss": 2.2701, "step": 12000 }, { "epoch": 0.85, "learning_rate": 1.4341554000498224e-05, "loss": 2.2742, "step": 12500 }, { "epoch": 0.88, "learning_rate": 1.4115089340308446e-05, "loss": 2.2872, "step": 13000 }, { "epoch": 0.92, "learning_rate": 1.388862468011867e-05, "loss": 2.256, "step": 13500 }, { "epoch": 0.95, "learning_rate": 1.3662160019928891e-05, "loss": 2.2959, "step": 14000 }, { "epoch": 0.99, "learning_rate": 1.3435695359739113e-05, "loss": 2.2583, "step": 14500 }, { "epoch": 1.0, "eval_loss": 2.116374969482422, "eval_runtime": 353.9363, "eval_samples_per_second": 166.341, "eval_steps_per_second": 10.397, "step": 14719 }, { "epoch": 1.02, "learning_rate": 1.3209230699549337e-05, "loss": 2.2496, "step": 15000 }, { "epoch": 1.05, "learning_rate": 1.2983218968679939e-05, "loss": 2.2561, "step": 15500 }, { "epoch": 1.09, "learning_rate": 1.275675430849016e-05, "loss": 2.2408, "step": 16000 }, { "epoch": 1.12, "learning_rate": 1.2530289648300384e-05, "loss": 2.2835, "step": 16500 }, { "epoch": 1.15, "learning_rate": 1.2303824988110606e-05, "loss": 2.2636, "step": 17000 }, { "epoch": 1.19, "learning_rate": 1.2077360327920828e-05, "loss": 2.2751, "step": 17500 }, { "epoch": 1.22, "learning_rate": 1.1850895667731051e-05, "loss": 2.279, "step": 18000 }, { "epoch": 1.26, "learning_rate": 1.1624431007541275e-05, "loss": 2.27, "step": 18500 }, { "epoch": 1.29, "learning_rate": 1.1398419276671877e-05, "loss": 2.2436, "step": 19000 }, { "epoch": 1.32, "learning_rate": 1.1171954616482099e-05, "loss": 2.2575, "step": 19500 }, { "epoch": 1.36, "learning_rate": 1.094548995629232e-05, "loss": 2.2669, "step": 20000 }, { "epoch": 1.39, "learning_rate": 1.0719025296102544e-05, "loss": 2.269, "step": 20500 }, { "epoch": 1.43, "learning_rate": 1.0492560635912768e-05, "loss": 2.2429, "step": 21000 }, { "epoch": 1.46, "learning_rate": 1.026609597572299e-05, "loss": 2.2424, "step": 21500 }, { "epoch": 1.49, "learning_rate": 1.0039631315533211e-05, "loss": 2.2549, "step": 22000 }, { "epoch": 1.53, "learning_rate": 9.813166655343435e-06, "loss": 2.2623, "step": 22500 }, { "epoch": 1.56, "learning_rate": 9.587154924474037e-06, "loss": 2.2634, "step": 23000 }, { "epoch": 1.6, "learning_rate": 9.360690264284259e-06, "loss": 2.2674, "step": 23500 }, { "epoch": 1.63, "learning_rate": 9.134225604094482e-06, "loss": 2.2611, "step": 24000 }, { "epoch": 1.66, "learning_rate": 8.907760943904704e-06, "loss": 2.2418, "step": 24500 }, { "epoch": 1.7, "learning_rate": 8.681749213035306e-06, "loss": 2.2493, "step": 25000 }, { "epoch": 1.73, "learning_rate": 8.45528455284553e-06, "loss": 2.2259, "step": 25500 }, { "epoch": 1.77, "learning_rate": 8.229272821976131e-06, "loss": 2.2296, "step": 26000 }, { "epoch": 1.8, "learning_rate": 8.002808161786353e-06, "loss": 2.2258, "step": 26500 }, { "epoch": 1.83, "learning_rate": 7.776343501596577e-06, "loss": 2.2407, "step": 27000 }, { "epoch": 1.87, "learning_rate": 7.5498788414067996e-06, "loss": 2.2242, "step": 27500 }, { "epoch": 1.9, "learning_rate": 7.323414181217021e-06, "loss": 2.2227, "step": 28000 }, { "epoch": 1.94, "learning_rate": 7.096949521027244e-06, "loss": 2.2264, "step": 28500 }, { "epoch": 1.97, "learning_rate": 6.870484860837466e-06, "loss": 2.2649, "step": 29000 }, { "epoch": 2.0, "eval_loss": 2.092484951019287, "eval_runtime": 353.9215, "eval_samples_per_second": 166.348, "eval_steps_per_second": 10.398, "step": 29438 }, { "epoch": 2.0, "learning_rate": 6.6440202006476895e-06, "loss": 2.2415, "step": 29500 }, { "epoch": 2.04, "learning_rate": 6.417555540457911e-06, "loss": 2.2354, "step": 30000 }, { "epoch": 2.07, "learning_rate": 6.191090880268135e-06, "loss": 2.2296, "step": 30500 }, { "epoch": 2.11, "learning_rate": 5.965079149398737e-06, "loss": 2.241, "step": 31000 }, { "epoch": 2.14, "learning_rate": 5.738614489208959e-06, "loss": 2.2279, "step": 31500 }, { "epoch": 2.17, "learning_rate": 5.512149829019182e-06, "loss": 2.2405, "step": 32000 }, { "epoch": 2.21, "learning_rate": 5.285685168829404e-06, "loss": 2.2379, "step": 32500 }, { "epoch": 2.24, "learning_rate": 5.059673437960007e-06, "loss": 2.2386, "step": 33000 }, { "epoch": 2.28, "learning_rate": 4.833208777770229e-06, "loss": 2.2298, "step": 33500 }, { "epoch": 2.31, "learning_rate": 4.6067441175804515e-06, "loss": 2.2259, "step": 34000 }, { "epoch": 2.34, "learning_rate": 4.380279457390674e-06, "loss": 2.2537, "step": 34500 }, { "epoch": 2.38, "learning_rate": 4.153814797200897e-06, "loss": 2.2521, "step": 35000 }, { "epoch": 2.41, "learning_rate": 3.927803066331499e-06, "loss": 2.2049, "step": 35500 }, { "epoch": 2.45, "learning_rate": 3.701338406141722e-06, "loss": 2.2317, "step": 36000 }, { "epoch": 2.48, "learning_rate": 3.4748737459519442e-06, "loss": 2.2476, "step": 36500 }, { "epoch": 2.51, "learning_rate": 3.248409085762167e-06, "loss": 2.2435, "step": 37000 }, { "epoch": 2.55, "learning_rate": 3.0219444255723896e-06, "loss": 2.2523, "step": 37500 }, { "epoch": 2.58, "learning_rate": 2.795479765382612e-06, "loss": 2.2264, "step": 38000 }, { "epoch": 2.62, "learning_rate": 2.5694680345132143e-06, "loss": 2.2287, "step": 38500 }, { "epoch": 2.65, "learning_rate": 2.343003374323437e-06, "loss": 2.2238, "step": 39000 }, { "epoch": 2.68, "learning_rate": 2.116991643454039e-06, "loss": 2.2383, "step": 39500 }, { "epoch": 2.72, "learning_rate": 1.8905269832642617e-06, "loss": 2.234, "step": 40000 }, { "epoch": 2.75, "learning_rate": 1.6640623230744844e-06, "loss": 2.2298, "step": 40500 }, { "epoch": 2.79, "learning_rate": 1.4375976628847069e-06, "loss": 2.2237, "step": 41000 }, { "epoch": 2.82, "learning_rate": 1.2111330026949296e-06, "loss": 2.2489, "step": 41500 }, { "epoch": 2.85, "learning_rate": 9.846683425051522e-07, "loss": 2.2263, "step": 42000 }, { "epoch": 2.89, "learning_rate": 7.582036823153747e-07, "loss": 2.2223, "step": 42500 }, { "epoch": 2.92, "learning_rate": 5.317390221255973e-07, "loss": 2.2229, "step": 43000 }, { "epoch": 2.96, "learning_rate": 3.0527436193581997e-07, "loss": 2.2121, "step": 43500 }, { "epoch": 2.99, "learning_rate": 7.880970174604252e-08, "loss": 2.209, "step": 44000 }, { "epoch": 3.0, "eval_loss": 2.086761713027954, "eval_runtime": 355.3374, "eval_samples_per_second": 165.685, "eval_steps_per_second": 10.356, "step": 44157 } ], "max_steps": 44157, "num_train_epochs": 3, "total_flos": 6.916093810478285e+16, "trial_name": null, "trial_params": null }