{ "best_metric": null, "best_model_checkpoint": null, "epoch": 116.47855530474041, "global_step": 774000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.68, "learning_rate": 4.988713318284425e-05, "loss": 6.9537, "step": 4500 }, { "epoch": 0.68, "eval_loss": 6.797055244445801, "eval_runtime": 118.7849, "eval_samples_per_second": 103.456, "eval_steps_per_second": 6.474, "step": 4500 }, { "epoch": 1.35, "learning_rate": 4.9774266365688486e-05, "loss": 6.6451, "step": 9000 }, { "epoch": 1.35, "eval_loss": 6.689827919006348, "eval_runtime": 118.3538, "eval_samples_per_second": 103.833, "eval_steps_per_second": 6.497, "step": 9000 }, { "epoch": 2.03, "learning_rate": 4.966139954853273e-05, "loss": 6.5518, "step": 13500 }, { "epoch": 2.03, "eval_loss": 6.630918979644775, "eval_runtime": 118.3302, "eval_samples_per_second": 103.853, "eval_steps_per_second": 6.499, "step": 13500 }, { "epoch": 2.71, "learning_rate": 4.954853273137698e-05, "loss": 6.4713, "step": 18000 }, { "epoch": 2.71, "eval_loss": 6.497533798217773, "eval_runtime": 118.3323, "eval_samples_per_second": 103.852, "eval_steps_per_second": 6.499, "step": 18000 }, { "epoch": 3.39, "learning_rate": 4.9435665914221216e-05, "loss": 6.0827, "step": 22500 }, { "epoch": 3.39, "eval_loss": 5.684892654418945, "eval_runtime": 118.3572, "eval_samples_per_second": 103.83, "eval_steps_per_second": 6.497, "step": 22500 }, { "epoch": 4.06, "learning_rate": 4.932279909706546e-05, "loss": 5.0663, "step": 27000 }, { "epoch": 4.06, "eval_loss": 4.617100715637207, "eval_runtime": 118.3432, "eval_samples_per_second": 103.842, "eval_steps_per_second": 6.498, "step": 27000 }, { "epoch": 4.74, "learning_rate": 4.920993227990971e-05, "loss": 4.3025, "step": 31500 }, { "epoch": 4.74, "eval_loss": 4.159748077392578, "eval_runtime": 118.3398, "eval_samples_per_second": 103.845, "eval_steps_per_second": 6.498, "step": 31500 }, { "epoch": 5.42, "learning_rate": 4.909706546275395e-05, "loss": 3.9214, "step": 36000 }, { "epoch": 5.42, "eval_loss": 3.8544375896453857, "eval_runtime": 118.3517, "eval_samples_per_second": 103.835, "eval_steps_per_second": 6.498, "step": 36000 }, { "epoch": 6.09, "learning_rate": 4.89841986455982e-05, "loss": 3.6779, "step": 40500 }, { "epoch": 6.09, "eval_loss": 3.664363384246826, "eval_runtime": 118.2895, "eval_samples_per_second": 103.889, "eval_steps_per_second": 6.501, "step": 40500 }, { "epoch": 6.77, "learning_rate": 4.887133182844244e-05, "loss": 3.502, "step": 45000 }, { "epoch": 6.77, "eval_loss": 3.5141005516052246, "eval_runtime": 118.2983, "eval_samples_per_second": 103.881, "eval_steps_per_second": 6.501, "step": 45000 }, { "epoch": 7.45, "learning_rate": 4.875846501128669e-05, "loss": 3.366, "step": 49500 }, { "epoch": 7.45, "eval_loss": 3.4036142826080322, "eval_runtime": 118.3098, "eval_samples_per_second": 103.871, "eval_steps_per_second": 6.5, "step": 49500 }, { "epoch": 8.13, "learning_rate": 4.864559819413093e-05, "loss": 3.2695, "step": 54000 }, { "epoch": 8.13, "eval_loss": 3.321831226348877, "eval_runtime": 118.3723, "eval_samples_per_second": 103.817, "eval_steps_per_second": 6.496, "step": 54000 }, { "epoch": 8.8, "learning_rate": 4.853273137697517e-05, "loss": 3.1758, "step": 58500 }, { "epoch": 8.8, "eval_loss": 3.2432045936584473, "eval_runtime": 118.355, "eval_samples_per_second": 103.832, "eval_steps_per_second": 6.497, "step": 58500 }, { "epoch": 9.48, "learning_rate": 4.841986455981942e-05, "loss": 3.1008, "step": 63000 }, { "epoch": 9.48, "eval_loss": 3.186511754989624, "eval_runtime": 118.3602, "eval_samples_per_second": 103.827, "eval_steps_per_second": 6.497, "step": 63000 }, { "epoch": 10.16, "learning_rate": 4.830699774266366e-05, "loss": 3.0354, "step": 67500 }, { "epoch": 10.16, "eval_loss": 3.115652561187744, "eval_runtime": 118.3431, "eval_samples_per_second": 103.842, "eval_steps_per_second": 6.498, "step": 67500 }, { "epoch": 10.84, "learning_rate": 4.81941309255079e-05, "loss": 2.9798, "step": 72000 }, { "epoch": 10.84, "eval_loss": 3.0752041339874268, "eval_runtime": 118.2033, "eval_samples_per_second": 103.965, "eval_steps_per_second": 6.506, "step": 72000 }, { "epoch": 11.51, "learning_rate": 4.808126410835215e-05, "loss": 2.9252, "step": 76500 }, { "epoch": 11.51, "eval_loss": 3.028315305709839, "eval_runtime": 118.2129, "eval_samples_per_second": 103.956, "eval_steps_per_second": 6.505, "step": 76500 }, { "epoch": 12.19, "learning_rate": 4.796839729119639e-05, "loss": 2.881, "step": 81000 }, { "epoch": 12.19, "eval_loss": 2.9871439933776855, "eval_runtime": 118.1982, "eval_samples_per_second": 103.969, "eval_steps_per_second": 6.506, "step": 81000 }, { "epoch": 12.87, "learning_rate": 4.785553047404063e-05, "loss": 2.8366, "step": 85500 }, { "epoch": 12.87, "eval_loss": 2.9422881603240967, "eval_runtime": 118.1796, "eval_samples_per_second": 103.986, "eval_steps_per_second": 6.507, "step": 85500 }, { "epoch": 13.54, "learning_rate": 4.774266365688488e-05, "loss": 2.7917, "step": 90000 }, { "epoch": 13.54, "eval_loss": 2.9027907848358154, "eval_runtime": 118.1933, "eval_samples_per_second": 103.974, "eval_steps_per_second": 6.506, "step": 90000 }, { "epoch": 14.22, "learning_rate": 4.762979683972912e-05, "loss": 2.7592, "step": 94500 }, { "epoch": 14.22, "eval_loss": 2.8720462322235107, "eval_runtime": 118.2133, "eval_samples_per_second": 103.956, "eval_steps_per_second": 6.505, "step": 94500 }, { "epoch": 14.9, "learning_rate": 4.751693002257336e-05, "loss": 2.7278, "step": 99000 }, { "epoch": 14.9, "eval_loss": 2.8500328063964844, "eval_runtime": 118.2044, "eval_samples_per_second": 103.964, "eval_steps_per_second": 6.506, "step": 99000 }, { "epoch": 15.58, "learning_rate": 4.740406320541761e-05, "loss": 2.693, "step": 103500 }, { "epoch": 15.58, "eval_loss": 2.817178249359131, "eval_runtime": 118.1867, "eval_samples_per_second": 103.98, "eval_steps_per_second": 6.507, "step": 103500 }, { "epoch": 16.25, "learning_rate": 4.729119638826185e-05, "loss": 2.6645, "step": 108000 }, { "epoch": 16.25, "eval_loss": 2.786304235458374, "eval_runtime": 118.2219, "eval_samples_per_second": 103.949, "eval_steps_per_second": 6.505, "step": 108000 }, { "epoch": 16.93, "learning_rate": 4.71783295711061e-05, "loss": 2.6361, "step": 112500 }, { "epoch": 16.93, "eval_loss": 2.770569324493408, "eval_runtime": 118.2234, "eval_samples_per_second": 103.947, "eval_steps_per_second": 6.505, "step": 112500 }, { "epoch": 17.61, "learning_rate": 4.706546275395034e-05, "loss": 2.6083, "step": 117000 }, { "epoch": 17.61, "eval_loss": 2.7391059398651123, "eval_runtime": 118.2576, "eval_samples_per_second": 103.917, "eval_steps_per_second": 6.503, "step": 117000 }, { "epoch": 18.28, "learning_rate": 4.695259593679459e-05, "loss": 2.5847, "step": 121500 }, { "epoch": 18.28, "eval_loss": 2.718665838241577, "eval_runtime": 118.2124, "eval_samples_per_second": 103.957, "eval_steps_per_second": 6.505, "step": 121500 }, { "epoch": 18.96, "learning_rate": 4.6839729119638834e-05, "loss": 2.5619, "step": 126000 }, { "epoch": 18.96, "eval_loss": 2.7032158374786377, "eval_runtime": 118.2283, "eval_samples_per_second": 103.943, "eval_steps_per_second": 6.504, "step": 126000 }, { "epoch": 19.64, "learning_rate": 4.672686230248307e-05, "loss": 2.5368, "step": 130500 }, { "epoch": 19.64, "eval_loss": 2.6911468505859375, "eval_runtime": 118.3184, "eval_samples_per_second": 103.864, "eval_steps_per_second": 6.499, "step": 130500 }, { "epoch": 20.32, "learning_rate": 4.661399548532732e-05, "loss": 2.5203, "step": 135000 }, { "epoch": 20.32, "eval_loss": 2.666966676712036, "eval_runtime": 118.3095, "eval_samples_per_second": 103.872, "eval_steps_per_second": 6.5, "step": 135000 }, { "epoch": 20.99, "learning_rate": 4.6501128668171564e-05, "loss": 2.4997, "step": 139500 }, { "epoch": 20.99, "eval_loss": 2.6472320556640625, "eval_runtime": 118.1415, "eval_samples_per_second": 104.019, "eval_steps_per_second": 6.509, "step": 139500 }, { "epoch": 21.67, "learning_rate": 4.63882618510158e-05, "loss": 2.4755, "step": 144000 }, { "epoch": 21.67, "eval_loss": 2.6281678676605225, "eval_runtime": 118.147, "eval_samples_per_second": 104.014, "eval_steps_per_second": 6.509, "step": 144000 }, { "epoch": 22.35, "learning_rate": 4.627539503386005e-05, "loss": 2.4593, "step": 148500 }, { "epoch": 22.35, "eval_loss": 2.6077518463134766, "eval_runtime": 118.4066, "eval_samples_per_second": 103.786, "eval_steps_per_second": 6.495, "step": 148500 }, { "epoch": 23.02, "learning_rate": 4.616252821670429e-05, "loss": 2.4468, "step": 153000 }, { "epoch": 23.02, "eval_loss": 2.60119366645813, "eval_runtime": 118.2017, "eval_samples_per_second": 103.966, "eval_steps_per_second": 6.506, "step": 153000 }, { "epoch": 23.7, "learning_rate": 4.604966139954853e-05, "loss": 2.4243, "step": 157500 }, { "epoch": 23.7, "eval_loss": 2.583709239959717, "eval_runtime": 118.0992, "eval_samples_per_second": 104.057, "eval_steps_per_second": 6.511, "step": 157500 }, { "epoch": 24.38, "learning_rate": 4.593679458239278e-05, "loss": 2.4093, "step": 162000 }, { "epoch": 24.38, "eval_loss": 2.5716421604156494, "eval_runtime": 118.1155, "eval_samples_per_second": 104.042, "eval_steps_per_second": 6.511, "step": 162000 }, { "epoch": 25.06, "learning_rate": 4.582392776523702e-05, "loss": 2.396, "step": 166500 }, { "epoch": 25.06, "eval_loss": 2.561039686203003, "eval_runtime": 118.1545, "eval_samples_per_second": 104.008, "eval_steps_per_second": 6.508, "step": 166500 }, { "epoch": 25.73, "learning_rate": 4.571106094808127e-05, "loss": 2.3764, "step": 171000 }, { "epoch": 25.73, "eval_loss": 2.543470859527588, "eval_runtime": 118.1796, "eval_samples_per_second": 103.986, "eval_steps_per_second": 6.507, "step": 171000 }, { "epoch": 26.41, "learning_rate": 4.559819413092551e-05, "loss": 2.3623, "step": 175500 }, { "epoch": 26.41, "eval_loss": 2.5341155529022217, "eval_runtime": 118.1214, "eval_samples_per_second": 104.037, "eval_steps_per_second": 6.51, "step": 175500 }, { "epoch": 27.09, "learning_rate": 4.548532731376975e-05, "loss": 2.3529, "step": 180000 }, { "epoch": 27.09, "eval_loss": 2.5200819969177246, "eval_runtime": 118.324, "eval_samples_per_second": 103.859, "eval_steps_per_second": 6.499, "step": 180000 }, { "epoch": 27.77, "learning_rate": 4.5372460496614e-05, "loss": 2.3393, "step": 184500 }, { "epoch": 27.77, "eval_loss": 2.5085155963897705, "eval_runtime": 118.3431, "eval_samples_per_second": 103.842, "eval_steps_per_second": 6.498, "step": 184500 }, { "epoch": 28.44, "learning_rate": 4.525959367945824e-05, "loss": 2.3247, "step": 189000 }, { "epoch": 28.44, "eval_loss": 2.5002756118774414, "eval_runtime": 118.3427, "eval_samples_per_second": 103.842, "eval_steps_per_second": 6.498, "step": 189000 }, { "epoch": 29.12, "learning_rate": 4.514672686230249e-05, "loss": 2.3127, "step": 193500 }, { "epoch": 29.12, "eval_loss": 2.4838666915893555, "eval_runtime": 118.3361, "eval_samples_per_second": 103.848, "eval_steps_per_second": 6.498, "step": 193500 }, { "epoch": 29.8, "learning_rate": 4.5033860045146734e-05, "loss": 2.3006, "step": 198000 }, { "epoch": 29.8, "eval_loss": 2.480976104736328, "eval_runtime": 118.3346, "eval_samples_per_second": 103.85, "eval_steps_per_second": 6.499, "step": 198000 }, { "epoch": 30.47, "learning_rate": 4.492099322799097e-05, "loss": 2.2896, "step": 202500 }, { "epoch": 30.47, "eval_loss": 2.4641942977905273, "eval_runtime": 118.3398, "eval_samples_per_second": 103.845, "eval_steps_per_second": 6.498, "step": 202500 }, { "epoch": 31.15, "learning_rate": 4.480812641083522e-05, "loss": 2.2789, "step": 207000 }, { "epoch": 31.15, "eval_loss": 2.4657058715820312, "eval_runtime": 118.3281, "eval_samples_per_second": 103.855, "eval_steps_per_second": 6.499, "step": 207000 }, { "epoch": 31.83, "learning_rate": 4.4695259593679463e-05, "loss": 2.2665, "step": 211500 }, { "epoch": 31.83, "eval_loss": 2.4447216987609863, "eval_runtime": 118.3432, "eval_samples_per_second": 103.842, "eval_steps_per_second": 6.498, "step": 211500 }, { "epoch": 32.51, "learning_rate": 4.45823927765237e-05, "loss": 2.2545, "step": 216000 }, { "epoch": 32.51, "eval_loss": 2.4484477043151855, "eval_runtime": 118.3442, "eval_samples_per_second": 103.841, "eval_steps_per_second": 6.498, "step": 216000 }, { "epoch": 33.18, "learning_rate": 4.446952595936795e-05, "loss": 2.2446, "step": 220500 }, { "epoch": 33.18, "eval_loss": 2.4324302673339844, "eval_runtime": 118.3492, "eval_samples_per_second": 103.837, "eval_steps_per_second": 6.498, "step": 220500 }, { "epoch": 33.86, "learning_rate": 4.435665914221219e-05, "loss": 2.2352, "step": 225000 }, { "epoch": 33.86, "eval_loss": 2.426417827606201, "eval_runtime": 118.3545, "eval_samples_per_second": 103.832, "eval_steps_per_second": 6.497, "step": 225000 }, { "epoch": 34.54, "learning_rate": 4.424379232505644e-05, "loss": 2.2218, "step": 229500 }, { "epoch": 34.54, "eval_loss": 2.4175431728363037, "eval_runtime": 121.4712, "eval_samples_per_second": 101.168, "eval_steps_per_second": 6.331, "step": 229500 }, { "epoch": 35.21, "learning_rate": 4.413092550790068e-05, "loss": 2.2153, "step": 234000 }, { "epoch": 35.21, "eval_loss": 2.4122180938720703, "eval_runtime": 121.3685, "eval_samples_per_second": 101.254, "eval_steps_per_second": 6.336, "step": 234000 }, { "epoch": 35.89, "learning_rate": 4.401805869074492e-05, "loss": 2.206, "step": 238500 }, { "epoch": 35.89, "eval_loss": 2.392340660095215, "eval_runtime": 121.4694, "eval_samples_per_second": 101.169, "eval_steps_per_second": 6.331, "step": 238500 }, { "epoch": 36.57, "learning_rate": 4.390519187358917e-05, "loss": 2.1931, "step": 243000 }, { "epoch": 36.57, "eval_loss": 2.386526584625244, "eval_runtime": 121.4241, "eval_samples_per_second": 101.207, "eval_steps_per_second": 6.333, "step": 243000 }, { "epoch": 37.25, "learning_rate": 4.379232505643341e-05, "loss": 2.1876, "step": 247500 }, { "epoch": 37.25, "eval_loss": 2.383101224899292, "eval_runtime": 121.3529, "eval_samples_per_second": 101.267, "eval_steps_per_second": 6.337, "step": 247500 }, { "epoch": 37.92, "learning_rate": 4.367945823927765e-05, "loss": 2.1817, "step": 252000 }, { "epoch": 37.92, "eval_loss": 2.3782711029052734, "eval_runtime": 121.4029, "eval_samples_per_second": 101.225, "eval_steps_per_second": 6.334, "step": 252000 }, { "epoch": 38.6, "learning_rate": 4.35665914221219e-05, "loss": 2.1661, "step": 256500 }, { "epoch": 38.6, "eval_loss": 2.3761754035949707, "eval_runtime": 121.5223, "eval_samples_per_second": 101.126, "eval_steps_per_second": 6.328, "step": 256500 }, { "epoch": 39.28, "learning_rate": 4.3453724604966136e-05, "loss": 2.1635, "step": 261000 }, { "epoch": 39.28, "eval_loss": 2.365755319595337, "eval_runtime": 121.4762, "eval_samples_per_second": 101.164, "eval_steps_per_second": 6.33, "step": 261000 }, { "epoch": 39.95, "learning_rate": 4.334085778781038e-05, "loss": 2.1533, "step": 265500 }, { "epoch": 39.95, "eval_loss": 2.359434127807617, "eval_runtime": 121.2612, "eval_samples_per_second": 101.343, "eval_steps_per_second": 6.342, "step": 265500 }, { "epoch": 40.63, "learning_rate": 4.322799097065463e-05, "loss": 2.1444, "step": 270000 }, { "epoch": 40.63, "eval_loss": 2.3534085750579834, "eval_runtime": 121.4588, "eval_samples_per_second": 101.178, "eval_steps_per_second": 6.331, "step": 270000 }, { "epoch": 41.31, "learning_rate": 4.311512415349887e-05, "loss": 2.1389, "step": 274500 }, { "epoch": 41.31, "eval_loss": 2.3499608039855957, "eval_runtime": 121.4347, "eval_samples_per_second": 101.198, "eval_steps_per_second": 6.333, "step": 274500 }, { "epoch": 41.99, "learning_rate": 4.300225733634312e-05, "loss": 2.1343, "step": 279000 }, { "epoch": 41.99, "eval_loss": 2.33479642868042, "eval_runtime": 121.4769, "eval_samples_per_second": 101.163, "eval_steps_per_second": 6.33, "step": 279000 }, { "epoch": 42.66, "learning_rate": 4.2889390519187363e-05, "loss": 2.1204, "step": 283500 }, { "epoch": 42.66, "eval_loss": 2.338609457015991, "eval_runtime": 119.9199, "eval_samples_per_second": 102.477, "eval_steps_per_second": 6.413, "step": 283500 }, { "epoch": 43.34, "learning_rate": 4.277652370203161e-05, "loss": 2.1149, "step": 288000 }, { "epoch": 43.34, "eval_loss": 2.3366451263427734, "eval_runtime": 121.3615, "eval_samples_per_second": 101.259, "eval_steps_per_second": 6.336, "step": 288000 }, { "epoch": 44.02, "learning_rate": 4.266365688487585e-05, "loss": 2.1124, "step": 292500 }, { "epoch": 44.02, "eval_loss": 2.3272287845611572, "eval_runtime": 121.4223, "eval_samples_per_second": 101.209, "eval_steps_per_second": 6.333, "step": 292500 }, { "epoch": 44.7, "learning_rate": 4.255079006772009e-05, "loss": 2.0996, "step": 297000 }, { "epoch": 44.7, "eval_loss": 2.3160288333892822, "eval_runtime": 121.4331, "eval_samples_per_second": 101.2, "eval_steps_per_second": 6.333, "step": 297000 }, { "epoch": 45.37, "learning_rate": 4.243792325056434e-05, "loss": 2.0954, "step": 301500 }, { "epoch": 45.37, "eval_loss": 2.3165717124938965, "eval_runtime": 121.3269, "eval_samples_per_second": 101.288, "eval_steps_per_second": 6.338, "step": 301500 }, { "epoch": 46.05, "learning_rate": 4.232505643340858e-05, "loss": 2.0901, "step": 306000 }, { "epoch": 46.05, "eval_loss": 2.303679943084717, "eval_runtime": 121.3845, "eval_samples_per_second": 101.24, "eval_steps_per_second": 6.335, "step": 306000 }, { "epoch": 46.73, "learning_rate": 4.221218961625282e-05, "loss": 2.0819, "step": 310500 }, { "epoch": 46.73, "eval_loss": 2.2977073192596436, "eval_runtime": 121.4379, "eval_samples_per_second": 101.196, "eval_steps_per_second": 6.332, "step": 310500 }, { "epoch": 47.4, "learning_rate": 4.209932279909707e-05, "loss": 2.0725, "step": 315000 }, { "epoch": 47.4, "eval_loss": 2.3036298751831055, "eval_runtime": 121.4278, "eval_samples_per_second": 101.204, "eval_steps_per_second": 6.333, "step": 315000 }, { "epoch": 48.08, "learning_rate": 4.198645598194131e-05, "loss": 2.0729, "step": 319500 }, { "epoch": 48.08, "eval_loss": 2.2955193519592285, "eval_runtime": 121.3513, "eval_samples_per_second": 101.268, "eval_steps_per_second": 6.337, "step": 319500 }, { "epoch": 48.76, "learning_rate": 4.187358916478555e-05, "loss": 2.0621, "step": 324000 }, { "epoch": 48.76, "eval_loss": 2.284827947616577, "eval_runtime": 121.3713, "eval_samples_per_second": 101.251, "eval_steps_per_second": 6.336, "step": 324000 }, { "epoch": 49.44, "learning_rate": 4.17607223476298e-05, "loss": 2.055, "step": 328500 }, { "epoch": 49.44, "eval_loss": 2.2865021228790283, "eval_runtime": 121.4042, "eval_samples_per_second": 101.224, "eval_steps_per_second": 6.334, "step": 328500 }, { "epoch": 50.11, "learning_rate": 4.164785553047404e-05, "loss": 2.0514, "step": 333000 }, { "epoch": 50.11, "eval_loss": 2.2722549438476562, "eval_runtime": 121.3871, "eval_samples_per_second": 101.238, "eval_steps_per_second": 6.335, "step": 333000 }, { "epoch": 50.79, "learning_rate": 4.153498871331828e-05, "loss": 2.0427, "step": 337500 }, { "epoch": 50.79, "eval_loss": 2.2747364044189453, "eval_runtime": 121.4348, "eval_samples_per_second": 101.198, "eval_steps_per_second": 6.333, "step": 337500 }, { "epoch": 51.47, "learning_rate": 4.142212189616253e-05, "loss": 2.0398, "step": 342000 }, { "epoch": 51.47, "eval_loss": 2.275329113006592, "eval_runtime": 121.4165, "eval_samples_per_second": 101.214, "eval_steps_per_second": 6.334, "step": 342000 }, { "epoch": 52.14, "learning_rate": 4.130925507900677e-05, "loss": 2.0373, "step": 346500 }, { "epoch": 52.14, "eval_loss": 2.263934850692749, "eval_runtime": 121.4832, "eval_samples_per_second": 101.158, "eval_steps_per_second": 6.33, "step": 346500 }, { "epoch": 52.82, "learning_rate": 4.119638826185102e-05, "loss": 2.0293, "step": 351000 }, { "epoch": 52.82, "eval_loss": 2.2591116428375244, "eval_runtime": 121.453, "eval_samples_per_second": 101.183, "eval_steps_per_second": 6.332, "step": 351000 }, { "epoch": 53.5, "learning_rate": 4.108352144469526e-05, "loss": 2.0222, "step": 355500 }, { "epoch": 53.5, "eval_loss": 2.251147508621216, "eval_runtime": 121.2819, "eval_samples_per_second": 101.326, "eval_steps_per_second": 6.341, "step": 355500 }, { "epoch": 54.18, "learning_rate": 4.097065462753951e-05, "loss": 2.018, "step": 360000 }, { "epoch": 54.18, "eval_loss": 2.2465593814849854, "eval_runtime": 121.4341, "eval_samples_per_second": 101.199, "eval_steps_per_second": 6.333, "step": 360000 }, { "epoch": 54.85, "learning_rate": 4.085778781038375e-05, "loss": 2.0129, "step": 364500 }, { "epoch": 54.85, "eval_loss": 2.244495153427124, "eval_runtime": 121.4224, "eval_samples_per_second": 101.209, "eval_steps_per_second": 6.333, "step": 364500 }, { "epoch": 55.53, "learning_rate": 4.074492099322799e-05, "loss": 2.0071, "step": 369000 }, { "epoch": 55.53, "eval_loss": 2.244058609008789, "eval_runtime": 121.4271, "eval_samples_per_second": 101.205, "eval_steps_per_second": 6.333, "step": 369000 }, { "epoch": 56.21, "learning_rate": 4.063205417607224e-05, "loss": 2.0026, "step": 373500 }, { "epoch": 56.21, "eval_loss": 2.2374625205993652, "eval_runtime": 121.4591, "eval_samples_per_second": 101.178, "eval_steps_per_second": 6.331, "step": 373500 }, { "epoch": 56.88, "learning_rate": 4.0519187358916484e-05, "loss": 1.9989, "step": 378000 }, { "epoch": 56.88, "eval_loss": 2.2266647815704346, "eval_runtime": 121.4333, "eval_samples_per_second": 101.2, "eval_steps_per_second": 6.333, "step": 378000 }, { "epoch": 57.56, "learning_rate": 4.040632054176072e-05, "loss": 1.9917, "step": 382500 }, { "epoch": 57.56, "eval_loss": 2.2338218688964844, "eval_runtime": 121.4506, "eval_samples_per_second": 101.185, "eval_steps_per_second": 6.332, "step": 382500 }, { "epoch": 58.24, "learning_rate": 4.029345372460497e-05, "loss": 1.9869, "step": 387000 }, { "epoch": 58.24, "eval_loss": 2.226421594619751, "eval_runtime": 121.395, "eval_samples_per_second": 101.232, "eval_steps_per_second": 6.335, "step": 387000 }, { "epoch": 58.92, "learning_rate": 4.018058690744921e-05, "loss": 1.9855, "step": 391500 }, { "epoch": 58.92, "eval_loss": 2.22316837310791, "eval_runtime": 121.4361, "eval_samples_per_second": 101.197, "eval_steps_per_second": 6.333, "step": 391500 }, { "epoch": 59.59, "learning_rate": 4.006772009029345e-05, "loss": 1.975, "step": 396000 }, { "epoch": 59.59, "eval_loss": 2.221580743789673, "eval_runtime": 121.4217, "eval_samples_per_second": 101.209, "eval_steps_per_second": 6.333, "step": 396000 }, { "epoch": 60.27, "learning_rate": 3.99548532731377e-05, "loss": 1.9738, "step": 400500 }, { "epoch": 60.27, "eval_loss": 2.2099127769470215, "eval_runtime": 121.4414, "eval_samples_per_second": 101.193, "eval_steps_per_second": 6.332, "step": 400500 }, { "epoch": 60.95, "learning_rate": 3.984198645598194e-05, "loss": 1.9724, "step": 405000 }, { "epoch": 60.95, "eval_loss": 2.217116355895996, "eval_runtime": 121.4225, "eval_samples_per_second": 101.209, "eval_steps_per_second": 6.333, "step": 405000 }, { "epoch": 61.63, "learning_rate": 3.972911963882618e-05, "loss": 1.9643, "step": 409500 }, { "epoch": 61.63, "eval_loss": 2.2091891765594482, "eval_runtime": 121.3749, "eval_samples_per_second": 101.248, "eval_steps_per_second": 6.336, "step": 409500 }, { "epoch": 62.3, "learning_rate": 3.961625282167043e-05, "loss": 1.9582, "step": 414000 }, { "epoch": 62.3, "eval_loss": 2.2050740718841553, "eval_runtime": 121.3877, "eval_samples_per_second": 101.238, "eval_steps_per_second": 6.335, "step": 414000 }, { "epoch": 62.98, "learning_rate": 3.950338600451467e-05, "loss": 1.9596, "step": 418500 }, { "epoch": 62.98, "eval_loss": 2.2095320224761963, "eval_runtime": 121.3734, "eval_samples_per_second": 101.25, "eval_steps_per_second": 6.336, "step": 418500 }, { "epoch": 63.66, "learning_rate": 3.939051918735892e-05, "loss": 1.9491, "step": 423000 }, { "epoch": 63.66, "eval_loss": 2.201195478439331, "eval_runtime": 121.4462, "eval_samples_per_second": 101.189, "eval_steps_per_second": 6.332, "step": 423000 }, { "epoch": 64.33, "learning_rate": 3.927765237020316e-05, "loss": 1.9493, "step": 427500 }, { "epoch": 64.33, "eval_loss": 2.1953182220458984, "eval_runtime": 121.4109, "eval_samples_per_second": 101.218, "eval_steps_per_second": 6.334, "step": 427500 }, { "epoch": 65.01, "learning_rate": 3.916478555304741e-05, "loss": 1.946, "step": 432000 }, { "epoch": 65.01, "eval_loss": 2.1867878437042236, "eval_runtime": 119.4236, "eval_samples_per_second": 102.903, "eval_steps_per_second": 6.439, "step": 432000 }, { "epoch": 65.69, "learning_rate": 3.9051918735891654e-05, "loss": 1.9359, "step": 436500 }, { "epoch": 65.69, "eval_loss": 2.1980998516082764, "eval_runtime": 121.4109, "eval_samples_per_second": 101.218, "eval_steps_per_second": 6.334, "step": 436500 }, { "epoch": 66.37, "learning_rate": 3.893905191873589e-05, "loss": 1.9333, "step": 441000 }, { "epoch": 66.37, "eval_loss": 2.1945624351501465, "eval_runtime": 121.3328, "eval_samples_per_second": 101.283, "eval_steps_per_second": 6.338, "step": 441000 }, { "epoch": 67.04, "learning_rate": 3.882618510158014e-05, "loss": 1.9327, "step": 445500 }, { "epoch": 67.04, "eval_loss": 2.1841721534729004, "eval_runtime": 121.4364, "eval_samples_per_second": 101.197, "eval_steps_per_second": 6.333, "step": 445500 }, { "epoch": 67.72, "learning_rate": 3.8713318284424384e-05, "loss": 1.9243, "step": 450000 }, { "epoch": 67.72, "eval_loss": 2.189509391784668, "eval_runtime": 121.4244, "eval_samples_per_second": 101.207, "eval_steps_per_second": 6.333, "step": 450000 }, { "epoch": 68.4, "learning_rate": 3.860045146726862e-05, "loss": 1.9241, "step": 454500 }, { "epoch": 68.4, "eval_loss": 2.170930862426758, "eval_runtime": 121.4233, "eval_samples_per_second": 101.208, "eval_steps_per_second": 6.333, "step": 454500 }, { "epoch": 69.07, "learning_rate": 3.848758465011287e-05, "loss": 1.9183, "step": 459000 }, { "epoch": 69.07, "eval_loss": 2.18388032913208, "eval_runtime": 121.4294, "eval_samples_per_second": 101.203, "eval_steps_per_second": 6.333, "step": 459000 }, { "epoch": 69.75, "learning_rate": 3.837471783295711e-05, "loss": 1.916, "step": 463500 }, { "epoch": 69.75, "eval_loss": 2.179508686065674, "eval_runtime": 121.3882, "eval_samples_per_second": 101.237, "eval_steps_per_second": 6.335, "step": 463500 }, { "epoch": 70.43, "learning_rate": 3.826185101580136e-05, "loss": 1.9105, "step": 468000 }, { "epoch": 70.43, "eval_loss": 2.1771745681762695, "eval_runtime": 121.4394, "eval_samples_per_second": 101.194, "eval_steps_per_second": 6.332, "step": 468000 }, { "epoch": 71.11, "learning_rate": 3.81489841986456e-05, "loss": 1.9117, "step": 472500 }, { "epoch": 71.11, "eval_loss": 2.181852340698242, "eval_runtime": 121.3465, "eval_samples_per_second": 101.272, "eval_steps_per_second": 6.337, "step": 472500 }, { "epoch": 71.78, "learning_rate": 3.803611738148984e-05, "loss": 1.9041, "step": 477000 }, { "epoch": 71.78, "eval_loss": 2.1662580966949463, "eval_runtime": 121.4449, "eval_samples_per_second": 101.19, "eval_steps_per_second": 6.332, "step": 477000 }, { "epoch": 72.46, "learning_rate": 3.792325056433409e-05, "loss": 1.9021, "step": 481500 }, { "epoch": 72.46, "eval_loss": 2.165590524673462, "eval_runtime": 121.4569, "eval_samples_per_second": 101.18, "eval_steps_per_second": 6.331, "step": 481500 }, { "epoch": 73.14, "learning_rate": 3.781038374717833e-05, "loss": 1.8995, "step": 486000 }, { "epoch": 73.14, "eval_loss": 2.1610703468322754, "eval_runtime": 121.4207, "eval_samples_per_second": 101.21, "eval_steps_per_second": 6.333, "step": 486000 }, { "epoch": 73.81, "learning_rate": 3.769751693002257e-05, "loss": 1.896, "step": 490500 }, { "epoch": 73.81, "eval_loss": 2.1603007316589355, "eval_runtime": 121.3989, "eval_samples_per_second": 101.228, "eval_steps_per_second": 6.334, "step": 490500 }, { "epoch": 74.49, "learning_rate": 3.758465011286682e-05, "loss": 1.8901, "step": 495000 }, { "epoch": 74.49, "eval_loss": 2.158705711364746, "eval_runtime": 121.4675, "eval_samples_per_second": 101.171, "eval_steps_per_second": 6.331, "step": 495000 }, { "epoch": 75.17, "learning_rate": 3.747178329571106e-05, "loss": 1.8887, "step": 499500 }, { "epoch": 75.17, "eval_loss": 2.152022123336792, "eval_runtime": 121.4184, "eval_samples_per_second": 101.212, "eval_steps_per_second": 6.333, "step": 499500 }, { "epoch": 75.85, "learning_rate": 3.735891647855531e-05, "loss": 1.8855, "step": 504000 }, { "epoch": 75.85, "eval_loss": 2.1545896530151367, "eval_runtime": 121.4036, "eval_samples_per_second": 101.224, "eval_steps_per_second": 6.334, "step": 504000 }, { "epoch": 76.52, "learning_rate": 3.7246049661399554e-05, "loss": 1.8802, "step": 508500 }, { "epoch": 76.52, "eval_loss": 2.1495370864868164, "eval_runtime": 121.4898, "eval_samples_per_second": 101.153, "eval_steps_per_second": 6.33, "step": 508500 }, { "epoch": 77.2, "learning_rate": 3.71331828442438e-05, "loss": 1.8783, "step": 513000 }, { "epoch": 77.2, "eval_loss": 2.1447861194610596, "eval_runtime": 121.3997, "eval_samples_per_second": 101.228, "eval_steps_per_second": 6.334, "step": 513000 }, { "epoch": 77.88, "learning_rate": 3.702031602708804e-05, "loss": 1.8743, "step": 517500 }, { "epoch": 77.88, "eval_loss": 2.1373064517974854, "eval_runtime": 121.3755, "eval_samples_per_second": 101.248, "eval_steps_per_second": 6.336, "step": 517500 }, { "epoch": 78.56, "learning_rate": 3.6907449209932284e-05, "loss": 1.8679, "step": 522000 }, { "epoch": 78.56, "eval_loss": 2.1462478637695312, "eval_runtime": 121.3988, "eval_samples_per_second": 101.228, "eval_steps_per_second": 6.334, "step": 522000 }, { "epoch": 79.23, "learning_rate": 3.679458239277653e-05, "loss": 1.8681, "step": 526500 }, { "epoch": 79.23, "eval_loss": 2.14402174949646, "eval_runtime": 121.4163, "eval_samples_per_second": 101.214, "eval_steps_per_second": 6.334, "step": 526500 }, { "epoch": 79.91, "learning_rate": 3.668171557562077e-05, "loss": 1.8649, "step": 531000 }, { "epoch": 79.91, "eval_loss": 2.1338822841644287, "eval_runtime": 121.3916, "eval_samples_per_second": 101.234, "eval_steps_per_second": 6.335, "step": 531000 }, { "epoch": 80.59, "learning_rate": 3.656884875846501e-05, "loss": 1.8593, "step": 535500 }, { "epoch": 80.59, "eval_loss": 2.139404296875, "eval_runtime": 121.4277, "eval_samples_per_second": 101.204, "eval_steps_per_second": 6.333, "step": 535500 }, { "epoch": 81.26, "learning_rate": 3.645598194130926e-05, "loss": 1.8592, "step": 540000 }, { "epoch": 81.26, "eval_loss": 2.1354503631591797, "eval_runtime": 121.402, "eval_samples_per_second": 101.226, "eval_steps_per_second": 6.334, "step": 540000 }, { "epoch": 81.94, "learning_rate": 3.63431151241535e-05, "loss": 1.8569, "step": 544500 }, { "epoch": 81.94, "eval_loss": 2.135469436645508, "eval_runtime": 121.3789, "eval_samples_per_second": 101.245, "eval_steps_per_second": 6.336, "step": 544500 }, { "epoch": 82.62, "learning_rate": 3.623024830699774e-05, "loss": 1.849, "step": 549000 }, { "epoch": 82.62, "eval_loss": 2.1346044540405273, "eval_runtime": 121.4745, "eval_samples_per_second": 101.165, "eval_steps_per_second": 6.331, "step": 549000 }, { "epoch": 83.3, "learning_rate": 3.611738148984199e-05, "loss": 1.8481, "step": 553500 }, { "epoch": 83.3, "eval_loss": 2.1314146518707275, "eval_runtime": 121.3262, "eval_samples_per_second": 101.289, "eval_steps_per_second": 6.338, "step": 553500 }, { "epoch": 83.97, "learning_rate": 3.600451467268623e-05, "loss": 1.8499, "step": 558000 }, { "epoch": 83.97, "eval_loss": 2.126936197280884, "eval_runtime": 121.4278, "eval_samples_per_second": 101.204, "eval_steps_per_second": 6.333, "step": 558000 }, { "epoch": 84.65, "learning_rate": 3.589164785553047e-05, "loss": 1.8394, "step": 562500 }, { "epoch": 84.65, "eval_loss": 2.118168592453003, "eval_runtime": 121.4516, "eval_samples_per_second": 101.184, "eval_steps_per_second": 6.332, "step": 562500 }, { "epoch": 85.33, "learning_rate": 3.577878103837472e-05, "loss": 1.8394, "step": 567000 }, { "epoch": 85.33, "eval_loss": 2.1206483840942383, "eval_runtime": 121.4259, "eval_samples_per_second": 101.206, "eval_steps_per_second": 6.333, "step": 567000 }, { "epoch": 86.0, "learning_rate": 3.566591422121896e-05, "loss": 1.8408, "step": 571500 }, { "epoch": 86.0, "eval_loss": 2.1219327449798584, "eval_runtime": 118.4077, "eval_samples_per_second": 103.785, "eval_steps_per_second": 6.495, "step": 571500 }, { "epoch": 86.68, "learning_rate": 3.555304740406321e-05, "loss": 1.832, "step": 576000 }, { "epoch": 86.68, "eval_loss": 2.1104817390441895, "eval_runtime": 118.1444, "eval_samples_per_second": 104.017, "eval_steps_per_second": 6.509, "step": 576000 }, { "epoch": 87.36, "learning_rate": 3.5440180586907454e-05, "loss": 1.8305, "step": 580500 }, { "epoch": 87.36, "eval_loss": 2.1246631145477295, "eval_runtime": 118.1028, "eval_samples_per_second": 104.053, "eval_steps_per_second": 6.511, "step": 580500 }, { "epoch": 88.04, "learning_rate": 3.53273137697517e-05, "loss": 1.8333, "step": 585000 }, { "epoch": 88.04, "eval_loss": 2.1201488971710205, "eval_runtime": 118.0958, "eval_samples_per_second": 104.06, "eval_steps_per_second": 6.512, "step": 585000 }, { "epoch": 88.71, "learning_rate": 3.521444695259594e-05, "loss": 1.8227, "step": 589500 }, { "epoch": 88.71, "eval_loss": 2.104985475540161, "eval_runtime": 118.0975, "eval_samples_per_second": 104.058, "eval_steps_per_second": 6.512, "step": 589500 }, { "epoch": 89.39, "learning_rate": 3.5101580135440183e-05, "loss": 1.8216, "step": 594000 }, { "epoch": 89.39, "eval_loss": 2.1118545532226562, "eval_runtime": 118.0937, "eval_samples_per_second": 104.061, "eval_steps_per_second": 6.512, "step": 594000 }, { "epoch": 90.07, "learning_rate": 3.498871331828443e-05, "loss": 1.8234, "step": 598500 }, { "epoch": 90.07, "eval_loss": 2.109297037124634, "eval_runtime": 118.1025, "eval_samples_per_second": 104.054, "eval_steps_per_second": 6.511, "step": 598500 }, { "epoch": 90.74, "learning_rate": 3.487584650112867e-05, "loss": 1.8162, "step": 603000 }, { "epoch": 90.74, "eval_loss": 2.0999834537506104, "eval_runtime": 118.0968, "eval_samples_per_second": 104.059, "eval_steps_per_second": 6.512, "step": 603000 }, { "epoch": 91.42, "learning_rate": 3.476297968397291e-05, "loss": 1.8153, "step": 607500 }, { "epoch": 91.42, "eval_loss": 2.110783576965332, "eval_runtime": 118.1749, "eval_samples_per_second": 103.99, "eval_steps_per_second": 6.507, "step": 607500 }, { "epoch": 92.1, "learning_rate": 3.465011286681716e-05, "loss": 1.8153, "step": 612000 }, { "epoch": 92.1, "eval_loss": 2.1009647846221924, "eval_runtime": 118.1986, "eval_samples_per_second": 103.969, "eval_steps_per_second": 6.506, "step": 612000 }, { "epoch": 92.78, "learning_rate": 3.4537246049661404e-05, "loss": 1.8095, "step": 616500 }, { "epoch": 92.78, "eval_loss": 2.0992209911346436, "eval_runtime": 118.1834, "eval_samples_per_second": 103.982, "eval_steps_per_second": 6.507, "step": 616500 }, { "epoch": 93.45, "learning_rate": 3.442437923250564e-05, "loss": 1.807, "step": 621000 }, { "epoch": 93.45, "eval_loss": 2.098292827606201, "eval_runtime": 118.1816, "eval_samples_per_second": 103.984, "eval_steps_per_second": 6.507, "step": 621000 }, { "epoch": 94.13, "learning_rate": 3.431151241534989e-05, "loss": 1.805, "step": 625500 }, { "epoch": 94.13, "eval_loss": 2.0988106727600098, "eval_runtime": 118.1659, "eval_samples_per_second": 103.998, "eval_steps_per_second": 6.508, "step": 625500 }, { "epoch": 94.81, "learning_rate": 3.4198645598194133e-05, "loss": 1.8015, "step": 630000 }, { "epoch": 94.81, "eval_loss": 2.0965840816497803, "eval_runtime": 118.1678, "eval_samples_per_second": 103.996, "eval_steps_per_second": 6.508, "step": 630000 }, { "epoch": 95.49, "learning_rate": 3.408577878103837e-05, "loss": 1.7964, "step": 634500 }, { "epoch": 95.49, "eval_loss": 2.0856127738952637, "eval_runtime": 118.3087, "eval_samples_per_second": 103.872, "eval_steps_per_second": 6.5, "step": 634500 }, { "epoch": 96.16, "learning_rate": 3.397291196388262e-05, "loss": 1.7988, "step": 639000 }, { "epoch": 96.16, "eval_loss": 2.0870988368988037, "eval_runtime": 118.2923, "eval_samples_per_second": 103.887, "eval_steps_per_second": 6.501, "step": 639000 }, { "epoch": 96.84, "learning_rate": 3.386004514672686e-05, "loss": 1.794, "step": 643500 }, { "epoch": 96.84, "eval_loss": 2.092172861099243, "eval_runtime": 118.3047, "eval_samples_per_second": 103.876, "eval_steps_per_second": 6.5, "step": 643500 }, { "epoch": 97.52, "learning_rate": 3.374717832957111e-05, "loss": 1.7917, "step": 648000 }, { "epoch": 97.52, "eval_loss": 2.0861566066741943, "eval_runtime": 118.2327, "eval_samples_per_second": 103.939, "eval_steps_per_second": 6.504, "step": 648000 }, { "epoch": 98.19, "learning_rate": 3.3634311512415354e-05, "loss": 1.79, "step": 652500 }, { "epoch": 98.19, "eval_loss": 2.0844566822052, "eval_runtime": 118.1297, "eval_samples_per_second": 104.03, "eval_steps_per_second": 6.51, "step": 652500 }, { "epoch": 98.87, "learning_rate": 3.35214446952596e-05, "loss": 1.788, "step": 657000 }, { "epoch": 98.87, "eval_loss": 2.0832607746124268, "eval_runtime": 118.1145, "eval_samples_per_second": 104.043, "eval_steps_per_second": 6.511, "step": 657000 }, { "epoch": 99.55, "learning_rate": 3.3408577878103845e-05, "loss": 1.7833, "step": 661500 }, { "epoch": 99.55, "eval_loss": 2.082475185394287, "eval_runtime": 118.0929, "eval_samples_per_second": 104.062, "eval_steps_per_second": 6.512, "step": 661500 }, { "epoch": 100.23, "learning_rate": 3.3295711060948083e-05, "loss": 1.7821, "step": 666000 }, { "epoch": 100.23, "eval_loss": 2.0830888748168945, "eval_runtime": 118.1331, "eval_samples_per_second": 104.027, "eval_steps_per_second": 6.51, "step": 666000 }, { "epoch": 100.9, "learning_rate": 3.318284424379233e-05, "loss": 1.7809, "step": 670500 }, { "epoch": 100.9, "eval_loss": 2.080984115600586, "eval_runtime": 118.0711, "eval_samples_per_second": 104.081, "eval_steps_per_second": 6.513, "step": 670500 }, { "epoch": 101.58, "learning_rate": 3.3069977426636574e-05, "loss": 1.7757, "step": 675000 }, { "epoch": 101.58, "eval_loss": 2.078061819076538, "eval_runtime": 118.0799, "eval_samples_per_second": 104.074, "eval_steps_per_second": 6.513, "step": 675000 }, { "epoch": 102.26, "learning_rate": 3.295711060948081e-05, "loss": 1.7737, "step": 679500 }, { "epoch": 102.26, "eval_loss": 2.079832077026367, "eval_runtime": 118.3274, "eval_samples_per_second": 103.856, "eval_steps_per_second": 6.499, "step": 679500 }, { "epoch": 102.93, "learning_rate": 3.284424379232506e-05, "loss": 1.7738, "step": 684000 }, { "epoch": 102.93, "eval_loss": 2.0702972412109375, "eval_runtime": 118.0753, "eval_samples_per_second": 104.078, "eval_steps_per_second": 6.513, "step": 684000 }, { "epoch": 103.61, "learning_rate": 3.2731376975169304e-05, "loss": 1.7701, "step": 688500 }, { "epoch": 103.61, "eval_loss": 2.0662286281585693, "eval_runtime": 118.9742, "eval_samples_per_second": 103.291, "eval_steps_per_second": 6.464, "step": 688500 }, { "epoch": 104.29, "learning_rate": 3.261851015801354e-05, "loss": 1.7682, "step": 693000 }, { "epoch": 104.29, "eval_loss": 2.070204973220825, "eval_runtime": 118.3434, "eval_samples_per_second": 103.842, "eval_steps_per_second": 6.498, "step": 693000 }, { "epoch": 104.97, "learning_rate": 3.250564334085779e-05, "loss": 1.7669, "step": 697500 }, { "epoch": 104.97, "eval_loss": 2.063176155090332, "eval_runtime": 118.357, "eval_samples_per_second": 103.83, "eval_steps_per_second": 6.497, "step": 697500 }, { "epoch": 105.64, "learning_rate": 3.239277652370203e-05, "loss": 1.7616, "step": 702000 }, { "epoch": 105.64, "eval_loss": 2.067533254623413, "eval_runtime": 118.3349, "eval_samples_per_second": 103.849, "eval_steps_per_second": 6.499, "step": 702000 }, { "epoch": 106.32, "learning_rate": 3.227990970654628e-05, "loss": 1.7623, "step": 706500 }, { "epoch": 106.32, "eval_loss": 2.0670344829559326, "eval_runtime": 118.3389, "eval_samples_per_second": 103.846, "eval_steps_per_second": 6.498, "step": 706500 }, { "epoch": 107.0, "learning_rate": 3.216704288939052e-05, "loss": 1.7623, "step": 711000 }, { "epoch": 107.0, "eval_loss": 2.0591020584106445, "eval_runtime": 118.3407, "eval_samples_per_second": 103.844, "eval_steps_per_second": 6.498, "step": 711000 }, { "epoch": 107.67, "learning_rate": 3.205417607223476e-05, "loss": 1.7553, "step": 715500 }, { "epoch": 107.67, "eval_loss": 2.0580272674560547, "eval_runtime": 118.3269, "eval_samples_per_second": 103.856, "eval_steps_per_second": 6.499, "step": 715500 }, { "epoch": 108.35, "learning_rate": 3.194130925507901e-05, "loss": 1.753, "step": 720000 }, { "epoch": 108.35, "eval_loss": 2.0603325366973877, "eval_runtime": 118.3271, "eval_samples_per_second": 103.856, "eval_steps_per_second": 6.499, "step": 720000 }, { "epoch": 109.03, "learning_rate": 3.1828442437923254e-05, "loss": 1.7549, "step": 724500 }, { "epoch": 109.03, "eval_loss": 2.0661327838897705, "eval_runtime": 118.3395, "eval_samples_per_second": 103.845, "eval_steps_per_second": 6.498, "step": 724500 }, { "epoch": 109.71, "learning_rate": 3.17155756207675e-05, "loss": 1.7474, "step": 729000 }, { "epoch": 109.71, "eval_loss": 2.0576255321502686, "eval_runtime": 118.3567, "eval_samples_per_second": 103.83, "eval_steps_per_second": 6.497, "step": 729000 }, { "epoch": 110.38, "learning_rate": 3.1602708803611745e-05, "loss": 1.7504, "step": 733500 }, { "epoch": 110.38, "eval_loss": 2.0617053508758545, "eval_runtime": 118.3259, "eval_samples_per_second": 103.857, "eval_steps_per_second": 6.499, "step": 733500 }, { "epoch": 111.06, "learning_rate": 3.148984198645598e-05, "loss": 1.7464, "step": 738000 }, { "epoch": 111.06, "eval_loss": 2.049917459487915, "eval_runtime": 118.3445, "eval_samples_per_second": 103.841, "eval_steps_per_second": 6.498, "step": 738000 }, { "epoch": 111.74, "learning_rate": 3.137697516930023e-05, "loss": 1.7432, "step": 742500 }, { "epoch": 111.74, "eval_loss": 2.056652069091797, "eval_runtime": 118.3587, "eval_samples_per_second": 103.828, "eval_steps_per_second": 6.497, "step": 742500 }, { "epoch": 112.42, "learning_rate": 3.1264108352144474e-05, "loss": 1.7404, "step": 747000 }, { "epoch": 112.42, "eval_loss": 2.0593619346618652, "eval_runtime": 118.3421, "eval_samples_per_second": 103.843, "eval_steps_per_second": 6.498, "step": 747000 }, { "epoch": 113.09, "learning_rate": 3.115124153498871e-05, "loss": 1.7438, "step": 751500 }, { "epoch": 113.09, "eval_loss": 2.045955181121826, "eval_runtime": 118.325, "eval_samples_per_second": 103.858, "eval_steps_per_second": 6.499, "step": 751500 }, { "epoch": 113.77, "learning_rate": 3.103837471783296e-05, "loss": 1.7375, "step": 756000 }, { "epoch": 113.77, "eval_loss": 2.055434465408325, "eval_runtime": 118.3498, "eval_samples_per_second": 103.836, "eval_steps_per_second": 6.498, "step": 756000 }, { "epoch": 114.45, "learning_rate": 3.0925507900677204e-05, "loss": 1.7336, "step": 760500 }, { "epoch": 114.45, "eval_loss": 2.040349006652832, "eval_runtime": 118.3546, "eval_samples_per_second": 103.832, "eval_steps_per_second": 6.497, "step": 760500 }, { "epoch": 115.12, "learning_rate": 3.081264108352145e-05, "loss": 1.7344, "step": 765000 }, { "epoch": 115.12, "eval_loss": 2.054170846939087, "eval_runtime": 118.3514, "eval_samples_per_second": 103.835, "eval_steps_per_second": 6.498, "step": 765000 }, { "epoch": 115.8, "learning_rate": 3.069977426636569e-05, "loss": 1.7324, "step": 769500 }, { "epoch": 115.8, "eval_loss": 2.042388439178467, "eval_runtime": 118.3282, "eval_samples_per_second": 103.855, "eval_steps_per_second": 6.499, "step": 769500 }, { "epoch": 116.48, "learning_rate": 3.058690744920993e-05, "loss": 1.7255, "step": 774000 }, { "epoch": 116.48, "eval_loss": 2.0501296520233154, "eval_runtime": 118.1548, "eval_samples_per_second": 104.008, "eval_steps_per_second": 6.508, "step": 774000 } ], "max_steps": 1993500, "num_train_epochs": 300, "total_flos": 6.520019673893634e+18, "trial_name": null, "trial_params": null }