{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.53191489361702, "eval_steps": 500, "global_step": 337, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 9.999918433243253e-06, "loss": 2.0691, "step": 1 }, { "epoch": 0.43, "learning_rate": 9.997960964140946e-06, "loss": 2.078, "step": 5 }, { "epoch": 0.51, "eval_loss": 2.030493974685669, "eval_runtime": 3.9443, "eval_samples_per_second": 21.043, "eval_steps_per_second": 1.521, "step": 6 }, { "epoch": 1.28, "learning_rate": 9.991845519630679e-06, "loss": 2.0322, "step": 10 }, { "epoch": 1.53, "eval_loss": 1.952285647392273, "eval_runtime": 3.904, "eval_samples_per_second": 21.26, "eval_steps_per_second": 1.537, "step": 13 }, { "epoch": 2.13, "learning_rate": 9.981658654313458e-06, "loss": 1.9849, "step": 15 }, { "epoch": 2.55, "learning_rate": 9.96740867674275e-06, "loss": 1.9301, "step": 20 }, { "epoch": 2.55, "eval_loss": 1.8813837766647339, "eval_runtime": 3.9106, "eval_samples_per_second": 21.224, "eval_steps_per_second": 1.534, "step": 20 }, { "epoch": 3.4, "learning_rate": 9.949107209404664e-06, "loss": 1.8757, "step": 25 }, { "epoch": 3.57, "eval_loss": 1.8208731412887573, "eval_runtime": 3.8907, "eval_samples_per_second": 21.333, "eval_steps_per_second": 1.542, "step": 27 }, { "epoch": 4.26, "learning_rate": 9.926769179238467e-06, "loss": 1.841, "step": 30 }, { "epoch": 4.51, "eval_loss": 1.7722175121307373, "eval_runtime": 3.8932, "eval_samples_per_second": 21.319, "eval_steps_per_second": 1.541, "step": 33 }, { "epoch": 5.11, "learning_rate": 9.900412805461968e-06, "loss": 1.7965, "step": 35 }, { "epoch": 5.53, "learning_rate": 9.870059584711668e-06, "loss": 1.7661, "step": 40 }, { "epoch": 5.53, "eval_loss": 1.7291498184204102, "eval_runtime": 3.8915, "eval_samples_per_second": 21.329, "eval_steps_per_second": 1.542, "step": 40 }, { "epoch": 6.38, "learning_rate": 9.835734273509787e-06, "loss": 1.731, "step": 45 }, { "epoch": 6.55, "eval_loss": 1.6903846263885498, "eval_runtime": 3.9149, "eval_samples_per_second": 21.201, "eval_steps_per_second": 1.533, "step": 47 }, { "epoch": 7.23, "learning_rate": 9.797464868072489e-06, "loss": 1.713, "step": 50 }, { "epoch": 7.57, "eval_loss": 1.6531494855880737, "eval_runtime": 3.8992, "eval_samples_per_second": 21.286, "eval_steps_per_second": 1.539, "step": 54 }, { "epoch": 8.09, "learning_rate": 9.755282581475769e-06, "loss": 1.6854, "step": 55 }, { "epoch": 8.51, "learning_rate": 9.709221818197626e-06, "loss": 1.6557, "step": 60 }, { "epoch": 8.51, "eval_loss": 1.6243276596069336, "eval_runtime": 3.917, "eval_samples_per_second": 21.19, "eval_steps_per_second": 1.532, "step": 60 }, { "epoch": 9.36, "learning_rate": 9.659320146057263e-06, "loss": 1.6319, "step": 65 }, { "epoch": 9.53, "eval_loss": 1.5888899564743042, "eval_runtime": 3.9117, "eval_samples_per_second": 21.218, "eval_steps_per_second": 1.534, "step": 67 }, { "epoch": 10.21, "learning_rate": 9.60561826557425e-06, "loss": 1.5989, "step": 70 }, { "epoch": 10.55, "eval_loss": 1.5500489473342896, "eval_runtime": 3.8961, "eval_samples_per_second": 21.303, "eval_steps_per_second": 1.54, "step": 74 }, { "epoch": 11.06, "learning_rate": 9.548159976772593e-06, "loss": 1.581, "step": 75 }, { "epoch": 11.49, "learning_rate": 9.486992143456792e-06, "loss": 1.5556, "step": 80 }, { "epoch": 11.57, "eval_loss": 1.5097848176956177, "eval_runtime": 3.8656, "eval_samples_per_second": 21.471, "eval_steps_per_second": 1.552, "step": 81 }, { "epoch": 12.34, "learning_rate": 9.422164654989073e-06, "loss": 1.5165, "step": 85 }, { "epoch": 12.51, "eval_loss": 1.4754232168197632, "eval_runtime": 3.8958, "eval_samples_per_second": 21.305, "eval_steps_per_second": 1.54, "step": 87 }, { "epoch": 13.19, "learning_rate": 9.353730385598887e-06, "loss": 1.4945, "step": 90 }, { "epoch": 13.53, "eval_loss": 1.4281667470932007, "eval_runtime": 3.8886, "eval_samples_per_second": 21.345, "eval_steps_per_second": 1.543, "step": 94 }, { "epoch": 14.04, "learning_rate": 9.281745151257946e-06, "loss": 1.4578, "step": 95 }, { "epoch": 14.47, "learning_rate": 9.206267664155906e-06, "loss": 1.4198, "step": 100 }, { "epoch": 14.55, "eval_loss": 1.377801775932312, "eval_runtime": 3.8937, "eval_samples_per_second": 21.317, "eval_steps_per_second": 1.541, "step": 101 }, { "epoch": 15.32, "learning_rate": 9.12735948481387e-06, "loss": 1.3823, "step": 105 }, { "epoch": 15.57, "eval_loss": 1.3291016817092896, "eval_runtime": 3.8883, "eval_samples_per_second": 21.346, "eval_steps_per_second": 1.543, "step": 108 }, { "epoch": 16.17, "learning_rate": 9.045084971874738e-06, "loss": 1.3576, "step": 110 }, { "epoch": 16.51, "eval_loss": 1.2924872636795044, "eval_runtime": 3.9257, "eval_samples_per_second": 21.143, "eval_steps_per_second": 1.528, "step": 114 }, { "epoch": 17.02, "learning_rate": 8.959511229611377e-06, "loss": 1.3142, "step": 115 }, { "epoch": 17.45, "learning_rate": 8.870708053195414e-06, "loss": 1.2917, "step": 120 }, { "epoch": 17.53, "eval_loss": 1.25697660446167, "eval_runtime": 3.9218, "eval_samples_per_second": 21.163, "eval_steps_per_second": 1.53, "step": 121 }, { "epoch": 18.3, "learning_rate": 8.778747871771293e-06, "loss": 1.2599, "step": 125 }, { "epoch": 18.55, "eval_loss": 1.2282600402832031, "eval_runtime": 3.9116, "eval_samples_per_second": 21.219, "eval_steps_per_second": 1.534, "step": 128 }, { "epoch": 19.15, "learning_rate": 8.683705689382025e-06, "loss": 1.2529, "step": 130 }, { "epoch": 19.57, "learning_rate": 8.585659023794818e-06, "loss": 1.2257, "step": 135 }, { "epoch": 19.57, "eval_loss": 1.203251838684082, "eval_runtime": 3.8892, "eval_samples_per_second": 21.341, "eval_steps_per_second": 1.543, "step": 135 }, { "epoch": 20.43, "learning_rate": 8.48468784327647e-06, "loss": 1.2123, "step": 140 }, { "epoch": 20.51, "eval_loss": 1.1904693841934204, "eval_runtime": 3.8787, "eval_samples_per_second": 21.399, "eval_steps_per_second": 1.547, "step": 141 }, { "epoch": 21.28, "learning_rate": 8.380874501370098e-06, "loss": 1.1966, "step": 145 }, { "epoch": 21.53, "eval_loss": 1.172432541847229, "eval_runtime": 3.8728, "eval_samples_per_second": 21.432, "eval_steps_per_second": 1.549, "step": 148 }, { "epoch": 22.13, "learning_rate": 8.274303669726427e-06, "loss": 1.1938, "step": 150 }, { "epoch": 22.55, "learning_rate": 8.165062269044353e-06, "loss": 1.1694, "step": 155 }, { "epoch": 22.55, "eval_loss": 1.1592084169387817, "eval_runtime": 3.8798, "eval_samples_per_second": 21.393, "eval_steps_per_second": 1.546, "step": 155 }, { "epoch": 23.4, "learning_rate": 8.053239398177191e-06, "loss": 1.1665, "step": 160 }, { "epoch": 23.57, "eval_loss": 1.1471155881881714, "eval_runtime": 3.891, "eval_samples_per_second": 21.331, "eval_steps_per_second": 1.542, "step": 162 }, { "epoch": 24.26, "learning_rate": 7.938926261462366e-06, "loss": 1.1559, "step": 165 }, { "epoch": 24.51, "eval_loss": 1.1368687152862549, "eval_runtime": 3.9159, "eval_samples_per_second": 21.196, "eval_steps_per_second": 1.532, "step": 168 }, { "epoch": 25.11, "learning_rate": 7.822216094333847e-06, "loss": 1.1552, "step": 170 }, { "epoch": 25.53, "learning_rate": 7.703204087277989e-06, "loss": 1.1383, "step": 175 }, { "epoch": 25.53, "eval_loss": 1.128839373588562, "eval_runtime": 3.89, "eval_samples_per_second": 21.337, "eval_steps_per_second": 1.542, "step": 175 }, { "epoch": 26.38, "learning_rate": 7.5819873081948105e-06, "loss": 1.141, "step": 180 }, { "epoch": 26.55, "eval_loss": 1.1200487613677979, "eval_runtime": 3.8914, "eval_samples_per_second": 21.329, "eval_steps_per_second": 1.542, "step": 182 }, { "epoch": 27.23, "learning_rate": 7.45866462322802e-06, "loss": 1.1334, "step": 185 }, { "epoch": 27.57, "eval_loss": 1.1138092279434204, "eval_runtime": 3.8814, "eval_samples_per_second": 21.384, "eval_steps_per_second": 1.546, "step": 189 }, { "epoch": 28.09, "learning_rate": 7.333336616128369e-06, "loss": 1.1201, "step": 190 }, { "epoch": 28.51, "learning_rate": 7.206105506216107e-06, "loss": 1.1193, "step": 195 }, { "epoch": 28.51, "eval_loss": 1.1078517436981201, "eval_runtime": 3.9032, "eval_samples_per_second": 21.265, "eval_steps_per_second": 1.537, "step": 195 }, { "epoch": 29.36, "learning_rate": 7.0770750650094335e-06, "loss": 1.1079, "step": 200 }, { "epoch": 29.53, "eval_loss": 1.1015825271606445, "eval_runtime": 3.9112, "eval_samples_per_second": 21.221, "eval_steps_per_second": 1.534, "step": 202 }, { "epoch": 30.21, "learning_rate": 6.946350531586959e-06, "loss": 1.1188, "step": 205 }, { "epoch": 30.55, "eval_loss": 1.0960975885391235, "eval_runtime": 3.9135, "eval_samples_per_second": 21.209, "eval_steps_per_second": 1.533, "step": 209 }, { "epoch": 31.06, "learning_rate": 6.814038526753205e-06, "loss": 1.1013, "step": 210 }, { "epoch": 31.49, "learning_rate": 6.680246966077151e-06, "loss": 1.1006, "step": 215 }, { "epoch": 31.57, "eval_loss": 1.0916202068328857, "eval_runtime": 3.8812, "eval_samples_per_second": 21.385, "eval_steps_per_second": 1.546, "step": 216 }, { "epoch": 32.34, "learning_rate": 6.545084971874738e-06, "loss": 1.1016, "step": 220 }, { "epoch": 32.51, "eval_loss": 1.0850551128387451, "eval_runtime": 3.897, "eval_samples_per_second": 21.298, "eval_steps_per_second": 1.54, "step": 222 }, { "epoch": 33.19, "learning_rate": 6.408662784207149e-06, "loss": 1.0801, "step": 225 }, { "epoch": 33.53, "eval_loss": 1.078303337097168, "eval_runtime": 3.8819, "eval_samples_per_second": 21.381, "eval_steps_per_second": 1.546, "step": 229 }, { "epoch": 34.04, "learning_rate": 6.271091670967437e-06, "loss": 1.0942, "step": 230 }, { "epoch": 34.47, "learning_rate": 6.132483837128823e-06, "loss": 1.0846, "step": 235 }, { "epoch": 34.55, "eval_loss": 1.075777292251587, "eval_runtime": 3.9038, "eval_samples_per_second": 21.261, "eval_steps_per_second": 1.537, "step": 236 }, { "epoch": 35.32, "learning_rate": 5.9929523332287275e-06, "loss": 1.0828, "step": 240 }, { "epoch": 35.57, "eval_loss": 1.0725317001342773, "eval_runtime": 3.9021, "eval_samples_per_second": 21.271, "eval_steps_per_second": 1.538, "step": 243 }, { "epoch": 36.17, "learning_rate": 5.85261096316312e-06, "loss": 1.0758, "step": 245 }, { "epoch": 36.51, "eval_loss": 1.0693665742874146, "eval_runtime": 3.9189, "eval_samples_per_second": 21.179, "eval_steps_per_second": 1.531, "step": 249 }, { "epoch": 37.02, "learning_rate": 5.711574191366427e-06, "loss": 1.0707, "step": 250 }, { "epoch": 37.45, "learning_rate": 5.569957049452703e-06, "loss": 1.0749, "step": 255 }, { "epoch": 37.53, "eval_loss": 1.0646324157714844, "eval_runtime": 3.9232, "eval_samples_per_second": 21.156, "eval_steps_per_second": 1.529, "step": 256 }, { "epoch": 38.3, "learning_rate": 5.4278750423942e-06, "loss": 1.0626, "step": 260 }, { "epoch": 38.55, "eval_loss": 1.0626643896102905, "eval_runtime": 3.9102, "eval_samples_per_second": 21.227, "eval_steps_per_second": 1.534, "step": 263 }, { "epoch": 39.15, "learning_rate": 5.285444054313841e-06, "loss": 1.0698, "step": 265 }, { "epoch": 39.57, "learning_rate": 5.142780253968481e-06, "loss": 1.0575, "step": 270 }, { "epoch": 39.57, "eval_loss": 1.059247612953186, "eval_runtime": 3.9014, "eval_samples_per_second": 21.274, "eval_steps_per_second": 1.538, "step": 270 }, { "epoch": 40.43, "learning_rate": 5e-06, "loss": 1.0583, "step": 275 }, { "epoch": 40.51, "eval_loss": 1.0554794073104858, "eval_runtime": 3.924, "eval_samples_per_second": 21.152, "eval_steps_per_second": 1.529, "step": 276 }, { "epoch": 41.28, "learning_rate": 4.85721974603152e-06, "loss": 1.0548, "step": 280 }, { "epoch": 41.53, "eval_loss": 1.051832914352417, "eval_runtime": 3.9145, "eval_samples_per_second": 21.203, "eval_steps_per_second": 1.533, "step": 283 }, { "epoch": 42.13, "learning_rate": 4.71455594568616e-06, "loss": 1.0506, "step": 285 }, { "epoch": 42.55, "learning_rate": 4.572124957605803e-06, "loss": 1.0495, "step": 290 }, { "epoch": 42.55, "eval_loss": 1.0468100309371948, "eval_runtime": 3.9072, "eval_samples_per_second": 21.243, "eval_steps_per_second": 1.536, "step": 290 }, { "epoch": 43.4, "learning_rate": 4.430042950547298e-06, "loss": 1.0449, "step": 295 }, { "epoch": 43.57, "eval_loss": 1.0469093322753906, "eval_runtime": 3.8777, "eval_samples_per_second": 21.404, "eval_steps_per_second": 1.547, "step": 297 }, { "epoch": 44.26, "learning_rate": 4.2884258086335755e-06, "loss": 1.0527, "step": 300 }, { "epoch": 44.51, "eval_loss": 1.041974663734436, "eval_runtime": 3.9068, "eval_samples_per_second": 21.245, "eval_steps_per_second": 1.536, "step": 303 }, { "epoch": 45.11, "learning_rate": 4.147389036836881e-06, "loss": 1.0413, "step": 305 }, { "epoch": 45.53, "learning_rate": 4.007047666771274e-06, "loss": 1.0411, "step": 310 }, { "epoch": 45.53, "eval_loss": 1.041487693786621, "eval_runtime": 3.9044, "eval_samples_per_second": 21.258, "eval_steps_per_second": 1.537, "step": 310 }, { "epoch": 46.38, "learning_rate": 3.867516162871177e-06, "loss": 1.0325, "step": 315 }, { "epoch": 46.55, "eval_loss": 1.0384427309036255, "eval_runtime": 3.8991, "eval_samples_per_second": 21.287, "eval_steps_per_second": 1.539, "step": 317 }, { "epoch": 47.23, "learning_rate": 3.7289083290325668e-06, "loss": 1.0404, "step": 320 }, { "epoch": 47.57, "eval_loss": 1.0352696180343628, "eval_runtime": 3.8837, "eval_samples_per_second": 21.371, "eval_steps_per_second": 1.545, "step": 324 }, { "epoch": 48.09, "learning_rate": 3.5913372157928515e-06, "loss": 1.034, "step": 325 }, { "epoch": 48.51, "learning_rate": 3.4549150281252635e-06, "loss": 1.0326, "step": 330 }, { "epoch": 48.51, "eval_loss": 1.033745527267456, "eval_runtime": 3.9032, "eval_samples_per_second": 21.265, "eval_steps_per_second": 1.537, "step": 330 }, { "epoch": 49.36, "learning_rate": 3.319753033922849e-06, "loss": 1.0262, "step": 335 }, { "epoch": 49.53, "eval_loss": 1.0316756963729858, "eval_runtime": 3.8951, "eval_samples_per_second": 21.309, "eval_steps_per_second": 1.54, "step": 337 }, { "epoch": 49.53, "step": 337, "total_flos": 7152558019706880.0, "train_loss": 1.3011242423637683, "train_runtime": 6393.7569, "train_samples_per_second": 5.834, "train_steps_per_second": 0.086 } ], "logging_steps": 5, "max_steps": 550, "num_train_epochs": 50, "save_steps": 500, "total_flos": 7152558019706880.0, "trial_name": null, "trial_params": null }