{ "best_metric": 0.13464748859405518, "best_model_checkpoint": "./lora-out/checkpoint-204", "epoch": 14.442477876106194, "eval_steps": 4, "global_step": 204, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.28, "eval_loss": 0.3006412982940674, "eval_runtime": 54.7333, "eval_samples_per_second": 0.914, "eval_steps_per_second": 0.238, "step": 4 }, { "epoch": 0.57, "eval_loss": 0.300335168838501, "eval_runtime": 54.8113, "eval_samples_per_second": 0.912, "eval_steps_per_second": 0.237, "step": 8 }, { "epoch": 0.71, "learning_rate": 1e-05, "loss": 0.3024, "step": 10 }, { "epoch": 0.85, "eval_loss": 0.2993900179862976, "eval_runtime": 54.782, "eval_samples_per_second": 0.913, "eval_steps_per_second": 0.237, "step": 12 }, { "epoch": 1.13, "eval_loss": 0.29816487431526184, "eval_runtime": 54.8049, "eval_samples_per_second": 0.912, "eval_steps_per_second": 0.237, "step": 16 }, { "epoch": 1.42, "learning_rate": 2e-05, "loss": 0.3035, "step": 20 }, { "epoch": 1.42, "eval_loss": 0.29595255851745605, "eval_runtime": 54.7885, "eval_samples_per_second": 0.913, "eval_steps_per_second": 0.237, "step": 20 }, { "epoch": 1.7, "eval_loss": 0.2939557135105133, "eval_runtime": 54.7999, "eval_samples_per_second": 0.912, "eval_steps_per_second": 0.237, "step": 24 }, { "epoch": 1.98, "eval_loss": 0.29013773798942566, "eval_runtime": 54.7805, "eval_samples_per_second": 0.913, "eval_steps_per_second": 0.237, "step": 28 }, { "epoch": 2.12, "learning_rate": 3e-05, "loss": 0.2959, "step": 30 }, { "epoch": 2.27, "eval_loss": 0.28251081705093384, "eval_runtime": 54.7706, "eval_samples_per_second": 0.913, "eval_steps_per_second": 0.237, "step": 32 }, { "epoch": 2.55, "eval_loss": 0.2771329879760742, "eval_runtime": 54.7818, "eval_samples_per_second": 0.913, "eval_steps_per_second": 0.237, "step": 36 }, { "epoch": 2.83, "learning_rate": 4e-05, "loss": 0.284, "step": 40 }, { "epoch": 2.83, "eval_loss": 0.27146488428115845, "eval_runtime": 54.803, "eval_samples_per_second": 0.912, "eval_steps_per_second": 0.237, "step": 40 }, { "epoch": 3.12, "eval_loss": 0.26464152336120605, "eval_runtime": 54.8467, "eval_samples_per_second": 0.912, "eval_steps_per_second": 0.237, "step": 44 }, { "epoch": 3.4, "eval_loss": 0.25653430819511414, "eval_runtime": 54.8327, "eval_samples_per_second": 0.912, "eval_steps_per_second": 0.237, "step": 48 }, { "epoch": 3.54, "learning_rate": 5e-05, "loss": 0.263, "step": 50 }, { "epoch": 3.68, "eval_loss": 0.24627122282981873, "eval_runtime": 54.813, "eval_samples_per_second": 0.912, "eval_steps_per_second": 0.237, "step": 52 }, { "epoch": 3.96, "eval_loss": 0.23474617302417755, "eval_runtime": 54.7901, "eval_samples_per_second": 0.913, "eval_steps_per_second": 0.237, "step": 56 }, { "epoch": 4.25, "learning_rate": 6e-05, "loss": 0.241, "step": 60 }, { "epoch": 4.25, "eval_loss": 0.2220366895198822, "eval_runtime": 54.7983, "eval_samples_per_second": 0.912, "eval_steps_per_second": 0.237, "step": 60 }, { "epoch": 4.53, "eval_loss": 0.20926769077777863, "eval_runtime": 54.7403, "eval_samples_per_second": 0.913, "eval_steps_per_second": 0.237, "step": 64 }, { "epoch": 4.81, "eval_loss": 0.19629451632499695, "eval_runtime": 54.7525, "eval_samples_per_second": 0.913, "eval_steps_per_second": 0.237, "step": 68 }, { "epoch": 4.96, "learning_rate": 7e-05, "loss": 0.2101, "step": 70 }, { "epoch": 5.1, "eval_loss": 0.18524658679962158, "eval_runtime": 54.7303, "eval_samples_per_second": 0.914, "eval_steps_per_second": 0.238, "step": 72 }, { "epoch": 5.38, "eval_loss": 0.17731742560863495, "eval_runtime": 54.7552, "eval_samples_per_second": 0.913, "eval_steps_per_second": 0.237, "step": 76 }, { "epoch": 5.66, "learning_rate": 8e-05, "loss": 0.1788, "step": 80 }, { "epoch": 5.66, "eval_loss": 0.16993452608585358, "eval_runtime": 54.729, "eval_samples_per_second": 0.914, "eval_steps_per_second": 0.238, "step": 80 }, { "epoch": 5.95, "eval_loss": 0.164781853556633, "eval_runtime": 54.741, "eval_samples_per_second": 0.913, "eval_steps_per_second": 0.237, "step": 84 }, { "epoch": 6.23, "eval_loss": 0.16103117167949677, "eval_runtime": 54.7837, "eval_samples_per_second": 0.913, "eval_steps_per_second": 0.237, "step": 88 }, { "epoch": 6.37, "learning_rate": 9e-05, "loss": 0.1615, "step": 90 }, { "epoch": 6.51, "eval_loss": 0.15781742334365845, "eval_runtime": 54.7138, "eval_samples_per_second": 0.914, "eval_steps_per_second": 0.238, "step": 92 }, { "epoch": 6.8, "eval_loss": 0.15516981482505798, "eval_runtime": 54.7516, "eval_samples_per_second": 0.913, "eval_steps_per_second": 0.237, "step": 96 }, { "epoch": 7.08, "learning_rate": 0.0001, "loss": 0.1533, "step": 100 }, { "epoch": 7.08, "eval_loss": 0.15261690318584442, "eval_runtime": 54.6891, "eval_samples_per_second": 0.914, "eval_steps_per_second": 0.238, "step": 100 }, { "epoch": 7.36, "eval_loss": 0.15066812932491302, "eval_runtime": 54.6884, "eval_samples_per_second": 0.914, "eval_steps_per_second": 0.238, "step": 104 }, { "epoch": 7.65, "eval_loss": 0.14893724024295807, "eval_runtime": 54.6275, "eval_samples_per_second": 0.915, "eval_steps_per_second": 0.238, "step": 108 }, { "epoch": 7.79, "learning_rate": 9.090909090909092e-05, "loss": 0.1463, "step": 110 }, { "epoch": 7.93, "eval_loss": 0.14742153882980347, "eval_runtime": 54.6174, "eval_samples_per_second": 0.915, "eval_steps_per_second": 0.238, "step": 112 }, { "epoch": 8.21, "eval_loss": 0.14575307071208954, "eval_runtime": 54.6366, "eval_samples_per_second": 0.915, "eval_steps_per_second": 0.238, "step": 116 }, { "epoch": 8.5, "learning_rate": 8.181818181818183e-05, "loss": 0.1399, "step": 120 }, { "epoch": 8.5, "eval_loss": 0.14450186491012573, "eval_runtime": 54.6303, "eval_samples_per_second": 0.915, "eval_steps_per_second": 0.238, "step": 120 }, { "epoch": 8.78, "eval_loss": 0.1431863009929657, "eval_runtime": 54.6358, "eval_samples_per_second": 0.915, "eval_steps_per_second": 0.238, "step": 124 }, { "epoch": 9.06, "eval_loss": 0.1424635797739029, "eval_runtime": 54.6449, "eval_samples_per_second": 0.915, "eval_steps_per_second": 0.238, "step": 128 }, { "epoch": 9.2, "learning_rate": 7.272727272727273e-05, "loss": 0.1357, "step": 130 }, { "epoch": 9.35, "eval_loss": 0.14175941050052643, "eval_runtime": 54.6307, "eval_samples_per_second": 0.915, "eval_steps_per_second": 0.238, "step": 132 }, { "epoch": 9.63, "eval_loss": 0.14105737209320068, "eval_runtime": 54.6121, "eval_samples_per_second": 0.916, "eval_steps_per_second": 0.238, "step": 136 }, { "epoch": 9.91, "learning_rate": 6.363636363636364e-05, "loss": 0.1322, "step": 140 }, { "epoch": 9.91, "eval_loss": 0.14027251303195953, "eval_runtime": 54.6594, "eval_samples_per_second": 0.915, "eval_steps_per_second": 0.238, "step": 140 }, { "epoch": 10.19, "eval_loss": 0.13963991403579712, "eval_runtime": 54.6378, "eval_samples_per_second": 0.915, "eval_steps_per_second": 0.238, "step": 144 }, { "epoch": 10.48, "eval_loss": 0.138994500041008, "eval_runtime": 54.6115, "eval_samples_per_second": 0.916, "eval_steps_per_second": 0.238, "step": 148 }, { "epoch": 10.62, "learning_rate": 5.4545454545454546e-05, "loss": 0.1355, "step": 150 }, { "epoch": 10.76, "eval_loss": 0.13857363164424896, "eval_runtime": 54.6622, "eval_samples_per_second": 0.915, "eval_steps_per_second": 0.238, "step": 152 }, { "epoch": 11.04, "eval_loss": 0.13809233903884888, "eval_runtime": 54.7824, "eval_samples_per_second": 0.913, "eval_steps_per_second": 0.237, "step": 156 }, { "epoch": 11.33, "learning_rate": 4.545454545454546e-05, "loss": 0.1216, "step": 160 }, { "epoch": 11.33, "eval_loss": 0.137764573097229, "eval_runtime": 54.6049, "eval_samples_per_second": 0.916, "eval_steps_per_second": 0.238, "step": 160 }, { "epoch": 11.61, "eval_loss": 0.1369408369064331, "eval_runtime": 54.6188, "eval_samples_per_second": 0.915, "eval_steps_per_second": 0.238, "step": 164 }, { "epoch": 11.89, "eval_loss": 0.13684938848018646, "eval_runtime": 54.5949, "eval_samples_per_second": 0.916, "eval_steps_per_second": 0.238, "step": 168 }, { "epoch": 12.04, "learning_rate": 3.6363636363636364e-05, "loss": 0.1265, "step": 170 }, { "epoch": 12.18, "eval_loss": 0.1366124004125595, "eval_runtime": 54.5928, "eval_samples_per_second": 0.916, "eval_steps_per_second": 0.238, "step": 172 }, { "epoch": 12.46, "eval_loss": 0.1361435353755951, "eval_runtime": 54.6703, "eval_samples_per_second": 0.915, "eval_steps_per_second": 0.238, "step": 176 }, { "epoch": 12.74, "learning_rate": 2.7272727272727273e-05, "loss": 0.127, "step": 180 }, { "epoch": 12.74, "eval_loss": 0.13553684949874878, "eval_runtime": 54.6232, "eval_samples_per_second": 0.915, "eval_steps_per_second": 0.238, "step": 180 }, { "epoch": 13.03, "eval_loss": 0.13531364500522614, "eval_runtime": 54.7712, "eval_samples_per_second": 0.913, "eval_steps_per_second": 0.237, "step": 184 }, { "epoch": 13.31, "eval_loss": 0.1353050172328949, "eval_runtime": 54.8052, "eval_samples_per_second": 0.912, "eval_steps_per_second": 0.237, "step": 188 }, { "epoch": 13.45, "learning_rate": 1.8181818181818182e-05, "loss": 0.1233, "step": 190 }, { "epoch": 13.59, "eval_loss": 0.1348796784877777, "eval_runtime": 54.5607, "eval_samples_per_second": 0.916, "eval_steps_per_second": 0.238, "step": 192 }, { "epoch": 13.88, "eval_loss": 0.13486500084400177, "eval_runtime": 54.5618, "eval_samples_per_second": 0.916, "eval_steps_per_second": 0.238, "step": 196 }, { "epoch": 14.16, "learning_rate": 9.090909090909091e-06, "loss": 0.1189, "step": 200 }, { "epoch": 14.16, "eval_loss": 0.13471217453479767, "eval_runtime": 54.6065, "eval_samples_per_second": 0.916, "eval_steps_per_second": 0.238, "step": 200 }, { "epoch": 14.44, "eval_loss": 0.13464748859405518, "eval_runtime": 54.5803, "eval_samples_per_second": 0.916, "eval_steps_per_second": 0.238, "step": 204 } ], "logging_steps": 10, "max_steps": 210, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 12, "total_flos": 6.711268353280573e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }