{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.201000834028357, "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 2e-05, "loss": 1.7915, "step": 1 }, { "epoch": 0.04, "learning_rate": 4e-05, "loss": 1.7449, "step": 2 }, { "epoch": 0.06, "learning_rate": 6e-05, "loss": 1.7538, "step": 3 }, { "epoch": 0.08, "learning_rate": 8e-05, "loss": 1.713, "step": 4 }, { "epoch": 0.1, "learning_rate": 0.0001, "loss": 1.7313, "step": 5 }, { "epoch": 0.1, "eval_loss": 1.6295605897903442, "eval_runtime": 6.0457, "eval_samples_per_second": 0.331, "eval_steps_per_second": 0.331, "step": 5 }, { "epoch": 0.12, "learning_rate": 0.00012, "loss": 1.8184, "step": 6 }, { "epoch": 0.14, "learning_rate": 0.00014, "loss": 1.7586, "step": 7 }, { "epoch": 0.16, "learning_rate": 0.00016, "loss": 1.8926, "step": 8 }, { "epoch": 0.18, "learning_rate": 0.00018, "loss": 1.9176, "step": 9 }, { "epoch": 0.2, "learning_rate": 0.0002, "loss": 1.584, "step": 10 }, { "epoch": 0.2, "eval_loss": 1.606095790863037, "eval_runtime": 6.0412, "eval_samples_per_second": 0.331, "eval_steps_per_second": 0.331, "step": 10 }, { "epoch": 0.22, "learning_rate": 0.00019997370884991842, "loss": 1.2008, "step": 11 }, { "epoch": 0.24, "learning_rate": 0.00019989484922416502, "loss": 1.2531, "step": 12 }, { "epoch": 0.26, "learning_rate": 0.00019976346258894503, "loss": 1.6787, "step": 13 }, { "epoch": 0.28, "learning_rate": 0.00019957961803037326, "loss": 1.7146, "step": 14 }, { "epoch": 0.3, "learning_rate": 0.00019934341221814739, "loss": 1.6937, "step": 15 }, { "epoch": 0.3, "eval_loss": 1.5841087102890015, "eval_runtime": 6.0341, "eval_samples_per_second": 0.331, "eval_steps_per_second": 0.331, "step": 15 }, { "epoch": 0.32, "learning_rate": 0.00019905496935471658, "loss": 1.7531, "step": 16 }, { "epoch": 0.34, "learning_rate": 0.0001987144411099731, "loss": 1.7094, "step": 17 }, { "epoch": 0.36, "learning_rate": 0.00019832200654150076, "loss": 1.7333, "step": 18 }, { "epoch": 0.38, "learning_rate": 0.00019787787200042223, "loss": 1.7736, "step": 19 }, { "epoch": 0.4, "learning_rate": 0.0001973822710228951, "loss": 1.6655, "step": 20 }, { "epoch": 0.4, "eval_loss": 1.5755609273910522, "eval_runtime": 6.0389, "eval_samples_per_second": 0.331, "eval_steps_per_second": 0.331, "step": 20 }, { "epoch": 0.42, "learning_rate": 0.0001968354642073129, "loss": 1.8054, "step": 21 }, { "epoch": 0.44, "learning_rate": 0.00019623773907727682, "loss": 1.8327, "step": 22 }, { "epoch": 0.46, "learning_rate": 0.00019558940993040885, "loss": 1.3822, "step": 23 }, { "epoch": 0.48, "learning_rate": 0.00019489081767308698, "loss": 1.254, "step": 24 }, { "epoch": 0.5, "learning_rate": 0.00019414232964118892, "loss": 1.7648, "step": 25 }, { "epoch": 0.5, "eval_loss": 1.5711314678192139, "eval_runtime": 6.0254, "eval_samples_per_second": 0.332, "eval_steps_per_second": 0.332, "step": 25 }, { "epoch": 0.52, "learning_rate": 0.0001933443394069383, "loss": 1.641, "step": 26 }, { "epoch": 0.54, "learning_rate": 0.00019249726657195532, "loss": 1.7777, "step": 27 }, { "epoch": 0.56, "learning_rate": 0.00019160155654662076, "loss": 1.7038, "step": 28 }, { "epoch": 0.58, "learning_rate": 0.0001906576803158686, "loss": 1.6179, "step": 29 }, { "epoch": 0.6, "learning_rate": 0.0001896661341915318, "loss": 1.7813, "step": 30 }, { "epoch": 0.6, "eval_loss": 1.5650010108947754, "eval_runtime": 6.0228, "eval_samples_per_second": 0.332, "eval_steps_per_second": 0.332, "step": 30 }, { "epoch": 0.62, "learning_rate": 0.00018862743955136966, "loss": 1.661, "step": 31 }, { "epoch": 0.64, "learning_rate": 0.00018754214256491562, "loss": 1.7948, "step": 32 }, { "epoch": 0.66, "learning_rate": 0.00018641081390628877, "loss": 1.92, "step": 33 }, { "epoch": 0.68, "learning_rate": 0.00018523404845412027, "loss": 1.6941, "step": 34 }, { "epoch": 0.7, "learning_rate": 0.0001840124649787524, "loss": 1.3461, "step": 35 }, { "epoch": 0.7, "eval_loss": 1.5624847412109375, "eval_runtime": 6.0325, "eval_samples_per_second": 0.332, "eval_steps_per_second": 0.332, "step": 35 }, { "epoch": 0.72, "learning_rate": 0.0001827467058168748, "loss": 0.8176, "step": 36 }, { "epoch": 0.74, "learning_rate": 0.00018143743653376942, "loss": 1.7262, "step": 37 }, { "epoch": 0.76, "learning_rate": 0.00018008534557334064, "loss": 1.7333, "step": 38 }, { "epoch": 0.78, "learning_rate": 0.00017869114389611575, "loss": 1.5991, "step": 39 }, { "epoch": 0.8, "learning_rate": 0.0001772555646054055, "loss": 1.7267, "step": 40 }, { "epoch": 0.8, "eval_loss": 1.5579214096069336, "eval_runtime": 6.0388, "eval_samples_per_second": 0.331, "eval_steps_per_second": 0.331, "step": 40 }, { "epoch": 0.82, "learning_rate": 0.00017577936256182167, "loss": 1.6694, "step": 41 }, { "epoch": 0.84, "learning_rate": 0.0001742633139863538, "loss": 1.8201, "step": 42 }, { "epoch": 0.86, "learning_rate": 0.0001727082160522145, "loss": 1.7913, "step": 43 }, { "epoch": 0.88, "learning_rate": 0.00017111488646566727, "loss": 1.825, "step": 44 }, { "epoch": 0.9, "learning_rate": 0.00016948416303605795, "loss": 1.7778, "step": 45 }, { "epoch": 0.9, "eval_loss": 1.555617094039917, "eval_runtime": 6.0421, "eval_samples_per_second": 0.331, "eval_steps_per_second": 0.331, "step": 45 }, { "epoch": 0.92, "learning_rate": 0.00016781690323527511, "loss": 1.6311, "step": 46 }, { "epoch": 0.94, "learning_rate": 0.0001661139837468717, "loss": 1.1499, "step": 47 }, { "epoch": 0.96, "learning_rate": 0.00016437630000508464, "loss": 1.0455, "step": 48 }, { "epoch": 0.98, "learning_rate": 0.00016260476572399496, "loss": 1.7178, "step": 49 }, { "epoch": 1.0, "learning_rate": 0.00016080031241707578, "loss": 1.4832, "step": 50 }, { "epoch": 1.0, "eval_loss": 1.554579257965088, "eval_runtime": 6.0239, "eval_samples_per_second": 0.332, "eval_steps_per_second": 0.332, "step": 50 }, { "epoch": 1.02, "learning_rate": 0.00015896388890738127, "loss": 1.6801, "step": 51 }, { "epoch": 1.04, "learning_rate": 0.0001570964608286336, "loss": 1.6462, "step": 52 }, { "epoch": 1.06, "learning_rate": 0.00015519901011747044, "loss": 1.7264, "step": 53 }, { "epoch": 1.08, "learning_rate": 0.0001532725344971202, "loss": 1.63, "step": 54 }, { "epoch": 1.1, "learning_rate": 0.00015131804695277612, "loss": 1.7584, "step": 55 }, { "epoch": 1.1, "eval_loss": 1.5519572496414185, "eval_runtime": 6.0181, "eval_samples_per_second": 0.332, "eval_steps_per_second": 0.332, "step": 55 }, { "epoch": 1.12, "learning_rate": 0.0001493365751989454, "loss": 1.7956, "step": 56 }, { "epoch": 1.14, "learning_rate": 0.00014732916113905335, "loss": 1.6528, "step": 57 }, { "epoch": 1.16, "learning_rate": 0.00014529686031758643, "loss": 1.7006, "step": 58 }, { "epoch": 1.18, "learning_rate": 0.00014324074136506284, "loss": 1.8171, "step": 59 }, { "epoch": 1.2, "learning_rate": 0.0001411618854361218, "loss": 1.4825, "step": 60 }, { "epoch": 1.2, "eval_loss": 1.5512959957122803, "eval_runtime": 6.0143, "eval_samples_per_second": 0.333, "eval_steps_per_second": 0.333, "step": 60 } ], "max_steps": 147, "num_train_epochs": 3, "total_flos": 3.4027598710554624e+17, "trial_name": null, "trial_params": null }