{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.792, "eval_steps": 1, "global_step": 14, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.128, "grad_norm": 426.2940979003906, "learning_rate": 2.5e-05, "loss": 7.2927, "step": 1 }, { "epoch": 0.128, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 6.642009735107422, "eval_runtime": 84.411, "eval_samples_per_second": 2.962, "eval_steps_per_second": 0.746, "step": 1 }, { "epoch": 0.256, "grad_norm": 358.92913818359375, "learning_rate": 5e-05, "loss": 6.6477, "step": 2 }, { "epoch": 0.256, "eval_exact_match": 0.16326530612244897, "eval_f1_a": 0.6279863481228669, "eval_f1_m": 0.5070784590892632, "eval_loss": 2.5748679637908936, "eval_runtime": 84.5004, "eval_samples_per_second": 2.959, "eval_steps_per_second": 0.746, "step": 2 }, { "epoch": 0.384, "grad_norm": 202.6668243408203, "learning_rate": 4.5833333333333334e-05, "loss": 1.5318, "step": 3 }, { "epoch": 0.384, "eval_exact_match": 0.2653061224489796, "eval_f1_a": 0.6779661016949152, "eval_f1_m": 0.6622228112024031, "eval_loss": 1.5085991621017456, "eval_runtime": 84.4987, "eval_samples_per_second": 2.959, "eval_steps_per_second": 0.746, "step": 3 }, { "epoch": 0.512, "grad_norm": 89.18372344970703, "learning_rate": 4.166666666666667e-05, "loss": 1.021, "step": 4 }, { "epoch": 0.512, "eval_exact_match": 0.0, "eval_f1_a": 0.038834951456310676, "eval_f1_m": 0.36717514511632166, "eval_loss": 3.572444438934326, "eval_runtime": 84.4787, "eval_samples_per_second": 2.959, "eval_steps_per_second": 0.746, "step": 4 }, { "epoch": 0.64, "grad_norm": 249.71437072753906, "learning_rate": 3.7500000000000003e-05, "loss": 3.3149, "step": 5 }, { "epoch": 0.64, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.05769230769230769, "eval_f1_m": 0.3837689254856121, "eval_loss": 2.9223897457122803, "eval_runtime": 84.4654, "eval_samples_per_second": 2.96, "eval_steps_per_second": 0.746, "step": 5 }, { "epoch": 0.768, "grad_norm": 234.4349365234375, "learning_rate": 3.3333333333333335e-05, "loss": 3.0938, "step": 6 }, { "epoch": 0.768, "eval_exact_match": 0.24489795918367346, "eval_f1_a": 0.6127167630057804, "eval_f1_m": 0.6370587195917328, "eval_loss": 1.037977933883667, "eval_runtime": 84.4697, "eval_samples_per_second": 2.96, "eval_steps_per_second": 0.746, "step": 6 }, { "epoch": 0.896, "grad_norm": 44.01259994506836, "learning_rate": 2.916666666666667e-05, "loss": 1.3183, "step": 7 }, { "epoch": 0.896, "eval_exact_match": 0.2653061224489796, "eval_f1_a": 0.6926406926406927, "eval_f1_m": 0.6772793532997615, "eval_loss": 0.9939583539962769, "eval_runtime": 84.4865, "eval_samples_per_second": 2.959, "eval_steps_per_second": 0.746, "step": 7 }, { "epoch": 1.024, "grad_norm": 80.42048645019531, "learning_rate": 2.5e-05, "loss": 0.9766, "step": 8 }, { "epoch": 1.024, "eval_exact_match": 0.32653061224489793, "eval_f1_a": 0.709090909090909, "eval_f1_m": 0.7124046588332301, "eval_loss": 0.9605630040168762, "eval_runtime": 84.508, "eval_samples_per_second": 2.958, "eval_steps_per_second": 0.745, "step": 8 }, { "epoch": 1.152, "grad_norm": 50.82283401489258, "learning_rate": 2.0833333333333336e-05, "loss": 0.5821, "step": 9 }, { "epoch": 1.152, "eval_exact_match": 0.3469387755102041, "eval_f1_a": 0.694300518134715, "eval_f1_m": 0.6903760638454516, "eval_loss": 0.932707667350769, "eval_runtime": 84.4808, "eval_samples_per_second": 2.959, "eval_steps_per_second": 0.746, "step": 9 }, { "epoch": 1.28, "grad_norm": 47.22996139526367, "learning_rate": 1.6666666666666667e-05, "loss": 0.6158, "step": 10 }, { "epoch": 1.28, "eval_exact_match": 0.3673469387755102, "eval_f1_a": 0.6989247311827956, "eval_f1_m": 0.7010895000690919, "eval_loss": 0.9111232757568359, "eval_runtime": 84.4887, "eval_samples_per_second": 2.959, "eval_steps_per_second": 0.746, "step": 10 }, { "epoch": 1.408, "grad_norm": 70.53364562988281, "learning_rate": 1.25e-05, "loss": 0.5635, "step": 11 }, { "epoch": 1.408, "eval_exact_match": 0.3469387755102041, "eval_f1_a": 0.7040816326530612, "eval_f1_m": 0.7049636304738346, "eval_loss": 0.8203014135360718, "eval_runtime": 84.4658, "eval_samples_per_second": 2.96, "eval_steps_per_second": 0.746, "step": 11 }, { "epoch": 1.536, "grad_norm": 24.0622501373291, "learning_rate": 8.333333333333334e-06, "loss": 0.5367, "step": 12 }, { "epoch": 1.536, "eval_exact_match": 0.2857142857142857, "eval_f1_a": 0.7115384615384615, "eval_f1_m": 0.7037444414995435, "eval_loss": 0.8254165649414062, "eval_runtime": 84.4794, "eval_samples_per_second": 2.959, "eval_steps_per_second": 0.746, "step": 12 }, { "epoch": 1.6640000000000001, "grad_norm": 49.5471076965332, "learning_rate": 4.166666666666667e-06, "loss": 0.5805, "step": 13 }, { "epoch": 1.6640000000000001, "eval_exact_match": 0.3877551020408163, "eval_f1_a": 0.7317073170731707, "eval_f1_m": 0.7391583473216126, "eval_loss": 0.7916401624679565, "eval_runtime": 84.4743, "eval_samples_per_second": 2.959, "eval_steps_per_second": 0.746, "step": 13 }, { "epoch": 1.792, "grad_norm": 32.582847595214844, "learning_rate": 0.0, "loss": 0.4737, "step": 14 }, { "epoch": 1.792, "eval_exact_match": 0.3469387755102041, "eval_f1_a": 0.6903553299492386, "eval_f1_m": 0.6909562093235563, "eval_loss": 0.7645593881607056, "eval_runtime": 84.4968, "eval_samples_per_second": 2.959, "eval_steps_per_second": 0.746, "step": 14 }, { "epoch": 1.792, "step": 14, "total_flos": 4.3944687722496e+16, "train_loss": 2.0392316984278813, "train_runtime": 3611.0931, "train_samples_per_second": 0.554, "train_steps_per_second": 0.004 } ], "logging_steps": 1, "max_steps": 14, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 4.3944687722496e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }