{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 1000, "global_step": 125, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "grad_norm": 5.490319312063127, "learning_rate": 7.692307692307694e-07, "logits/chosen": -1.73323655128479, "logits/rejected": -1.963712453842163, "logps/chosen": -64.71795654296875, "logps/rejected": -92.56527709960938, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.4, "grad_norm": 2.4753255673765837, "learning_rate": 7.692307692307694e-06, "logits/chosen": -1.6273242235183716, "logits/rejected": -1.7998943328857422, "logps/chosen": -78.1596450805664, "logps/rejected": -136.78704833984375, "loss": 0.5986, "rewards/accuracies": 0.625, "rewards/chosen": -0.14767809212207794, "rewards/margins": 0.3422772288322449, "rewards/rejected": -0.48995527625083923, "step": 10 }, { "epoch": 0.8, "grad_norm": 1.4922013042570934, "learning_rate": 9.903926402016153e-06, "logits/chosen": -1.132021188735962, "logits/rejected": -1.4362722635269165, "logps/chosen": -60.54664993286133, "logps/rejected": -335.38629150390625, "loss": 0.3069, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": 0.005078454967588186, "rewards/margins": 2.449418783187866, "rewards/rejected": -2.444340467453003, "step": 20 }, { "epoch": 1.2, "grad_norm": 0.08650022357203001, "learning_rate": 9.442228179894362e-06, "logits/chosen": -1.4244499206542969, "logits/rejected": -1.7115017175674438, "logps/chosen": -159.7740020751953, "logps/rejected": -776.4279174804688, "loss": 0.0327, "rewards/accuracies": 1.0, "rewards/chosen": -0.981249213218689, "rewards/margins": 5.87949275970459, "rewards/rejected": -6.86074161529541, "step": 30 }, { "epoch": 1.6, "grad_norm": 0.6071478933413408, "learning_rate": 8.633301610170136e-06, "logits/chosen": -1.8324272632598877, "logits/rejected": -2.274733781814575, "logps/chosen": -173.323486328125, "logps/rejected": -1064.7208251953125, "loss": 0.017, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.1587677001953125, "rewards/margins": 8.547063827514648, "rewards/rejected": -9.705831527709961, "step": 40 }, { "epoch": 2.0, "grad_norm": 10.37379661402861, "learning_rate": 7.540376726232648e-06, "logits/chosen": -2.072046995162964, "logits/rejected": -2.5416159629821777, "logps/chosen": -201.16822814941406, "logps/rejected": -1270.976806640625, "loss": 0.0143, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.3896355628967285, "rewards/margins": 10.439592361450195, "rewards/rejected": -11.829228401184082, "step": 50 }, { "epoch": 2.4, "grad_norm": 0.08724694925694036, "learning_rate": 6.248882390836135e-06, "logits/chosen": -1.5749847888946533, "logits/rejected": -2.108443021774292, "logps/chosen": -206.0604248046875, "logps/rejected": -1066.5167236328125, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -1.477189302444458, "rewards/margins": 8.241119384765625, "rewards/rejected": -9.718308448791504, "step": 60 }, { "epoch": 2.8, "grad_norm": 1.5412553598400243, "learning_rate": 4.859768718620656e-06, "logits/chosen": -1.5107686519622803, "logits/rejected": -2.1837661266326904, "logps/chosen": -248.97695922851562, "logps/rejected": -1404.155517578125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.8567733764648438, "rewards/margins": 11.288753509521484, "rewards/rejected": -13.145527839660645, "step": 70 }, { "epoch": 3.2, "grad_norm": 0.008991447421855448, "learning_rate": 3.4816162744519266e-06, "logits/chosen": -1.091412901878357, "logits/rejected": -1.767606496810913, "logps/chosen": -267.06280517578125, "logps/rejected": -1556.9952392578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.066657066345215, "rewards/margins": 12.597526550292969, "rewards/rejected": -14.6641845703125, "step": 80 }, { "epoch": 3.6, "grad_norm": 0.07467711604243485, "learning_rate": 2.2221488349019903e-06, "logits/chosen": -1.1434813737869263, "logits/rejected": -1.8074337244033813, "logps/chosen": -272.1551513671875, "logps/rejected": -1601.199951171875, "loss": 0.0042, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -2.1011674404144287, "rewards/margins": 13.020586013793945, "rewards/rejected": -15.121752738952637, "step": 90 }, { "epoch": 4.0, "grad_norm": 0.002962899425955675, "learning_rate": 1.1798131208919628e-06, "logits/chosen": -1.2595702409744263, "logits/rejected": -1.8675899505615234, "logps/chosen": -248.87960815429688, "logps/rejected": -1562.575927734375, "loss": 0.0017, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -1.868859887123108, "rewards/margins": 12.840827941894531, "rewards/rejected": -14.709688186645508, "step": 100 }, { "epoch": 4.4, "grad_norm": 0.0049241554457892705, "learning_rate": 4.3608367469340553e-07, "logits/chosen": -1.464623212814331, "logits/rejected": -2.089043378829956, "logps/chosen": -271.04827880859375, "logps/rejected": -1640.91796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.086467742919922, "rewards/margins": 13.438623428344727, "rewards/rejected": -15.525090217590332, "step": 110 }, { "epoch": 4.8, "grad_norm": 0.016229074824409925, "learning_rate": 4.909437331777178e-08, "logits/chosen": -1.3727686405181885, "logits/rejected": -2.019606351852417, "logps/chosen": -249.13632202148438, "logps/rejected": -1610.74951171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.8820005655288696, "rewards/margins": 13.32276439666748, "rewards/rejected": -15.204765319824219, "step": 120 }, { "epoch": 5.0, "step": 125, "total_flos": 0.0, "train_loss": 0.07995766662200912, "train_runtime": 9770.964, "train_samples_per_second": 0.819, "train_steps_per_second": 0.013 } ], "logging_steps": 10, "max_steps": 125, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }