{ "epoch": 3.0, "eval_logits/chosen": -1.588410496711731, "eval_logits/rejected": -1.3595237731933594, "eval_logps/chosen": -383.10223388671875, "eval_logps/rejected": -666.2252807617188, "eval_loss": 0.005952088162302971, "eval_rewards/accuracies": 0.996632993221283, "eval_rewards/chosen": 3.6820499897003174, "eval_rewards/margins": 14.352936744689941, "eval_rewards/rejected": -10.670886993408203, "eval_runtime": 577.443, "eval_samples": 9500, "eval_samples_per_second": 16.452, "eval_steps_per_second": 0.514, "train_loss": 0.053706495013128505, "train_runtime": 110737.0914, "train_samples": 188284, "train_samples_per_second": 5.101, "train_steps_per_second": 0.08 }