{ "epoch": 3.0, "eval_logits/chosen": -0.3196437656879425, "eval_logits/rejected": 0.2021237313747406, "eval_logps/chosen": -275.47796630859375, "eval_logps/rejected": -644.0859375, "eval_loss": 0.005032053682953119, "eval_rewards/accuracies": 0.9991582632064819, "eval_rewards/chosen": -0.09963408857584, "eval_rewards/margins": 19.299285888671875, "eval_rewards/rejected": -19.3989200592041, "eval_runtime": 525.9739, "eval_samples": 9500, "eval_samples_per_second": 18.062, "eval_steps_per_second": 0.565, "train_loss": 0.03813637946047515, "train_runtime": 102663.2378, "train_samples": 188284, "train_samples_per_second": 5.502, "train_steps_per_second": 0.086 }