{ "epoch": 1.0, "eval_logits/chosen": -2.4359169006347656, "eval_logits/rejected": -2.4180893898010254, "eval_logps/chosen": -74.32483673095703, "eval_logps/rejected": -86.18550872802734, "eval_loss": 0.6700397729873657, "eval_rewards/accuracies": 0.3154761791229248, "eval_rewards/chosen": 0.0016639787936583161, "eval_rewards/margins": 0.05223553627729416, "eval_rewards/rejected": -0.05057155340909958, "eval_runtime": 113.9808, "eval_samples": 2000, "eval_samples_per_second": 17.547, "eval_steps_per_second": 0.553, "total_flos": 0.0, "train_loss": 0.0, "train_runtime": 0.0257, "train_samples": 6113, "train_samples_per_second": 237872.308, "train_steps_per_second": 3735.603 }