{ "epoch": 3.0, "eval_logits/chosen": -0.17340299487113953, "eval_logits/rejected": -0.31969568133354187, "eval_logps/chosen": -359.973876953125, "eval_logps/rejected": -288.69586181640625, "eval_loss": 0.6869306564331055, "eval_rewards/accuracies": 0.5833333134651184, "eval_rewards/chosen": 0.028966180980205536, "eval_rewards/margins": 0.01931903511285782, "eval_rewards/rejected": 0.009647144004702568, "eval_runtime": 155.4392, "eval_samples": 2000, "eval_samples_per_second": 12.867, "eval_steps_per_second": 0.405 }