{ "epoch": 1.0, "eval_logits/chosen": -23.520809173583984, "eval_logits/rejected": -23.798635482788086, "eval_logps/chosen": -0.5238760113716125, "eval_logps/rejected": -0.7024902105331421, "eval_loss": 3.6378085613250732, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -5.238759994506836, "eval_rewards/margins": 1.786142349243164, "eval_rewards/rejected": -7.02490234375, "eval_runtime": 1.2939, "eval_samples": 100, "eval_samples_per_second": 77.284, "eval_steps_per_second": 0.773 }