{ "epoch": 1.0, "eval_logits/chosen": -2.5889480113983154, "eval_logits/rejected": -2.5375468730926514, "eval_logps/chosen": -302.0460205078125, "eval_logps/rejected": -314.4243469238281, "eval_loss": 0.5835465788841248, "eval_rewards/accuracies": 0.7105000019073486, "eval_rewards/chosen": -0.1485649198293686, "eval_rewards/margins": 0.33676549792289734, "eval_rewards/rejected": -0.4853304326534271, "eval_runtime": 1163.5942, "eval_samples": 2000, "eval_samples_per_second": 1.719, "eval_steps_per_second": 0.215 }