{ "epoch": 2.0, "eval_logits/chosen": 1.4759222269058228, "eval_logits/rejected": 2.240403652191162, "eval_logps/chosen": -421.74468994140625, "eval_logps/rejected": -544.37890625, "eval_loss": 0.5015926957130432, "eval_rewards/accuracies": 0.76953125, "eval_rewards/chosen": -1.5911476612091064, "eval_rewards/margins": 1.2260199785232544, "eval_rewards/rejected": -2.8171677589416504, "eval_runtime": 93.5372, "eval_samples": 2000, "eval_samples_per_second": 21.382, "eval_steps_per_second": 0.342 }