{ "epoch": 0.9999343530493009, "eval_logits/chosen": -0.8756721019744873, "eval_logits/rejected": -0.9693624377250671, "eval_logps/chosen": -103.03213500976562, "eval_logps/rejected": -254.58119201660156, "eval_loss": 0.0005562438163906336, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -3.5039849281311035, "eval_rewards/margins": 14.41648006439209, "eval_rewards/rejected": -17.92046546936035, "eval_runtime": 161.905, "eval_samples_per_second": 33.211, "eval_steps_per_second": 4.157 }