{ "epoch": 1.0, "eval_logits/chosen": -0.9365912079811096, "eval_logits/rejected": -0.926014244556427, "eval_logps/chosen": -352.8179626464844, "eval_logps/rejected": -349.7088317871094, "eval_loss": 0.18906600773334503, "eval_pred_label": 5005.27001953125, "eval_rewards/accuracies": 0.7579365372657776, "eval_rewards/chosen": 0.2332553267478943, "eval_rewards/margins": 3.972490072250366, "eval_rewards/rejected": -3.7392351627349854, "eval_runtime": 277.9429, "eval_samples": 2000, "eval_samples_per_second": 7.196, "eval_steps_per_second": 0.227, "eval_use_label": 3014.730224609375 }