{ "epoch": 1.0, "eval_logits/chosen": -2.5399038791656494, "eval_logits/rejected": -2.523594379425049, "eval_logps/chosen": -73.60810089111328, "eval_logps/rejected": -80.95262908935547, "eval_loss": 0.6417149901390076, "eval_rewards/accuracies": 0.335317462682724, "eval_rewards/chosen": 0.2396649718284607, "eval_rewards/margins": 0.19197754561901093, "eval_rewards/rejected": 0.04768744856119156, "eval_runtime": 114.5526, "eval_samples": 2000, "eval_samples_per_second": 17.459, "eval_steps_per_second": 0.55 }