{ "epoch": 0.9999343530493009, "eval_logits/chosen": -0.8205632567405701, "eval_logits/rejected": -0.9320290684700012, "eval_logps/chosen": -104.04210662841797, "eval_logps/rejected": -255.28575134277344, "eval_loss": 0.000569345080293715, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -3.6049818992614746, "eval_rewards/margins": 14.385939598083496, "eval_rewards/rejected": -17.990922927856445, "eval_runtime": 161.6764, "eval_samples_per_second": 33.258, "eval_steps_per_second": 4.163 }