{ "epoch": 0.9880609304240429, "eval_logits/chosen": 9.247183799743652, "eval_logits/rejected": 10.069817543029785, "eval_logps/chosen": -0.4010205566883087, "eval_logps/rejected": -0.41174402832984924, "eval_loss": 1.3837605714797974, "eval_rewards/accuracies": 0.5231481194496155, "eval_rewards/chosen": -0.8020411133766174, "eval_rewards/margins": 0.02144695073366165, "eval_rewards/rejected": -0.8234880566596985, "eval_runtime": 145.5386, "eval_samples": 1080, "eval_samples_per_second": 7.421, "eval_steps_per_second": 1.855 }