{ "epoch": 1.0, "eval_dpo_losses": 0.6846575736999512, "eval_logits/chosen": -2.76676082611084, "eval_logits/rejected": -2.728935718536377, "eval_logps/chosen": -282.11871337890625, "eval_logps/rejected": -257.9208679199219, "eval_loss": 0.7750731110572815, "eval_positive_losses": 0.8595818281173706, "eval_rewards/accuracies": 0.5899999737739563, "eval_rewards/chosen": 0.024747073650360107, "eval_rewards/margins": 0.018166616559028625, "eval_rewards/margins_max": 0.12387742102146149, "eval_rewards/margins_min": -0.0705643892288208, "eval_rewards/margins_std": 0.06444399803876877, "eval_rewards/rejected": 0.006580457091331482, "eval_runtime": 427.7975, "eval_samples": 2000, "eval_samples_per_second": 4.675, "eval_steps_per_second": 0.292, "train_loss": 0.6494339942932129, "train_runtime": 4013.1211, "train_samples": 5678, "train_samples_per_second": 1.415, "train_steps_per_second": 0.088 }