{ "epoch": 1.0, "eval_logits/chosen": -2.226208448410034, "eval_logits/rejected": -2.1488096714019775, "eval_logps/chosen": -313.5163879394531, "eval_logps/rejected": -320.47540283203125, "eval_loss": 0.5939346551895142, "eval_rewards/accuracies": 0.6919999718666077, "eval_rewards/chosen": -0.21535076200962067, "eval_rewards/margins": 0.30932918190956116, "eval_rewards/rejected": -0.524679958820343, "eval_runtime": 1172.6262, "eval_samples": 2000, "eval_samples_per_second": 1.706, "eval_steps_per_second": 0.213, "train_loss": 0.6084560017191406, "train_runtime": 59476.7244, "train_samples": 61135, "train_samples_per_second": 1.028, "train_steps_per_second": 0.128 }