{
  "epoch": 1.0,
  "eval_logits/chosen": -3.813626766204834,
  "eval_logits/rejected": -3.8465464115142822,
  "eval_logps/chosen": -292.01556396484375,
  "eval_logps/rejected": -185.1787109375,
  "eval_loss": 0.5876513123512268,
  "eval_rewards/accuracies": 0.6875,
  "eval_rewards/chosen": -0.18421681225299835,
  "eval_rewards/diff": -2.171558380126953,
  "eval_rewards/diff_abs": 2.208705186843872,
  "eval_rewards/rejected": -0.4189082384109497,
  "eval_rewards/student_margin": 0.23469144105911255,
  "eval_rewards/teacher_margin": 2.40625,
  "eval_runtime": 13.1071,
  "eval_samples": 1470,
  "eval_samples_per_second": 112.153,
  "eval_steps_per_second": 0.153
}