{ | |
"epoch": 1.0, | |
"eval_logits/chosen": -3.411515712738037, | |
"eval_logits/rejected": -3.456860065460205, | |
"eval_logps/chosen": -564.323974609375, | |
"eval_logps/rejected": -567.8529052734375, | |
"eval_loss": 0.7979298830032349, | |
"eval_rewards/accuracies": 0.46875, | |
"eval_rewards/chosen": 4.5177483558654785, | |
"eval_rewards/diff": -0.33996284008026123, | |
"eval_rewards/diff_abs": 1.2063032388687134, | |
"eval_rewards/rejected": 4.610641002655029, | |
"eval_rewards/student_margin": -0.09289252758026123, | |
"eval_rewards/teacher_margin": 0.2470703125, | |
"eval_runtime": 26.855, | |
"eval_samples": 1543, | |
"eval_samples_per_second": 57.457, | |
"eval_steps_per_second": 0.149 | |
} |