|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -3.900073289871216, |
|
"eval_logits/rejected": -3.9142141342163086, |
|
"eval_logps/chosen": -293.32354736328125, |
|
"eval_logps/rejected": -187.59616088867188, |
|
"eval_loss": 0.6149587035179138, |
|
"eval_rewards/accuracies": 0.6875, |
|
"eval_rewards/chosen": -0.1575067937374115, |
|
"eval_rewards/diff": -2.2334296703338623, |
|
"eval_rewards/diff_abs": 2.236445188522339, |
|
"eval_rewards/rejected": -0.33032703399658203, |
|
"eval_rewards/student_margin": 0.17282025516033173, |
|
"eval_rewards/teacher_margin": 2.40625, |
|
"eval_runtime": 11.4547, |
|
"eval_samples": 1470, |
|
"eval_samples_per_second": 128.332, |
|
"eval_steps_per_second": 0.175, |
|
"train_loss": 0.6488963676806219, |
|
"train_runtime": 2900.3403, |
|
"train_samples": 147002, |
|
"train_samples_per_second": 50.684, |
|
"train_steps_per_second": 0.132 |
|
} |