zephyr-2b-gemma-sft-dpo / eval_results.json
ale-bay's picture
End of training
8580e09 verified
{
"epoch": 1.971563981042654,
"eval_logits/chosen": -16.5866641998291,
"eval_logits/rejected": -15.948689460754395,
"eval_logps/chosen": -363.826171875,
"eval_logps/rejected": -363.9148864746094,
"eval_loss": 0.5950456857681274,
"eval_rewards/accuracies": 0.71875,
"eval_rewards/chosen": 0.22188298404216766,
"eval_rewards/margins": 0.26559606194496155,
"eval_rewards/rejected": -0.043713077902793884,
"eval_runtime": 6.8121,
"eval_samples": 750,
"eval_samples_per_second": 110.098,
"eval_steps_per_second": 3.523
}