dpo-selective-buffer-safeipo / eval_results.json
wxzhang's picture
Model save
a2514bf verified
raw
history blame contribute delete
678 Bytes
{
"epoch": 1.0,
"eval_logits/chosen": 0.9053679704666138,
"eval_logits/rejected": 1.7481720447540283,
"eval_logps/chosen": -228.00465393066406,
"eval_logps/rejected": -198.00367736816406,
"eval_loss": 4449.90234375,
"eval_rewards/accuracies": 0.616104006767273,
"eval_rewards/chosen": -0.8765509724617004,
"eval_rewards/margins": 0.08215557038784027,
"eval_rewards/rejected": -0.9587064981460571,
"eval_rewards/safe_rewards": -0.865267813205719,
"eval_rewards/unsafe_rewards": -0.860846996307373,
"eval_runtime": 2354.4025,
"eval_samples": 35044,
"eval_samples_per_second": 14.884,
"eval_steps_per_second": 0.466
}