PEFT
Safetensors
mixtral
alignment-handbook
trl
dpo
Generated from Trainer
4-bit precision
bitsandbytes
stealth-finance-v2-dpo-adapter / eval_results.json
jan-hq's picture
Model save
c7f70d3 verified
{
"epoch": 1.0,
"eval_logits/chosen": -0.7773212790489197,
"eval_logits/rejected": -0.7749085426330566,
"eval_logps/chosen": -275.3572082519531,
"eval_logps/rejected": -324.0383605957031,
"eval_loss": 0.12900209426879883,
"eval_rewards/accuracies": 0.8596742749214172,
"eval_rewards/chosen": -0.17991267144680023,
"eval_rewards/margins": 5.889674186706543,
"eval_rewards/rejected": -6.069586277008057,
"eval_runtime": 76159.9506,
"eval_samples": 197968,
"eval_samples_per_second": 2.599,
"eval_steps_per_second": 1.3
}