zephyr-2b-gemma-sft-dpo / all_results.json
ale-bay's picture
End of training
8580e09 verified
{
"epoch": 1.971563981042654,
"eval_logits/chosen": -16.5866641998291,
"eval_logits/rejected": -15.948689460754395,
"eval_logps/chosen": -363.826171875,
"eval_logps/rejected": -363.9148864746094,
"eval_loss": 0.5950456857681274,
"eval_rewards/accuracies": 0.71875,
"eval_rewards/chosen": 0.22188298404216766,
"eval_rewards/margins": 0.26559606194496155,
"eval_rewards/rejected": -0.043713077902793884,
"eval_runtime": 6.8121,
"eval_samples": 750,
"eval_samples_per_second": 110.098,
"eval_steps_per_second": 3.523,
"total_flos": 0.0,
"train_loss": 0.6262502945386447,
"train_runtime": 431.1142,
"train_samples": 6750,
"train_samples_per_second": 31.314,
"train_steps_per_second": 0.241
}