zephyr-dpop-qlora-gpt4-5e-7 / all_results.json
just1nseo's picture
End of training
7d4bf69 verified
raw
history blame contribute delete
997 Bytes
{
"epoch": 1.0,
"eval_dpo_losses": 0.6846575736999512,
"eval_logits/chosen": -2.76676082611084,
"eval_logits/rejected": -2.728935718536377,
"eval_logps/chosen": -282.11871337890625,
"eval_logps/rejected": -257.9208679199219,
"eval_loss": 0.7750731110572815,
"eval_positive_losses": 0.8595818281173706,
"eval_rewards/accuracies": 0.5899999737739563,
"eval_rewards/chosen": 0.024747073650360107,
"eval_rewards/margins": 0.018166616559028625,
"eval_rewards/margins_max": 0.12387742102146149,
"eval_rewards/margins_min": -0.0705643892288208,
"eval_rewards/margins_std": 0.06444399803876877,
"eval_rewards/rejected": 0.006580457091331482,
"eval_runtime": 427.7975,
"eval_samples": 2000,
"eval_samples_per_second": 4.675,
"eval_steps_per_second": 0.292,
"train_loss": 0.6494339942932129,
"train_runtime": 4013.1211,
"train_samples": 5678,
"train_samples_per_second": 1.415,
"train_steps_per_second": 0.088
}