phi-2-gpo-test-longest-iter-1 / eval_results.json
BraylonDash's picture
Model save
b9d6f4f verified
raw
history blame
589 Bytes
{
"epoch": 1.98,
"eval_logits/chosen": -0.010728351771831512,
"eval_logits/rejected": 0.08673479408025742,
"eval_logps/chosen": -306.4167785644531,
"eval_logps/rejected": -278.7133483886719,
"eval_loss": 0.01081769447773695,
"eval_rewards/accuracies": 0.49950000643730164,
"eval_rewards/chosen": -0.0007103218231350183,
"eval_rewards/margins": -0.00023564710863865912,
"eval_rewards/rejected": -0.00047467477270402014,
"eval_runtime": 470.0123,
"eval_samples": 2000,
"eval_samples_per_second": 4.255,
"eval_steps_per_second": 1.064
}