OLMo-1B-hf-DPO-constitution-2 / all_results.json
Shahradmz's picture
Training in progress, step 500
8136bc8 verified
raw
history blame
560 Bytes
{
"epoch": 0.9998604326587579,
"eval_logits/chosen": -1.0269631147384644,
"eval_logits/rejected": -1.061506986618042,
"eval_logps/chosen": -112.56452178955078,
"eval_logps/rejected": -272.78466796875,
"eval_loss": 0.000979769160039723,
"eval_rewards/accuracies": 0.9996050596237183,
"eval_rewards/chosen": -3.615877628326416,
"eval_rewards/margins": 16.76466178894043,
"eval_rewards/rejected": -20.380538940429688,
"eval_runtime": 151.8806,
"eval_samples_per_second": 33.302,
"eval_steps_per_second": 4.168
}