beamaia's picture
Training in progress, step 100, checkpoint
2e3aeb6 verified
{
"best_metric": 0.014343788847327232,
"best_model_checkpoint": "./Zephyr/28-03-24-Weni-WeniGPT-QA-Zephyr-7B-4.0.1-KTO_WeniGPT Experiment using KTO trainer with no collator, Zephyr model and no system prompt.-2_max_steps-786_batch_32_2024-03-28_ppid_9/checkpoint-100",
"epoch": 0.7561436672967864,
"eval_steps": 50,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.15,
"grad_norm": 1.94673752784729,
"kl": 0.3060356676578522,
"learning_rate": 0.0001666666666666667,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 1.0194,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 20
},
{
"epoch": 0.3,
"grad_norm": 0.799897313117981,
"kl": 0.08012839406728745,
"learning_rate": 0.00019580052493438322,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 0.204,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 40
},
{
"epoch": 0.38,
"eval_kl": 0.0,
"eval_logps/chosen": -120.33751678466797,
"eval_logps/rejected": -405.6329345703125,
"eval_loss": 0.02746938355267048,
"eval_rewards/chosen": 5.632839202880859,
"eval_rewards/margins": 25.979337692260742,
"eval_rewards/rejected": -20.346500396728516,
"eval_runtime": 215.3361,
"eval_samples_per_second": 2.322,
"eval_steps_per_second": 0.58,
"step": 50
},
{
"epoch": 0.45,
"grad_norm": 0.1745920479297638,
"kl": 0.20441873371601105,
"learning_rate": 0.0001905511811023622,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 0.0962,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 60
},
{
"epoch": 0.6,
"grad_norm": 1.8645330667495728,
"kl": 0.0,
"learning_rate": 0.00018556430446194227,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 0.0881,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 80
},
{
"epoch": 0.76,
"grad_norm": 0.13575485348701477,
"kl": 0.0,
"learning_rate": 0.00018031496062992125,
"logps/chosen": NaN,
"logps/rejected": NaN,
"loss": 0.073,
"rewards/chosen": NaN,
"rewards/margins": NaN,
"rewards/rejected": NaN,
"step": 100
},
{
"epoch": 0.76,
"eval_kl": 0.0,
"eval_logps/chosen": -118.76765441894531,
"eval_logps/rejected": -394.83203125,
"eval_loss": 0.014343788847327232,
"eval_rewards/chosen": 5.789826393127441,
"eval_rewards/margins": 25.05623435974121,
"eval_rewards/rejected": -19.266408920288086,
"eval_runtime": 215.2593,
"eval_samples_per_second": 2.323,
"eval_steps_per_second": 0.581,
"step": 100
}
],
"logging_steps": 20,
"max_steps": 786,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}