|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.010141490144761593, |
|
"eval_steps": 100, |
|
"global_step": 31, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.421875, |
|
"learning_rate": 1.25e-06, |
|
"logits/chosen": -2.3689165115356445, |
|
"logits/rejected": -2.3419089317321777, |
|
"logps/chosen": -304.96429443359375, |
|
"logps/rejected": -224.31954956054688, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.9296875, |
|
"learning_rate": 4.415111107797445e-06, |
|
"logits/chosen": -2.3774471282958984, |
|
"logits/rejected": -2.358837127685547, |
|
"logps/chosen": -267.6408386230469, |
|
"logps/rejected": -221.9726104736328, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5166666507720947, |
|
"rewards/chosen": 0.008927525021135807, |
|
"rewards/margins": 0.002250629710033536, |
|
"rewards/rejected": 0.006676895078271627, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.203125, |
|
"learning_rate": 1.7829919182222752e-06, |
|
"logits/chosen": -2.4560706615448, |
|
"logits/rejected": -2.402303695678711, |
|
"logps/chosen": -265.12762451171875, |
|
"logps/rejected": -272.61566162109375, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.5900000333786011, |
|
"rewards/chosen": 0.028245043009519577, |
|
"rewards/margins": 0.009032377041876316, |
|
"rewards/rejected": 0.019212666898965836, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.375, |
|
"learning_rate": 1.6904105645142443e-08, |
|
"logits/chosen": -2.3814165592193604, |
|
"logits/rejected": -2.3470723628997803, |
|
"logps/chosen": -304.08697509765625, |
|
"logps/rejected": -281.0203552246094, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": 0.03472686558961868, |
|
"rewards/margins": 0.019056813791394234, |
|
"rewards/rejected": 0.015670055523514748, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"step": 31, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6888245363389293, |
|
"train_runtime": 439.9957, |
|
"train_samples_per_second": 1.389, |
|
"train_steps_per_second": 0.07 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 31, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 5, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|