{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.984,
  "eval_steps": 100,
  "global_step": 124,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 3.846153846153847e-07,
      "logits/chosen": 0.48606717586517334,
      "logits/rejected": 0.39871031045913696,
      "logps/chosen": -212.65087890625,
      "logps/rejected": -202.6477508544922,
      "loss": 0.0102,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.16,
      "learning_rate": 3.846153846153847e-06,
      "logits/chosen": 0.15301527082920074,
      "logits/rejected": 0.20366798341274261,
      "logps/chosen": -161.48782348632812,
      "logps/rejected": -135.7443389892578,
      "loss": 0.0105,
      "rewards/accuracies": 0.3194444477558136,
      "rewards/chosen": -0.002633917611092329,
      "rewards/margins": -0.0009920704178512096,
      "rewards/rejected": -0.0016418471932411194,
      "step": 10
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.951096619903317e-06,
      "logits/chosen": 0.09449799358844757,
      "logits/rejected": 0.0444490909576416,
      "logps/chosen": -182.85354614257812,
      "logps/rejected": -149.98532104492188,
      "loss": 0.0104,
      "rewards/accuracies": 0.40625,
      "rewards/chosen": -0.0008511155610904098,
      "rewards/margins": -0.0002212001709267497,
      "rewards/rejected": -0.000629915448371321,
      "step": 20
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.716164218065246e-06,
      "logits/chosen": 0.19659022986888885,
      "logits/rejected": 0.24700064957141876,
      "logps/chosen": -175.32278442382812,
      "logps/rejected": -143.53839111328125,
      "loss": 0.0104,
      "rewards/accuracies": 0.38749998807907104,
      "rewards/chosen": -0.0011144612217321992,
      "rewards/margins": -0.000345538865076378,
      "rewards/rejected": -0.000768922152929008,
      "step": 30
    },
    {
      "epoch": 0.64,
      "learning_rate": 4.3048902348863116e-06,
      "logits/chosen": 0.14261968433856964,
      "logits/rejected": 0.19110862910747528,
      "logps/chosen": -181.75755310058594,
      "logps/rejected": -164.41835021972656,
      "loss": 0.0103,
      "rewards/accuracies": 0.39375001192092896,
      "rewards/chosen": -0.0007939288625493646,
      "rewards/margins": 0.00022867057123221457,
      "rewards/rejected": -0.0010225994046777487,
      "step": 40
    },
    {
      "epoch": 0.8,
      "learning_rate": 3.7500000000000005e-06,
      "logits/chosen": 0.07993963360786438,
      "logits/rejected": 0.1553264558315277,
      "logps/chosen": -170.30532836914062,
      "logps/rejected": -144.61050415039062,
      "loss": 0.0101,
      "rewards/accuracies": 0.42500001192092896,
      "rewards/chosen": 0.00015746071585454047,
      "rewards/margins": 0.0011130261700600386,
      "rewards/rejected": -0.0009555654833093286,
      "step": 50
    },
    {
      "epoch": 0.96,
      "learning_rate": 3.0956464785579125e-06,
      "logits/chosen": 0.18755468726158142,
      "logits/rejected": 0.17403154075145721,
      "logps/chosen": -173.29977416992188,
      "logps/rejected": -144.7869873046875,
      "loss": 0.0105,
      "rewards/accuracies": 0.4000000059604645,
      "rewards/chosen": -0.0005446333670988679,
      "rewards/margins": -0.000993421534076333,
      "rewards/rejected": 0.00044878822518512607,
      "step": 60
    },
    {
      "epoch": 1.12,
      "learning_rate": 2.39389699200963e-06,
      "logits/chosen": 0.2639772593975067,
      "logits/rejected": 0.2394874542951584,
      "logps/chosen": -188.4938201904297,
      "logps/rejected": -160.391357421875,
      "loss": 0.0102,
      "rewards/accuracies": 0.3812499940395355,
      "rewards/chosen": -0.00019804532348643988,
      "rewards/margins": 0.000605274923145771,
      "rewards/rejected": -0.0008033200865611434,
      "step": 70
    },
    {
      "epoch": 1.28,
      "learning_rate": 1.700590188571887e-06,
      "logits/chosen": 0.1733393371105194,
      "logits/rejected": 0.2405407875776291,
      "logps/chosen": -166.1864471435547,
      "logps/rejected": -136.4402618408203,
      "loss": 0.0102,
      "rewards/accuracies": 0.41874998807907104,
      "rewards/chosen": 0.0004581899265758693,
      "rewards/margins": 0.0007379798917099833,
      "rewards/rejected": -0.0002797898487187922,
      "step": 80
    },
    {
      "epoch": 1.44,
      "learning_rate": 1.0708929268538034e-06,
      "logits/chosen": 0.16160213947296143,
      "logits/rejected": 0.13504673540592194,
      "logps/chosen": -175.88116455078125,
      "logps/rejected": -145.18658447265625,
      "loss": 0.01,
      "rewards/accuracies": 0.4312500059604645,
      "rewards/chosen": 0.0007930597057566047,
      "rewards/margins": 0.001615246757864952,
      "rewards/rejected": -0.0008221869356930256,
      "step": 90
    },
    {
      "epoch": 1.6,
      "learning_rate": 5.549106142039018e-07,
      "logits/chosen": 0.18513503670692444,
      "logits/rejected": 0.14850696921348572,
      "logps/chosen": -177.84588623046875,
      "logps/rejected": -152.69290161132812,
      "loss": 0.0103,
      "rewards/accuracies": 0.4437499940395355,
      "rewards/chosen": 0.0004619792161975056,
      "rewards/margins": 0.00023423954553436488,
      "rewards/rejected": 0.00022773972887080163,
      "step": 100
    },
    {
      "epoch": 1.6,
      "eval_logits/chosen": -0.019983915612101555,
      "eval_logits/rejected": 0.0771104097366333,
      "eval_logps/chosen": -306.49530029296875,
      "eval_logps/rejected": -278.806396484375,
      "eval_loss": 0.010805144906044006,
      "eval_rewards/accuracies": 0.5009999871253967,
      "eval_rewards/chosen": -0.0014956285012885928,
      "eval_rewards/margins": -9.011628571897745e-05,
      "eval_rewards/rejected": -0.0014055122155696154,
      "eval_runtime": 470.6682,
      "eval_samples_per_second": 4.249,
      "eval_steps_per_second": 1.062,
      "step": 100
    },
    {
      "epoch": 1.76,
      "learning_rate": 1.937002879188285e-07,
      "logits/chosen": 0.18757307529449463,
      "logits/rejected": 0.19283342361450195,
      "logps/chosen": -186.30393981933594,
      "logps/rejected": -159.83816528320312,
      "loss": 0.0104,
      "rewards/accuracies": 0.375,
      "rewards/chosen": 0.0009433077648282051,
      "rewards/margins": -0.0004140757955610752,
      "rewards/rejected": 0.0013573834439739585,
      "step": 110
    },
    {
      "epoch": 1.92,
      "learning_rate": 1.6003680950742728e-08,
      "logits/chosen": 0.07835674285888672,
      "logits/rejected": 0.13737662136554718,
      "logps/chosen": -174.80299377441406,
      "logps/rejected": -154.0167694091797,
      "loss": 0.0104,
      "rewards/accuracies": 0.35624998807907104,
      "rewards/chosen": -5.471259282785468e-05,
      "rewards/margins": -0.0002534462546464056,
      "rewards/rejected": 0.0001987336145248264,
      "step": 120
    },
    {
      "epoch": 1.98,
      "step": 124,
      "total_flos": 0.0,
      "train_loss": 0.010314414998696696,
      "train_runtime": 1522.7247,
      "train_samples_per_second": 1.313,
      "train_steps_per_second": 0.081
    }
  ],
  "logging_steps": 10,
  "max_steps": 124,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}
|