|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 121, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 44.33979039021003, |
|
"learning_rate": 3.846153846153846e-08, |
|
"logits/chosen": -3.751237154006958, |
|
"logits/rejected": -3.652125358581543, |
|
"logps/chosen": -995.5263671875, |
|
"logps/rejected": -1318.9669189453125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 35.835351014179686, |
|
"learning_rate": 3.8461538461538463e-07, |
|
"logits/chosen": -3.71555233001709, |
|
"logits/rejected": -3.6416714191436768, |
|
"logps/chosen": -873.4622192382812, |
|
"logps/rejected": -1458.0814208984375, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.000241430607275106, |
|
"rewards/margins": 0.014367452822625637, |
|
"rewards/rejected": -0.014608883298933506, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 29.834055642040035, |
|
"learning_rate": 4.948351554413879e-07, |
|
"logits/chosen": -3.8361048698425293, |
|
"logits/rejected": -3.7403998374938965, |
|
"logps/chosen": -946.3114013671875, |
|
"logps/rejected": -1385.838134765625, |
|
"loss": 0.5664, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.045135729014873505, |
|
"rewards/margins": 0.27529585361480713, |
|
"rewards/rejected": -0.32043159008026123, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 19.851574913053625, |
|
"learning_rate": 4.700503477950277e-07, |
|
"logits/chosen": -4.062998294830322, |
|
"logits/rejected": -4.014255046844482, |
|
"logps/chosen": -949.5398559570312, |
|
"logps/rejected": -1611.4945068359375, |
|
"loss": 0.3144, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.3165217936038971, |
|
"rewards/margins": 1.7194368839263916, |
|
"rewards/rejected": -2.035958766937256, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 20.09319706827758, |
|
"learning_rate": 4.2677669529663686e-07, |
|
"logits/chosen": -4.247502326965332, |
|
"logits/rejected": -4.206511974334717, |
|
"logps/chosen": -1011.1369018554688, |
|
"logps/rejected": -1838.808349609375, |
|
"loss": 0.2629, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.8711185455322266, |
|
"rewards/margins": 3.5932796001434326, |
|
"rewards/rejected": -4.464398384094238, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 17.14618338383071, |
|
"learning_rate": 3.686500924369101e-07, |
|
"logits/chosen": -4.1785712242126465, |
|
"logits/rejected": -4.129928112030029, |
|
"logps/chosen": -1003.81005859375, |
|
"logps/rejected": -1773.5804443359375, |
|
"loss": 0.2915, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.6944042444229126, |
|
"rewards/margins": 3.5044853687286377, |
|
"rewards/rejected": -4.19888973236084, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 22.001296504737557, |
|
"learning_rate": 3.005543930830095e-07, |
|
"logits/chosen": -4.102808475494385, |
|
"logits/rejected": -4.0734052658081055, |
|
"logps/chosen": -1009.1546020507812, |
|
"logps/rejected": -1869.724365234375, |
|
"loss": 0.1679, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.5237128138542175, |
|
"rewards/margins": 4.181941509246826, |
|
"rewards/rejected": -4.705655097961426, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 22.911683114341198, |
|
"learning_rate": 2.2821106431308543e-07, |
|
"logits/chosen": -4.085400104522705, |
|
"logits/rejected": -4.0569257736206055, |
|
"logps/chosen": -1070.66650390625, |
|
"logps/rejected": -1899.2978515625, |
|
"loss": 0.1352, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.6565460562705994, |
|
"rewards/margins": 4.237053871154785, |
|
"rewards/rejected": -4.893599987030029, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 13.574682459891369, |
|
"learning_rate": 1.5769846317182892e-07, |
|
"logits/chosen": -4.115840435028076, |
|
"logits/rejected": -4.082175254821777, |
|
"logps/chosen": -976.5784912109375, |
|
"logps/rejected": -2003.0341796875, |
|
"loss": 0.119, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.6749869585037231, |
|
"rewards/margins": 5.019448757171631, |
|
"rewards/rejected": -5.694436073303223, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 17.137536022407392, |
|
"learning_rate": 9.494112718293502e-08, |
|
"logits/chosen": -4.116685390472412, |
|
"logits/rejected": -4.105366230010986, |
|
"logps/chosen": -969.3150634765625, |
|
"logps/rejected": -2016.6741943359375, |
|
"loss": 0.105, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.6770291328430176, |
|
"rewards/margins": 5.554581642150879, |
|
"rewards/rejected": -6.231610298156738, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 19.248054573741623, |
|
"learning_rate": 4.521198892775202e-08, |
|
"logits/chosen": -4.130964279174805, |
|
"logits/rejected": -4.107068061828613, |
|
"logps/chosen": -997.1163940429688, |
|
"logps/rejected": -2062.62109375, |
|
"loss": 0.1232, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.8503810167312622, |
|
"rewards/margins": 5.699716091156006, |
|
"rewards/rejected": -6.550097465515137, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_logits/chosen": -4.124736309051514, |
|
"eval_logits/rejected": -4.084261894226074, |
|
"eval_logps/chosen": -456.256591796875, |
|
"eval_logps/rejected": -691.86181640625, |
|
"eval_loss": 0.46714693307876587, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -0.502022922039032, |
|
"eval_rewards/margins": 0.8244528770446777, |
|
"eval_rewards/rejected": -1.326475739479065, |
|
"eval_runtime": 14.3746, |
|
"eval_samples_per_second": 5.287, |
|
"eval_steps_per_second": 0.209, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 12.936607152276345, |
|
"learning_rate": 1.2689339106741526e-08, |
|
"logits/chosen": -4.11704158782959, |
|
"logits/rejected": -4.1085028648376465, |
|
"logps/chosen": -983.2184448242188, |
|
"logps/rejected": -2123.88623046875, |
|
"loss": 0.1405, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.7897412776947021, |
|
"rewards/margins": 5.98792028427124, |
|
"rewards/rejected": -6.7776618003845215, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 18.334577187135583, |
|
"learning_rate": 1.0576247944985018e-10, |
|
"logits/chosen": -4.1415205001831055, |
|
"logits/rejected": -4.129425048828125, |
|
"logps/chosen": -978.7972412109375, |
|
"logps/rejected": -2053.748046875, |
|
"loss": 0.1121, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.8511762619018555, |
|
"rewards/margins": 5.528683185577393, |
|
"rewards/rejected": -6.37985897064209, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 121, |
|
"total_flos": 0.0, |
|
"train_loss": 0.021612109477854958, |
|
"train_runtime": 343.2656, |
|
"train_samples_per_second": 22.536, |
|
"train_steps_per_second": 0.352 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 121, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|