avemio-digital's picture
Add files using upload-large-folder tool
da0a907 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.886524822695035,
"eval_steps": 80,
"global_step": 840,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.6052009456264775,
"grad_norm": 897.57861328125,
"learning_rate": 1.9047619047619045e-07,
"log_odds_chosen": 0.061996445059776306,
"log_odds_ratio": -0.7339106202125549,
"logits/chosen": -2.475102663040161,
"logits/rejected": -2.5303642749786377,
"logps/chosen": -1.3030776977539062,
"logps/rejected": -1.351835012435913,
"loss": 1.9448,
"nll_loss": 1.881751298904419,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.13030776381492615,
"rewards/margins": 0.004875739570707083,
"rewards/rejected": -0.13518351316452026,
"step": 32
},
{
"epoch": 1.210401891252955,
"grad_norm": 555.6674194335938,
"learning_rate": 3.809523809523809e-07,
"log_odds_chosen": 0.13086628913879395,
"log_odds_ratio": -0.6972255110740662,
"logits/chosen": -2.408938407897949,
"logits/rejected": -2.4563820362091064,
"logps/chosen": -1.2012869119644165,
"logps/rejected": -1.3022348880767822,
"loss": 1.5253,
"nll_loss": 1.4454330205917358,
"rewards/accuracies": 0.54296875,
"rewards/chosen": -0.12012868374586105,
"rewards/margins": 0.010094808414578438,
"rewards/rejected": -0.13022349774837494,
"step": 64
},
{
"epoch": 1.5130023640661938,
"eval_log_odds_chosen": 1.2037408351898193,
"eval_log_odds_ratio": -0.2748129367828369,
"eval_logits/chosen": -2.1409010887145996,
"eval_logits/rejected": -2.1931569576263428,
"eval_logps/chosen": -1.156149983406067,
"eval_logps/rejected": -2.1107430458068848,
"eval_loss": 1.3948438167572021,
"eval_nll_loss": 1.5358692407608032,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.11561501026153564,
"eval_rewards/margins": 0.09545929729938507,
"eval_rewards/rejected": -0.21107430756092072,
"eval_runtime": 0.8754,
"eval_samples_per_second": 156.493,
"eval_steps_per_second": 5.711,
"step": 80
},
{
"epoch": 1.8156028368794326,
"grad_norm": 140.96612548828125,
"learning_rate": 4.996892303047305e-07,
"log_odds_chosen": 0.16402098536491394,
"log_odds_ratio": -0.6756913065910339,
"logits/chosen": -2.3939661979675293,
"logits/rejected": -2.389753580093384,
"logps/chosen": -1.0995960235595703,
"logps/rejected": -1.2302087545394897,
"loss": 1.3931,
"nll_loss": 1.3130543231964111,
"rewards/accuracies": 0.49609375,
"rewards/chosen": -0.10995960980653763,
"rewards/margins": 0.013061259873211384,
"rewards/rejected": -0.12302087247371674,
"step": 96
},
{
"epoch": 2.42080378250591,
"grad_norm": 3005.20654296875,
"learning_rate": 4.958326378681848e-07,
"log_odds_chosen": 0.05211365222930908,
"log_odds_ratio": -0.7710955142974854,
"logits/chosen": -2.4226865768432617,
"logits/rejected": -2.4471077919006348,
"logps/chosen": -1.8894121646881104,
"logps/rejected": -1.878553867340088,
"loss": 1.9751,
"nll_loss": 1.9949692487716675,
"rewards/accuracies": 0.54296875,
"rewards/chosen": -0.18894124031066895,
"rewards/margins": -0.001085837371647358,
"rewards/rejected": -0.18785539269447327,
"step": 128
},
{
"epoch": 3.0260047281323876,
"grad_norm": 3593.66064453125,
"learning_rate": 4.876353872369572e-07,
"log_odds_chosen": 0.010831637308001518,
"log_odds_ratio": -0.8205243349075317,
"logits/chosen": -2.4603629112243652,
"logits/rejected": -2.4731788635253906,
"logps/chosen": -1.9289910793304443,
"logps/rejected": -1.854127049446106,
"loss": 2.0756,
"nll_loss": 2.116929769515991,
"rewards/accuracies": 0.58203125,
"rewards/chosen": -0.19289910793304443,
"rewards/margins": -0.0074864043854177,
"rewards/rejected": -0.1854127049446106,
"step": 160
},
{
"epoch": 3.0260047281323876,
"eval_log_odds_chosen": 1.280719518661499,
"eval_log_odds_ratio": -0.25084003806114197,
"eval_logits/chosen": -2.156606912612915,
"eval_logits/rejected": -2.2219834327697754,
"eval_logps/chosen": -1.4854581356048584,
"eval_logps/rejected": -2.5444798469543457,
"eval_loss": 1.3283345699310303,
"eval_nll_loss": 1.4989588260650635,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.14854581654071808,
"eval_rewards/margins": 0.10590219497680664,
"eval_rewards/rejected": -0.2544480264186859,
"eval_runtime": 0.8785,
"eval_samples_per_second": 155.943,
"eval_steps_per_second": 5.691,
"step": 160
},
{
"epoch": 3.631205673758865,
"grad_norm": 817.2037353515625,
"learning_rate": 4.752422169756047e-07,
"log_odds_chosen": 0.09074901789426804,
"log_odds_ratio": -0.7456185817718506,
"logits/chosen": -2.377356767654419,
"logits/rejected": -2.396003007888794,
"logps/chosen": -1.5171489715576172,
"logps/rejected": -1.5251379013061523,
"loss": 1.9051,
"nll_loss": 1.6593117713928223,
"rewards/accuracies": 0.609375,
"rewards/chosen": -0.15171489119529724,
"rewards/margins": 0.0007988963043317199,
"rewards/rejected": -0.152513787150383,
"step": 192
},
{
"epoch": 4.236406619385343,
"grad_norm": 954.6674194335938,
"learning_rate": 4.588719528532341e-07,
"log_odds_chosen": 0.1411646008491516,
"log_odds_ratio": -0.6900860667228699,
"logits/chosen": -2.398102283477783,
"logits/rejected": -2.397972345352173,
"logps/chosen": -1.2695732116699219,
"logps/rejected": -1.3286174535751343,
"loss": 1.4204,
"nll_loss": 1.3868590593338013,
"rewards/accuracies": 0.6171875,
"rewards/chosen": -0.12695731222629547,
"rewards/margins": 0.005904428660869598,
"rewards/rejected": -0.13286174833774567,
"step": 224
},
{
"epoch": 4.539007092198582,
"eval_log_odds_chosen": 1.1990762948989868,
"eval_log_odds_ratio": -0.2697806656360626,
"eval_logits/chosen": -2.137376546859741,
"eval_logits/rejected": -2.1972498893737793,
"eval_logps/chosen": -1.2540639638900757,
"eval_logps/rejected": -2.2160115242004395,
"eval_loss": 1.2844356298446655,
"eval_nll_loss": 1.4172712564468384,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.125406414270401,
"eval_rewards/margins": 0.09619472920894623,
"eval_rewards/rejected": -0.22160112857818604,
"eval_runtime": 0.8664,
"eval_samples_per_second": 158.119,
"eval_steps_per_second": 5.771,
"step": 240
},
{
"epoch": 4.84160756501182,
"grad_norm": 790.6442260742188,
"learning_rate": 4.3881364404463375e-07,
"log_odds_chosen": 0.20751571655273438,
"log_odds_ratio": -0.6614270210266113,
"logits/chosen": -2.3498642444610596,
"logits/rejected": -2.370640993118286,
"logps/chosen": -1.1192173957824707,
"logps/rejected": -1.2252520322799683,
"loss": 1.3469,
"nll_loss": 1.2267839908599854,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.1119217574596405,
"rewards/margins": 0.010603459551930428,
"rewards/rejected": -0.12252521514892578,
"step": 256
},
{
"epoch": 5.446808510638298,
"grad_norm": 2515.4189453125,
"learning_rate": 4.154214593992149e-07,
"log_odds_chosen": 0.23377765715122223,
"log_odds_ratio": -0.6729075312614441,
"logits/chosen": -2.322608709335327,
"logits/rejected": -2.361389636993408,
"logps/chosen": -1.1726882457733154,
"logps/rejected": -1.2837783098220825,
"loss": 1.3539,
"nll_loss": 1.2735731601715088,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.11726883798837662,
"rewards/margins": 0.011109001003205776,
"rewards/rejected": -0.12837782502174377,
"step": 288
},
{
"epoch": 6.052009456264775,
"grad_norm": 2093.776611328125,
"learning_rate": 3.891084338941603e-07,
"log_odds_chosen": 0.16962425410747528,
"log_odds_ratio": -0.6696641445159912,
"logits/chosen": -2.325108051300049,
"logits/rejected": -2.3817710876464844,
"logps/chosen": -3.6265933513641357,
"logps/rejected": -3.700042724609375,
"loss": 3.6173,
"nll_loss": 3.7216219902038574,
"rewards/accuracies": 0.59765625,
"rewards/chosen": -0.3626593351364136,
"rewards/margins": 0.007344960235059261,
"rewards/rejected": -0.3700042963027954,
"step": 320
},
{
"epoch": 6.052009456264775,
"eval_log_odds_chosen": 1.1228582859039307,
"eval_log_odds_ratio": -0.2914997637271881,
"eval_logits/chosen": -2.153041362762451,
"eval_logits/rejected": -2.239081621170044,
"eval_logps/chosen": -1.1402614116668701,
"eval_logps/rejected": -2.0236728191375732,
"eval_loss": 1.2484513521194458,
"eval_nll_loss": 1.3337957859039307,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.11402615159749985,
"eval_rewards/margins": 0.0883411318063736,
"eval_rewards/rejected": -0.20236727595329285,
"eval_runtime": 0.8835,
"eval_samples_per_second": 155.057,
"eval_steps_per_second": 5.659,
"step": 320
},
{
"epoch": 6.657210401891253,
"grad_norm": 750.7427978515625,
"learning_rate": 3.6033917569043597e-07,
"log_odds_chosen": 0.2158849686384201,
"log_odds_ratio": -0.651162326335907,
"logits/chosen": -2.2999160289764404,
"logits/rejected": -2.3155159950256348,
"logps/chosen": -3.3152918815612793,
"logps/rejected": -3.4116926193237305,
"loss": 3.4506,
"nll_loss": 3.4377260208129883,
"rewards/accuracies": 0.6015625,
"rewards/chosen": -0.3315292000770569,
"rewards/margins": 0.00964003149420023,
"rewards/rejected": -0.34116923809051514,
"step": 352
},
{
"epoch": 7.26241134751773,
"grad_norm": 466.0474548339844,
"learning_rate": 3.296216625629211e-07,
"log_odds_chosen": 0.2518257200717926,
"log_odds_ratio": -0.6292858123779297,
"logits/chosen": -2.287289619445801,
"logits/rejected": -2.274383783340454,
"logps/chosen": -2.936006784439087,
"logps/rejected": -3.0706114768981934,
"loss": 3.1836,
"nll_loss": 3.031456708908081,
"rewards/accuracies": 0.66796875,
"rewards/chosen": -0.2936007082462311,
"rewards/margins": 0.013460462912917137,
"rewards/rejected": -0.30706116557121277,
"step": 384
},
{
"epoch": 7.5650118203309695,
"eval_log_odds_chosen": 1.1787246465682983,
"eval_log_odds_ratio": -0.27878421545028687,
"eval_logits/chosen": -2.131922721862793,
"eval_logits/rejected": -2.198315143585205,
"eval_logps/chosen": -1.1629152297973633,
"eval_logps/rejected": -2.102142810821533,
"eval_loss": 1.2289972305297852,
"eval_nll_loss": 1.3089702129364014,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.11629153788089752,
"eval_rewards/margins": 0.09392273426055908,
"eval_rewards/rejected": -0.2102142572402954,
"eval_runtime": 0.8657,
"eval_samples_per_second": 158.25,
"eval_steps_per_second": 5.776,
"step": 400
},
{
"epoch": 7.867612293144208,
"grad_norm": 493.2022399902344,
"learning_rate": 2.974982725547975e-07,
"log_odds_chosen": 0.29160410165786743,
"log_odds_ratio": -0.6114708781242371,
"logits/chosen": -2.296574115753174,
"logits/rejected": -2.3063693046569824,
"logps/chosen": -2.907156229019165,
"logps/rejected": -3.049989938735962,
"loss": 3.0885,
"nll_loss": 2.9950599670410156,
"rewards/accuracies": 0.6953125,
"rewards/chosen": -0.29071560502052307,
"rewards/margins": 0.014283367432653904,
"rewards/rejected": -0.3049989938735962,
"step": 416
},
{
"epoch": 8.472813238770685,
"grad_norm": 2084.139892578125,
"learning_rate": 2.6453620722761895e-07,
"log_odds_chosen": 0.2739107012748718,
"log_odds_ratio": -0.6295269727706909,
"logits/chosen": -2.3001277446746826,
"logits/rejected": -2.2884907722473145,
"logps/chosen": -2.9699883460998535,
"logps/rejected": -3.114020586013794,
"loss": 2.9983,
"nll_loss": 3.031224012374878,
"rewards/accuracies": 0.65234375,
"rewards/chosen": -0.29699885845184326,
"rewards/margins": 0.014403235167264938,
"rewards/rejected": -0.3114020824432373,
"step": 448
},
{
"epoch": 9.078014184397164,
"grad_norm": 557.6774291992188,
"learning_rate": 2.3131747660339394e-07,
"log_odds_chosen": 0.2703976333141327,
"log_odds_ratio": -0.6236827969551086,
"logits/chosen": -2.2672348022460938,
"logits/rejected": -2.2586584091186523,
"logps/chosen": -2.937666893005371,
"logps/rejected": -3.061203718185425,
"loss": 2.8082,
"nll_loss": 3.023472785949707,
"rewards/accuracies": 0.66015625,
"rewards/chosen": -0.29376670718193054,
"rewards/margins": 0.012353670783340931,
"rewards/rejected": -0.306120365858078,
"step": 480
},
{
"epoch": 9.078014184397164,
"eval_log_odds_chosen": 1.1980304718017578,
"eval_log_odds_ratio": -0.27382025122642517,
"eval_logits/chosen": -2.1204967498779297,
"eval_logits/rejected": -2.1800942420959473,
"eval_logps/chosen": -1.192492961883545,
"eval_logps/rejected": -2.1554245948791504,
"eval_loss": 1.2367494106292725,
"eval_nll_loss": 1.3177238702774048,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.11924929916858673,
"eval_rewards/margins": 0.09629315137863159,
"eval_rewards/rejected": -0.21554246544837952,
"eval_runtime": 0.88,
"eval_samples_per_second": 155.689,
"eval_steps_per_second": 5.682,
"step": 480
},
{
"epoch": 9.68321513002364,
"grad_norm": 8620.671875,
"learning_rate": 1.984286226342056e-07,
"log_odds_chosen": 0.36868974566459656,
"log_odds_ratio": -0.6002693176269531,
"logits/chosen": -2.237966537475586,
"logits/rejected": -2.2450058460235596,
"logps/chosen": -2.536555290222168,
"logps/rejected": -2.738464117050171,
"loss": 2.7562,
"nll_loss": 2.642591714859009,
"rewards/accuracies": 0.69921875,
"rewards/chosen": -0.2536555230617523,
"rewards/margins": 0.020190902054309845,
"rewards/rejected": -0.27384641766548157,
"step": 512
},
{
"epoch": 10.288416075650119,
"grad_norm": 8913.7607421875,
"learning_rate": 1.6645036265170313e-07,
"log_odds_chosen": 0.23036888241767883,
"log_odds_ratio": -0.6965319514274597,
"logits/chosen": -2.346311092376709,
"logits/rejected": -2.3196349143981934,
"logps/chosen": -2.625997543334961,
"logps/rejected": -2.695284605026245,
"loss": 2.9109,
"nll_loss": 2.6460041999816895,
"rewards/accuracies": 0.68359375,
"rewards/chosen": -0.26259979605674744,
"rewards/margins": 0.006928655784577131,
"rewards/rejected": -0.26952844858169556,
"step": 544
},
{
"epoch": 10.591016548463356,
"eval_log_odds_chosen": 1.2137528657913208,
"eval_log_odds_ratio": -0.2704525589942932,
"eval_logits/chosen": -2.1178054809570312,
"eval_logits/rejected": -2.1774165630340576,
"eval_logps/chosen": -1.1941485404968262,
"eval_logps/rejected": -2.171353340148926,
"eval_loss": 1.237461805343628,
"eval_nll_loss": 1.3179538249969482,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.11941485106945038,
"eval_rewards/margins": 0.09772048145532608,
"eval_rewards/rejected": -0.21713533997535706,
"eval_runtime": 0.8664,
"eval_samples_per_second": 158.121,
"eval_steps_per_second": 5.771,
"step": 560
},
{
"epoch": 10.893617021276595,
"grad_norm": 11410.7939453125,
"learning_rate": 1.3594733566170925e-07,
"log_odds_chosen": 0.2925941050052643,
"log_odds_ratio": -0.6866809725761414,
"logits/chosen": -2.3280415534973145,
"logits/rejected": -2.308394432067871,
"logps/chosen": -3.2026498317718506,
"logps/rejected": -3.3342018127441406,
"loss": 2.9544,
"nll_loss": 3.251168966293335,
"rewards/accuracies": 0.66796875,
"rewards/chosen": -0.320264995098114,
"rewards/margins": 0.013155205175280571,
"rewards/rejected": -0.33342018723487854,
"step": 576
},
{
"epoch": 11.498817966903074,
"grad_norm": 2123.895751953125,
"learning_rate": 1.0745813253325956e-07,
"log_odds_chosen": 0.3092188239097595,
"log_odds_ratio": -0.6492509245872498,
"logits/chosen": -2.3580808639526367,
"logits/rejected": -2.349421501159668,
"logps/chosen": -2.5302317142486572,
"logps/rejected": -2.662865161895752,
"loss": 2.8523,
"nll_loss": 2.578503131866455,
"rewards/accuracies": 0.6953125,
"rewards/chosen": -0.2530231475830078,
"rewards/margins": 0.013263333588838577,
"rewards/rejected": -0.2662864923477173,
"step": 608
},
{
"epoch": 12.10401891252955,
"grad_norm": 1731.5615234375,
"learning_rate": 8.148578611867113e-08,
"log_odds_chosen": 0.3393189013004303,
"log_odds_ratio": -0.6164168119430542,
"logits/chosen": -2.1805524826049805,
"logits/rejected": -2.177432060241699,
"logps/chosen": -2.5276594161987305,
"logps/rejected": -2.710268497467041,
"loss": 2.5512,
"nll_loss": 2.681882381439209,
"rewards/accuracies": 0.67578125,
"rewards/chosen": -0.252765953540802,
"rewards/margins": 0.01826086826622486,
"rewards/rejected": -0.2710268199443817,
"step": 640
},
{
"epoch": 12.10401891252955,
"eval_log_odds_chosen": 1.2131071090698242,
"eval_log_odds_ratio": -0.270443856716156,
"eval_logits/chosen": -2.1185622215270996,
"eval_logits/rejected": -2.178537368774414,
"eval_logps/chosen": -1.198697566986084,
"eval_logps/rejected": -2.176114559173584,
"eval_loss": 1.2388056516647339,
"eval_nll_loss": 1.3213987350463867,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.11986975371837616,
"eval_rewards/margins": 0.09774170815944672,
"eval_rewards/rejected": -0.21761147677898407,
"eval_runtime": 0.8825,
"eval_samples_per_second": 155.235,
"eval_steps_per_second": 5.666,
"step": 640
},
{
"epoch": 12.709219858156029,
"grad_norm": 1997.690185546875,
"learning_rate": 5.848888922025552e-08,
"log_odds_chosen": 0.3624497354030609,
"log_odds_ratio": -0.6123137474060059,
"logits/chosen": -2.179229259490967,
"logits/rejected": -2.1755523681640625,
"logps/chosen": -2.486222743988037,
"logps/rejected": -2.6808714866638184,
"loss": 2.5628,
"nll_loss": 2.6199562549591064,
"rewards/accuracies": 0.69140625,
"rewards/chosen": -0.24862225353717804,
"rewards/margins": 0.019464917480945587,
"rewards/rejected": -0.2680871784687042,
"step": 672
},
{
"epoch": 13.314420803782506,
"grad_norm": 2036.525146484375,
"learning_rate": 3.887349723342303e-08,
"log_odds_chosen": 0.3463588356971741,
"log_odds_ratio": -0.6327537298202515,
"logits/chosen": -2.181072235107422,
"logits/rejected": -2.1947262287139893,
"logps/chosen": -2.517810344696045,
"logps/rejected": -2.672647476196289,
"loss": 2.6212,
"nll_loss": 2.6852023601531982,
"rewards/accuracies": 0.68359375,
"rewards/chosen": -0.25178101658821106,
"rewards/margins": 0.01548372209072113,
"rewards/rejected": -0.2672647535800934,
"step": 704
},
{
"epoch": 13.617021276595745,
"eval_log_odds_chosen": 1.2199119329452515,
"eval_log_odds_ratio": -0.26896363496780396,
"eval_logits/chosen": -2.1166138648986816,
"eval_logits/rejected": -2.1762003898620605,
"eval_logps/chosen": -1.1962625980377197,
"eval_logps/rejected": -2.1790993213653564,
"eval_loss": 1.2387369871139526,
"eval_nll_loss": 1.3203083276748657,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.11962626129388809,
"eval_rewards/margins": 0.09828367829322815,
"eval_rewards/rejected": -0.21790993213653564,
"eval_runtime": 0.8708,
"eval_samples_per_second": 157.334,
"eval_steps_per_second": 5.742,
"step": 720
},
{
"epoch": 13.919621749408984,
"grad_norm": 9910.3740234375,
"learning_rate": 2.298595844092377e-08,
"log_odds_chosen": 0.3617098927497864,
"log_odds_ratio": -0.6060731410980225,
"logits/chosen": -2.2685229778289795,
"logits/rejected": -2.2752606868743896,
"logps/chosen": -1.9047422409057617,
"logps/rejected": -2.0876576900482178,
"loss": 2.4957,
"nll_loss": 1.975754737854004,
"rewards/accuracies": 0.71484375,
"rewards/chosen": -0.19047421216964722,
"rewards/margins": 0.01829155907034874,
"rewards/rejected": -0.20876577496528625,
"step": 736
},
{
"epoch": 14.52482269503546,
"grad_norm": 1543.000244140625,
"learning_rate": 1.1106798553464802e-08,
"log_odds_chosen": 0.42522603273391724,
"log_odds_ratio": -0.5653746128082275,
"logits/chosen": -2.353919744491577,
"logits/rejected": -2.358372688293457,
"logps/chosen": -1.2913402318954468,
"logps/rejected": -1.542799711227417,
"loss": 1.4582,
"nll_loss": 1.3932266235351562,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.12913402915000916,
"rewards/margins": 0.025145962834358215,
"rewards/rejected": -0.15427997708320618,
"step": 768
},
{
"epoch": 15.130023640661939,
"grad_norm": 698.0999755859375,
"learning_rate": 3.4457674771554422e-09,
"log_odds_chosen": 0.4467349052429199,
"log_odds_ratio": -0.545281171798706,
"logits/chosen": -2.313391923904419,
"logits/rejected": -2.3118624687194824,
"logps/chosen": -1.2114390134811401,
"logps/rejected": -1.4863505363464355,
"loss": 1.3504,
"nll_loss": 1.3252184391021729,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.1211438924074173,
"rewards/margins": 0.027491170912981033,
"rewards/rejected": -0.14863505959510803,
"step": 800
},
{
"epoch": 15.130023640661939,
"eval_log_odds_chosen": 1.211981177330017,
"eval_log_odds_ratio": -0.27068275213241577,
"eval_logits/chosen": -2.118680715560913,
"eval_logits/rejected": -2.1784884929656982,
"eval_logps/chosen": -1.1996212005615234,
"eval_logps/rejected": -2.176278829574585,
"eval_loss": 1.2384228706359863,
"eval_nll_loss": 1.3189568519592285,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.11996213346719742,
"eval_rewards/margins": 0.09766574203968048,
"eval_rewards/rejected": -0.2176278829574585,
"eval_runtime": 0.8764,
"eval_samples_per_second": 156.329,
"eval_steps_per_second": 5.705,
"step": 800
},
{
"epoch": 15.735224586288416,
"grad_norm": 295.0424499511719,
"learning_rate": 1.3813576683111006e-10,
"log_odds_chosen": 0.44846177101135254,
"log_odds_ratio": -0.5449205636978149,
"logits/chosen": -2.3085861206054688,
"logits/rejected": -2.3130688667297363,
"logps/chosen": -1.1487438678741455,
"logps/rejected": -1.4232044219970703,
"loss": 1.3316,
"nll_loss": 1.246992588043213,
"rewards/accuracies": 0.74609375,
"rewards/chosen": -0.11487438529729843,
"rewards/margins": 0.027446046471595764,
"rewards/rejected": -0.142320454120636,
"step": 832
},
{
"epoch": 15.886524822695035,
"grad_norm": 305.3218078613281,
"learning_rate": 0.0,
"log_odds_chosen": 0.500209391117096,
"log_odds_ratio": -0.5302451848983765,
"logits/chosen": -2.2818732261657715,
"logits/rejected": -2.2850182056427,
"logps/chosen": -1.1465669870376587,
"logps/rejected": -1.4646430015563965,
"loss": 1.3265,
"nll_loss": 1.2768977880477905,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.11465670168399811,
"rewards/margins": 0.03180759772658348,
"rewards/rejected": -0.1464642882347107,
"step": 840
},
{
"epoch": 15.886524822695035,
"eval_log_odds_chosen": 1.2168288230895996,
"eval_log_odds_ratio": -0.26950639486312866,
"eval_logits/chosen": -2.1189827919006348,
"eval_logits/rejected": -2.1787045001983643,
"eval_logps/chosen": -1.1971455812454224,
"eval_logps/rejected": -2.1773040294647217,
"eval_loss": 1.2378294467926025,
"eval_nll_loss": 1.3174165487289429,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.11971455812454224,
"eval_rewards/margins": 0.09801585972309113,
"eval_rewards/rejected": -0.21773043274879456,
"eval_runtime": 0.8739,
"eval_samples_per_second": 156.768,
"eval_steps_per_second": 5.721,
"step": 840
}
],
"logging_steps": 32,
"max_steps": 840,
"num_input_tokens_seen": 0,
"num_train_epochs": 17,
"save_steps": 80,
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}