{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.886524822695035, "eval_steps": 80, "global_step": 840, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.6052009456264775, "grad_norm": 897.57861328125, "learning_rate": 1.9047619047619045e-07, "log_odds_chosen": 0.061996445059776306, "log_odds_ratio": -0.7339106202125549, "logits/chosen": -2.475102663040161, "logits/rejected": -2.5303642749786377, "logps/chosen": -1.3030776977539062, "logps/rejected": -1.351835012435913, "loss": 1.9448, "nll_loss": 1.881751298904419, "rewards/accuracies": 0.5, "rewards/chosen": -0.13030776381492615, "rewards/margins": 0.004875739570707083, "rewards/rejected": -0.13518351316452026, "step": 32 }, { "epoch": 1.210401891252955, "grad_norm": 555.6674194335938, "learning_rate": 3.809523809523809e-07, "log_odds_chosen": 0.13086628913879395, "log_odds_ratio": -0.6972255110740662, "logits/chosen": -2.408938407897949, "logits/rejected": -2.4563820362091064, "logps/chosen": -1.2012869119644165, "logps/rejected": -1.3022348880767822, "loss": 1.5253, "nll_loss": 1.4454330205917358, "rewards/accuracies": 0.54296875, "rewards/chosen": -0.12012868374586105, "rewards/margins": 0.010094808414578438, "rewards/rejected": -0.13022349774837494, "step": 64 }, { "epoch": 1.5130023640661938, "eval_log_odds_chosen": 1.2037408351898193, "eval_log_odds_ratio": -0.2748129367828369, "eval_logits/chosen": -2.1409010887145996, "eval_logits/rejected": -2.1931569576263428, "eval_logps/chosen": -1.156149983406067, "eval_logps/rejected": -2.1107430458068848, "eval_loss": 1.3948438167572021, "eval_nll_loss": 1.5358692407608032, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.11561501026153564, "eval_rewards/margins": 0.09545929729938507, "eval_rewards/rejected": -0.21107430756092072, "eval_runtime": 0.8754, "eval_samples_per_second": 156.493, "eval_steps_per_second": 5.711, "step": 80 }, { "epoch": 1.8156028368794326, "grad_norm": 140.96612548828125, "learning_rate": 4.996892303047305e-07, "log_odds_chosen": 0.16402098536491394, "log_odds_ratio": -0.6756913065910339, "logits/chosen": -2.3939661979675293, "logits/rejected": -2.389753580093384, "logps/chosen": -1.0995960235595703, "logps/rejected": -1.2302087545394897, "loss": 1.3931, "nll_loss": 1.3130543231964111, "rewards/accuracies": 0.49609375, "rewards/chosen": -0.10995960980653763, "rewards/margins": 0.013061259873211384, "rewards/rejected": -0.12302087247371674, "step": 96 }, { "epoch": 2.42080378250591, "grad_norm": 3005.20654296875, "learning_rate": 4.958326378681848e-07, "log_odds_chosen": 0.05211365222930908, "log_odds_ratio": -0.7710955142974854, "logits/chosen": -2.4226865768432617, "logits/rejected": -2.4471077919006348, "logps/chosen": -1.8894121646881104, "logps/rejected": -1.878553867340088, "loss": 1.9751, "nll_loss": 1.9949692487716675, "rewards/accuracies": 0.54296875, "rewards/chosen": -0.18894124031066895, "rewards/margins": -0.001085837371647358, "rewards/rejected": -0.18785539269447327, "step": 128 }, { "epoch": 3.0260047281323876, "grad_norm": 3593.66064453125, "learning_rate": 4.876353872369572e-07, "log_odds_chosen": 0.010831637308001518, "log_odds_ratio": -0.8205243349075317, "logits/chosen": -2.4603629112243652, "logits/rejected": -2.4731788635253906, "logps/chosen": -1.9289910793304443, "logps/rejected": -1.854127049446106, "loss": 2.0756, "nll_loss": 2.116929769515991, "rewards/accuracies": 0.58203125, "rewards/chosen": -0.19289910793304443, "rewards/margins": -0.0074864043854177, "rewards/rejected": -0.1854127049446106, "step": 160 }, { "epoch": 3.0260047281323876, "eval_log_odds_chosen": 1.280719518661499, "eval_log_odds_ratio": -0.25084003806114197, "eval_logits/chosen": -2.156606912612915, "eval_logits/rejected": -2.2219834327697754, "eval_logps/chosen": -1.4854581356048584, "eval_logps/rejected": -2.5444798469543457, "eval_loss": 1.3283345699310303, "eval_nll_loss": 1.4989588260650635, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.14854581654071808, "eval_rewards/margins": 0.10590219497680664, "eval_rewards/rejected": -0.2544480264186859, "eval_runtime": 0.8785, "eval_samples_per_second": 155.943, "eval_steps_per_second": 5.691, "step": 160 }, { "epoch": 3.631205673758865, "grad_norm": 817.2037353515625, "learning_rate": 4.752422169756047e-07, "log_odds_chosen": 0.09074901789426804, "log_odds_ratio": -0.7456185817718506, "logits/chosen": -2.377356767654419, "logits/rejected": -2.396003007888794, "logps/chosen": -1.5171489715576172, "logps/rejected": -1.5251379013061523, "loss": 1.9051, "nll_loss": 1.6593117713928223, "rewards/accuracies": 0.609375, "rewards/chosen": -0.15171489119529724, "rewards/margins": 0.0007988963043317199, "rewards/rejected": -0.152513787150383, "step": 192 }, { "epoch": 4.236406619385343, "grad_norm": 954.6674194335938, "learning_rate": 4.588719528532341e-07, "log_odds_chosen": 0.1411646008491516, "log_odds_ratio": -0.6900860667228699, "logits/chosen": -2.398102283477783, "logits/rejected": -2.397972345352173, "logps/chosen": -1.2695732116699219, "logps/rejected": -1.3286174535751343, "loss": 1.4204, "nll_loss": 1.3868590593338013, "rewards/accuracies": 0.6171875, "rewards/chosen": -0.12695731222629547, "rewards/margins": 0.005904428660869598, "rewards/rejected": -0.13286174833774567, "step": 224 }, { "epoch": 4.539007092198582, "eval_log_odds_chosen": 1.1990762948989868, "eval_log_odds_ratio": -0.2697806656360626, "eval_logits/chosen": -2.137376546859741, "eval_logits/rejected": -2.1972498893737793, "eval_logps/chosen": -1.2540639638900757, "eval_logps/rejected": -2.2160115242004395, "eval_loss": 1.2844356298446655, "eval_nll_loss": 1.4172712564468384, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.125406414270401, "eval_rewards/margins": 0.09619472920894623, "eval_rewards/rejected": -0.22160112857818604, "eval_runtime": 0.8664, "eval_samples_per_second": 158.119, "eval_steps_per_second": 5.771, "step": 240 }, { "epoch": 4.84160756501182, "grad_norm": 790.6442260742188, "learning_rate": 4.3881364404463375e-07, "log_odds_chosen": 0.20751571655273438, "log_odds_ratio": -0.6614270210266113, "logits/chosen": -2.3498642444610596, "logits/rejected": -2.370640993118286, "logps/chosen": -1.1192173957824707, "logps/rejected": -1.2252520322799683, "loss": 1.3469, "nll_loss": 1.2267839908599854, "rewards/accuracies": 0.65625, "rewards/chosen": -0.1119217574596405, "rewards/margins": 0.010603459551930428, "rewards/rejected": -0.12252521514892578, "step": 256 }, { "epoch": 5.446808510638298, "grad_norm": 2515.4189453125, "learning_rate": 4.154214593992149e-07, "log_odds_chosen": 0.23377765715122223, "log_odds_ratio": -0.6729075312614441, "logits/chosen": -2.322608709335327, "logits/rejected": -2.361389636993408, "logps/chosen": -1.1726882457733154, "logps/rejected": -1.2837783098220825, "loss": 1.3539, "nll_loss": 1.2735731601715088, "rewards/accuracies": 0.6875, "rewards/chosen": -0.11726883798837662, "rewards/margins": 0.011109001003205776, "rewards/rejected": -0.12837782502174377, "step": 288 }, { "epoch": 6.052009456264775, "grad_norm": 2093.776611328125, "learning_rate": 3.891084338941603e-07, "log_odds_chosen": 0.16962425410747528, "log_odds_ratio": -0.6696641445159912, "logits/chosen": -2.325108051300049, "logits/rejected": -2.3817710876464844, "logps/chosen": -3.6265933513641357, "logps/rejected": -3.700042724609375, "loss": 3.6173, "nll_loss": 3.7216219902038574, "rewards/accuracies": 0.59765625, "rewards/chosen": -0.3626593351364136, "rewards/margins": 0.007344960235059261, "rewards/rejected": -0.3700042963027954, "step": 320 }, { "epoch": 6.052009456264775, "eval_log_odds_chosen": 1.1228582859039307, "eval_log_odds_ratio": -0.2914997637271881, "eval_logits/chosen": -2.153041362762451, "eval_logits/rejected": -2.239081621170044, "eval_logps/chosen": -1.1402614116668701, "eval_logps/rejected": -2.0236728191375732, "eval_loss": 1.2484513521194458, "eval_nll_loss": 1.3337957859039307, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.11402615159749985, "eval_rewards/margins": 0.0883411318063736, "eval_rewards/rejected": -0.20236727595329285, "eval_runtime": 0.8835, "eval_samples_per_second": 155.057, "eval_steps_per_second": 5.659, "step": 320 }, { "epoch": 6.657210401891253, "grad_norm": 750.7427978515625, "learning_rate": 3.6033917569043597e-07, "log_odds_chosen": 0.2158849686384201, "log_odds_ratio": -0.651162326335907, "logits/chosen": -2.2999160289764404, "logits/rejected": -2.3155159950256348, "logps/chosen": -3.3152918815612793, "logps/rejected": -3.4116926193237305, "loss": 3.4506, "nll_loss": 3.4377260208129883, "rewards/accuracies": 0.6015625, "rewards/chosen": -0.3315292000770569, "rewards/margins": 0.00964003149420023, "rewards/rejected": -0.34116923809051514, "step": 352 }, { "epoch": 7.26241134751773, "grad_norm": 466.0474548339844, "learning_rate": 3.296216625629211e-07, "log_odds_chosen": 0.2518257200717926, "log_odds_ratio": -0.6292858123779297, "logits/chosen": -2.287289619445801, "logits/rejected": -2.274383783340454, "logps/chosen": -2.936006784439087, "logps/rejected": -3.0706114768981934, "loss": 3.1836, "nll_loss": 3.031456708908081, "rewards/accuracies": 0.66796875, "rewards/chosen": -0.2936007082462311, "rewards/margins": 0.013460462912917137, "rewards/rejected": -0.30706116557121277, "step": 384 }, { "epoch": 7.5650118203309695, "eval_log_odds_chosen": 1.1787246465682983, "eval_log_odds_ratio": -0.27878421545028687, "eval_logits/chosen": -2.131922721862793, "eval_logits/rejected": -2.198315143585205, "eval_logps/chosen": -1.1629152297973633, "eval_logps/rejected": -2.102142810821533, "eval_loss": 1.2289972305297852, "eval_nll_loss": 1.3089702129364014, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.11629153788089752, "eval_rewards/margins": 0.09392273426055908, "eval_rewards/rejected": -0.2102142572402954, "eval_runtime": 0.8657, "eval_samples_per_second": 158.25, "eval_steps_per_second": 5.776, "step": 400 }, { "epoch": 7.867612293144208, "grad_norm": 493.2022399902344, "learning_rate": 2.974982725547975e-07, "log_odds_chosen": 0.29160410165786743, "log_odds_ratio": -0.6114708781242371, "logits/chosen": -2.296574115753174, "logits/rejected": -2.3063693046569824, "logps/chosen": -2.907156229019165, "logps/rejected": -3.049989938735962, "loss": 3.0885, "nll_loss": 2.9950599670410156, "rewards/accuracies": 0.6953125, "rewards/chosen": -0.29071560502052307, "rewards/margins": 0.014283367432653904, "rewards/rejected": -0.3049989938735962, "step": 416 }, { "epoch": 8.472813238770685, "grad_norm": 2084.139892578125, "learning_rate": 2.6453620722761895e-07, "log_odds_chosen": 0.2739107012748718, "log_odds_ratio": -0.6295269727706909, "logits/chosen": -2.3001277446746826, "logits/rejected": -2.2884907722473145, "logps/chosen": -2.9699883460998535, "logps/rejected": -3.114020586013794, "loss": 2.9983, "nll_loss": 3.031224012374878, "rewards/accuracies": 0.65234375, "rewards/chosen": -0.29699885845184326, "rewards/margins": 0.014403235167264938, "rewards/rejected": -0.3114020824432373, "step": 448 }, { "epoch": 9.078014184397164, "grad_norm": 557.6774291992188, "learning_rate": 2.3131747660339394e-07, "log_odds_chosen": 0.2703976333141327, "log_odds_ratio": -0.6236827969551086, "logits/chosen": -2.2672348022460938, "logits/rejected": -2.2586584091186523, "logps/chosen": -2.937666893005371, "logps/rejected": -3.061203718185425, "loss": 2.8082, "nll_loss": 3.023472785949707, "rewards/accuracies": 0.66015625, "rewards/chosen": -0.29376670718193054, "rewards/margins": 0.012353670783340931, "rewards/rejected": -0.306120365858078, "step": 480 }, { "epoch": 9.078014184397164, "eval_log_odds_chosen": 1.1980304718017578, "eval_log_odds_ratio": -0.27382025122642517, "eval_logits/chosen": -2.1204967498779297, "eval_logits/rejected": -2.1800942420959473, "eval_logps/chosen": -1.192492961883545, "eval_logps/rejected": -2.1554245948791504, "eval_loss": 1.2367494106292725, "eval_nll_loss": 1.3177238702774048, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.11924929916858673, "eval_rewards/margins": 0.09629315137863159, "eval_rewards/rejected": -0.21554246544837952, "eval_runtime": 0.88, "eval_samples_per_second": 155.689, "eval_steps_per_second": 5.682, "step": 480 }, { "epoch": 9.68321513002364, "grad_norm": 8620.671875, "learning_rate": 1.984286226342056e-07, "log_odds_chosen": 0.36868974566459656, "log_odds_ratio": -0.6002693176269531, "logits/chosen": -2.237966537475586, "logits/rejected": -2.2450058460235596, "logps/chosen": -2.536555290222168, "logps/rejected": -2.738464117050171, "loss": 2.7562, "nll_loss": 2.642591714859009, "rewards/accuracies": 0.69921875, "rewards/chosen": -0.2536555230617523, "rewards/margins": 0.020190902054309845, "rewards/rejected": -0.27384641766548157, "step": 512 }, { "epoch": 10.288416075650119, "grad_norm": 8913.7607421875, "learning_rate": 1.6645036265170313e-07, "log_odds_chosen": 0.23036888241767883, "log_odds_ratio": -0.6965319514274597, "logits/chosen": -2.346311092376709, "logits/rejected": -2.3196349143981934, "logps/chosen": -2.625997543334961, "logps/rejected": -2.695284605026245, "loss": 2.9109, "nll_loss": 2.6460041999816895, "rewards/accuracies": 0.68359375, "rewards/chosen": -0.26259979605674744, "rewards/margins": 0.006928655784577131, "rewards/rejected": -0.26952844858169556, "step": 544 }, { "epoch": 10.591016548463356, "eval_log_odds_chosen": 1.2137528657913208, "eval_log_odds_ratio": -0.2704525589942932, "eval_logits/chosen": -2.1178054809570312, "eval_logits/rejected": -2.1774165630340576, "eval_logps/chosen": -1.1941485404968262, "eval_logps/rejected": -2.171353340148926, "eval_loss": 1.237461805343628, "eval_nll_loss": 1.3179538249969482, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.11941485106945038, "eval_rewards/margins": 0.09772048145532608, "eval_rewards/rejected": -0.21713533997535706, "eval_runtime": 0.8664, "eval_samples_per_second": 158.121, "eval_steps_per_second": 5.771, "step": 560 }, { "epoch": 10.893617021276595, "grad_norm": 11410.7939453125, "learning_rate": 1.3594733566170925e-07, "log_odds_chosen": 0.2925941050052643, "log_odds_ratio": -0.6866809725761414, "logits/chosen": -2.3280415534973145, "logits/rejected": -2.308394432067871, "logps/chosen": -3.2026498317718506, "logps/rejected": -3.3342018127441406, "loss": 2.9544, "nll_loss": 3.251168966293335, "rewards/accuracies": 0.66796875, "rewards/chosen": -0.320264995098114, "rewards/margins": 0.013155205175280571, "rewards/rejected": -0.33342018723487854, "step": 576 }, { "epoch": 11.498817966903074, "grad_norm": 2123.895751953125, "learning_rate": 1.0745813253325956e-07, "log_odds_chosen": 0.3092188239097595, "log_odds_ratio": -0.6492509245872498, "logits/chosen": -2.3580808639526367, "logits/rejected": -2.349421501159668, "logps/chosen": -2.5302317142486572, "logps/rejected": -2.662865161895752, "loss": 2.8523, "nll_loss": 2.578503131866455, "rewards/accuracies": 0.6953125, "rewards/chosen": -0.2530231475830078, "rewards/margins": 0.013263333588838577, "rewards/rejected": -0.2662864923477173, "step": 608 }, { "epoch": 12.10401891252955, "grad_norm": 1731.5615234375, "learning_rate": 8.148578611867113e-08, "log_odds_chosen": 0.3393189013004303, "log_odds_ratio": -0.6164168119430542, "logits/chosen": -2.1805524826049805, "logits/rejected": -2.177432060241699, "logps/chosen": -2.5276594161987305, "logps/rejected": -2.710268497467041, "loss": 2.5512, "nll_loss": 2.681882381439209, "rewards/accuracies": 0.67578125, "rewards/chosen": -0.252765953540802, "rewards/margins": 0.01826086826622486, "rewards/rejected": -0.2710268199443817, "step": 640 }, { "epoch": 12.10401891252955, "eval_log_odds_chosen": 1.2131071090698242, "eval_log_odds_ratio": -0.270443856716156, "eval_logits/chosen": -2.1185622215270996, "eval_logits/rejected": -2.178537368774414, "eval_logps/chosen": -1.198697566986084, "eval_logps/rejected": -2.176114559173584, "eval_loss": 1.2388056516647339, "eval_nll_loss": 1.3213987350463867, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.11986975371837616, "eval_rewards/margins": 0.09774170815944672, "eval_rewards/rejected": -0.21761147677898407, "eval_runtime": 0.8825, "eval_samples_per_second": 155.235, "eval_steps_per_second": 5.666, "step": 640 }, { "epoch": 12.709219858156029, "grad_norm": 1997.690185546875, "learning_rate": 5.848888922025552e-08, "log_odds_chosen": 0.3624497354030609, "log_odds_ratio": -0.6123137474060059, "logits/chosen": -2.179229259490967, "logits/rejected": -2.1755523681640625, "logps/chosen": -2.486222743988037, "logps/rejected": -2.6808714866638184, "loss": 2.5628, "nll_loss": 2.6199562549591064, "rewards/accuracies": 0.69140625, "rewards/chosen": -0.24862225353717804, "rewards/margins": 0.019464917480945587, "rewards/rejected": -0.2680871784687042, "step": 672 }, { "epoch": 13.314420803782506, "grad_norm": 2036.525146484375, "learning_rate": 3.887349723342303e-08, "log_odds_chosen": 0.3463588356971741, "log_odds_ratio": -0.6327537298202515, "logits/chosen": -2.181072235107422, "logits/rejected": -2.1947262287139893, "logps/chosen": -2.517810344696045, "logps/rejected": -2.672647476196289, "loss": 2.6212, "nll_loss": 2.6852023601531982, "rewards/accuracies": 0.68359375, "rewards/chosen": -0.25178101658821106, "rewards/margins": 0.01548372209072113, "rewards/rejected": -0.2672647535800934, "step": 704 }, { "epoch": 13.617021276595745, "eval_log_odds_chosen": 1.2199119329452515, "eval_log_odds_ratio": -0.26896363496780396, "eval_logits/chosen": -2.1166138648986816, "eval_logits/rejected": -2.1762003898620605, "eval_logps/chosen": -1.1962625980377197, "eval_logps/rejected": -2.1790993213653564, "eval_loss": 1.2387369871139526, "eval_nll_loss": 1.3203083276748657, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.11962626129388809, "eval_rewards/margins": 0.09828367829322815, "eval_rewards/rejected": -0.21790993213653564, "eval_runtime": 0.8708, "eval_samples_per_second": 157.334, "eval_steps_per_second": 5.742, "step": 720 }, { "epoch": 13.919621749408984, "grad_norm": 9910.3740234375, "learning_rate": 2.298595844092377e-08, "log_odds_chosen": 0.3617098927497864, "log_odds_ratio": -0.6060731410980225, "logits/chosen": -2.2685229778289795, "logits/rejected": -2.2752606868743896, "logps/chosen": -1.9047422409057617, "logps/rejected": -2.0876576900482178, "loss": 2.4957, "nll_loss": 1.975754737854004, "rewards/accuracies": 0.71484375, "rewards/chosen": -0.19047421216964722, "rewards/margins": 0.01829155907034874, "rewards/rejected": -0.20876577496528625, "step": 736 }, { "epoch": 14.52482269503546, "grad_norm": 1543.000244140625, "learning_rate": 1.1106798553464802e-08, "log_odds_chosen": 0.42522603273391724, "log_odds_ratio": -0.5653746128082275, "logits/chosen": -2.353919744491577, "logits/rejected": -2.358372688293457, "logps/chosen": -1.2913402318954468, "logps/rejected": -1.542799711227417, "loss": 1.4582, "nll_loss": 1.3932266235351562, "rewards/accuracies": 0.765625, "rewards/chosen": -0.12913402915000916, "rewards/margins": 0.025145962834358215, "rewards/rejected": -0.15427997708320618, "step": 768 }, { "epoch": 15.130023640661939, "grad_norm": 698.0999755859375, "learning_rate": 3.4457674771554422e-09, "log_odds_chosen": 0.4467349052429199, "log_odds_ratio": -0.545281171798706, "logits/chosen": -2.313391923904419, "logits/rejected": -2.3118624687194824, "logps/chosen": -1.2114390134811401, "logps/rejected": -1.4863505363464355, "loss": 1.3504, "nll_loss": 1.3252184391021729, "rewards/accuracies": 0.75, "rewards/chosen": -0.1211438924074173, "rewards/margins": 0.027491170912981033, "rewards/rejected": -0.14863505959510803, "step": 800 }, { "epoch": 15.130023640661939, "eval_log_odds_chosen": 1.211981177330017, "eval_log_odds_ratio": -0.27068275213241577, "eval_logits/chosen": -2.118680715560913, "eval_logits/rejected": -2.1784884929656982, "eval_logps/chosen": -1.1996212005615234, "eval_logps/rejected": -2.176278829574585, "eval_loss": 1.2384228706359863, "eval_nll_loss": 1.3189568519592285, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.11996213346719742, "eval_rewards/margins": 0.09766574203968048, "eval_rewards/rejected": -0.2176278829574585, "eval_runtime": 0.8764, "eval_samples_per_second": 156.329, "eval_steps_per_second": 5.705, "step": 800 }, { "epoch": 15.735224586288416, "grad_norm": 295.0424499511719, "learning_rate": 1.3813576683111006e-10, "log_odds_chosen": 0.44846177101135254, "log_odds_ratio": -0.5449205636978149, "logits/chosen": -2.3085861206054688, "logits/rejected": -2.3130688667297363, "logps/chosen": -1.1487438678741455, "logps/rejected": -1.4232044219970703, "loss": 1.3316, "nll_loss": 1.246992588043213, "rewards/accuracies": 0.74609375, "rewards/chosen": -0.11487438529729843, "rewards/margins": 0.027446046471595764, "rewards/rejected": -0.142320454120636, "step": 832 }, { "epoch": 15.886524822695035, "grad_norm": 305.3218078613281, "learning_rate": 0.0, "log_odds_chosen": 0.500209391117096, "log_odds_ratio": -0.5302451848983765, "logits/chosen": -2.2818732261657715, "logits/rejected": -2.2850182056427, "logps/chosen": -1.1465669870376587, "logps/rejected": -1.4646430015563965, "loss": 1.3265, "nll_loss": 1.2768977880477905, "rewards/accuracies": 0.75, "rewards/chosen": -0.11465670168399811, "rewards/margins": 0.03180759772658348, "rewards/rejected": -0.1464642882347107, "step": 840 }, { "epoch": 15.886524822695035, "eval_log_odds_chosen": 1.2168288230895996, "eval_log_odds_ratio": -0.26950639486312866, "eval_logits/chosen": -2.1189827919006348, "eval_logits/rejected": -2.1787045001983643, "eval_logps/chosen": -1.1971455812454224, "eval_logps/rejected": -2.1773040294647217, "eval_loss": 1.2378294467926025, "eval_nll_loss": 1.3174165487289429, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.11971455812454224, "eval_rewards/margins": 0.09801585972309113, "eval_rewards/rejected": -0.21773043274879456, "eval_runtime": 0.8739, "eval_samples_per_second": 156.768, "eval_steps_per_second": 5.721, "step": 840 } ], "logging_steps": 32, "max_steps": 840, "num_input_tokens_seen": 0, "num_train_epochs": 17, "save_steps": 80, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }