{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.89591957421644, "eval_steps": 400, "global_step": 3360, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15138971023063277, "grad_norm": 27.229875564575195, "learning_rate": 4.7619047619047613e-08, "log_odds_chosen": -0.0525120347738266, "log_odds_ratio": -0.7864450216293335, "logits/chosen": 1.5500602722167969, "logits/rejected": 1.3292943239212036, "logps/chosen": -1.191162109375, "logps/rejected": -1.1635648012161255, "loss": 1.6156, "nll_loss": 1.46018648147583, "rewards/accuracies": 0.43359375, "rewards/chosen": -0.1786743402481079, "rewards/margins": -0.004139607772231102, "rewards/rejected": -0.17453473806381226, "step": 32 }, { "epoch": 0.30277942046126555, "grad_norm": 27.179594039916992, "learning_rate": 9.523809523809523e-08, "log_odds_chosen": 0.019624141976237297, "log_odds_ratio": -0.748144805431366, "logits/chosen": 1.5089401006698608, "logits/rejected": 1.3729290962219238, "logps/chosen": -1.2469431161880493, "logps/rejected": -1.26250422000885, "loss": 1.5984, "nll_loss": 1.5403207540512085, "rewards/accuracies": 0.5078125, "rewards/chosen": -0.1870414763689041, "rewards/margins": 0.002334160730242729, "rewards/rejected": -0.1893756240606308, "step": 64 }, { "epoch": 0.4541691306918983, "grad_norm": 25.431949615478516, "learning_rate": 1.4285714285714285e-07, "log_odds_chosen": -0.053292229771614075, "log_odds_ratio": -0.78084397315979, "logits/chosen": 1.5771101713180542, "logits/rejected": 1.4359058141708374, "logps/chosen": -1.20406174659729, "logps/rejected": -1.1857094764709473, "loss": 1.6007, "nll_loss": 1.4434431791305542, "rewards/accuracies": 0.421875, "rewards/chosen": -0.18060927093029022, "rewards/margins": -0.0027528139762580395, "rewards/rejected": -0.1778564453125, "step": 96 }, { "epoch": 0.6055588409225311, "grad_norm": 21.6727294921875, "learning_rate": 1.9047619047619045e-07, "log_odds_chosen": 0.05749227851629257, "log_odds_ratio": -0.7303333878517151, "logits/chosen": 1.468267560005188, "logits/rejected": 1.4105079174041748, "logps/chosen": -1.216729998588562, "logps/rejected": -1.2735037803649902, "loss": 1.5421, "nll_loss": 1.4667391777038574, "rewards/accuracies": 0.48046875, "rewards/chosen": -0.18250951170921326, "rewards/margins": 0.008516057394444942, "rewards/rejected": -0.19102558493614197, "step": 128 }, { "epoch": 0.7569485511531638, "grad_norm": 18.762540817260742, "learning_rate": 2.3809523809523806e-07, "log_odds_chosen": -0.01149587519466877, "log_odds_ratio": -0.7697539329528809, "logits/chosen": 1.5571284294128418, "logits/rejected": 1.4197614192962646, "logps/chosen": -1.1996402740478516, "logps/rejected": -1.206023931503296, "loss": 1.4771, "nll_loss": 1.3902133703231812, "rewards/accuracies": 0.4609375, "rewards/chosen": -0.17994605004787445, "rewards/margins": 0.000957544194534421, "rewards/rejected": -0.1809035986661911, "step": 160 }, { "epoch": 0.9083382613837966, "grad_norm": 14.901942253112793, "learning_rate": 2.857142857142857e-07, "log_odds_chosen": -0.0564657598733902, "log_odds_ratio": -0.791755735874176, "logits/chosen": 1.7110127210617065, "logits/rejected": 1.559685468673706, "logps/chosen": -1.201224446296692, "logps/rejected": -1.1592237949371338, "loss": 1.4155, "nll_loss": 1.3239426612854004, "rewards/accuracies": 0.50390625, "rewards/chosen": -0.18018370866775513, "rewards/margins": -0.00630012946203351, "rewards/rejected": -0.1738835722208023, "step": 192 }, { "epoch": 1.0597279716144294, "grad_norm": 14.1319580078125, "learning_rate": 3.333333333333333e-07, "log_odds_chosen": -0.07105285674333572, "log_odds_ratio": -0.7984029650688171, "logits/chosen": 1.623414397239685, "logits/rejected": 1.496307134628296, "logps/chosen": -1.1715890169143677, "logps/rejected": -1.1218650341033936, "loss": 1.3513, "nll_loss": 1.2719416618347168, "rewards/accuracies": 0.46484375, "rewards/chosen": -0.1757383644580841, "rewards/margins": -0.007458594627678394, "rewards/rejected": -0.16827978193759918, "step": 224 }, { "epoch": 1.2111176818450622, "grad_norm": 13.425606727600098, "learning_rate": 3.809523809523809e-07, "log_odds_chosen": 0.10578853636980057, "log_odds_ratio": -0.70930016040802, "logits/chosen": 1.6068717241287231, "logits/rejected": 1.3819518089294434, "logps/chosen": -1.1136094331741333, "logps/rejected": -1.1912662982940674, "loss": 1.3252, "nll_loss": 1.2232904434204102, "rewards/accuracies": 0.546875, "rewards/chosen": -0.16704143583774567, "rewards/margins": 0.0116485096514225, "rewards/rejected": -0.17868994176387787, "step": 256 }, { "epoch": 1.362507392075695, "grad_norm": 11.342605590820312, "learning_rate": 4.285714285714285e-07, "log_odds_chosen": 0.13081349432468414, "log_odds_ratio": -0.6838027238845825, "logits/chosen": 1.4501845836639404, "logits/rejected": 1.3310956954956055, "logps/chosen": -1.0919809341430664, "logps/rejected": -1.1673154830932617, "loss": 1.2755, "nll_loss": 1.1733828783035278, "rewards/accuracies": 0.578125, "rewards/chosen": -0.16379712522029877, "rewards/margins": 0.011300182901322842, "rewards/rejected": -0.1750973016023636, "step": 288 }, { "epoch": 1.5138971023063275, "grad_norm": 12.543194770812988, "learning_rate": 4.761904761904761e-07, "log_odds_chosen": 0.19491538405418396, "log_odds_ratio": -0.6595159769058228, "logits/chosen": 1.4903298616409302, "logits/rejected": 1.3049672842025757, "logps/chosen": -1.0332714319229126, "logps/rejected": -1.1428489685058594, "loss": 1.2223, "nll_loss": 1.0796581506729126, "rewards/accuracies": 0.6171875, "rewards/chosen": -0.15499071776866913, "rewards/margins": 0.0164366252720356, "rewards/rejected": -0.17142733931541443, "step": 320 }, { "epoch": 1.6652868125369604, "grad_norm": 8.179709434509277, "learning_rate": 4.999654636727764e-07, "log_odds_chosen": 0.14331884682178497, "log_odds_ratio": -0.6748344302177429, "logits/chosen": 1.4205052852630615, "logits/rejected": 1.3244390487670898, "logps/chosen": -1.0807911157608032, "logps/rejected": -1.159712314605713, "loss": 1.1776, "nll_loss": 1.0815861225128174, "rewards/accuracies": 0.609375, "rewards/chosen": -0.16211867332458496, "rewards/margins": 0.011838208884000778, "rewards/rejected": -0.17395688593387604, "step": 352 }, { "epoch": 1.8166765227675932, "grad_norm": 9.002681732177734, "learning_rate": 4.996892303047305e-07, "log_odds_chosen": 0.15229541063308716, "log_odds_ratio": -0.6689931154251099, "logits/chosen": 1.3082184791564941, "logits/rejected": 1.208222508430481, "logps/chosen": -1.0531638860702515, "logps/rejected": -1.1305123567581177, "loss": 1.1209, "nll_loss": 1.026604175567627, "rewards/accuracies": 0.62890625, "rewards/chosen": -0.15797458589076996, "rewards/margins": 0.01160226296633482, "rewards/rejected": -0.16957685351371765, "step": 384 }, { "epoch": 1.8923713778829097, "eval_log_odds_chosen": 0.9119634628295898, "eval_log_odds_ratio": -0.3477023243904114, "eval_logits/chosen": 0.8482466340065002, "eval_logits/rejected": 0.7518002986907959, "eval_logps/chosen": -0.7484418153762817, "eval_logps/rejected": -1.3053876161575317, "eval_loss": 0.84984290599823, "eval_nll_loss": 0.7749183773994446, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.11226626485586166, "eval_rewards/margins": 0.0835418850183487, "eval_rewards/rejected": -0.19580814242362976, "eval_runtime": 1.7821, "eval_samples_per_second": 76.877, "eval_steps_per_second": 10.101, "step": 400 }, { "epoch": 1.968066232998226, "grad_norm": 10.631780624389648, "learning_rate": 4.991370688303038e-07, "log_odds_chosen": 0.20428910851478577, "log_odds_ratio": -0.6562178134918213, "logits/chosen": 1.2808618545532227, "logits/rejected": 1.1230928897857666, "logps/chosen": -1.0897853374481201, "logps/rejected": -1.1929757595062256, "loss": 1.0818, "nll_loss": 1.0095133781433105, "rewards/accuracies": 0.625, "rewards/chosen": -0.16346779465675354, "rewards/margins": 0.015478584915399551, "rewards/rejected": -0.1789463758468628, "step": 416 }, { "epoch": 2.119455943228859, "grad_norm": 9.022457122802734, "learning_rate": 4.983095894354857e-07, "log_odds_chosen": 0.22953583300113678, "log_odds_ratio": -0.6313825845718384, "logits/chosen": 1.349506139755249, "logits/rejected": 1.1360180377960205, "logps/chosen": -1.0178955793380737, "logps/rejected": -1.142075538635254, "loss": 1.0595, "nll_loss": 0.9702749848365784, "rewards/accuracies": 0.63671875, "rewards/chosen": -0.15268434584140778, "rewards/margins": 0.018626993522047997, "rewards/rejected": -0.17131134867668152, "step": 448 }, { "epoch": 2.2708456534594914, "grad_norm": 8.519028663635254, "learning_rate": 4.972077065562821e-07, "log_odds_chosen": 0.20490483939647675, "log_odds_ratio": -0.6597353219985962, "logits/chosen": 1.2364730834960938, "logits/rejected": 1.1246590614318848, "logps/chosen": -1.0860800743103027, "logps/rejected": -1.1871649026870728, "loss": 1.0455, "nll_loss": 0.9942155480384827, "rewards/accuracies": 0.65625, "rewards/chosen": -0.16291199624538422, "rewards/margins": 0.015162724070250988, "rewards/rejected": -0.17807474732398987, "step": 480 }, { "epoch": 2.4222353636901244, "grad_norm": 7.702695369720459, "learning_rate": 4.958326378681848e-07, "log_odds_chosen": 0.3035791516304016, "log_odds_ratio": -0.6072664260864258, "logits/chosen": 1.2193742990493774, "logits/rejected": 1.0568186044692993, "logps/chosen": -1.029651165008545, "logps/rejected": -1.1974754333496094, "loss": 1.031, "nll_loss": 0.9461196660995483, "rewards/accuracies": 0.7109375, "rewards/chosen": -0.15444767475128174, "rewards/margins": 0.02517363429069519, "rewards/rejected": -0.17962132394313812, "step": 512 }, { "epoch": 2.573625073920757, "grad_norm": 8.201448440551758, "learning_rate": 4.941859029405353e-07, "log_odds_chosen": 0.35751351714134216, "log_odds_ratio": -0.5834794044494629, "logits/chosen": 1.2276177406311035, "logits/rejected": 1.0265512466430664, "logps/chosen": -1.0028650760650635, "logps/rejected": -1.1897025108337402, "loss": 1.0218, "nll_loss": 0.9072933793067932, "rewards/accuracies": 0.73828125, "rewards/chosen": -0.15042978525161743, "rewards/margins": 0.028025589883327484, "rewards/rejected": -0.17845536768436432, "step": 544 }, { "epoch": 2.72501478415139, "grad_norm": 7.634998798370361, "learning_rate": 4.922693215572695e-07, "log_odds_chosen": 0.45870620012283325, "log_odds_ratio": -0.54433274269104, "logits/chosen": 1.1980278491973877, "logits/rejected": 1.0682458877563477, "logps/chosen": -0.978523313999176, "logps/rejected": -1.243023157119751, "loss": 0.9966, "nll_loss": 0.921144962310791, "rewards/accuracies": 0.7578125, "rewards/chosen": -0.14677852392196655, "rewards/margins": 0.03967496007680893, "rewards/rejected": -0.1864534616470337, "step": 576 }, { "epoch": 2.8764044943820224, "grad_norm": 7.217565059661865, "learning_rate": 4.900850117058999e-07, "log_odds_chosen": 0.47186481952667236, "log_odds_ratio": -0.5484339594841003, "logits/chosen": 1.152608871459961, "logits/rejected": 1.015822172164917, "logps/chosen": -1.01084566116333, "logps/rejected": -1.277451992034912, "loss": 0.9987, "nll_loss": 0.9013168215751648, "rewards/accuracies": 0.7421875, "rewards/chosen": -0.1516268402338028, "rewards/margins": 0.03999098762869835, "rewards/rejected": -0.19161783158779144, "step": 608 }, { "epoch": 3.0277942046126554, "grad_norm": 6.927852630615234, "learning_rate": 4.876353872369572e-07, "log_odds_chosen": 0.48829925060272217, "log_odds_ratio": -0.5393761396408081, "logits/chosen": 1.0784587860107422, "logits/rejected": 0.9411880970001221, "logps/chosen": -1.0088391304016113, "logps/rejected": -1.271460771560669, "loss": 0.9837, "nll_loss": 0.9265193343162537, "rewards/accuracies": 0.765625, "rewards/chosen": -0.15132588148117065, "rewards/margins": 0.039393242448568344, "rewards/rejected": -0.1907191127538681, "step": 640 }, { "epoch": 3.179183914843288, "grad_norm": 6.685938358306885, "learning_rate": 4.849231551964771e-07, "log_odds_chosen": 0.562548041343689, "log_odds_ratio": -0.5139177441596985, "logits/chosen": 1.1162034273147583, "logits/rejected": 0.927276611328125, "logps/chosen": -0.9777481555938721, "logps/rejected": -1.3050942420959473, "loss": 0.9845, "nll_loss": 0.8839849233627319, "rewards/accuracies": 0.78125, "rewards/chosen": -0.14666223526000977, "rewards/margins": 0.04910193011164665, "rewards/rejected": -0.19576415419578552, "step": 672 }, { "epoch": 3.330573625073921, "grad_norm": 5.24590539932251, "learning_rate": 4.819513128344813e-07, "log_odds_chosen": 0.4602447748184204, "log_odds_ratio": -0.5505639314651489, "logits/chosen": 1.1351033449172974, "logits/rejected": 0.9407525062561035, "logps/chosen": -0.9991594552993774, "logps/rejected": -1.2416499853134155, "loss": 0.9658, "nll_loss": 0.8639576435089111, "rewards/accuracies": 0.75, "rewards/chosen": -0.14987392723560333, "rewards/margins": 0.036373574286699295, "rewards/rejected": -0.18624748289585114, "step": 704 }, { "epoch": 3.4819633353045534, "grad_norm": 6.936483860015869, "learning_rate": 4.787231442927586e-07, "log_odds_chosen": 0.5815439820289612, "log_odds_ratio": -0.5048896074295044, "logits/chosen": 1.0991628170013428, "logits/rejected": 0.8964717984199524, "logps/chosen": -0.9201152920722961, "logps/rejected": -1.242402195930481, "loss": 0.9681, "nll_loss": 0.8501954674720764, "rewards/accuracies": 0.7890625, "rewards/chosen": -0.13801729679107666, "rewards/margins": 0.04834304004907608, "rewards/rejected": -0.18636034429073334, "step": 736 }, { "epoch": 3.6333530455351863, "grad_norm": 6.671252250671387, "learning_rate": 4.752422169756047e-07, "log_odds_chosen": 0.494718998670578, "log_odds_ratio": -0.5431851148605347, "logits/chosen": 1.1249089241027832, "logits/rejected": 1.0157999992370605, "logps/chosen": -1.0117340087890625, "logps/rejected": -1.293691635131836, "loss": 0.978, "nll_loss": 0.9269427061080933, "rewards/accuracies": 0.7578125, "rewards/chosen": -0.15176010131835938, "rewards/margins": 0.042293645441532135, "rewards/rejected": -0.1940537393093109, "step": 768 }, { "epoch": 3.7847427557658193, "grad_norm": 7.028476715087891, "learning_rate": 4.715123776075336e-07, "log_odds_chosen": 0.5061647891998291, "log_odds_ratio": -0.5440715551376343, "logits/chosen": 1.0956813097000122, "logits/rejected": 0.9653363823890686, "logps/chosen": -1.0257270336151123, "logps/rejected": -1.3192400932312012, "loss": 0.9528, "nll_loss": 0.8593652844429016, "rewards/accuracies": 0.75390625, "rewards/chosen": -0.15385906398296356, "rewards/margins": 0.044026970863342285, "rewards/rejected": -0.19788604974746704, "step": 800 }, { "epoch": 3.7847427557658193, "eval_log_odds_chosen": 1.0081945657730103, "eval_log_odds_ratio": -0.3223256468772888, "eval_logits/chosen": 0.7514240145683289, "eval_logits/rejected": 0.6671679615974426, "eval_logps/chosen": -0.7009862065315247, "eval_logps/rejected": -1.2981789112091064, "eval_loss": 0.7884585857391357, "eval_nll_loss": 0.7162714600563049, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.10514792799949646, "eval_rewards/margins": 0.08957889676094055, "eval_rewards/rejected": -0.1947268396615982, "eval_runtime": 1.7846, "eval_samples_per_second": 76.767, "eval_steps_per_second": 10.086, "step": 800 }, { "epoch": 3.936132465996452, "grad_norm": 6.8125834465026855, "learning_rate": 4.675377479823153e-07, "log_odds_chosen": 0.6920242309570312, "log_odds_ratio": -0.4726037383079529, "logits/chosen": 1.0377426147460938, "logits/rejected": 0.9017472863197327, "logps/chosen": -0.922009289264679, "logps/rejected": -1.3119571208953857, "loss": 0.9638, "nll_loss": 0.8638713359832764, "rewards/accuracies": 0.82421875, "rewards/chosen": -0.13830138742923737, "rewards/margins": 0.05849217250943184, "rewards/rejected": -0.1967935562133789, "step": 832 }, { "epoch": 4.087522176227084, "grad_norm": 6.372574806213379, "learning_rate": 4.6332272040803887e-07, "log_odds_chosen": 0.6877175569534302, "log_odds_ratio": -0.46182751655578613, "logits/chosen": 1.1110641956329346, "logits/rejected": 0.9074443578720093, "logps/chosen": -0.9194135069847107, "logps/rejected": -1.3282839059829712, "loss": 0.9604, "nll_loss": 0.8556405901908875, "rewards/accuracies": 0.83203125, "rewards/chosen": -0.13791203498840332, "rewards/margins": 0.06133056432008743, "rewards/rejected": -0.19924262166023254, "step": 864 }, { "epoch": 4.238911886457718, "grad_norm": 6.756438255310059, "learning_rate": 4.588719528532341e-07, "log_odds_chosen": 0.6642757058143616, "log_odds_ratio": -0.4779506325721741, "logits/chosen": 1.0594482421875, "logits/rejected": 0.9946908354759216, "logps/chosen": -0.965737521648407, "logps/rejected": -1.359665870666504, "loss": 0.954, "nll_loss": 0.8865021467208862, "rewards/accuracies": 0.77734375, "rewards/chosen": -0.1448606252670288, "rewards/margins": 0.059089258313179016, "rewards/rejected": -0.20394988358020782, "step": 896 }, { "epoch": 4.39030159668835, "grad_norm": 7.890772342681885, "learning_rate": 4.5419036379941414e-07, "log_odds_chosen": 0.7298649549484253, "log_odds_ratio": -0.4601740837097168, "logits/chosen": 1.1765400171279907, "logits/rejected": 0.9228672981262207, "logps/chosen": -0.9468764066696167, "logps/rejected": -1.3811423778533936, "loss": 0.9453, "nll_loss": 0.8525615930557251, "rewards/accuracies": 0.8203125, "rewards/chosen": -0.1420314460992813, "rewards/margins": 0.06513990461826324, "rewards/rejected": -0.20717135071754456, "step": 928 }, { "epoch": 4.541691306918983, "grad_norm": 6.184362888336182, "learning_rate": 4.492831268057306e-07, "log_odds_chosen": 0.7427738904953003, "log_odds_ratio": -0.46771693229675293, "logits/chosen": 1.0225163698196411, "logits/rejected": 0.9077222943305969, "logps/chosen": -0.9970439672470093, "logps/rejected": -1.456943154335022, "loss": 0.938, "nll_loss": 0.8868236541748047, "rewards/accuracies": 0.77734375, "rewards/chosen": -0.14955660700798035, "rewards/margins": 0.06898489594459534, "rewards/rejected": -0.2185414880514145, "step": 960 }, { "epoch": 4.693081017149615, "grad_norm": 6.933351993560791, "learning_rate": 4.441556647917446e-07, "log_odds_chosen": 0.8609212636947632, "log_odds_ratio": -0.43812429904937744, "logits/chosen": 1.0455502271652222, "logits/rejected": 0.906272292137146, "logps/chosen": -0.9208173155784607, "logps/rejected": -1.457839012145996, "loss": 0.9434, "nll_loss": 0.8511086106300354, "rewards/accuracies": 0.83203125, "rewards/chosen": -0.13812260329723358, "rewards/margins": 0.08055327087640762, "rewards/rejected": -0.2186758816242218, "step": 992 }, { "epoch": 4.844470727380249, "grad_norm": 6.150376796722412, "learning_rate": 4.3881364404463375e-07, "log_odds_chosen": 0.9446333050727844, "log_odds_ratio": -0.4172128438949585, "logits/chosen": 1.110432744026184, "logits/rejected": 0.8510321974754333, "logps/chosen": -0.9673236608505249, "logps/rejected": -1.5632784366607666, "loss": 0.9264, "nll_loss": 0.8780388832092285, "rewards/accuracies": 0.81640625, "rewards/chosen": -0.14509856700897217, "rewards/margins": 0.08939322084188461, "rewards/rejected": -0.234491765499115, "step": 1024 }, { "epoch": 4.995860437610881, "grad_norm": 5.648180961608887, "learning_rate": 4.332629679574565e-07, "log_odds_chosen": 0.9642012715339661, "log_odds_ratio": -0.42083150148391724, "logits/chosen": 1.0487498044967651, "logits/rejected": 0.8362730741500854, "logps/chosen": -0.9618784189224243, "logps/rejected": -1.5873997211456299, "loss": 0.925, "nll_loss": 0.8492802381515503, "rewards/accuracies": 0.8203125, "rewards/chosen": -0.1442817747592926, "rewards/margins": 0.09382818639278412, "rewards/rejected": -0.23810997605323792, "step": 1056 }, { "epoch": 5.147250147841514, "grad_norm": 6.209354400634766, "learning_rate": 4.2750977050539503e-07, "log_odds_chosen": 1.127962350845337, "log_odds_ratio": -0.3810023367404938, "logits/chosen": 0.9537469148635864, "logits/rejected": 0.8183348178863525, "logps/chosen": -0.9366539120674133, "logps/rejected": -1.6753818988800049, "loss": 0.9233, "nll_loss": 0.8444766998291016, "rewards/accuracies": 0.828125, "rewards/chosen": -0.140498086810112, "rewards/margins": 0.11080917716026306, "rewards/rejected": -0.25130727887153625, "step": 1088 }, { "epoch": 5.298639858072146, "grad_norm": 6.09550666809082, "learning_rate": 4.2156040946718343e-07, "log_odds_chosen": 1.1001328229904175, "log_odds_ratio": -0.40834715962409973, "logits/chosen": 0.9538164734840393, "logits/rejected": 0.8565899133682251, "logps/chosen": -0.9979989528656006, "logps/rejected": -1.736957311630249, "loss": 0.9264, "nll_loss": 0.8773810863494873, "rewards/accuracies": 0.8203125, "rewards/chosen": -0.1496998369693756, "rewards/margins": 0.11084374785423279, "rewards/rejected": -0.2605435848236084, "step": 1120 }, { "epoch": 5.45002956830278, "grad_norm": 5.165525913238525, "learning_rate": 4.154214593992149e-07, "log_odds_chosen": 1.4560502767562866, "log_odds_ratio": -0.36019906401634216, "logits/chosen": 1.0402196645736694, "logits/rejected": 0.8284226655960083, "logps/chosen": -0.9208565950393677, "logps/rejected": -1.9513992071151733, "loss": 0.9184, "nll_loss": 0.8742519617080688, "rewards/accuracies": 0.83984375, "rewards/chosen": -0.13812850415706635, "rewards/margins": 0.15458139777183533, "rewards/rejected": -0.2927098870277405, "step": 1152 }, { "epoch": 5.601419278533412, "grad_norm": 5.565188407897949, "learning_rate": 4.090997043700909e-07, "log_odds_chosen": 1.8058509826660156, "log_odds_ratio": -0.34831157326698303, "logits/chosen": 0.9435930252075195, "logits/rejected": 0.7780628204345703, "logps/chosen": -0.9824676513671875, "logps/rejected": -2.3647959232330322, "loss": 0.915, "nll_loss": 0.9026926159858704, "rewards/accuracies": 0.828125, "rewards/chosen": -0.1473701447248459, "rewards/margins": 0.2073492407798767, "rewards/rejected": -0.3547194004058838, "step": 1184 }, { "epoch": 5.677114133648729, "eval_log_odds_chosen": 1.515297532081604, "eval_log_odds_ratio": -0.21902640163898468, "eval_logits/chosen": 0.5878681540489197, "eval_logits/rejected": 0.514284610748291, "eval_logps/chosen": -0.6912536025047302, "eval_logps/rejected": -1.6357617378234863, "eval_loss": 0.7435688972473145, "eval_nll_loss": 0.6848150491714478, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.10368803888559341, "eval_rewards/margins": 0.14167624711990356, "eval_rewards/rejected": -0.2453642636537552, "eval_runtime": 1.7754, "eval_samples_per_second": 77.164, "eval_steps_per_second": 10.138, "step": 1200 }, { "epoch": 5.752808988764045, "grad_norm": 5.417468547821045, "learning_rate": 4.0260213046364076e-07, "log_odds_chosen": 2.085019111633301, "log_odds_ratio": -0.31234151124954224, "logits/chosen": 0.9959389567375183, "logits/rejected": 0.7742877006530762, "logps/chosen": -0.9441136121749878, "logps/rejected": -2.5619935989379883, "loss": 0.9004, "nll_loss": 0.8535679578781128, "rewards/accuracies": 0.8359375, "rewards/chosen": -0.1416170299053192, "rewards/margins": 0.24268200993537903, "rewards/rejected": -0.38429906964302063, "step": 1216 }, { "epoch": 5.904198698994678, "grad_norm": 7.9577178955078125, "learning_rate": 3.959359180586975e-07, "log_odds_chosen": 2.5801219940185547, "log_odds_ratio": -0.31198883056640625, "logits/chosen": 0.9304694533348083, "logits/rejected": 0.7099679112434387, "logps/chosen": -1.004030466079712, "logps/rejected": -3.1341824531555176, "loss": 0.9199, "nll_loss": 0.888052225112915, "rewards/accuracies": 0.8515625, "rewards/chosen": -0.15060456097126007, "rewards/margins": 0.3195228576660156, "rewards/rejected": -0.4701274335384369, "step": 1248 }, { "epoch": 6.055588409225311, "grad_norm": 7.205864906311035, "learning_rate": 3.891084338941603e-07, "log_odds_chosen": 2.632976770401001, "log_odds_ratio": -0.33525609970092773, "logits/chosen": 0.8941175937652588, "logits/rejected": 0.712418794631958, "logps/chosen": -0.9687196016311646, "logps/rejected": -3.1566426753997803, "loss": 0.9156, "nll_loss": 0.8564908504486084, "rewards/accuracies": 0.84765625, "rewards/chosen": -0.14530794322490692, "rewards/margins": 0.3281884789466858, "rewards/rejected": -0.4734964370727539, "step": 1280 }, { "epoch": 6.206978119455943, "grad_norm": 5.557631015777588, "learning_rate": 3.8212722292811383e-07, "log_odds_chosen": 2.9796371459960938, "log_odds_ratio": -0.3066112995147705, "logits/chosen": 0.9908494353294373, "logits/rejected": 0.7001262903213501, "logps/chosen": -0.9531494975090027, "logps/rejected": -3.451514720916748, "loss": 0.8945, "nll_loss": 0.8682339191436768, "rewards/accuracies": 0.83203125, "rewards/chosen": -0.1429724246263504, "rewards/margins": 0.3747548460960388, "rewards/rejected": -0.517727255821228, "step": 1312 }, { "epoch": 6.358367829686576, "grad_norm": 7.154934883117676, "learning_rate": 3.75e-07, "log_odds_chosen": 3.150144338607788, "log_odds_ratio": -0.30607521533966064, "logits/chosen": 0.8532112240791321, "logits/rejected": 0.688872754573822, "logps/chosen": -0.9391156435012817, "logps/rejected": -3.6156792640686035, "loss": 0.9013, "nll_loss": 0.8500258922576904, "rewards/accuracies": 0.84765625, "rewards/chosen": -0.14086736738681793, "rewards/margins": 0.40148457884788513, "rewards/rejected": -0.5423519611358643, "step": 1344 }, { "epoch": 6.509757539917208, "grad_norm": 5.582004070281982, "learning_rate": 3.67734641305055e-07, "log_odds_chosen": 3.518749475479126, "log_odds_ratio": -0.26204630732536316, "logits/chosen": 1.0231519937515259, "logits/rejected": 0.6429997682571411, "logps/chosen": -0.8629344701766968, "logps/rejected": -3.8456180095672607, "loss": 0.9023, "nll_loss": 0.801094114780426, "rewards/accuracies": 0.8671875, "rewards/chosen": -0.12944017350673676, "rewards/margins": 0.4474025368690491, "rewards/rejected": -0.576842725276947, "step": 1376 }, { "epoch": 6.661147250147842, "grad_norm": 6.333008766174316, "learning_rate": 3.6033917569043597e-07, "log_odds_chosen": 3.5107364654541016, "log_odds_ratio": -0.25962206721305847, "logits/chosen": 0.9015189409255981, "logits/rejected": 0.604630172252655, "logps/chosen": -0.9094609618186951, "logps/rejected": -3.901463270187378, "loss": 0.8982, "nll_loss": 0.8305466175079346, "rewards/accuracies": 0.87109375, "rewards/chosen": -0.1364191472530365, "rewards/margins": 0.44880032539367676, "rewards/rejected": -0.5852195024490356, "step": 1408 }, { "epoch": 6.812536960378474, "grad_norm": 9.112639427185059, "learning_rate": 3.528217757826529e-07, "log_odds_chosen": 3.822404384613037, "log_odds_ratio": -0.28991812467575073, "logits/chosen": 0.9384167790412903, "logits/rejected": 0.6002436280250549, "logps/chosen": -0.966259241104126, "logps/rejected": -4.308917999267578, "loss": 0.899, "nll_loss": 0.8516695499420166, "rewards/accuracies": 0.83203125, "rewards/chosen": -0.1449388712644577, "rewards/margins": 0.5013989210128784, "rewards/rejected": -0.6463377475738525, "step": 1440 }, { "epoch": 6.963926670609107, "grad_norm": 7.574125289916992, "learning_rate": 3.4519074895611236e-07, "log_odds_chosen": 3.943324327468872, "log_odds_ratio": -0.26691746711730957, "logits/chosen": 0.9103025197982788, "logits/rejected": 0.6238164901733398, "logps/chosen": -0.8985946774482727, "logps/rejected": -4.343371391296387, "loss": 0.8962, "nll_loss": 0.8179515600204468, "rewards/accuracies": 0.8984375, "rewards/chosen": -0.13478921353816986, "rewards/margins": 0.516716480255127, "rewards/rejected": -0.651505708694458, "step": 1472 }, { "epoch": 7.11531638083974, "grad_norm": 6.7364115715026855, "learning_rate": 3.374545281527537e-07, "log_odds_chosen": 4.374906539916992, "log_odds_ratio": -0.2600148320198059, "logits/chosen": 0.9600415229797363, "logits/rejected": 0.6132468581199646, "logps/chosen": -0.9232965707778931, "logps/rejected": -4.797858715057373, "loss": 0.8895, "nll_loss": 0.8346379995346069, "rewards/accuracies": 0.87109375, "rewards/chosen": -0.13849450647830963, "rewards/margins": 0.5811843872070312, "rewards/rejected": -0.7196788787841797, "step": 1504 }, { "epoch": 7.266706091070373, "grad_norm": 8.94677448272705, "learning_rate": 3.296216625629211e-07, "log_odds_chosen": 3.412320375442505, "log_odds_ratio": -0.2966606616973877, "logits/chosen": 0.9029962420463562, "logits/rejected": 0.6578757762908936, "logps/chosen": -0.9623314738273621, "logps/rejected": -3.89890193939209, "loss": 0.8925, "nll_loss": 0.8433880805969238, "rewards/accuracies": 0.8671875, "rewards/chosen": -0.14434972405433655, "rewards/margins": 0.4404855966567993, "rewards/rejected": -0.5848353505134583, "step": 1536 }, { "epoch": 7.418095801301005, "grad_norm": 5.7957353591918945, "learning_rate": 3.2170080817777257e-07, "log_odds_chosen": 4.052781581878662, "log_odds_ratio": -0.2798649072647095, "logits/chosen": 0.9068763256072998, "logits/rejected": 0.6250233054161072, "logps/chosen": -0.9688931107521057, "logps/rejected": -4.529140472412109, "loss": 0.9004, "nll_loss": 0.8530284762382507, "rewards/accuracies": 0.859375, "rewards/chosen": -0.14533399045467377, "rewards/margins": 0.534037172794342, "rewards/rejected": -0.6793711185455322, "step": 1568 }, { "epoch": 7.569485511531638, "grad_norm": 6.567281723022461, "learning_rate": 3.137007182236637e-07, "log_odds_chosen": 3.9496092796325684, "log_odds_ratio": -0.25981855392456055, "logits/chosen": 0.9131721258163452, "logits/rejected": 0.6535216569900513, "logps/chosen": -0.9185097813606262, "logps/rejected": -4.3686933517456055, "loss": 0.892, "nll_loss": 0.8527241945266724, "rewards/accuracies": 0.87890625, "rewards/chosen": -0.1377764791250229, "rewards/margins": 0.5175275206565857, "rewards/rejected": -0.655303955078125, "step": 1600 }, { "epoch": 7.569485511531638, "eval_log_odds_chosen": 1.6673216819763184, "eval_log_odds_ratio": -0.19718672335147858, "eval_logits/chosen": 0.5625311136245728, "eval_logits/rejected": 0.4984322190284729, "eval_logps/chosen": -0.6667929887771606, "eval_logps/rejected": -1.6969513893127441, "eval_loss": 0.7337117195129395, "eval_nll_loss": 0.6776795387268066, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.1000189557671547, "eval_rewards/margins": 0.15452374517917633, "eval_rewards/rejected": -0.2545427083969116, "eval_runtime": 1.7831, "eval_samples_per_second": 76.831, "eval_steps_per_second": 10.095, "step": 1600 }, { "epoch": 7.720875221762271, "grad_norm": 8.987198829650879, "learning_rate": 3.056302334890786e-07, "log_odds_chosen": 3.926710844039917, "log_odds_ratio": -0.2714899480342865, "logits/chosen": 0.9191571474075317, "logits/rejected": 0.5953992605209351, "logps/chosen": -0.9368714094161987, "logps/rejected": -4.384879112243652, "loss": 0.888, "nll_loss": 0.8406177759170532, "rewards/accuracies": 0.8671875, "rewards/chosen": -0.14053073525428772, "rewards/margins": 0.5172011852264404, "rewards/rejected": -0.6577318906784058, "step": 1632 }, { "epoch": 7.872264931992904, "grad_norm": 5.6181230545043945, "learning_rate": 2.974982725547975e-07, "log_odds_chosen": 3.617192506790161, "log_odds_ratio": -0.32495206594467163, "logits/chosen": 0.8457501530647278, "logits/rejected": 0.6253533363342285, "logps/chosen": -1.0049875974655151, "logps/rejected": -4.156848907470703, "loss": 0.8977, "nll_loss": 0.8724310994148254, "rewards/accuracies": 0.85546875, "rewards/chosen": -0.1507481336593628, "rewards/margins": 0.47277915477752686, "rewards/rejected": -0.6235272884368896, "step": 1664 }, { "epoch": 8.023654642223537, "grad_norm": 5.31005334854126, "learning_rate": 2.893138219380963e-07, "log_odds_chosen": 4.234038829803467, "log_odds_ratio": -0.30224624276161194, "logits/chosen": 0.922171950340271, "logits/rejected": 0.5847682952880859, "logps/chosen": -0.9686514139175415, "logps/rejected": -4.734119892120361, "loss": 0.8864, "nll_loss": 0.8605988025665283, "rewards/accuracies": 0.859375, "rewards/chosen": -0.14529772102832794, "rewards/margins": 0.5648203492164612, "rewards/rejected": -0.7101180553436279, "step": 1696 }, { "epoch": 8.175044352454169, "grad_norm": 4.773166179656982, "learning_rate": 2.810859261618713e-07, "log_odds_chosen": 4.176573753356934, "log_odds_ratio": -0.2747136056423187, "logits/chosen": 0.9669155478477478, "logits/rejected": 0.6131560206413269, "logps/chosen": -0.941318690776825, "logps/rejected": -4.627261638641357, "loss": 0.8908, "nll_loss": 0.8429233431816101, "rewards/accuracies": 0.875, "rewards/chosen": -0.1411978155374527, "rewards/margins": 0.5528914928436279, "rewards/rejected": -0.6940892934799194, "step": 1728 }, { "epoch": 8.326434062684802, "grad_norm": 7.928328990936279, "learning_rate": 2.728236777596621e-07, "log_odds_chosen": 4.232769012451172, "log_odds_ratio": -0.2622223496437073, "logits/chosen": 0.8704826831817627, "logits/rejected": 0.6309795379638672, "logps/chosen": -0.9345431327819824, "logps/rejected": -4.6482343673706055, "loss": 0.8856, "nll_loss": 0.849586009979248, "rewards/accuracies": 0.87109375, "rewards/chosen": -0.14018146693706512, "rewards/margins": 0.5570536851882935, "rewards/rejected": -0.6972352266311646, "step": 1760 }, { "epoch": 8.477823772915436, "grad_norm": 7.50920295715332, "learning_rate": 2.6453620722761895e-07, "log_odds_chosen": 3.835066795349121, "log_odds_ratio": -0.2713623344898224, "logits/chosen": 0.8469685316085815, "logits/rejected": 0.5746083855628967, "logps/chosen": -0.9510048031806946, "logps/rejected": -4.287370204925537, "loss": 0.9005, "nll_loss": 0.8307653069496155, "rewards/accuracies": 0.8828125, "rewards/chosen": -0.14265072345733643, "rewards/margins": 0.5004547238349915, "rewards/rejected": -0.6431055068969727, "step": 1792 }, { "epoch": 8.629213483146067, "grad_norm": 4.420612812042236, "learning_rate": 2.5623267293451823e-07, "log_odds_chosen": 4.375966548919678, "log_odds_ratio": -0.26864683628082275, "logits/chosen": 0.8368352055549622, "logits/rejected": 0.5574530959129333, "logps/chosen": -0.9161982536315918, "logps/rejected": -4.765947341918945, "loss": 0.8711, "nll_loss": 0.8084649443626404, "rewards/accuracies": 0.890625, "rewards/chosen": -0.13742974400520325, "rewards/margins": 0.577462375164032, "rewards/rejected": -0.7148921489715576, "step": 1824 }, { "epoch": 8.7806031933767, "grad_norm": 5.372297286987305, "learning_rate": 2.4792225100097575e-07, "log_odds_chosen": 4.036057472229004, "log_odds_ratio": -0.28991392254829407, "logits/chosen": 0.8479549884796143, "logits/rejected": 0.6130175590515137, "logps/chosen": -0.996108889579773, "logps/rejected": -4.58188009262085, "loss": 0.8868, "nll_loss": 0.8779551386833191, "rewards/accuracies": 0.8671875, "rewards/chosen": -0.14941634237766266, "rewards/margins": 0.5378656387329102, "rewards/rejected": -0.6872820258140564, "step": 1856 }, { "epoch": 8.931992903607332, "grad_norm": 7.286854267120361, "learning_rate": 2.3961412515904335e-07, "log_odds_chosen": 4.6014862060546875, "log_odds_ratio": -0.23827242851257324, "logits/chosen": 0.8562659621238708, "logits/rejected": 0.5500348806381226, "logps/chosen": -0.8830623030662537, "logps/rejected": -4.95844841003418, "loss": 0.8922, "nll_loss": 0.8241187930107117, "rewards/accuracies": 0.91796875, "rewards/chosen": -0.132459357380867, "rewards/margins": 0.6113079190254211, "rewards/rejected": -0.743767261505127, "step": 1888 }, { "epoch": 9.083382613837966, "grad_norm": 5.4477949142456055, "learning_rate": 2.3131747660339394e-07, "log_odds_chosen": 4.262630939483643, "log_odds_ratio": -0.25891953706741333, "logits/chosen": 0.7675349712371826, "logits/rejected": 0.51315838098526, "logps/chosen": -0.9374942779541016, "logps/rejected": -4.686108589172363, "loss": 0.8815, "nll_loss": 0.8197700381278992, "rewards/accuracies": 0.91015625, "rewards/chosen": -0.14062415063381195, "rewards/margins": 0.562292218208313, "rewards/rejected": -0.7029163837432861, "step": 1920 }, { "epoch": 9.234772324068599, "grad_norm": 5.214437484741211, "learning_rate": 2.2304147384531036e-07, "log_odds_chosen": 4.728519439697266, "log_odds_ratio": -0.26717641949653625, "logits/chosen": 0.8268774747848511, "logits/rejected": 0.5454421639442444, "logps/chosen": -0.9330585598945618, "logps/rejected": -5.14246129989624, "loss": 0.8819, "nll_loss": 0.8327500820159912, "rewards/accuracies": 0.86328125, "rewards/chosen": -0.13995879888534546, "rewards/margins": 0.6314104795455933, "rewards/rejected": -0.771369218826294, "step": 1952 }, { "epoch": 9.38616203429923, "grad_norm": 6.283268928527832, "learning_rate": 2.1479526258069083e-07, "log_odds_chosen": 4.715708255767822, "log_odds_ratio": -0.24205940961837769, "logits/chosen": 0.920197069644928, "logits/rejected": 0.5509434342384338, "logps/chosen": -0.9300947189331055, "logps/rejected": -5.131900787353516, "loss": 0.8768, "nll_loss": 0.8383646011352539, "rewards/accuracies": 0.87890625, "rewards/chosen": -0.13951420783996582, "rewards/margins": 0.6302710175514221, "rewards/rejected": -0.7697851657867432, "step": 1984 }, { "epoch": 9.461856889414548, "eval_log_odds_chosen": 1.7867234945297241, "eval_log_odds_ratio": -0.18122754991054535, "eval_logits/chosen": 0.5212496519088745, "eval_logits/rejected": 0.4629932940006256, "eval_logps/chosen": -0.6597533226013184, "eval_logps/rejected": -1.7732388973236084, "eval_loss": 0.7272647619247437, "eval_nll_loss": 0.6736801266670227, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.09896300733089447, "eval_rewards/margins": 0.16702282428741455, "eval_rewards/rejected": -0.2659858167171478, "eval_runtime": 1.766, "eval_samples_per_second": 77.575, "eval_steps_per_second": 10.192, "step": 2000 }, { "epoch": 9.537551744529864, "grad_norm": 4.955827713012695, "learning_rate": 2.065879555832674e-07, "log_odds_chosen": 4.195652008056641, "log_odds_ratio": -0.25658515095710754, "logits/chosen": 0.8670744895935059, "logits/rejected": 0.6037735939025879, "logps/chosen": -0.9333707094192505, "logps/rejected": -4.614899158477783, "loss": 0.8862, "nll_loss": 0.8495485782623291, "rewards/accuracies": 0.87109375, "rewards/chosen": -0.14000560343265533, "rewards/margins": 0.5522292852401733, "rewards/rejected": -0.6922348737716675, "step": 2016 }, { "epoch": 9.688941454760498, "grad_norm": 5.796345233917236, "learning_rate": 1.984286226342056e-07, "log_odds_chosen": 4.511747360229492, "log_odds_ratio": -0.27976271510124207, "logits/chosen": 0.8292367458343506, "logits/rejected": 0.5408206582069397, "logps/chosen": -0.9318006038665771, "logps/rejected": -4.940333843231201, "loss": 0.8937, "nll_loss": 0.824824869632721, "rewards/accuracies": 0.86328125, "rewards/chosen": -0.13977007567882538, "rewards/margins": 0.6012800931930542, "rewards/rejected": -0.7410501837730408, "step": 2048 }, { "epoch": 9.84033116499113, "grad_norm": 5.052858352661133, "learning_rate": 1.9032628049921556e-07, "log_odds_chosen": 4.3274006843566895, "log_odds_ratio": -0.2658219337463379, "logits/chosen": 0.7942694425582886, "logits/rejected": 0.5170871615409851, "logps/chosen": -0.9714781045913696, "logps/rejected": -4.801671028137207, "loss": 0.896, "nll_loss": 0.8439369201660156, "rewards/accuracies": 0.8828125, "rewards/chosen": -0.14572171866893768, "rewards/margins": 0.5745289325714111, "rewards/rejected": -0.72025066614151, "step": 2080 }, { "epoch": 9.991720875221763, "grad_norm": 5.781661510467529, "learning_rate": 1.8228988296424875e-07, "log_odds_chosen": 4.903880596160889, "log_odds_ratio": -0.24629831314086914, "logits/chosen": 0.888929009437561, "logits/rejected": 0.5151562690734863, "logps/chosen": -0.9433965682983398, "logps/rejected": -5.327086925506592, "loss": 0.8761, "nll_loss": 0.8339080214500427, "rewards/accuracies": 0.8828125, "rewards/chosen": -0.1415095031261444, "rewards/margins": 0.6575536131858826, "rewards/rejected": -0.7990630865097046, "step": 2112 }, { "epoch": 10.143110585452394, "grad_norm": 8.988626480102539, "learning_rate": 1.7432831094079352e-07, "log_odds_chosen": 4.3950042724609375, "log_odds_ratio": -0.28862205147743225, "logits/chosen": 0.8134148120880127, "logits/rejected": 0.5847084522247314, "logps/chosen": -1.0293586254119873, "logps/rejected": -4.959186553955078, "loss": 0.8813, "nll_loss": 0.8774588108062744, "rewards/accuracies": 0.85546875, "rewards/chosen": -0.15440379083156586, "rewards/margins": 0.5894742608070374, "rewards/rejected": -0.7438780069351196, "step": 2144 }, { "epoch": 10.294500295683028, "grad_norm": 5.275697231292725, "learning_rate": 1.6645036265170313e-07, "log_odds_chosen": 5.46366548538208, "log_odds_ratio": -0.27606436610221863, "logits/chosen": 0.8438766598701477, "logits/rejected": 0.5199805498123169, "logps/chosen": -0.9803435802459717, "logps/rejected": -5.95693826675415, "loss": 0.8932, "nll_loss": 0.8328185677528381, "rewards/accuracies": 0.86328125, "rewards/chosen": -0.14705155789852142, "rewards/margins": 0.7464891076087952, "rewards/rejected": -0.8935407400131226, "step": 2176 }, { "epoch": 10.445890005913661, "grad_norm": 6.923160552978516, "learning_rate": 1.5866474390840124e-07, "log_odds_chosen": 4.528408050537109, "log_odds_ratio": -0.25843387842178345, "logits/chosen": 0.8353314399719238, "logits/rejected": 0.5368306636810303, "logps/chosen": -0.9630373120307922, "logps/rejected": -4.988365173339844, "loss": 0.887, "nll_loss": 0.855586051940918, "rewards/accuracies": 0.8984375, "rewards/chosen": -0.14445561170578003, "rewards/margins": 0.6037992238998413, "rewards/rejected": -0.7482547760009766, "step": 2208 }, { "epoch": 10.597279716144293, "grad_norm": 4.630692005157471, "learning_rate": 1.5098005849021078e-07, "log_odds_chosen": 4.724957466125488, "log_odds_ratio": -0.2811046242713928, "logits/chosen": 0.858523428440094, "logits/rejected": 0.5616721510887146, "logps/chosen": -0.9630488753318787, "logps/rejected": -5.188055038452148, "loss": 0.8694, "nll_loss": 0.858130693435669, "rewards/accuracies": 0.86328125, "rewards/chosen": -0.14445732533931732, "rewards/margins": 0.6337509155273438, "rewards/rejected": -0.778208315372467, "step": 2240 }, { "epoch": 10.748669426374926, "grad_norm": 6.591275215148926, "learning_rate": 1.4340479863643656e-07, "log_odds_chosen": 4.770135402679443, "log_odds_ratio": -0.2736424207687378, "logits/chosen": 0.7936345934867859, "logits/rejected": 0.5366979837417603, "logps/chosen": -0.9466649889945984, "logps/rejected": -5.203468322753906, "loss": 0.8882, "nll_loss": 0.8353475332260132, "rewards/accuracies": 0.85546875, "rewards/chosen": -0.141999751329422, "rewards/margins": 0.6385205984115601, "rewards/rejected": -0.7805203795433044, "step": 2272 }, { "epoch": 10.90005913660556, "grad_norm": 5.084187984466553, "learning_rate": 1.3594733566170925e-07, "log_odds_chosen": 4.994205474853516, "log_odds_ratio": -0.30068373680114746, "logits/chosen": 0.8074924945831299, "logits/rejected": 0.522384524345398, "logps/chosen": -0.9593102335929871, "logps/rejected": -5.447037220001221, "loss": 0.8834, "nll_loss": 0.8404646515846252, "rewards/accuracies": 0.8203125, "rewards/chosen": -0.14389653503894806, "rewards/margins": 0.673159122467041, "rewards/rejected": -0.8170557022094727, "step": 2304 }, { "epoch": 11.051448846836191, "grad_norm": 5.465320110321045, "learning_rate": 1.2861591070496192e-07, "log_odds_chosen": 4.723004341125488, "log_odds_ratio": -0.25821179151535034, "logits/chosen": 0.8570014238357544, "logits/rejected": 0.5345165133476257, "logps/chosen": -0.9341294765472412, "logps/rejected": -5.139418601989746, "loss": 0.8586, "nll_loss": 0.8409022092819214, "rewards/accuracies": 0.89453125, "rewards/chosen": -0.14011943340301514, "rewards/margins": 0.6307933330535889, "rewards/rejected": -0.770912766456604, "step": 2336 }, { "epoch": 11.202838557066825, "grad_norm": 5.109860420227051, "learning_rate": 1.2141862562226164e-07, "log_odds_chosen": 4.454768180847168, "log_odds_ratio": -0.24100762605667114, "logits/chosen": 0.7960795760154724, "logits/rejected": 0.511499285697937, "logps/chosen": -0.9228134751319885, "logps/rejected": -4.836323261260986, "loss": 0.8823, "nll_loss": 0.8171857595443726, "rewards/accuracies": 0.8984375, "rewards/chosen": -0.13842202723026276, "rewards/margins": 0.5870264172554016, "rewards/rejected": -0.725448489189148, "step": 2368 }, { "epoch": 11.354228267297458, "grad_norm": 4.377430438995361, "learning_rate": 1.1436343403356016e-07, "log_odds_chosen": 4.902271270751953, "log_odds_ratio": -0.24684929847717285, "logits/chosen": 0.8083094358444214, "logits/rejected": 0.5171899199485779, "logps/chosen": -0.9131155610084534, "logps/rejected": -5.286437034606934, "loss": 0.8823, "nll_loss": 0.8163360953330994, "rewards/accuracies": 0.89453125, "rewards/chosen": -0.13696734607219696, "rewards/margins": 0.6559982299804688, "rewards/rejected": -0.7929655313491821, "step": 2400 }, { "epoch": 11.354228267297458, "eval_log_odds_chosen": 1.8271435499191284, "eval_log_odds_ratio": -0.17579954862594604, "eval_logits/chosen": 0.49480167031288147, "eval_logits/rejected": 0.437588095664978, "eval_logps/chosen": -0.657990038394928, "eval_logps/rejected": -1.80092453956604, "eval_loss": 0.7247140407562256, "eval_nll_loss": 0.6719491481781006, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.09869851171970367, "eval_rewards/margins": 0.1714402139186859, "eval_rewards/rejected": -0.2701387107372284, "eval_runtime": 1.7829, "eval_samples_per_second": 76.839, "eval_steps_per_second": 10.096, "step": 2400 }, { "epoch": 11.50561797752809, "grad_norm": 6.997631072998047, "learning_rate": 1.0745813253325956e-07, "log_odds_chosen": 4.850367069244385, "log_odds_ratio": -0.23768070340156555, "logits/chosen": 0.8919247984886169, "logits/rejected": 0.5253655910491943, "logps/chosen": -0.9427354335784912, "logps/rejected": -5.268039226531982, "loss": 0.8907, "nll_loss": 0.8325998783111572, "rewards/accuracies": 0.90625, "rewards/chosen": -0.14141032099723816, "rewards/margins": 0.6487956643104553, "rewards/rejected": -0.7902059555053711, "step": 2432 }, { "epoch": 11.657007687758723, "grad_norm": 6.977694988250732, "learning_rate": 1.007103520743035e-07, "log_odds_chosen": 4.430591106414795, "log_odds_ratio": -0.2736847698688507, "logits/chosen": 0.772072434425354, "logits/rejected": 0.5454930067062378, "logps/chosen": -0.972098171710968, "logps/rejected": -4.8969268798828125, "loss": 0.8668, "nll_loss": 0.864302396774292, "rewards/accuracies": 0.8828125, "rewards/chosen": -0.14581473171710968, "rewards/margins": 0.5887242555618286, "rewards/rejected": -0.7345390319824219, "step": 2464 }, { "epoch": 11.808397397989355, "grad_norm": 9.901198387145996, "learning_rate": 9.412754953531663e-08, "log_odds_chosen": 5.721859455108643, "log_odds_ratio": -0.2376585453748703, "logits/chosen": 0.9159454107284546, "logits/rejected": 0.49302536249160767, "logps/chosen": -0.905667781829834, "logps/rejected": -6.09743070602417, "loss": 0.8778, "nll_loss": 0.8210791945457458, "rewards/accuracies": 0.8984375, "rewards/chosen": -0.135850191116333, "rewards/margins": 0.7787644267082214, "rewards/rejected": -0.9146146178245544, "step": 2496 }, { "epoch": 11.959787108219988, "grad_norm": 6.6628241539001465, "learning_rate": 8.771699948011203e-08, "log_odds_chosen": 4.282519817352295, "log_odds_ratio": -0.2792586088180542, "logits/chosen": 0.790172815322876, "logits/rejected": 0.563973069190979, "logps/chosen": -0.9786302447319031, "logps/rejected": -4.77611780166626, "loss": 0.8802, "nll_loss": 0.8442527651786804, "rewards/accuracies": 0.84765625, "rewards/chosen": -0.14679455757141113, "rewards/margins": 0.5696231722831726, "rewards/rejected": -0.716417670249939, "step": 2528 }, { "epoch": 12.111176818450621, "grad_norm": 5.591745853424072, "learning_rate": 8.148578611867113e-08, "log_odds_chosen": 4.849425315856934, "log_odds_ratio": -0.29553845524787903, "logits/chosen": 0.8502916097640991, "logits/rejected": 0.5881719589233398, "logps/chosen": -0.9942740201950073, "logps/rejected": -5.380496025085449, "loss": 0.8794, "nll_loss": 0.894903302192688, "rewards/accuracies": 0.83984375, "rewards/chosen": -0.1491411030292511, "rewards/margins": 0.6579334139823914, "rewards/rejected": -0.8070744276046753, "step": 2560 }, { "epoch": 12.262566528681253, "grad_norm": 4.799871921539307, "learning_rate": 7.544079547848181e-08, "log_odds_chosen": 4.629427909851074, "log_odds_ratio": -0.2579698860645294, "logits/chosen": 0.8144665360450745, "logits/rejected": 0.5345531702041626, "logps/chosen": -0.9962482452392578, "logps/rejected": -5.126289367675781, "loss": 0.8853, "nll_loss": 0.8719948530197144, "rewards/accuracies": 0.87890625, "rewards/chosen": -0.14943724870681763, "rewards/margins": 0.6195061802864075, "rewards/rejected": -0.7689434885978699, "step": 2592 }, { "epoch": 12.413956238911886, "grad_norm": 5.2031779289245605, "learning_rate": 6.958870779488446e-08, "log_odds_chosen": 5.763055801391602, "log_odds_ratio": -0.24303670227527618, "logits/chosen": 0.85135418176651, "logits/rejected": 0.5018079876899719, "logps/chosen": -0.9315154552459717, "logps/rejected": -6.163926124572754, "loss": 0.8732, "nll_loss": 0.8289435505867004, "rewards/accuracies": 0.875, "rewards/chosen": -0.13972733914852142, "rewards/margins": 0.78486168384552, "rewards/rejected": -0.9245890378952026, "step": 2624 }, { "epoch": 12.56534594914252, "grad_norm": 4.5774712562561035, "learning_rate": 6.393599012883707e-08, "log_odds_chosen": 4.685327529907227, "log_odds_ratio": -0.2833007574081421, "logits/chosen": 0.7489104270935059, "logits/rejected": 0.5779923796653748, "logps/chosen": -0.9675414562225342, "logps/rejected": -5.168377876281738, "loss": 0.8694, "nll_loss": 0.8434449434280396, "rewards/accuracies": 0.875, "rewards/chosen": -0.14513123035430908, "rewards/margins": 0.6301255226135254, "rewards/rejected": -0.7752567529678345, "step": 2656 }, { "epoch": 12.716735659373152, "grad_norm": 5.854611396789551, "learning_rate": 5.848888922025552e-08, "log_odds_chosen": 5.014122009277344, "log_odds_ratio": -0.23267918825149536, "logits/chosen": 0.8252905607223511, "logits/rejected": 0.4858684539794922, "logps/chosen": -0.8893996477127075, "logps/rejected": -5.326512336730957, "loss": 0.878, "nll_loss": 0.8163630366325378, "rewards/accuracies": 0.8828125, "rewards/chosen": -0.13340994715690613, "rewards/margins": 0.6655669212341309, "rewards/rejected": -0.7989768981933594, "step": 2688 }, { "epoch": 12.868125369603785, "grad_norm": 5.542015075683594, "learning_rate": 5.325342458482779e-08, "log_odds_chosen": 5.052638530731201, "log_odds_ratio": -0.2500526010990143, "logits/chosen": 0.8215246796607971, "logits/rejected": 0.573950469493866, "logps/chosen": -0.8597905039787292, "logps/rejected": -5.335259437561035, "loss": 0.8812, "nll_loss": 0.8173032999038696, "rewards/accuracies": 0.875, "rewards/chosen": -0.12896858155727386, "rewards/margins": 0.6713204383850098, "rewards/rejected": -0.8002889156341553, "step": 2720 }, { "epoch": 13.019515079834418, "grad_norm": 7.424806118011475, "learning_rate": 4.823538186193096e-08, "log_odds_chosen": 5.35725212097168, "log_odds_ratio": -0.23181939125061035, "logits/chosen": 0.8148990273475647, "logits/rejected": 0.4551333785057068, "logps/chosen": -0.9124429225921631, "logps/rejected": -5.717087268829346, "loss": 0.8778, "nll_loss": 0.8277573585510254, "rewards/accuracies": 0.91015625, "rewards/chosen": -0.13686645030975342, "rewards/margins": 0.7206966876983643, "rewards/rejected": -0.8575630784034729, "step": 2752 }, { "epoch": 13.17090479006505, "grad_norm": 6.039958953857422, "learning_rate": 4.3440306421001324e-08, "log_odds_chosen": 5.5131001472473145, "log_odds_ratio": -0.24206629395484924, "logits/chosen": 0.873075008392334, "logits/rejected": 0.5212752223014832, "logps/chosen": -0.8922577500343323, "logps/rejected": -5.86539888381958, "loss": 0.8901, "nll_loss": 0.8136817216873169, "rewards/accuracies": 0.88671875, "rewards/chosen": -0.13383866846561432, "rewards/margins": 0.745971143245697, "rewards/rejected": -0.8798097968101501, "step": 2784 }, { "epoch": 13.246599645180366, "eval_log_odds_chosen": 1.8457978963851929, "eval_log_odds_ratio": -0.17291945219039917, "eval_logits/chosen": 0.5009181499481201, "eval_logits/rejected": 0.446205198764801, "eval_logps/chosen": -0.6597917675971985, "eval_logps/rejected": -1.8198742866516113, "eval_loss": 0.7256795763969421, "eval_nll_loss": 0.6736116409301758, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.09896877408027649, "eval_rewards/margins": 0.17401237785816193, "eval_rewards/rejected": -0.2729811668395996, "eval_runtime": 1.7675, "eval_samples_per_second": 77.512, "eval_steps_per_second": 10.184, "step": 2800 }, { "epoch": 13.322294500295683, "grad_norm": 4.649291515350342, "learning_rate": 3.887349723342303e-08, "log_odds_chosen": 5.655206203460693, "log_odds_ratio": -0.22572118043899536, "logits/chosen": 0.8335084915161133, "logits/rejected": 0.4884824752807617, "logps/chosen": -0.8668183088302612, "logps/rejected": -5.9456024169921875, "loss": 0.8773, "nll_loss": 0.7930561900138855, "rewards/accuracies": 0.90234375, "rewards/chosen": -0.13002273440361023, "rewards/margins": 0.7618176937103271, "rewards/rejected": -0.8918405175209045, "step": 2816 }, { "epoch": 13.473684210526315, "grad_norm": 5.706801414489746, "learning_rate": 3.454000101670901e-08, "log_odds_chosen": 4.356830596923828, "log_odds_ratio": -0.24235375225543976, "logits/chosen": 0.7453078031539917, "logits/rejected": 0.5251801609992981, "logps/chosen": -0.9370274543762207, "logps/rejected": -4.772754192352295, "loss": 0.8771, "nll_loss": 0.8157171010971069, "rewards/accuracies": 0.88671875, "rewards/chosen": -0.14055413007736206, "rewards/margins": 0.5753591060638428, "rewards/rejected": -0.7159131765365601, "step": 2848 }, { "epoch": 13.625073920756948, "grad_norm": 6.6824140548706055, "learning_rate": 3.044460665744283e-08, "log_odds_chosen": 4.974400043487549, "log_odds_ratio": -0.24002020061016083, "logits/chosen": 0.7889403700828552, "logits/rejected": 0.4931294322013855, "logps/chosen": -0.9762779474258423, "logps/rejected": -5.440495491027832, "loss": 0.8849, "nll_loss": 0.8199655413627625, "rewards/accuracies": 0.8984375, "rewards/chosen": -0.14644168317317963, "rewards/margins": 0.6696327328681946, "rewards/rejected": -0.8160744905471802, "step": 2880 }, { "epoch": 13.776463630987582, "grad_norm": 9.858070373535156, "learning_rate": 2.659183991914696e-08, "log_odds_chosen": 4.271711349487305, "log_odds_ratio": -0.25790902972221375, "logits/chosen": 0.7586400508880615, "logits/rejected": 0.5483137369155884, "logps/chosen": -0.9079785346984863, "logps/rejected": -4.65129280090332, "loss": 0.8755, "nll_loss": 0.8172128200531006, "rewards/accuracies": 0.87890625, "rewards/chosen": -0.1361967921257019, "rewards/margins": 0.561497151851654, "rewards/rejected": -0.697693943977356, "step": 2912 }, { "epoch": 13.927853341218214, "grad_norm": 4.99421501159668, "learning_rate": 2.298595844092377e-08, "log_odds_chosen": 5.054343223571777, "log_odds_ratio": -0.2358601987361908, "logits/chosen": 0.7982761859893799, "logits/rejected": 0.5060718655586243, "logps/chosen": -0.9570282697677612, "logps/rejected": -5.482752799987793, "loss": 0.8707, "nll_loss": 0.8115738034248352, "rewards/accuracies": 0.90234375, "rewards/chosen": -0.143554225564003, "rewards/margins": 0.6788586974143982, "rewards/rejected": -0.82241290807724, "step": 2944 }, { "epoch": 14.079243051448847, "grad_norm": 17.175851821899414, "learning_rate": 1.9630947032398066e-08, "log_odds_chosen": 5.8499908447265625, "log_odds_ratio": -0.22148607671260834, "logits/chosen": 0.817506730556488, "logits/rejected": 0.44914665818214417, "logps/chosen": -0.8968250751495361, "logps/rejected": -6.185724258422852, "loss": 0.8673, "nll_loss": 0.8207356333732605, "rewards/accuracies": 0.921875, "rewards/chosen": -0.13452376425266266, "rewards/margins": 0.7933349013328552, "rewards/rejected": -0.9278587698936462, "step": 2976 }, { "epoch": 14.23063276167948, "grad_norm": 7.170802593231201, "learning_rate": 1.653051327015911e-08, "log_odds_chosen": 4.76658296585083, "log_odds_ratio": -0.24812592566013336, "logits/chosen": 0.8145585060119629, "logits/rejected": 0.5187351703643799, "logps/chosen": -0.9258391261100769, "logps/rejected": -5.176287651062012, "loss": 0.8781, "nll_loss": 0.8292718529701233, "rewards/accuracies": 0.90625, "rewards/chosen": -0.13887587189674377, "rewards/margins": 0.6375671625137329, "rewards/rejected": -0.7764431834220886, "step": 3008 }, { "epoch": 14.382022471910112, "grad_norm": 5.404478073120117, "learning_rate": 1.368808340056879e-08, "log_odds_chosen": 5.262024879455566, "log_odds_ratio": -0.22128547728061676, "logits/chosen": 0.7849254608154297, "logits/rejected": 0.4733457863330841, "logps/chosen": -0.9194588661193848, "logps/rejected": -5.613149166107178, "loss": 0.8665, "nll_loss": 0.8110780715942383, "rewards/accuracies": 0.8828125, "rewards/chosen": -0.1379188597202301, "rewards/margins": 0.704053521156311, "rewards/rejected": -0.8419723510742188, "step": 3040 }, { "epoch": 14.533412182140745, "grad_norm": 4.717693328857422, "learning_rate": 1.1106798553464802e-08, "log_odds_chosen": 5.532874584197998, "log_odds_ratio": -0.23889514803886414, "logits/chosen": 0.887575626373291, "logits/rejected": 0.503061056137085, "logps/chosen": -0.9478439092636108, "logps/rejected": -5.9591827392578125, "loss": 0.8689, "nll_loss": 0.8499802947044373, "rewards/accuracies": 0.9140625, "rewards/chosen": -0.1421765685081482, "rewards/margins": 0.7517008185386658, "rewards/rejected": -0.893877387046814, "step": 3072 }, { "epoch": 14.684801892371379, "grad_norm": 7.475513458251953, "learning_rate": 8.789511270941269e-09, "log_odds_chosen": 4.4497551918029785, "log_odds_ratio": -0.27013376355171204, "logits/chosen": 0.7935608625411987, "logits/rejected": 0.5559485554695129, "logps/chosen": -0.9605445861816406, "logps/rejected": -4.917541980743408, "loss": 0.8786, "nll_loss": 0.8641871213912964, "rewards/accuracies": 0.8828125, "rewards/chosen": -0.1440816968679428, "rewards/margins": 0.5935496091842651, "rewards/rejected": -0.7376313209533691, "step": 3104 }, { "epoch": 14.83619160260201, "grad_norm": 6.8675408363342285, "learning_rate": 6.738782355044048e-09, "log_odds_chosen": 4.509281635284424, "log_odds_ratio": -0.27578622102737427, "logits/chosen": 0.7721443772315979, "logits/rejected": 0.5139036774635315, "logps/chosen": -0.9913955926895142, "logps/rejected": -5.035284042358398, "loss": 0.8838, "nll_loss": 0.8683611154556274, "rewards/accuracies": 0.88671875, "rewards/chosen": -0.14870934188365936, "rewards/margins": 0.6065833568572998, "rewards/rejected": -0.7552926540374756, "step": 3136 }, { "epoch": 14.987581312832644, "grad_norm": 7.102670669555664, "learning_rate": 4.956878037864043e-09, "log_odds_chosen": 4.306816101074219, "log_odds_ratio": -0.30200034379959106, "logits/chosen": 0.8607514500617981, "logits/rejected": 0.591871440410614, "logps/chosen": -0.9792557954788208, "logps/rejected": -4.773169040679932, "loss": 0.8869, "nll_loss": 0.8911793231964111, "rewards/accuracies": 0.859375, "rewards/chosen": -0.1468883752822876, "rewards/margins": 0.569087028503418, "rewards/rejected": -0.7159753441810608, "step": 3168 }, { "epoch": 15.138971023063275, "grad_norm": 4.992292881011963, "learning_rate": 3.4457674771554422e-09, "log_odds_chosen": 4.759942054748535, "log_odds_ratio": -0.2575688362121582, "logits/chosen": 0.7185624241828918, "logits/rejected": 0.42112159729003906, "logps/chosen": -0.9415456652641296, "logps/rejected": -5.170385360717773, "loss": 0.858, "nll_loss": 0.8277443647384644, "rewards/accuracies": 0.8828125, "rewards/chosen": -0.14123186469078064, "rewards/margins": 0.6343258619308472, "rewards/rejected": -0.775557816028595, "step": 3200 }, { "epoch": 15.138971023063275, "eval_log_odds_chosen": 1.8564934730529785, "eval_log_odds_ratio": -0.17145967483520508, "eval_logits/chosen": 0.48080742359161377, "eval_logits/rejected": 0.4276208281517029, "eval_logps/chosen": -0.6593887209892273, "eval_logps/rejected": -1.8252443075180054, "eval_loss": 0.7243954539299011, "eval_nll_loss": 0.6726279854774475, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.09890830516815186, "eval_rewards/margins": 0.17487837374210358, "eval_rewards/rejected": -0.27378666400909424, "eval_runtime": 1.7744, "eval_samples_per_second": 77.208, "eval_steps_per_second": 10.144, "step": 3200 }, { "epoch": 15.290360733293909, "grad_norm": 6.751287937164307, "learning_rate": 2.2071205802468297e-09, "log_odds_chosen": 4.854089736938477, "log_odds_ratio": -0.2636704742908478, "logits/chosen": 0.7567326426506042, "logits/rejected": 0.527582585811615, "logps/chosen": -0.9423821568489075, "logps/rejected": -5.277737617492676, "loss": 0.8852, "nll_loss": 0.844541609287262, "rewards/accuracies": 0.875, "rewards/chosen": -0.14135733246803284, "rewards/margins": 0.6503032445907593, "rewards/rejected": -0.7916606068611145, "step": 3232 }, { "epoch": 15.441750443524542, "grad_norm": 5.534750938415527, "learning_rate": 1.2423061586496476e-09, "log_odds_chosen": 5.184489727020264, "log_odds_ratio": -0.24983780086040497, "logits/chosen": 0.8209244608879089, "logits/rejected": 0.5052769780158997, "logps/chosen": -0.9556353688240051, "logps/rejected": -5.621804237365723, "loss": 0.8706, "nll_loss": 0.8418364524841309, "rewards/accuracies": 0.88671875, "rewards/chosen": -0.14334531128406525, "rewards/margins": 0.699925422668457, "rewards/rejected": -0.8432707786560059, "step": 3264 }, { "epoch": 15.593140153755174, "grad_norm": 5.217104434967041, "learning_rate": 5.523904154037528e-10, "log_odds_chosen": 5.348480701446533, "log_odds_ratio": -0.2507275640964508, "logits/chosen": 0.8220376372337341, "logits/rejected": 0.5271560549736023, "logps/chosen": -0.9200209975242615, "logps/rejected": -5.755062103271484, "loss": 0.887, "nll_loss": 0.8451349139213562, "rewards/accuracies": 0.85546875, "rewards/chosen": -0.1380031555891037, "rewards/margins": 0.7252561450004578, "rewards/rejected": -0.8632593154907227, "step": 3296 }, { "epoch": 15.744529863985807, "grad_norm": 6.9226460456848145, "learning_rate": 1.3813576683111006e-10, "log_odds_chosen": 4.370879650115967, "log_odds_ratio": -0.24154168367385864, "logits/chosen": 0.7712342739105225, "logits/rejected": 0.5409867763519287, "logps/chosen": -0.9708598256111145, "logps/rejected": -4.831565856933594, "loss": 0.8729, "nll_loss": 0.8363229036331177, "rewards/accuracies": 0.921875, "rewards/chosen": -0.14562898874282837, "rewards/margins": 0.5791059136390686, "rewards/rejected": -0.724734902381897, "step": 3328 }, { "epoch": 15.89591957421644, "grad_norm": 7.291532516479492, "learning_rate": 0.0, "log_odds_chosen": 5.1468186378479, "log_odds_ratio": -0.2334214597940445, "logits/chosen": 0.8100905418395996, "logits/rejected": 0.48369458317756653, "logps/chosen": -0.8902687430381775, "logps/rejected": -5.482838153839111, "loss": 0.883, "nll_loss": 0.8243392705917358, "rewards/accuracies": 0.890625, "rewards/chosen": -0.1335403174161911, "rewards/margins": 0.688885509967804, "rewards/rejected": -0.8224257826805115, "step": 3360 }, { "epoch": 15.89591957421644, "eval_log_odds_chosen": 1.8541311025619507, "eval_log_odds_ratio": -0.17156726121902466, "eval_logits/chosen": 0.4940509796142578, "eval_logits/rejected": 0.4394443929195404, "eval_logps/chosen": -0.6573522090911865, "eval_logps/rejected": -1.8197245597839355, "eval_loss": 0.7246665954589844, "eval_nll_loss": 0.6722227334976196, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.09860283136367798, "eval_rewards/margins": 0.17435584962368011, "eval_rewards/rejected": -0.2729586660861969, "eval_runtime": 1.7715, "eval_samples_per_second": 77.334, "eval_steps_per_second": 10.161, "step": 3360 } ], "logging_steps": 32, "max_steps": 3360, "num_input_tokens_seen": 0, "num_train_epochs": 16, "save_steps": 400, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }