{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.998451213216314, "eval_steps": 100, "global_step": 2904, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.7182130584192438e-09, "logits/chosen": -2.9648265838623047, "logits/rejected": -2.9711227416992188, "logps/chosen": -256.0919494628906, "logps/rejected": -234.60708618164062, "loss": 1.0, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 1.718213058419244e-08, "logits/chosen": -3.049875497817993, "logits/rejected": -3.0188238620758057, "logps/chosen": -276.6912536621094, "logps/rejected": -202.39605712890625, "loss": 1.0001, "rewards/accuracies": 0.4375, "rewards/chosen": 0.0005764114903286099, "rewards/margins": -0.006484686397016048, "rewards/rejected": 0.007061097305268049, "step": 10 }, { "epoch": 0.02, "learning_rate": 3.436426116838488e-08, "logits/chosen": -2.988577127456665, "logits/rejected": -2.9995627403259277, "logps/chosen": -312.2018127441406, "logps/rejected": -246.76266479492188, "loss": 1.0026, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": 0.003506724489852786, "rewards/margins": -0.0012849611230194569, "rewards/rejected": 0.004791685380041599, "step": 20 }, { "epoch": 0.03, "learning_rate": 5.154639175257731e-08, "logits/chosen": -3.063732624053955, "logits/rejected": -3.0357906818389893, "logps/chosen": -260.15679931640625, "logps/rejected": -224.3686065673828, "loss": 0.9974, "rewards/accuracies": 0.581250011920929, "rewards/chosen": 0.0021156296133995056, "rewards/margins": 0.0043937130831182, "rewards/rejected": -0.0022780844010412693, "step": 30 }, { "epoch": 0.04, "learning_rate": 6.872852233676976e-08, "logits/chosen": -3.070286273956299, "logits/rejected": -3.0322961807250977, "logps/chosen": -299.5580139160156, "logps/rejected": -250.05123901367188, "loss": 0.9971, "rewards/accuracies": 0.5625, "rewards/chosen": 0.00013011172995902598, "rewards/margins": -0.0006899217842146754, "rewards/rejected": 0.0008200337179005146, "step": 40 }, { "epoch": 0.05, "learning_rate": 8.59106529209622e-08, "logits/chosen": -3.019392490386963, "logits/rejected": -3.024167060852051, "logps/chosen": -289.3621520996094, "logps/rejected": -224.00979614257812, "loss": 1.003, "rewards/accuracies": 0.46875, "rewards/chosen": -0.0025630726013332605, "rewards/margins": -0.0055747563019394875, "rewards/rejected": 0.003011685097590089, "step": 50 }, { "epoch": 0.06, "learning_rate": 1.0309278350515462e-07, "logits/chosen": -3.0416665077209473, "logits/rejected": -3.020573616027832, "logps/chosen": -247.55380249023438, "logps/rejected": -226.4866943359375, "loss": 0.9977, "rewards/accuracies": 0.606249988079071, "rewards/chosen": 0.0035637759137898684, "rewards/margins": 0.0046446239575743675, "rewards/rejected": -0.0010808479273691773, "step": 60 }, { "epoch": 0.07, "learning_rate": 1.202749140893471e-07, "logits/chosen": -3.0761704444885254, "logits/rejected": -3.058954954147339, "logps/chosen": -305.7156677246094, "logps/rejected": -251.75009155273438, "loss": 0.9938, "rewards/accuracies": 0.5625, "rewards/chosen": 0.004508626647293568, "rewards/margins": 0.007502266205847263, "rewards/rejected": -0.0029936402570456266, "step": 70 }, { "epoch": 0.08, "learning_rate": 1.3745704467353952e-07, "logits/chosen": -3.049072742462158, "logits/rejected": -3.0234384536743164, "logps/chosen": -293.57989501953125, "logps/rejected": -240.2385711669922, "loss": 0.9985, "rewards/accuracies": 0.53125, "rewards/chosen": 0.002101506572216749, "rewards/margins": 0.002054845681414008, "rewards/rejected": 4.6660610678372905e-05, "step": 80 }, { "epoch": 0.09, "learning_rate": 1.5463917525773197e-07, "logits/chosen": -3.079655885696411, "logits/rejected": -3.0430655479431152, "logps/chosen": -259.3849792480469, "logps/rejected": -216.38330078125, "loss": 0.9993, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.001981315901502967, "rewards/margins": 0.003211658913642168, "rewards/rejected": -0.0012303430121392012, "step": 90 }, { "epoch": 0.1, "learning_rate": 1.718213058419244e-07, "logits/chosen": -3.0111169815063477, "logits/rejected": -3.006265640258789, "logps/chosen": -267.73577880859375, "logps/rejected": -222.9344482421875, "loss": 1.0009, "rewards/accuracies": 0.5625, "rewards/chosen": 0.005559581331908703, "rewards/margins": 0.004569889511913061, "rewards/rejected": 0.000989692285656929, "step": 100 }, { "epoch": 0.11, "learning_rate": 1.8900343642611682e-07, "logits/chosen": -3.0228209495544434, "logits/rejected": -2.9778640270233154, "logps/chosen": -269.3376770019531, "logps/rejected": -230.95877075195312, "loss": 0.9971, "rewards/accuracies": 0.5, "rewards/chosen": 0.0013723246520385146, "rewards/margins": 0.0024911228101700544, "rewards/rejected": -0.0011187975760549307, "step": 110 }, { "epoch": 0.12, "learning_rate": 2.0618556701030925e-07, "logits/chosen": -3.0604119300842285, "logits/rejected": -3.0274159908294678, "logps/chosen": -310.97454833984375, "logps/rejected": -232.7030029296875, "loss": 0.9997, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": 0.0033910819329321384, "rewards/margins": 0.0011182299349457026, "rewards/rejected": 0.002272851997986436, "step": 120 }, { "epoch": 0.13, "learning_rate": 2.2336769759450173e-07, "logits/chosen": -3.1223320960998535, "logits/rejected": -3.0860095024108887, "logps/chosen": -286.6527099609375, "logps/rejected": -241.933349609375, "loss": 0.996, "rewards/accuracies": 0.543749988079071, "rewards/chosen": 0.0031617667991667986, "rewards/margins": 0.005714719649404287, "rewards/rejected": -0.0025529528502374887, "step": 130 }, { "epoch": 0.14, "learning_rate": 2.405498281786942e-07, "logits/chosen": -3.045279026031494, "logits/rejected": -3.040912628173828, "logps/chosen": -292.4465637207031, "logps/rejected": -234.72903442382812, "loss": 0.9968, "rewards/accuracies": 0.5, "rewards/chosen": 0.0010984055697917938, "rewards/margins": 0.002134153386577964, "rewards/rejected": -0.00103574781678617, "step": 140 }, { "epoch": 0.15, "learning_rate": 2.5773195876288655e-07, "logits/chosen": -3.0267093181610107, "logits/rejected": -3.0170674324035645, "logps/chosen": -275.6455993652344, "logps/rejected": -256.4563903808594, "loss": 0.9998, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.0011621230514720082, "rewards/margins": -2.7875230443896726e-05, "rewards/rejected": -0.001134247868321836, "step": 150 }, { "epoch": 0.17, "learning_rate": 2.7491408934707903e-07, "logits/chosen": -3.08597731590271, "logits/rejected": -3.0747408866882324, "logps/chosen": -278.91754150390625, "logps/rejected": -218.88558959960938, "loss": 0.9949, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.004570486024022102, "rewards/margins": 0.01147634070366621, "rewards/rejected": -0.006905855145305395, "step": 160 }, { "epoch": 0.18, "learning_rate": 2.9209621993127146e-07, "logits/chosen": -3.0499391555786133, "logits/rejected": -3.036341905593872, "logps/chosen": -292.2102966308594, "logps/rejected": -236.95703125, "loss": 0.9925, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0019660559482872486, "rewards/margins": 0.009962075389921665, "rewards/rejected": -0.00799601897597313, "step": 170 }, { "epoch": 0.19, "learning_rate": 3.0927835051546394e-07, "logits/chosen": -3.091757297515869, "logits/rejected": -3.080824375152588, "logps/chosen": -248.6970672607422, "logps/rejected": -211.7375946044922, "loss": 0.993, "rewards/accuracies": 0.53125, "rewards/chosen": 0.0008403388783335686, "rewards/margins": 0.004896977450698614, "rewards/rejected": -0.004056639038026333, "step": 180 }, { "epoch": 0.2, "learning_rate": 3.2646048109965636e-07, "logits/chosen": -3.0578689575195312, "logits/rejected": -3.033844232559204, "logps/chosen": -246.07040405273438, "logps/rejected": -200.9595184326172, "loss": 0.9978, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0035698451101779938, "rewards/margins": 0.012628579512238503, "rewards/rejected": -0.009058734402060509, "step": 190 }, { "epoch": 0.21, "learning_rate": 3.436426116838488e-07, "logits/chosen": -3.08526349067688, "logits/rejected": -3.063560724258423, "logps/chosen": -252.265869140625, "logps/rejected": -192.21331787109375, "loss": 0.99, "rewards/accuracies": 0.5625, "rewards/chosen": 0.009008857421576977, "rewards/margins": 0.016440508887171745, "rewards/rejected": -0.007431652396917343, "step": 200 }, { "epoch": 0.22, "learning_rate": 3.608247422680412e-07, "logits/chosen": -2.9606690406799316, "logits/rejected": -2.906953811645508, "logps/chosen": -292.0260925292969, "logps/rejected": -215.34036254882812, "loss": 0.9855, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0046757906675338745, "rewards/margins": 0.013047484681010246, "rewards/rejected": -0.008371694944798946, "step": 210 }, { "epoch": 0.23, "learning_rate": 3.7800687285223364e-07, "logits/chosen": -2.990668773651123, "logits/rejected": -2.986696481704712, "logps/chosen": -260.21832275390625, "logps/rejected": -237.1192626953125, "loss": 0.9839, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": 0.006101504433900118, "rewards/margins": 0.011856775730848312, "rewards/rejected": -0.005755270831286907, "step": 220 }, { "epoch": 0.24, "learning_rate": 3.9518900343642607e-07, "logits/chosen": -3.0254712104797363, "logits/rejected": -3.0170722007751465, "logps/chosen": -263.41680908203125, "logps/rejected": -240.377685546875, "loss": 0.9888, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.006732765585184097, "rewards/margins": 0.014450904913246632, "rewards/rejected": -0.007718136068433523, "step": 230 }, { "epoch": 0.25, "learning_rate": 4.123711340206185e-07, "logits/chosen": -3.078042507171631, "logits/rejected": -3.0471174716949463, "logps/chosen": -299.16107177734375, "logps/rejected": -214.18759155273438, "loss": 0.9867, "rewards/accuracies": 0.59375, "rewards/chosen": 0.006704004947096109, "rewards/margins": 0.01422835886478424, "rewards/rejected": -0.007524352520704269, "step": 240 }, { "epoch": 0.26, "learning_rate": 4.2955326460481097e-07, "logits/chosen": -3.026909589767456, "logits/rejected": -3.018611431121826, "logps/chosen": -264.2486572265625, "logps/rejected": -233.31826782226562, "loss": 0.9832, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.011163066141307354, "rewards/margins": 0.019244546070694923, "rewards/rejected": -0.008081478998064995, "step": 250 }, { "epoch": 0.27, "learning_rate": 4.4673539518900345e-07, "logits/chosen": -3.0170772075653076, "logits/rejected": -3.0285823345184326, "logps/chosen": -263.9449157714844, "logps/rejected": -219.4688262939453, "loss": 0.9828, "rewards/accuracies": 0.59375, "rewards/chosen": 0.010579807683825493, "rewards/margins": 0.024952661246061325, "rewards/rejected": -0.014372853562235832, "step": 260 }, { "epoch": 0.28, "learning_rate": 4.639175257731959e-07, "logits/chosen": -3.0661990642547607, "logits/rejected": -3.0676910877227783, "logps/chosen": -274.22003173828125, "logps/rejected": -229.6044158935547, "loss": 0.9799, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.004864403046667576, "rewards/margins": 0.016584355384111404, "rewards/rejected": -0.011719951406121254, "step": 270 }, { "epoch": 0.29, "learning_rate": 4.810996563573884e-07, "logits/chosen": -3.031026840209961, "logits/rejected": -3.0113377571105957, "logps/chosen": -283.8157653808594, "logps/rejected": -235.0233612060547, "loss": 0.9754, "rewards/accuracies": 0.65625, "rewards/chosen": 0.013933306559920311, "rewards/margins": 0.027073601260781288, "rewards/rejected": -0.013140290975570679, "step": 280 }, { "epoch": 0.3, "learning_rate": 4.982817869415807e-07, "logits/chosen": -3.094398021697998, "logits/rejected": -3.0440070629119873, "logps/chosen": -270.22052001953125, "logps/rejected": -223.65493774414062, "loss": 0.9774, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.008421173319220543, "rewards/margins": 0.019475247710943222, "rewards/rejected": -0.01105407439172268, "step": 290 }, { "epoch": 0.31, "learning_rate": 4.982778415614236e-07, "logits/chosen": -3.023087978363037, "logits/rejected": -2.9992034435272217, "logps/chosen": -244.30337524414062, "logps/rejected": -218.7770538330078, "loss": 0.9735, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 0.008720096200704575, "rewards/margins": 0.029811996966600418, "rewards/rejected": -0.021091898903250694, "step": 300 }, { "epoch": 0.32, "learning_rate": 4.963643321852277e-07, "logits/chosen": -3.0621352195739746, "logits/rejected": -3.0481069087982178, "logps/chosen": -299.58758544921875, "logps/rejected": -257.4301452636719, "loss": 0.9719, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.011654629372060299, "rewards/margins": 0.02595471777021885, "rewards/rejected": -0.01430008839815855, "step": 310 }, { "epoch": 0.33, "learning_rate": 4.944508228090318e-07, "logits/chosen": -3.026646137237549, "logits/rejected": -3.0066604614257812, "logps/chosen": -242.5664825439453, "logps/rejected": -187.6553497314453, "loss": 0.9641, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 0.014026440680027008, "rewards/margins": 0.0420592799782753, "rewards/rejected": -0.02803283929824829, "step": 320 }, { "epoch": 0.34, "learning_rate": 4.925373134328357e-07, "logits/chosen": -3.0689878463745117, "logits/rejected": -3.052264928817749, "logps/chosen": -303.94036865234375, "logps/rejected": -238.4488067626953, "loss": 0.9634, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.02398153766989708, "rewards/margins": 0.042572326958179474, "rewards/rejected": -0.018590793013572693, "step": 330 }, { "epoch": 0.35, "learning_rate": 4.906238040566398e-07, "logits/chosen": -2.995407819747925, "logits/rejected": -2.9780545234680176, "logps/chosen": -235.09848022460938, "logps/rejected": -236.380859375, "loss": 0.9616, "rewards/accuracies": 0.581250011920929, "rewards/chosen": 0.006325787398964167, "rewards/margins": 0.03254387527704239, "rewards/rejected": -0.02621809020638466, "step": 340 }, { "epoch": 0.36, "learning_rate": 4.887102946804438e-07, "logits/chosen": -3.066584348678589, "logits/rejected": -3.03863263130188, "logps/chosen": -306.0690612792969, "logps/rejected": -245.27407836914062, "loss": 0.9508, "rewards/accuracies": 0.71875, "rewards/chosen": 0.02622169815003872, "rewards/margins": 0.059129487723112106, "rewards/rejected": -0.032907791435718536, "step": 350 }, { "epoch": 0.37, "learning_rate": 4.867967853042479e-07, "logits/chosen": -3.0108275413513184, "logits/rejected": -3.008779525756836, "logps/chosen": -294.0123596191406, "logps/rejected": -248.9111785888672, "loss": 0.9496, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.008144224062561989, "rewards/margins": 0.045391060411930084, "rewards/rejected": -0.037246834486722946, "step": 360 }, { "epoch": 0.38, "learning_rate": 4.84883275928052e-07, "logits/chosen": -3.048269510269165, "logits/rejected": -3.011050224304199, "logps/chosen": -303.9602355957031, "logps/rejected": -269.4437561035156, "loss": 0.9473, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.021487019956111908, "rewards/margins": 0.06678290665149689, "rewards/rejected": -0.04529587924480438, "step": 370 }, { "epoch": 0.39, "learning_rate": 4.82969766551856e-07, "logits/chosen": -3.0777668952941895, "logits/rejected": -3.068040370941162, "logps/chosen": -282.12713623046875, "logps/rejected": -236.7052459716797, "loss": 0.9515, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": 0.02360449731349945, "rewards/margins": 0.05431375652551651, "rewards/rejected": -0.03070926107466221, "step": 380 }, { "epoch": 0.4, "learning_rate": 4.810562571756601e-07, "logits/chosen": -2.9710302352905273, "logits/rejected": -2.983682155609131, "logps/chosen": -272.12713623046875, "logps/rejected": -235.8425750732422, "loss": 0.9475, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.013235519640147686, "rewards/margins": 0.05444386601448059, "rewards/rejected": -0.04120834544301033, "step": 390 }, { "epoch": 0.41, "learning_rate": 4.791427477994642e-07, "logits/chosen": -3.0254111289978027, "logits/rejected": -3.006087303161621, "logps/chosen": -254.69107055664062, "logps/rejected": -210.39474487304688, "loss": 0.9402, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.022726301103830338, "rewards/margins": 0.06853805482387543, "rewards/rejected": -0.04581175372004509, "step": 400 }, { "epoch": 0.42, "learning_rate": 4.772292384232682e-07, "logits/chosen": -3.0280232429504395, "logits/rejected": -2.9936630725860596, "logps/chosen": -261.80731201171875, "logps/rejected": -251.14950561523438, "loss": 0.9398, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": 0.014309520833194256, "rewards/margins": 0.050246305763721466, "rewards/rejected": -0.03593678027391434, "step": 410 }, { "epoch": 0.43, "learning_rate": 4.753157290470723e-07, "logits/chosen": -3.0422613620758057, "logits/rejected": -3.004459857940674, "logps/chosen": -245.8754425048828, "logps/rejected": -202.38157653808594, "loss": 0.9391, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.005309578962624073, "rewards/margins": 0.0503067672252655, "rewards/rejected": -0.0449971929192543, "step": 420 }, { "epoch": 0.44, "learning_rate": 4.7340221967087635e-07, "logits/chosen": -3.061112880706787, "logits/rejected": -3.045253038406372, "logps/chosen": -257.9686584472656, "logps/rejected": -239.047119140625, "loss": 0.9323, "rewards/accuracies": 0.6875, "rewards/chosen": 0.012545446865260601, "rewards/margins": 0.07196511328220367, "rewards/rejected": -0.0594196543097496, "step": 430 }, { "epoch": 0.45, "learning_rate": 4.714887102946804e-07, "logits/chosen": -3.0016167163848877, "logits/rejected": -2.9664511680603027, "logps/chosen": -278.2596740722656, "logps/rejected": -246.8672637939453, "loss": 0.9194, "rewards/accuracies": 0.71875, "rewards/chosen": 0.00037018657894805074, "rewards/margins": 0.0750364139676094, "rewards/rejected": -0.07466623187065125, "step": 440 }, { "epoch": 0.46, "learning_rate": 4.6957520091848447e-07, "logits/chosen": -3.0514495372772217, "logits/rejected": -3.0419204235076904, "logps/chosen": -272.3708801269531, "logps/rejected": -227.4208221435547, "loss": 0.9356, "rewards/accuracies": 0.668749988079071, "rewards/chosen": 0.01730353757739067, "rewards/margins": 0.059229202568531036, "rewards/rejected": -0.04192566126585007, "step": 450 }, { "epoch": 0.47, "learning_rate": 4.6766169154228853e-07, "logits/chosen": -3.0307857990264893, "logits/rejected": -3.0139756202697754, "logps/chosen": -264.16168212890625, "logps/rejected": -219.8409881591797, "loss": 0.9122, "rewards/accuracies": 0.65625, "rewards/chosen": 0.007891577668488026, "rewards/margins": 0.07121269404888153, "rewards/rejected": -0.06332111358642578, "step": 460 }, { "epoch": 0.49, "learning_rate": 4.657481821660926e-07, "logits/chosen": -3.016810178756714, "logits/rejected": -3.019348621368408, "logps/chosen": -238.8083038330078, "logps/rejected": -212.76193237304688, "loss": 0.9028, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.028794366866350174, "rewards/margins": 0.08307679742574692, "rewards/rejected": -0.054282426834106445, "step": 470 }, { "epoch": 0.5, "learning_rate": 4.6383467278989666e-07, "logits/chosen": -3.044045925140381, "logits/rejected": -3.0309886932373047, "logps/chosen": -282.59814453125, "logps/rejected": -229.63858032226562, "loss": 0.9063, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.022610556334257126, "rewards/margins": 0.09912824630737305, "rewards/rejected": -0.07651769369840622, "step": 480 }, { "epoch": 0.51, "learning_rate": 4.6192116341370067e-07, "logits/chosen": -3.0251693725585938, "logits/rejected": -3.040748119354248, "logps/chosen": -289.87896728515625, "logps/rejected": -243.85952758789062, "loss": 0.8948, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": 0.03339407593011856, "rewards/margins": 0.1147690862417221, "rewards/rejected": -0.08137501776218414, "step": 490 }, { "epoch": 0.52, "learning_rate": 4.6000765403750473e-07, "logits/chosen": -3.0105278491973877, "logits/rejected": -2.9718642234802246, "logps/chosen": -244.4474334716797, "logps/rejected": -221.8011474609375, "loss": 0.8829, "rewards/accuracies": 0.668749988079071, "rewards/chosen": 0.017204025760293007, "rewards/margins": 0.0894416943192482, "rewards/rejected": -0.07223766297101974, "step": 500 }, { "epoch": 0.53, "learning_rate": 4.580941446613088e-07, "logits/chosen": -2.9197583198547363, "logits/rejected": -2.915809392929077, "logps/chosen": -304.55645751953125, "logps/rejected": -253.95828247070312, "loss": 0.8834, "rewards/accuracies": 0.71875, "rewards/chosen": 0.02509082481265068, "rewards/margins": 0.11200642585754395, "rewards/rejected": -0.08691558986902237, "step": 510 }, { "epoch": 0.54, "learning_rate": 4.5618063528511285e-07, "logits/chosen": -3.0240163803100586, "logits/rejected": -2.998610019683838, "logps/chosen": -270.31378173828125, "logps/rejected": -242.4883270263672, "loss": 0.8813, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.02489570900797844, "rewards/margins": 0.13008132576942444, "rewards/rejected": -0.1051856279373169, "step": 520 }, { "epoch": 0.55, "learning_rate": 4.542671259089169e-07, "logits/chosen": -3.0373079776763916, "logits/rejected": -3.0401182174682617, "logps/chosen": -235.19534301757812, "logps/rejected": -221.75363159179688, "loss": 0.8933, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.00027992590912617743, "rewards/margins": 0.09569151699542999, "rewards/rejected": -0.09541159123182297, "step": 530 }, { "epoch": 0.56, "learning_rate": 4.52353616532721e-07, "logits/chosen": -3.0647318363189697, "logits/rejected": -3.054605484008789, "logps/chosen": -280.6692810058594, "logps/rejected": -237.3024444580078, "loss": 0.8826, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.03587502986192703, "rewards/margins": 0.12422885000705719, "rewards/rejected": -0.08835381269454956, "step": 540 }, { "epoch": 0.57, "learning_rate": 4.5044010715652504e-07, "logits/chosen": -3.0048820972442627, "logits/rejected": -3.0057966709136963, "logps/chosen": -256.4432373046875, "logps/rejected": -229.13198852539062, "loss": 0.878, "rewards/accuracies": 0.6875, "rewards/chosen": 0.039617545902729034, "rewards/margins": 0.11413271725177765, "rewards/rejected": -0.07451517134904861, "step": 550 }, { "epoch": 0.58, "learning_rate": 4.485265977803291e-07, "logits/chosen": -3.0452122688293457, "logits/rejected": -3.041473865509033, "logps/chosen": -293.8966369628906, "logps/rejected": -251.0624542236328, "loss": 0.8656, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.04036722332239151, "rewards/margins": 0.1490786224603653, "rewards/rejected": -0.10871138423681259, "step": 560 }, { "epoch": 0.59, "learning_rate": 4.4661308840413316e-07, "logits/chosen": -3.0647714138031006, "logits/rejected": -3.01206636428833, "logps/chosen": -260.71343994140625, "logps/rejected": -221.20632934570312, "loss": 0.8666, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.011453949846327305, "rewards/margins": 0.13622693717479706, "rewards/rejected": -0.12477298080921173, "step": 570 }, { "epoch": 0.6, "learning_rate": 4.446995790279372e-07, "logits/chosen": -3.034562349319458, "logits/rejected": -2.989062547683716, "logps/chosen": -291.01446533203125, "logps/rejected": -241.4232177734375, "loss": 0.8519, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": 0.035387031733989716, "rewards/margins": 0.18669307231903076, "rewards/rejected": -0.15130606293678284, "step": 580 }, { "epoch": 0.61, "learning_rate": 4.4278606965174123e-07, "logits/chosen": -3.012864351272583, "logits/rejected": -3.005479097366333, "logps/chosen": -243.08975219726562, "logps/rejected": -216.92074584960938, "loss": 0.8492, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.002515086205676198, "rewards/margins": 0.121725894510746, "rewards/rejected": -0.11921081691980362, "step": 590 }, { "epoch": 0.62, "learning_rate": 4.408725602755453e-07, "logits/chosen": -3.013345718383789, "logits/rejected": -3.0360682010650635, "logps/chosen": -248.50326538085938, "logps/rejected": -203.6788787841797, "loss": 0.8617, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 0.0035158656537532806, "rewards/margins": 0.14544394612312317, "rewards/rejected": -0.1419280618429184, "step": 600 }, { "epoch": 0.63, "learning_rate": 4.3895905089934936e-07, "logits/chosen": -3.047393321990967, "logits/rejected": -3.0524885654449463, "logps/chosen": -255.6022491455078, "logps/rejected": -219.2570343017578, "loss": 0.8256, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.03486616909503937, "rewards/margins": 0.1496470421552658, "rewards/rejected": -0.11478086560964584, "step": 610 }, { "epoch": 0.64, "learning_rate": 4.370455415231534e-07, "logits/chosen": -3.066741466522217, "logits/rejected": -3.046435832977295, "logps/chosen": -305.30712890625, "logps/rejected": -249.01968383789062, "loss": 0.8203, "rewards/accuracies": 0.731249988079071, "rewards/chosen": 0.04162443429231644, "rewards/margins": 0.18165114521980286, "rewards/rejected": -0.14002671837806702, "step": 620 }, { "epoch": 0.65, "learning_rate": 4.351320321469575e-07, "logits/chosen": -2.990051746368408, "logits/rejected": -2.992987632751465, "logps/chosen": -251.0054168701172, "logps/rejected": -238.8704376220703, "loss": 0.8282, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.028837282210588455, "rewards/margins": 0.1591511368751526, "rewards/rejected": -0.13031385838985443, "step": 630 }, { "epoch": 0.66, "learning_rate": 4.3321852277076154e-07, "logits/chosen": -3.0534205436706543, "logits/rejected": -3.0434939861297607, "logps/chosen": -256.14019775390625, "logps/rejected": -216.6497802734375, "loss": 0.8083, "rewards/accuracies": 0.731249988079071, "rewards/chosen": 0.034832023084163666, "rewards/margins": 0.20265790820121765, "rewards/rejected": -0.16782590746879578, "step": 640 }, { "epoch": 0.67, "learning_rate": 4.313050133945656e-07, "logits/chosen": -3.0195870399475098, "logits/rejected": -3.027886390686035, "logps/chosen": -277.5207214355469, "logps/rejected": -232.2840576171875, "loss": 0.8119, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.05330047011375427, "rewards/margins": 0.21206972002983093, "rewards/rejected": -0.15876924991607666, "step": 650 }, { "epoch": 0.68, "learning_rate": 4.2939150401836967e-07, "logits/chosen": -3.0407612323760986, "logits/rejected": -3.014266014099121, "logps/chosen": -271.2508850097656, "logps/rejected": -233.294189453125, "loss": 0.792, "rewards/accuracies": 0.71875, "rewards/chosen": 0.02551809512078762, "rewards/margins": 0.21874013543128967, "rewards/rejected": -0.1932220160961151, "step": 660 }, { "epoch": 0.69, "learning_rate": 4.2747799464217373e-07, "logits/chosen": -2.994800567626953, "logits/rejected": -2.9698376655578613, "logps/chosen": -287.5726623535156, "logps/rejected": -237.8695068359375, "loss": 0.8042, "rewards/accuracies": 0.71875, "rewards/chosen": 0.006596171762794256, "rewards/margins": 0.18577079474925995, "rewards/rejected": -0.17917463183403015, "step": 670 }, { "epoch": 0.7, "learning_rate": 4.255644852659778e-07, "logits/chosen": -3.0441243648529053, "logits/rejected": -3.053039073944092, "logps/chosen": -281.0970153808594, "logps/rejected": -227.56851196289062, "loss": 0.8389, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.022974971681833267, "rewards/margins": 0.15209509432315826, "rewards/rejected": -0.1291201412677765, "step": 680 }, { "epoch": 0.71, "learning_rate": 4.236509758897818e-07, "logits/chosen": -3.0892813205718994, "logits/rejected": -3.0816287994384766, "logps/chosen": -258.57781982421875, "logps/rejected": -230.27615356445312, "loss": 0.8064, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": 0.006377282552421093, "rewards/margins": 0.21148601174354553, "rewards/rejected": -0.20510873198509216, "step": 690 }, { "epoch": 0.72, "learning_rate": 4.2173746651358586e-07, "logits/chosen": -3.0129265785217285, "logits/rejected": -2.9877090454101562, "logps/chosen": -275.82568359375, "logps/rejected": -219.6047821044922, "loss": 0.8098, "rewards/accuracies": 0.668749988079071, "rewards/chosen": 0.04388806223869324, "rewards/margins": 0.2091568410396576, "rewards/rejected": -0.16526879370212555, "step": 700 }, { "epoch": 0.73, "learning_rate": 4.198239571373899e-07, "logits/chosen": -2.9762985706329346, "logits/rejected": -2.986323833465576, "logps/chosen": -262.74371337890625, "logps/rejected": -207.13418579101562, "loss": 0.8056, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.02364834025502205, "rewards/margins": 0.2192881554365158, "rewards/rejected": -0.19563981890678406, "step": 710 }, { "epoch": 0.74, "learning_rate": 4.17910447761194e-07, "logits/chosen": -3.0574042797088623, "logits/rejected": -3.0168094635009766, "logps/chosen": -296.2746276855469, "logps/rejected": -256.06854248046875, "loss": 0.8097, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.013486223295331001, "rewards/margins": 0.14934802055358887, "rewards/rejected": -0.13586178421974182, "step": 720 }, { "epoch": 0.75, "learning_rate": 4.1599693838499805e-07, "logits/chosen": -3.0353150367736816, "logits/rejected": -3.0224339962005615, "logps/chosen": -261.10992431640625, "logps/rejected": -251.72293090820312, "loss": 0.7899, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.007094231434166431, "rewards/margins": 0.16344769299030304, "rewards/rejected": -0.1705418974161148, "step": 730 }, { "epoch": 0.76, "learning_rate": 4.140834290088021e-07, "logits/chosen": -3.008420944213867, "logits/rejected": -2.9702980518341064, "logps/chosen": -269.81903076171875, "logps/rejected": -199.1494903564453, "loss": 0.7627, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.01843448355793953, "rewards/margins": 0.2920437455177307, "rewards/rejected": -0.2736092209815979, "step": 740 }, { "epoch": 0.77, "learning_rate": 4.121699196326062e-07, "logits/chosen": -3.004692792892456, "logits/rejected": -2.9878294467926025, "logps/chosen": -262.6126708984375, "logps/rejected": -220.02096557617188, "loss": 0.7653, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.001659922068938613, "rewards/margins": 0.25197452306747437, "rewards/rejected": -0.2503146231174469, "step": 750 }, { "epoch": 0.78, "learning_rate": 4.1025641025641024e-07, "logits/chosen": -2.9879119396209717, "logits/rejected": -2.980886459350586, "logps/chosen": -262.13287353515625, "logps/rejected": -249.5322723388672, "loss": 0.782, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.036843255162239075, "rewards/margins": 0.17954595386981964, "rewards/rejected": -0.21638920903205872, "step": 760 }, { "epoch": 0.8, "learning_rate": 4.083429008802143e-07, "logits/chosen": -3.0227646827697754, "logits/rejected": -3.0109972953796387, "logps/chosen": -266.5917663574219, "logps/rejected": -237.83108520507812, "loss": 0.7486, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03099486604332924, "rewards/margins": 0.23689258098602295, "rewards/rejected": -0.20589768886566162, "step": 770 }, { "epoch": 0.81, "learning_rate": 4.0642939150401836e-07, "logits/chosen": -2.9976096153259277, "logits/rejected": -2.9851810932159424, "logps/chosen": -283.94677734375, "logps/rejected": -237.71841430664062, "loss": 0.7486, "rewards/accuracies": 0.75, "rewards/chosen": -0.020093750208616257, "rewards/margins": 0.27248382568359375, "rewards/rejected": -0.2925775945186615, "step": 780 }, { "epoch": 0.82, "learning_rate": 4.0451588212782237e-07, "logits/chosen": -3.031033754348755, "logits/rejected": -3.024602174758911, "logps/chosen": -261.4927978515625, "logps/rejected": -222.33651733398438, "loss": 0.7052, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 0.016252126544713974, "rewards/margins": 0.25844720005989075, "rewards/rejected": -0.24219508469104767, "step": 790 }, { "epoch": 0.83, "learning_rate": 4.0260237275162643e-07, "logits/chosen": -3.017972946166992, "logits/rejected": -2.993112564086914, "logps/chosen": -277.76177978515625, "logps/rejected": -241.6676483154297, "loss": 0.765, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.010549797676503658, "rewards/margins": 0.2188224494457245, "rewards/rejected": -0.20827265083789825, "step": 800 }, { "epoch": 0.84, "learning_rate": 4.006888633754305e-07, "logits/chosen": -3.0501556396484375, "logits/rejected": -3.039248466491699, "logps/chosen": -261.3684387207031, "logps/rejected": -231.1971893310547, "loss": 0.6973, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": 0.002077583223581314, "rewards/margins": 0.3389395475387573, "rewards/rejected": -0.3368619680404663, "step": 810 }, { "epoch": 0.85, "learning_rate": 3.9877535399923456e-07, "logits/chosen": -3.063708782196045, "logits/rejected": -3.0399699211120605, "logps/chosen": -294.5484924316406, "logps/rejected": -228.28854370117188, "loss": 0.7335, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.037904877215623856, "rewards/margins": 0.2777422368526459, "rewards/rejected": -0.2398373782634735, "step": 820 }, { "epoch": 0.86, "learning_rate": 3.968618446230386e-07, "logits/chosen": -3.044889211654663, "logits/rejected": -3.0049989223480225, "logps/chosen": -280.3213195800781, "logps/rejected": -223.97787475585938, "loss": 0.6679, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.06323406845331192, "rewards/margins": 0.37538376450538635, "rewards/rejected": -0.31214970350265503, "step": 830 }, { "epoch": 0.87, "learning_rate": 3.949483352468427e-07, "logits/chosen": -3.0053951740264893, "logits/rejected": -2.98051118850708, "logps/chosen": -252.00387573242188, "logps/rejected": -218.14602661132812, "loss": 0.7566, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.040188662707805634, "rewards/margins": 0.23952028155326843, "rewards/rejected": -0.2797089219093323, "step": 840 }, { "epoch": 0.88, "learning_rate": 3.9303482587064674e-07, "logits/chosen": -3.0364222526550293, "logits/rejected": -3.0096614360809326, "logps/chosen": -253.96923828125, "logps/rejected": -229.8144989013672, "loss": 0.7089, "rewards/accuracies": 0.75, "rewards/chosen": 0.007532055489718914, "rewards/margins": 0.29781442880630493, "rewards/rejected": -0.29028236865997314, "step": 850 }, { "epoch": 0.89, "learning_rate": 3.911213164944508e-07, "logits/chosen": -3.0369744300842285, "logits/rejected": -3.0353286266326904, "logps/chosen": -290.00042724609375, "logps/rejected": -273.43267822265625, "loss": 0.7019, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.018222318962216377, "rewards/margins": 0.31897154450416565, "rewards/rejected": -0.3007492423057556, "step": 860 }, { "epoch": 0.9, "learning_rate": 3.8920780711825487e-07, "logits/chosen": -2.9672696590423584, "logits/rejected": -2.9645469188690186, "logps/chosen": -307.1138610839844, "logps/rejected": -237.83786010742188, "loss": 0.6937, "rewards/accuracies": 0.706250011920929, "rewards/chosen": 0.005494369193911552, "rewards/margins": 0.31811192631721497, "rewards/rejected": -0.31261754035949707, "step": 870 }, { "epoch": 0.91, "learning_rate": 3.8729429774205893e-07, "logits/chosen": -3.0339221954345703, "logits/rejected": -3.0139739513397217, "logps/chosen": -284.10772705078125, "logps/rejected": -243.3460235595703, "loss": 0.6766, "rewards/accuracies": 0.71875, "rewards/chosen": -0.01381886936724186, "rewards/margins": 0.38050729036331177, "rewards/rejected": -0.39432623982429504, "step": 880 }, { "epoch": 0.92, "learning_rate": 3.8538078836586294e-07, "logits/chosen": -3.0409202575683594, "logits/rejected": -3.017521381378174, "logps/chosen": -274.10675048828125, "logps/rejected": -235.25332641601562, "loss": 0.7214, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.045437611639499664, "rewards/margins": 0.3723521828651428, "rewards/rejected": -0.32691454887390137, "step": 890 }, { "epoch": 0.93, "learning_rate": 3.83467278989667e-07, "logits/chosen": -3.0040934085845947, "logits/rejected": -2.9863858222961426, "logps/chosen": -264.234130859375, "logps/rejected": -212.1512908935547, "loss": 0.6845, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": 0.005959497299045324, "rewards/margins": 0.3572615683078766, "rewards/rejected": -0.3513020873069763, "step": 900 }, { "epoch": 0.94, "learning_rate": 3.8155376961347106e-07, "logits/chosen": -3.0645086765289307, "logits/rejected": -3.007006883621216, "logps/chosen": -230.1156005859375, "logps/rejected": -207.70779418945312, "loss": 0.6515, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.018168695271015167, "rewards/margins": 0.38268524408340454, "rewards/rejected": -0.4008539617061615, "step": 910 }, { "epoch": 0.95, "learning_rate": 3.796402602372751e-07, "logits/chosen": -3.0344386100769043, "logits/rejected": -3.020028829574585, "logps/chosen": -273.54412841796875, "logps/rejected": -222.2449493408203, "loss": 0.6707, "rewards/accuracies": 0.6875, "rewards/chosen": -0.026546839624643326, "rewards/margins": 0.2530173659324646, "rewards/rejected": -0.27956423163414, "step": 920 }, { "epoch": 0.96, "learning_rate": 3.777267508610792e-07, "logits/chosen": -2.9949254989624023, "logits/rejected": -3.0048325061798096, "logps/chosen": -247.9457244873047, "logps/rejected": -203.7804412841797, "loss": 0.6833, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.05914358049631119, "rewards/margins": 0.3101976811885834, "rewards/rejected": -0.3693412244319916, "step": 930 }, { "epoch": 0.97, "learning_rate": 3.7581324148488325e-07, "logits/chosen": -3.0347888469696045, "logits/rejected": -2.994046688079834, "logps/chosen": -263.569091796875, "logps/rejected": -231.2215118408203, "loss": 0.6597, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.021738069131970406, "rewards/margins": 0.3827739953994751, "rewards/rejected": -0.36103588342666626, "step": 940 }, { "epoch": 0.98, "learning_rate": 3.738997321086873e-07, "logits/chosen": -3.0062592029571533, "logits/rejected": -2.993868589401245, "logps/chosen": -276.2633056640625, "logps/rejected": -232.21493530273438, "loss": 0.6677, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.03908178210258484, "rewards/margins": 0.2943773865699768, "rewards/rejected": -0.33345913887023926, "step": 950 }, { "epoch": 0.99, "learning_rate": 3.7198622273249137e-07, "logits/chosen": -2.9820735454559326, "logits/rejected": -3.0040230751037598, "logps/chosen": -275.2674865722656, "logps/rejected": -240.7872314453125, "loss": 0.6137, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": 0.020224738866090775, "rewards/margins": 0.3282891809940338, "rewards/rejected": -0.30806440114974976, "step": 960 }, { "epoch": 1.0, "eval_logits/chosen": -3.0153579711914062, "eval_logits/rejected": -2.9988856315612793, "eval_logps/chosen": -271.4433288574219, "eval_logps/rejected": -232.6822967529297, "eval_loss": 0.6276752948760986, "eval_rewards/accuracies": 0.7039999961853027, "eval_rewards/chosen": -0.028680188581347466, "eval_rewards/margins": 0.3904646337032318, "eval_rewards/rejected": -0.4191448390483856, "eval_runtime": 449.0184, "eval_samples_per_second": 4.454, "eval_steps_per_second": 0.278, "step": 968 }, { "epoch": 1.0, "learning_rate": 3.7007271335629544e-07, "logits/chosen": -3.0252368450164795, "logits/rejected": -3.0065531730651855, "logps/chosen": -275.3318176269531, "logps/rejected": -244.02072143554688, "loss": 0.6284, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.04684365913271904, "rewards/margins": 0.3803178668022156, "rewards/rejected": -0.42716145515441895, "step": 970 }, { "epoch": 1.01, "learning_rate": 3.681592039800995e-07, "logits/chosen": -3.0262959003448486, "logits/rejected": -3.0287508964538574, "logps/chosen": -271.2191162109375, "logps/rejected": -233.7063446044922, "loss": 0.6202, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.028351956978440285, "rewards/margins": 0.39485400915145874, "rewards/rejected": -0.4232059419155121, "step": 980 }, { "epoch": 1.02, "learning_rate": 3.662456946039035e-07, "logits/chosen": -3.0115294456481934, "logits/rejected": -3.0146679878234863, "logps/chosen": -251.9388885498047, "logps/rejected": -229.016357421875, "loss": 0.6954, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.07080521434545517, "rewards/margins": 0.35120025277137756, "rewards/rejected": -0.42200547456741333, "step": 990 }, { "epoch": 1.03, "learning_rate": 3.6433218522770757e-07, "logits/chosen": -3.007986068725586, "logits/rejected": -2.9926083087921143, "logps/chosen": -262.0426940917969, "logps/rejected": -256.99188232421875, "loss": 0.6533, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.070511594414711, "rewards/margins": 0.3111626207828522, "rewards/rejected": -0.381674200296402, "step": 1000 }, { "epoch": 1.04, "learning_rate": 3.6241867585151163e-07, "logits/chosen": -2.9853007793426514, "logits/rejected": -3.0082547664642334, "logps/chosen": -270.3041076660156, "logps/rejected": -236.4681854248047, "loss": 0.6359, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.022838518023490906, "rewards/margins": 0.3421871066093445, "rewards/rejected": -0.3650256097316742, "step": 1010 }, { "epoch": 1.05, "learning_rate": 3.605051664753157e-07, "logits/chosen": -2.9447665214538574, "logits/rejected": -2.933103084564209, "logps/chosen": -266.8084411621094, "logps/rejected": -215.50308227539062, "loss": 0.538, "rewards/accuracies": 0.75, "rewards/chosen": -0.04661910608410835, "rewards/margins": 0.4469257891178131, "rewards/rejected": -0.49354487657546997, "step": 1020 }, { "epoch": 1.06, "learning_rate": 3.5859165709911975e-07, "logits/chosen": -3.0248591899871826, "logits/rejected": -2.98162579536438, "logps/chosen": -277.4466552734375, "logps/rejected": -250.49893188476562, "loss": 0.5931, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.004877015482634306, "rewards/margins": 0.4228130280971527, "rewards/rejected": -0.4276900887489319, "step": 1030 }, { "epoch": 1.07, "learning_rate": 3.566781477229238e-07, "logits/chosen": -3.0339090824127197, "logits/rejected": -2.9957573413848877, "logps/chosen": -275.76910400390625, "logps/rejected": -235.79830932617188, "loss": 0.6007, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08596549183130264, "rewards/margins": 0.29382139444351196, "rewards/rejected": -0.3797869086265564, "step": 1040 }, { "epoch": 1.08, "learning_rate": 3.547646383467279e-07, "logits/chosen": -2.9797756671905518, "logits/rejected": -2.9523847103118896, "logps/chosen": -284.53839111328125, "logps/rejected": -214.0476531982422, "loss": 0.5431, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07950444519519806, "rewards/margins": 0.4745730459690094, "rewards/rejected": -0.5540775060653687, "step": 1050 }, { "epoch": 1.09, "learning_rate": 3.5285112897053194e-07, "logits/chosen": -2.946882963180542, "logits/rejected": -2.9203898906707764, "logps/chosen": -257.57611083984375, "logps/rejected": -244.93075561523438, "loss": 0.5401, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.020254051312804222, "rewards/margins": 0.4997115135192871, "rewards/rejected": -0.5199655294418335, "step": 1060 }, { "epoch": 1.1, "learning_rate": 3.50937619594336e-07, "logits/chosen": -2.991321563720703, "logits/rejected": -3.0015194416046143, "logps/chosen": -279.9203186035156, "logps/rejected": -243.14889526367188, "loss": 0.5657, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08546855300664902, "rewards/margins": 0.4243212342262268, "rewards/rejected": -0.5097898244857788, "step": 1070 }, { "epoch": 1.12, "learning_rate": 3.4902411021814007e-07, "logits/chosen": -3.034787893295288, "logits/rejected": -2.9858031272888184, "logps/chosen": -259.6640319824219, "logps/rejected": -238.2606201171875, "loss": 0.5362, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.11487703025341034, "rewards/margins": 0.41897639632225037, "rewards/rejected": -0.5338534116744995, "step": 1080 }, { "epoch": 1.13, "learning_rate": 3.4711060084194413e-07, "logits/chosen": -2.951054096221924, "logits/rejected": -2.953270673751831, "logps/chosen": -288.8511962890625, "logps/rejected": -241.43490600585938, "loss": 0.5902, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.041309602558612823, "rewards/margins": 0.47613269090652466, "rewards/rejected": -0.5174422860145569, "step": 1090 }, { "epoch": 1.14, "learning_rate": 3.4519709146574814e-07, "logits/chosen": -3.0297837257385254, "logits/rejected": -2.9690465927124023, "logps/chosen": -249.8524932861328, "logps/rejected": -211.01486206054688, "loss": 0.579, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.15785838663578033, "rewards/margins": 0.3855217397212982, "rewards/rejected": -0.5433801412582397, "step": 1100 }, { "epoch": 1.15, "learning_rate": 3.432835820895522e-07, "logits/chosen": -3.023965358734131, "logits/rejected": -3.012373685836792, "logps/chosen": -296.8121643066406, "logps/rejected": -269.64410400390625, "loss": 0.576, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.013493712060153484, "rewards/margins": 0.45747965574264526, "rewards/rejected": -0.47097334265708923, "step": 1110 }, { "epoch": 1.16, "learning_rate": 3.4137007271335626e-07, "logits/chosen": -2.9927139282226562, "logits/rejected": -2.995068073272705, "logps/chosen": -238.1875, "logps/rejected": -238.9112548828125, "loss": 0.5405, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07948704063892365, "rewards/margins": 0.4464842677116394, "rewards/rejected": -0.5259712934494019, "step": 1120 }, { "epoch": 1.17, "learning_rate": 3.394565633371603e-07, "logits/chosen": -3.00760555267334, "logits/rejected": -2.992061138153076, "logps/chosen": -304.2665100097656, "logps/rejected": -251.5317840576172, "loss": 0.5381, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.1150180920958519, "rewards/margins": 0.46640753746032715, "rewards/rejected": -0.581425666809082, "step": 1130 }, { "epoch": 1.18, "learning_rate": 3.375430539609644e-07, "logits/chosen": -3.0006496906280518, "logits/rejected": -3.0168707370758057, "logps/chosen": -260.1852111816406, "logps/rejected": -252.5244598388672, "loss": 0.4865, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.16791771352291107, "rewards/margins": 0.4443301260471344, "rewards/rejected": -0.6122478246688843, "step": 1140 }, { "epoch": 1.19, "learning_rate": 3.3562954458476845e-07, "logits/chosen": -3.023045063018799, "logits/rejected": -2.9913861751556396, "logps/chosen": -295.21502685546875, "logps/rejected": -226.15426635742188, "loss": 0.5825, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.09654710441827774, "rewards/margins": 0.504024088382721, "rewards/rejected": -0.6005711555480957, "step": 1150 }, { "epoch": 1.2, "learning_rate": 3.337160352085725e-07, "logits/chosen": -2.9731438159942627, "logits/rejected": -2.9854893684387207, "logps/chosen": -260.6565856933594, "logps/rejected": -235.22476196289062, "loss": 0.5259, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.0705583468079567, "rewards/margins": 0.5393149852752686, "rewards/rejected": -0.6098732948303223, "step": 1160 }, { "epoch": 1.21, "learning_rate": 3.3180252583237657e-07, "logits/chosen": -3.015777111053467, "logits/rejected": -2.9891409873962402, "logps/chosen": -270.416748046875, "logps/rejected": -232.32089233398438, "loss": 0.5407, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.13838811218738556, "rewards/margins": 0.4489242136478424, "rewards/rejected": -0.5873123407363892, "step": 1170 }, { "epoch": 1.22, "learning_rate": 3.2988901645618063e-07, "logits/chosen": -2.980250597000122, "logits/rejected": -2.9568262100219727, "logps/chosen": -257.48504638671875, "logps/rejected": -231.20101928710938, "loss": 0.5238, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.26241081953048706, "rewards/margins": 0.4477602541446686, "rewards/rejected": -0.7101710438728333, "step": 1180 }, { "epoch": 1.23, "learning_rate": 3.279755070799847e-07, "logits/chosen": -2.987285614013672, "logits/rejected": -2.9756875038146973, "logps/chosen": -250.1853790283203, "logps/rejected": -234.43313598632812, "loss": 0.4216, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.1883806735277176, "rewards/margins": 0.47481974959373474, "rewards/rejected": -0.6632004380226135, "step": 1190 }, { "epoch": 1.24, "learning_rate": 3.260619977037887e-07, "logits/chosen": -2.9940743446350098, "logits/rejected": -2.9866111278533936, "logps/chosen": -261.4402770996094, "logps/rejected": -236.15670776367188, "loss": 0.4046, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.11559490859508514, "rewards/margins": 0.5072110891342163, "rewards/rejected": -0.6228059530258179, "step": 1200 }, { "epoch": 1.25, "learning_rate": 3.2414848832759277e-07, "logits/chosen": -3.0217831134796143, "logits/rejected": -2.9786057472229004, "logps/chosen": -263.88592529296875, "logps/rejected": -227.23828125, "loss": 0.4809, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.1470792442560196, "rewards/margins": 0.5500885844230652, "rewards/rejected": -0.6971677541732788, "step": 1210 }, { "epoch": 1.26, "learning_rate": 3.2223497895139683e-07, "logits/chosen": -2.983037233352661, "logits/rejected": -2.9867138862609863, "logps/chosen": -270.10089111328125, "logps/rejected": -262.2214660644531, "loss": 0.5297, "rewards/accuracies": 0.6875, "rewards/chosen": -0.19438357651233673, "rewards/margins": 0.3766476809978485, "rewards/rejected": -0.5710312128067017, "step": 1220 }, { "epoch": 1.27, "learning_rate": 3.203214695752009e-07, "logits/chosen": -2.923466920852661, "logits/rejected": -2.9358396530151367, "logps/chosen": -291.2686767578125, "logps/rejected": -251.558837890625, "loss": 0.5026, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.16642309725284576, "rewards/margins": 0.3848158121109009, "rewards/rejected": -0.5512388944625854, "step": 1230 }, { "epoch": 1.28, "learning_rate": 3.1840796019900495e-07, "logits/chosen": -3.007678508758545, "logits/rejected": -2.988664388656616, "logps/chosen": -291.9914245605469, "logps/rejected": -263.6539611816406, "loss": 0.4669, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.1000869870185852, "rewards/margins": 0.597449004650116, "rewards/rejected": -0.6975361108779907, "step": 1240 }, { "epoch": 1.29, "learning_rate": 3.16494450822809e-07, "logits/chosen": -3.0168919563293457, "logits/rejected": -2.987907886505127, "logps/chosen": -249.50704956054688, "logps/rejected": -224.2857208251953, "loss": 0.4447, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.16220508515834808, "rewards/margins": 0.5248938798904419, "rewards/rejected": -0.6870989203453064, "step": 1250 }, { "epoch": 1.3, "learning_rate": 3.145809414466131e-07, "logits/chosen": -3.0371620655059814, "logits/rejected": -3.0139615535736084, "logps/chosen": -273.0102233886719, "logps/rejected": -234.0775909423828, "loss": 0.439, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13673745095729828, "rewards/margins": 0.6153367161750793, "rewards/rejected": -0.752074122428894, "step": 1260 }, { "epoch": 1.31, "learning_rate": 3.1266743207041714e-07, "logits/chosen": -3.0206820964813232, "logits/rejected": -3.0111212730407715, "logps/chosen": -309.1382751464844, "logps/rejected": -276.2449645996094, "loss": 0.4547, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1778876632452011, "rewards/margins": 0.6206526756286621, "rewards/rejected": -0.7985404133796692, "step": 1270 }, { "epoch": 1.32, "learning_rate": 3.107539226942212e-07, "logits/chosen": -3.007878303527832, "logits/rejected": -3.0043411254882812, "logps/chosen": -238.5860137939453, "logps/rejected": -227.81234741210938, "loss": 0.5382, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.1700361967086792, "rewards/margins": 0.43853870034217834, "rewards/rejected": -0.6085748672485352, "step": 1280 }, { "epoch": 1.33, "learning_rate": 3.0884041331802526e-07, "logits/chosen": -2.970362901687622, "logits/rejected": -2.969972848892212, "logps/chosen": -257.76812744140625, "logps/rejected": -230.56820678710938, "loss": 0.4622, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.14981916546821594, "rewards/margins": 0.5685046315193176, "rewards/rejected": -0.718323826789856, "step": 1290 }, { "epoch": 1.34, "learning_rate": 3.0692690394182927e-07, "logits/chosen": -3.0062780380249023, "logits/rejected": -2.9634342193603516, "logps/chosen": -278.4779968261719, "logps/rejected": -237.66873168945312, "loss": 0.3758, "rewards/accuracies": 0.71875, "rewards/chosen": -0.20812156796455383, "rewards/margins": 0.5646450519561768, "rewards/rejected": -0.7727665305137634, "step": 1300 }, { "epoch": 1.35, "learning_rate": 3.0501339456563334e-07, "logits/chosen": -2.974381923675537, "logits/rejected": -2.9355669021606445, "logps/chosen": -286.21453857421875, "logps/rejected": -236.1365203857422, "loss": 0.378, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.08342897891998291, "rewards/margins": 0.6408315896987915, "rewards/rejected": -0.7242605090141296, "step": 1310 }, { "epoch": 1.36, "learning_rate": 3.030998851894374e-07, "logits/chosen": -3.02203369140625, "logits/rejected": -3.0106561183929443, "logps/chosen": -244.7522430419922, "logps/rejected": -214.9333038330078, "loss": 0.3807, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.24016663432121277, "rewards/margins": 0.5892859697341919, "rewards/rejected": -0.829452633857727, "step": 1320 }, { "epoch": 1.37, "learning_rate": 3.0118637581324146e-07, "logits/chosen": -2.9896299839019775, "logits/rejected": -2.9215025901794434, "logps/chosen": -263.9308166503906, "logps/rejected": -236.4021453857422, "loss": 0.4104, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.14813081920146942, "rewards/margins": 0.6776358485221863, "rewards/rejected": -0.8257666826248169, "step": 1330 }, { "epoch": 1.38, "learning_rate": 2.992728664370455e-07, "logits/chosen": -3.0315442085266113, "logits/rejected": -3.035961866378784, "logps/chosen": -261.2752990722656, "logps/rejected": -235.47705078125, "loss": 0.3998, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.2596365213394165, "rewards/margins": 0.658948540687561, "rewards/rejected": -0.9185851812362671, "step": 1340 }, { "epoch": 1.39, "learning_rate": 2.973593570608496e-07, "logits/chosen": -3.0204250812530518, "logits/rejected": -3.015413761138916, "logps/chosen": -246.57290649414062, "logps/rejected": -231.91238403320312, "loss": 0.4193, "rewards/accuracies": 0.65625, "rewards/chosen": -0.3196146488189697, "rewards/margins": 0.4760715365409851, "rewards/rejected": -0.7956861257553101, "step": 1350 }, { "epoch": 1.4, "learning_rate": 2.9544584768465365e-07, "logits/chosen": -3.016695022583008, "logits/rejected": -2.9959843158721924, "logps/chosen": -275.1457824707031, "logps/rejected": -220.0662078857422, "loss": 0.4373, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.33876991271972656, "rewards/margins": 0.457784503698349, "rewards/rejected": -0.796554446220398, "step": 1360 }, { "epoch": 1.41, "learning_rate": 2.935323383084577e-07, "logits/chosen": -3.059567451477051, "logits/rejected": -2.9867048263549805, "logps/chosen": -287.88153076171875, "logps/rejected": -235.1467742919922, "loss": 0.3824, "rewards/accuracies": 0.71875, "rewards/chosen": -0.264303594827652, "rewards/margins": 0.6922810673713684, "rewards/rejected": -0.956584632396698, "step": 1370 }, { "epoch": 1.42, "learning_rate": 2.9161882893226177e-07, "logits/chosen": -2.975642442703247, "logits/rejected": -2.9326682090759277, "logps/chosen": -251.0466766357422, "logps/rejected": -223.5084686279297, "loss": 0.4333, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.1487085521221161, "rewards/margins": 0.689045250415802, "rewards/rejected": -0.8377537727355957, "step": 1380 }, { "epoch": 1.44, "learning_rate": 2.8970531955606583e-07, "logits/chosen": -3.0104973316192627, "logits/rejected": -2.9991507530212402, "logps/chosen": -296.5973205566406, "logps/rejected": -226.55111694335938, "loss": 0.3786, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.20407001674175262, "rewards/margins": 0.6605352163314819, "rewards/rejected": -0.8646053075790405, "step": 1390 }, { "epoch": 1.45, "learning_rate": 2.8779181017986984e-07, "logits/chosen": -2.9817981719970703, "logits/rejected": -2.988542079925537, "logps/chosen": -285.8086853027344, "logps/rejected": -253.98934936523438, "loss": 0.4188, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.25984063744544983, "rewards/margins": 0.5409084558486938, "rewards/rejected": -0.8007491230964661, "step": 1400 }, { "epoch": 1.46, "learning_rate": 2.858783008036739e-07, "logits/chosen": -2.99088978767395, "logits/rejected": -2.9880213737487793, "logps/chosen": -269.12982177734375, "logps/rejected": -245.9726104736328, "loss": 0.3797, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.3341473937034607, "rewards/margins": 0.6310809254646301, "rewards/rejected": -0.9652281999588013, "step": 1410 }, { "epoch": 1.47, "learning_rate": 2.8396479142747797e-07, "logits/chosen": -3.031111717224121, "logits/rejected": -3.011475086212158, "logps/chosen": -265.3778381347656, "logps/rejected": -236.4595947265625, "loss": 0.3353, "rewards/accuracies": 0.75, "rewards/chosen": -0.23791718482971191, "rewards/margins": 0.7006685733795166, "rewards/rejected": -0.938585638999939, "step": 1420 }, { "epoch": 1.48, "learning_rate": 2.8205128205128203e-07, "logits/chosen": -2.972764492034912, "logits/rejected": -2.9663169384002686, "logps/chosen": -261.602294921875, "logps/rejected": -261.4017639160156, "loss": 0.3131, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.2686450779438019, "rewards/margins": 0.671106219291687, "rewards/rejected": -0.9397512674331665, "step": 1430 }, { "epoch": 1.49, "learning_rate": 2.801377726750861e-07, "logits/chosen": -3.018611192703247, "logits/rejected": -2.9920201301574707, "logps/chosen": -253.02285766601562, "logps/rejected": -222.1746826171875, "loss": 0.3765, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.35335665941238403, "rewards/margins": 0.5318818092346191, "rewards/rejected": -0.8852384686470032, "step": 1440 }, { "epoch": 1.5, "learning_rate": 2.7822426329889015e-07, "logits/chosen": -2.9993600845336914, "logits/rejected": -2.953437328338623, "logps/chosen": -272.62347412109375, "logps/rejected": -223.8088836669922, "loss": 0.3979, "rewards/accuracies": 0.6875, "rewards/chosen": -0.3333335518836975, "rewards/margins": 0.6397331953048706, "rewards/rejected": -0.9730666875839233, "step": 1450 }, { "epoch": 1.51, "learning_rate": 2.763107539226942e-07, "logits/chosen": -3.0080342292785645, "logits/rejected": -2.9877243041992188, "logps/chosen": -285.1849670410156, "logps/rejected": -218.37954711914062, "loss": 0.3865, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.35278716683387756, "rewards/margins": 0.6462265253067017, "rewards/rejected": -0.9990137815475464, "step": 1460 }, { "epoch": 1.52, "learning_rate": 2.743972445464983e-07, "logits/chosen": -2.989492177963257, "logits/rejected": -2.9886727333068848, "logps/chosen": -276.1953430175781, "logps/rejected": -228.0172882080078, "loss": 0.4094, "rewards/accuracies": 0.65625, "rewards/chosen": -0.3594445586204529, "rewards/margins": 0.6459834575653076, "rewards/rejected": -1.0054280757904053, "step": 1470 }, { "epoch": 1.53, "learning_rate": 2.7248373517030234e-07, "logits/chosen": -2.9692909717559814, "logits/rejected": -2.978031635284424, "logps/chosen": -260.3178405761719, "logps/rejected": -236.1215362548828, "loss": 0.3159, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.33140963315963745, "rewards/margins": 0.5777403116226196, "rewards/rejected": -0.9091499447822571, "step": 1480 }, { "epoch": 1.54, "learning_rate": 2.705702257941064e-07, "logits/chosen": -2.9462051391601562, "logits/rejected": -2.9286155700683594, "logps/chosen": -289.8208923339844, "logps/rejected": -247.3198699951172, "loss": 0.3091, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3897753953933716, "rewards/margins": 0.5884745717048645, "rewards/rejected": -0.978249728679657, "step": 1490 }, { "epoch": 1.55, "learning_rate": 2.686567164179104e-07, "logits/chosen": -3.018886089324951, "logits/rejected": -2.9545936584472656, "logps/chosen": -265.15240478515625, "logps/rejected": -227.3736114501953, "loss": 0.3156, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.2435026615858078, "rewards/margins": 0.8812161684036255, "rewards/rejected": -1.1247189044952393, "step": 1500 }, { "epoch": 1.56, "learning_rate": 2.6674320704171447e-07, "logits/chosen": -3.019289016723633, "logits/rejected": -2.9821600914001465, "logps/chosen": -288.81646728515625, "logps/rejected": -226.24755859375, "loss": 0.262, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.3103589713573456, "rewards/margins": 0.6744868159294128, "rewards/rejected": -0.9848458170890808, "step": 1510 }, { "epoch": 1.57, "learning_rate": 2.6482969766551853e-07, "logits/chosen": -2.918996572494507, "logits/rejected": -2.9260575771331787, "logps/chosen": -229.50146484375, "logps/rejected": -233.02432250976562, "loss": 0.3571, "rewards/accuracies": 0.6875, "rewards/chosen": -0.3969925045967102, "rewards/margins": 0.5756487250328064, "rewards/rejected": -0.9726413488388062, "step": 1520 }, { "epoch": 1.58, "learning_rate": 2.629161882893226e-07, "logits/chosen": -3.0124337673187256, "logits/rejected": -2.9816842079162598, "logps/chosen": -272.998779296875, "logps/rejected": -232.07638549804688, "loss": 0.2496, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.3978312909603119, "rewards/margins": 0.6203471422195435, "rewards/rejected": -1.0181784629821777, "step": 1530 }, { "epoch": 1.59, "learning_rate": 2.6100267891312666e-07, "logits/chosen": -3.0306756496429443, "logits/rejected": -3.016284465789795, "logps/chosen": -293.61407470703125, "logps/rejected": -241.7076873779297, "loss": 0.2565, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.3279929757118225, "rewards/margins": 0.6071383953094482, "rewards/rejected": -0.9351313710212708, "step": 1540 }, { "epoch": 1.6, "learning_rate": 2.590891695369307e-07, "logits/chosen": -2.9170010089874268, "logits/rejected": -2.904465436935425, "logps/chosen": -251.91354370117188, "logps/rejected": -234.70751953125, "loss": 0.2908, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.33537206053733826, "rewards/margins": 0.6658948063850403, "rewards/rejected": -1.0012669563293457, "step": 1550 }, { "epoch": 1.61, "learning_rate": 2.571756601607348e-07, "logits/chosen": -3.0143392086029053, "logits/rejected": -2.9897260665893555, "logps/chosen": -301.2428894042969, "logps/rejected": -244.96145629882812, "loss": 0.2721, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3570322096347809, "rewards/margins": 0.7769542336463928, "rewards/rejected": -1.133986473083496, "step": 1560 }, { "epoch": 1.62, "learning_rate": 2.5526215078453884e-07, "logits/chosen": -3.022491216659546, "logits/rejected": -2.9913883209228516, "logps/chosen": -284.7639465332031, "logps/rejected": -248.6448211669922, "loss": 0.2595, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.3536800742149353, "rewards/margins": 0.7084030508995056, "rewards/rejected": -1.0620832443237305, "step": 1570 }, { "epoch": 1.63, "learning_rate": 2.533486414083429e-07, "logits/chosen": -3.0248942375183105, "logits/rejected": -2.9973325729370117, "logps/chosen": -284.40472412109375, "logps/rejected": -241.6700897216797, "loss": 0.1589, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.3532782196998596, "rewards/margins": 0.7998963594436646, "rewards/rejected": -1.153174638748169, "step": 1580 }, { "epoch": 1.64, "learning_rate": 2.5143513203214697e-07, "logits/chosen": -2.9913268089294434, "logits/rejected": -2.971450090408325, "logps/chosen": -319.8451232910156, "logps/rejected": -254.3859100341797, "loss": 0.3029, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.31969529390335083, "rewards/margins": 0.671107292175293, "rewards/rejected": -0.9908025860786438, "step": 1590 }, { "epoch": 1.65, "learning_rate": 2.49521622655951e-07, "logits/chosen": -2.959479331970215, "logits/rejected": -2.908963441848755, "logps/chosen": -272.8207092285156, "logps/rejected": -235.93020629882812, "loss": 0.2796, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.34670349955558777, "rewards/margins": 0.7621387243270874, "rewards/rejected": -1.108842134475708, "step": 1600 }, { "epoch": 1.66, "learning_rate": 2.4760811327975504e-07, "logits/chosen": -2.9981067180633545, "logits/rejected": -2.9972851276397705, "logps/chosen": -278.3575744628906, "logps/rejected": -239.57223510742188, "loss": 0.2067, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.5294784307479858, "rewards/margins": 0.6004038453102112, "rewards/rejected": -1.1298822164535522, "step": 1610 }, { "epoch": 1.67, "learning_rate": 2.456946039035591e-07, "logits/chosen": -3.030169725418091, "logits/rejected": -2.9914770126342773, "logps/chosen": -289.63116455078125, "logps/rejected": -249.74649047851562, "loss": 0.3592, "rewards/accuracies": 0.6875, "rewards/chosen": -0.44787079095840454, "rewards/margins": 0.63862544298172, "rewards/rejected": -1.086496114730835, "step": 1620 }, { "epoch": 1.68, "learning_rate": 2.4378109452736316e-07, "logits/chosen": -2.9960074424743652, "logits/rejected": -2.980121374130249, "logps/chosen": -267.38446044921875, "logps/rejected": -244.11593627929688, "loss": 0.2287, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.34468650817871094, "rewards/margins": 0.8173492550849915, "rewards/rejected": -1.1620357036590576, "step": 1630 }, { "epoch": 1.69, "learning_rate": 2.418675851511672e-07, "logits/chosen": -2.913470506668091, "logits/rejected": -2.90181040763855, "logps/chosen": -268.4359436035156, "logps/rejected": -238.85806274414062, "loss": 0.2227, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.43024927377700806, "rewards/margins": 0.583633303642273, "rewards/rejected": -1.0138825178146362, "step": 1640 }, { "epoch": 1.7, "learning_rate": 2.399540757749713e-07, "logits/chosen": -2.9995226860046387, "logits/rejected": -2.971139430999756, "logps/chosen": -276.39031982421875, "logps/rejected": -242.7020263671875, "loss": 0.2584, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.5093586444854736, "rewards/margins": 0.657143771648407, "rewards/rejected": -1.1665024757385254, "step": 1650 }, { "epoch": 1.71, "learning_rate": 2.3804056639877535e-07, "logits/chosen": -2.95731258392334, "logits/rejected": -2.9587581157684326, "logps/chosen": -248.0506134033203, "logps/rejected": -226.7265625, "loss": 0.2349, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.3466361165046692, "rewards/margins": 0.8473577499389648, "rewards/rejected": -1.1939939260482788, "step": 1660 }, { "epoch": 1.72, "learning_rate": 2.361270570225794e-07, "logits/chosen": -3.0136914253234863, "logits/rejected": -2.9863269329071045, "logps/chosen": -285.3213195800781, "logps/rejected": -233.94619750976562, "loss": 0.1567, "rewards/accuracies": 0.71875, "rewards/chosen": -0.3250366151332855, "rewards/margins": 0.8868409991264343, "rewards/rejected": -1.2118775844573975, "step": 1670 }, { "epoch": 1.73, "learning_rate": 2.3421354764638345e-07, "logits/chosen": -2.9790773391723633, "logits/rejected": -2.952543020248413, "logps/chosen": -276.2936706542969, "logps/rejected": -257.79718017578125, "loss": 0.1955, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.4911643862724304, "rewards/margins": 0.6523909568786621, "rewards/rejected": -1.1435552835464478, "step": 1680 }, { "epoch": 1.74, "learning_rate": 2.323000382701875e-07, "logits/chosen": -2.9914324283599854, "logits/rejected": -2.98734974861145, "logps/chosen": -322.0937805175781, "logps/rejected": -261.5361633300781, "loss": 0.1863, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.3622845411300659, "rewards/margins": 0.9477508664131165, "rewards/rejected": -1.3100353479385376, "step": 1690 }, { "epoch": 1.76, "learning_rate": 2.3038652889399157e-07, "logits/chosen": -2.9850406646728516, "logits/rejected": -2.972008228302002, "logps/chosen": -262.46197509765625, "logps/rejected": -237.19375610351562, "loss": 0.1809, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.53041011095047, "rewards/margins": 0.7410814762115479, "rewards/rejected": -1.2714916467666626, "step": 1700 }, { "epoch": 1.77, "learning_rate": 2.2847301951779563e-07, "logits/chosen": -2.983722448348999, "logits/rejected": -2.956228256225586, "logps/chosen": -272.92156982421875, "logps/rejected": -256.91107177734375, "loss": 0.1089, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.5018629431724548, "rewards/margins": 0.8895123600959778, "rewards/rejected": -1.391375184059143, "step": 1710 }, { "epoch": 1.78, "learning_rate": 2.265595101415997e-07, "logits/chosen": -2.9394657611846924, "logits/rejected": -2.92991304397583, "logps/chosen": -273.16815185546875, "logps/rejected": -262.08917236328125, "loss": 0.1644, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5341050624847412, "rewards/margins": 0.7611141800880432, "rewards/rejected": -1.2952191829681396, "step": 1720 }, { "epoch": 1.79, "learning_rate": 2.2464600076540373e-07, "logits/chosen": -2.946911334991455, "logits/rejected": -2.931849718093872, "logps/chosen": -308.5828552246094, "logps/rejected": -247.25338745117188, "loss": 0.1064, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.5544737577438354, "rewards/margins": 0.9120496511459351, "rewards/rejected": -1.46652352809906, "step": 1730 }, { "epoch": 1.8, "learning_rate": 2.227324913892078e-07, "logits/chosen": -2.951788902282715, "logits/rejected": -2.909658908843994, "logps/chosen": -295.8861389160156, "logps/rejected": -243.9182891845703, "loss": 0.0733, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.5971349477767944, "rewards/margins": 0.7489143013954163, "rewards/rejected": -1.3460490703582764, "step": 1740 }, { "epoch": 1.81, "learning_rate": 2.2081898201301186e-07, "logits/chosen": -3.023937702178955, "logits/rejected": -2.9923651218414307, "logps/chosen": -301.7521057128906, "logps/rejected": -257.3502197265625, "loss": 0.1576, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5676156282424927, "rewards/margins": 0.8253081440925598, "rewards/rejected": -1.3929237127304077, "step": 1750 }, { "epoch": 1.82, "learning_rate": 2.1890547263681592e-07, "logits/chosen": -2.979418992996216, "logits/rejected": -2.979773998260498, "logps/chosen": -278.7250061035156, "logps/rejected": -281.96417236328125, "loss": 0.1553, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5305044651031494, "rewards/margins": 0.8628344535827637, "rewards/rejected": -1.393338918685913, "step": 1760 }, { "epoch": 1.83, "learning_rate": 2.1699196326061998e-07, "logits/chosen": -2.93343448638916, "logits/rejected": -2.9477057456970215, "logps/chosen": -259.96685791015625, "logps/rejected": -260.68145751953125, "loss": 0.2389, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.5586804151535034, "rewards/margins": 0.8505626916885376, "rewards/rejected": -1.409243106842041, "step": 1770 }, { "epoch": 1.84, "learning_rate": 2.1507845388442402e-07, "logits/chosen": -3.001537799835205, "logits/rejected": -3.000216007232666, "logps/chosen": -253.8172607421875, "logps/rejected": -229.71261596679688, "loss": 0.117, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.6867496371269226, "rewards/margins": 0.839769184589386, "rewards/rejected": -1.5265188217163086, "step": 1780 }, { "epoch": 1.85, "learning_rate": 2.1316494450822808e-07, "logits/chosen": -2.9645118713378906, "logits/rejected": -2.921504497528076, "logps/chosen": -249.779296875, "logps/rejected": -217.2705078125, "loss": 0.187, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.6474353075027466, "rewards/margins": 0.7807954549789429, "rewards/rejected": -1.428230881690979, "step": 1790 }, { "epoch": 1.86, "learning_rate": 2.1125143513203214e-07, "logits/chosen": -2.995269298553467, "logits/rejected": -2.9834041595458984, "logps/chosen": -293.59716796875, "logps/rejected": -229.48092651367188, "loss": 0.0751, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.6189619302749634, "rewards/margins": 0.7833685874938965, "rewards/rejected": -1.4023306369781494, "step": 1800 }, { "epoch": 1.87, "learning_rate": 2.093379257558362e-07, "logits/chosen": -2.9907360076904297, "logits/rejected": -2.963219404220581, "logps/chosen": -274.832275390625, "logps/rejected": -237.86355590820312, "loss": 0.2011, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.7243114709854126, "rewards/margins": 0.5377382636070251, "rewards/rejected": -1.2620497941970825, "step": 1810 }, { "epoch": 1.88, "learning_rate": 2.0742441637964026e-07, "logits/chosen": -2.9230685234069824, "logits/rejected": -2.88169264793396, "logps/chosen": -309.228515625, "logps/rejected": -246.1265411376953, "loss": -0.039, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.5196012258529663, "rewards/margins": 1.0259116888046265, "rewards/rejected": -1.5455129146575928, "step": 1820 }, { "epoch": 1.89, "learning_rate": 2.055109070034443e-07, "logits/chosen": -2.9902000427246094, "logits/rejected": -2.9472696781158447, "logps/chosen": -300.9866027832031, "logps/rejected": -249.76412963867188, "loss": 0.1368, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.6899394989013672, "rewards/margins": 0.7042897343635559, "rewards/rejected": -1.3942292928695679, "step": 1830 }, { "epoch": 1.9, "learning_rate": 2.0359739762724836e-07, "logits/chosen": -3.0022895336151123, "logits/rejected": -2.973423719406128, "logps/chosen": -313.137939453125, "logps/rejected": -254.30709838867188, "loss": 0.0727, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.5544772744178772, "rewards/margins": 1.1537253856658936, "rewards/rejected": -1.7082027196884155, "step": 1840 }, { "epoch": 1.91, "learning_rate": 2.0168388825105242e-07, "logits/chosen": -2.976815700531006, "logits/rejected": -2.958930492401123, "logps/chosen": -285.1069641113281, "logps/rejected": -275.8247985839844, "loss": 0.046, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5782260894775391, "rewards/margins": 0.8343210220336914, "rewards/rejected": -1.41254723072052, "step": 1850 }, { "epoch": 1.92, "learning_rate": 1.997703788748565e-07, "logits/chosen": -2.997415065765381, "logits/rejected": -2.95662784576416, "logps/chosen": -302.427734375, "logps/rejected": -215.8485565185547, "loss": 0.0356, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.4519409239292145, "rewards/margins": 1.185298204421997, "rewards/rejected": -1.6372392177581787, "step": 1860 }, { "epoch": 1.93, "learning_rate": 1.9785686949866055e-07, "logits/chosen": -2.943821668624878, "logits/rejected": -2.9366466999053955, "logps/chosen": -267.79620361328125, "logps/rejected": -246.8135223388672, "loss": 0.158, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.6127734184265137, "rewards/margins": 0.7575459480285645, "rewards/rejected": -1.3703193664550781, "step": 1870 }, { "epoch": 1.94, "learning_rate": 1.9594336012246458e-07, "logits/chosen": -2.941462278366089, "logits/rejected": -2.9433281421661377, "logps/chosen": -253.16256713867188, "logps/rejected": -241.6164093017578, "loss": 0.0191, "rewards/accuracies": 0.75, "rewards/chosen": -0.5904272198677063, "rewards/margins": 0.8854333758354187, "rewards/rejected": -1.475860595703125, "step": 1880 }, { "epoch": 1.95, "learning_rate": 1.9402985074626865e-07, "logits/chosen": -2.990996837615967, "logits/rejected": -2.9396445751190186, "logps/chosen": -286.08404541015625, "logps/rejected": -246.9237823486328, "loss": 0.0619, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.7161077260971069, "rewards/margins": 0.9890392422676086, "rewards/rejected": -1.7051467895507812, "step": 1890 }, { "epoch": 1.96, "learning_rate": 1.921163413700727e-07, "logits/chosen": -2.9665896892547607, "logits/rejected": -2.985443353652954, "logps/chosen": -273.56964111328125, "logps/rejected": -255.6343536376953, "loss": 0.1713, "rewards/accuracies": 0.65625, "rewards/chosen": -0.7685881853103638, "rewards/margins": 0.8922961354255676, "rewards/rejected": -1.6608844995498657, "step": 1900 }, { "epoch": 1.97, "learning_rate": 1.9020283199387677e-07, "logits/chosen": -2.993872880935669, "logits/rejected": -2.9838452339172363, "logps/chosen": -268.5338134765625, "logps/rejected": -235.5106658935547, "loss": 0.1585, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.5740541219711304, "rewards/margins": 0.8445619344711304, "rewards/rejected": -1.4186161756515503, "step": 1910 }, { "epoch": 1.98, "learning_rate": 1.8828932261768083e-07, "logits/chosen": -2.918458938598633, "logits/rejected": -2.924969434738159, "logps/chosen": -275.4915466308594, "logps/rejected": -230.2731170654297, "loss": -0.0004, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.717719316482544, "rewards/margins": 1.1288950443267822, "rewards/rejected": -1.8466142416000366, "step": 1920 }, { "epoch": 1.99, "learning_rate": 1.8637581324148487e-07, "logits/chosen": -3.0128183364868164, "logits/rejected": -3.0055766105651855, "logps/chosen": -273.6071472167969, "logps/rejected": -248.68997192382812, "loss": 0.0705, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7629455327987671, "rewards/margins": 1.0152510404586792, "rewards/rejected": -1.7781963348388672, "step": 1930 }, { "epoch": 2.0, "eval_logits/chosen": -2.983034372329712, "eval_logits/rejected": -2.960925817489624, "eval_logps/chosen": -277.86474609375, "eval_logps/rejected": -245.16693115234375, "eval_loss": 0.05700839310884476, "eval_rewards/accuracies": 0.6959999799728394, "eval_rewards/chosen": -0.6708189845085144, "eval_rewards/margins": 0.9967920184135437, "eval_rewards/rejected": -1.6676111221313477, "eval_runtime": 448.2745, "eval_samples_per_second": 4.462, "eval_steps_per_second": 0.279, "step": 1937 }, { "epoch": 2.0, "learning_rate": 1.8446230386528893e-07, "logits/chosen": -2.99873423576355, "logits/rejected": -2.9760866165161133, "logps/chosen": -270.5379943847656, "logps/rejected": -233.87255859375, "loss": 0.104, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.6196664571762085, "rewards/margins": 0.8855097889900208, "rewards/rejected": -1.505176305770874, "step": 1940 }, { "epoch": 2.01, "learning_rate": 1.82548794489093e-07, "logits/chosen": -2.972491502761841, "logits/rejected": -2.9654781818389893, "logps/chosen": -272.2441711425781, "logps/rejected": -247.496337890625, "loss": 0.0695, "rewards/accuracies": 0.6875, "rewards/chosen": -0.8281705975532532, "rewards/margins": 0.8387929201126099, "rewards/rejected": -1.6669635772705078, "step": 1950 }, { "epoch": 2.02, "learning_rate": 1.8063528511289706e-07, "logits/chosen": -2.9022774696350098, "logits/rejected": -2.8986656665802, "logps/chosen": -272.8119201660156, "logps/rejected": -286.4425964355469, "loss": 0.0376, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.6966532468795776, "rewards/margins": 1.0244591236114502, "rewards/rejected": -1.7211124897003174, "step": 1960 }, { "epoch": 2.03, "learning_rate": 1.7872177573670112e-07, "logits/chosen": -3.0008738040924072, "logits/rejected": -2.9414420127868652, "logps/chosen": -269.3557434082031, "logps/rejected": -228.98892211914062, "loss": 0.1134, "rewards/accuracies": 0.75, "rewards/chosen": -0.6135867238044739, "rewards/margins": 1.0139942169189453, "rewards/rejected": -1.627581000328064, "step": 1970 }, { "epoch": 2.04, "learning_rate": 1.7680826636050515e-07, "logits/chosen": -2.97796893119812, "logits/rejected": -2.9451377391815186, "logps/chosen": -290.0522155761719, "logps/rejected": -247.7688446044922, "loss": 0.0676, "rewards/accuracies": 0.71875, "rewards/chosen": -0.7555999755859375, "rewards/margins": 0.919529139995575, "rewards/rejected": -1.6751289367675781, "step": 1980 }, { "epoch": 2.05, "learning_rate": 1.7489475698430921e-07, "logits/chosen": -2.9556636810302734, "logits/rejected": -2.9225146770477295, "logps/chosen": -281.0018615722656, "logps/rejected": -257.39471435546875, "loss": -0.0304, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7463997006416321, "rewards/margins": 0.9945909380912781, "rewards/rejected": -1.7409906387329102, "step": 1990 }, { "epoch": 2.07, "learning_rate": 1.7298124760811328e-07, "logits/chosen": -2.9892733097076416, "logits/rejected": -2.9609320163726807, "logps/chosen": -265.5694885253906, "logps/rejected": -227.8389434814453, "loss": 0.068, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.8352034687995911, "rewards/margins": 1.0397050380706787, "rewards/rejected": -1.874908685684204, "step": 2000 }, { "epoch": 2.08, "learning_rate": 1.7106773823191734e-07, "logits/chosen": -2.9671616554260254, "logits/rejected": -2.937986373901367, "logps/chosen": -281.4359436035156, "logps/rejected": -241.79080200195312, "loss": -0.0155, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6781337857246399, "rewards/margins": 1.3058927059173584, "rewards/rejected": -1.984026312828064, "step": 2010 }, { "epoch": 2.09, "learning_rate": 1.691542288557214e-07, "logits/chosen": -2.9833037853240967, "logits/rejected": -2.943441152572632, "logps/chosen": -295.3065490722656, "logps/rejected": -253.66360473632812, "loss": 0.0664, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.7511407136917114, "rewards/margins": 0.9544817209243774, "rewards/rejected": -1.7056224346160889, "step": 2020 }, { "epoch": 2.1, "learning_rate": 1.6724071947952544e-07, "logits/chosen": -2.9694552421569824, "logits/rejected": -2.9318954944610596, "logps/chosen": -254.4512939453125, "logps/rejected": -235.69595336914062, "loss": 0.0632, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.7740479111671448, "rewards/margins": 0.9885379672050476, "rewards/rejected": -1.762585997581482, "step": 2030 }, { "epoch": 2.11, "learning_rate": 1.653272101033295e-07, "logits/chosen": -2.9927315711975098, "logits/rejected": -2.9617929458618164, "logps/chosen": -286.9222412109375, "logps/rejected": -262.54559326171875, "loss": -0.0387, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.7180510759353638, "rewards/margins": 1.0184663534164429, "rewards/rejected": -1.7365175485610962, "step": 2040 }, { "epoch": 2.12, "learning_rate": 1.6341370072713356e-07, "logits/chosen": -2.9783358573913574, "logits/rejected": -2.9684910774230957, "logps/chosen": -288.4617614746094, "logps/rejected": -269.187255859375, "loss": -0.0243, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.75580233335495, "rewards/margins": 0.9117294549942017, "rewards/rejected": -1.6675317287445068, "step": 2050 }, { "epoch": 2.13, "learning_rate": 1.6150019135093762e-07, "logits/chosen": -3.0292530059814453, "logits/rejected": -2.9775586128234863, "logps/chosen": -298.93585205078125, "logps/rejected": -247.0900115966797, "loss": -0.0146, "rewards/accuracies": 0.75, "rewards/chosen": -0.6352591514587402, "rewards/margins": 1.129183053970337, "rewards/rejected": -1.7644420862197876, "step": 2060 }, { "epoch": 2.14, "learning_rate": 1.5958668197474169e-07, "logits/chosen": -3.0193440914154053, "logits/rejected": -2.99593186378479, "logps/chosen": -263.82464599609375, "logps/rejected": -277.60418701171875, "loss": 0.0702, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.6851625442504883, "rewards/margins": 0.9985010027885437, "rewards/rejected": -1.6836636066436768, "step": 2070 }, { "epoch": 2.15, "learning_rate": 1.5767317259854572e-07, "logits/chosen": -3.0217273235321045, "logits/rejected": -2.970132350921631, "logps/chosen": -307.5477600097656, "logps/rejected": -248.69448852539062, "loss": -0.024, "rewards/accuracies": 0.71875, "rewards/chosen": -0.65143883228302, "rewards/margins": 1.1803841590881348, "rewards/rejected": -1.8318227529525757, "step": 2080 }, { "epoch": 2.16, "learning_rate": 1.5575966322234978e-07, "logits/chosen": -2.931065559387207, "logits/rejected": -2.9219722747802734, "logps/chosen": -276.0883483886719, "logps/rejected": -247.0487060546875, "loss": -0.052, "rewards/accuracies": 0.75, "rewards/chosen": -0.64451664686203, "rewards/margins": 1.1405279636383057, "rewards/rejected": -1.7850444316864014, "step": 2090 }, { "epoch": 2.17, "learning_rate": 1.5384615384615385e-07, "logits/chosen": -3.0177485942840576, "logits/rejected": -3.0004374980926514, "logps/chosen": -263.8681640625, "logps/rejected": -226.38119506835938, "loss": -0.047, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.8407020568847656, "rewards/margins": 1.1751278638839722, "rewards/rejected": -2.0158302783966064, "step": 2100 }, { "epoch": 2.18, "learning_rate": 1.519326444699579e-07, "logits/chosen": -2.9868111610412598, "logits/rejected": -2.961195468902588, "logps/chosen": -294.43402099609375, "logps/rejected": -237.2691650390625, "loss": -0.1127, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.678774356842041, "rewards/margins": 1.201111078262329, "rewards/rejected": -1.8798853158950806, "step": 2110 }, { "epoch": 2.19, "learning_rate": 1.5001913509376197e-07, "logits/chosen": -3.0007481575012207, "logits/rejected": -2.9801931381225586, "logps/chosen": -290.3331298828125, "logps/rejected": -263.9862976074219, "loss": -0.071, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.908436119556427, "rewards/margins": 0.9944915771484375, "rewards/rejected": -1.9029273986816406, "step": 2120 }, { "epoch": 2.2, "learning_rate": 1.4810562571756603e-07, "logits/chosen": -3.011329174041748, "logits/rejected": -2.980834484100342, "logps/chosen": -259.76031494140625, "logps/rejected": -239.0861053466797, "loss": 0.0556, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.774993360042572, "rewards/margins": 1.0526189804077148, "rewards/rejected": -1.8276125192642212, "step": 2130 }, { "epoch": 2.21, "learning_rate": 1.4619211634137007e-07, "logits/chosen": -2.9631924629211426, "logits/rejected": -2.954556703567505, "logps/chosen": -248.605224609375, "logps/rejected": -215.9401397705078, "loss": -0.0457, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.8637608289718628, "rewards/margins": 1.0648829936981201, "rewards/rejected": -1.9286441802978516, "step": 2140 }, { "epoch": 2.22, "learning_rate": 1.4427860696517413e-07, "logits/chosen": -2.950859308242798, "logits/rejected": -2.961940288543701, "logps/chosen": -328.16314697265625, "logps/rejected": -277.8406982421875, "loss": 0.0939, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7997218370437622, "rewards/margins": 1.051257848739624, "rewards/rejected": -1.8509798049926758, "step": 2150 }, { "epoch": 2.23, "learning_rate": 1.423650975889782e-07, "logits/chosen": -3.0021846294403076, "logits/rejected": -2.9544687271118164, "logps/chosen": -284.53570556640625, "logps/rejected": -254.57559204101562, "loss": -0.1516, "rewards/accuracies": 0.75, "rewards/chosen": -0.8068240880966187, "rewards/margins": 1.2634921073913574, "rewards/rejected": -2.0703163146972656, "step": 2160 }, { "epoch": 2.24, "learning_rate": 1.4045158821278225e-07, "logits/chosen": -2.968425750732422, "logits/rejected": -2.9926857948303223, "logps/chosen": -295.5940246582031, "logps/rejected": -265.8125, "loss": -0.1379, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.7207796573638916, "rewards/margins": 1.3109276294708252, "rewards/rejected": -2.0317070484161377, "step": 2170 }, { "epoch": 2.25, "learning_rate": 1.3853807883658632e-07, "logits/chosen": -3.0091452598571777, "logits/rejected": -2.9704108238220215, "logps/chosen": -298.40948486328125, "logps/rejected": -241.05337524414062, "loss": 0.1214, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.9363592863082886, "rewards/margins": 0.984167754650116, "rewards/rejected": -1.9205271005630493, "step": 2180 }, { "epoch": 2.26, "learning_rate": 1.3662456946039035e-07, "logits/chosen": -2.924776554107666, "logits/rejected": -2.920172691345215, "logps/chosen": -259.19580078125, "logps/rejected": -247.6489715576172, "loss": 0.0587, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.8932672739028931, "rewards/margins": 0.9966402053833008, "rewards/rejected": -1.8899074792861938, "step": 2190 }, { "epoch": 2.27, "learning_rate": 1.3471106008419441e-07, "logits/chosen": -2.9807791709899902, "logits/rejected": -2.9884274005889893, "logps/chosen": -244.3290252685547, "logps/rejected": -231.4929656982422, "loss": 0.0254, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.9376288652420044, "rewards/margins": 1.005837082862854, "rewards/rejected": -1.9434658288955688, "step": 2200 }, { "epoch": 2.28, "learning_rate": 1.3279755070799848e-07, "logits/chosen": -2.9636523723602295, "logits/rejected": -2.898196220397949, "logps/chosen": -302.7827453613281, "logps/rejected": -254.11367797851562, "loss": -0.0107, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0612095594406128, "rewards/margins": 0.9029472470283508, "rewards/rejected": -1.9641568660736084, "step": 2210 }, { "epoch": 2.29, "learning_rate": 1.3088404133180254e-07, "logits/chosen": -2.962622880935669, "logits/rejected": -2.9335360527038574, "logps/chosen": -321.44342041015625, "logps/rejected": -250.2067108154297, "loss": -0.2537, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.8546531796455383, "rewards/margins": 1.267183780670166, "rewards/rejected": -2.1218371391296387, "step": 2220 }, { "epoch": 2.3, "learning_rate": 1.289705319556066e-07, "logits/chosen": -2.961509943008423, "logits/rejected": -2.923830509185791, "logps/chosen": -294.05279541015625, "logps/rejected": -256.3243103027344, "loss": -0.0748, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.8142738342285156, "rewards/margins": 1.3003737926483154, "rewards/rejected": -2.114647626876831, "step": 2230 }, { "epoch": 2.31, "learning_rate": 1.2705702257941064e-07, "logits/chosen": -2.9922921657562256, "logits/rejected": -2.9666550159454346, "logps/chosen": -281.14788818359375, "logps/rejected": -261.50445556640625, "loss": -0.103, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.889809787273407, "rewards/margins": 1.2682868242263794, "rewards/rejected": -2.1580967903137207, "step": 2240 }, { "epoch": 2.32, "learning_rate": 1.251435132032147e-07, "logits/chosen": -3.009636878967285, "logits/rejected": -2.9912712574005127, "logps/chosen": -262.1191101074219, "logps/rejected": -254.8567352294922, "loss": 0.0064, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.9876982569694519, "rewards/margins": 0.9436071515083313, "rewards/rejected": -1.9313055276870728, "step": 2250 }, { "epoch": 2.33, "learning_rate": 1.2323000382701873e-07, "logits/chosen": -3.0256762504577637, "logits/rejected": -3.006740093231201, "logps/chosen": -309.2908630371094, "logps/rejected": -252.59323120117188, "loss": 0.0352, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9725021123886108, "rewards/margins": 1.1422688961029053, "rewards/rejected": -2.1147711277008057, "step": 2260 }, { "epoch": 2.34, "learning_rate": 1.213164944508228e-07, "logits/chosen": -2.9900639057159424, "logits/rejected": -2.9898974895477295, "logps/chosen": -299.51177978515625, "logps/rejected": -267.8308410644531, "loss": -0.1218, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0168074369430542, "rewards/margins": 1.148054838180542, "rewards/rejected": -2.1648621559143066, "step": 2270 }, { "epoch": 2.35, "learning_rate": 1.1940298507462686e-07, "logits/chosen": -2.986194610595703, "logits/rejected": -2.9787726402282715, "logps/chosen": -285.0082702636719, "logps/rejected": -241.463134765625, "loss": -0.0902, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.0312423706054688, "rewards/margins": 1.1112394332885742, "rewards/rejected": -2.142481803894043, "step": 2280 }, { "epoch": 2.36, "learning_rate": 1.1748947569843092e-07, "logits/chosen": -2.949153184890747, "logits/rejected": -2.939624786376953, "logps/chosen": -294.7286682128906, "logps/rejected": -233.32864379882812, "loss": -0.2292, "rewards/accuracies": 0.6875, "rewards/chosen": -0.8459509015083313, "rewards/margins": 1.125945806503296, "rewards/rejected": -1.9718964099884033, "step": 2290 }, { "epoch": 2.37, "learning_rate": 1.1557596632223497e-07, "logits/chosen": -3.0164811611175537, "logits/rejected": -3.015223741531372, "logps/chosen": -283.99652099609375, "logps/rejected": -248.39602661132812, "loss": -0.175, "rewards/accuracies": 0.65625, "rewards/chosen": -1.068522334098816, "rewards/margins": 0.9977639317512512, "rewards/rejected": -2.066286325454712, "step": 2300 }, { "epoch": 2.39, "learning_rate": 1.1366245694603903e-07, "logits/chosen": -2.9585719108581543, "logits/rejected": -2.9644482135772705, "logps/chosen": -282.3656921386719, "logps/rejected": -257.11041259765625, "loss": -0.04, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.0202432870864868, "rewards/margins": 1.0421122312545776, "rewards/rejected": -2.0623555183410645, "step": 2310 }, { "epoch": 2.4, "learning_rate": 1.1174894756984308e-07, "logits/chosen": -2.976578712463379, "logits/rejected": -2.958589792251587, "logps/chosen": -243.4874725341797, "logps/rejected": -247.5500030517578, "loss": -0.1438, "rewards/accuracies": 0.71875, "rewards/chosen": -1.131103277206421, "rewards/margins": 1.045607328414917, "rewards/rejected": -2.176710605621338, "step": 2320 }, { "epoch": 2.41, "learning_rate": 1.0983543819364714e-07, "logits/chosen": -2.966090440750122, "logits/rejected": -2.963219165802002, "logps/chosen": -265.3279113769531, "logps/rejected": -245.90011596679688, "loss": -0.3008, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1297423839569092, "rewards/margins": 1.0481914281845093, "rewards/rejected": -2.177933931350708, "step": 2330 }, { "epoch": 2.42, "learning_rate": 1.079219288174512e-07, "logits/chosen": -2.8891921043395996, "logits/rejected": -2.8980748653411865, "logps/chosen": -245.95260620117188, "logps/rejected": -274.68511962890625, "loss": -0.0161, "rewards/accuracies": 0.6875, "rewards/chosen": -1.029191255569458, "rewards/margins": 1.0524277687072754, "rewards/rejected": -2.0816187858581543, "step": 2340 }, { "epoch": 2.43, "learning_rate": 1.0600841944125525e-07, "logits/chosen": -2.954099178314209, "logits/rejected": -2.9609456062316895, "logps/chosen": -305.5159606933594, "logps/rejected": -262.00189208984375, "loss": 0.0565, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0684045553207397, "rewards/margins": 1.0408602952957153, "rewards/rejected": -2.109265089035034, "step": 2350 }, { "epoch": 2.44, "learning_rate": 1.0409491006505931e-07, "logits/chosen": -2.95751690864563, "logits/rejected": -2.9540932178497314, "logps/chosen": -272.47802734375, "logps/rejected": -252.9427490234375, "loss": -0.1484, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9607385396957397, "rewards/margins": 1.1778024435043335, "rewards/rejected": -2.1385409832000732, "step": 2360 }, { "epoch": 2.45, "learning_rate": 1.0218140068886336e-07, "logits/chosen": -2.9053263664245605, "logits/rejected": -2.8971660137176514, "logps/chosen": -288.0937805175781, "logps/rejected": -249.78091430664062, "loss": -0.0786, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.9809226989746094, "rewards/margins": 1.1126328706741333, "rewards/rejected": -2.0935556888580322, "step": 2370 }, { "epoch": 2.46, "learning_rate": 1.0026789131266743e-07, "logits/chosen": -2.902618169784546, "logits/rejected": -2.8822264671325684, "logps/chosen": -282.31939697265625, "logps/rejected": -234.1844940185547, "loss": -0.0294, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0446959733963013, "rewards/margins": 1.026447057723999, "rewards/rejected": -2.0711429119110107, "step": 2380 }, { "epoch": 2.47, "learning_rate": 9.835438193647149e-08, "logits/chosen": -2.979097366333008, "logits/rejected": -2.939335584640503, "logps/chosen": -292.04156494140625, "logps/rejected": -249.23880004882812, "loss": -0.0569, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.1423760652542114, "rewards/margins": 1.2263530492782593, "rewards/rejected": -2.3687291145324707, "step": 2390 }, { "epoch": 2.48, "learning_rate": 9.644087256027554e-08, "logits/chosen": -2.9777472019195557, "logits/rejected": -2.9249491691589355, "logps/chosen": -300.14605712890625, "logps/rejected": -261.2189025878906, "loss": -0.1677, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.053171992301941, "rewards/margins": 1.085288166999817, "rewards/rejected": -2.1384599208831787, "step": 2400 }, { "epoch": 2.49, "learning_rate": 9.45273631840796e-08, "logits/chosen": -2.9415881633758545, "logits/rejected": -2.920991897583008, "logps/chosen": -283.33758544921875, "logps/rejected": -262.96820068359375, "loss": -0.1276, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8991168737411499, "rewards/margins": 1.2601044178009033, "rewards/rejected": -2.1592211723327637, "step": 2410 }, { "epoch": 2.5, "learning_rate": 9.261385380788366e-08, "logits/chosen": -2.9756853580474854, "logits/rejected": -2.946068525314331, "logps/chosen": -280.66241455078125, "logps/rejected": -240.4520263671875, "loss": -0.126, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.0988253355026245, "rewards/margins": 0.9154653549194336, "rewards/rejected": -2.0142908096313477, "step": 2420 }, { "epoch": 2.51, "learning_rate": 9.070034443168771e-08, "logits/chosen": -2.981102466583252, "logits/rejected": -2.961693286895752, "logps/chosen": -277.573974609375, "logps/rejected": -287.72515869140625, "loss": -0.0153, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.1206835508346558, "rewards/margins": 1.1004483699798584, "rewards/rejected": -2.2211318016052246, "step": 2430 }, { "epoch": 2.52, "learning_rate": 8.878683505549177e-08, "logits/chosen": -2.969409465789795, "logits/rejected": -2.9719467163085938, "logps/chosen": -269.2553405761719, "logps/rejected": -276.8422546386719, "loss": -0.2463, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.0086815357208252, "rewards/margins": 1.2824007272720337, "rewards/rejected": -2.2910819053649902, "step": 2440 }, { "epoch": 2.53, "learning_rate": 8.687332567929582e-08, "logits/chosen": -2.9511923789978027, "logits/rejected": -2.945295810699463, "logps/chosen": -259.01507568359375, "logps/rejected": -261.44781494140625, "loss": -0.1105, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.238194465637207, "rewards/margins": 0.8837486505508423, "rewards/rejected": -2.1219429969787598, "step": 2450 }, { "epoch": 2.54, "learning_rate": 8.495981630309988e-08, "logits/chosen": -2.9451937675476074, "logits/rejected": -2.9014077186584473, "logps/chosen": -306.05902099609375, "logps/rejected": -251.77072143554688, "loss": -0.0642, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.9629167318344116, "rewards/margins": 1.1698607206344604, "rewards/rejected": -2.132777452468872, "step": 2460 }, { "epoch": 2.55, "learning_rate": 8.304630692690395e-08, "logits/chosen": -2.968233108520508, "logits/rejected": -2.9597315788269043, "logps/chosen": -272.21258544921875, "logps/rejected": -263.3721008300781, "loss": -0.1204, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0844926834106445, "rewards/margins": 1.1088446378707886, "rewards/rejected": -2.1933372020721436, "step": 2470 }, { "epoch": 2.56, "learning_rate": 8.1132797550708e-08, "logits/chosen": -3.0234591960906982, "logits/rejected": -2.9642250537872314, "logps/chosen": -313.2720947265625, "logps/rejected": -251.24020385742188, "loss": -0.1713, "rewards/accuracies": 0.75, "rewards/chosen": -0.7700778245925903, "rewards/margins": 1.3790977001190186, "rewards/rejected": -2.1491756439208984, "step": 2480 }, { "epoch": 2.57, "learning_rate": 7.921928817451206e-08, "logits/chosen": -2.968733549118042, "logits/rejected": -2.9718379974365234, "logps/chosen": -277.2424621582031, "logps/rejected": -242.42172241210938, "loss": -0.1563, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.095190167427063, "rewards/margins": 1.008971095085144, "rewards/rejected": -2.104161262512207, "step": 2490 }, { "epoch": 2.58, "learning_rate": 7.73057787983161e-08, "logits/chosen": -2.9798028469085693, "logits/rejected": -2.9650533199310303, "logps/chosen": -290.0262756347656, "logps/rejected": -264.9449157714844, "loss": -0.2883, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.0747302770614624, "rewards/margins": 1.2478855848312378, "rewards/rejected": -2.3226161003112793, "step": 2500 }, { "epoch": 2.59, "learning_rate": 7.539226942212017e-08, "logits/chosen": -2.945732593536377, "logits/rejected": -2.94990611076355, "logps/chosen": -305.3632507324219, "logps/rejected": -274.59722900390625, "loss": -0.2619, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.0448615550994873, "rewards/margins": 1.2908263206481934, "rewards/rejected": -2.3356876373291016, "step": 2510 }, { "epoch": 2.6, "learning_rate": 7.347876004592423e-08, "logits/chosen": -2.9896063804626465, "logits/rejected": -2.9576222896575928, "logps/chosen": -272.3258361816406, "logps/rejected": -214.65298461914062, "loss": -0.1643, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.9911131858825684, "rewards/margins": 1.24199640750885, "rewards/rejected": -2.23310923576355, "step": 2520 }, { "epoch": 2.61, "learning_rate": 7.156525066972828e-08, "logits/chosen": -2.9311654567718506, "logits/rejected": -2.9140172004699707, "logps/chosen": -268.74444580078125, "logps/rejected": -236.22988891601562, "loss": -0.0763, "rewards/accuracies": 0.6875, "rewards/chosen": -1.111546277999878, "rewards/margins": 1.021420955657959, "rewards/rejected": -2.132966995239258, "step": 2530 }, { "epoch": 2.62, "learning_rate": 6.965174129353234e-08, "logits/chosen": -2.9497828483581543, "logits/rejected": -2.9311363697052, "logps/chosen": -305.40240478515625, "logps/rejected": -264.26177978515625, "loss": -0.1631, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.7808889746665955, "rewards/margins": 1.4971510171890259, "rewards/rejected": -2.2780401706695557, "step": 2540 }, { "epoch": 2.63, "learning_rate": 6.773823191733639e-08, "logits/chosen": -2.8948826789855957, "logits/rejected": -2.8769936561584473, "logps/chosen": -272.2402648925781, "logps/rejected": -246.4182891845703, "loss": -0.2377, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.8501654863357544, "rewards/margins": 1.4621152877807617, "rewards/rejected": -2.3122806549072266, "step": 2550 }, { "epoch": 2.64, "learning_rate": 6.582472254114045e-08, "logits/chosen": -2.9354665279388428, "logits/rejected": -2.8916828632354736, "logps/chosen": -272.43682861328125, "logps/rejected": -274.33819580078125, "loss": -0.0427, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1567806005477905, "rewards/margins": 0.9909998178482056, "rewards/rejected": -2.147780179977417, "step": 2560 }, { "epoch": 2.65, "learning_rate": 6.391121316494451e-08, "logits/chosen": -2.9822142124176025, "logits/rejected": -2.982553482055664, "logps/chosen": -282.8048400878906, "logps/rejected": -264.55401611328125, "loss": -0.2413, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.2675011157989502, "rewards/margins": 1.1336963176727295, "rewards/rejected": -2.4011974334716797, "step": 2570 }, { "epoch": 2.66, "learning_rate": 6.199770378874856e-08, "logits/chosen": -2.946916103363037, "logits/rejected": -2.921715497970581, "logps/chosen": -295.14581298828125, "logps/rejected": -264.893798828125, "loss": -0.1178, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0069832801818848, "rewards/margins": 1.1966971158981323, "rewards/rejected": -2.2036805152893066, "step": 2580 }, { "epoch": 2.67, "learning_rate": 6.008419441255262e-08, "logits/chosen": -2.996048927307129, "logits/rejected": -2.9277799129486084, "logps/chosen": -288.4164733886719, "logps/rejected": -240.5204315185547, "loss": -0.1271, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -1.2253119945526123, "rewards/margins": 0.9576913714408875, "rewards/rejected": -2.1830034255981445, "step": 2590 }, { "epoch": 2.68, "learning_rate": 5.817068503635668e-08, "logits/chosen": -2.979665756225586, "logits/rejected": -3.006110668182373, "logps/chosen": -309.9973449707031, "logps/rejected": -266.8681945800781, "loss": -0.1954, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.072455644607544, "rewards/margins": 1.285042643547058, "rewards/rejected": -2.3574986457824707, "step": 2600 }, { "epoch": 2.69, "learning_rate": 5.6257175660160735e-08, "logits/chosen": -2.995903968811035, "logits/rejected": -2.985555410385132, "logps/chosen": -255.1632537841797, "logps/rejected": -262.6217346191406, "loss": -0.1921, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.135871171951294, "rewards/margins": 1.1645699739456177, "rewards/rejected": -2.300441026687622, "step": 2610 }, { "epoch": 2.71, "learning_rate": 5.4343666283964784e-08, "logits/chosen": -2.947361946105957, "logits/rejected": -2.9222099781036377, "logps/chosen": -283.39483642578125, "logps/rejected": -236.3388214111328, "loss": -0.2522, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0431753396987915, "rewards/margins": 1.4626045227050781, "rewards/rejected": -2.50577974319458, "step": 2620 }, { "epoch": 2.72, "learning_rate": 5.243015690776884e-08, "logits/chosen": -2.934537649154663, "logits/rejected": -2.903463840484619, "logps/chosen": -297.0821228027344, "logps/rejected": -241.7076416015625, "loss": -0.1882, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.178007960319519, "rewards/margins": 1.0751577615737915, "rewards/rejected": -2.2531659603118896, "step": 2630 }, { "epoch": 2.73, "learning_rate": 5.05166475315729e-08, "logits/chosen": -2.9361720085144043, "logits/rejected": -2.882931709289551, "logps/chosen": -270.83148193359375, "logps/rejected": -255.24337768554688, "loss": -0.1525, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2923060655593872, "rewards/margins": 1.1520329713821411, "rewards/rejected": -2.444338798522949, "step": 2640 }, { "epoch": 2.74, "learning_rate": 4.860313815537696e-08, "logits/chosen": -2.9840073585510254, "logits/rejected": -2.9296116828918457, "logps/chosen": -283.15643310546875, "logps/rejected": -250.57608032226562, "loss": -0.1079, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.0855220556259155, "rewards/margins": 1.3181664943695068, "rewards/rejected": -2.4036881923675537, "step": 2650 }, { "epoch": 2.75, "learning_rate": 4.668962877918101e-08, "logits/chosen": -2.9590346813201904, "logits/rejected": -2.9196345806121826, "logps/chosen": -256.04315185546875, "logps/rejected": -279.31787109375, "loss": -0.1673, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.3440836668014526, "rewards/margins": 1.061953067779541, "rewards/rejected": -2.406036853790283, "step": 2660 }, { "epoch": 2.76, "learning_rate": 4.477611940298507e-08, "logits/chosen": -2.9351706504821777, "logits/rejected": -2.9165854454040527, "logps/chosen": -302.4515075683594, "logps/rejected": -260.8388366699219, "loss": -0.4567, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1327465772628784, "rewards/margins": 1.2759268283843994, "rewards/rejected": -2.4086735248565674, "step": 2670 }, { "epoch": 2.77, "learning_rate": 4.2862610026789124e-08, "logits/chosen": -3.013035297393799, "logits/rejected": -2.989163398742676, "logps/chosen": -270.7334899902344, "logps/rejected": -254.8367156982422, "loss": -0.1713, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0498313903808594, "rewards/margins": 1.175312876701355, "rewards/rejected": -2.225144147872925, "step": 2680 }, { "epoch": 2.78, "learning_rate": 4.0949100650593186e-08, "logits/chosen": -2.880460023880005, "logits/rejected": -2.861250400543213, "logps/chosen": -269.2752990722656, "logps/rejected": -265.5714111328125, "loss": -0.0863, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.1616452932357788, "rewards/margins": 1.2222042083740234, "rewards/rejected": -2.383849620819092, "step": 2690 }, { "epoch": 2.79, "learning_rate": 3.903559127439724e-08, "logits/chosen": -2.9040732383728027, "logits/rejected": -2.920821189880371, "logps/chosen": -267.18621826171875, "logps/rejected": -227.2675018310547, "loss": -0.3545, "rewards/accuracies": 0.71875, "rewards/chosen": -1.007155179977417, "rewards/margins": 1.408747911453247, "rewards/rejected": -2.415903091430664, "step": 2700 }, { "epoch": 2.8, "learning_rate": 3.71220818982013e-08, "logits/chosen": -2.9555585384368896, "logits/rejected": -2.891514778137207, "logps/chosen": -293.35809326171875, "logps/rejected": -241.7179412841797, "loss": -0.0836, "rewards/accuracies": 0.59375, "rewards/chosen": -1.3558762073516846, "rewards/margins": 0.7381645441055298, "rewards/rejected": -2.094040632247925, "step": 2710 }, { "epoch": 2.81, "learning_rate": 3.520857252200535e-08, "logits/chosen": -2.9766342639923096, "logits/rejected": -2.9516689777374268, "logps/chosen": -288.6914978027344, "logps/rejected": -282.8217468261719, "loss": -0.079, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -1.2634086608886719, "rewards/margins": 0.9370874166488647, "rewards/rejected": -2.200496196746826, "step": 2720 }, { "epoch": 2.82, "learning_rate": 3.3295063145809414e-08, "logits/chosen": -2.9203312397003174, "logits/rejected": -2.923161268234253, "logps/chosen": -278.013916015625, "logps/rejected": -267.54168701171875, "loss": -0.1356, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.0634254217147827, "rewards/margins": 1.14893639087677, "rewards/rejected": -2.212362051010132, "step": 2730 }, { "epoch": 2.83, "learning_rate": 3.138155376961347e-08, "logits/chosen": -3.015432834625244, "logits/rejected": -2.9883949756622314, "logps/chosen": -306.4672546386719, "logps/rejected": -284.0825500488281, "loss": -0.0924, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2742321491241455, "rewards/margins": 1.3397479057312012, "rewards/rejected": -2.6139800548553467, "step": 2740 }, { "epoch": 2.84, "learning_rate": 2.9468044393417525e-08, "logits/chosen": -2.9456729888916016, "logits/rejected": -2.9101481437683105, "logps/chosen": -281.6629638671875, "logps/rejected": -247.2931671142578, "loss": -0.2122, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8153074383735657, "rewards/margins": 1.5913559198379517, "rewards/rejected": -2.406663417816162, "step": 2750 }, { "epoch": 2.85, "learning_rate": 2.755453501722158e-08, "logits/chosen": -2.8928332328796387, "logits/rejected": -2.8488776683807373, "logps/chosen": -262.3460693359375, "logps/rejected": -253.6611328125, "loss": -0.3582, "rewards/accuracies": 0.625, "rewards/chosen": -1.2655181884765625, "rewards/margins": 1.1255066394805908, "rewards/rejected": -2.3910248279571533, "step": 2760 }, { "epoch": 2.86, "learning_rate": 2.564102564102564e-08, "logits/chosen": -2.9541738033294678, "logits/rejected": -2.9483609199523926, "logps/chosen": -281.6482849121094, "logps/rejected": -263.13116455078125, "loss": -0.2229, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.135956048965454, "rewards/margins": 1.4537389278411865, "rewards/rejected": -2.5896952152252197, "step": 2770 }, { "epoch": 2.87, "learning_rate": 2.3727516264829695e-08, "logits/chosen": -2.96710467338562, "logits/rejected": -2.935039520263672, "logps/chosen": -314.2068176269531, "logps/rejected": -288.13592529296875, "loss": -0.0912, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.2961633205413818, "rewards/margins": 1.0811083316802979, "rewards/rejected": -2.3772716522216797, "step": 2780 }, { "epoch": 2.88, "learning_rate": 2.1814006888633754e-08, "logits/chosen": -2.9459080696105957, "logits/rejected": -2.89158296585083, "logps/chosen": -286.0101318359375, "logps/rejected": -239.49160766601562, "loss": -0.115, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.2027372121810913, "rewards/margins": 1.0859975814819336, "rewards/rejected": -2.2887349128723145, "step": 2790 }, { "epoch": 2.89, "learning_rate": 1.990049751243781e-08, "logits/chosen": -2.9392828941345215, "logits/rejected": -2.9443650245666504, "logps/chosen": -291.0428771972656, "logps/rejected": -242.24203491210938, "loss": -0.2681, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.150545358657837, "rewards/margins": 1.183664083480835, "rewards/rejected": -2.334209680557251, "step": 2800 }, { "epoch": 2.9, "learning_rate": 1.7986988136241865e-08, "logits/chosen": -2.936471462249756, "logits/rejected": -2.891524076461792, "logps/chosen": -287.06024169921875, "logps/rejected": -288.4886169433594, "loss": -0.2539, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.1829683780670166, "rewards/margins": 1.063999891281128, "rewards/rejected": -2.2469685077667236, "step": 2810 }, { "epoch": 2.91, "learning_rate": 1.6073478760045924e-08, "logits/chosen": -2.9530060291290283, "logits/rejected": -2.9200854301452637, "logps/chosen": -283.59716796875, "logps/rejected": -236.90316772460938, "loss": -0.1698, "rewards/accuracies": 0.71875, "rewards/chosen": -1.1723743677139282, "rewards/margins": 1.1659984588623047, "rewards/rejected": -2.3383727073669434, "step": 2820 }, { "epoch": 2.92, "learning_rate": 1.4159969383849981e-08, "logits/chosen": -2.9582297801971436, "logits/rejected": -2.9348087310791016, "logps/chosen": -269.26531982421875, "logps/rejected": -241.8841094970703, "loss": -0.3465, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.1937872171401978, "rewards/margins": 1.0226060152053833, "rewards/rejected": -2.216393232345581, "step": 2830 }, { "epoch": 2.93, "learning_rate": 1.2246460007654037e-08, "logits/chosen": -2.985964775085449, "logits/rejected": -2.9667985439300537, "logps/chosen": -297.0205078125, "logps/rejected": -251.09805297851562, "loss": -0.1413, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.407981276512146, "rewards/margins": 0.9606062173843384, "rewards/rejected": -2.3685877323150635, "step": 2840 }, { "epoch": 2.94, "learning_rate": 1.0332950631458094e-08, "logits/chosen": -2.9180400371551514, "logits/rejected": -2.9097437858581543, "logps/chosen": -291.8419494628906, "logps/rejected": -265.8045349121094, "loss": -0.116, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1430332660675049, "rewards/margins": 1.2068586349487305, "rewards/rejected": -2.3498916625976562, "step": 2850 }, { "epoch": 2.95, "learning_rate": 8.419441255262151e-09, "logits/chosen": -2.9281063079833984, "logits/rejected": -2.8641605377197266, "logps/chosen": -270.81256103515625, "logps/rejected": -246.7694549560547, "loss": -0.2278, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.335326910018921, "rewards/margins": 1.1297729015350342, "rewards/rejected": -2.465099811553955, "step": 2860 }, { "epoch": 2.96, "learning_rate": 6.505931879066207e-09, "logits/chosen": -2.948106050491333, "logits/rejected": -2.921739339828491, "logps/chosen": -276.6488952636719, "logps/rejected": -248.07608032226562, "loss": -0.1698, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.1927988529205322, "rewards/margins": 1.302239179611206, "rewards/rejected": -2.4950382709503174, "step": 2870 }, { "epoch": 2.97, "learning_rate": 4.592422502870264e-09, "logits/chosen": -2.9207305908203125, "logits/rejected": -2.8921730518341064, "logps/chosen": -288.192138671875, "logps/rejected": -244.62954711914062, "loss": -0.1019, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.206674337387085, "rewards/margins": 0.8675975799560547, "rewards/rejected": -2.0742716789245605, "step": 2880 }, { "epoch": 2.98, "learning_rate": 2.6789131266743202e-09, "logits/chosen": -3.0061116218566895, "logits/rejected": -2.972447156906128, "logps/chosen": -306.70977783203125, "logps/rejected": -261.82244873046875, "loss": -0.1622, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.2506418228149414, "rewards/margins": 1.2404968738555908, "rewards/rejected": -2.4911389350891113, "step": 2890 }, { "epoch": 2.99, "learning_rate": 7.654037504783773e-10, "logits/chosen": -2.974818706512451, "logits/rejected": -2.963379383087158, "logps/chosen": -263.7333679199219, "logps/rejected": -262.7867126464844, "loss": -0.2602, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.150649070739746, "rewards/margins": 1.2455689907073975, "rewards/rejected": -2.3962180614471436, "step": 2900 }, { "epoch": 3.0, "eval_logits/chosen": -2.965512990951538, "eval_logits/rejected": -2.9399757385253906, "eval_logps/chosen": -282.7847900390625, "eval_logps/rejected": -252.9479217529297, "eval_loss": -0.203842431306839, "eval_rewards/accuracies": 0.6840000152587891, "eval_rewards/chosen": -1.1628247499465942, "eval_rewards/margins": 1.2828813791275024, "eval_rewards/rejected": -2.4457061290740967, "eval_runtime": 446.4813, "eval_samples_per_second": 4.479, "eval_steps_per_second": 0.28, "step": 2904 }, { "epoch": 3.0, "step": 2904, "total_flos": 0.0, "train_loss": 0.36701411283100355, "train_runtime": 84636.1866, "train_samples_per_second": 2.196, "train_steps_per_second": 0.034 } ], "logging_steps": 10, "max_steps": 2904, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }